完善音频信息提取能力,优化类型与工程化配置

This commit is contained in:
YILS
2025-08-04 10:35:26 +08:00
parent 6dad1af850
commit 4c1b8294ea
11 changed files with 139 additions and 23 deletions

1
.npmrc
View File

@@ -23,3 +23,4 @@ ffmpeg_binaries_url=https://cdn.npmmirror.com/binaries/ffmpeg-static
# 平铺依赖以便electron-builder依赖分析与打包
shamefully-hoist=true
engine-strict = true
update-notifier=false

View File

@@ -35,11 +35,11 @@ interface Window {
) => Promise<import('./types').ListFilesFromFolderRecord[]>
edgeTtsGetVoiceList: () => Promise<import('./lib/edge-tts').EdgeTTSVoice[]>
edgeTtsSynthesizeToBase64: (
params: import('./tts/types').edgeTtsSynthesizeCommonParams,
params: import('./tts/types').EdgeTtsSynthesizeCommonParams,
) => Promise<string>
edgeTtsSynthesizeToFile: (
params: import('./tts/types').edgeTtsSynthesizeToFileParams,
) => Promise<void>
params: import('./tts/types').EdgeTtsSynthesizeToFileParams,
) => Promise<import('./tts/types').EdgeTtsSynthesizeToFileResult>
renderVideo: (
params: import('./ffmpeg/types').RenderVideoParams,
) => Promise<import('./ffmpeg/types').ExecuteFFmpegResult>

View File

@@ -7,7 +7,7 @@ import {
BulkInsertOrUpdateParams,
} from './sqlite/types'
import { ListFilesFromFolderParams, SelectFolderParams } from './types'
import { edgeTtsSynthesizeCommonParams } from './tts/types'
import { EdgeTtsSynthesizeCommonParams } from './tts/types'
import { RenderVideoParams } from './ffmpeg/types'
// --------- 向界面渲染进程暴露某些API ---------
@@ -44,9 +44,9 @@ contextBridge.exposeInMainWorld('electron', {
listFilesFromFolder: (params: ListFilesFromFolderParams) =>
ipcRenderer.invoke('list-files-from-folder', params),
edgeTtsGetVoiceList: () => ipcRenderer.invoke('edge-tts-get-voice-list'),
edgeTtsSynthesizeToBase64: (params: edgeTtsSynthesizeCommonParams) =>
edgeTtsSynthesizeToBase64: (params: EdgeTtsSynthesizeCommonParams) =>
ipcRenderer.invoke('edge-tts-synthesize-to-base64', params),
edgeTtsSynthesizeToFile: (params: edgeTtsSynthesizeCommonParams) =>
edgeTtsSynthesizeToFile: (params: EdgeTtsSynthesizeCommonParams) =>
ipcRenderer.invoke('edge-tts-synthesize-to-file', params),
renderVideo: (params: RenderVideoParams) => ipcRenderer.invoke('render-video', params),
})

View File

@@ -2,7 +2,12 @@ import fs from 'node:fs'
import path from 'node:path'
import { app } from 'electron'
import { EdgeTTS } from '../lib/edge-tts'
import { edgeTtsSynthesizeCommonParams, edgeTtsSynthesizeToFileParams } from './types'
import { parseBuffer } from 'music-metadata'
import {
EdgeTtsSynthesizeCommonParams,
EdgeTtsSynthesizeToFileParams,
EdgeTtsSynthesizeToFileResult,
} from './types'
const edgeTts = new EdgeTTS()
@@ -14,13 +19,15 @@ export function edgeTtsGetVoiceList() {
return edgeTts.getVoices()
}
export async function edgeTtsSynthesizeToBase64(params: edgeTtsSynthesizeCommonParams) {
/**
 * Synthesizes speech with the shared `edgeTts` instance and resolves with the
 * value produced by the synthesis result's `toBase64()`.
 *
 * @param params - Text, voice and synthesis options forwarded to `edgeTts.synthesize`.
 * @returns Whatever `toBase64()` returns for the synthesized audio.
 */
export async function edgeTtsSynthesizeToBase64(params: EdgeTtsSynthesizeCommonParams) {
  const synthesized = await edgeTts.synthesize(params.text, params.voice, params.options)
  return synthesized.toBase64()
}
export async function edgeTtsSynthesizeToFile(params: edgeTtsSynthesizeToFileParams) {
export async function edgeTtsSynthesizeToFile(
params: EdgeTtsSynthesizeToFileParams,
): Promise<EdgeTtsSynthesizeToFileResult> {
const { text, voice, options, withCaption } = params
const result = await edgeTts.synthesize(text, voice, options)
@@ -42,8 +49,8 @@ export async function edgeTtsSynthesizeToFile(params: edgeTtsSynthesizeToFilePar
fs.writeFileSync(srtPath, srtString)
}
// [TODO] 返回音频时长
const metadata = await parseBuffer(result.getBuffer())
return {
duration: 0,
duration: metadata.format.duration,
}
}

View File

@@ -1,12 +1,19 @@
import { SynthesisOptions } from '../lib/edge-tts'
export interface edgeTtsSynthesizeCommonParams {
/**
 * Parameters shared by the Edge TTS synthesis entry points.
 * All three fields are forwarded, in this order, to `edgeTts.synthesize(text, voice, options)`.
 */
export interface EdgeTtsSynthesizeCommonParams {
// Text to synthesize.
text: string
// Voice identifier; the renderer passes the selected voice's `ShortName`.
voice: string
// Synthesis options as defined by `SynthesisOptions` in `../lib/edge-tts`.
options: SynthesisOptions
}
export interface edgeTtsSynthesizeToFileParams extends edgeTtsSynthesizeCommonParams {
/**
 * Parameters for `edgeTtsSynthesizeToFile`: the common synthesis parameters
 * plus file-output specific switches.
 */
export interface EdgeTtsSynthesizeToFileParams extends EdgeTtsSynthesizeCommonParams {
// Optional caption switch; presumably controls whether an SRT subtitle file is
// written next to the audio — the guarding condition is outside the visible code, confirm.
withCaption?: boolean
// Optional output path. NOTE(review): not read in the visible part of
// `edgeTtsSynthesizeToFile`; verify how (or whether) it is consumed.
outputPath?: string
}
/**
 * Result resolved by `edgeTtsSynthesizeToFile`.
 */
export interface EdgeTtsSynthesizeToFileResult {
/**
 * Duration of the synthesized audio, in seconds.
 * Populated from `metadata.format.duration` of the parsed audio buffer, so it
 * may be `undefined`; callers must handle that case explicitly.
 */
duration: number | undefined
}

View File

@@ -20,6 +20,7 @@
"axios": "^1.11.0",
"better-sqlite3": "9.6.0",
"ffmpeg-static": "^5.2.0",
"music-metadata": "^11.7.3",
"subtitle": "4.2.2-alpha.0",
"ws": "^8.18.3"
},

90
pnpm-lock.yaml generated
View File

@@ -17,6 +17,9 @@ importers:
ffmpeg-static:
specifier: ^5.2.0
version: 5.2.0
music-metadata:
specifier: ^11.7.3
version: 11.7.3
subtitle:
specifier: 4.2.2-alpha.0
version: 4.2.2-alpha.0
@@ -765,6 +768,13 @@ packages:
resolution: {integrity: sha512-4BAffykYOgO+5nzBWYwE3W90sBgLJoUPRWWcL8wlyiM8IB8ipJz3UMJ9KXQd1RKQXpKp8Tutn80HZtWsu2u76w==}
engines: {node: '>=10'}
'@tokenizer/inflate@0.2.7':
resolution: {integrity: sha512-MADQgmZT1eKjp06jpI2yozxaU9uVs4GzzgSL+uEq7bVcJ9V1ZXQkeGNql1fsSI0gMy1vhvNTNbUqrx+pZfJVmg==}
engines: {node: '>=18'}
'@tokenizer/token@0.3.0':
resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==}
'@tootallnate/once@2.0.0':
resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==}
engines: {node: '>= 10'}
@@ -1315,6 +1325,10 @@ packages:
resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==}
engines: {node: ^14.18.0 || >=16.10.0}
content-type@1.0.5:
resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==}
engines: {node: '>= 0.6'}
convert-source-map@2.0.0:
resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==}
@@ -1591,6 +1605,9 @@ packages:
picomatch:
optional: true
fflate@0.8.2:
resolution: {integrity: sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==}
ffmpeg-static@5.2.0:
resolution: {integrity: sha512-WrM7kLW+do9HLr+H6tk7LzQ7kPqbAgLjdzNE32+u3Ff11gXt9Kkkd2nusGFrlWMIe+XaA97t+I8JS7sZIrvRgA==}
engines: {node: '>=16'}
@@ -1599,6 +1616,10 @@ packages:
resolution: {integrity: sha512-d+l3qxjSesT4V7v2fh+QnmFnUWv9lSpjarhShNTgBOfA0ttejbQUAlHLitbjkoRiDulW0OPoQPYIGhIC8ohejg==}
engines: {node: '>=18'}
file-type@21.0.0:
resolution: {integrity: sha512-ek5xNX2YBYlXhiUXui3D/BXa3LdqPmoLJ7rqEx2bKJ7EAUEfmXgW0Das7Dc6Nr9MvqaOnIqiPV0mZk/r/UpNAg==}
engines: {node: '>=20'}
file-uri-to-path@1.0.0:
resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==}
@@ -1990,6 +2011,10 @@ packages:
mdn-data@2.12.2:
resolution: {integrity: sha512-IEn+pegP1aManZuckezWCO+XZQDplx1366JoVhTpMpBB1sPey/SbveZQUosKiKiGYjg1wH4pMlNgXbCiYgihQA==}
media-typer@1.1.0:
resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==}
engines: {node: '>= 0.8'}
micromatch@4.0.8:
resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
engines: {node: '>=8.6'}
@@ -2076,6 +2101,10 @@ packages:
multipipe@4.0.0:
resolution: {integrity: sha512-jzcEAzFXoWwWwUbvHCNPwBlTz3WCWe/jPcXSmTfbo/VjRwRTfvLZ/bdvtiTdqCe8d4otCSsPCbhGYcX+eggpKQ==}
music-metadata@11.7.3:
resolution: {integrity: sha512-xYbaSFpHi6qOUTFCb7fvC4/6Uk1NAJey2pSwNQPqJkCLwleYGUWVRKlxnNy2MsTaDRPnIwXdkgrmeiP/GDq/ww==}
engines: {node: '>=18'}
nanoid@3.3.11:
resolution: {integrity: sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==}
engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1}
@@ -2474,6 +2503,10 @@ packages:
resolution: {integrity: sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==}
engines: {node: '>=0.10.0'}
strtok3@10.3.4:
resolution: {integrity: sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==}
engines: {node: '>=18'}
subtitle@4.2.2-alpha.0:
resolution: {integrity: sha512-IMS+L8lXjOLveg5BC/bVZy+36/x2NqMIQmVDhbquDpxLnXugzmz7/yHHFZ7b9YLfqNaBdXwh1lsnAds3g1FnCQ==}
engines: {node: '>=10'}
@@ -2531,6 +2564,10 @@ packages:
resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
engines: {node: '>=8.0'}
token-types@6.0.4:
resolution: {integrity: sha512-MD9MjpVNhVyH4fyd5rKphjvt/1qj+PtQUz65aFqAZA6XniWAuSFRjLk3e2VALEFlh9OwBpXUN7rfeqSnT/Fmkw==}
engines: {node: '>=14.16'}
totalist@3.0.1:
resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==}
engines: {node: '>=6'}
@@ -2556,6 +2593,10 @@ packages:
ufo@1.6.1:
resolution: {integrity: sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==}
uint8array-extras@1.4.0:
resolution: {integrity: sha512-ZPtzy0hu4cZjv3z5NW9gfKnNLjoz4y6uv4HlelAjDK7sY/xOkKZv9xK/WQpcsBB3jEybChz9DPC2U/+cusjJVQ==}
engines: {node: '>=18'}
unconfig@7.3.2:
resolution: {integrity: sha512-nqG5NNL2wFVGZ0NA/aCFw0oJ2pxSf1lwg4Z5ill8wd7K4KX/rQbHlwbh+bjctXL5Ly1xtzHenHGOK0b+lG6JVg==}
@@ -3407,6 +3448,16 @@ snapshots:
dependencies:
defer-to-connect: 2.0.1
'@tokenizer/inflate@0.2.7':
dependencies:
debug: 4.4.1
fflate: 0.8.2
token-types: 6.0.4
transitivePeerDependencies:
- supports-color
'@tokenizer/token@0.3.0': {}
'@tootallnate/once@2.0.0': {}
'@types/better-sqlite3@7.6.13':
@@ -4166,6 +4217,8 @@ snapshots:
consola@3.4.2: {}
content-type@1.0.5: {}
convert-source-map@2.0.0: {}
copy-anything@3.0.5:
@@ -4483,6 +4536,8 @@ snapshots:
optionalDependencies:
picomatch: 4.0.2
fflate@0.8.2: {}
ffmpeg-static@5.2.0:
dependencies:
'@derhuerst/http-basic': 8.2.4
@@ -4496,6 +4551,15 @@ snapshots:
dependencies:
is-unicode-supported: 2.1.0
file-type@21.0.0:
dependencies:
'@tokenizer/inflate': 0.2.7
strtok3: 10.3.4
token-types: 6.0.4
uint8array-extras: 1.4.0
transitivePeerDependencies:
- supports-color
file-uri-to-path@1.0.0: {}
filelist@1.0.4:
@@ -4887,6 +4951,8 @@ snapshots:
mdn-data@2.12.2: {}
media-typer@1.1.0: {}
micromatch@4.0.8:
dependencies:
braces: 3.0.3
@@ -4960,6 +5026,19 @@ snapshots:
duplexer2: 0.1.4
object-assign: 4.1.1
music-metadata@11.7.3:
dependencies:
'@tokenizer/token': 0.3.0
content-type: 1.0.5
debug: 4.4.1
file-type: 21.0.0
media-typer: 1.1.0
strtok3: 10.3.4
token-types: 6.0.4
uint8array-extras: 1.4.0
transitivePeerDependencies:
- supports-color
nanoid@3.3.11: {}
nanoid@5.1.5: {}
@@ -5359,6 +5438,10 @@ snapshots:
strip-json-comments@2.0.1: {}
strtok3@10.3.4:
dependencies:
'@tokenizer/token': 0.3.0
subtitle@4.2.2-alpha.0:
dependencies:
'@types/multipipe': 3.0.5
@@ -5436,6 +5519,11 @@ snapshots:
dependencies:
is-number: 7.0.0
token-types@6.0.4:
dependencies:
'@tokenizer/token': 0.3.0
ieee754: 1.2.1
totalist@3.0.1: {}
truncate-utf8-bytes@1.0.2:
@@ -5455,6 +5543,8 @@ snapshots:
ufo@1.6.1: {}
uint8array-extras@1.4.0: {}
unconfig@7.3.2:
dependencies:
'@quansync/fs': 0.1.3

View File

@@ -129,7 +129,7 @@ const synthesizedSpeechToFile = async (option?: { withCaption?: boolean }) => {
if (!configValid()) throw new Error('TTS语音合成配置无效')
try {
await window.electron.edgeTtsSynthesizeToFile({
const result = await window.electron.edgeTtsSynthesizeToFile({
text: appStore.tryListeningText,
voice: appStore.voice!.ShortName,
options: {
@@ -137,6 +137,7 @@ const synthesizedSpeechToFile = async (option?: { withCaption?: boolean }) => {
},
withCaption: option?.withCaption,
})
return result
} catch (error) {
console.log('语音合成失败', error)
toast.error('语音合成失败,请检查网络')

View File

@@ -102,7 +102,9 @@ const refreshAssets = async () => {
refreshAssets()
// 获取视频分镜随机素材片段
const getVideoSegments = async () => {}
// Pick random footage segments for the video storyboard.
// Stub: `options.duration` is accepted but not used yet and the body is empty,
// so the returned promise currently resolves to `undefined`.
const getVideoSegments = async (options: { duration: number }) => {
}
defineExpose({ getVideoSegments })
</script>

View File

@@ -18,7 +18,7 @@
size="x-large"
color="deep-purple-accent-3"
prepend-icon="mdi-rocket-launch"
@click="onRenderVideo"
@click="emit('renderVideo')"
>
开始合成
</v-btn>
@@ -104,8 +104,8 @@ import { useAppStore } from '@/store'
const appStore = useAppStore()
defineEmits<{
renderVideo(): void
// Events this component can emit. `renderVideo` carries no payload and is
// fired from the template via `emit('renderVideo')` on the render button click.
const emit = defineEmits<{
(e: 'renderVideo'): void
}>()
// 配置合成选项

View File

@@ -49,14 +49,21 @@ const handleRenderVideo = async () => {
return
}
toast.info('即将开发完成,敬请期待')
return
// TTS合成语音
await TtsControlInstance.value?.synthesizedSpeechToFile({ withCaption: true })
const ttsResult = await TtsControlInstance.value?.synthesizedSpeechToFile({ withCaption: true })
if (ttsResult?.duration === undefined) {
toast.warning('语音合成失败,音频文件损坏')
return
}
if (ttsResult?.duration === 0) {
toast.warning('语音时长为0秒可能文案为空')
return
}
// 获取视频片段
// const videoSegments = VideoManageInstance.value?.getVideoSegments()
const videoSegments = VideoManageInstance.value?.getVideoSegments({
duration: ttsResult.duration,
})
// await window.electron.renderVideo({})
} catch (error) {