diff --git a/app/services/voice.py b/app/services/voice.py index 4cc7327..e2d9fe9 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -16,7 +16,7 @@ from app.utils import utils def get_all_azure_voices(filter_locals=None) -> list[str]: - voices_str = """ + azure_voices_str = """ Name: af-ZA-AdriNeural Gender: Female @@ -1015,7 +1015,7 @@ Gender: Female # 定义正则表达式模式,用于匹配 Name 和 Gender 行 pattern = re.compile(r"Name:\s*(.+)\s*Gender:\s*(.+)\s*", re.MULTILINE) # 使用正则表达式查找所有匹配项 - matches = pattern.findall(voices_str) + matches = pattern.findall(azure_voices_str) for name, gender in matches: # 应用过滤条件 @@ -1219,7 +1219,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) """ start_t = mktimestamp(start_time).replace(".", ",") end_t = mktimestamp(end_time).replace(".", ",") - return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n" + return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n" start_time = -1.0 sub_items = [] diff --git a/webui/Main.py b/webui/Main.py index 0ff0b02..4e4cee6 100644 --- a/webui/Main.py +++ b/webui/Main.py @@ -44,7 +44,7 @@ st.set_page_config( streamlit_style = """ """ @@ -107,6 +107,7 @@ support_locales = [ "th-TH", ] + def get_all_fonts(): fonts = [] for root, dirs, files in os.walk(font_dir): @@ -197,7 +198,8 @@ def tr(key): loc = locales.get(st.session_state["ui_language"], {}) return loc.get("Translation", {}).get(key, key) - # 创建基础设置折叠框 + +# 创建基础设置折叠框 if not config.app.get("hide_config", False): with st.expander(tr("Basic Settings"), expanded=False): config_panels = st.columns(3) @@ -220,7 +222,7 @@ if not config.app.get("hide_config", False): config.ui["hide_log"] = hide_log # 中间面板 - LLM 设置 - + with middle_config_panel: st.write(tr("LLM Settings")) llm_providers = [ @@ -423,31 +425,31 @@ if not config.app.get("hide_config", False): # 右侧面板 - API 密钥设置 with right_config_panel: - def get_keys_from_config(cfg_key): - api_keys = config.app.get(cfg_key, []) - if isinstance(api_keys, str): - api_keys = [api_keys] - api_key = ", ".join(api_keys) - return api_key + def get_keys_from_config(cfg_key): + api_keys = config.app.get(cfg_key, []) + if isinstance(api_keys, str): + api_keys = [api_keys] + api_key = ", ".join(api_keys) + return api_key - def save_keys_to_config(cfg_key, value): - value = value.replace(" ", "") - if value: - config.app[cfg_key] = value.split(",") + def save_keys_to_config(cfg_key, value): + value = value.replace(" ", "") + if value: + config.app[cfg_key] = value.split(",") - st.write(tr("Video Source Settings")) + st.write(tr("Video Source Settings")) - pexels_api_key = get_keys_from_config("pexels_api_keys") - pexels_api_key = st.text_input( - tr("Pexels API Key"), value=pexels_api_key, type="password" - ) - save_keys_to_config("pexels_api_keys", pexels_api_key) + pexels_api_key = get_keys_from_config("pexels_api_keys") + pexels_api_key = st.text_input( + tr("Pexels API Key"), value=pexels_api_key, type="password" + ) + save_keys_to_config("pexels_api_keys", pexels_api_key) - pixabay_api_key = get_keys_from_config("pixabay_api_keys") - pixabay_api_key = st.text_input( - tr("Pixabay API Key"), value=pixabay_api_key, type="password" - ) - save_keys_to_config("pixabay_api_keys", pixabay_api_key) + pixabay_api_key = get_keys_from_config("pixabay_api_keys") + pixabay_api_key = st.text_input( + tr("Pixabay API Key"), value=pixabay_api_key, type="password" + ) + save_keys_to_config("pixabay_api_keys", pixabay_api_key) llm_provider = config.app.get("llm_provider", "").lower() panel = st.columns(3) @@ -615,42 +617,96 @@ with middle_panel: with st.container(border=True): st.write(tr("Audio Settings")) - # tts_providers = ['edge', 'azure'] - # tts_provider = st.selectbox(tr("TTS Provider"), tts_providers) + # 添加TTS服务器选择下拉框 + tts_servers = [ + ("azure-tts-v1", "Azure TTS V1"), + ("azure-tts-v2", "Azure TTS V2"), + ] + + # 获取保存的TTS服务器,默认为v1 + saved_tts_server = config.ui.get("tts_server", "azure-tts-v1") + saved_tts_server_index = 0 + for i, (server_value, _) in enumerate(tts_servers): + if server_value == saved_tts_server: + saved_tts_server_index = i + break + + selected_tts_server_index = st.selectbox( + tr("TTS Servers"), + options=range(len(tts_servers)), + format_func=lambda x: tts_servers[x][1], + index=saved_tts_server_index, + ) + + selected_tts_server = tts_servers[selected_tts_server_index][0] + config.ui["tts_server"] = selected_tts_server + + # 获取所有声音 + all_voices = voice.get_all_azure_voices(filter_locals=None) + + # 根据选择的TTS服务器筛选声音 + filtered_voices = [] + for v in all_voices: + if selected_tts_server == "azure-tts-v2": + # V2版本的声音名称中包含"v2" + if "V2" in v: + filtered_voices.append(v) + else: + # V1版本的声音名称中不包含"v2" + if "V2" not in v: + filtered_voices.append(v) - voices = voice.get_all_azure_voices(filter_locals=None) friendly_names = { v: v.replace("Female", tr("Female")) .replace("Male", tr("Male")) .replace("Neural", "") - for v in voices + for v in filtered_voices } + saved_voice_name = config.ui.get("voice_name", "") saved_voice_name_index = 0 + + # 检查保存的声音是否在当前筛选的声音列表中 if saved_voice_name in friendly_names: saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) else: - for i, v in enumerate(voices): - if ( - v.lower().startswith(st.session_state["ui_language"].lower()) - and "V2" not in v - ): + # 如果不在,则根据当前UI语言选择一个默认声音 + for i, v in enumerate(filtered_voices): + if v.lower().startswith(st.session_state["ui_language"].lower()): saved_voice_name_index = i break - selected_friendly_name = st.selectbox( - tr("Speech Synthesis"), - options=list(friendly_names.values()), - index=saved_voice_name_index, - ) + # 如果没有找到匹配的声音,使用第一个声音 + if saved_voice_name_index >= len(friendly_names) and friendly_names: + saved_voice_name_index = 0 - voice_name = list(friendly_names.keys())[ - list(friendly_names.values()).index(selected_friendly_name) - ] - params.voice_name = voice_name - config.ui["voice_name"] = voice_name + # 确保有声音可选 + if friendly_names: + selected_friendly_name = st.selectbox( + tr("Speech Synthesis"), + options=list(friendly_names.values()), + index=min(saved_voice_name_index, len(friendly_names) - 1) + if friendly_names + else 0, + ) - if st.button(tr("Play Voice")): + voice_name = list(friendly_names.keys())[ + list(friendly_names.values()).index(selected_friendly_name) + ] + params.voice_name = voice_name + config.ui["voice_name"] = voice_name + else: + # 如果没有声音可选,显示提示信息 + st.warning( + tr( + "No voices available for the selected TTS server. Please select another server." + ) + ) + params.voice_name = "" + config.ui["voice_name"] = "" + + # 只有在有声音可选时才显示试听按钮 + if friendly_names and st.button(tr("Play Voice")): play_content = params.video_subject if not play_content: play_content = params.video_script @@ -680,7 +736,10 @@ with middle_panel: if os.path.exists(audio_file): os.remove(audio_file) - if voice.is_azure_v2_voice(voice_name): + # 当选择V2版本或者声音是V2声音时,显示服务区域和API key输入框 + if selected_tts_server == "azure-tts-v2" or ( + voice_name and voice.is_azure_v2_voice(voice_name) + ): saved_azure_speech_region = config.azure.get("speech_region", "") saved_azure_speech_key = config.azure.get("speech_key", "") azure_speech_region = st.text_input( @@ -876,4 +935,4 @@ if start_button: logger.info(tr("Video Generation Completed")) scroll_to_bottom() -config.save_config() \ No newline at end of file +config.save_config() diff --git a/webui/i18n/de.json b/webui/i18n/de.json index f2fc0da..159154c 100644 --- a/webui/i18n/de.json +++ b/webui/i18n/de.json @@ -91,6 +91,8 @@ "Voice Example": "Dies ist ein Beispieltext zum Testen der Sprachsynthese", "Synthesizing Voice": "Sprachsynthese läuft, bitte warten...", "TTS Provider": "Sprachsynthese-Anbieter auswählen", + "TTS Servers": "TTS-Server", + "No voices available for the selected TTS server. Please select another server.": "Keine Stimmen für den ausgewählten TTS-Server verfügbar. Bitte wählen Sie einen anderen Server.", "Hide Log": "Protokoll ausblenden", "Hide Basic Settings": "Basis-Einstellungen ausblenden\n\nWenn diese Option deaktiviert ist, wird die Basis-Einstellungen-Leiste nicht auf der Seite angezeigt.\n\nWenn Sie sie erneut anzeigen möchten, setzen Sie `hide_config = false` in `config.toml`", "LLM Settings": "**LLM-Einstellungen**", diff --git a/webui/i18n/en.json b/webui/i18n/en.json index b1738e7..2116844 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -91,6 +91,8 @@ "Voice Example": "This is an example text for testing speech synthesis", "Synthesizing Voice": "Synthesizing voice, please wait...", "TTS Provider": "Select the voice synthesis provider", + "TTS Servers": "TTS Servers", + "No voices available for the selected TTS server. Please select another server.": "No voices available for the selected TTS server. Please select another server.", "Hide Log": "Hide Log", "Hide Basic Settings": "Hide Basic Settings\n\nHidden, the basic settings panel will not be displayed on the page.\n\nIf you need to display it again, please set `hide_config = false` in `config.toml`", "LLM Settings": "**LLM Settings**", diff --git a/webui/i18n/pt.json b/webui/i18n/pt.json index 0c1202e..55339c9 100644 --- a/webui/i18n/pt.json +++ b/webui/i18n/pt.json @@ -91,6 +91,8 @@ "Voice Example": "Este é um exemplo de texto para testar a síntese de fala", "Synthesizing Voice": "Sintetizando voz, por favor aguarde...", "TTS Provider": "Selecione o provedor de síntese de voz", + "TTS Servers": "Servidores TTS", + "No voices available for the selected TTS server. Please select another server.": "Não há vozes disponíveis para o servidor TTS selecionado. Por favor, selecione outro servidor.", "Hide Log": "Ocultar Log", "Hide Basic Settings": "Ocultar Configurações Básicas\n\nOculto, o painel de configurações básicas não será exibido na página.\n\nSe precisar exibi-lo novamente, defina `hide_config = false` em `config.toml`", "LLM Settings": "**Configurações do LLM**", diff --git a/webui/i18n/vi.json b/webui/i18n/vi.json index 7e1ebef..e71fa6d 100644 --- a/webui/i18n/vi.json +++ b/webui/i18n/vi.json @@ -91,6 +91,8 @@ "Voice Example": "Đây là văn bản mẫu để kiểm tra tổng hợp giọng nói", "Synthesizing Voice": "Đang tổng hợp giọng nói, vui lòng đợi...", "TTS Provider": "Chọn nhà cung cấp tổng hợp giọng nói", + "TTS Servers": "Máy chủ TTS", + "No voices available for the selected TTS server. Please select another server.": "Không có giọng nói nào cho máy chủ TTS đã chọn. Vui lòng chọn máy chủ khác.", "Hide Log": "Ẩn Nhật Ký", "Hide Basic Settings": "Ẩn Cài Đặt Cơ Bản\n\nẨn, thanh cài đặt cơ bản sẽ không hiển thị trên trang web.\n\nNếu bạn muốn hiển thị lại, vui lòng đặt `hide_config = false` trong `config.toml`", "LLM Settings": "**Cài Đặt LLM**", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 85289c0..e48d560 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -91,6 +91,8 @@ "Voice Example": "这是一段测试语音合成的示例文本", "Synthesizing Voice": "语音合成中,请稍候...", "TTS Provider": "语音合成提供商", + "TTS Servers": "TTS服务器", + "No voices available for the selected TTS server. Please select another server.": "当前选择的TTS服务器没有可用的声音,请选择其他服务器。", "Hide Log": "隐藏日志", "Hide Basic Settings": "隐藏基础设置\n\n隐藏后,基础设置面板将不会显示在页面中。\n\n如需要再次显示,请在 `config.toml` 中设置 `hide_config = false`", "LLM Settings": "**大模型设置**",