From e8b20c697dbcdba16d449a398e17e5bcd4ddd9aa Mon Sep 17 00:00:00 2001
From: yyhhyyyyyy <95077259+yyhhyyyyyy@users.noreply.github.com>
Date: Wed, 24 Jul 2024 14:25:20 +0800
Subject: [PATCH] feat: support custom subtitle positioning
---
app/models/schema.py | 1 +
app/router.py | 3 +-
app/services/video.py | 147 ++++++++------
main.py | 16 +-
webui/Main.py | 445 ++++++++++++++++++++++++++----------------
webui/i18n/de.json | 1 +
webui/i18n/en.json | 1 +
webui/i18n/vi.json | 1 +
webui/i18n/zh.json | 1 +
9 files changed, 382 insertions(+), 234 deletions(-)
diff --git a/app/models/schema.py b/app/models/schema.py
index 6ecb63a..2a77baf 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -119,6 +119,7 @@ class VideoParams(BaseModel):
subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
+ custom_position: float = 70.0
font_name: Optional[str] = "STHeitiMedium.ttc"
text_fore_color: Optional[str] = "#FFFFFF"
text_background_color: Optional[str] = "transparent"
diff --git a/app/router.py b/app/router.py
index 3822743..cf84037 100644
--- a/app/router.py
+++ b/app/router.py
@@ -6,9 +6,10 @@ Resources:
1. https://fastapi.tiangolo.com/tutorial/bigger-applications
"""
+
from fastapi import APIRouter
-from app.controllers.v1 import video, llm
+from app.controllers.v1 import llm, video
root_api_router = APIRouter()
# v1
diff --git a/app/services/video.py b/app/services/video.py
index f93dd8d..9aed533 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -1,13 +1,14 @@
import glob
import random
from typing import List
-from PIL import ImageFont, Image
+
from loguru import logger
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
+from PIL import ImageFont
from app.models import const
-from app.models.schema import VideoAspect, VideoParams, VideoConcatMode, MaterialInfo
+from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams
from app.utils import utils
@@ -27,14 +28,15 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
return ""
-def combine_videos(combined_video_path: str,
- video_paths: List[str],
- audio_file: str,
- video_aspect: VideoAspect = VideoAspect.portrait,
- video_concat_mode: VideoConcatMode = VideoConcatMode.random,
- max_clip_duration: int = 5,
- threads: int = 2,
- ) -> str:
+def combine_videos(
+ combined_video_path: str,
+ video_paths: List[str],
+ audio_file: str,
+ video_aspect: VideoAspect = VideoAspect.portrait,
+ video_concat_mode: VideoConcatMode = VideoConcatMode.random,
+ max_clip_duration: int = 5,
+ threads: int = 2,
+) -> str:
audio_clip = AudioFileClip(audio_file)
audio_duration = audio_clip.duration
logger.info(f"max duration of audio: {audio_duration} seconds")
@@ -102,13 +104,19 @@ def combine_videos(combined_video_path: str,
new_height = int(clip_h * scale_factor)
clip_resized = clip.resize(newsize=(new_width, new_height))
- background = ColorClip(size=(video_width, video_height), color=(0, 0, 0))
- clip = CompositeVideoClip([
- background.set_duration(clip.duration),
- clip_resized.set_position("center")
- ])
+ background = ColorClip(
+ size=(video_width, video_height), color=(0, 0, 0)
+ )
+ clip = CompositeVideoClip(
+ [
+ background.set_duration(clip.duration),
+ clip_resized.set_position("center"),
+ ]
+ )
- logger.info(f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}")
+ logger.info(
+ f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
+ )
if clip.duration > max_clip_duration:
clip = clip.subclip(0, max_clip_duration)
@@ -118,21 +126,22 @@ def combine_videos(combined_video_path: str,
video_clip = concatenate_videoclips(clips)
video_clip = video_clip.set_fps(30)
- logger.info(f"writing")
+ logger.info("writing")
# https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
- video_clip.write_videofile(filename=combined_video_path,
- threads=threads,
- logger=None,
- temp_audiofile_path=output_dir,
- audio_codec="aac",
- fps=30,
- )
+ video_clip.write_videofile(
+ filename=combined_video_path,
+ threads=threads,
+ logger=None,
+ temp_audiofile_path=output_dir,
+ audio_codec="aac",
+ fps=30,
+ )
video_clip.close()
- logger.success(f"completed")
+ logger.success("completed")
return combined_video_path
-def wrap_text(text, max_width, font='Arial', fontsize=60):
+def wrap_text(text, max_width, font="Arial", fontsize=60):
# 创建字体对象
font = ImageFont.truetype(font, fontsize)
@@ -151,7 +160,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
_wrapped_lines_ = []
words = text.split(" ")
- _txt_ = ''
+ _txt_ = ""
for word in words:
_before = _txt_
_txt_ += f"{word} "
@@ -167,14 +176,14 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
_wrapped_lines_.append(_txt_)
if processed:
_wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
- result = '\n'.join(_wrapped_lines_).strip()
+ result = "\n".join(_wrapped_lines_).strip()
height = len(_wrapped_lines_) * height
# logger.warning(f"wrapped text: {result}")
return result, height
_wrapped_lines_ = []
chars = list(text)
- _txt_ = ''
+ _txt_ = ""
for word in chars:
_txt_ += word
_width, _height = get_text_size(_txt_)
@@ -182,20 +191,21 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
continue
else:
_wrapped_lines_.append(_txt_)
- _txt_ = ''
+ _txt_ = ""
_wrapped_lines_.append(_txt_)
- result = '\n'.join(_wrapped_lines_).strip()
+ result = "\n".join(_wrapped_lines_).strip()
height = len(_wrapped_lines_) * height
# logger.warning(f"wrapped text: {result}")
return result, height
-def generate_video(video_path: str,
- audio_path: str,
- subtitle_path: str,
- output_file: str,
- params: VideoParams,
- ):
+def generate_video(
+ video_path: str,
+ audio_path: str,
+ subtitle_path: str,
+ output_file: str,
+ params: VideoParams,
+):
aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution()
@@ -215,7 +225,7 @@ def generate_video(video_path: str,
if not params.font_name:
params.font_name = "STHeitiMedium.ttc"
font_path = os.path.join(utils.font_dir(), params.font_name)
- if os.name == 'nt':
+ if os.name == "nt":
font_path = font_path.replace("\\", "/")
logger.info(f"using font: {font_path}")
@@ -223,11 +233,9 @@ def generate_video(video_path: str,
def create_text_clip(subtitle_item):
phrase = subtitle_item[1]
max_width = video_width * 0.9
- wrapped_txt, txt_height = wrap_text(phrase,
- max_width=max_width,
- font=font_path,
- fontsize=params.font_size
- )
+ wrapped_txt, txt_height = wrap_text(
+ phrase, max_width=max_width, font=font_path, fontsize=params.font_size
+ )
_clip = TextClip(
wrapped_txt,
font=font_path,
@@ -243,18 +251,26 @@ def generate_video(video_path: str,
_clip = _clip.set_end(subtitle_item[0][1])
_clip = _clip.set_duration(duration)
if params.subtitle_position == "bottom":
- _clip = _clip.set_position(('center', video_height * 0.95 - _clip.h))
+ _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
elif params.subtitle_position == "top":
- _clip = _clip.set_position(('center', video_height * 0.1))
- else:
- _clip = _clip.set_position(('center', 'center'))
+ _clip = _clip.set_position(("center", video_height * 0.05))
+ elif params.subtitle_position == "custom":
+ # 确保字幕完全在屏幕内
+ margin = 10 # 额外的边距,单位为像素
+ max_y = video_height - _clip.h - margin
+ min_y = margin
+ custom_y = (video_height - _clip.h) * (params.custom_position / 100)
+ custom_y = max(min_y, min(custom_y, max_y)) # 限制 y 值在有效范围内
+ _clip = _clip.set_position(("center", custom_y))
+ else: # center
+ _clip = _clip.set_position(("center", "center"))
return _clip
video_clip = VideoFileClip(video_path)
audio_clip = AudioFileClip(audio_path).volumex(params.voice_volume)
if subtitle_path and os.path.exists(subtitle_path):
- sub = SubtitlesClip(subtitles=subtitle_path, encoding='utf-8')
+ sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
text_clips = []
for item in sub.subtitles:
clip = create_text_clip(subtitle_item=item)
@@ -264,24 +280,25 @@ def generate_video(video_path: str,
bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
if bgm_file:
try:
- bgm_clip = (AudioFileClip(bgm_file)
- .volumex(params.bgm_volume)
- .audio_fadeout(3))
+ bgm_clip = (
+ AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
+ )
bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
except Exception as e:
logger.error(f"failed to add bgm: {str(e)}")
video_clip = video_clip.set_audio(audio_clip)
- video_clip.write_videofile(output_file,
- audio_codec="aac",
- temp_audiofile_path=output_dir,
- threads=params.n_threads or 2,
- logger=None,
- fps=30,
- )
+ video_clip.write_videofile(
+ output_file,
+ audio_codec="aac",
+ temp_audiofile_path=output_dir,
+ threads=params.n_threads or 2,
+ logger=None,
+ fps=30,
+ )
video_clip.close()
- logger.success(f"completed")
+ logger.success("completed")
def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@@ -292,7 +309,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
ext = utils.parse_extension(material.url)
try:
clip = VideoFileClip(material.url)
- except Exception as e:
+ except Exception:
clip = ImageClip(material.url)
width = clip.size[0]
@@ -304,12 +321,18 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
if ext in const.FILE_TYPE_IMAGES:
logger.info(f"processing image: {material.url}")
# 创建一个图片剪辑,并设置持续时间为3秒钟
- clip = ImageClip(material.url).set_duration(clip_duration).set_position("center")
+ clip = (
+ ImageClip(material.url)
+ .set_duration(clip_duration)
+ .set_position("center")
+ )
# 使用resize方法来添加缩放效果。这里使用了lambda函数来使得缩放效果随时间变化。
# 假设我们想要从原始大小逐渐放大到120%的大小。
# t代表当前时间,clip.duration为视频总时长,这里是3秒。
# 注意:1 表示100%的大小,所以1.2表示120%的大小
- zoom_clip = clip.resize(lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration))
+ zoom_clip = clip.resize(
+ lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+ )
# 如果需要,可以创建一个包含缩放剪辑的复合视频剪辑
# (这在您想要在视频中添加其他元素时非常有用)
diff --git a/main.py b/main.py
index b7b8ab5..e84f32b 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,16 @@
import uvicorn
from loguru import logger
+
from app.config import config
-if __name__ == '__main__':
- logger.info("start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs")
- uvicorn.run(app="app.asgi:app", host=config.listen_host, port=config.listen_port, reload=config.reload_debug,
- log_level="warning")
+if __name__ == "__main__":
+ logger.info(
+ "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs"
+ )
+ uvicorn.run(
+ app="app.asgi:app",
+ host=config.listen_host,
+ port=config.listen_port,
+ reload=config.reload_debug,
+ log_level="warning",
+ )
diff --git a/webui/Main.py b/webui/Main.py
index ea0e89e..d4f52d4 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -1,6 +1,5 @@
-import sys
import os
-import time
+import sys
# Add the root directory of the project to the system path to allow importing modules from the project
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
@@ -10,31 +9,33 @@ if root_dir not in sys.path:
print(sys.path)
print("")
-import streamlit as st
-
import os
-from uuid import uuid4
import platform
-import streamlit.components.v1 as components
+from uuid import uuid4
+
+import streamlit as st
from loguru import logger
-st.set_page_config(page_title="MoneyPrinterTurbo",
- page_icon="🤖",
- layout="wide",
- initial_sidebar_state="auto",
- menu_items={
- 'Report a bug': "https://github.com/harry0703/MoneyPrinterTurbo/issues",
- 'About': "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
- "automatically generate the video copy, video materials, video subtitles, "
- "and video background music before synthesizing a high-definition short "
- "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo"
- })
+st.set_page_config(
+ page_title="MoneyPrinterTurbo",
+ page_icon="🤖",
+ layout="wide",
+ initial_sidebar_state="auto",
+ menu_items={
+ "Report a bug": "https://github.com/harry0703/MoneyPrinterTurbo/issues",
+ "About": "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
+ "automatically generate the video copy, video materials, video subtitles, "
+ "and video background music before synthesizing a high-definition short "
+ "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo",
+ },
+)
-from app.models.schema import VideoParams, VideoAspect, VideoConcatMode, MaterialInfo
-from app.services import task as tm, llm, voice
-from app.utils import utils
from app.config import config
-from app.models.const import FILE_TYPE_VIDEOS, FILE_TYPE_IMAGES
+from app.models.const import FILE_TYPE_IMAGES, FILE_TYPE_VIDEOS
+from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams
+from app.services import llm, voice
+from app.services import task as tm
+from app.utils import utils
hide_streamlit_style = """
@@ -42,7 +43,16 @@ hide_streamlit_style = """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
st.title(f"MoneyPrinterTurbo v{config.project_version}")
-support_locales = ["zh-CN", "zh-HK", "zh-TW", "de-DE", "en-US", "fr-FR", "vi-VN", "th-TH"]
+support_locales = [
+ "zh-CN",
+ "zh-HK",
+ "zh-TW",
+ "de-DE",
+ "en-US",
+ "fr-FR",
+ "vi-VN",
+ "th-TH",
+]
font_dir = os.path.join(root_dir, "resource", "fonts")
song_dir = os.path.join(root_dir, "resource", "songs")
@@ -51,14 +61,14 @@ config_file = os.path.join(root_dir, "webui", ".streamlit", "webui.toml")
system_locale = utils.get_system_locale()
# print(f"******** system locale: {system_locale} ********")
-if 'video_subject' not in st.session_state:
- st.session_state['video_subject'] = ''
-if 'video_script' not in st.session_state:
- st.session_state['video_script'] = ''
-if 'video_terms' not in st.session_state:
- st.session_state['video_terms'] = ''
-if 'ui_language' not in st.session_state:
- st.session_state['ui_language'] = config.ui.get("language", system_locale)
+if "video_subject" not in st.session_state:
+ st.session_state["video_subject"] = ""
+if "video_script" not in st.session_state:
+ st.session_state["video_script"] = ""
+if "video_terms" not in st.session_state:
+ st.session_state["video_terms"] = ""
+if "ui_language" not in st.session_state:
+ st.session_state["ui_language"] = config.ui.get("language", system_locale)
def get_all_fonts():
@@ -85,25 +95,25 @@ def open_task_folder(task_id):
sys = platform.system()
path = os.path.join(root_dir, "storage", "tasks", task_id)
if os.path.exists(path):
- if sys == 'Windows':
+ if sys == "Windows":
os.system(f"start {path}")
- if sys == 'Darwin':
+ if sys == "Darwin":
os.system(f"open {path}")
except Exception as e:
logger.error(e)
def scroll_to_bottom():
- js = f"""
+ js = """
"""
@@ -123,12 +133,15 @@ def init_log():
record["file"].path = f"./{relative_path}"
# 返回修改后的格式字符串
# 您可以根据需要调整这里的格式
- record['message'] = record['message'].replace(root_dir, ".")
+ record["message"] = record["message"].replace(root_dir, ".")
- _format = '{time:%Y-%m-%d %H:%M:%S}> | ' + \
- '{level}> | ' + \
- '"{file.path}:{line}": {function}> ' + \
- '- {message}>' + "\n"
+ _format = (
+ "{time:%Y-%m-%d %H:%M:%S}> | "
+ + "{level}> | "
+ + '"{file.path}:{line}": {function}> '
+ + "- {message}>"
+ + "\n"
+ )
return _format
logger.add(
@@ -145,7 +158,7 @@ locales = utils.load_locales(i18n_dir)
def tr(key):
- loc = locales.get(st.session_state['ui_language'], {})
+ loc = locales.get(st.session_state["ui_language"], {})
return loc.get("Translation", {}).get(key, key)
@@ -164,19 +177,22 @@ if not config.app.get("hide_config", False):
selected_index = 0
for i, code in enumerate(locales.keys()):
display_languages.append(f"{code} - {locales[code].get('Language')}")
- if code == st.session_state['ui_language']:
+ if code == st.session_state["ui_language"]:
selected_index = i
- selected_language = st.selectbox(tr("Language"), options=display_languages,
- index=selected_index)
+ selected_language = st.selectbox(
+ tr("Language"), options=display_languages, index=selected_index
+ )
if selected_language:
code = selected_language.split(" - ")[0].strip()
- st.session_state['ui_language'] = code
- config.ui['language'] = code
+ st.session_state["ui_language"] = code
+ config.ui["language"] = code
# 是否禁用日志显示
- hide_log = st.checkbox(tr("Hide Log"), value=config.app.get("hide_log", False))
- config.ui['hide_log'] = hide_log
+ hide_log = st.checkbox(
+ tr("Hide Log"), value=config.app.get("hide_log", False)
+ )
+ config.ui["hide_log"] = hide_log
with middle_config_panel:
# openai
@@ -187,8 +203,19 @@ if not config.app.get("hide_config", False):
# qwen (通义千问)
# gemini
# ollama
- llm_providers = ['OpenAI', 'Moonshot', 'Azure', 'Qwen', 'DeepSeek', 'Gemini', 'Ollama', 'G4f', 'OneAPI',
- "Cloudflare", "ERNIE"]
+ llm_providers = [
+ "OpenAI",
+ "Moonshot",
+ "Azure",
+ "Qwen",
+ "DeepSeek",
+ "Gemini",
+ "Ollama",
+ "G4f",
+ "OneAPI",
+ "Cloudflare",
+ "ERNIE",
+ ]
saved_llm_provider = config.app.get("llm_provider", "OpenAI").lower()
saved_llm_provider_index = 0
for i, provider in enumerate(llm_providers):
@@ -196,19 +223,25 @@ if not config.app.get("hide_config", False):
saved_llm_provider_index = i
break
- llm_provider = st.selectbox(tr("LLM Provider"), options=llm_providers, index=saved_llm_provider_index)
+ llm_provider = st.selectbox(
+ tr("LLM Provider"),
+ options=llm_providers,
+ index=saved_llm_provider_index,
+ )
llm_helper = st.container()
llm_provider = llm_provider.lower()
config.app["llm_provider"] = llm_provider
llm_api_key = config.app.get(f"{llm_provider}_api_key", "")
- llm_secret_key = config.app.get(f"{llm_provider}_secret_key", "") # only for baidu ernie
+ llm_secret_key = config.app.get(
+ f"{llm_provider}_secret_key", ""
+ ) # only for baidu ernie
llm_base_url = config.app.get(f"{llm_provider}_base_url", "")
llm_model_name = config.app.get(f"{llm_provider}_model_name", "")
llm_account_id = config.app.get(f"{llm_provider}_account_id", "")
tips = ""
- if llm_provider == 'ollama':
+ if llm_provider == "ollama":
if not llm_model_name:
llm_model_name = "qwen:7b"
if not llm_base_url:
@@ -224,7 +257,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 使用 `ollama list` 查看,比如 `qwen:7b`
"""
- if llm_provider == 'openai':
+ if llm_provider == "openai":
if not llm_model_name:
llm_model_name = "gpt-3.5-turbo"
with llm_helper:
@@ -236,7 +269,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 填写**有权限**的模型,[点击查看模型列表](https://platform.openai.com/settings/organization/limits)
"""
- if llm_provider == 'moonshot':
+ if llm_provider == "moonshot":
if not llm_model_name:
llm_model_name = "moonshot-v1-8k"
with llm_helper:
@@ -246,9 +279,11 @@ if not config.app.get("hide_config", False):
- **Base Url**: 固定为 https://api.moonshot.cn/v1
- **Model Name**: 比如 moonshot-v1-8k,[点击查看模型列表](https://platform.moonshot.cn/docs/intro#%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8)
"""
- if llm_provider == 'oneapi':
+ if llm_provider == "oneapi":
if not llm_model_name:
- llm_model_name = "claude-3-5-sonnet-20240620" # 默认模型,可以根据需要调整
+ llm_model_name = (
+ "claude-3-5-sonnet-20240620" # 默认模型,可以根据需要调整
+ )
with llm_helper:
tips = """
##### OneAPI 配置说明
@@ -256,8 +291,8 @@ if not config.app.get("hide_config", False):
- **Base Url**: 填写 OneAPI 的基础 URL
- **Model Name**: 填写您要使用的模型名称,例如 claude-3-5-sonnet-20240620
"""
-
- if llm_provider == 'qwen':
+
+ if llm_provider == "qwen":
if not llm_model_name:
llm_model_name = "qwen-max"
with llm_helper:
@@ -268,7 +303,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 比如 qwen-max,[点击查看模型列表](https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction#3ef6d0bcf91wy)
"""
- if llm_provider == 'g4f':
+ if llm_provider == "g4f":
if not llm_model_name:
llm_model_name = "gpt-3.5-turbo"
with llm_helper:
@@ -279,7 +314,7 @@ if not config.app.get("hide_config", False):
- **Base Url**: 留空
- **Model Name**: 比如 gpt-3.5-turbo,[点击查看模型列表](https://github.com/xtekky/gpt4free/blob/main/g4f/models.py#L308)
"""
- if llm_provider == 'azure':
+ if llm_provider == "azure":
with llm_helper:
tips = """
##### Azure 配置说明
@@ -289,7 +324,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 填写你实际的部署名
"""
- if llm_provider == 'gemini':
+ if llm_provider == "gemini":
if not llm_model_name:
llm_model_name = "gemini-1.0-pro"
@@ -302,7 +337,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 比如 gemini-1.0-pro
"""
- if llm_provider == 'deepseek':
+ if llm_provider == "deepseek":
if not llm_model_name:
llm_model_name = "deepseek-chat"
if not llm_base_url:
@@ -315,7 +350,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 固定为 deepseek-chat
"""
- if llm_provider == 'ernie':
+ if llm_provider == "ernie":
with llm_helper:
tips = """
##### 百度文心一言 配置说明
@@ -324,16 +359,23 @@ if not config.app.get("hide_config", False):
- **Base Url**: 填写 **请求地址** [点击查看文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11#%E8%AF%B7%E6%B1%82%E8%AF%B4%E6%98%8E)
"""
- if tips and config.ui['language'] == 'zh':
+ if tips and config.ui["language"] == "zh":
st.warning(
- "中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商\n- 国内可直接访问,不需要VPN \n- 注册就送额度,基本够用")
+ "中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商\n- 国内可直接访问,不需要VPN \n- 注册就送额度,基本够用"
+ )
st.info(tips)
- st_llm_api_key = st.text_input(tr("API Key"), value=llm_api_key, type="password")
+ st_llm_api_key = st.text_input(
+ tr("API Key"), value=llm_api_key, type="password"
+ )
st_llm_base_url = st.text_input(tr("Base Url"), value=llm_base_url)
st_llm_model_name = ""
- if llm_provider != 'ernie':
- st_llm_model_name = st.text_input(tr("Model Name"), value=llm_model_name, key=f"{llm_provider}_model_name_input")
+ if llm_provider != "ernie":
+ st_llm_model_name = st.text_input(
+ tr("Model Name"),
+ value=llm_model_name,
+ key=f"{llm_provider}_model_name_input",
+ )
if st_llm_model_name:
config.app[f"{llm_provider}_model_name"] = st_llm_model_name
else:
@@ -345,16 +387,21 @@ if not config.app.get("hide_config", False):
config.app[f"{llm_provider}_base_url"] = st_llm_base_url
if st_llm_model_name:
config.app[f"{llm_provider}_model_name"] = st_llm_model_name
- if llm_provider == 'ernie':
- st_llm_secret_key = st.text_input(tr("Secret Key"), value=llm_secret_key, type="password")
+ if llm_provider == "ernie":
+ st_llm_secret_key = st.text_input(
+ tr("Secret Key"), value=llm_secret_key, type="password"
+ )
config.app[f"{llm_provider}_secret_key"] = st_llm_secret_key
- if llm_provider == 'cloudflare':
- st_llm_account_id = st.text_input(tr("Account ID"), value=llm_account_id)
+ if llm_provider == "cloudflare":
+ st_llm_account_id = st.text_input(
+ tr("Account ID"), value=llm_account_id
+ )
if st_llm_account_id:
config.app[f"{llm_provider}_account_id"] = st_llm_account_id
with right_config_panel:
+
def get_keys_from_config(cfg_key):
api_keys = config.app.get(cfg_key, [])
if isinstance(api_keys, str):
@@ -362,19 +409,21 @@ if not config.app.get("hide_config", False):
api_key = ", ".join(api_keys)
return api_key
-
def save_keys_to_config(cfg_key, value):
value = value.replace(" ", "")
if value:
config.app[cfg_key] = value.split(",")
-
pexels_api_key = get_keys_from_config("pexels_api_keys")
- pexels_api_key = st.text_input(tr("Pexels API Key"), value=pexels_api_key, type="password")
+ pexels_api_key = st.text_input(
+ tr("Pexels API Key"), value=pexels_api_key, type="password"
+ )
save_keys_to_config("pexels_api_keys", pexels_api_key)
pixabay_api_key = get_keys_from_config("pixabay_api_keys")
- pixabay_api_key = st.text_input(tr("Pixabay API Key"), value=pixabay_api_key, type="password")
+ pixabay_api_key = st.text_input(
+ tr("Pixabay API Key"), value=pixabay_api_key, type="password"
+ )
save_keys_to_config("pixabay_api_keys", pixabay_api_key)
panel = st.columns(3)
@@ -388,8 +437,9 @@ uploaded_files = []
with left_panel:
with st.container(border=True):
st.write(tr("Video Script Settings"))
- params.video_subject = st.text_input(tr("Video Subject"),
- value=st.session_state['video_subject']).strip()
+ params.video_subject = st.text_input(
+ tr("Video Subject"), value=st.session_state["video_subject"]
+ ).strip()
video_languages = [
(tr("Auto Detect"), ""),
@@ -397,24 +447,27 @@ with left_panel:
for code in support_locales:
video_languages.append((code, code))
- selected_index = st.selectbox(tr("Script Language"),
- index=0,
- options=range(len(video_languages)), # 使用索引作为内部选项值
- format_func=lambda x: video_languages[x][0] # 显示给用户的是标签
- )
+ selected_index = st.selectbox(
+ tr("Script Language"),
+ index=0,
+ options=range(len(video_languages)), # 使用索引作为内部选项值
+ format_func=lambda x: video_languages[x][0], # 显示给用户的是标签
+ )
params.video_language = video_languages[selected_index][1]
- if st.button(tr("Generate Video Script and Keywords"), key="auto_generate_script"):
+ if st.button(
+ tr("Generate Video Script and Keywords"), key="auto_generate_script"
+ ):
with st.spinner(tr("Generating Video Script and Keywords")):
- script = llm.generate_script(video_subject=params.video_subject, language=params.video_language)
+ script = llm.generate_script(
+ video_subject=params.video_subject, language=params.video_language
+ )
terms = llm.generate_terms(params.video_subject, script)
- st.session_state['video_script'] = script
- st.session_state['video_terms'] = ", ".join(terms)
+ st.session_state["video_script"] = script
+ st.session_state["video_terms"] = ", ".join(terms)
params.video_script = st.text_area(
- tr("Video Script"),
- value=st.session_state['video_script'],
- height=280
+ tr("Video Script"), value=st.session_state["video_script"], height=280
)
if st.button(tr("Generate Video Keywords"), key="auto_generate_terms"):
if not params.video_script:
@@ -423,12 +476,11 @@ with left_panel:
with st.spinner(tr("Generating Video Keywords")):
terms = llm.generate_terms(params.video_subject, params.video_script)
- st.session_state['video_terms'] = ", ".join(terms)
+ st.session_state["video_terms"] = ", ".join(terms)
params.video_terms = st.text_area(
- tr("Video Keywords"),
- value=st.session_state['video_terms'],
- height=50)
+ tr("Video Keywords"), value=st.session_state["video_terms"], height=50
+ )
with middle_panel:
with st.container(border=True):
@@ -447,73 +499,93 @@ with middle_panel:
]
saved_video_source_name = config.app.get("video_source", "pexels")
- saved_video_source_index = [v[1] for v in video_sources].index(saved_video_source_name)
+ saved_video_source_index = [v[1] for v in video_sources].index(
+ saved_video_source_name
+ )
- selected_index = st.selectbox(tr("Video Source"),
- options=range(len(video_sources)),
- format_func=lambda x: video_sources[x][0],
- index=saved_video_source_index
- )
+ selected_index = st.selectbox(
+ tr("Video Source"),
+ options=range(len(video_sources)),
+ format_func=lambda x: video_sources[x][0],
+ index=saved_video_source_index,
+ )
params.video_source = video_sources[selected_index][1]
config.app["video_source"] = params.video_source
- if params.video_source == 'local':
+ if params.video_source == "local":
_supported_types = FILE_TYPE_VIDEOS + FILE_TYPE_IMAGES
- uploaded_files = st.file_uploader("Upload Local Files",
- type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"],
- accept_multiple_files=True)
+ uploaded_files = st.file_uploader(
+ "Upload Local Files",
+ type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"],
+ accept_multiple_files=True,
+ )
- selected_index = st.selectbox(tr("Video Concat Mode"),
- index=1,
- options=range(len(video_concat_modes)), # 使用索引作为内部选项值
- format_func=lambda x: video_concat_modes[x][0] # 显示给用户的是标签
- )
- params.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
+ selected_index = st.selectbox(
+ tr("Video Concat Mode"),
+ index=1,
+ options=range(len(video_concat_modes)), # 使用索引作为内部选项值
+ format_func=lambda x: video_concat_modes[x][0], # 显示给用户的是标签
+ )
+ params.video_concat_mode = VideoConcatMode(
+ video_concat_modes[selected_index][1]
+ )
video_aspect_ratios = [
(tr("Portrait"), VideoAspect.portrait.value),
(tr("Landscape"), VideoAspect.landscape.value),
]
- selected_index = st.selectbox(tr("Video Ratio"),
- options=range(len(video_aspect_ratios)), # 使用索引作为内部选项值
- format_func=lambda x: video_aspect_ratios[x][0] # 显示给用户的是标签
- )
+ selected_index = st.selectbox(
+ tr("Video Ratio"),
+ options=range(len(video_aspect_ratios)), # 使用索引作为内部选项值
+ format_func=lambda x: video_aspect_ratios[x][0], # 显示给用户的是标签
+ )
params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
- params.video_clip_duration = st.selectbox(tr("Clip Duration"), options=[2, 3, 4, 5, 6], index=1)
- params.video_count = st.selectbox(tr("Number of Videos Generated Simultaneously"), options=[1, 2, 3, 4, 5],
- index=0)
+ params.video_clip_duration = st.selectbox(
+ tr("Clip Duration"), options=[2, 3, 4, 5, 6], index=1
+ )
+ params.video_count = st.selectbox(
+ tr("Number of Videos Generated Simultaneously"),
+ options=[1, 2, 3, 4, 5],
+ index=0,
+ )
with st.container(border=True):
st.write(tr("Audio Settings"))
# tts_providers = ['edge', 'azure']
# tts_provider = st.selectbox(tr("TTS Provider"), tts_providers)
- voices = voice.get_all_azure_voices(
- filter_locals=support_locales)
+ voices = voice.get_all_azure_voices(filter_locals=support_locales)
friendly_names = {
- v: v.
- replace("Female", tr("Female")).
- replace("Male", tr("Male")).
- replace("Neural", "") for
- v in voices}
+ v: v.replace("Female", tr("Female"))
+ .replace("Male", tr("Male"))
+ .replace("Neural", "")
+ for v in voices
+ }
saved_voice_name = config.ui.get("voice_name", "")
saved_voice_name_index = 0
if saved_voice_name in friendly_names:
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
else:
for i, v in enumerate(voices):
- if v.lower().startswith(st.session_state['ui_language'].lower()) and "V2" not in v:
+ if (
+ v.lower().startswith(st.session_state["ui_language"].lower())
+ and "V2" not in v
+ ):
saved_voice_name_index = i
break
- selected_friendly_name = st.selectbox(tr("Speech Synthesis"),
- options=list(friendly_names.values()),
- index=saved_voice_name_index)
+ selected_friendly_name = st.selectbox(
+ tr("Speech Synthesis"),
+ options=list(friendly_names.values()),
+ index=saved_voice_name_index,
+ )
- voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
+ voice_name = list(friendly_names.keys())[
+ list(friendly_names.values()).index(selected_friendly_name)
+ ]
params.voice_name = voice_name
- config.ui['voice_name'] = voice_name
+ config.ui["voice_name"] = voice_name
if st.button(tr("Play Voice")):
play_content = params.video_subject
@@ -524,11 +596,21 @@ with middle_panel:
with st.spinner(tr("Synthesizing Voice")):
temp_dir = utils.storage_dir("temp", create=True)
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
- sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file)
+ sub_maker = voice.tts(
+ text=play_content,
+ voice_name=voice_name,
+ voice_rate=params.voice_rate,
+ voice_file=audio_file,
+ )
# if the voice file generation failed, try again with a default content.
if not sub_maker:
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
- sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file)
+ sub_maker = voice.tts(
+ text=play_content,
+ voice_name=voice_name,
+ voice_rate=params.voice_rate,
+ voice_file=audio_file,
+ )
if sub_maker and os.path.exists(audio_file):
st.audio(audio_file, format="audio/mp3")
@@ -536,29 +618,40 @@ with middle_panel:
os.remove(audio_file)
if voice.is_azure_v2_voice(voice_name):
- saved_azure_speech_region = config.azure.get(f"speech_region", "")
- saved_azure_speech_key = config.azure.get(f"speech_key", "")
- azure_speech_region = st.text_input(tr("Speech Region"), value=saved_azure_speech_region)
- azure_speech_key = st.text_input(tr("Speech Key"), value=saved_azure_speech_key, type="password")
+ saved_azure_speech_region = config.azure.get("speech_region", "")
+ saved_azure_speech_key = config.azure.get("speech_key", "")
+ azure_speech_region = st.text_input(
+ tr("Speech Region"), value=saved_azure_speech_region
+ )
+ azure_speech_key = st.text_input(
+ tr("Speech Key"), value=saved_azure_speech_key, type="password"
+ )
config.azure["speech_region"] = azure_speech_region
config.azure["speech_key"] = azure_speech_key
- params.voice_volume = st.selectbox(tr("Speech Volume"),
- options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], index=2)
-
- params.voice_rate = st.selectbox(tr("Speech Rate"),
- options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], index=2)
-
+ params.voice_volume = st.selectbox(
+ tr("Speech Volume"),
+ options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
+ index=2,
+ )
+
+ params.voice_rate = st.selectbox(
+ tr("Speech Rate"),
+ options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
+ index=2,
+ )
+
bgm_options = [
(tr("No Background Music"), ""),
(tr("Random Background Music"), "random"),
(tr("Custom Background Music"), "custom"),
]
- selected_index = st.selectbox(tr("Background Music"),
- index=1,
- options=range(len(bgm_options)), # 使用索引作为内部选项值
- format_func=lambda x: bgm_options[x][0] # 显示给用户的是标签
- )
+ selected_index = st.selectbox(
+ tr("Background Music"),
+ index=1,
+ options=range(len(bgm_options)), # use the index as the internal option value
+ format_func=lambda x: bgm_options[x][0], # what the user sees is the label
+ )
# 获取选择的背景音乐类型
params.bgm_type = bgm_options[selected_index][1]
@@ -568,8 +661,11 @@ with middle_panel:
if custom_bgm_file and os.path.exists(custom_bgm_file):
params.bgm_file = custom_bgm_file
# st.write(f":red[已选择自定义背景音乐]:**{custom_bgm_file}**")
- params.bgm_volume = st.selectbox(tr("Background Music Volume"),
- options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
+ params.bgm_volume = st.selectbox(
+ tr("Background Music Volume"),
+ options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+ index=2,
+ )
with right_panel:
with st.container(border=True):
@@ -580,31 +676,48 @@ with right_panel:
saved_font_name_index = 0
if saved_font_name in font_names:
saved_font_name_index = font_names.index(saved_font_name)
- params.font_name = st.selectbox(tr("Font"), font_names, index=saved_font_name_index)
- config.ui['font_name'] = params.font_name
+ params.font_name = st.selectbox(
+ tr("Font"), font_names, index=saved_font_name_index
+ )
+ config.ui["font_name"] = params.font_name
subtitle_positions = [
(tr("Top"), "top"),
(tr("Center"), "center"),
(tr("Bottom"), "bottom"),
+ (tr("Custom"), "custom"),
]
- selected_index = st.selectbox(tr("Position"),
- index=2,
- options=range(len(subtitle_positions)), # 使用索引作为内部选项值
- format_func=lambda x: subtitle_positions[x][0] # 显示给用户的是标签
- )
+ selected_index = st.selectbox(
+ tr("Position"),
+ index=2,
+ options=range(len(subtitle_positions)),
+ format_func=lambda x: subtitle_positions[x][0],
+ )
params.subtitle_position = subtitle_positions[selected_index][1]
+ if params.subtitle_position == "custom":
+ custom_position = st.text_input(
+ tr("Custom Position (% from top)"), value="50"
+ )
+ try:
+ params.custom_position = float(custom_position)
+ if params.custom_position < 0 or params.custom_position > 100:
+ st.error(tr("Please enter a value between 0 and 100"))
+ except ValueError:
+ st.error(tr("Please enter a valid number"))
+
font_cols = st.columns([0.3, 0.7])
with font_cols[0]:
saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF")
- params.text_fore_color = st.color_picker(tr("Font Color"), saved_text_fore_color)
- config.ui['text_fore_color'] = params.text_fore_color
+ params.text_fore_color = st.color_picker(
+ tr("Font Color"), saved_text_fore_color
+ )
+ config.ui["text_fore_color"] = params.text_fore_color
with font_cols[1]:
saved_font_size = config.ui.get("font_size", 60)
params.font_size = st.slider(tr("Font Size"), 30, 100, saved_font_size)
- config.ui['font_size'] = params.font_size
+ config.ui["font_size"] = params.font_size
stroke_cols = st.columns([0.3, 0.7])
with stroke_cols[0]:
@@ -621,7 +734,7 @@ if start_button:
scroll_to_bottom()
st.stop()
- if llm_provider != 'g4f' and not config.app.get(f"{llm_provider}_api_key", ""):
+ if llm_provider != "g4f" and not config.app.get(f"{llm_provider}_api_key", ""):
st.error(tr("Please Enter the LLM API Key"))
scroll_to_bottom()
st.stop()
@@ -657,15 +770,13 @@ if start_button:
log_container = st.empty()
log_records = []
-
def log_received(msg):
- if config.ui['hide_log']:
+ if config.ui["hide_log"]:
return
with log_container:
log_records.append(msg)
st.code("\n".join(log_records))
-
logger.add(log_received)
st.toast(tr("Generating Video"))
@@ -687,7 +798,7 @@ if start_button:
player_cols = st.columns(len(video_files) * 2 + 1)
for i, url in enumerate(video_files):
player_cols[i * 2 + 1].video(url)
- except Exception as e:
+ except Exception:
pass
open_task_folder(task_id)
diff --git a/webui/i18n/de.json b/webui/i18n/de.json
index 42a26c3..506ff36 100644
--- a/webui/i18n/de.json
+++ b/webui/i18n/de.json
@@ -42,6 +42,7 @@
"Top": "Oben",
"Center": "Mittig",
"Bottom": "Unten (empfohlen)",
+ "Custom": "Benutzerdefinierte Position (70, was 70% von oben bedeutet)",
"Font Size": "Schriftgröße für Untertitel",
"Font Color": "Schriftfarbe",
"Stroke Color": "Kontur",
diff --git a/webui/i18n/en.json b/webui/i18n/en.json
index 11d4831..37fadc7 100644
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@@ -42,6 +42,7 @@
"Top": "Top",
"Center": "Center",
"Bottom": "Bottom (Recommended)",
+ "Custom": "Custom position (e.g. 70 means 70% down from the top)",
"Font Size": "Subtitle Font Size",
"Font Color": "Subtitle Font Color",
"Stroke Color": "Subtitle Outline Color",
diff --git a/webui/i18n/vi.json b/webui/i18n/vi.json
index 2cd6a98..651706d 100644
--- a/webui/i18n/vi.json
+++ b/webui/i18n/vi.json
@@ -42,6 +42,7 @@
"Top": "Trên",
"Center": "Giữa",
"Bottom": "Dưới (Được Khuyến Nghị)",
+ "Custom": "Vị trí tùy chỉnh (70, chỉ ra là cách đầu trang 70%)",
"Font Size": "Cỡ Chữ Phụ Đề",
"Font Color": "Màu Chữ Phụ Đề",
"Stroke Color": "Màu Viền Phụ Đề",
diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json
index 019e7c1..bf1a6b7 100644
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -42,6 +42,7 @@
"Top": "顶部",
"Center": "中间",
"Bottom": "底部(推荐)",
+ "Custom": "自定义位置(70,表示离顶部70%的位置)",
"Font Size": "字幕大小",
"Font Color": "字幕颜色",
"Stroke Color": "描边颜色",