This commit is contained in:
harry
2024-03-11 16:37:49 +08:00
parent d4f7b53b84
commit 06df797234
71 changed files with 2725 additions and 1 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/config.toml
/storage/
/.idea/

View File

@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

109
README.md Normal file
View File

@@ -0,0 +1,109 @@
# MoneyPrinterTurbo 💸
本地自动创建短视频,只需要提供一个视频主题或关键词,就可以全自动生成视频文案、视频素材、视频字幕、视频背景音乐,最后生成一个短视频。
## 效果预览 📺
### 竖屏 9:16
#### 视频演示
[▶️ 竖屏 9:16 Demo1 ](docs/demo-portrait-1.mp4)
[▶️ 竖屏 9:16 Demo2 ](docs/demo-portrait-2.mp4)
#### 图片预览
<img src="docs/demo-portrait-1.jpg" width="300">
<img src="docs/demo-portrait-2.jpg" width="300">
### 横屏 16:9
#### 视频演示
[▶️ 横屏 16:9](docs/demo-landscape.mp4)
#### 图片预览
<img src="docs/demo-landscape.jpg" width="600">
## 安装 📥
建议使用 [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 创建 python 虚拟环境
```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
cd MoneyPrinterTurbo
conda create -n MoneyPrinterTurbo python=3.10
conda activate MoneyPrinterTurbo
pip install -r requirements.txt
cp config.example.toml config.toml
```
需要先配置 `config.toml` 中的参数
## 使用 🚀
完整的使用演示视频可以查看:https://v.douyin.com/iFhnwsKY/
请先确认你按照 `config.toml` 文件中的说明,配置好了 `openai_api_key` 和 `pexels_api_keys`。否则项目无法正常运行。
### 启动Web界面
```shell
sh webui.sh
```
启动后,会自动打开浏览器,效果如下图:
![](docs/webui.jpg)
### 启动API服务
```shell
python main.py
```
启动后,可以查看 `API文档` http://127.0.0.1:8080/docs
![](docs/api.jpg)
## 语音合成 🗣
所有支持的声音列表,可以查看:[声音列表](./docs/voice-list.txt)
## 字幕生成 📜
当前支持2种字幕生成方式
- edge
- whisper
可以修改 `config.toml` 配置文件中的 `subtitle_provider` 进行切换,如果留空,表示不生成字幕。
## 背景音乐 🎵
用于视频的背景音乐,位于项目的 `resource/songs` 目录下。当前项目里面放了一些默认的音乐,来自于 YouTube 视频,如有侵权,请删除。
## 字幕字体 🅰
用于视频字幕的渲染,位于项目的 `resource/fonts` 目录下,你也可以放进去自己的字体。
## 反馈和建议 📢
- 可以提交 [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) 或者 [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls)。
- 也可以关注我的抖音号:`@网旭哈瑞.AI`
- 我会在上面发布一些 **使用教程** 和 **纯技术** 分享。
- 如果有更新和优化,我也会在抖音上面 **及时通知**
- 有问题也可以在抖音上面 **留言**,我会 **尽快回复**
<img src="docs/douyin.jpg" width="500">
## 感谢 🙏
该项目基于 https://github.com/FujiwaraChoki/MoneyPrinter 重构而来,做了大量的优化,增加了更多的功能。
感谢原作者的开源精神。
## License 📝
点击查看 [`LICENSE`](LICENSE) 文件

0
app/__init__.py Normal file
View File

60
app/asgi.py Normal file
View File

@@ -0,0 +1,60 @@
"""Application implementation - ASGI."""
from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from loguru import logger
from fastapi.staticfiles import StaticFiles
from app.config import config
from app.models.exception import HttpException
from app.router import root_api_router
from app.utils import utils
def exception_handler(request: Request, e: HttpException):
    """Render an application HttpException as a JSON error response."""
    # Build the envelope once, then pair it with the exception's own status.
    payload = utils.get_response(e.status_code, e.data, e.message)
    return JSONResponse(status_code=e.status_code, content=payload)
def validation_exception_handler(request: Request, e: RequestValidationError):
    """Translate pydantic request-validation failures into a 400 JSON response."""
    body = utils.get_response(status=400, data=e.errors(), message='field required')
    return JSONResponse(status_code=400, content=body)
def get_application() -> FastAPI:
    """Build and configure the FastAPI application.

    Returns:
        FastAPI: application with all v1 routes and the custom
        exception handlers registered.
    """
    instance = FastAPI(
        title=config.project_name,
        description=config.project_description,
        version=config.project_version,
        debug=False,
    )
    # All versioned API routes hang off the single root router.
    instance.include_router(root_api_router)
    # HttpException carries its own status/payload; validation errors are
    # normalized into the project's standard response envelope.
    instance.add_exception_handler(HttpException, exception_handler)
    instance.add_exception_handler(RequestValidationError, validation_exception_handler)
    return instance
app = get_application()

# Serve static assets (index.html included) from the public directory at "/".
# NOTE(review): assumes API routes registered in get_application() take
# precedence over this catch-all mount — confirm against FastAPI route order.
public_dir = utils.public_dir()
app.mount("/", StaticFiles(directory=public_dir, html=True), name="")


@app.on_event("shutdown")
def shutdown_event():
    # Lifecycle hook: currently only logs; put cleanup work here.
    logger.info("shutdown event")


@app.on_event("startup")
def startup_event():
    # Lifecycle hook: currently only logs; put warm-up work here.
    logger.info("startup event")

51
app/config/__init__.py Normal file
View File

@@ -0,0 +1,51 @@
import os
import sys
from loguru import logger
from app.config import config
from app.utils import utils
def __init_logger():
    """Configure loguru: a colored stdout sink plus a daily-rotated file sink."""
    _log_file = utils.storage_dir("logs/server.log")
    _lvl = config.log_level
    # Project root, used to shorten absolute source paths in log lines.
    root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

    def format_record(record):
        # Full path of the source file that emitted the record.
        file_path = record["file"].path
        # Convert the absolute path to one relative to the project root.
        relative_path = os.path.relpath(file_path, root_dir)
        # Mutate the record in place so {file.path} renders the short form.
        record["file"].path = f"./{relative_path}"
        # Return the format string; adjust here to change the log layout.
        _format = '<green>{time:%Y-%m-%d %H:%M:%S}</> | ' + \
                  '<level>{level}</> | ' + \
                  '"{file.path}:{line}":<blue> {function}</> ' + \
                  '- <level>{message}</>' + "\n"
        return _format

    logger.remove()
    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True,
    )
    # File sink: rotate at midnight, keep 3 days of logs, capture full
    # tracebacks; enqueue=True routes writes through a queue (per loguru docs,
    # for safe concurrent logging).
    logger.add(
        _log_file,
        level=_lvl,
        format=format_record,
        rotation="00:00",
        retention="3 days",
        backtrace=True,
        diagnose=True,
        enqueue=True,
    )


__init_logger()

31
app/config/config.py Normal file
View File

@@ -0,0 +1,31 @@
import os
import socket

import tomli
from loguru import logger

# Project root = three levels up from this file (app/config/config.py).
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
config_file = f"{root_dir}/config.toml"

logger.info(f"load config from file: {config_file}")
# tomli requires the file opened in binary mode.
with open(config_file, mode="rb") as fp:
    _cfg = tomli.load(fp)

# Section tables; empty-dict fallbacks keep .get() reads safe when absent.
app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})

# BUG FIX: os.uname() exists only on POSIX and crashed this import on
# Windows; socket.gethostname() is portable.
hostname = socket.gethostname()

log_level = _cfg.get("log_level", "DEBUG")
listen_host = _cfg.get("listen_host", "0.0.0.0")
listen_port = _cfg.get("listen_port", 8080)
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
project_description = _cfg.get("project_description", "MoneyPrinterTurbo\n by 抖音-网旭哈瑞.AI")
project_version = _cfg.get("project_version", "1.0.0")
reload_debug = False

__cfg = {
    "hostname": hostname,
    "listen_host": listen_host,
    "listen_port": listen_port,
}
logger.info(__cfg)

27
app/controllers/base.py Normal file
View File

@@ -0,0 +1,27 @@
from uuid import uuid4
from fastapi import Request
from app.config import config
from app.models.exception import HttpException
def get_task_id(request: Request):
    """Return the caller-supplied x-task-id header, or a fresh UUID string."""
    header_value = request.headers.get('x-task-id')
    return str(header_value) if header_value else str(uuid4())
def get_api_key(request: Request):
    """Extract the x-api-key header (None when absent)."""
    return request.headers.get('x-api-key')
def verify_token(request: Request):
    """FastAPI dependency: reject requests whose x-api-key doesn't match config.

    Raises:
        HttpException: 401 when the supplied key differs from app.api_key.
    """
    token = get_api_key(request)
    if token != config.app.get("api_key", ""):
        request_id = get_task_id(request)
        request_url = request.url
        user_agent = request.headers.get('user-agent')
        raise HttpException(task_id=request_id, status_code=401, message=f"invalid token: {request_url}, {user_agent}")

9
app/controllers/ping.py Normal file
View File

@@ -0,0 +1,9 @@
from fastapi import APIRouter
from fastapi import Request
router = APIRouter()


@router.get("/ping", tags=["Health Check"], description="检查服务可用性", response_description="pong")
def ping(request: Request) -> str:
    """Liveness probe: always returns the literal string "pong"."""
    return "pong"

View File

@@ -0,0 +1,11 @@
from fastapi import APIRouter, Depends
def new_router(dependencies=None):
router = APIRouter()
router.tags = ['V1']
router.prefix = '/api/v1'
# 将认证依赖项应用于所有路由
if dependencies:
router.dependencies = dependencies
return router

View File

@@ -0,0 +1,44 @@
from os import path
from fastapi import Request, Depends, Path
from loguru import logger
from app.controllers import base
from app.controllers.v1.base import new_router
from app.models.exception import HttpException
from app.models.schema import TaskVideoRequest, TaskQueryResponse, TaskResponse, TaskQueryRequest
from app.services import task as tm
from app.utils import utils
# Auth dependency is disabled by default; uncomment to require x-api-key
# on every route of this router.
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()


@router.post("/videos", response_model=TaskResponse, summary="使用主题来生成短视频")
async def create_video(request: Request, body: TaskVideoRequest):
    """Run a full video-generation task for the given subject/params.

    NOTE(review): tm.start is invoked directly, so the request blocks until
    the whole video has been produced.
    """
    task_id = utils.get_uuid()
    request_id = base.get_task_id(request)
    try:
        task = {
            "task_id": task_id,
            "request_id": request_id,
        }
        # Merge the validated request parameters into the task record.
        body_dict = body.dict()
        task.update(body_dict)
        result = tm.start(task_id=task_id, params=body)
        task["result"] = result
        logger.success(f"video created: {utils.to_json(task)}")
        return utils.get_response(200, task)
    except ValueError as e:
        raise HttpException(task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}")
@router.get("/tasks/{task_id}", response_model=TaskQueryResponse, summary="查询任务状态")
async def get_task(request: Request, task_id: str = Path(..., description="任务ID"),
                   query: TaskQueryRequest = Depends()):
    """Query a task's status.

    Task persistence is not implemented yet, so this always raises 404,
    echoing the query parameters back in the error payload.
    """
    request_id = base.get_task_id(request)
    data = query.dict()
    data["task_id"] = task_id
    raise HttpException(task_id=task_id, status_code=404,
                        message=f"{request_id}: task not found", data=data)

0
app/models/__init__.py Normal file
View File

4
app/models/const.py Normal file
View File

@@ -0,0 +1,4 @@
# Clause-splitting punctuation used to segment subtitle lines.
# Both ASCII and full-width (CJK) forms are needed because scripts are
# generated in multiple languages. The scraped source had lost the
# non-ASCII characters (empty strings); restored here.
punctuations = [
    "?", ",", ".", "、", ";",
    "?", ",", "。", "、", ";",
]

26
app/models/exception.py Normal file
View File

@@ -0,0 +1,26 @@
import traceback
from typing import Any
from loguru import logger
class HttpException(Exception):
    """Application-level HTTP error carrying a status code and response payload.

    Logging happens at construction time: 400s are warnings (client error),
    everything else is an error; the current traceback is appended when the
    exception is created inside an active ``except`` block.
    """

    def __init__(self, task_id: str, status_code: int, message: str = '', data: Any = None):
        # Make str(e) meaningful for generic handlers (was missing before).
        super().__init__(message)
        # Keep the task id for later inspection (it was previously discarded).
        self.task_id = task_id
        self.message = message
        self.status_code = status_code
        self.data = data
        # traceback.format_exc() yields "NoneType: None" when no exception
        # is currently being handled.
        tb_str = traceback.format_exc().strip()
        if not tb_str or tb_str == "NoneType: None":
            msg = f'HttpException: {status_code}, {task_id}, {message}'
        else:
            msg = f'HttpException: {status_code}, {task_id}, {message}\n{tb_str}'
        if status_code == 400:
            logger.warning(msg)
        else:
            logger.error(msg)
class FileNotFoundException(Exception):
    """Raised when an expected file is missing."""
    pass

119
app/models/schema.py Normal file
View File

@@ -0,0 +1,119 @@
from enum import Enum
from typing import Any, Optional
from pydantic import BaseModel
import warnings
# 忽略 Pydantic 的特定警告
warnings.filterwarnings("ignore", category=UserWarning, message="Field name.*shadows an attribute in parent.*")
class VideoAspect(str, Enum):
    """Supported output aspect ratios."""
    landscape = "16:9"
    portrait = "9:16"
    square = "1:1"

    def to_resolution(self):
        """Map this aspect ratio to an output (width, height) in pixels."""
        resolutions = {
            VideoAspect.landscape.value: (1920, 1080),
            VideoAspect.portrait.value: (1080, 1920),
            VideoAspect.square.value: (1080, 1080),
        }
        # Portrait dimensions are the fallback for any unmatched value.
        return resolutions.get(self.value, (1080, 1920))
# Built-in edge-tts voice identifiers, formatted "<gender>-<locale>-<VoiceName>".
# The first entry is the default voice (see VideoParams.voice_name).
VoiceNames = [
    # zh-CN
    "female-zh-CN-XiaoxiaoNeural",
    "female-zh-CN-XiaoyiNeural",
    "female-zh-CN-liaoning-XiaobeiNeural",
    "female-zh-CN-shaanxi-XiaoniNeural",
    "male-zh-CN-YunjianNeural",
    "male-zh-CN-YunxiNeural",
    "male-zh-CN-YunxiaNeural",
    "male-zh-CN-YunyangNeural",
    # "female-zh-HK-HiuGaaiNeural",
    # "female-zh-HK-HiuMaanNeural",
    # "male-zh-HK-WanLungNeural",
    #
    # "female-zh-TW-HsiaoChenNeural",
    # "female-zh-TW-HsiaoYuNeural",
    # "male-zh-TW-YunJheNeural",
    # en-US
    "female-en-US-AnaNeural",
    "female-en-US-AriaNeural",
    "female-en-US-AvaNeural",
    "female-en-US-EmmaNeural",
    "female-en-US-JennyNeural",
    "female-en-US-MichelleNeural",
    "male-en-US-AndrewNeural",
    "male-en-US-BrianNeural",
    "male-en-US-ChristopherNeural",
    "male-en-US-EricNeural",
    "male-en-US-GuyNeural",
    "male-en-US-RogerNeural",
    "male-en-US-SteffanNeural",
]
class VideoParams:
    """User-tunable parameters for one video-generation task.

    This is a plain annotation-only holder; it becomes a validated pydantic
    model via ``TaskVideoRequest(VideoParams, BaseModel)`` below.

    Example payload:
    {
        "video_subject": "",
        "video_aspect": "横屏 16:9西瓜视频",
        "voice_name": "女生-晓晓",
        "bgm_name": "random",
        "font_name": "STHeitiMedium 黑体-中",
        "text_color": "#FFFFFF",
        "font_size": 60,
        "stroke_color": "#000000",
        "stroke_width": 1.5
    }
    """
    video_subject: str
    # Output aspect ratio; defaults to vertical 9:16.
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    # TTS voice; the first entry of VoiceNames is the default.
    voice_name: Optional[str] = VoiceNames[0]
    bgm_name: Optional[str] = "random"
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    font_size: int = 60
    stroke_color: Optional[str] = "#000000"
    stroke_width: float = 1.5
    # Threads used by moviepy when rendering.
    n_threads: Optional[int] = 2
    # Number of script paragraphs requested from the LLM.
    paragraph_number: Optional[int] = 1
class BaseResponse(BaseModel):
    """Common API response envelope: {status, message, data}."""
    status: int = 200
    message: Optional[str] = 'success'
    data: Any = None
class TaskVideoRequest(VideoParams, BaseModel):
    """Request body of POST /videos; fields come from VideoParams."""
    pass
class TaskQueryRequest(BaseModel):
    """Query parameters of GET /tasks/{task_id}; currently empty."""
    pass
######################################################################################################
######################################################################################################
######################################################################################################
######################################################################################################
class TaskResponse(BaseResponse):
    """Response of POST /videos; data carries the created task's ids."""

    class TaskResponseData(BaseModel):
        task_id: str
        task_type: str = ""

    data: TaskResponseData
class TaskQueryResponse(BaseResponse):
    """Response of GET /tasks/{task_id}; shares the standard envelope."""
    pass

15
app/router.py Normal file
View File

@@ -0,0 +1,15 @@
"""Application configuration - root APIRouter.
Defines all FastAPI application endpoints.
Resources:
1. https://fastapi.tiangolo.com/tutorial/bigger-applications
"""
from fastapi import APIRouter
from app.controllers.v1 import video
root_api_router = APIRouter()
# v1
root_api_router.include_router(video.router)

0
app/services/__init__.py Normal file
View File

152
app/services/gpt.py Normal file
View File

@@ -0,0 +1,152 @@
import logging
import re
import json
import openai
from typing import List
from loguru import logger
from app.config import config
# OpenAI credentials/model are mandatory; fail fast at import time with a
# pointer to the config file instead of erroring on the first request.
openai_api_key = config.app.get("openai_api_key")
if not openai_api_key:
    raise ValueError("openai_api_key is not set, please set it in the config.toml file.")

openai_model_name = config.app.get("openai_model_name")
if not openai_model_name:
    raise ValueError("openai_model_name is not set, please set it in the config.toml file.")

# Optional override, e.g. for an OpenAI-compatible gateway/proxy.
openai_base_url = config.app.get("openai_base_url")

openai.api_key = openai_api_key
# (removed the no-op self-assignment `openai_model_name = openai_model_name`)
if openai_base_url:
    openai.base_url = openai_base_url
def _generate_response(prompt: str) -> str:
    """Send a single-turn user prompt to the configured chat model.

    Returns:
        str: the model's text reply.
    """
    model_name = openai_model_name
    response = openai.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
    ).choices[0].message.content
    return response
def generate_script(video_subject: str, language: str = "zh-CN", paragraph_number: int = 1) -> str:
    """Generate a plain-text narration script for *video_subject* via the LLM.

    Args:
        video_subject: topic the script should cover.
        language: language the model is asked to write in.
        paragraph_number: number of paragraphs to keep from the response.

    Returns:
        str: the cleaned script, or "" when the model returned nothing.
    """
    prompt = f"""
# Role: Video Script Generator
## Goals:
Generate a script for a video, depending on the subject of the video.
## Constrains:
1. the script is to be returned as a string with the specified number of paragraphs.
2. do not under any circumstance reference this prompt in your response.
3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
4. you must not include any type of markdown or formatting in the script, never use a title.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
## Output Example:
What is the meaning of life. This question has puzzled philosophers.
# Initialization:
- video subject: {video_subject}
- output language: {language}
- number of paragraphs: {paragraph_number}
""".strip()
    final_script = ""
    logger.info(f"subject: {video_subject}")
    logger.debug(f"prompt: \n{prompt}")
    response = _generate_response(prompt=prompt)
    # Return the generated script
    if response:
        # Strip leftover markdown emphasis/heading markers.
        response = response.replace("*", "")
        response = response.replace("#", "")
        # Remove markdown link syntax.
        response = re.sub(r"\[.*\]", "", response)
        response = re.sub(r"\(.*\)", "", response)
        # Split into paragraphs and keep only the requested number.
        paragraphs = response.split("\n\n")
        selected_paragraphs = paragraphs[:paragraph_number]
        final_script = "\n\n".join(selected_paragraphs)
        # logger.info(f"number of paragraphs used: {len(selected_paragraphs)}")
    else:
        # NOTE(review): this uses the stdlib `logging` module, not loguru's
        # `logger` used everywhere else — possibly unintentional.
        logging.error("gpt returned an empty response")
    logger.success(f"completed: \n{final_script}")
    return final_script
def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
    """Ask the LLM for *amount* English stock-footage search terms.

    Returns:
        List[str]: the parsed terms; [] when the response cannot be parsed.
    """
    prompt = f"""
# Role: Video Search Terms Generator
## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.
## Constrains:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.
## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
## Context:
### Video Subject
{video_subject}
### Video Script
{video_script}
""".strip()
    logger.info(f"subject: {video_subject}")
    logger.debug(f"prompt: \n{prompt}")
    response = _generate_response(prompt)
    search_terms = []
    try:
        # Happy path: the reply is already a JSON array of strings.
        search_terms = json.loads(response)
        if not isinstance(search_terms, list) or not all(isinstance(term, str) for term in search_terms):
            raise ValueError("response is not a list of strings.")
    except (json.JSONDecodeError, ValueError):
        # logger.warning(f"gpt returned an unformatted response. attempting to clean...")
        # Fallback: extract the first list-like substring and parse that.
        match = re.search(r'\["(?:[^"\\]|\\.)*"(?:,\s*"[^"\\]*")*\]', response)
        if match:
            try:
                search_terms = json.loads(match.group())
            except json.JSONDecodeError:
                logger.error(f"could not parse response: {response}")
                return []
    logger.success(f"completed: \n{search_terms}")
    return search_terms
if __name__ == "__main__":
video_subject = "生命的意义是什么"
script = generate_script(video_subject=video_subject, language="zh-CN", paragraph_number=1)
# print("######################")
# print(script)
search_terms = generate_terms(video_subject=video_subject, video_script=script, amount=5)
# print("######################")
# print(search_terms)

112
app/services/material.py Normal file
View File

@@ -0,0 +1,112 @@
import time
import requests
from typing import List
from loguru import logger
from app.config import config
from app.models.schema import VideoAspect
from app.utils import utils
# Monotonic request counter used to rotate across the configured API keys.
requested_count = 0
# One or more Pexels keys; rotating them spreads out per-key rate limits.
pexels_api_keys = config.app.get("pexels_api_keys")
if not pexels_api_keys:
    raise ValueError("pexels_api_keys is not set, please set it in the config.toml file.")
def round_robin_api_key():
    """Return the next Pexels API key, cycling through the configured list."""
    global requested_count
    requested_count += 1
    return pexels_api_keys[requested_count % len(pexels_api_keys)]
def search_videos(search_term: str,
                  wanted_count: int,
                  minimum_duration: int,
                  video_aspect: VideoAspect = VideoAspect.portrait,
                  locale: str = "zh-CN"
                  ) -> List[str]:
    """Search Pexels for stock clips matching *search_term*.

    Only clips lasting at least *minimum_duration* seconds that have a file
    exactly matching the target resolution are kept.

    Returns:
        List[str]: direct download URLs (empty on API/network errors).
    """
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()

    headers = {
        "Authorization": round_robin_api_key()
    }
    # Build URL
    query_url = f"https://api.pexels.com/videos/search?query={search_term}&per_page=15&orientation={video_orientation}&locale={locale}"
    logger.info(f"searching videos: {query_url}")

    video_urls = []
    try:
        # A timeout keeps the pipeline from hanging forever on a stuck request
        # (the previous version had none, and network errors escaped the try).
        r = requests.get(query_url, headers=headers, timeout=30)
        response = r.json()
        videos_count = min(len(response["videos"]), wanted_count)
        for i in range(videos_count):
            # Skip clips shorter than the minimum duration.
            if response["videos"][i]["duration"] < minimum_duration:
                continue
            video_files = response["videos"][i]["video_files"]
            # Take the first file whose resolution matches the target exactly.
            for video in video_files:
                w = int(video["width"])
                h = int(video["height"])
                if w == video_width and h == video_height:
                    video_urls.append(video["link"])
                    break
    except Exception as e:
        logger.error(f"search videos failed: {e}")
    return video_urls
def save_video(video_url: str, save_dir: str) -> str:
    """Download *video_url* into *save_dir* and return the saved file path.

    The response is streamed in chunks so large clips don't have to fit in
    memory, and HTTP errors surface as exceptions for the caller to handle.
    """
    video_id = f"vid-{str(int(time.time() * 1000))}"
    video_path = f"{save_dir}/{video_id}.mp4"
    with requests.get(video_url, stream=True, timeout=60) as r:
        r.raise_for_status()  # let the caller's except block see HTTP failures
        with open(video_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return video_path
def download_videos(task_id: str,
                    search_terms: List[str],
                    video_aspect: VideoAspect = VideoAspect.portrait,
                    wanted_count: int = 15,
                    minimum_duration: int = 5
                    ) -> List[str]:
    """Search and download stock clips for every term; return local file paths.

    At most 3 previously-unseen URLs are kept per search term, so the final
    video mixes material from all the terms. Failed downloads are logged and
    skipped rather than aborting the task.
    """
    valid_video_urls = []
    for search_term in search_terms:
        # logger.info(f"searching videos for '{search_term}'")
        video_urls = search_videos(search_term=search_term,
                                   wanted_count=wanted_count,
                                   minimum_duration=minimum_duration,
                                   video_aspect=video_aspect)
        logger.info(f"found {len(video_urls)} videos for '{search_term}'")
        i = 0
        for url in video_urls:
            # De-duplicate across terms; cap at 3 new URLs per term.
            if url not in valid_video_urls:
                valid_video_urls.append(url)
                i += 1
                if i >= 3:
                    break
    logger.info(f"downloading videos: {len(valid_video_urls)}")
    video_paths = []
    save_dir = utils.task_dir(task_id)
    for video_url in valid_video_urls:
        try:
            saved_video_path = save_video(video_url, save_dir)
            video_paths.append(saved_video_path)
        except Exception as e:
            logger.error(f"failed to download video: {video_url}, {e}")
    logger.success(f"downloaded {len(video_paths)} videos")
    return video_paths

167
app/services/subtitle.py Normal file
View File

@@ -0,0 +1,167 @@
import json
import re
from faster_whisper import WhisperModel
from timeit import default_timer as timer
from loguru import logger
from app.config import config
from app.models import const
from app.utils import utils
# Whisper model is loaded once at import time; first use may download weights.
model_size = config.whisper.get("model_size", "large-v3")
device = config.whisper.get("device", "cpu")
compute_type = config.whisper.get("compute_type", "int8")
model = WhisperModel(model_size_or_path=model_size, device=device, compute_type=compute_type)
def create(audio_file, subtitle_file: str = ""):
    """Transcribe *audio_file* with whisper and write an SRT subtitle file.

    Word-level timestamps are regrouped into subtitle lines by splitting on
    punctuation, so each SRT entry is one clause rather than one whisper
    segment.

    Args:
        audio_file: path to the narration audio.
        subtitle_file: output path; defaults to "<audio_file>.srt".
    """
    logger.info(f"start, output file: {subtitle_file}")
    if not subtitle_file:
        subtitle_file = f"{audio_file}.srt"
    segments, info = model.transcribe(
        audio_file,
        beam_size=5,
        word_timestamps=True,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )
    logger.info(f"detected language: '{info.language}', probability: {info.language_probability:.2f}")
    start = timer()
    subtitles = []

    def recognized(seg_text, seg_start, seg_end):
        # Collect one finished subtitle line (skipping empty text).
        seg_text = seg_text.strip()
        if not seg_text:
            return
        msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
        logger.debug(msg)
        subtitles.append({
            "msg": seg_text,
            "start_time": seg_start,
            "end_time": seg_end
        })

    for segment in segments:
        words_idx = 0
        words_len = len(segment.words)
        seg_start = 0
        seg_end = 0
        seg_text = ""
        if segment.words:
            is_segmented = False
            for word in segment.words:
                if not is_segmented:
                    seg_start = word.start
                    is_segmented = True
                seg_end = word.end
                # Split the line whenever the word carries punctuation.
                seg_text += word.word
                if utils.str_contains_punctuation(word.word):
                    # Drop the punctuation character itself.
                    seg_text = seg_text[:-1]
                    if not seg_text:
                        continue
                    recognized(seg_text, seg_start, seg_end)
                    is_segmented = False
                    seg_text = ""
                # Clamp the first/last word times to the segment boundaries.
                if words_idx == 0 and segment.start < word.start:
                    seg_start = word.start
                if words_idx == (words_len - 1) and segment.end > word.end:
                    seg_end = word.end
                words_idx += 1
        # Flush whatever text is left at the end of the segment.
        if not seg_text:
            continue
        recognized(seg_text, seg_start, seg_end)

    end = timer()
    diff = end - start
    logger.info(f"complete, elapsed: {diff:.2f} s")

    idx = 1
    lines = []
    for subtitle in subtitles:
        text = subtitle.get("msg")
        if text:
            lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
            idx += 1
    sub = "\n".join(lines)
    with open(subtitle_file, "w") as f:
        f.write(sub)
    logger.info(f"subtitle file created: {subtitle_file}")
def file_to_subtitles(filename):
    """Parse an SRT file into a list of (index, time_range, text) tuples.

    Args:
        filename: path to the .srt file.

    Returns:
        list[tuple]: (1-based index, "HH:MM:SS,mmm --> HH:MM:SS,mmm", text)
        for each subtitle entry.
    """
    times_texts = []
    current_times = None
    current_text = ""
    index = 0
    # Explicit utf-8 so parsing doesn't depend on the platform default
    # encoding — subtitles are frequently non-ASCII (CJK).
    with open(filename, 'r', encoding="utf-8") as f:
        for line in f:
            times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
            if times:
                current_times = line
            elif line.strip() == '' and current_times:
                # A blank line terminates the current entry.
                index += 1
                times_texts.append((index, current_times.strip(), current_text.strip()))
                current_times, current_text = None, ""
            elif current_times:
                current_text += line
    return times_texts
def correct(subtitle_file, video_script):
    """Align subtitle text with the authoritative script, line by line.

    When whisper mis-recognized a line, the script's text replaces it; this
    is only possible when both sides split into the same number of lines.

    Args:
        subtitle_file: path to the SRT file to check (rewritten in place).
        video_script: the original generated script text.
    """
    subtitle_items = file_to_subtitles(subtitle_file)
    script_lines = utils.split_string_by_punctuations(video_script)

    corrected = False
    if len(subtitle_items) == len(script_lines):
        for i in range(len(script_lines)):
            script_line = script_lines[i].strip()
            subtitle_line = subtitle_items[i][2]
            if script_line != subtitle_line:
                logger.warning(f"line {i + 1}, script: {script_line}, subtitle: {subtitle_line}")
                subtitle_items[i] = (subtitle_items[i][0], subtitle_items[i][1], script_line)
                corrected = True

    if corrected:
        # utf-8 keeps CJK subtitles intact regardless of platform defaults.
        with open(subtitle_file, "w", encoding="utf-8") as fd:
            for item in subtitle_items:
                fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
        logger.info(f"subtitle corrected")
    else:
        logger.success(f"subtitle is correct")
if __name__ == "__main__":
task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
task_dir = utils.task_dir(task_id)
subtitle_file = f"{task_dir}/subtitle.srt"
subtitles = file_to_subtitles(subtitle_file)
print(subtitles)
script_file = f"{task_dir}/script.json"
with open(script_file, "r") as f:
script_content = f.read()
s = json.loads(script_content)
script = s.get("script")
correct(subtitle_file, script)

113
app/services/task.py Normal file
View File

@@ -0,0 +1,113 @@
from os import path
from loguru import logger
from app.config import config
from app.models.schema import VideoParams, VoiceNames
from app.services import gpt, material, voice, video, subtitle
from app.utils import utils
def _parse_voice(name: str):
    """Convert a UI voice name into (edge-tts voice id, locale).

    Examples:
        "female-zh-CN-XiaoxiaoNeural" -> ("zh-CN-XiaoxiaoNeural", "zh-CN")
        "female-zh-CN-liaoning-XiaobeiNeural"
            -> ("zh-CN-liaoning-XiaobeiNeural", "zh-CN")

    Unknown names fall back to the default (first) voice.
    """
    if name not in VoiceNames:
        name = VoiceNames[0]
    parts = name.split("-")
    _lang = f"{parts[1]}-{parts[2]}"
    # Keep everything after the gender prefix: regional voices such as
    # zh-CN-liaoning-XiaobeiNeural have an extra segment that the previous
    # parts[3]-only logic silently dropped, producing an invalid voice id.
    _voice = "-".join(parts[1:])
    return _voice, _lang
def start(task_id, params: VideoParams):
    """Run the full generation pipeline for one task.

    Steps: script -> search terms -> TTS audio -> subtitles -> stock clip
    download -> concatenation -> final render. All artifacts are written
    into the task's storage directory.

    Args:
        task_id: unique id; determines the output directory.
        params: validated generation parameters.

    Returns:
        dict: {"video_file": <path of the final rendered video>}.
    """
    logger.info(f"start task: {task_id}")
    video_subject = params.video_subject
    voice_name, language = _parse_voice(params.voice_name)
    paragraph_number = params.paragraph_number
    n_threads = params.n_threads

    logger.info("\n\n## generating video script")
    script = gpt.generate_script(video_subject=video_subject, language=language, paragraph_number=paragraph_number)
    logger.info("\n\n## generating video terms")
    search_terms = gpt.generate_terms(video_subject=video_subject, video_script=script, amount=5)

    # Persist the script + terms for later inspection/correction.
    script_file = path.join(utils.task_dir(task_id), f"script.json")
    script_data = {
        "script": script,
        "search_terms": search_terms
    }
    with open(script_file, "w") as f:
        f.write(utils.to_json(script_data))

    audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
    subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")

    logger.info("\n\n## generating audio")
    sub_maker = voice.tts(text=script, voice_name=voice_name, voice_file=audio_file)

    # "edge" derives subtitles from the TTS word boundaries; "whisper"
    # re-transcribes the audio (and is then corrected against the script);
    # empty/unknown values skip subtitles entirely.
    subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
    logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
    if subtitle_provider == "edge":
        voice.create_subtitle(text=script, sub_maker=sub_maker, subtitle_file=subtitle_path)
    if subtitle_provider == "whisper":
        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
        logger.info("\n\n## correcting subtitle")
        subtitle.correct(subtitle_file=subtitle_path, video_script=script)

    logger.info("\n\n## downloading videos")
    video_paths = material.download_videos(task_id=task_id, search_terms=search_terms, video_aspect=params.video_aspect,
                                           wanted_count=20,
                                           minimum_duration=5)

    logger.info("\n\n## combining videos")
    combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
    video.combine_videos(combined_video_path=combined_video_path,
                         video_paths=video_paths,
                         audio_file=audio_file,
                         video_aspect=params.video_aspect,
                         max_clip_duration=5,
                         threads=n_threads)

    final_video_path = path.join(utils.task_dir(task_id), f"final.mp4")
    bgm_file = video.get_bgm_file(bgm_name=params.bgm_name)

    logger.info("\n\n## generating video")
    # Put everything together
    video.generate_video(video_path=combined_video_path,
                         audio_path=audio_file,
                         subtitle_path=subtitle_path,
                         output_file=final_video_path,
                         video_aspect=params.video_aspect,
                         threads=n_threads,
                         font_name=params.font_name,
                         fontsize=params.font_size,
                         text_fore_color=params.text_fore_color,
                         stroke_color=params.stroke_color,
                         stroke_width=params.stroke_width,
                         bgm_file=bgm_file
                         )

    # BUG FIX: logger.start is a deprecated loguru alias of logger.add() and
    # would try to register the message string as a new log sink (creating a
    # file named after the message); logger.success is the intended call.
    logger.success(f"task {task_id} finished")
    return {
        "video_file": final_video_path,
    }

246
app/services/video.py Normal file
View File

@@ -0,0 +1,246 @@
import glob
import random
from typing import List
from PIL import ImageFont
from loguru import logger
from moviepy.editor import *
from moviepy.video.fx.crop import crop
from moviepy.video.tools.subtitles import SubtitlesClip
from app.models.schema import VideoAspect
from app.utils import utils
def get_bgm_file(bgm_name: str = "random"):
    """Resolve a background-music name to a file path under resource/songs.

    Args:
        bgm_name: "" disables BGM, "random" picks a random mp3 from the songs
            directory, anything else is treated as a file name inside it.

    Returns:
        str: path to the mp3, or "" when nothing suitable exists.
    """
    if not bgm_name:
        return ""
    if bgm_name == "random":
        suffix = "*.mp3"
        song_dir = utils.song_dir()
        files = glob.glob(os.path.join(song_dir, suffix))
        # Guard: random.choice raises IndexError on an empty list, which
        # previously crashed the task when the songs directory was empty.
        if not files:
            return ""
        return random.choice(files)
    file = os.path.join(utils.song_dir(), bgm_name)
    if os.path.exists(file):
        return file
    return ""
def combine_videos(combined_video_path: str,
                   video_paths: List[str],
                   audio_file: str,
                   video_aspect: VideoAspect = VideoAspect.portrait,
                   max_clip_duration: int = 5,
                   threads: int = 2,
                   ) -> str:
    """Concatenate downloaded clips into one silent video covering the audio.

    Clips are cycled over repeatedly until their combined duration reaches
    the narration audio's length; each is cropped toward the target aspect
    ratio and resized to the output resolution.

    Returns:
        str: combined_video_path (the written file).
    """
    logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
    audio_clip = AudioFileClip(audio_file)
    max_duration = audio_clip.duration
    logger.info(f"max duration of audio: {max_duration} seconds")
    # Required duration of each clip
    req_dur = max_duration / len(video_paths)
    logger.info(f"each clip will be maximum {req_dur} seconds long")

    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    clips = []
    tot_dur = 0
    # Add downloaded clips over and over until the duration of the audio
    # (max_duration) has been reached.
    while tot_dur < max_duration:
        for video_path in video_paths:
            clip = VideoFileClip(video_path)
            clip = clip.without_audio()
            # Check if clip is longer than the remaining audio
            if (max_duration - tot_dur) < clip.duration:
                clip = clip.subclip(0, (max_duration - tot_dur))
            # Only shorten clips if the calculated clip length (req_dur) is
            # shorter than the actual clip, to prevent still images.
            elif req_dur < clip.duration:
                clip = clip.subclip(0, req_dur)
            clip = clip.set_fps(30)

            # Not all source videos share the target size: crop then resize.
            # NOTE(review): 0.5625 == 9/16, so this crop always targets a 9:16
            # frame even for landscape/square output — confirm intended.
            if clip.w != video_width or clip.h != video_height:
                if round((clip.w / clip.h), 4) < 0.5625:
                    clip = crop(clip,
                                width=clip.w,
                                height=round(clip.w / 0.5625),
                                x_center=clip.w / 2,
                                y_center=clip.h / 2
                                )
                else:
                    clip = crop(clip,
                                width=round(0.5625 * clip.h),
                                height=clip.h,
                                x_center=clip.w / 2,
                                y_center=clip.h / 2
                                )
                logger.info(f"resizing video to {video_width} x {video_height}")
                clip = clip.resize((video_width, video_height))

            if clip.duration > max_clip_duration:
                clip = clip.subclip(0, max_clip_duration)

            clips.append(clip)
            tot_dur += clip.duration

    final_clip = concatenate_videoclips(clips)
    final_clip = final_clip.set_fps(30)
    logger.info(f"writing")
    final_clip.write_videofile(combined_video_path, threads=threads)
    logger.success(f"completed")
    return combined_video_path
def wrap_text(text, max_width, font='Arial', fontsize=60):
    """Hard-wrap *text* so each rendered line fits within max_width pixels.

    Measures with PIL since moviepy's TextClip has no pixel-based wrapping.

    Args:
        text: the subtitle line to wrap.
        max_width: maximum rendered line width in pixels.
        font: path to (or name of) a truetype font file.
        fontsize: font size in points.

    Returns:
        str: the text with newlines inserted (unchanged when it already fits).
    """
    font = ImageFont.truetype(font, fontsize)

    def get_text_size(inner_text):
        left, top, right, bottom = font.getbbox(inner_text)
        return right - left, bottom - top

    width, height = get_text_size(text)
    if width <= max_width:
        return text
    logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")

    _wrapped_lines_ = []
    _txt_ = ''
    for char in text:
        candidate = _txt_ + char
        _width, _height = get_text_size(candidate)
        if _width <= max_width or not _txt_:
            # Keep at least one char per line even if it alone overflows.
            _txt_ = candidate
        else:
            # BUG FIX: flush the line *before* the overflowing character —
            # the old code appended it first, so wrapped lines exceeded
            # max_width by up to one glyph.
            _wrapped_lines_.append(_txt_)
            _txt_ = char
    if _txt_:
        _wrapped_lines_.append(_txt_)
    return '\n'.join(_wrapped_lines_)
def generate_video(video_path: str,
                   audio_path: str,
                   subtitle_path: str,
                   output_file: str,
                   video_aspect: VideoAspect = VideoAspect.portrait,
                   threads: int = 2,
                   font_name: str = "",
                   fontsize: int = 60,
                   stroke_color: str = "#000000",
                   stroke_width: float = 1.5,
                   text_fore_color: str = "white",
                   text_background_color: str = "transparent",
                   bgm_file: str = "",
                   ):
    """Compose the final video: source clip + narration audio + burned-in subtitles + optional BGM.

    The result is encoded twice: first to a temp file (video, subtitles and
    narration), then re-encoded to output_file with AAC audio — and with the
    background music mixed in, if bgm_file is given.

    :param video_path: path of the pre-combined source video
    :param audio_path: path of the narration audio track
    :param subtitle_path: path of the .srt file; empty/missing -> no subtitles
    :param output_file: final .mp4 output path
    :param video_aspect: target aspect ratio; decides the subtitle vertical position
    :param threads: ffmpeg encoding threads
    :param font_name: font file name under the resource fonts dir; defaults to STHeitiMedium.ttc
    :param bgm_file: optional background-music file, mixed at 20% volume
    """
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    logger.info(f"start, video size: {video_width} x {video_height}")
    logger.info(f" ① video: {video_path}")
    logger.info(f" ② audio: {audio_path}")
    logger.info(f" ③ subtitle: {subtitle_path}")
    logger.info(f" ④ output: {output_file}")

    if not font_name:
        font_name = "STHeitiMedium.ttc"
    font_path = os.path.join(utils.font_dir(), font_name)
    logger.info(f"using font: {font_path}")

    # Custom TextClip factory that applies automatic line wrapping.
    def generator(txt):
        # Wrap to the video width minus a 100px margin; adjust max_width to fit your video.
        wrapped_txt = wrap_text(txt, max_width=video_width - 100,
                                font=font_path,
                                fontsize=fontsize)
        return TextClip(
            wrapped_txt,
            font=font_path,
            fontsize=fontsize,
            color=text_fore_color,
            bg_color=text_background_color,
            stroke_color=stroke_color,
            stroke_width=stroke_width,
            print_cmd=False,
        )

    # Vertical subtitle position (pixels from the top of the frame).
    position_height = video_height - 200
    if video_aspect == VideoAspect.landscape:
        position_height = video_height - 100

    clips = [
        VideoFileClip(video_path),
        # subtitles.set_position(lambda _t: ('center', position_height))
    ]
    # Burn the subtitles into the video
    if subtitle_path and os.path.exists(subtitle_path):
        subtitles = SubtitlesClip(subtitle_path, generator)
        clips.append(subtitles.set_position(lambda _t: ('center', position_height)))

    result = CompositeVideoClip(clips)

    # Add the narration audio track.
    audio = AudioFileClip(audio_path)
    result = result.set_audio(audio)

    temp_output_file = f"{output_file}.temp.mp4"
    logger.info(f"writing to temp file: {temp_output_file}")
    result.write_videofile(temp_output_file, threads=threads or 2)

    video_clip = VideoFileClip(temp_output_file)

    if bgm_file:
        logger.info(f"adding background music: {bgm_file}")
        original_duration = video_clip.duration
        original_audio = video_clip.audio
        song_clip = AudioFileClip(bgm_file).set_fps(44100)
        # Mix the BGM at 20% volume (factor 0.2).
        song_clip = song_clip.volumex(0.2).set_fps(44100)
        # Layer the BGM under the narration.
        comp_audio = CompositeAudioClip([original_audio, song_clip])
        video_clip = video_clip.set_audio(comp_audio)
        video_clip = video_clip.set_fps(30)
        # Clamp to the original duration in case the BGM is longer.
        video_clip = video_clip.set_duration(original_duration)

    # Encode audio as AAC — otherwise playback fails on iPhone.
    logger.info(f"encoding audio codec to aac")
    video_clip.write_videofile(output_file, audio_codec="aac", threads=threads)
    # delete the temp file
    os.remove(temp_output_file)

    logger.success(f"completed")
if __name__ == "__main__":
    # Quick manual check of the text-wrapping helper.
    sample_text = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
    sample_font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
    print(wrap_text(text=sample_text, max_width=1000, font=sample_font, fontsize=60))

    # Render a full video from a previously prepared task directory.
    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
    task_dir = utils.task_dir(task_id)
    generate_video(video_path=f"{task_dir}/combined.mp4",
                   audio_path=f"{task_dir}/audio.mp3",
                   subtitle_path=f"{task_dir}/subtitle.srt",
                   output_file=f"{task_dir}/final.mp4",
                   video_aspect=VideoAspect.portrait,
                   threads=2,
                   font_name="STHeitiMedium.ttc",
                   fontsize=60,
                   stroke_color="#000000",
                   stroke_width=1.5,
                   text_fore_color="white",
                   text_background_color="transparent",
                   bgm_file=""
                   )

101
app/services/voice.py Normal file
View File

@@ -0,0 +1,101 @@
import asyncio
from xml.sax.saxutils import unescape
from edge_tts.submaker import mktimestamp
from loguru import logger
from edge_tts import submaker, SubMaker
import edge_tts
from app.utils import utils
def tts(text: str, voice_name: str, voice_file: str) -> SubMaker:
    """Synthesize *text* with edge-tts, writing MP3 audio to *voice_file*.

    :return: the SubMaker holding the word-boundary timing data,
             later used to build subtitles.
    """
    logger.info(f"start, voice name: {voice_name}")

    async def _synthesize() -> SubMaker:
        stream = edge_tts.Communicate(text, voice_name).stream()
        maker = edge_tts.SubMaker()
        with open(voice_file, "wb") as audio_out:
            async for chunk in stream:
                chunk_type = chunk["type"]
                if chunk_type == "audio":
                    audio_out.write(chunk["data"])
                elif chunk_type == "WordBoundary":
                    # Record word timing for subtitle generation.
                    maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
        return maker

    sub_maker = asyncio.run(_synthesize())
    logger.info(f"completed, output file: {voice_file}")
    return sub_maker
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """Build an optimized .srt subtitle file from edge-tts word boundaries.

    1. Split the script text into lines at punctuation marks.
    2. Accumulate word-level subs until they exactly match a script line.
    3. Write each matched line as a numbered SRT entry.

    NOTE(review): words that never line up with a script line are silently
    dropped — verify the splitting matches the TTS tokenization.
    """

    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """Render one SRT entry, e.g.:

        1
        00:00:00,000 --> 00:00:02,360
        跑步是一项简单易行的运动
        """
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
        return (
            f"{idx}\n"
            f"{start_t} --> {end_t}\n"
            f"{sub_text}\n"
        )

    start_time = -1.0
    sub_items = []
    sub_index = 0
    script_lines = utils.split_string_by_punctuations(text)
    sub_line = ""

    # Removed a pointless enumerate() wrapper around zip().
    for offset, sub in zip(sub_maker.offset, sub_maker.subs):
        _start_time, end_time = offset
        if start_time < 0:
            start_time = _start_time
        sub = unescape(sub)
        sub_line += sub
        # Fixed: guard sub_index — previously this raised IndexError once
        # every script line was consumed but word boundaries kept arriving.
        if sub_index < len(script_lines) and sub_line == script_lines[sub_index]:
            sub_index += 1
            sub_items.append(formatter(
                idx=sub_index,
                start_time=start_time,
                end_time=end_time,
                sub_text=sub_line,
            ))
            start_time = -1.0
            sub_line = ""

    with open(subtitle_file, "w", encoding="utf-8") as file:
        file.write("\n".join(sub_items))
if __name__ == "__main__":
    temp_dir = utils.storage_dir("temp")
    # Every demo run overwrites the same subtitle file.
    subtitle_file = f"{temp_dir}/tts.mp3.srt"

    demo_voices = [
        # female voices
        "zh-CN-XiaoxiaoNeural",
        "zh-CN-XiaoyiNeural",
        # male voices
        "zh-CN-YunyangNeural",
        "zh-CN-YunxiNeural",
    ]
    text = """
预计未来3天深圳冷空气活动频繁未来两天持续阴天有小雨出门带好雨具
10-11日持续阴天有小雨日温差小气温在13-17℃之间体感阴凉
12日天气短暂好转早晚清凉
    """

    for voice_name in demo_voices:
        voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
        sub_maker = tts(text=text, voice_name=voice_name, voice_file=voice_file)
        create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)

167
app/utils/utils.py Normal file
View File

@@ -0,0 +1,167 @@
import os
import threading
from typing import Any
from loguru import logger
import json
from uuid import uuid4
import urllib3
from app.models import const
urllib3.disable_warnings()
def get_response(status: int, data: Any = None, message: str = ""):
    """Build a standard API response dict.

    :param status: numeric status code, always present in the result
    :param data: optional payload; included whenever it is not None
    :param message: optional human-readable message; included when non-empty
    :return: dict with 'status' and optionally 'data' / 'message'
    """
    obj = {
        'status': status,
    }
    # Fixed: use `is not None` so falsy-but-valid payloads ([], {}, 0, "")
    # are kept — the previous truthiness check silently dropped them.
    if data is not None:
        obj['data'] = data
    if message:
        obj['message'] = message
    return obj
def to_json(obj):
    """Serialize *obj* to a pretty-printed JSON string.

    Handles nested dicts/lists/tuples, replaces binary data with a
    placeholder, and falls back to an object's __dict__ for custom types.
    Values that cannot be represented become null.
    """

    def _convert(value):
        # Primitives (and None) pass straight through.
        if value is None or isinstance(value, (int, float, bool, str)):
            return value
        # Binary payloads are replaced by a placeholder string.
        if isinstance(value, bytes):
            return "*** binary data ***"
        # Recurse into mappings.
        if isinstance(value, dict):
            return {key: _convert(item) for key, item in value.items()}
        # Recurse into sequences (tuples become JSON arrays).
        if isinstance(value, (list, tuple)):
            return [_convert(item) for item in value]
        # Custom objects: serialize their attribute dict.
        if hasattr(value, '__dict__'):
            return _convert(value.__dict__)
        # Anything else is unserializable.
        return None

    return json.dumps(_convert(obj), ensure_ascii=False, indent=4)
def get_uuid(remove_hyphen: bool = False):
    """Return a random UUID4 string, optionally with the hyphens stripped."""
    result = str(uuid4())
    return result.replace("-", "") if remove_hyphen else result
def root_dir():
    """Project root: three levels up from this file (app/utils/utils.py)."""
    return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))


def _join_sub(base: str, sub_dir: str) -> str:
    # Append the optional sub-directory to base; base itself when empty.
    return os.path.join(base, sub_dir) if sub_dir else base


def _ensure_dir(d: str) -> str:
    # Create the directory if missing, then return it.
    # Factored out: this pattern was duplicated in four functions below.
    if not os.path.exists(d):
        os.makedirs(d)
    return d


def storage_dir(sub_dir: str = ""):
    """Path under <root>/storage (not created automatically)."""
    return _join_sub(os.path.join(root_dir(), "storage"), sub_dir)


def resource_dir(sub_dir: str = ""):
    """Path under <root>/resource (not created automatically)."""
    return _join_sub(os.path.join(root_dir(), "resource"), sub_dir)


def task_dir(sub_dir: str = ""):
    """Path under <root>/storage/tasks, created on demand."""
    return _ensure_dir(_join_sub(os.path.join(storage_dir(), "tasks"), sub_dir))


def font_dir(sub_dir: str = ""):
    """Path under <root>/resource/fonts, created on demand."""
    return _ensure_dir(_join_sub(resource_dir("fonts"), sub_dir))


def song_dir(sub_dir: str = ""):
    """Path under <root>/resource/songs, created on demand."""
    return _ensure_dir(_join_sub(resource_dir("songs"), sub_dir))


def public_dir(sub_dir: str = ""):
    """Path under <root>/resource/public, created on demand."""
    return _ensure_dir(_join_sub(resource_dir("public"), sub_dir))
def run_in_background(func, *args, **kwargs):
    """Run func(*args, **kwargs) on a new worker thread.

    Exceptions are caught and logged rather than propagated.
    :return: the started Thread object (join it to wait for completion).
    """

    def _worker():
        try:
            func(*args, **kwargs)
        except Exception as e:
            logger.error(f"run_in_background error: {e}")

    worker = threading.Thread(target=_worker)
    worker.start()
    return worker
def time_convert_seconds_to_hmsm(seconds) -> str:
    """Format a duration in seconds as an SRT timestamp: HH:MM:SS,mmm."""
    remainder = seconds % 3600
    hours = int(seconds // 3600)
    minutes = int(remainder // 60)
    # Milliseconds come from the fractional part (truncated, not rounded).
    milliseconds = int(remainder * 1000) % 1000
    secs = int(remainder % 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"


def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
    """Render one numbered SRT subtitle entry for *msg*."""
    begin = time_convert_seconds_to_hmsm(start_time)
    finish = time_convert_seconds_to_hmsm(end_time)
    return "%d\n%s --> %s\n%s\n" % (idx, begin, finish, msg)
def str_contains_punctuation(word):
    """Return True if *word* contains any character from const.punctuations."""
    # any() + generator expression replaces the manual loop (idiom).
    return any(p in word for p in const.punctuations)
def split_string_by_punctuations(s):
    """Split *s* into stripped segments at the chars in const.punctuations.

    Fixes over the previous version:
    - the tail after the last punctuation mark was silently dropped;
    - consecutive punctuation produced empty segments, which broke callers
      that match segments sequentially (e.g. subtitle alignment).
    """
    result = []
    txt = ""
    for char in s:
        if char not in const.punctuations:
            txt += char
        else:
            segment = txt.strip()
            if segment:
                result.append(segment)
            txt = ""
    # Keep any trailing text after the final punctuation mark.
    tail = txt.strip()
    if tail:
        result.append(tail)
    return result

33
config.example.toml Normal file
View File

@@ -0,0 +1,33 @@
[app]
# Pexels API Key
# Register at https://www.pexels.com/api/ to get your API key.
# You can use multiple keys to avoid rate limits.
pexels_api_keys = []
# OpenAI API Key
# Visit https://openai.com/api/ for details on obtaining an API key.
openai_api_key = ""
openai_base_url=""
openai_model_name = "gpt-4-turbo-preview"
# Subtitle Provider, "edge" or "whisper"
# If empty, the subtitle will not be generated
subtitle_provider = "edge"
[whisper]
# Only effective when subtitle_provider is "whisper"
# Run on GPU with FP16
# model = WhisperModel(model_size, device="cuda", compute_type="float16")
# Run on GPU with INT8
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
# Run on CPU with INT8
# model = WhisperModel(model_size, device="cpu", compute_type="int8")
# recommended model_size: "large-v3"
model_size="large-v3"
# if you want to use GPU, set device="cuda" (device values are lowercase: "cpu", "cuda", "auto")
device="cpu"
compute_type="int8"

BIN
docs/api.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 269 KiB

BIN
docs/demo-landscape.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 949 KiB

BIN
docs/demo-landscape.mp4 Normal file

Binary file not shown.

BIN
docs/demo-portrait-1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 428 KiB

BIN
docs/demo-portrait-1.mp4 Normal file

Binary file not shown.

BIN
docs/demo-portrait-2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 495 KiB

BIN
docs/demo-portrait-2.mp4 Normal file

Binary file not shown.

BIN
docs/douyin.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 172 KiB

941
docs/voice-list.txt Normal file
View File

@@ -0,0 +1,941 @@
Name: af-ZA-AdriNeural
Gender: Female
Name: af-ZA-WillemNeural
Gender: Male
Name: am-ET-AmehaNeural
Gender: Male
Name: am-ET-MekdesNeural
Gender: Female
Name: ar-AE-FatimaNeural
Gender: Female
Name: ar-AE-HamdanNeural
Gender: Male
Name: ar-BH-AliNeural
Gender: Male
Name: ar-BH-LailaNeural
Gender: Female
Name: ar-DZ-AminaNeural
Gender: Female
Name: ar-DZ-IsmaelNeural
Gender: Male
Name: ar-EG-SalmaNeural
Gender: Female
Name: ar-EG-ShakirNeural
Gender: Male
Name: ar-IQ-BasselNeural
Gender: Male
Name: ar-IQ-RanaNeural
Gender: Female
Name: ar-JO-SanaNeural
Gender: Female
Name: ar-JO-TaimNeural
Gender: Male
Name: ar-KW-FahedNeural
Gender: Male
Name: ar-KW-NouraNeural
Gender: Female
Name: ar-LB-LaylaNeural
Gender: Female
Name: ar-LB-RamiNeural
Gender: Male
Name: ar-LY-ImanNeural
Gender: Female
Name: ar-LY-OmarNeural
Gender: Male
Name: ar-MA-JamalNeural
Gender: Male
Name: ar-MA-MounaNeural
Gender: Female
Name: ar-OM-AbdullahNeural
Gender: Male
Name: ar-OM-AyshaNeural
Gender: Female
Name: ar-QA-AmalNeural
Gender: Female
Name: ar-QA-MoazNeural
Gender: Male
Name: ar-SA-HamedNeural
Gender: Male
Name: ar-SA-ZariyahNeural
Gender: Female
Name: ar-SY-AmanyNeural
Gender: Female
Name: ar-SY-LaithNeural
Gender: Male
Name: ar-TN-HediNeural
Gender: Male
Name: ar-TN-ReemNeural
Gender: Female
Name: ar-YE-MaryamNeural
Gender: Female
Name: ar-YE-SalehNeural
Gender: Male
Name: az-AZ-BabekNeural
Gender: Male
Name: az-AZ-BanuNeural
Gender: Female
Name: bg-BG-BorislavNeural
Gender: Male
Name: bg-BG-KalinaNeural
Gender: Female
Name: bn-BD-NabanitaNeural
Gender: Female
Name: bn-BD-PradeepNeural
Gender: Male
Name: bn-IN-BashkarNeural
Gender: Male
Name: bn-IN-TanishaaNeural
Gender: Female
Name: bs-BA-GoranNeural
Gender: Male
Name: bs-BA-VesnaNeural
Gender: Female
Name: ca-ES-EnricNeural
Gender: Male
Name: ca-ES-JoanaNeural
Gender: Female
Name: cs-CZ-AntoninNeural
Gender: Male
Name: cs-CZ-VlastaNeural
Gender: Female
Name: cy-GB-AledNeural
Gender: Male
Name: cy-GB-NiaNeural
Gender: Female
Name: da-DK-ChristelNeural
Gender: Female
Name: da-DK-JeppeNeural
Gender: Male
Name: de-AT-IngridNeural
Gender: Female
Name: de-AT-JonasNeural
Gender: Male
Name: de-CH-JanNeural
Gender: Male
Name: de-CH-LeniNeural
Gender: Female
Name: de-DE-AmalaNeural
Gender: Female
Name: de-DE-ConradNeural
Gender: Male
Name: de-DE-FlorianMultilingualNeural
Gender: Male
Name: de-DE-KatjaNeural
Gender: Female
Name: de-DE-KillianNeural
Gender: Male
Name: de-DE-SeraphinaMultilingualNeural
Gender: Female
Name: el-GR-AthinaNeural
Gender: Female
Name: el-GR-NestorasNeural
Gender: Male
Name: en-AU-NatashaNeural
Gender: Female
Name: en-AU-WilliamNeural
Gender: Male
Name: en-CA-ClaraNeural
Gender: Female
Name: en-CA-LiamNeural
Gender: Male
Name: en-GB-LibbyNeural
Gender: Female
Name: en-GB-MaisieNeural
Gender: Female
Name: en-GB-RyanNeural
Gender: Male
Name: en-GB-SoniaNeural
Gender: Female
Name: en-GB-ThomasNeural
Gender: Male
Name: en-HK-SamNeural
Gender: Male
Name: en-HK-YanNeural
Gender: Female
Name: en-IE-ConnorNeural
Gender: Male
Name: en-IE-EmilyNeural
Gender: Female
Name: en-IN-NeerjaExpressiveNeural
Gender: Female
Name: en-IN-NeerjaNeural
Gender: Female
Name: en-IN-PrabhatNeural
Gender: Male
Name: en-KE-AsiliaNeural
Gender: Female
Name: en-KE-ChilembaNeural
Gender: Male
Name: en-NG-AbeoNeural
Gender: Male
Name: en-NG-EzinneNeural
Gender: Female
Name: en-NZ-MitchellNeural
Gender: Male
Name: en-NZ-MollyNeural
Gender: Female
Name: en-PH-JamesNeural
Gender: Male
Name: en-PH-RosaNeural
Gender: Female
Name: en-SG-LunaNeural
Gender: Female
Name: en-SG-WayneNeural
Gender: Male
Name: en-TZ-ElimuNeural
Gender: Male
Name: en-TZ-ImaniNeural
Gender: Female
Name: en-US-AnaNeural
Gender: Female
Name: en-US-AndrewNeural
Gender: Male
Name: en-US-AriaNeural
Gender: Female
Name: en-US-AvaNeural
Gender: Female
Name: en-US-BrianNeural
Gender: Male
Name: en-US-ChristopherNeural
Gender: Male
Name: en-US-EmmaNeural
Gender: Female
Name: en-US-EricNeural
Gender: Male
Name: en-US-GuyNeural
Gender: Male
Name: en-US-JennyNeural
Gender: Female
Name: en-US-MichelleNeural
Gender: Female
Name: en-US-RogerNeural
Gender: Male
Name: en-US-SteffanNeural
Gender: Male
Name: en-ZA-LeahNeural
Gender: Female
Name: en-ZA-LukeNeural
Gender: Male
Name: es-AR-ElenaNeural
Gender: Female
Name: es-AR-TomasNeural
Gender: Male
Name: es-BO-MarceloNeural
Gender: Male
Name: es-BO-SofiaNeural
Gender: Female
Name: es-CL-CatalinaNeural
Gender: Female
Name: es-CL-LorenzoNeural
Gender: Male
Name: es-CO-GonzaloNeural
Gender: Male
Name: es-CO-SalomeNeural
Gender: Female
Name: es-CR-JuanNeural
Gender: Male
Name: es-CR-MariaNeural
Gender: Female
Name: es-CU-BelkysNeural
Gender: Female
Name: es-CU-ManuelNeural
Gender: Male
Name: es-DO-EmilioNeural
Gender: Male
Name: es-DO-RamonaNeural
Gender: Female
Name: es-EC-AndreaNeural
Gender: Female
Name: es-EC-LuisNeural
Gender: Male
Name: es-ES-AlvaroNeural
Gender: Male
Name: es-ES-ElviraNeural
Gender: Female
Name: es-ES-XimenaNeural
Gender: Female
Name: es-GQ-JavierNeural
Gender: Male
Name: es-GQ-TeresaNeural
Gender: Female
Name: es-GT-AndresNeural
Gender: Male
Name: es-GT-MartaNeural
Gender: Female
Name: es-HN-CarlosNeural
Gender: Male
Name: es-HN-KarlaNeural
Gender: Female
Name: es-MX-DaliaNeural
Gender: Female
Name: es-MX-JorgeNeural
Gender: Male
Name: es-NI-FedericoNeural
Gender: Male
Name: es-NI-YolandaNeural
Gender: Female
Name: es-PA-MargaritaNeural
Gender: Female
Name: es-PA-RobertoNeural
Gender: Male
Name: es-PE-AlexNeural
Gender: Male
Name: es-PE-CamilaNeural
Gender: Female
Name: es-PR-KarinaNeural
Gender: Female
Name: es-PR-VictorNeural
Gender: Male
Name: es-PY-MarioNeural
Gender: Male
Name: es-PY-TaniaNeural
Gender: Female
Name: es-SV-LorenaNeural
Gender: Female
Name: es-SV-RodrigoNeural
Gender: Male
Name: es-US-AlonsoNeural
Gender: Male
Name: es-US-PalomaNeural
Gender: Female
Name: es-UY-MateoNeural
Gender: Male
Name: es-UY-ValentinaNeural
Gender: Female
Name: es-VE-PaolaNeural
Gender: Female
Name: es-VE-SebastianNeural
Gender: Male
Name: et-EE-AnuNeural
Gender: Female
Name: et-EE-KertNeural
Gender: Male
Name: fa-IR-DilaraNeural
Gender: Female
Name: fa-IR-FaridNeural
Gender: Male
Name: fi-FI-HarriNeural
Gender: Male
Name: fi-FI-NooraNeural
Gender: Female
Name: fil-PH-AngeloNeural
Gender: Male
Name: fil-PH-BlessicaNeural
Gender: Female
Name: fr-BE-CharlineNeural
Gender: Female
Name: fr-BE-GerardNeural
Gender: Male
Name: fr-CA-AntoineNeural
Gender: Male
Name: fr-CA-JeanNeural
Gender: Male
Name: fr-CA-SylvieNeural
Gender: Female
Name: fr-CA-ThierryNeural
Gender: Male
Name: fr-CH-ArianeNeural
Gender: Female
Name: fr-CH-FabriceNeural
Gender: Male
Name: fr-FR-DeniseNeural
Gender: Female
Name: fr-FR-EloiseNeural
Gender: Female
Name: fr-FR-HenriNeural
Gender: Male
Name: fr-FR-RemyMultilingualNeural
Gender: Male
Name: fr-FR-VivienneMultilingualNeural
Gender: Female
Name: ga-IE-ColmNeural
Gender: Male
Name: ga-IE-OrlaNeural
Gender: Female
Name: gl-ES-RoiNeural
Gender: Male
Name: gl-ES-SabelaNeural
Gender: Female
Name: gu-IN-DhwaniNeural
Gender: Female
Name: gu-IN-NiranjanNeural
Gender: Male
Name: he-IL-AvriNeural
Gender: Male
Name: he-IL-HilaNeural
Gender: Female
Name: hi-IN-MadhurNeural
Gender: Male
Name: hi-IN-SwaraNeural
Gender: Female
Name: hr-HR-GabrijelaNeural
Gender: Female
Name: hr-HR-SreckoNeural
Gender: Male
Name: hu-HU-NoemiNeural
Gender: Female
Name: hu-HU-TamasNeural
Gender: Male
Name: id-ID-ArdiNeural
Gender: Male
Name: id-ID-GadisNeural
Gender: Female
Name: is-IS-GudrunNeural
Gender: Female
Name: is-IS-GunnarNeural
Gender: Male
Name: it-IT-DiegoNeural
Gender: Male
Name: it-IT-ElsaNeural
Gender: Female
Name: it-IT-GiuseppeNeural
Gender: Male
Name: it-IT-IsabellaNeural
Gender: Female
Name: ja-JP-KeitaNeural
Gender: Male
Name: ja-JP-NanamiNeural
Gender: Female
Name: jv-ID-DimasNeural
Gender: Male
Name: jv-ID-SitiNeural
Gender: Female
Name: ka-GE-EkaNeural
Gender: Female
Name: ka-GE-GiorgiNeural
Gender: Male
Name: kk-KZ-AigulNeural
Gender: Female
Name: kk-KZ-DauletNeural
Gender: Male
Name: km-KH-PisethNeural
Gender: Male
Name: km-KH-SreymomNeural
Gender: Female
Name: kn-IN-GaganNeural
Gender: Male
Name: kn-IN-SapnaNeural
Gender: Female
Name: ko-KR-HyunsuNeural
Gender: Male
Name: ko-KR-InJoonNeural
Gender: Male
Name: ko-KR-SunHiNeural
Gender: Female
Name: lo-LA-ChanthavongNeural
Gender: Male
Name: lo-LA-KeomanyNeural
Gender: Female
Name: lt-LT-LeonasNeural
Gender: Male
Name: lt-LT-OnaNeural
Gender: Female
Name: lv-LV-EveritaNeural
Gender: Female
Name: lv-LV-NilsNeural
Gender: Male
Name: mk-MK-AleksandarNeural
Gender: Male
Name: mk-MK-MarijaNeural
Gender: Female
Name: ml-IN-MidhunNeural
Gender: Male
Name: ml-IN-SobhanaNeural
Gender: Female
Name: mn-MN-BataaNeural
Gender: Male
Name: mn-MN-YesuiNeural
Gender: Female
Name: mr-IN-AarohiNeural
Gender: Female
Name: mr-IN-ManoharNeural
Gender: Male
Name: ms-MY-OsmanNeural
Gender: Male
Name: ms-MY-YasminNeural
Gender: Female
Name: mt-MT-GraceNeural
Gender: Female
Name: mt-MT-JosephNeural
Gender: Male
Name: my-MM-NilarNeural
Gender: Female
Name: my-MM-ThihaNeural
Gender: Male
Name: nb-NO-FinnNeural
Gender: Male
Name: nb-NO-PernilleNeural
Gender: Female
Name: ne-NP-HemkalaNeural
Gender: Female
Name: ne-NP-SagarNeural
Gender: Male
Name: nl-BE-ArnaudNeural
Gender: Male
Name: nl-BE-DenaNeural
Gender: Female
Name: nl-NL-ColetteNeural
Gender: Female
Name: nl-NL-FennaNeural
Gender: Female
Name: nl-NL-MaartenNeural
Gender: Male
Name: pl-PL-MarekNeural
Gender: Male
Name: pl-PL-ZofiaNeural
Gender: Female
Name: ps-AF-GulNawazNeural
Gender: Male
Name: ps-AF-LatifaNeural
Gender: Female
Name: pt-BR-AntonioNeural
Gender: Male
Name: pt-BR-FranciscaNeural
Gender: Female
Name: pt-BR-ThalitaNeural
Gender: Female
Name: pt-PT-DuarteNeural
Gender: Male
Name: pt-PT-RaquelNeural
Gender: Female
Name: ro-RO-AlinaNeural
Gender: Female
Name: ro-RO-EmilNeural
Gender: Male
Name: ru-RU-DmitryNeural
Gender: Male
Name: ru-RU-SvetlanaNeural
Gender: Female
Name: si-LK-SameeraNeural
Gender: Male
Name: si-LK-ThiliniNeural
Gender: Female
Name: sk-SK-LukasNeural
Gender: Male
Name: sk-SK-ViktoriaNeural
Gender: Female
Name: sl-SI-PetraNeural
Gender: Female
Name: sl-SI-RokNeural
Gender: Male
Name: so-SO-MuuseNeural
Gender: Male
Name: so-SO-UbaxNeural
Gender: Female
Name: sq-AL-AnilaNeural
Gender: Female
Name: sq-AL-IlirNeural
Gender: Male
Name: sr-RS-NicholasNeural
Gender: Male
Name: sr-RS-SophieNeural
Gender: Female
Name: su-ID-JajangNeural
Gender: Male
Name: su-ID-TutiNeural
Gender: Female
Name: sv-SE-MattiasNeural
Gender: Male
Name: sv-SE-SofieNeural
Gender: Female
Name: sw-KE-RafikiNeural
Gender: Male
Name: sw-KE-ZuriNeural
Gender: Female
Name: sw-TZ-DaudiNeural
Gender: Male
Name: sw-TZ-RehemaNeural
Gender: Female
Name: ta-IN-PallaviNeural
Gender: Female
Name: ta-IN-ValluvarNeural
Gender: Male
Name: ta-LK-KumarNeural
Gender: Male
Name: ta-LK-SaranyaNeural
Gender: Female
Name: ta-MY-KaniNeural
Gender: Female
Name: ta-MY-SuryaNeural
Gender: Male
Name: ta-SG-AnbuNeural
Gender: Male
Name: ta-SG-VenbaNeural
Gender: Female
Name: te-IN-MohanNeural
Gender: Male
Name: te-IN-ShrutiNeural
Gender: Female
Name: th-TH-NiwatNeural
Gender: Male
Name: th-TH-PremwadeeNeural
Gender: Female
Name: tr-TR-AhmetNeural
Gender: Male
Name: tr-TR-EmelNeural
Gender: Female
Name: uk-UA-OstapNeural
Gender: Male
Name: uk-UA-PolinaNeural
Gender: Female
Name: ur-IN-GulNeural
Gender: Female
Name: ur-IN-SalmanNeural
Gender: Male
Name: ur-PK-AsadNeural
Gender: Male
Name: ur-PK-UzmaNeural
Gender: Female
Name: uz-UZ-MadinaNeural
Gender: Female
Name: uz-UZ-SardorNeural
Gender: Male
Name: vi-VN-HoaiMyNeural
Gender: Female
Name: vi-VN-NamMinhNeural
Gender: Male
Name: zh-CN-XiaoxiaoNeural
Gender: Female
Name: zh-CN-XiaoyiNeural
Gender: Female
Name: zh-CN-YunjianNeural
Gender: Male
Name: zh-CN-YunxiNeural
Gender: Male
Name: zh-CN-YunxiaNeural
Gender: Male
Name: zh-CN-YunyangNeural
Gender: Male
Name: zh-CN-liaoning-XiaobeiNeural
Gender: Female
Name: zh-CN-shaanxi-XiaoniNeural
Gender: Female
Name: zh-HK-HiuGaaiNeural
Gender: Female
Name: zh-HK-HiuMaanNeural
Gender: Female
Name: zh-HK-WanLungNeural
Gender: Male
Name: zh-TW-HsiaoChenNeural
Gender: Female
Name: zh-TW-HsiaoYuNeural
Gender: Female
Name: zh-TW-YunJheNeural
Gender: Male
Name: zu-ZA-ThandoNeural
Gender: Female
Name: zu-ZA-ThembaNeural
Gender: Male

BIN
docs/webui.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 241 KiB

10
main.py Normal file
View File

@@ -0,0 +1,10 @@
import uvicorn
from loguru import logger
from app.config import config
import __init__ # noqa
if __name__ == '__main__':
    # f-string instead of str() concatenation (idiom); same message output.
    logger.info(f"start server, docs: http://127.0.0.1:{config.listen_port}/docs")
    # reload_debug enables uvicorn auto-reload during development.
    uvicorn.run(app="app.asgi:app", host=config.listen_host, port=config.listen_port, reload=config.reload_debug,
                log_level="warning")

14
requirements.txt Normal file
View File

@@ -0,0 +1,14 @@
requests~=2.31.0
moviepy~=1.0.3
openai~=1.13.3
faster-whisper~=1.0.1
edge_tts~=6.1.10
uvicorn~=0.27.1
fastapi~=0.110.0
tomli~=2.0.1
streamlit~=1.32.0
loguru~=0.7.2
aiohttp~=3.9.3
urllib3~=2.2.1
pillow~=10.2.0
pydantic~=2.6.3

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

4
webui.sh Normal file
View File

@@ -0,0 +1,4 @@
# Launch the MoneyPrinterTurbo web UI via Streamlit.
# Run from the project root so the `app` package resolves.
CURRENT_DIR=$(pwd)
echo "***** Current directory: $CURRENT_DIR *****"
# Make the project root importable (webui/Main.py imports `app.*`).
export PYTHONPATH="${CURRENT_DIR}:$PYTHONPATH"
streamlit run ./webui/Main.py

View File

@@ -0,0 +1,2 @@
[browser]
gatherUsageStats = false

153
webui/Main.py Normal file
View File

@@ -0,0 +1,153 @@
import sys
import os
from uuid import uuid4
import streamlit as st
from loguru import logger
from app.models.schema import VideoParams, VideoAspect, VoiceNames
from app.services import task as tm
# Streamlit page setup: wide layout with a custom title/icon.
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
                   initial_sidebar_state="auto")
st.title("MoneyPrinterTurbo")

# Project root (= parent of the webui/ directory) and resource paths.
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
font_dir = os.path.join(root_dir, "resource", "fonts")
song_dir = os.path.join(root_dir, "resource", "songs")
def get_all_fonts():
    """Collect font file names (.ttf/.ttc) under resource/fonts, recursively."""
    fonts = []
    for root, dirs, files in os.walk(font_dir):
        for file in files:
            # str.endswith accepts a tuple of suffixes — one call instead of
            # two chained with `or` (idiom).
            if file.endswith((".ttf", ".ttc")):
                fonts.append(file)
    return fonts
def get_all_songs():
    """Collect .mp3 file names under resource/songs, recursively."""
    songs = []
    for _dirpath, _dirnames, filenames in os.walk(song_dir):
        songs.extend(name for name in filenames if name.endswith(".mp3"))
    return songs
def init_log():
    """Configure loguru: one stdout sink printing project-relative file paths."""
    logger.remove()
    _lvl = "DEBUG"

    def format_record(record):
        # Full path of the file that emitted the record.
        file_path = record["file"].path
        # Convert the absolute path to one relative to the project root.
        relative_path = os.path.relpath(file_path, root_dir)
        # Rewrite the record in place so the sink prints the short path.
        record["file"].path = f"./{relative_path}"
        # Also shorten absolute project paths inside the message itself.
        record['message'] = record['message'].replace(root_dir, ".")
        # Return the loguru format string; adjust here to change the layout.
        _format = '<green>{time:%Y-%m-%d %H:%M:%S}</> | ' + \
                  '<level>{level}</> | ' + \
                  '"{file.path}:{line}":<blue> {function}</> ' + \
                  '- <level>{message}</>' + "\n"
        return _format

    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True,
    )
init_log()

# Two-column layout: video/voice settings on the left, subtitles on the right.
panel = st.columns(2)
left_panel = panel[0]
right_panel = panel[1]

# cfg accumulates all user choices as a VideoParams instance.
cfg = VideoParams()

with left_panel:
    with st.container(border=True):
        st.write("**视频设置**")
        cfg.video_subject = st.text_area("视频主题", help="请输入视频主题")

        video_aspect_ratios = [
            ("竖屏 9:16,抖音视频", VideoAspect.portrait.value),
            ("横屏 16:9,西瓜视频", VideoAspect.landscape.value),
            # ("方形 1:1", VideoAspect.square.value)
        ]
        selected_index = st.selectbox("视频比例",
                                      options=range(len(video_aspect_ratios)),  # indices are the internal option values
                                      format_func=lambda x: video_aspect_ratios[x][0]  # the label is shown to the user
                                      )
        cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])

    with st.container(border=True):
        st.write("**声音设置**")
        # Map each raw voice name to a localized, friendlier display name.
        friendly_names = {
            voice: voice.
            replace("female", "女性").
            replace("male", "男性").
            replace("zh-CN", "中文").
            replace("zh-HK", "香港").
            replace("zh-TW", "台湾").
            replace("en-US", "英文").
            replace("Neural", "") for
            voice in VoiceNames}
        selected_friendly_name = st.selectbox("声音", options=list(friendly_names.values()))
        # Reverse-lookup the raw voice name from the selected display name.
        voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
        cfg.voice_name = voice_name

        song_names = [
            ("无背景音乐 No BGM", ""),
            ("随机背景音乐 Random BGM", "random"),
            *[(song, song) for song in get_all_songs()]
        ]
        selected_index = st.selectbox("背景音乐",
                                      index=1,  # default: random BGM
                                      options=range(len(song_names)),  # indices are the internal option values
                                      format_func=lambda x: song_names[x][0]  # the label is shown to the user
                                      )
        cfg.bgm_name = song_names[selected_index][1]

with right_panel:
    with st.container(border=True):
        st.write("**字幕设置**")
        font_names = get_all_fonts()
        cfg.font_name = st.selectbox("字体", font_names)
        cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
        cfg.font_size = st.slider("字幕大小", 30, 100, 60)
        cfg.stroke_color = st.color_picker("描边颜色", "#000000")
        cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)

start_button = st.button("开始生成视频", use_container_width=True, type="primary")
if start_button:
    task_id = str(uuid4())
    st.session_state['started'] = True
    if not cfg.video_subject:
        st.error("视频主题不能为空")
        st.stop()
    st.write(cfg)

    # Mirror log output into the page while the task runs.
    log_container = st.empty()
    log_records = []

    def log_received(msg):
        with log_container:
            log_records.append(msg)
            st.code("\n".join(log_records))

    logger.add(log_received)
    logger.info("开始生成视频")
    # Kick off the full generation pipeline for this task.
    tm.start(task_id=task_id, params=cfg)