feat: whisper language setting (#134)

* feat: whisper language setting

* fix: set auto as default language setting
This commit is contained in:
Xinrea
2025-07-15 22:38:31 +08:00
committed by GitHub
parent ca4e266ae6
commit 53897c66ee
10 changed files with 101 additions and 9 deletions

View File

@@ -33,6 +33,8 @@ pub struct Config {
pub status_check_interval: u64,
#[serde(skip)]
pub config_path: String,
#[serde(default = "default_whisper_language")]
pub whisper_language: String,
}
#[derive(Deserialize, Serialize, Clone)]
@@ -80,6 +82,10 @@ fn default_status_check_interval() -> u64 {
30
}
/// Serde default for `Config::whisper_language`.
///
/// "auto" asks Whisper to detect the spoken language instead of
/// forcing a specific one.
fn default_whisper_language() -> String {
    String::from("auto")
}
impl Config {
pub fn load(
config_path: &PathBuf,
@@ -116,6 +122,7 @@ impl Config {
auto_generate: default_auto_generate_config(),
status_check_interval: default_status_check_interval(),
config_path: config_path.to_str().unwrap().into(),
whisper_language: default_whisper_language(),
};
config.save();
@@ -142,6 +149,12 @@ impl Config {
self.save();
}
/// Overwrite the Whisper language hint and persist the config
/// to disk immediately.
#[allow(dead_code)]
pub fn set_whisper_language(&mut self, language: &str) {
    self.whisper_language = language.to_owned();
    self.save();
}
pub fn generate_clip_name(&self, params: &ClipRangeParams) -> PathBuf {
let platform = PlatformType::from_str(&params.platform).unwrap();

View File

@@ -234,3 +234,14 @@ pub async fn update_status_check_interval(
state.config.write().await.save();
Ok(())
}
/// Update the configured Whisper language hint and persist the change.
///
/// Exposed as a Tauri command when the `gui` feature is enabled, and
/// reused by the HTTP API handler in headless mode.
#[cfg_attr(feature = "gui", tauri::command)]
pub async fn update_whisper_language(
    state: state_type!(),
    whisper_language: String,
) -> Result<(), ()> {
    log::info!("Updating whisper language to {}", whisper_language);
    // Take the write lock once: mutating the field and saving under the
    // same guard avoids a window where another writer could interleave
    // between the update and the save, and skips a second lock round-trip.
    let mut config = state.config.write().await;
    config.whisper_language = whisper_language;
    config.save();
    Ok(())
}

View File

@@ -400,7 +400,9 @@ pub async fn generate_video_subtitle(
};
state.db.add_task(&task).await?;
log::info!("Create task: {:?}", task);
match generate_video_subtitle_inner(&state, &reporter, id).await {
let language_hint = state.config.read().await.whisper_language.clone();
let language_hint = language_hint.as_str();
match generate_video_subtitle_inner(&state, &reporter, id, language_hint).await {
Ok(result) => {
reporter.finish(true, "字幕生成完成").await;
// for local whisper, we need to update the task status to success
@@ -445,6 +447,7 @@ async fn generate_video_subtitle_inner(
state: &state_type!(),
reporter: &ProgressReporter,
id: i64,
language_hint: &str,
) -> Result<GenerateResult, String> {
let video = state.db.get_video(id).await?;
let filepath = Path::new(state.config.read().await.output.as_str()).join(&video.file);
@@ -484,7 +487,9 @@ async fn generate_video_subtitle_inner(
let mut results = Vec::new();
for path in chunk_paths {
let result = generator.generate_subtitle(reporter, &path).await;
let result = generator
.generate_subtitle(reporter, &path, language_hint)
.await;
results.push(result);
}
@@ -548,7 +553,9 @@ async fn generate_video_subtitle_inner(
let mut results = Vec::new();
for path in chunk_paths {
let result = generator.generate_subtitle(reporter, &path).await;
let result = generator
.generate_subtitle(reporter, &path, language_hint)
.await;
results.push(result);
}

View File

@@ -17,8 +17,8 @@ use crate::{
config::{
get_config, update_auto_generate, update_clip_name_format, update_notify,
update_openai_api_endpoint, update_openai_api_key, update_status_check_interval,
update_subtitle_generator_type, update_subtitle_setting, update_whisper_model,
update_whisper_prompt,
update_subtitle_generator_type, update_subtitle_setting, update_whisper_language,
update_whisper_model, update_whisper_prompt,
},
message::{delete_message, get_messages, read_message},
recorder::{
@@ -251,6 +251,22 @@ async fn handler_update_whisper_model(
Ok(Json(ApiResponse::success(())))
}
// Request body for POST /api/update_whisper_language.
// With camelCase renaming the wire format is `{"whisperLanguage": "..."}`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct UpdateWhisperLanguageRequest {
    // Optional so a request with the field absent or null deserializes
    // as None instead of being rejected outright.
    whisper_language: Option<String>,
}
/// HTTP handler for `/api/update_whisper_language`.
///
/// Accepts `{"whisperLanguage": "..."}`; a missing or null value falls
/// back to "auto" (the serde default for the config field) rather than
/// failing the request.
async fn handler_update_whisper_language(
    state: axum::extract::State<State>,
    Json(req): Json<UpdateWhisperLanguageRequest>,
) -> Result<Json<ApiResponse<()>>, ApiError> {
    // The request field is Option<String> but the config command takes an
    // owned String — default an absent value to "auto" instead of passing
    // the Option through (which would not type-check against the command).
    let language = req
        .whisper_language
        .unwrap_or_else(|| "auto".to_string());
    // NOTE(review): update_whisper_language currently always returns Ok(()),
    // so this expect cannot fire today; switch to `?`/an ApiError mapping if
    // the command ever gains a real failure path.
    update_whisper_language(state.0, language)
        .await
        .expect("Failed to update whisper language");
    Ok(Json(ApiResponse::success(())))
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct UpdateSubtitleSettingRequest {
@@ -1227,6 +1243,10 @@ pub async fn start_api_server(state: State) {
.route(
"/api/update_auto_generate",
post(handler_update_auto_generate),
)
.route(
"/api/update_whisper_language",
post(handler_update_whisper_language),
);
} else {
log::info!("Running in readonly mode, some api routes are disabled");

View File

@@ -408,6 +408,7 @@ fn setup_invoke_handlers(builder: tauri::Builder<tauri::Wry>) -> tauri::Builder<
crate::handlers::config::update_openai_api_endpoint,
crate::handlers::config::update_auto_generate,
crate::handlers::config::update_status_check_interval,
crate::handlers::config::update_whisper_language,
crate::handlers::message::get_messages,
crate::handlers::message::read_message,
crate::handlers::message::delete_message,

View File

@@ -108,5 +108,6 @@ pub trait SubtitleGenerator {
&self,
reporter: &impl ProgressReporterTrait,
audio_path: &Path,
language_hint: &str,
) -> Result<GenerateResult, String>;
}

View File

@@ -38,6 +38,7 @@ impl SubtitleGenerator for WhisperCPP {
&self,
reporter: &impl ProgressReporterTrait,
audio_path: &Path,
language_hint: &str,
) -> Result<GenerateResult, String> {
log::info!("Generating subtitle for {:?}", audio_path);
let start_time = std::time::Instant::now();
@@ -53,8 +54,8 @@ impl SubtitleGenerator for WhisperCPP {
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
// and set the language to translate to to auto
params.set_language(None);
// and set the language
params.set_language(Some(language_hint));
params.set_initial_prompt(self.prompt.as_str());
// we also explicitly disable anything that prints to stdout
@@ -179,7 +180,9 @@ mod tests {
.unwrap();
let audio_path = Path::new("tests/audio/test.wav");
let reporter = MockReporter::new();
let result = whisper.generate_subtitle(&reporter, audio_path).await;
let result = whisper
.generate_subtitle(&reporter, audio_path, "auto")
.await;
if let Err(e) = result {
println!("Error: {}", e);
panic!("Failed to generate subtitle");

View File

@@ -56,6 +56,7 @@ impl SubtitleGenerator for WhisperOnline {
&self,
reporter: &impl ProgressReporterTrait,
audio_path: &Path,
language_hint: &str,
) -> Result<GenerateResult, String> {
log::info!("Generating subtitle online for {:?}", audio_path);
let start_time = std::time::Instant::now();
@@ -98,6 +99,8 @@ impl SubtitleGenerator for WhisperOnline {
.text("response_format", "verbose_json")
.text("temperature", "0.0");
form = form.text("language", language_hint.to_string());
if let Some(prompt) = self.prompt.clone() {
form = form.text("prompt", prompt);
}
@@ -226,7 +229,11 @@ mod tests {
assert!(result.is_ok());
let result = result.unwrap();
let result = result
.generate_subtitle(&MockReporter::new(), Path::new("tests/audio/test.wav"))
.generate_subtitle(
&MockReporter::new(),
Path::new("tests/audio/test.wav"),
"auto",
)
.await;
println!("{:?}", result);
assert!(result.is_ok());

View File

@@ -105,6 +105,7 @@ export interface Config {
clip_name_format: string;
auto_generate: AutoGenerateConfig;
status_check_interval: number;
whisper_language: string;
}
export interface AutoGenerateConfig {

View File

@@ -36,6 +36,7 @@
encode_danmu: false,
},
status_check_interval: 30, // 默认30秒
whisper_language: "",
};
let showModal = false;
@@ -575,6 +576,33 @@
</div>
</div>
{/if}
<!-- Whisper Language -->
<div class="p-4">
<div class="flex items-center justify-between">
<div>
<h3
class="text-sm font-medium text-gray-900 dark:text-white"
>
Whisper 语言
</h3>
<p class="text-sm text-gray-500 dark:text-gray-400">
(测试)生成字幕时使用的语言,默认自动识别
</p>
</div>
<div class="flex items-center space-x-2">
<input
type="text"
class="px-3 py-2 bg-gray-100 dark:bg-gray-700 rounded-lg border border-gray-200 dark:border-gray-600 text-gray-900 dark:text-white w-96"
bind:value={setting_model.whisper_language}
on:change={async () => {
await invoke("update_whisper_language", {
whisperLanguage: setting_model.whisper_language,
});
}}
/>
</div>
</div>
</div>
<div class="p-4">
<div class="flex items-center justify-between">
<div>