mirror of
https://github.com/Xinrea/bili-shadowreplay.git
synced 2025-11-25 04:22:24 +08:00
feat: general subtitle generator interface
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -22,3 +22,7 @@ dist-ssr
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
|
||||
# test files
|
||||
src-tauri/tests/audio/*.srt
|
||||
src-tauri/tests/model/*.bin
|
||||
117
src-tauri/Cargo.lock
generated
117
src-tauri/Cargo.lock
generated
@@ -415,6 +415,7 @@ dependencies = [
|
||||
"felgens",
|
||||
"ffmpeg-sidecar",
|
||||
"futures",
|
||||
"hound",
|
||||
"hyper 0.14.25",
|
||||
"log",
|
||||
"m3u8-rs",
|
||||
@@ -446,6 +447,27 @@ dependencies = [
|
||||
"tokio",
|
||||
"toml 0.7.3",
|
||||
"urlencoding",
|
||||
"whisper-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.71.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
|
||||
dependencies = [
|
||||
"bitflags 2.6.0",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.87",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -652,6 +674,15 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||
|
||||
[[package]]
|
||||
name = "cexpr"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfb"
|
||||
version = "0.7.3"
|
||||
@@ -700,6 +731,26 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"libc",
|
||||
"libloading 0.8.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.54"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cocoa"
|
||||
version = "0.26.0"
|
||||
@@ -1205,7 +1256,7 @@ version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412"
|
||||
dependencies = [
|
||||
"libloading",
|
||||
"libloading 0.8.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1550,6 +1601,12 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
@@ -2135,6 +2192,12 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hound"
|
||||
version = "3.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.26.0"
|
||||
@@ -2470,6 +2533,15 @@ dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.8"
|
||||
@@ -2621,7 +2693,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf"
|
||||
dependencies = [
|
||||
"gtk-sys",
|
||||
"libloading",
|
||||
"libloading 0.7.4",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
@@ -2641,6 +2713,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.8"
|
||||
@@ -3711,6 +3793,16 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn 2.0.87",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-crate"
|
||||
version = "1.3.1"
|
||||
@@ -6441,6 +6533,27 @@ dependencies = [
|
||||
"windows-core 0.58.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whisper-rs"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b6dca51a101d32fa551d66d34fef899a39d5c8b68b6ea5adf4080b9ec37bb58"
|
||||
dependencies = [
|
||||
"whisper-rs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whisper-rs-sys"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7c6b8157262ff9e4239549db921ed40ba758e03f565893d4e700380286c643b"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"cfg-if",
|
||||
"cmake",
|
||||
"fs_extra",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whoami"
|
||||
version = "1.5.2"
|
||||
|
||||
@@ -28,7 +28,7 @@ toml = "0.7.3"
|
||||
custom_error = "1.9.2"
|
||||
felgens = { git = "https://github.com/Xinrea/felgens.git", tag = "v0.4.1" }
|
||||
regex = "1.7.3"
|
||||
tokio = "1.27.0"
|
||||
tokio = { version = "1.27.0", features = ["process"] }
|
||||
platform-dirs = "0.3.0"
|
||||
pct-str = "1.2.0"
|
||||
md5 = "0.7.0"
|
||||
@@ -51,6 +51,8 @@ rand = "0.8.5"
|
||||
base64 = "0.21"
|
||||
mime_guess = "2.0"
|
||||
async-trait = "0.1.87"
|
||||
whisper-rs = "0.14.2"
|
||||
hound = "3.5.1"
|
||||
|
||||
[features]
|
||||
# this feature is used for production builds or when `devPath` points to the filesystem
|
||||
@@ -59,3 +61,11 @@ custom-protocol = ["tauri/custom-protocol"]
|
||||
|
||||
[target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
|
||||
tauri-plugin-single-instance = "2"
|
||||
|
||||
[target.'cfg(windows)'.dependencies.whisper-rs]
|
||||
version = "0.14.2"
|
||||
features = ["cuda"]
|
||||
|
||||
# Enable the Metal backend of whisper-rs on macOS.
# Cargo evaluates `cfg(...)` against the cfg values reported by rustc; a bare
# `darwin` is not one of them, so the platform is selected via `target_os`.
[target.'cfg(target_os = "macos")'.dependencies.whisper-rs]
version = "0.14.2"
features = ["metal"]
|
||||
|
||||
@@ -9,6 +9,7 @@ mod progress_event;
|
||||
mod recorder;
|
||||
mod recorder_manager;
|
||||
mod state;
|
||||
mod subtitle_generator;
|
||||
mod tray;
|
||||
|
||||
use config::Config;
|
||||
|
||||
29
src-tauri/src/subtitle_generator.rs
Normal file
29
src-tauri/src/subtitle_generator.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
use async_std::path::{Path, PathBuf};
|
||||
use async_trait::async_trait;
|
||||
|
||||
pub mod whisper;
|
||||
|
||||
/// Identifies which backend is used to generate subtitles.
///
/// Each variant has a stable lowercase string identifier, available through
/// [`SubtitleGeneratorType::as_str`] and parsed back by
/// [`SubtitleGeneratorType::from_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubtitleGeneratorType {
    /// The generator implemented in the `whisper` submodule.
    Whisper,
}

impl SubtitleGeneratorType {
    /// Returns the string identifier of this generator type.
    pub fn as_str(&self) -> &'static str {
        match self {
            SubtitleGeneratorType::Whisper => "whisper",
        }
    }

    /// Parses a string identifier into a generator type.
    ///
    /// Matching is exact (case-sensitive). Returns `None` when `s` does not
    /// name a known generator.
    // Kept as an inherent method returning `Option` so existing callers keep
    // working; `std::str::FromStr` would change the return type to `Result`.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "whisper" => Some(SubtitleGeneratorType::Whisper),
            _ => None,
        }
    }
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait SubtitleGenerator {
|
||||
async fn generate_subtitle(&self, video_path: &Path, output_path: &Path) -> Result<(), String>;
|
||||
}
|
||||
134
src-tauri/src/subtitle_generator/whisper.rs
Normal file
134
src-tauri/src/subtitle_generator/whisper.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use async_std::path::{Path, PathBuf};
|
||||
use async_std::sync::{Arc, RwLock};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
|
||||
|
||||
use super::SubtitleGenerator;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WhisperCPP {
|
||||
ctx: Arc<RwLock<WhisperContext>>,
|
||||
model_path: Arc<RwLock<PathBuf>>,
|
||||
}
|
||||
|
||||
pub async fn new(model: &Path) -> Result<WhisperCPP, String> {
|
||||
let ctx = WhisperContext::new_with_params(
|
||||
model.to_str().unwrap(),
|
||||
WhisperContextParameters::default(),
|
||||
)
|
||||
.expect("failed to load model");
|
||||
|
||||
Ok(WhisperCPP {
|
||||
ctx: Arc::new(RwLock::new(ctx)),
|
||||
model_path: Arc::new(RwLock::new(model.to_path_buf())),
|
||||
})
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SubtitleGenerator for WhisperCPP {
|
||||
async fn generate_subtitle(&self, audio_path: &Path, output_path: &Path) -> Result<(), String> {
|
||||
let samples: Vec<i16> = hound::WavReader::open(audio_path)
|
||||
.unwrap()
|
||||
.into_samples::<i16>()
|
||||
.map(|x| x.unwrap())
|
||||
.collect();
|
||||
|
||||
let mut state = self
|
||||
.ctx
|
||||
.read()
|
||||
.await
|
||||
.create_state()
|
||||
.expect("failed to create state");
|
||||
|
||||
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
|
||||
|
||||
// and set the language to translate to to auto
|
||||
params.set_language(None);
|
||||
|
||||
// we also explicitly disable anything that prints to stdout
|
||||
params.set_print_special(false);
|
||||
params.set_print_progress(false);
|
||||
params.set_print_realtime(false);
|
||||
params.set_print_timestamps(false);
|
||||
params.set_token_timestamps(true);
|
||||
|
||||
let mut inter_samples = vec![Default::default(); samples.len()];
|
||||
|
||||
whisper_rs::convert_integer_to_float_audio(&samples, &mut inter_samples)
|
||||
.expect("failed to convert audio data");
|
||||
let samples = whisper_rs::convert_stereo_to_mono_audio(&inter_samples)
|
||||
.expect("failed to convert audio data");
|
||||
|
||||
state
|
||||
.full(params, &samples[..])
|
||||
.expect("failed to run model");
|
||||
|
||||
// open the output file
|
||||
let mut output_file = tokio::fs::File::create(output_path)
|
||||
.await
|
||||
.expect("failed to create output file");
|
||||
// fetch the results
|
||||
let num_segments = state
|
||||
.full_n_segments()
|
||||
.expect("failed to get number of segments");
|
||||
for i in 0..num_segments {
|
||||
let segment = state
|
||||
.full_get_segment_text(i)
|
||||
.expect("failed to get segment");
|
||||
let start_timestamp = state
|
||||
.full_get_segment_t0(i)
|
||||
.expect("failed to get segment start timestamp");
|
||||
let end_timestamp = state
|
||||
.full_get_segment_t1(i)
|
||||
.expect("failed to get segment end timestamp");
|
||||
|
||||
let format_time = |timestamp: f64| {
|
||||
let hours = (timestamp / 3600.0).floor();
|
||||
let minutes = ((timestamp - hours * 3600.0) / 60.0).floor();
|
||||
let seconds = timestamp - hours * 3600.0 - minutes * 60.0;
|
||||
format!("{:02}:{:02}:{:06.3}", hours, minutes, seconds)
|
||||
};
|
||||
|
||||
let line = format!(
|
||||
"{}\n{} --> {}\n{}\n\n",
|
||||
i + 1,
|
||||
format_time(start_timestamp as f64 / 100.0),
|
||||
format_time(end_timestamp as f64 / 100.0),
|
||||
segment,
|
||||
);
|
||||
|
||||
output_file
|
||||
.write_all(line.as_bytes())
|
||||
.await
|
||||
.expect("failed to write to output file");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Loading the tiny model succeeds when the model file is present under
    /// `tests/model/` (model binaries are not checked in).
    #[tokio::test]
    #[ignore = "need tiny model"]
    async fn create_whisper_cpp() {
        let result = new(Path::new("tests/model/ggml-model-whisper-tiny.bin")).await;
        // Include the error text so a failure explains itself.
        assert!(
            result.is_ok(),
            "failed to create WhisperCPP: {:?}",
            result.err()
        );
    }

    /// Transcribes the bundled sample audio and writes `tests/audio/test.srt`.
    #[tokio::test]
    #[ignore = "need large model"]
    async fn process_by_whisper_cpp() {
        let whisper = new(Path::new("tests/model/ggml-model-whisper-large-q5_0.bin"))
            .await
            .unwrap();
        let audio_path = Path::new("tests/audio/test.wav");
        let output_path = Path::new("tests/audio/test.srt");
        let result = whisper.generate_subtitle(audio_path, output_path).await;
        assert!(result.is_ok(), "failed to generate subtitle: {:?}", result);
    }
}
|
||||
BIN
src-tauri/tests/audio/test.wav
Normal file
BIN
src-tauri/tests/audio/test.wav
Normal file
Binary file not shown.
0
src-tauri/tests/model/.gitkeep
Normal file
0
src-tauri/tests/model/.gitkeep
Normal file
Reference in New Issue
Block a user