mirror of
https://github.com/index-tts/index-tts.git
synced 2025-11-26 03:44:54 +08:00
feat: Add reusable Emotion Vector normalization helper
- The WebUI was secretly squashing all emotion vectors and re-scaling them. That is good for user friendliness, but it makes it harder to learn which values will work in Python when using the WebUI for testing.
- Instead, the normalization code has been moved into IndexTTS2 as a helper function, which Gradio uses and which other people's code can call too.
- The emotion bias (which reduces the influence of certain emotions) has been converted into an optional feature that can be turned off when such biasing isn't wanted. All biasing values have also been re-scaled to use 1.0 as the reference, to avoid scaling relative to 0.8 (which previously applied double scaling).
This commit is contained in:
@@ -305,6 +305,22 @@ class IndexTTS2:
|
||||
print(f"Audio too long ({audio.shape[1]} samples), truncating to {max_audio_samples} samples")
|
||||
audio = audio[:, :max_audio_samples]
|
||||
return audio, sr
|
||||
|
||||
def normalize_emo_vec(self, emo_vector, apply_bias=True, max_sum=0.8):
    """Normalize an 8-element emotion vector for inference.

    Optionally applies per-emotion bias factors (for better user
    experience, by de-emphasizing emotions that can cause strange
    results), then rescales the vector so its total does not exceed
    ``max_sum``.

    Args:
        emo_vector: Sequence of 8 emotion strengths in the order
            [happy, angry, sad, afraid, disgusted, melancholic,
            surprised, calm].
        apply_bias: Whether to apply the per-emotion bias factors.
        max_sum: Upper bound for the summed emotion strength. Defaults
            to 0.8, the cap previously hard-coded here.

    Returns:
        A list of floats whose sum is at most ``max_sum``.
    """
    if apply_bias:
        # apply biased emotion factors for better user experience,
        # by de-emphasizing emotions that can cause strange results
        # [happy, angry, sad, afraid, disgusted, melancholic, surprised, calm]
        emo_bias = [0.9375, 0.875, 1.0, 1.0, 0.9375, 0.9375, 0.6875, 0.5625]
        emo_vector = [vec * bias for vec, bias in zip(emo_vector, emo_bias)]

    # the total emotion sum must be max_sum (default 0.8) or less
    emo_sum = sum(emo_vector)
    if emo_sum > max_sum:
        scale_factor = max_sum / emo_sum
        emo_vector = [vec * scale_factor for vec in emo_vector]

    return emo_vector
|
||||
|
||||
# 原始推理模式
|
||||
def infer(self, spk_audio_prompt, text, output_path,
|
||||
|
||||
12
webui.py
12
webui.py
@@ -6,8 +6,6 @@ import time
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
warnings.filterwarnings("ignore", category=FutureWarning)
|
||||
warnings.filterwarnings("ignore", category=UserWarning)
|
||||
|
||||
@@ -104,14 +102,6 @@ with open("examples/cases.jsonl", "r", encoding="utf-8") as f:
|
||||
example.get("emo_text") is not None]
|
||||
)
|
||||
|
||||
def normalize_emo_vec(emo_vec):
    # De-emphasize certain emotions for a better user experience.
    bias = np.array([0.75, 0.70, 0.80, 0.80, 0.75, 0.75, 0.55, 0.45])
    scaled = bias * np.array(emo_vec)
    # Cap the overall emotion strength at 0.8.
    total = np.sum(scaled)
    if total > 0.8:
        scaled = scaled * 0.8 / total
    return scaled.tolist()
|
||||
|
||||
def gen_single(emo_control_method,prompt, text,
|
||||
emo_ref_path, emo_weight,
|
||||
vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8,
|
||||
@@ -145,7 +135,7 @@ def gen_single(emo_control_method,prompt, text,
|
||||
pass
|
||||
if emo_control_method == 2: # emotion from custom vectors
|
||||
vec = [vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8]
|
||||
vec = normalize_emo_vec(vec)
|
||||
vec = tts.normalize_emo_vec(vec, apply_bias=True)
|
||||
else:
|
||||
# don't use the emotion vector inputs for the other modes
|
||||
vec = None
|
||||
|
||||
Reference in New Issue
Block a user