fix: Don't load DeepSpeed if use_deepspeed is False

- A recent change made DeepSpeed optional (off by default), but the code was still trying to load DeepSpeed even when `use_deepspeed = False`. This means users would still have a big startup slowdown and a lot of error messages if their DeepSpeed module isn't working (usually because it's not able to compile itself on their machines).

- We now only load DeepSpeed if the user requested it.

- Translated the DeepSpeed error message to English, since all other errors in the same function were already in English.
This commit is contained in:
Arcitec
2025-09-09 18:20:28 +02:00
parent 7aca90ba6c
commit 05a8ae45e5
2 changed files with 8 additions and 8 deletions

View File

@@ -47,7 +47,7 @@ class IndexTTS2:
use_fp16 (bool): whether to use fp16.
device (str): device to use (e.g., 'cuda:0', 'cpu'). If None, it will be set automatically based on the availability of CUDA or MPS.
use_cuda_kernel (None | bool): whether to use BigVGan custom fused activation CUDA kernel, only for CUDA device.
use_deepspeed (bool): whether to use deepspeed or not.
use_deepspeed (bool): whether to use DeepSpeed or not.
"""
if device is not None:
self.device = device
@@ -88,12 +88,12 @@ class IndexTTS2:
self.gpt.eval()
print(">> GPT weights restored from:", self.gpt_path)
try:
import deepspeed
except (ImportError, OSError, CalledProcessError) as e:
if use_deepspeed:
print(f">> DeepSpeed加载失败,回退到标准推理: {e}")
use_deepspeed = False
if use_deepspeed:
try:
import deepspeed
except (ImportError, OSError, CalledProcessError) as e:
use_deepspeed = False
print(f">> Failed to load DeepSpeed. Falling back to normal inference. Error: {e}")
self.gpt.post_init_gpt2_config(use_deepspeed=use_deepspeed, kv_cache=True, half=self.use_fp16)

View File

@@ -25,7 +25,7 @@ parser.add_argument("--port", type=int, default=7860, help="Port to run the web
parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to run the web UI on")
parser.add_argument("--model_dir", type=str, default="./checkpoints", help="Model checkpoints directory")
parser.add_argument("--fp16", action="store_true", default=False, help="Use FP16 for inference if available")
parser.add_argument("--use_deepspeed", action="store_true", default=False, help="Use Deepspeed to accelerate if available")
parser.add_argument("--use_deepspeed", action="store_true", default=False, help="Use DeepSpeed to accelerate if available")
parser.add_argument("--cuda_kernel", action="store_true", default=False, help="Use CUDA kernel for inference if available")
parser.add_argument("--gui_seg_tokens", type=int, default=120, help="GUI: Max tokens per generation segment")
cmd_args = parser.parse_args()