From 3fe9b00de74b56fdacfa12a739e91be881de5ab2 Mon Sep 17 00:00:00 2001
From: theliu
Date: Sat, 25 Apr 2026 14:10:09 +0800
Subject: [PATCH] Initial commit: V1

---
 .gitignore        |  10 ++-
 README.md         | 182 +++++++++++++++++++++++-----------------------
 config.py         |  93 +++++++++++++----------
 crash.log         |  34 +++++++++
 gui.py            |  24 +++---
 image_gen.py      | 147 ++++++++++++++++++-------------------
 run.bat           |  14 +---
 scene_generate.py |   2 +-
 text_ai.py        |  78 +++++++++----------
 9 files changed, 305 insertions(+), 279 deletions(-)
 create mode 100644 crash.log

diff --git a/.gitignore b/.gitignore
index 73b62c7..2df8315 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,13 +12,15 @@ models/
 
 # Workspace data (user-generated)
 workspace/
 
+# Virtual env
+venv/
+.venv/
+
 # Backup
 _backup/
 
-# Environment
-.env
-venv/
-.venv/
+# Environment
+.env
 
 # IDE
 .vscode/
diff --git a/README.md b/README.md
index 87cb70a..f966693 100644
--- a/README.md
+++ b/README.md
@@ -1,132 +1,132 @@
-# Videoer
+# VidMarmot
 
-**AI-powered video generation pipeline** — 从文章到视频的一站式工具。
+> **VidMarmot** — 为英语课本音频配画的 AI 工具。
 
-给定一篇英文文章(文本)和对应的朗读音频,自动完成:
+给一篇课文文本 + 对应的朗读音频,VidMarmot 会自动拆分场景、生成配图、对齐语音时间轴,最终合成一个带字幕的视频。
+
+## 为什么做这个?
+ +老师总让我帮忙做课文视频。一次两次还好,做多了真的烦。 + +所以我就写了这个工具——把整个流程自动化了:放进去文本和音频,点几下按钮,视频就出来了。 + +## 主要用途 + +- **英语课本课文** — 给每篇课文的朗读音频配上场景画面 +- **故事类文章** — 自动拆分场景,逐张生成配图 +- **教学演示** — 生成带字幕的场景切换视频 ``` -文章文本 + 朗读音频 → AI 场景划分 → 逐场景生成配图 → ASR 时间对齐 → 合成视频(含字幕) +课文文本 + 朗读音频 → AI 拆分场景 → 逐场景生成配图 → 语音对齐时间轴 → 合成视频(含字幕) ``` -## Preview +## 功能 + +- **AI 场景划分** — 支持 Qwen / GLM / DeepSeek / 阿里云百炼 / OpenAI 兼容接口 +- **AI 文生图** — 支持 Kolors / Qwen-Image 模型,逐张生成场景配图 +- **逐张审查** — 每张图生成后可以预览、确认、重新生成或跳过 +- **语音对齐** — 基于 Qwen3-ForcedAligner 的 ASR 强制对齐 +- **视频合成** — MoviePy 合成最终视频,自动添加字幕 + +## 预览 ![Pipeline Overview](docs/pipeline.png) -## Features +## 快速开始 -- **AI Scene Planning** — 基于 LLM(Qwen / GLM)智能划分场景,提取角色、画面描述 -- **AI Image Generation** — 支持 Kolors / Qwen-Image 文生图模型,逐张生成场景配图 -- **Interactive Review** — 逐张审查、确认/重新生成场景图 -- **Forced Alignment** — 基于 Qwen3-ForcedAligner 的语音-文本时间对齐 -- **Video Synthesis** — MoviePy 合成最终视频,自动添加字幕 - -## Architecture - -``` -release1/ -├── gui.py # PyQt6 GUI (main entry) -├── scene_plan.py # LLM scene planning + prompt engineering -├── image_gen.py # Text-to-image API calls -├── asr.py # ASR forced alignment -├── make_video.py # Video synthesis + subtitle rendering -├── text_ai.py # Shared LLM API client -├── config.py # Model paths, API keys, defaults -├── run.bat # Windows launcher -└── qwen_download.py # One-time model download script -``` - -## Workflow - -``` -1. Select workspace (folder with article.txt + voice.mp3) -2. AI Scene Planning → scene_plan.json -3. Image Generation → scene_01.png, scene_02.png, ... -4. ASR Alignment → result.json + timestamps into scene_plan -5. 
Video Synthesis → output_video.mp4 -``` - -## Quick Start - -### Prerequisites +### 环境要求 - Python 3.12+ -- Conda (recommended) -- NVIDIA GPU (for local ASR model) +- Conda +- NVIDIA GPU(本地 ASR 模型需要) -### Setup +### 安装 ```bash -# Create conda environment -conda create -n Videoer python=3.12 -y -conda activate Videoer +# 创建环境 +conda create -n VidMarmot python=3.12 -y +conda activate VidMarmot -# Install dependencies +# 安装依赖 pip install PyQt6 moviepy Pillow requests openai pip install funasr modelscope torch torchaudio -# Download ASR model +# 下载 ASR 模型(约 1.2GB) python qwen_download.py ``` -### Configuration +### 配置 API Key -Edit `config.py` to set your API keys: +编辑 `config.py`,在对应模型的 `api_key` 字段填入你的 Key。只需填你用到的服务即可。 -```python -# LLM providers (scene planning) -LLM_PROVIDERS = { - "Qwen3.5-35B (ModelScope)": { - "api_key": "YOUR_KEY", - ... - }, - ... -} +| 服务 | 用途 | Key 对应 | 免费额度 | +|------|------|----------|----------| +| ModelScope | LLM + 文生图 | `MODELSCOPE_API_KEY` | 有 | +| 硅基流动 | LLM + 文生图 | `SILICONFLOW_API_KEY` | 有 | +| 阿里云百炼 | LLM (Qwen3-235B) | `DASHSCOPE_API_KEY` | 有 | +| DeepSeek | LLM (V3/R1) | `DEEPSEEK_API_KEY` | 有 | +| OpenAI 兼容 | 自定义 Router | `OPENAI_API_KEY` | - | -# Image generation -SILICONFLOW_API_KEY = "YOUR_KEY" -MODELSCOPE_API_KEY = "YOUR_KEY" -``` - -> **Tip**: ModelScope and SiliconFlow both offer free-tier API keys. - -### Run +### 运行 ```bash -# GUI mode (recommended) python gui.py -# Or on Windows +# 或 Windows 双击 run.bat ``` -### Workspace Structure +### 工作区结构 -Each video project lives in a workspace folder: +每个视频项目是一个文件夹: ``` -workspace/my_project/ -├── article.txt # Source article text -├── voice.mp3 # Narration audio -├── scene_plan.json # Generated scene plan (auto) -├── result.json # ASR alignment result (auto) -├── scene_01.png # Generated images (auto) -├── scene_02.png -├── ... 
-└── output_video.mp4 # Final output (auto) +workspace/my_lesson/ +├── article.txt # 课文文本 +├── voice.mp3 # 朗读音频 +├── scene_plan.json # 场景计划(自动生成) +├── result.json # ASR 对齐结果(自动生成) +├── scene/ # 生成的场景图 +│ ├── scene_001.png +│ ├── scene_002.png +│ └── ... +└── output_video.mp4 # 最终视频(自动生成) ``` -## Dependencies +## 项目结构 -| Package | Purpose | -|---------|---------| -| PyQt6 | GUI framework | -| moviepy | Video composition | -| Pillow | Image processing / subtitle rendering | -| requests | HTTP API calls | -| openai | Compatible LLM client (OpenAI API format) | -| funasr | ASR forced alignment | -| modelscope | Model loading | -| torch / torchaudio | GPU inference backend | +``` +├── gui.py # PyQt6 GUI(主入口) +├── scene_plan.py # AI 场景划分 + Prompt 工程 +├── image_gen.py # 文生图 API 调用 +├── asr.py # ASR 强制对齐 +├── make_video.py # 视频合成 + 字幕渲染 +├── text_ai.py # LLM API 客户端 +├── config.py # 配置管理(路径、API、模型) +├── qwen_download.py # ASR 模型下载脚本 +├── run.bat # Windows 启动脚本 +└── .gitignore +``` + +## 依赖 + +| 包 | 用途 | +|----|------| +| PyQt6 | GUI 框架 | +| moviepy | 视频合成 | +| Pillow | 图片处理 / 字幕渲染 | +| requests | HTTP API 调用 | +| openai | 兼容 OpenAI 格式的 LLM 客户端 | +| funasr | ASR 强制对齐 | +| modelscope | 模型加载 | +| torch / torchaudio | GPU 推理后端 | + +## Roadmap + +- [ ] **图生视频** — 用生成的场景图做图生视频,让每张静态图变成动态片段,最终拼接成真正的动态视频 +- [ ] 更多文生图模型支持 +- [ ] 批量处理多个课文 +- [ ] 打包为可执行文件(pyinstaller) ## License diff --git a/config.py b/config.py index 37f381e..0085f43 100644 --- a/config.py +++ b/config.py @@ -1,81 +1,96 @@ """ -release1 配置文件 -集中管理所有模型路径、API Key、默认参数 +config.py - All configuration for VidMarmot. + +API keys, model paths, default parameters — everything lives here. +Edit this file directly to configure your setup. 
 """
 
 import os
 
-# ========== 基础路径 ==========
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-VIDEO_PROJECT_DIR = os.path.dirname(BASE_DIR)  # 上级 video/ 目录
 
-# ========== ASR 模型(绝对路径指向 video/models/)==========
-ASR_MODEL_DIR = os.path.join(
-    r'C:\pythonproject\video', 'models', 'qwen', 'Qwen3-ForcedAligner-0.6B'
-).replace('\\', '/')
 
-# ========== LLM 提供商(划分场景/角色提取用)==========
+# ========== ASR Model ==========
+# Default: project_dir/models/qwen/Qwen3-ForcedAligner-0.6B
+# Override via env var VIDMARMOT_ASR_MODEL_DIR
+ASR_MODEL_DIR = os.environ.get("VIDMARMOT_ASR_MODEL_DIR", os.path.join(BASE_DIR, "models", "qwen", "Qwen3-ForcedAligner-0.6B")).replace("\\", "/")
+
+
+# ========== LLM Providers (scene planning / text generation) ==========
+# Fill in your API keys below. Only providers with keys will be usable.
 LLM_PROVIDERS = {
-    "Qwen3.5-35B (ModelScope 免费)": {
-        "api_key": "ms-38de567b-cf88-4523-bac2-ff63d8f1e0f6",
+    "Qwen3.5-35B (ModelScope)": {
+        "api_key": "",  # ← put your ModelScope API key here
         "api_base": "https://api-inference.modelscope.cn/v1/",
         "model": "Qwen/Qwen3.5-35B-A3B",
     },
-    "GLM-4-9B (硅基流动 免费)": {
-        "api_key": "sk-mjqgwknbttvqnrjjfnxemtjgdivogjaqsftbvoifwjvruwsq",
+    "GLM-4-9B (SiliconFlow)": {
+        "api_key": "",  # ← put your SiliconFlow API key here
         "api_base": "https://api.siliconflow.cn/v1/",
         "model": "THUDM/glm-4-9b-chat",
     },
-    "Qwen3-32B (硅基流动 付费)": {
-        "api_key": "sk-mjqgwknbttvqnrjjfnxemtjgdivogjaqsftbvoifwjvruwsq",
+    "Qwen3-32B (SiliconFlow)": {
+        "api_key": "",  # ← put your SiliconFlow API key here
         "api_base": "https://api.siliconflow.cn/v1/",
         "model": "Qwen/Qwen3-32B",
     },
-    "GLM-5 (ModelScope 免费)": {
-        "api_key": "ms-38de567b-cf88-4523-bac2-ff63d8f1e0f6",
+    "GLM-5 (ModelScope)": {
+        "api_key": "",  # ← put your ModelScope API key here
         "api_base": "https://api-inference.modelscope.cn/v1/",
         "model": "ZhipuAI/GLM-5",
     },
+    "Qwen3-235B-A22B (Aliyun)": {
+        "api_key": "",  # ← put your Aliyun DashScope API key here
+        "api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1/",
"model": "qwen3-235b-a22b", + }, + "DeepSeek-V3": { + "api_key": "", # ← put your DeepSeek API key here + "api_base": "https://api.deepseek.com/v1/", + "model": "deepseek-chat", + }, + "DeepSeek-R1": { + "api_key": "", # ← put your DeepSeek API key here + "api_base": "https://api.deepseek.com/v1/", + "model": "deepseek-reasoner", + }, + "OpenAI (Custom Router)": { + "api_key": "", # ← put your OpenAI-compatible API key here + "api_base": "https://api.openai.com/v1/", # change if using a custom router + "model": "gpt-4o", + }, } -# 默认 LLM(兼容旧代码) -DEFAULT_LLM = "Qwen3.5-35B (ModelScope 免费)" -LLM_API_KEY = LLM_PROVIDERS[DEFAULT_LLM]["api_key"] -LLM_API_BASE = LLM_PROVIDERS[DEFAULT_LLM]["api_base"] -LLM_MODEL = LLM_PROVIDERS[DEFAULT_LLM]["model"] -# ========== SiliconFlow API(Kolors 文生图)========== -SILICONFLOW_API_KEY = "sk-mjqgwknbttvqnrjjfnxemtjgdivogjaqsftbvoifwjvruwsq" -SILICONFLOW_API_BASE = "https://api.siliconflow.cn/v1/images/generations" - -# ========== ModelScope API(Qwen 文生图)========== -MODELSCOPE_API_KEY = "ms-38de567b-cf88-4523-bac2-ff63d8f1e0f6" -MODELSCOPE_API_BASE = "https://api-inference.modelscope.cn/v1/images/generations" -MODELSCOPE_POLL_INTERVAL = 3 # 轮询间隔(秒) -MODELSCOPE_MAX_WAIT = 180 # 最大等待时间(秒) - -# ========== 文生图模型 ========== +# ========== Text-to-Image Models ========== +# Fill in your API keys below. 
IMAGE_MODELS = { - "Kolors(便宜快速)": { + "Kolors (SiliconFlow)": { "provider": "siliconflow", + "api_key": "", # ← put your SiliconFlow API key here + "api_base": "https://api.siliconflow.cn/v1/images/generations", "model": "Kwai-Kolors/Kolors", "default_size": "1280x720", "guidance_scale": 7.5, }, - "Qwen-Image(高质量)": { + "Qwen-Image (ModelScope)": { "provider": "modelscope", + "api_key": "", # ← put your ModelScope API key here + "api_base": "https://api-inference.modelscope.cn/v1/images/generations", + "poll_interval": 3, + "max_wait": 180, "model": "Qwen/Qwen-Image-2512", "default_size": "1280x720", "guidance_scale": 7.5, }, } -# 默认文生图模型 -DEFAULT_IMAGE_MODEL = "Kolors(便宜快速)" +DEFAULT_IMAGE_MODEL = "Kolors (SiliconFlow)" -# ========== 默认参数 ========== + +# ========== Defaults ========== DEFAULT_FPS = 24 DEFAULT_VIDEO_SIZE = "1280x720" -# ========== 通用 negative prompt ========== +# Negative prompt for image generation NEGATIVE_PROMPT = "blurry, low quality, deformed, text, letters, words, subtitle, logo, watermark, caption, label, number" diff --git a/crash.log b/crash.log new file mode 100644 index 0000000..8dfd465 --- /dev/null +++ b/crash.log @@ -0,0 +1,34 @@ + +============================================================ +2026-04-25T14:06:45.585985 +Worker.run() error: +Traceback (most recent call last): + File "Z:\release1\git\V1\gui.py", line 459, in run + result = self.func(*self.args, **self.kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "Z:\release1\git\V1\gui.py", line 1139, in task + return _sp.main(workspace=workspace, provider=provider, user_note=user_note) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "Z:\release1\git\V1\scene_plan.py", line 535, in main + plan = plan_scenes(article_text, workspace=workspace, provider=provider, user_note=user_note) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "Z:\release1\git\V1\scene_plan.py", line 445, in plan_scenes + response 
= text_ai(prompt, PLANNER_SYSTEM, provider=provider) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "Z:\release1\git\V1\text_ai.py", line 49, in text_ai + response = client.chat.completions.create( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "C:\Users\Administrator\AppData\Roaming\Python\Python312\site-packages\openai\_utils\_utils.py", line 286, in wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "C:\Users\Administrator\AppData\Roaming\Python\Python312\site-packages\openai\resources\chat\completions\completions.py", line 1211, in create + return self._post( + ^^^^^^^^^^^ + File "C:\Users\Administrator\AppData\Roaming\Python\Python312\site-packages\openai\_base_client.py", line 1297, in post + return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "C:\Users\Administrator\AppData\Roaming\Python\Python312\site-packages\openai\_base_client.py", line 1070, in request + raise self._make_status_error_from_response(err.response) from None +openai.AuthenticationError: Error code: 401 - {'error': {'message': 'Authentication failed, please make sure that a valid ModelScope token is supplied.', 'request_id': '56c98608-5ea6-402e-9843-4e497060518e'}} + +============================================================ diff --git a/gui.py b/gui.py index 6aeec75..2db855b 100644 --- a/gui.py +++ b/gui.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -gui.py - 视频制作流水线 GUI(release1) +gui.py - VidMarmot GUI 唯一入口,PyQt6 暗色主题 流程:选工作区 → 划分场景 → 逐张生成+审查 → ASR → 合成视频 @@ -40,7 +40,8 @@ from PyQt6.QtCore import Qt, QThread, pyqtSignal, QMutex, QWaitCondition, QTimer from PyQt6.QtGui import QPixmap, QImage, QFont, QColor, QIcon -from config import IMAGE_MODELS, DEFAULT_IMAGE_MODEL, DEFAULT_FPS, DEFAULT_VIDEO_SIZE, LLM_PROVIDERS, DEFAULT_LLM +from config import (DEFAULT_FPS, DEFAULT_VIDEO_SIZE, + LLM_PROVIDERS, IMAGE_MODELS, DEFAULT_IMAGE_MODEL) # 
============================================================ @@ -635,7 +636,7 @@ class GenerationWorker(QThread): class VideoPipelineGUI(QMainWindow): def __init__(self): super().__init__() - self.setWindowTitle("视频制作流水线 - Release 1") + self.setWindowTitle("VidMarmot") self.setGeometry(80, 80, 1280, 820) # 状态 @@ -670,7 +671,7 @@ class VideoPipelineGUI(QMainWindow): main_layout.setSpacing(8) # --- 标题 --- - title = QLabel("视频制作流水线") + title = QLabel("VidMarmot") title.setObjectName("titleLabel") title.setAlignment(Qt.AlignmentFlag.AlignCenter) main_layout.addWidget(title) @@ -690,22 +691,21 @@ class VideoPipelineGUI(QMainWindow): top_bar.addSpacing(20) - # LLM 模型选择(场景划分用) + # LLM model selector — show all providers, default to first top_bar.addWidget(QLabel("语言模型:")) self.llm_combo = QComboBox() self.llm_combo.addItems(LLM_PROVIDERS.keys()) - idx = list(LLM_PROVIDERS.keys()).index(DEFAULT_LLM) - self.llm_combo.setCurrentIndex(idx) top_bar.addWidget(self.llm_combo) top_bar.addSpacing(20) - # 文生图模型选择 + # Image model selector — show all models, default to first top_bar.addWidget(QLabel("文生图模型:")) self.model_combo = QComboBox() self.model_combo.addItems(IMAGE_MODELS.keys()) - idx = list(IMAGE_MODELS.keys()).index(DEFAULT_IMAGE_MODEL) - self.model_combo.setCurrentIndex(idx) + default_img = DEFAULT_IMAGE_MODEL + if default_img and default_img in IMAGE_MODELS: + self.model_combo.setCurrentText(default_img) top_bar.addWidget(self.model_combo) top_bar.addSpacing(20) @@ -884,8 +884,8 @@ class VideoPipelineGUI(QMainWindow): main_layout.addWidget(splitter, stretch=1) - # 初始日志 - self.log("视频制作流水线 v1.0 已启动") + # Startup log + self.log("VidMarmot 已启动") self.log("请先选择一个工作区文件夹(包含 article.txt)") # ============================================================ diff --git a/image_gen.py b/image_gen.py index 5acf5ba..889a76e 100644 --- a/image_gen.py +++ b/image_gen.py @@ -1,28 +1,24 @@ """ -image_gen.py - 统一文生图接口 -支持两个模型: - - Kolors(便宜快速)→ SiliconFlow API(同步) - - Qwen-Image(高质量)→ 
ModelScope API(异步轮询) +image_gen.py - Unified text-to-image interface. + +Providers: + - SiliconFlow (Kolors) — sync API + - ModelScope (Qwen-Image) — async polling API """ import requests import os import time from datetime import datetime -from config import ( - SILICONFLOW_API_KEY, - SILICONFLOW_API_BASE, - MODELSCOPE_API_KEY, - MODELSCOPE_API_BASE, - MODELSCOPE_POLL_INTERVAL, - MODELSCOPE_MAX_WAIT, - IMAGE_MODELS, - NEGATIVE_PROMPT, -) +from config import IMAGE_MODELS, NEGATIVE_PROMPT -def _generate_siliconflow(prompt, model_id, size, guidance, neg, save_dir, filename): - """SiliconFlow 同步 API(Kolors)""" +def _generate_siliconflow(prompt, model_id, size, guidance, neg, save_dir, filename, api_key, api_base): + """SiliconFlow sync API""" + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } payload = { "model": model_id, "prompt": prompt, @@ -33,37 +29,32 @@ def _generate_siliconflow(prompt, model_id, size, guidance, neg, save_dir, filen "negative_prompt": neg, } - headers = { - "Authorization": f"Bearer {SILICONFLOW_API_KEY}", - "Content-Type": "application/json", - } + print(f" [SiliconFlow] {prompt[:60]}{'...' if len(prompt) > 60 else ''}") - print(f" [SiliconFlow] 提交: {prompt[:60]}{'...' if len(prompt) > 60 else ''}") - - for attempt in range(6): # 最多重试 5 次 - resp = requests.post(SILICONFLOW_API_BASE, headers=headers, json=payload, timeout=120) + for attempt in range(6): + resp = requests.post(api_base, headers=headers, json=payload, timeout=120) print(f" HTTP {resp.status_code}: {resp.text[:300]}") if resp.status_code == 429: - wait = 15 * (attempt + 1) # 15s, 30s, 45s, 60s, 75s - print(f" [!] 限频,等待 {wait}s 后重试 ({attempt+1}/5)...") + wait = 15 * (attempt + 1) + print(f" [!] 
Rate limited, waiting {wait}s ({attempt+1}/5)...") time.sleep(wait) continue if resp.status_code != 200: - raise Exception(f"SiliconFlow 生成失败 ({resp.status_code}): {resp.text[:300]}") + raise Exception(f"SiliconFlow error ({resp.status_code}): {resp.text[:300]}") break else: - raise Exception("SiliconFlow 持续限频,已重试 5 次,请稍后再试或切换模型") + raise Exception("SiliconFlow rate limit, retried 5 times.") result = resp.json() images = result.get("images", []) if not images: - raise Exception(f"SiliconFlow 返回无图片: {result}") + raise Exception(f"SiliconFlow returned no images: {result}") img_url = images[0].get("url") if not img_url: - raise Exception(f"返回图片 URL 为空: {result}") + raise Exception(f"Empty image URL: {result}") img_data = requests.get(img_url, timeout=60).content @@ -78,12 +69,12 @@ def _generate_siliconflow(prompt, model_id, size, guidance, neg, save_dir, filen return {"url": img_url, "filepath": filepath} -def _generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filename): - """ModelScope 异步轮询 API(Qwen-Image)""" +def _generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filename, api_key, api_base): + """ModelScope async polling API""" submit_headers = { - "Authorization": f"Bearer {MODELSCOPE_API_KEY}", + "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", - "X-ModelScope-Async-Mode": "true" + "X-ModelScope-Async-Mode": "true", } payload = { "model": model_id, @@ -94,31 +85,32 @@ def _generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filena "negative_prompt": neg, } - print(f" [ModelScope] 提交: {prompt[:60]}{'...' if len(prompt) > 60 else ''}") - resp = requests.post(MODELSCOPE_API_BASE, headers=submit_headers, json=payload, timeout=60) + print(f" [ModelScope] {prompt[:60]}{'...' 
if len(prompt) > 60 else ''}") + resp = requests.post(api_base, headers=submit_headers, json=payload, timeout=60) if resp.status_code != 200: - raise Exception(f"ModelScope 提交失败 ({resp.status_code}): {resp.text[:300]}") + raise Exception(f"ModelScope submit failed ({resp.status_code}): {resp.text[:300]}") result = resp.json() task_id = result.get("task_id") if not task_id: - raise Exception(f"未找到 task_id: {result}") + raise Exception(f"No task_id: {result}") print(f" task_id: {task_id}") - # 轮询结果 query_headers = { - "Authorization": f"Bearer {MODELSCOPE_API_KEY}", - "X-ModelScope-Task-Type": "image_generation" + "Authorization": f"Bearer {api_key}", + "X-ModelScope-Task-Type": "image_generation", } status_url = f"https://api-inference.modelscope.cn/v1/tasks/{task_id}" + poll_interval = IMAGE_MODELS["Qwen-Image (ModelScope)"].get("poll_interval", 3) + max_wait = IMAGE_MODELS["Qwen-Image (ModelScope)"].get("max_wait", 180) start = time.time() for attempt in range(100): if attempt > 0: - time.sleep(MODELSCOPE_POLL_INTERVAL) + time.sleep(poll_interval) elapsed = int(time.time() - start) - if elapsed > MODELSCOPE_MAX_WAIT: - raise Exception(f"ModelScope 超时({MODELSCOPE_MAX_WAIT}s)") + if elapsed > max_wait: + raise Exception(f"ModelScope timeout ({max_wait}s)") qresp = requests.get(status_url, headers=query_headers, timeout=30) if qresp.status_code != 200: @@ -130,11 +122,13 @@ def _generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filena print(f" [{elapsed}s] {task_status}") if task_status == "SUCCEED": - output_images = (qresult.get("output_images") - or qresult.get("outputs", {}).get("output_images") - or []) + output_images = ( + qresult.get("output_images") + or qresult.get("outputs", {}).get("output_images") + or [] + ) if not output_images: - raise Exception(f"SUCCEED 但无图片: {qresult}") + raise Exception(f"SUCCEED but no images: {qresult}") url = output_images[0] img_data = requests.get(url, timeout=180).content @@ -149,34 +143,30 @@ def 
_generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filena return {"url": url, "filepath": filepath} elif task_status == "FAILED": - raise Exception(f"ModelScope 任务失败: {qresult.get('errors', qresult)}") + raise Exception(f"ModelScope task failed: {qresult.get('errors', qresult)}") - raise Exception(f"ModelScope 超时({MODELSCOPE_MAX_WAIT}s)") + raise Exception(f"ModelScope timeout ({max_wait}s)") def image_generate( prompt: str, save_dir: str = "./generated_images", model_name: str = None, - n: int = 1, - seed: int = None, - num_inference_steps: int = 20, - guidance_scale: float = None, - negative_prompt: str = None, filename: str = None, image_size: str = None, + guidance_scale: float = None, + negative_prompt: str = None, ) -> dict: - """ - 统一文生图接口 + """Unified text-to-image interface. Args: - prompt: 生成提示词 - save_dir: 保存目录 - model_name: 模型名称(IMAGE_MODELS 的 key),默认用 config 中的 DEFAULT_IMAGE_MODEL - image_size: 图片尺寸,默认 1280x720(16:9) - + prompt: generation prompt + save_dir: output directory + model_name: model name (key in IMAGE_MODELS), None = default + filename: output filename, None = auto + image_size: image size, None = model default Returns: - dict: {"url": str, "filepath": str} + {"url": str, "filepath": str} """ from config import DEFAULT_IMAGE_MODEL @@ -185,31 +175,34 @@ def image_generate( model_config = IMAGE_MODELS.get(model_name) if not model_config: - raise ValueError(f"未知模型: {model_name},可选: {list(IMAGE_MODELS.keys())}") + raise ValueError(f"Unknown model: {model_name}, available: {list(IMAGE_MODELS.keys())}") + + api_key = model_config.get("api_key", "") + if not api_key: + raise ValueError( + f"API key not configured for '{model_name}'. " + f"Edit config.py and fill in the api_key field." 
+ ) model_id = model_config["model"] size = image_size or model_config["default_size"] guidance = guidance_scale if guidance_scale is not None else model_config["guidance_scale"] neg = negative_prompt or NEGATIVE_PROMPT + provider = model_config["provider"] + api_base = model_config.get("api_base", "") os.makedirs(save_dir, exist_ok=True) - provider = model_config["provider"] if provider == "siliconflow": - return _generate_siliconflow(prompt, model_id, size, guidance, neg, save_dir, filename) + return _generate_siliconflow(prompt, model_id, size, guidance, neg, save_dir, filename, api_key, api_base) elif provider == "modelscope": - return _generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filename) + return _generate_modelscope(prompt, model_id, size, guidance, neg, save_dir, filename, api_key, api_base) else: - raise ValueError(f"未知 provider: {provider}") - - -def get_available_models() -> list[str]: - """返回可用的文生图模型名称列表""" - return list(IMAGE_MODELS.keys()) + raise ValueError(f"Unknown provider: {provider}") if __name__ == "__main__": - for name in get_available_models(): - print(f"\n测试模型: {name}") - result = image_generate("A cute cat sitting on a desk, 16:9 aspect ratio", model_name=name) - print(f" 路径: {result['filepath']}") + for name in list(IMAGE_MODELS.keys()): + print(f"\nTesting: {name}") + result = image_generate("A cute cat sitting on a desk, 16:9", model_name=name) + print(f" Path: {result['filepath']}") diff --git a/run.bat b/run.bat index 446fe93..302d4a3 100644 --- a/run.bat +++ b/run.bat @@ -3,23 +3,15 @@ chcp 65001 >nul cd /d "%~dp0" -:: 清除缓存 +:: clean cache del /s /q __pycache__\*.pyc 2>nul for /d %%d in (__pycache__) do rd /s /q "%%d" 2>nul -echo 正在激活 Videoer 环境并启动 GUI... +echo Starting VidMarmot... -call C:\ProgramData\anaconda3\Scripts\activate.bat Videoer -if errorlevel 1 ( - echo [错误] 无法激活 Videoer 环境 - pause - exit /b 1 -) - -cd /d "%~dp0" python "%~dp0gui.py" if errorlevel 1 ( echo. 
- echo [错误] 启动失败 + echo [ERROR] Startup failed. Make sure Python and dependencies are installed. pause ) diff --git a/scene_generate.py b/scene_generate.py index 993b6f8..8177f8d 100644 --- a/scene_generate.py +++ b/scene_generate.py @@ -84,7 +84,7 @@ def generate_single_scene( return scene -def main(workspace: str = None, model_name: str = "Kolors(便宜快速)"): +def main(workspace: str = None, model_name: str = "Kolors (SiliconFlow)"): """主流程:生成所有 pending 场景""" global WORKSPACE, PLAN_PATH, SCENE_IMG_DIR diff --git a/text_ai.py b/text_ai.py index 4232450..1be73d2 100644 --- a/text_ai.py +++ b/text_ai.py @@ -1,43 +1,44 @@ """ -text_ai.py - LLM 文本生成 -用于场景划分等 AI 推理任务 -支持多 LLM 提供商切换 +text_ai.py - LLM text generation client. + +Supports multiple providers defined in config.py. """ from openai import OpenAI -from config import LLM_PROVIDERS, DEFAULT_LLM, LLM_API_KEY, LLM_API_BASE, LLM_MODEL +from config import LLM_PROVIDERS def text_ai(in_put: str, system_prompt: str = "You are a helpful assistant.", provider: str = None) -> str: - """ - 调用 LLM 生成文本 + """Call LLM to generate text. 
Args: - in_put: 用户输入内容 - system_prompt: 系统提示词 - provider: LLM 提供商名称(对应 LLM_PROVIDERS 的 key),None 则用默认 - + in_put: user message + system_prompt: system prompt + provider: provider name (key in LLM_PROVIDERS), None = first in dict Returns: - AI 生成的文本 + generated text """ if provider and provider in LLM_PROVIDERS: cfg = LLM_PROVIDERS[provider] - api_key = cfg["api_key"] - api_base = cfg["api_base"] - model = cfg["model"] else: - api_key = LLM_API_KEY - api_base = LLM_API_BASE - model = LLM_MODEL + # Default to first provider in dict + cfg = next(iter(LLM_PROVIDERS.values())) + provider = next(iter(LLM_PROVIDERS)) - client = OpenAI( - api_key=api_key, - base_url=api_base, - ) + api_key = cfg["api_key"] + api_base = cfg["api_base"] + model = cfg["model"] - # ModelScope 的 Qwen3 系列和 GLM 系列默认开启 thinking,需要关掉 - # 注意:MiniMax 系列不是 Qwen/GLM,不需要也不能传 enable_thinking + if not api_key: + raise ValueError( + f"API key not configured for '{provider}'. " + f"Edit config.py and fill in the api_key field." + ) + + client = OpenAI(api_key=api_key, base_url=api_base) + + # ModelScope Qwen3/GLM default to thinking mode, disable it extra_body = {} is_modelscope = "modelscope" in api_base.lower() is_qwen = "qwen" in model.lower() @@ -49,38 +50,31 @@ def text_ai(in_put: str, system_prompt: str = "You are a helpful assistant.", model=model, messages=[ {"role": "system", "content": system_prompt}, - {"role": "user", "content": in_put} + {"role": "user", "content": in_put}, ], max_tokens=16384, stream=False, - extra_body=extra_body if extra_body else None, + extra_body=extra_body or None, ) - # 防御:choices 为空或 None if not response.choices: - # 尝试从 response 对象提取有用信息 resp_dict = response.model_dump() if hasattr(response, "model_dump") else {} error_msg = resp_dict.get("error", {}) - if isinstance(error_msg, dict): - err_text = error_msg.get("message", str(error_msg)) - else: - err_text = str(resp_dict) + err_text = error_msg.get("message", str(error_msg)) if isinstance(error_msg, dict) else 
str(resp_dict) raise ValueError( - f"模型 '{model}' 返回了空的 choices。\n" - f"响应内容: {err_text}\n" - f"可能是模型暂时不可用或请求被拒绝。" + f"Model '{model}' returned empty choices.\n" + f"Response: {err_text}\n" + f"Model may be unavailable or request was rejected." ) msg = response.choices[0].message content = msg.content - # 检测输出是否被截断 - finish = response.choices[0].finish_reason - if finish == "length": - print(f"[WARN] LLM output truncated (finish_reason=length), max_tokens may be too small") + if response.choices[0].finish_reason == "length": + print(f"[WARN] LLM output truncated (finish_reason=length)") if content is None: - # fallback:尝试多种字段名(不同 API 叫法不同) + # Fallback: try alternate field names for attr in ("thinking_content", "reasoning_content", "text", "output"): fallback = getattr(msg, attr, None) if fallback: @@ -88,7 +82,6 @@ def text_ai(in_put: str, system_prompt: str = "You are a helpful assistant.", break if content is None: - # 最后一搏:尝试把 message 对象当 dict 看 try: msg_dict = msg.model_dump() if hasattr(msg, "model_dump") else vars(msg) for v in msg_dict.values(): @@ -99,11 +92,8 @@ def text_ai(in_put: str, system_prompt: str = "You are a helpful assistant.", pass if content is None: - finish = response.choices[0].finish_reason raise ValueError( - f"模型 '{model}' 返回内容为空(content=None)," - f"finish_reason={finish}。\n" - f"如果使用 MiniMax 系列,请改用 Qwen3.5-35B (ModelScope 免费) 或其他 Qwen 模型。" + f"Model '{model}' returned None content (finish_reason={response.choices[0].finish_reason})." ) return content