# video_translation/config.yaml

# * Settings marked with * are advanced settings that do not appear in the Streamlit page and can only be changed by editing config.yaml manually
# Other settings are best changed from the Streamlit page
# -------------------
# version: "3.0.0"
# author: "Huanshere"
# -------------------

## ======================== Basic Settings ======================== ##

# Display language, given as a locale tag
display_language: 'zh-CN'

# API settings
# SECURITY: never commit a real API key to version control. Keep the
# placeholder here and supply the actual key locally (for example from the
# Streamlit page). The key that was previously committed in this field must be
# treated as compromised and rotated with the provider.
api:
  key: 'YOUR_API_KEY'
  # Base URL of the LLM API endpoint
  base_url: 'https://dashscope.aliyuncs.com/compatible-mode/v1'
  # Model name sent to the endpoint
  model: 'qwen-plus'
  # NOTE(review): presumably whether the model supports JSON-formatted output — confirm
  llm_support_json: false
# *How many threads access the LLM concurrently; use 1 with a local LLM
max_workers: 4

# Target language; it is written into the prompt, so a natural-language description works
target_language: 'English'

# Run Demucs vocal separation ahead of transcription (true/false)
demucs: true

whisper:
  # Model choice: 'large-v3' or 'large-v3-turbo'. Note: for zh, Belle/large-v3 is forced.
  model: 'large-v3'
  # Recognition language given to Whisper, as an ISO 639-1 code
  language: 'zh'
  # NOTE(review): presumably filled in by the pipeline after language detection — confirm
  detected_language: 'en'
  # Where Whisper runs: 'local', 'cloud' (uses the 302.ai API), or 'elevenlabs'
  runtime: 'local'
  # API key for 302.ai
  whisperX_302_api_key: 'your_302_api_key'
  # API key for ElevenLabs (experimental)
  elevenlabs_api_key: 'your_elevenlabs_api_key'

# Burn the subtitles into the video (true/false)
burn_subtitles: true

## ======================== Advanced Settings ======================== ##
# *🔬 Use h264_nvenc GPU acceleration in ffmpeg; enable only if your GPU supports it
ffmpeg_gpu: false

# *YouTube settings
youtube:
  cookies_path: ''

# *Default resolution when downloading YouTube videos: 360, 1080, or best
ytb_resolution: '1080'

subtitle:
  # *Upper limit, in characters, for a single subtitle line
  max_length: 40
  # *Translated subtitles run slightly larger than the source ones; this factor adjusts the reference length used when splitting subtitles
  target_multiplier: 1.2

# *Summary length; lower it to 2k when using a local LLM
summary_length: 8000

# *Maximum word count for the first rough cut. Below 18 the cut is too fine and hurts translation; above 22 the pieces are too long and later subtitle splitting becomes hard to align
max_split_length: 20

# *Whether to reflect the translation result in the original text (true/false)
reflect_translate: true

# *Pause after professional terms are extracted and before translation starts, so the terminology table output\log\terminology.json can be adjusted by hand (true/false)
pause_before_translate: false

## ======================== Dubbing Settings ======================== ##
# TTS backend: sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, or custom_tts
# NOTE(review): sf_cosyvoice2 and f5tts have sections in this file but are not listed above — confirm whether they are accepted values
tts_method: 'edge_tts'

# SiliconFlow FishTTS
sf_fish_tts:
  # API key for SiliconFlow
  api_key: 'YOUR_API_KEY'
  # Used only when mode is "preset"
  voice: 'anna'
  # *Used only when mode is "custom"; do not set by hand
  custom_name: ''
  voice_id: ''
  # One of: preset, custom, dynamic
  mode: 'preset'

# OpenAI TTS-1 settings (works with the 302.ai API only)
openai_tts:
  api_key: 'YOUR_302_API_KEY'
  voice: 'alloy'

# Azure TTS settings (works with the 302.ai API only)
azure_tts:
  api_key: 'YOUR_302_API_KEY'
  voice: 'zh-CN-YunfengNeural'

# FishTTS settings (works with the 302.ai API only)
fish_tts:
  api_key: 'YOUR_302_API_KEY'
  # NOTE(review): presumably looked up as a key of character_id_dict — confirm
  character: 'AD学姐'
  # Character name -> character ID
  character_id_dict:
    'AD学姐': '7f92f8afb8ec43bf81429cc1c9199cb1'
    '丁真': '54a5170264694bfc8e9ad98df7bd89c3'

# SiliconFlow CosyVoice2 voice cloning
sf_cosyvoice2:
  api_key: 'YOUR_SF_KEY'

# Edge TTS settings
# NOTE(review): tts_method selects edge_tts and this voice is zh-CN, while target_language is 'English' — confirm the pairing is intended
edge_tts:
  voice: 'zh-CN-XiaoxiaoNeural'

# GPT-SoVITS settings
gpt_sovits:
  character: 'Huanyuv2'
  refer_mode: 3

# NOTE(review): presumably F5-TTS through the 302.ai API — confirm
f5tts:
  302_api: 'YOUR_302_API_KEY'

# *Range of the audio speed factor
speed_factor:
  min: 1
  # Maximum acceptable speed
  accept: 4
  max: 5

# *Settings used when merging audio
# Minimum subtitle duration; anything shorter is forcibly extended
min_subtitle_duration: 0.2
# Subtitles shorter than this value are not split
min_trim_duration: 3.5
# Extension time allowed toward the next subtitle
tolerance: 1.5

## ======================== Additional settings ======================== ##

# Directory for the Whisper models
model_dir: './_model_cache'

# Video formats accepted for upload
allowed_video_formats:
  - 'mp4'
  - 'mov'
  - 'avi'
  - 'mkv'
  - 'flv'
  - 'wmv'
  - 'webm'

# NOTE(review): presumably the audio formats accepted for upload — confirm
allowed_audio_formats:
  - 'wav'
  - 'mp3'
  - 'flac'
  - 'm4a'

# spaCy model name for each language code
spacy_model_map:
  en: 'en_core_web_md'
  ru: 'ru_core_news_md'
  fr: 'fr_core_news_md'
  ja: 'ja_core_news_md'
  es: 'es_core_news_md'
  de: 'de_core_news_md'
  it: 'it_core_news_md'
  zh: 'zh_core_web_md'

# Languages that separate words with spaces
language_split_with_space:
  - 'en'
  - 'es'
  - 'fr'
  - 'de'
  - 'it'
  - 'ru'

# Languages that do not separate words with spaces
language_split_without_space:
  - 'zh'
  - 'ja'
github 2025-08-20 11:39:34 +08:00			`# * Settings marked with * are advanced settings that won't appear in the Streamlit page and can only be modified manually in config.py`
			`# recommend to set in streamlit page`
			`# -------------------`
			`# version: "3.0.0"`
			`# author: "Huanshere"`
			`# -------------------`

			`## ======================== Basic Settings ======================== ##`

			`display_language: "zh-CN"`

			`# API settings`
			`api:`
			`key: 'YOUR_API_KEY'`
			`base_url: 'https://dashscope.aliyuncs.com/compatible-mode/v1'`
			`model: 'qwen-plus'`
			`llm_support_json: false`
			`# *Number of LLM multi-threaded accesses, set to 1 if using local LLM`
			`max_workers: 4`

			`# Language settings, written into the prompt, can be described in natural language`
			`target_language: 'English'`

			`# Whether to use Demucs for vocal separation before transcription`
			`demucs: true`

			`whisper:`
			`# ["large-v3", "large-v3-turbo"]. Note: for zh model will force to use Belle/large-v3`
			`model: 'large-v3'`
			`# Whisper specified recognition language ISO 639-1`
			`language: 'zh'`
			`detected_language: 'en'`
			`# Whisper running mode ["local", "cloud", "elevenlabs"]. Specifies where to run, cloud uses 302.ai API`
			`runtime: 'local'`
			`# 302.ai API key`
			`whisperX_302_api_key: 'your_302_api_key'`
			`# ElevenLabs API key (experimental)`
			`elevenlabs_api_key: 'your_elevenlabs_api_key'`

			`# Whether to burn subtitles into the video`
			`burn_subtitles: true`

			`## ======================== Advanced Settings ======================== ##`
			`# *🔬 h264_nvenc GPU acceleration for ffmpeg, make sure your GPU supports it`
			`ffmpeg_gpu: false`

			`# *Youtube settings`
			`youtube:`
			`cookies_path: ''`

			`# *Default resolution for downloading YouTube videos [360, 1080, best]`
			`ytb_resolution: '1080'`

			`subtitle:`
			`# *Maximum length of each subtitle line in characters`
修改翻译效果 2025-08-21 18:09:57 +08:00			`max_length: 40`
github 2025-08-20 11:39:34 +08:00			`# *Translated subtitles are slightly larger than source subtitles, affecting the reference length for subtitle splitting`
			`target_multiplier: 1.2`

			`# *Summary length, set low to 2k if using local LLM`
			`summary_length: 8000`

			`# *Maximum number of words for the first rough cut, below 18 will cut too finely affecting translation, above 22 is too long and will make subsequent subtitle splitting difficult to align`
			`max_split_length: 20`

			`# *Whether to reflect the translation result in the original text`
			`reflect_translate: true`

			`# *Whether to pause after extracting professional terms and before translation, allowing users to manually adjust the terminology table output\log\terminology.json`
			`pause_before_translate: false`

			`## ======================== Dubbing Settings ======================== ##`
			`# TTS selection [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts]`
			`tts_method: 'edge_tts'`

			`# SiliconFlow FishTTS`
			`sf_fish_tts:`
			`# SiliconFlow API key`
			`api_key: 'YOUR_API_KEY'`
			`# only for mode "preset"`
			`voice: 'anna'`
			`# *only for mode "custom", dont set manually`
			`custom_name: ''`
			`voice_id: ''`
			`# preset, custom, dynamic`
			`mode: "preset"`

			`# OpenAI TTS-1 API configuration, 302.ai API only`
			`openai_tts:`
			`api_key: 'YOUR_302_API_KEY'`
			`voice: 'alloy'`

			`# Azure configuration, 302.ai API only`
			`azure_tts:`
			`api_key: 'YOUR_302_API_KEY'`
			`voice: 'zh-CN-YunfengNeural'`

			`# FishTTS configuration, 302.ai API only`
			`fish_tts:`
			`api_key: 'YOUR_302_API_KEY'`
			`character: 'AD学姐'`
			`character_id_dict:`
			`'AD学姐': '7f92f8afb8ec43bf81429cc1c9199cb1'`
			`'丁真': '54a5170264694bfc8e9ad98df7bd89c3'`

			`# SiliconFlow CosyVoice2 Clone`
			`sf_cosyvoice2:`
			`api_key: 'YOUR_SF_KEY'`

			`# Edge TTS configuration`
			`edge_tts:`
			`voice: 'zh-CN-XiaoxiaoNeural'`

			`# SoVITS configuration`
			`gpt_sovits:`
			`character: 'Huanyuv2'`
			`refer_mode: 3`

			`f5tts:`
			`302_api: 'YOUR_302_API_KEY'`

			`# *Audio speed range`
			`speed_factor:`
			`min: 1`
修改翻译效果 2025-08-21 18:09:57 +08:00			`accept: 4 # Maximum acceptable speed`
			`max: 5`
github 2025-08-20 11:39:34 +08:00
			`# *Merge audio configuration`
修改翻译效果 2025-08-21 18:09:57 +08:00			`min_subtitle_duration: 0.2 # Minimum subtitle duration, will be forcibly extended`
github 2025-08-20 11:39:34 +08:00			`min_trim_duration: 3.5 # Subtitles shorter than this value won't be split`
			`tolerance: 1.5 # Allowed extension time to the next subtitle`





			`## ======================== Additional settings ======================== ##`

			`# Whisper model directory`
			`model_dir: './_model_cache'`

			`# Supported upload video formats`
			`allowed_video_formats:`
			`- 'mp4'`
			`- 'mov'`
			`- 'avi'`
			`- 'mkv'`
			`- 'flv'`
			`- 'wmv'`
			`- 'webm'`

			`allowed_audio_formats:`
			`- 'wav'`
			`- 'mp3'`
			`- 'flac'`
			`- 'm4a'`

			`# Spacy models`
			`spacy_model_map:`
			`en: 'en_core_web_md'`
			`ru: 'ru_core_news_md'`
			`fr: 'fr_core_news_md'`
			`ja: 'ja_core_news_md'`
			`es: 'es_core_news_md'`
			`de: 'de_core_news_md'`
			`it: 'it_core_news_md'`
			`zh: 'zh_core_web_md'`

			`# Languages that use space as separator`
			`language_split_with_space:`
			`- 'en'`
			`- 'es'`
			`- 'fr'`
			`- 'de'`
			`- 'it'`
			`- 'ru'`

			`# Languages that do not use space as separator`
			`language_split_without_space:`
			`- 'zh'`
			`- 'ja'`