violin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ ---
2
+ name: video-translator
3
+ description: Dub a video into another language and generate subtitles using the default Together + Cartesia stack. Trigger when the user wants to translate / dub / voice-over a video file, or generate subtitles for it. Handles `.mp4` / `.mkv` / `.webm`. Installs as the `violin` CLI (and `violin-api` for the FastAPI server) via `uv tool install`. For alternative models (OpenAI / ElevenLabs) or custom configs, point the user to the repo: https://github.com/shang-zhu/violin.
4
+ allowed-tools: Bash, Read
5
+ ---
6
+
7
+ # Violin — operating skill
8
+
9
+ This skill always uses the default config (Together for translation, `cartesia/sonic-3` for TTS). If the user asks for OpenAI, ElevenLabs, or custom configs, **stop and point them to the Violin repo** — those flows aren't supported through the global CLI.
10
+
11
+ ## Pre-flight
12
+
13
+ Run these silently first. Abort if any fails:
14
+
15
+ ```bash
16
+ command -v violin # 1. CLI on PATH
17
+ test -f "<input>" # 2. Input exists
18
+ printenv TOGETHER_API_KEY # 3. Key available
19
+ ```
20
+
21
+ If `violin` is missing: tell the user to run `uv tool install violin`, then `violin --install-skill` to refresh this skill file. Do not auto-install.
22
+
23
+ If `TOGETHER_API_KEY` is missing:
24
+ - Inside the Violin repo → populate `.env` (auto-loaded)
25
+ - Elsewhere → `export TOGETHER_API_KEY=...` in `~/.zshrc` / `~/.bashrc`, then `source` it
26
+
27
+ ## Decisions
28
+
29
+ - **CLI vs API**: single run-and-wait file → CLI (`violin`). Multi-job / HTTP / web UI → API server (`violin-api`); print the command, don't auto-start it.
30
+ - **Style** (`--style`): default `standard`. Kids content → `kids`, formal/lecture → `academic`, casual → `casual`, dramatic → `storyteller`, news → `news`. Run `violin --style list` if unsure.
31
+ - **Voiceover**: keep default (mix dubbed audio over a quiet original). Use `--no-voiceover` only when the user explicitly says "replace audio entirely".
32
+
33
+ ## Run
34
+
35
+ ```bash
36
+ violin <input> <output> --language <Lang> [flags]
37
+ ```
38
+
39
+ ## Flags
40
+
41
+ | Flag | Default | When to set |
42
+ |------|---------|-------------|
43
+ | `--language` / `-l` | *required* | Target language (e.g. `Chinese`, `Spanish`, `Japanese`). |
44
+ | `--voice` / `-v` | auto (native voice picked by `preferences.voice_gender`) | Only when the user names a specific voice from the catalog (e.g. `"warm female narrator"`). Otherwise omit and let the default kick in. |
45
+ | `--source-language` | `auto-detect` | Only if Whisper mis-detects the source language. |
46
+ | `--style` / `-s` | `standard` | See Decisions above. |
47
+ | `--no-subtitles` | off | User says "no SRT" / "video only". |
48
+ | `--no-voiceover` | off | User says "replace original audio entirely". |
49
+ | `--config` / `-c` | `config/default.yaml` | Don't use through this skill — repo-only flow. |
50
+ | `--timings-out` | off | Only when the user wants a per-step timing JSON for debugging / benchmarking. |
51
+
52
+ ## Language coverage
53
+
54
+ 33 target languages total. **16** ship with handpicked native-speaker voices: Chinese, Spanish, English, Hindi, Arabic, Portuguese, Russian, Japanese, Turkish, German, Korean, French, Italian, Polish, Dutch, Swedish. The other **17** fall back to the English voice catalog (multilingual under Cartesia Sonic 3) — quality is decent but the voice isn't a native speaker. Mention this caveat only if the user is translating to a fallback language and asks about voice quality.
55
+
56
+ ## Report back
57
+
58
+ - Output video path + SRT path (printed by the run).
59
+ - Total cost (printed at end — surface, don't hide).
60
+ - If voiceover was on, mention the `_original.m4a` sidecar.
61
+
62
+ ## Don'ts
63
+
64
+ - Don't run on multi-GB videos without first quoting the rough cost (audio length × per-provider rates in `pipeline/pricing.py`).
65
+ - Don't fabricate a "subtitles-only" mode — the CLI requires the full pipeline. If the user only wants SRT, run the full pipeline and hand them just the `.srt`, warning them of the cost first.
66
+ - Don't try to switch to OpenAI or ElevenLabs from this skill. Point the user to the repo + `--config config/other_api.yaml` (or their own override).
67
+ - Don't paraphrase the README. For supported languages (33), voice catalog, and full flag docs, point them at `README.md` or `violin --help`.
api/__init__.py ADDED
File without changes
api/app.py ADDED
@@ -0,0 +1,109 @@
1
+ """FastAPI application factory."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ import pathlib
7
+
8
+ from fastapi import FastAPI
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.responses import FileResponse
11
+ from fastapi.staticfiles import StaticFiles
12
+
13
+ from .routes import catalog, chat, files, jobs
14
+
15
logger = logging.getLogger(__name__)

# Directory holding the bundled front-end assets, resolved next to this module.
_STATIC = pathlib.Path(__file__).parent / "static"


def _parse_cors_origins(raw: str) -> list[str]:
    """Split a comma-separated origin list into clean entries.

    Whitespace around each entry is trimmed and blank entries are dropped, so
    a value such as ``"https://a.example, https://b.example"`` yields two
    usable origins instead of one with a leading space that can never match a
    browser's ``Origin`` header.

    Args:
        raw: The raw ``CORS_ORIGINS`` value.

    Returns:
        The cleaned origins, in their original order. An empty or all-blank
        value yields an empty list (no origin allowed).
    """
    return [origin for origin in (part.strip() for part in raw.split(",")) if origin]


# Origins allowed by the CORS middleware; defaults to "*" when CORS_ORIGINS is unset.
_ALLOWED_ORIGINS = _parse_cors_origins(os.environ.get("CORS_ORIGINS", "*"))
20
+
21
# The ASGI application. Interactive API docs are exposed at /docs and /redoc.
# NOTE(review): the description advertises 42 languages, while the bundled
# skill document in this package mentions 33 target languages — confirm which
# figure is current.
app = FastAPI(
    title="Violin API",
    description=(
        "Translate educational videos into 42 languages using Together AI. "
        "Upload a video, poll for status, then download the dubbed output."
    ),
    version="0.1.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS: every method and header is accepted from the configured origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Route groups, one router per module in api/routes.
app.include_router(jobs.router)
app.include_router(files.router)
app.include_router(catalog.router)
app.include_router(chat.router)

# Serve the bundled front-end assets under /static.
app.mount("/static", StaticFiles(directory=str(_STATIC)), name="static")
45
+
46
+
47
@app.get("/", include_in_schema=False)
def root():
    """Serve the single-page app shell with revalidation forced on every load."""
    # Without this header users keep seeing a stale UI after upgrading
    # violin-api (notably on 127.0.0.1 vs localhost, which Chrome caches as
    # separate origins).
    revalidate = {"Cache-Control": "no-cache, must-revalidate"}
    index_page = _STATIC / "index.html"
    return FileResponse(str(index_page), headers=revalidate)
56
+
57
+
58
# Health probe. HEAD is accepted alongside GET so that external monitors which
# only issue HEAD requests (e.g. UptimeRobot's free tier) don't receive a 405.
@app.api_route("/health", methods=["GET", "HEAD"], include_in_schema=False)
def health():
    """Return a constant payload signalling that the server is up."""
    payload = {"ok": True}
    return payload
63
+
64
+
65
@app.get("/app-config", include_in_schema=False)
def app_config():
    """Expose the deployment limits and feature flags the front-end reads."""
    # Imported lazily, as in the rest of this module's handlers.
    from . import config as _cfg

    settings = {}
    settings["url_upload"] = _cfg.URL_UPLOAD
    settings["max_duration_seconds"] = _cfg.MAX_DURATION_SECONDS
    settings["max_file_size_mb"] = _cfg.MAX_FILE_SIZE_MB
    # Used by the footer to hide privacy/auto-delete warnings on local deployments.
    settings["free_trial_jobs"] = _cfg.FREE_TRIAL_JOBS
    settings["job_ttl_hours"] = _cfg.JOB_TTL_HOURS
    return settings
82
+
83
+
84
@app.on_event("startup")
async def _init_stats():
    """Initialise the stats database on startup.

    Creates the stats table when it is missing; the call is idempotent, so
    rows written before a restart are kept.
    """
    from .stats import init_db

    init_db()
90
+
91
+
92
@app.on_event("startup")
async def _start_cleanup_loop():
    """Start the background task that purges expired jobs once per hour.

    Does nothing when ``JOB_TTL_HOURS`` is zero or negative. The created task
    is stored on ``app.state.cleanup_task`` so it stays referenced for the
    lifetime of the application.
    """
    from .config import JOB_TTL_HOURS
    if JOB_TTL_HOURS <= 0:
        return

    async def _cleanup_loop():
        from .storage import cleanup_old_jobs
        while True:
            # Sleep first: the initial sweep happens one hour after startup.
            await asyncio.sleep(3600)
            try:
                deleted = cleanup_old_jobs(JOB_TTL_HOURS)
                if deleted:
                    logger.info("Cleaned up %d expired job(s)", deleted)
            except Exception:
                # Best effort: log and keep the loop alive for the next sweep.
                logger.exception("Job cleanup failed")

    # The event loop keeps only weak references to tasks, so an unreferenced
    # task may be garbage-collected mid-execution. Hold a strong reference.
    app.state.cleanup_task = asyncio.create_task(_cleanup_loop())
api/config.py ADDED
@@ -0,0 +1,15 @@
1
+ """API configuration."""
2
+
3
+ from pathlib import Path
4
+
5
+ from pipeline import config as _conf
6
+
7
# The "api" section of the pipeline configuration.
_api = _conf.get()["api"]

# Required settings: read by plain subscripting, so a missing key raises when
# this module is imported.
JOBS_DIR = Path(_api["jobs_dir"])
MAX_WORKERS = _api["max_workers"]

# Optional settings, each with a fallback default.
JOB_TTL_HOURS = _api.get("job_ttl_hours", 24)
FREE_TRIAL_JOBS = _api.get("free_trial_jobs", 1)
URL_UPLOAD = _api.get("url_upload", True)
MAX_DURATION_SECONDS = _api.get("max_duration_seconds", 1800)
MAX_FILE_SIZE_MB = _api.get("max_file_size_mb", 500)
api/models.py ADDED
@@ -0,0 +1,87 @@
1
+ """Pydantic models for API requests and responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum
6
+ from typing import Any
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
class JobStatus(str, Enum):
    """Lifecycle states of a job.

    Mixes in ``str`` so members compare equal to their plain string values.
    """

    queued = "queued"
    running = "running"
    done = "done"
    failed = "failed"
    cancelled = "cancelled"
17
+
18
+
19
class ProgressEvent(BaseModel):
    """A single progress update attached to a job."""

    step: int  # presumably the index of the current step — TODO confirm
    total: int  # presumably the total number of steps — TODO confirm
    message: str
23
+
24
+
25
class JobResponse(BaseModel):
    """Job description: identifier, status, requested options, and progress."""

    id: str
    status: JobStatus
    language: str
    voice: str
    source_language: str
    subtitles: bool
    style: str = "standard"
    # Progress updates recorded so far; empty by default.
    progress: list[ProgressEvent] = Field(default_factory=list)
    # Defaults to None; presumably set only for failed jobs — TODO confirm.
    error: str | None = None
35
+
36
+
37
class CreateJobRequest(BaseModel):
    """Used internally — the route uses Form() fields directly."""

    language: str
    voice: str = ""
    source_language: str = "auto-detect"
    subtitles: bool = True
43
+
44
+
45
class SubtitleSegment(BaseModel):
    """One subtitle segment of a job."""

    id: int
    start: float  # presumably seconds from the start of the video — TODO confirm
    end: float  # presumably seconds from the start of the video — TODO confirm
    text: str
    speaker: str = "SPEAKER_00"
51
+
52
+
53
class ChatMessage(BaseModel):
    """A single message in a chat history."""

    role: str
    content: str
56
+
57
+
58
class VoiceMatchRequest(BaseModel):
    """Request body for the voice-match endpoint."""

    # Free-form description of the desired voice.
    description: str
    # Target language; empty means "not specified".
    language: str = ""
    # Optional per-request API key overrides; an empty or blank string means
    # no override is passed to the translation client.
    together_api_key: str = ""
    openai_api_key: str = ""
63
+
64
+
65
class VoiceCandidate(BaseModel):
    """A voice proposed by the voice-match endpoint, with its rationale."""

    voice: str
    explanation: str
68
+
69
+
70
class VoiceMatchResponse(BaseModel):
    """Response body of the voice-match endpoint."""

    candidates: list[VoiceCandidate]
72
+
73
+
74
class VideoChatRequest(BaseModel):
    """Request body for asking a question about a job's video."""

    question: str
    # Must be non-negative; presumably the playback position in seconds —
    # TODO confirm.
    current_time: float = Field(ge=0)
    # Earlier messages of the conversation; empty by default.
    history: list[ChatMessage] = Field(default_factory=list)
    language: str = ""
79
+
80
+
81
class VideoChatResponse(BaseModel):
    """Response body of the video chat endpoint."""

    answer: str
    # NOTE(review): presumably the time window used as context for the
    # answer — confirm against the video chat implementation.
    context_start: float
    context_end: float
    subtitle_context: list[SubtitleSegment] = Field(default_factory=list)
    sampled_timestamps: list[float] = Field(default_factory=list)
    style: str = "standard"
api/routes/__init__.py ADDED
File without changes
api/routes/catalog.py ADDED
@@ -0,0 +1,190 @@
1
+ """Catalog endpoints: list supported languages, voices, and styles."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import re
8
+
9
+ from dotenv import load_dotenv
10
+ from fastapi import APIRouter, HTTPException
11
+
12
+ from api.models import VoiceCandidate, VoiceMatchRequest, VoiceMatchResponse
13
+ from pipeline import config as _conf
14
+ from pipeline.languages import all_languages, language_code
15
+ from pipeline.llm_client import get_translation_model, make_translation_client
16
+ from pipeline.styles import list_styles
17
+ from pipeline.tts import all_voices, native_voices_for, voice_descriptions
18
+ import prompts as _prompts
19
+
20
# Load variables from a .env file when this module is imported. With
# override=True, values from .env replace variables already present in the
# process environment.
load_dotenv(override=True)

# Router for the catalog endpoints defined in this module.
router = APIRouter(tags=["catalog"])
23
+
24
+
25
@router.get("/languages")
def list_languages() -> dict[str, str]:
    """List every supported language as a name → BCP-47 code mapping."""
    supported = all_languages()
    return supported
29
+
30
+
31
@router.get("/voices")
def list_voices() -> dict[str, list[str]]:
    """List all known native Cartesia Sonic 3 voices, keyed by BCP-47 language code."""
    voices_by_code = all_voices()
    return voices_by_code
35
+
36
+
37
@router.get("/voices/{language}")
def voices_for_language(language: str) -> list[str]:
    """List the native voices for one language, given by name or BCP-47 code."""
    code = language_code(language)
    return native_voices_for(code)
41
+
42
+
43
@router.get("/styles")
def get_styles() -> list[dict]:
    """List every available translation style profile with its TTS settings."""
    profiles: list[dict] = []
    for style in list_styles():
        profiles.append(
            {
                "name": style.name,
                "description": style.description,
                "tts_speed": style.tts_speed,
                "tts_emotion": style.tts_emotion,
            }
        )
    return profiles
55
+
56
+
57
def _build_voice_catalog(target_lang: str) -> str:
    """Render the active provider's voice catalog as text for the LLM prompt.

    Voices are listed one per line as `- <name> — <description>`; the
    description is omitted when it is empty or identical to the name. For
    Cartesia the description is the name itself (e.g. 'german conversational
    woman'); for ElevenLabs it is the official metadata description. The
    target language's section, when present, comes first; the remaining
    sections follow in sorted order of their language code.
    """
    by_code = all_voices()
    blurbs = voice_descriptions()
    target_code = ""
    if target_lang:
        target_code = language_code(target_lang)

    def _render(voice_name: str) -> str:
        blurb = blurbs.get(voice_name, "")
        if not blurb or blurb == voice_name:
            return f" - {voice_name}"
        return f" - {voice_name} — {blurb}"

    out: list[str] = []

    # Target-language voices lead the catalog, followed by a blank separator.
    if target_code and target_code in by_code:
        out.append(f"== Voices for target language ({target_code}) ==")
        out.extend(_render(name) for name in by_code[target_code])
        out.append("")

    for code in sorted(by_code):
        if code == target_code:
            continue
        heading = code
        if code == "multi":
            heading = "All voices (multilingual)"
        out.append(f"== {heading} ==")
        out.extend(_render(name) for name in by_code[code])

    return "\n".join(out)
87
+
88
+
89
# Number of LLM attempts the voice-match endpoint makes before giving up.
_MAX_VOICE_MATCH_RETRIES = 3
90
+
91
+
92
def _parse_voice_candidates(raw: str, all_ids: list[str]) -> list[VoiceCandidate]:
    """Extract up to three VoiceCandidates from a raw LLM response.

    The response is cleaned of ``<think>…</think>`` sections and Markdown code
    fences, then parsed as JSON. A JSON list, or a single object carrying a
    ``"voice"`` key, is used directly; otherwise the outermost ``[...]`` span
    is extracted and parsed. Items may be plain voice-name strings or objects
    with ``"voice"`` and optional ``"explanation"`` keys. Names are matched
    case-insensitively against ``all_ids``; unknown names, duplicates, and
    malformed items are skipped.

    Args:
        raw: The raw text returned by the LLM.
        all_ids: Every valid voice name; matches are returned in this casing.

    Returns:
        At most three candidates, in the order the LLM listed them.

    Raises:
        json.JSONDecodeError: If no JSON can be extracted from ``raw``.
    """
    cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
    cleaned = cleaned.removeprefix("```json").removeprefix("```").removesuffix("```").strip()

    # Prefer the whole payload when it is already usable JSON. A greedy
    # bracket search alone would mangle a single-object reply whose text
    # happens to contain square brackets.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = None

    if isinstance(parsed, list):
        items = parsed
    elif isinstance(parsed, dict) and "voice" in parsed:
        items = [parsed]
    else:
        # Fall back to the outermost [...] span (JSON wrapped in prose or
        # nested inside an envelope object).
        bracket_match = re.search(r"\[.*\]", cleaned, flags=re.DOTALL)
        items = json.loads(bracket_match.group(0) if bracket_match else cleaned)
        if isinstance(items, (dict, str)):
            items = [items]
        elif not isinstance(items, list):
            return []

    by_lower = {v.lower(): v for v in all_ids}

    candidates: list[VoiceCandidate] = []
    seen: set[str] = set()
    for item in items:
        if isinstance(item, str):
            name, explanation = item, ""
        elif isinstance(item, dict):
            name = item.get("voice")
            explanation = item.get("explanation")
        else:
            continue  # neither a name nor an object: nothing to match

        if not isinstance(name, str):
            continue
        voice = by_lower.get(name.lower().strip())
        if not voice or voice.lower() in seen:
            continue

        # The model field is a string; coerce whatever the LLM produced.
        if explanation is None:
            explanation = ""
        elif not isinstance(explanation, str):
            explanation = str(explanation)

        seen.add(voice.lower())
        candidates.append(VoiceCandidate(voice=voice, explanation=explanation))
        if len(candidates) == 3:
            break
    return candidates
124
+
125
+
126
@router.post("/voice-match", response_model=VoiceMatchResponse)
def match_voice(payload: VoiceMatchRequest):
    """Use an LLM to map a natural language voice description to the best voice in the catalog.

    Reuses the translation client + model (``models.translation``) — no separate
    configuration needed. The LLM is queried up to
    ``_MAX_VOICE_MATCH_RETRIES`` times; the first response that yields at
    least one valid candidate is returned.

    Raises:
        HTTPException: 500 when the translation client cannot be created;
            502 when every attempt fails, with the last error in the detail.
    """
    cfg = _conf.get()
    try:
        client = make_translation_client(
            cfg,
            together_key_override=payload.together_api_key.strip() or None,
            openai_key_override=payload.openai_api_key.strip() or None,
        )
    except RuntimeError as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    catalog = _build_voice_catalog(payload.language)
    # Flatten every voice list, dropping duplicates while keeping first-seen order.
    all_ids: list[str] = list(
        dict.fromkeys(v for v_list in all_voices().values() for v in v_list)
    )

    messages = [
        {
            "role": "system",
            "content": _prompts.load("voice_match", "system", catalog=catalog),
        },
        {
            "role": "user",
            "content": _prompts.load(
                "voice_match", "user",
                language=payload.language or "not specified",
                description=payload.description,
            ),
        },
    ]

    # voice_match reuses the translation client + model — same LLM, same provider.
    model = get_translation_model(cfg)

    last_error = ""
    for _ in range(_MAX_VOICE_MATCH_RETRIES):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.1,
                max_tokens=400,
            )
            raw = (response.choices[0].message.content or "").strip()

            if not raw:
                last_error = "LLM returned empty response"
                continue

            candidates = _parse_voice_candidates(raw, all_ids)
            if candidates:
                return VoiceMatchResponse(candidates=candidates)
            last_error = "parsed JSON but found no valid voice names"
        except Exception as exc:
            # Deliberately broad: any provider or parsing failure (including
            # json.JSONDecodeError) counts as a failed attempt and is retried.
            last_error = str(exc) or type(exc).__name__

    raise HTTPException(status_code=502, detail=f"Voice matching failed after {_MAX_VOICE_MATCH_RETRIES} attempts ({last_error})")
api/routes/chat.py ADDED
@@ -0,0 +1,39 @@
1
+ """Video chat endpoints for completed jobs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import APIRouter, HTTPException
6
+
7
+ from api.models import SubtitleSegment, VideoChatRequest, VideoChatResponse
8
+ from api.storage import get_job, load_segments
9
+ from api.video_chat import answer_video_question
10
+
11
# Router for the chat endpoints; every route in this module lives under /jobs.
router = APIRouter(prefix="/jobs", tags=["chat"])
12
+
13
+
14
@router.get("/{job_id}/segments", response_model=list[SubtitleSegment])
def get_job_segments(job_id: str):
    """Return the subtitle segments of a completed job.

    Responds with 404 when the job is unknown and 409 when it is not done.
    """
    job = get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found.")
    if job.status != "done":
        raise HTTPException(status_code=409, detail=f"Job '{job_id}' is not complete.")

    segments = []
    for record in load_segments(job_id):
        segments.append(SubtitleSegment(**record))
    return segments
22
+
23
+
24
@router.post("/{job_id}/chat", response_model=VideoChatResponse)
def chat_with_video(job_id: str, payload: VideoChatRequest):
    """Answer a question about a job's video at the given playback position.

    Delegates to ``answer_video_question`` and translates its failures into
    HTTP errors.

    Raises:
        HTTPException: 404 for ``FileNotFoundError``, 409 for
            ``RuntimeError``, 500 for any other failure. An ``HTTPException``
            raised downstream is passed through unchanged.
    """
    try:
        return answer_video_question(
            job_id=job_id,
            question=payload.question,
            current_time=payload.current_time,
            history=payload.history,
            language=payload.language,
        )
    except HTTPException:
        # Already carries a deliberate status code; the catch-all below would
        # otherwise rewrap it as a 500.
        raise
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except RuntimeError as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
api/routes/files.py ADDED
@@ -0,0 +1,133 @@
1
+ """File download endpoints for completed jobs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+
7
+ from fastapi import APIRouter, HTTPException, Query
8
+ from fastapi.responses import FileResponse
9
+
10
+ from api.models import JobStatus
11
+ from api.storage import get_job, original_audio_path, output_srt_path, output_video_path, voiceover_video_path
12
+ from pipeline.ffmpeg_utils import FFMPEG_EXE
13
+
14
# Router for the file download endpoints; every route in this module lives under /jobs.
router = APIRouter(prefix="/jobs", tags=["files"])
15
+
16
+
17
def _assert_done(job_id: str) -> None:
    """Raise an HTTP error unless the job exists and has finished.

    Raises:
        HTTPException: 404 when the job is unknown, 409 when its status is
            anything other than ``done``.
    """
    job = get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found.")
    if job.status != JobStatus.done:
        not_ready = f"Job '{job_id}' is not complete (status: {job.status})."
        raise HTTPException(status_code=409, detail=not_ready)
26
+
27
+
28
@router.get("/{job_id}/video", response_class=FileResponse)
def download_video(job_id: str):
    """Send the dubbed output video of a finished job (status=done only)."""
    _assert_done(job_id)
    video = output_video_path(job_id)
    if video.exists():
        return FileResponse(
            path=str(video),
            media_type="video/mp4",
            filename=f"{job_id}_dubbed.mp4",
        )
    raise HTTPException(status_code=404, detail="Output video not found.")
40
+
41
+
42
@router.get("/{job_id}/original-audio")
def get_original_audio(job_id: str):
    """Send the original audio track (aligned to the dubbed timeline) used for voice-over mixing."""
    _assert_done(job_id)
    audio = original_audio_path(job_id)
    if audio.exists():
        return FileResponse(
            path=str(audio),
            media_type="audio/mp4",
            filename=f"{job_id}_original.m4a",
        )
    raise HTTPException(
        status_code=404,
        detail="Original audio track not available. The job may not have used voice-over mode.",
    )
57
+
58
+
59
@router.get("/{job_id}/video-voiceover", response_class=FileResponse)
def download_voiceover_video(
    job_id: str,
    volume: float = Query(0.1, ge=0.0, le=1.0, description="Original audio volume (0.0–1.0)"),
):
    """Send the dubbed video with the original audio mixed in at ``volume``.

    The mixed file is cached next to the job outputs and rebuilt only when it
    is missing or was produced at a different volume.
    """
    _assert_done(job_id)

    dubbed = output_video_path(job_id)
    orig_audio = original_audio_path(job_id)
    if not dubbed.exists():
        raise HTTPException(status_code=404, detail="Output video not found.")
    if not orig_audio.exists():
        raise HTTPException(status_code=404, detail="Original audio not available. Job may not have used voice-over mode.")

    mixed = voiceover_video_path(job_id)
    cache_hit = mixed.exists() and _volume_matches(job_id, volume)
    if not cache_hit:
        _mix_voiceover(str(dubbed), str(orig_audio), str(mixed), volume)
        _save_volume(job_id, volume)

    return FileResponse(
        path=str(mixed),
        media_type="video/mp4",
        filename=f"{job_id}_voiceover.mp4",
    )
84
+
85
+
86
def _mix_voiceover(video_path: str, audio_path: str, output_path: str, volume: float) -> None:
    """Mix original audio into the dubbed video using ffmpeg.

    The video stream is copied; the dubbed track (at full volume) and the
    original track (scaled by ``volume``) are mixed and encoded as AAC. The
    result is written to a uniquely named temporary file beside
    ``output_path`` and moved into place only on success, so a failed or
    concurrent mix never leaves a truncated file at ``output_path``.

    Args:
        video_path: Dubbed video (input 0: video stream + dubbed audio).
        audio_path: Original audio track (input 1).
        output_path: Destination of the mixed video.
        volume: Gain applied to the original audio.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import os
    import uuid

    # Keep the original extension so ffmpeg infers the same container format.
    stem, ext = os.path.splitext(output_path)
    tmp_path = f"{stem}.{uuid.uuid4().hex}.partial{ext}"
    try:
        subprocess.run([
            FFMPEG_EXE,
            "-i", video_path,
            "-i", audio_path,
            "-filter_complex",
            f"[0:a]volume=1.0[dub];[1:a]volume={volume}[orig];[dub][orig]amix=inputs=2:duration=first[out]",
            "-map", "0:v",
            "-map", "[out]",
            "-c:v", "copy",
            "-c:a", "aac",
            "-movflags", "+faststart",
            "-y", tmp_path,
        ], check=True, capture_output=True)
        # Atomic on the same filesystem: readers see the old or the new file.
        os.replace(tmp_path, output_path)
    finally:
        # Only present when ffmpeg or the rename failed part-way.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
101
+
102
+
103
def _volume_matches(job_id: str, volume: float) -> bool:
    """Tell whether the cached voice-over was mixed at ``volume``.

    Reads the ``.vol`` file stored beside the cached voice-over video. A
    missing, unreadable, or non-numeric file counts as a mismatch; otherwise
    the volumes match when they differ by less than 0.001.
    """
    marker = voiceover_video_path(job_id).with_suffix(".vol")
    if marker.exists():
        try:
            recorded = float(marker.read_text().strip())
        except (ValueError, OSError):
            return False
        return abs(recorded - volume) < 0.001
    return False
112
+
113
+
114
def _save_volume(job_id: str, volume: float) -> None:
    """Record the volume of the cached voice-over mix in its ``.vol`` file."""
    marker = voiceover_video_path(job_id).with_suffix(".vol")
    text = str(volume)
    marker.write_text(text)
117
+
118
+
119
@router.get("/{job_id}/srt", response_class=FileResponse)
def download_srt(job_id: str):
    """Send the SRT subtitle file of a finished job (status=done and subtitles=true only)."""
    _assert_done(job_id)
    srt = output_srt_path(job_id)
    if srt.exists():
        return FileResponse(
            path=str(srt),
            media_type="text/plain; charset=utf-8",
            filename=f"{job_id}.srt",
        )
    raise HTTPException(
        status_code=404,
        detail="SRT file not found. The job may have been created with subtitles=false.",
    )