unrender 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrender/__init__.py +15 -0
- unrender/__main__.py +4 -0
- unrender/adapters/__init__.py +33 -0
- unrender/adapters/artifacts.py +32 -0
- unrender/adapters/paths.py +105 -0
- unrender/adapters/rows.py +96 -0
- unrender/adapters/stems.py +143 -0
- unrender/cli/__init__.py +5 -0
- unrender/cli/commands/__init__.py +39 -0
- unrender/cli/commands/audio.py +283 -0
- unrender/cli/commands/context.py +342 -0
- unrender/cli/commands/export.py +13 -0
- unrender/cli/commands/face.py +54 -0
- unrender/cli/commands/health.py +64 -0
- unrender/cli/commands/labels.py +33 -0
- unrender/cli/commands/timeline.py +57 -0
- unrender/cli/commands/voice.py +116 -0
- unrender/cli/helpers.py +53 -0
- unrender/cli/main.py +39 -0
- unrender/cli/parser.py +411 -0
- unrender/cloning/__init__.py +5 -0
- unrender/cloning/voice.py +476 -0
- unrender/dialogue/__init__.py +27 -0
- unrender/dialogue/clusters.py +344 -0
- unrender/dialogue/dub_script.py +326 -0
- unrender/dialogue/lines.py +684 -0
- unrender/dialogue/plan_writers.py +108 -0
- unrender/dialogue/transcription.py +266 -0
- unrender/exports/__init__.py +5 -0
- unrender/exports/artifacts.py +84 -0
- unrender/identity/__init__.py +27 -0
- unrender/identity/face.py +551 -0
- unrender/identity/resolution.py +203 -0
- unrender/identity/voice.py +410 -0
- unrender/io/__init__.py +1 -0
- unrender/io/csv.py +14 -0
- unrender/io/json.py +14 -0
- unrender/manifests/__init__.py +37 -0
- unrender/manifests/fingerprints.py +94 -0
- unrender/manifests/loaders.py +259 -0
- unrender/manifests/models.py +58 -0
- unrender/manifests/store.py +50 -0
- unrender/media/__init__.py +1 -0
- unrender/media/audio.py +143 -0
- unrender/media/ffmpeg.py +229 -0
- unrender/media/names.py +8 -0
- unrender/media/timecode.py +44 -0
- unrender/project/__init__.py +17 -0
- unrender/project/config.py +149 -0
- unrender/project/paths.py +177 -0
- unrender/py.typed +0 -0
- unrender/separation/__init__.py +20 -0
- unrender/separation/audioshake.py +367 -0
- unrender/separation/audioshake_client.py +306 -0
- unrender/separation/bandit.py +448 -0
- unrender/separation/models.py +25 -0
- unrender/shots/__init__.py +15 -0
- unrender/shots/dx.py +128 -0
- unrender/shots/stems.py +217 -0
- unrender/speakers/__init__.py +33 -0
- unrender/speakers/labeling.py +225 -0
- unrender/speakers/registry.py +219 -0
- unrender/timeline/__init__.py +20 -0
- unrender/timeline/builder.py +485 -0
- unrender/timeline/media.py +130 -0
- unrender/timeline/sources.py +276 -0
- unrender-0.2.1.dist-info/METADATA +478 -0
- unrender-0.2.1.dist-info/RECORD +73 -0
- unrender-0.2.1.dist-info/WHEEL +5 -0
- unrender-0.2.1.dist-info/entry_points.txt +2 -0
- unrender-0.2.1.dist-info/licenses/LICENSE +201 -0
- unrender-0.2.1.dist-info/licenses/NOTICE +18 -0
- unrender-0.2.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from unrender.cli.commands.context import (
|
|
7
|
+
_audio_arg,
|
|
8
|
+
_audio_output_prefix,
|
|
9
|
+
_audioshake_client,
|
|
10
|
+
_bandit_settings,
|
|
11
|
+
_default_voice_clip_spec,
|
|
12
|
+
_dx_stem_arg,
|
|
13
|
+
_path_arg,
|
|
14
|
+
_run_paths,
|
|
15
|
+
_shot_speakers_arg,
|
|
16
|
+
_speaker_db_path,
|
|
17
|
+
_stem_sources_arg,
|
|
18
|
+
_string_arg,
|
|
19
|
+
)
|
|
20
|
+
from unrender.cli.helpers import positive_float, threshold
|
|
21
|
+
from unrender.dialogue.clusters import resolve_voice_clips_clustered
|
|
22
|
+
from unrender.dialogue.lines import (
|
|
23
|
+
build_voice_clips,
|
|
24
|
+
import_dub_script_lines,
|
|
25
|
+
map_dialogue_to_shots,
|
|
26
|
+
materialize_dialogue_line_stems,
|
|
27
|
+
resolve_voice_clips,
|
|
28
|
+
transcribe_dialogue_lines,
|
|
29
|
+
)
|
|
30
|
+
from unrender.manifests import load_dialogue_lines, load_shot_manifest, load_voice_inputs, read_json
|
|
31
|
+
from unrender.separation import separate_full_audio_source, separate_global_dx_stem
|
|
32
|
+
from unrender.shots.dx import build_shot_dx, resolve_dx_stem
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _audio_separate(args: argparse.Namespace) -> int:
    """Run the ``audio separate`` command and return the exit code.

    When a full-audio source is available (``--full-audio``, or the project's
    ``full_audio`` path or one of its aliases) it is source-separated with the
    selected backend and the resulting dialogue stem is then speaker-separated.
    Otherwise the DX stem resolved by ``_dx_stem_arg`` is speaker-separated
    directly.

    Raises:
        ValueError: if ``--full-audio`` and ``--dx-stem`` are both given, or
            the source backend is neither ``"bandit"`` nor ``"audioshake"``.
        RuntimeError: if source separation reports no dialogue stem.
    """
    run = _run_paths(args, explicit=args.out)
    audio_prefix = _audio_output_prefix(args, run)
    if args.full_audio and args.dx_stem:
        raise ValueError("--full-audio and --dx-stem are mutually exclusive for audio separate")

    # An explicit --dx-stem suppresses the project-config lookup for a full mix.
    if args.full_audio:
        full_audio = args.full_audio
    elif args.dx_stem:
        full_audio = None
    else:
        full_audio = _string_arg(
            args,
            "full_audio",
            None,
            required=False,
            aliases=("full_audio_stem", "full_stem", "full_mix", "mix_stem"),
        )

    dx_stem: str | Path
    if not full_audio:
        dx_stem = _dx_stem_arg(args, run, explicit=args.dx_stem, required=True)
    else:
        source_backend = str(_audio_arg(args, "source_backend", args.source_backend, "audioshake"))
        if source_backend == "bandit":
            from unrender.separation.bandit import separate_full_audio_source_bandit

            bandit_options = {
                "run": run,
                "full_audio": full_audio,
                "settings": _bandit_settings(args),
                "fmt": _audio_arg(args, "source_format", args.format, "wav"),
                "prefix": audio_prefix,
                "force": args.force,
                "dry_run": args.dry_run,
            }
            source_result = separate_full_audio_source_bandit(**bandit_options)
        elif source_backend == "audioshake":
            # NOTE(review): int() truncates, so a validated value in (0, 1)
            # becomes 0 here -- confirm that is acceptable downstream.
            source_options = {
                "run": run,
                "full_audio": full_audio,
                "fmt": _audio_arg(args, "audioshake_format", args.format, "wav"),
                "prefix": audio_prefix,
                "timeout": int(
                    positive_float(
                        _audio_arg(args, "audioshake_timeout", args.timeout, 1800), "timeout"
                    )
                ),
                "poll_interval": int(
                    positive_float(
                        _audio_arg(args, "audioshake_poll_interval", args.poll_interval, 5),
                        "poll-interval",
                    )
                ),
                "force": args.force,
                "dry_run": args.dry_run,
                "client": None if args.api_key is None else _audioshake_client(args.api_key),
            }
            source_result = separate_full_audio_source(**source_options)
        else:
            raise ValueError(f"unsupported source backend: {source_backend}")

        if args.dry_run:
            print("DRY RUN: would speaker-separate the generated dialogue stem", flush=True)
            return 0
        if source_result.dialogue_stem is None:
            raise RuntimeError("source separation did not produce a dialogue stem")
        dx_stem = source_result.dialogue_stem

    # Speaker-separate the dialogue stem, whichever way it was obtained.
    speaker_options = {
        "run": run,
        "dx_stem": dx_stem,
        "variant": _audio_arg(args, "audioshake_variant", args.variant, "n_speaker"),
        "fmt": _audio_arg(args, "audioshake_format", args.format, "wav"),
        "prefix": audio_prefix,
        "timeout": int(
            positive_float(_audio_arg(args, "audioshake_timeout", args.timeout, 1800), "timeout")
        ),
        "poll_interval": int(
            positive_float(
                _audio_arg(args, "audioshake_poll_interval", args.poll_interval, 5),
                "poll-interval",
            )
        ),
        "force": args.force,
        "dry_run": args.dry_run,
        "client": None if args.api_key is None else _audioshake_client(args.api_key),
    }
    separate_global_dx_stem(**speaker_options)
    return 0
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _audio_transcribe_lines(args: argparse.Namespace) -> int:
    """Run ``audio transcribe-lines`` and return the exit code.

    Dialogue lines are written to the run's dialogue-lines JSON/CSV either by
    importing a dub script or by transcribing the DX stem.

    Raises:
        ValueError: if ``--dub-script`` and ``--dx-stem`` are both given, or
            no DX stem can be resolved when transcription is needed.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()
    if args.dub_script is not None and args.dx_stem:
        raise ValueError("--dub-script and --dx-stem are mutually exclusive for transcribe-lines")

    # An explicit --dx-stem selects transcription, so it must also suppress a
    # dub script that is only configured in the project file. This mirrors how
    # `audio separate` lets --dx-stem override a configured full_audio path.
    if args.dx_stem:
        dub_script = None
    else:
        dub_script = _path_arg(args, "dub_script", args.dub_script, required=False)

    if dub_script is not None:
        import_dub_script_lines(
            dub_script_path=dub_script,
            output_json=run.dialogue_lines_json,
            output_csv=run.dialogue_lines_csv,
            fps=float(_audio_arg(args, "dub_script_fps", args.dub_script_fps, 24.0)),
            default_duration_sec=float(
                _audio_arg(args, "default_duration_sec", args.default_duration_sec, 3.0)
            ),
            min_duration_sec=float(
                _audio_arg(args, "min_duration_sec", args.min_duration_sec, 0.3)
            ),
            handle_sec=float(_audio_arg(args, "handle_sec", args.handle_sec, 0.18)),
            force=args.force,
        )
        return 0

    # The shot manifest is optional for transcription; no manifest means no shots.
    shots_path = _path_arg(args, "shots", args.shots, required=False)
    shots = load_shot_manifest(shots_path) if shots_path else []
    transcribe_dialogue_lines(
        audio_path=Path(_dx_stem_arg(args, run, explicit=args.dx_stem, required=True)),
        output_json=run.dialogue_lines_json,
        output_csv=run.dialogue_lines_csv,
        shots=shots,
        whisper_model=str(_audio_arg(args, "whisper_model", args.whisper_model, "small")),
        device=str(_audio_arg(args, "device", args.device, "cpu")),
        compute_type=str(_audio_arg(args, "compute_type", args.compute_type, "float32")),
        language=_audio_arg(args, "language", args.language, None),
        max_gap_sec=float(_audio_arg(args, "max_gap_sec", args.max_gap_sec, 0.5)),
        handle_sec=float(_audio_arg(args, "handle_sec", args.handle_sec, 0.18)),
        force=args.force,
    )
    return 0
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _audio_build_clips(args: argparse.Namespace) -> int:
    """Run ``audio build-clips`` and return the exit code.

    Loads the dialogue lines (explicit or configured path, else the run's
    dialogue-lines JSON) and passes them with the stem sources to
    ``build_voice_clips``, which targets the run's voice-clip outputs.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()

    lines_path = _path_arg(args, "dialogue_lines", args.dialogue_lines, required=False)
    if not lines_path:
        lines_path = run.dialogue_lines_json

    options = {
        "lines": load_dialogue_lines(lines_path),
        "stems": _stem_sources_arg(args, run),
        "output_dir": run.voice_clips_dir,
        "output_json": run.voice_clips_json,
        "output_csv": run.voice_clips_csv,
        "clip_type": "dialogue_line",
        "silence_threshold_db": float(
            _audio_arg(args, "silence_threshold_db", args.silence_threshold_db, -100.0)
        ),
        "force": args.force,
        "ffmpeg": str(_audio_arg(args, "ffmpeg", args.ffmpeg, "ffmpeg")),
    }
    build_voice_clips(**options)
    return 0
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _audio_shot_dx(args: argparse.Namespace) -> int:
    """Run ``audio shot-dx`` and return the exit code.

    Resolves a DX stem (falling back to the separated speaker stems as merge
    sources when none is configured or derivable) and hands it, together with
    the shot manifest, to ``build_shot_dx``.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()
    shots = load_shot_manifest(_path_arg(args, "shots", args.shots, required=True))

    dx_spec = _dx_stem_arg(args, run, explicit=args.dx_stem, required=False)
    # No DX anywhere: fall back to merging the separated speaker stems.
    merge_sources = None if dx_spec else _stem_sources_arg(args, run)

    dx_stem = resolve_dx_stem(
        run,
        explicit=Path(dx_spec) if dx_spec else None,
        merge_sources=merge_sources,
        force=args.force,
    )
    ffmpeg_bin = str(_audio_arg(args, "ffmpeg", args.ffmpeg, "ffmpeg"))
    build_shot_dx(run=run, shots=shots, dx_stem=dx_stem, force=args.force, ffmpeg=ffmpeg_bin)
    return 0
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _audio_resolve_clips(args: argparse.Namespace) -> int:
    """Run ``audio resolve-clips`` and return the exit code.

    Loads the voice clips (``--clips`` or the run default) and resolves them
    with either the ``"cluster"`` or the ``"clip"`` resolver.

    Raises:
        ValueError: if the selected resolver is not one of those two.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()
    clips = load_voice_inputs(args.clips or _default_voice_clip_spec(run))
    resolver = str(_audio_arg(args, "clip_resolver", args.resolver, "cluster"))
    backend = str(_audio_arg(args, "voice_backend", args.backend, "pyannote"))

    if resolver not in ("cluster", "clip"):
        raise ValueError(f"unsupported clip resolver: {resolver}")

    if resolver == "cluster":
        clusters = _audio_arg(args, "clusters", args.clusters, None)
        cluster_options = {
            "clips": clips,
            "speaker_db_path": _speaker_db_path(args, args.speaker_db, run),
            "output_json": run.clip_stem_plan_json,
            "output_csv": run.clip_stem_plan_csv,
            "backend": backend,
            # An unset or empty cluster count is passed through as None.
            "n_clusters": None if clusters in (None, "") else int(clusters),
            "override_min_sec": positive_float(
                _audio_arg(args, "override_sec", args.override_sec, 1.5), "override-sec"
            ),
            "override_margin": float(
                _audio_arg(args, "override_margin", args.override_margin, 0.15)
            ),
        }
        resolve_voice_clips_clustered(**cluster_options)
        return 0

    clip_options = {
        "clips": clips,
        "speaker_db_path": _speaker_db_path(args, args.speaker_db, run),
        "voice_matches_json": run.voice_matches_json,
        "output_json": run.clip_stem_plan_json,
        "output_csv": run.clip_stem_plan_csv,
        "backend": backend,
        "sim_threshold": threshold(
            _audio_arg(args, "sim_threshold", args.sim_threshold, 0.65), "sim-threshold"
        ),
        "min_margin": float(_audio_arg(args, "min_margin", args.min_margin, 0.05)),
    }
    resolve_voice_clips(**clip_options)
    return 0
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _audio_map_dialogue(args: argparse.Namespace) -> int:
    """Run ``audio map-dialogue`` and return the exit code.

    Loads dialogue lines, the shot manifest and a clip plan, materializes
    per-line dialogue stems, then maps the dialogue onto shots.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()

    lines_path = _path_arg(args, "dialogue_lines", args.dialogue_lines, required=False)
    lines = load_dialogue_lines(lines_path or run.dialogue_lines_json)
    shots = load_shot_manifest(_path_arg(args, "shots", args.shots, required=True))

    # Clip plan precedence: explicit/configured path, then an existing
    # dialogue stem plan, then the clip stem plan.
    clip_plan_path = _path_arg(args, "clip_plan", args.clip_plan, required=False)
    if not clip_plan_path:
        if run.dialogue_stem_plan_json.exists():
            clip_plan_path = run.dialogue_stem_plan_json
        else:
            clip_plan_path = run.clip_stem_plan_json
    clip_plan = read_json(clip_plan_path).get("clips") or []

    shot_speakers = _shot_speakers_arg(args, run)
    dialogue_plan = materialize_dialogue_line_stems(
        clip_plan=clip_plan,
        output_dir=run.dialogue_mapped_dir,
        output_json=run.dialogue_stem_plan_json,
        output_csv=run.dialogue_stem_plan_csv,
        force=args.force,
    )

    # A falsy CLI flag is passed as None so the project config can decide.
    cut_shot_stems = bool(_audio_arg(args, "cut_shot_stems", args.cut_shot_stems or None, False))
    mapping_options = {
        "lines": lines,
        "shots": shots,
        "clip_plan": dialogue_plan,
        "output_json": run.shot_dialogue_map_json,
        "output_csv": run.shot_dialogue_map_csv,
        "shot_plan_json": run.shot_stem_plan_json,
        "shot_plan_csv": run.shot_stem_plan_csv,
        "mapped_dir": run.shot_mapped_dir if cut_shot_stems else None,
        "shot_speakers_by_id": shot_speakers,
        "min_overlap_ratio": float(
            _audio_arg(args, "min_overlap_ratio", args.min_overlap_ratio, 0.01)
        ),
        "force": args.force,
        "ffmpeg": str(_audio_arg(args, "ffmpeg", args.ffmpeg, "ffmpeg")),
    }
    map_dialogue_to_shots(**mapping_options)
    return 0
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import glob as glob_module
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Literal, overload
|
|
7
|
+
|
|
8
|
+
from unrender.manifests import read_json
|
|
9
|
+
from unrender.media.names import safe_name
|
|
10
|
+
from unrender.project import RunPaths
|
|
11
|
+
from unrender.project.config import ProjectConfig, load_speaker_config, resolve_project_configs
|
|
12
|
+
from unrender.shots.stems import StemSource, load_stem_map, load_stem_sources
|
|
13
|
+
from unrender.speakers import configured_speakers
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _project_path_issue(
    project: ProjectConfig,
    key: str,
    *,
    allow_glob: bool = False,
) -> str | None:
    """Describe what is wrong with the project's ``paths.<key>``, if anything.

    Returns ``None`` when the value is unset, is an ``http(s)`` URL, matches at
    least one file (glob form, only when *allow_glob* is true), or exists on
    disk. Otherwise returns a human-readable message.
    """
    raw = project.string_value(key)
    if not raw or raw.startswith(("http://", "https://")):
        return None

    # Only treat the value as a pattern when it contains a glob metacharacter.
    if allow_glob and not set("*?[]").isdisjoint(raw):
        if glob_module.glob(raw):
            return None
        return f"paths.{key} matched no files: {raw}"

    resolved = project.path_value(key)
    if resolved is None or resolved.exists():
        return None
    return f"paths.{key} does not exist: {resolved}"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _projects_from_args(args: argparse.Namespace) -> list[ProjectConfig]:
    """Resolve the project specs in ``args.projects``; empty list when none are given."""
    requested = getattr(args, "projects", None)
    if not requested:
        return []
    return resolve_project_configs(list(map(str, requested)))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _project(args: argparse.Namespace) -> ProjectConfig | None:
    """Return ``args.project_config``, or ``None`` when the attribute is absent."""
    try:
        return args.project_config
    except AttributeError:
        return None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _run_paths(args: argparse.Namespace, *, explicit: Path | None) -> RunPaths:
    """Build the ``RunPaths`` for this invocation.

    The *explicit* directory wins; otherwise the project's ``run_dir`` path is
    used.

    Raises:
        ValueError: if neither source provides a run directory.
    """
    if explicit is not None:
        return RunPaths.from_path(explicit)

    project = _project(args)
    configured = None if project is None else project.path_value("run_dir")
    if configured is None:
        raise ValueError("missing --run-dir/--out/--run (or paths.run_dir in project config)")
    return RunPaths.from_path(configured)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@overload
def _path_arg(
    args: argparse.Namespace,
    key: str,
    explicit: Path | None,
    *,
    required: Literal[True],
) -> Path: ...


@overload
def _path_arg(
    args: argparse.Namespace,
    key: str,
    explicit: Path | None,
    *,
    required: Literal[False],
) -> Path | None: ...


def _path_arg(
    args: argparse.Namespace,
    key: str,
    explicit: Path | None,
    *,
    required: bool,
) -> Path | None:
    """Resolve a path option from the CLI value or the project config.

    An *explicit* path is returned with ``~`` expanded. Otherwise the project's
    ``path_value(key)`` is returned as-is when it is set.

    Raises:
        ValueError: if nothing is found and *required* is true.
    """
    if explicit is not None:
        return explicit.expanduser()

    project = _project(args)
    configured = None if project is None else project.path_value(key)
    if configured is not None:
        return configured

    if not required:
        return None
    flag = key.replace("_", "-")
    raise ValueError(f"missing --{flag} (or paths.{key} in project config)")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _dx_stem_arg(
    args: argparse.Namespace,
    run: RunPaths,
    *,
    explicit: str | None,
    required: bool,
) -> str:
    """Resolve the dialogue (DX) stem to use.

    Precedence: the *explicit* value or the project's ``dx_stem`` path (and
    its aliases), then a stem derived by ``_derived_dialogue_stem``. Returns an
    empty string when nothing is found and *required* is false.

    Raises:
        ValueError: if nothing is found and *required* is true.
    """
    from_config = _string_arg(
        args,
        "dx_stem",
        explicit,
        required=False,
        aliases=("dialogue_stem", "dx_path", "dialogue_path"),
    )
    if from_config:
        return from_config

    generated = _derived_dialogue_stem(args, run)
    if generated is not None:
        return str(generated)

    if not required:
        return ""
    raise ValueError(
        "missing --dx-stem (or paths.dx_stem/dialogue_stem in project config). "
        "If you configured paths.full_audio, run `unrender audio separate -p <project>` first "
        "so audio/source_separation.json can provide the generated DX stem."
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _derived_dialogue_stem(args: argparse.Namespace, run: RunPaths) -> Path | None:
    """Find a dialogue stem left behind by an earlier source separation.

    First tries the ``dialogue_stem`` entry of the run's source-separation JSON
    (only if that file exists on disk), then the first sorted
    ``<prefix>_DX_stem.*`` file in the run's source-stems directory.
    """
    manifest = run.source_separation_json
    if manifest.exists():
        recorded = str(read_json(manifest).get("dialogue_stem") or "").strip()
        if recorded:
            recorded_path = Path(recorded).expanduser()
            if recorded_path.exists():
                return recorded_path

    prefix = _audio_output_prefix(args, run)
    # min() yields the same element as sorted(...)[0], or None when empty.
    return min(run.source_stems_dir.glob(f"{prefix}_DX_stem.*"), default=None)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _shot_speakers_arg(args: argparse.Namespace, run: RunPaths) -> dict[str, list[str]]:
    """Map shot ids to speaker names read from the shot-matches JSON.

    Uses ``args.shot_matches`` when set, else the run's shot-matches file; a
    missing file yields an empty mapping. For each row the ``accepted`` value
    is preferred over ``proposed_value``; rows without a shot id or without
    speakers are skipped.
    """
    override = getattr(args, "shot_matches", None)
    source = override.expanduser() if override else run.shot_matches_json
    if not source.exists():
        return {}

    by_shot: dict[str, list[str]] = {}
    for entry in read_json(source).get("shots") or []:
        shot_id = str(entry.get("shot_id") or "").strip()
        if shot_id:
            names = _speaker_values(entry.get("accepted") or entry.get("proposed_value") or "")
            if names:
                by_shot[shot_id] = names
    return by_shot
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _speaker_values(value: Any) -> list[str]:
    """Normalize a speaker field into unique upper-case names, first-seen order.

    A list is used item by item; any other value is stringified and split on
    ``,`` and ``;``. Items are stripped and blank items are dropped.
    """
    if isinstance(value, list):
        items = value
    else:
        items = str(value or "").replace(";", ",").split(",")
    cleaned = (str(item or "").strip().upper() for item in items)
    # dict.fromkeys keeps the first occurrence of each name, in order.
    return list(dict.fromkeys(name for name in cleaned if name))
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _string_arg(
    args: argparse.Namespace,
    key: str,
    explicit: str | None,
    *,
    required: bool,
    aliases: tuple[str, ...] = (),
) -> str | None:
    """Resolve a string option from the CLI value or the project config.

    A truthy *explicit* value wins. Otherwise the project's ``string_value`` is
    tried for *key* and then each alias, and the first truthy result is used.

    Raises:
        ValueError: if nothing is found and *required* is true.
    """
    if explicit:
        return explicit

    project = _project(args)
    if project is not None:
        # map() is lazy, so lookups stop at the first truthy value.
        found = next(filter(None, map(project.string_value, (key, *aliases))), None)
        if found:
            return found

    if not required:
        return None
    flag = key.replace("_", "-")
    raise ValueError(f"missing --{flag} (or paths.{key} in project config)")
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _speaker_db_path(args: argparse.Namespace, explicit: Path | None, run: RunPaths) -> Path:
    """Return the explicit or configured speaker DB path, else the run's default."""
    chosen = _path_arg(args, "speaker_db", explicit, required=False)
    if chosen:
        return chosen
    return run.speaker_db
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _speaker_config_arg(args: argparse.Namespace) -> dict[str, Any]:
    """Return speaker configuration from ``--config`` or the active project.

    Raises:
        ValueError: if neither a config file nor a project is available.
    """
    config_path = getattr(args, "config", None)
    if config_path is not None:
        return load_speaker_config(config_path.expanduser())

    project = _project(args)
    if project is None:
        raise ValueError("missing --config (or -p/--project with speakers)")
    return project.data
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _config_speaker_names(args: argparse.Namespace) -> list[str]:
    """List the ``name`` of each speaker configured in the project (empty without one)."""
    project = _project(args)
    names: list[str] = []
    if project is not None:
        for meta in configured_speakers(project.data).values():
            names.append(meta["name"])
    return names
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _stem_sources_arg(
    args: argparse.Namespace,
    run: RunPaths,
) -> list[StemSource]:
    """Load the speaker stem sources for this invocation.

    Precedence: ``args.stems``, the project's ``stem_map`` object, the
    project's ``separated_stems`` path, then the run's unmapped-speaker glob.

    Raises:
        ValueError: if the project's ``stem_map`` is set but is not an object.
    """
    if args.stems:
        return load_stem_sources(args.stems)

    project = _project(args)
    if project is None:
        return load_stem_sources(run.unmapped_speaker_glob)

    stem_map = project.data.get("stem_map")
    if stem_map:
        if not isinstance(stem_map, dict):
            raise ValueError(f"project config stem_map must be an object: {project.path}")
        return load_stem_map(stem_map, speaker_names=_config_speaker_names(args))

    separated = project.string_value("separated_stems")
    return load_stem_sources(separated if separated else run.unmapped_speaker_glob)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _voice_clips_spec_arg(args: argparse.Namespace, run: RunPaths) -> str:
    """Pick the voice-clip spec: ``args.clips``, project config, then the run default."""
    if args.clips:
        return args.clips
    configured = _string_arg(args, "voice_clips", None, required=False, aliases=("clips",))
    if configured:
        return configured
    return _default_voice_clip_spec(run)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _default_voice_clip_spec(run: RunPaths) -> str:
    """Return the run's voice-clips JSON path if it exists, else its clip glob."""
    manifest = run.voice_clips_json
    if manifest.exists():
        return str(manifest)
    return run.voice_clips_glob
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _default_voice_clone_plan(run: RunPaths) -> Path:
    """Prefer the run's dialogue stem plan when it exists, else the clip stem plan."""
    dialogue_plan = run.dialogue_stem_plan_json
    return dialogue_plan if dialogue_plan.exists() else run.clip_stem_plan_json
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _section_arg(
    args: argparse.Namespace,
    section: str,
    key: str,
    explicit: Any,
    default: Any,
) -> Any:
    """Resolve an option: explicit value, then project ``<section>.<key>``, then default.

    ``None`` means "not set" at every level, so falsy values such as ``0`` or
    ``False`` are honoured.

    Raises:
        ValueError: if the project's *section* is present but not an object.
    """
    if explicit is not None:
        return explicit

    project = _project(args)
    if project is None:
        return default

    section_data = project.data.get(section) or {}
    if not isinstance(section_data, dict):
        raise ValueError(f"project config {section} must be an object: {project.path}")
    configured = section_data.get(key)
    return default if configured is None else configured
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _audio_arg(args: argparse.Namespace, key: str, explicit: Any, default: Any) -> Any:
    """Resolve *key* against the project's ``audio`` section via ``_section_arg``."""
    return _section_arg(args, section="audio", key=key, explicit=explicit, default=default)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _audio_output_prefix(args: argparse.Namespace, run: RunPaths) -> str:
    """Return the sanitized filename prefix for audio outputs.

    Taken from ``args.prefix`` or the ``audioshake_prefix`` audio setting, else
    the project name, else the run root directory name; always passed through
    ``safe_name`` with ``"audio"`` as fallback.
    """
    label = _audio_arg(args, "audioshake_prefix", getattr(args, "prefix", None), None)
    if not label:
        project = _project(args)
        label = run.root.name if project is None else project.name
    return safe_name(label, fallback="audio")
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _face_arg(args: argparse.Namespace, key: str, explicit: Any, default: Any) -> Any:
    """Resolve *key* against the project's ``face`` section via ``_section_arg``."""
    return _section_arg(args, section="face", key=key, explicit=explicit, default=default)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _voice_arg(args: argparse.Namespace, key: str, explicit: Any, default: Any) -> Any:
    """Resolve *key* against the project's ``voice`` section via ``_section_arg``."""
    return _section_arg(args, section="voice", key=key, explicit=explicit, default=default)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _shots_arg(args: argparse.Namespace, key: str, explicit: Any, default: Any) -> Any:
    """Resolve *key* against the project's ``shots`` section via ``_section_arg``."""
    return _section_arg(args, section="shots", key=key, explicit=explicit, default=default)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _voice_clone_arg(args: argparse.Namespace, key: str, explicit: Any, default: Any) -> Any:
    """Resolve *key* against the project's ``voice_clone`` section via ``_section_arg``."""
    return _section_arg(args, section="voice_clone", key=key, explicit=explicit, default=default)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _timeline_arg(args: argparse.Namespace, key: str, explicit: Any, default: Any) -> Any:
    """Resolve *key* against the project's ``timeline`` section via ``_section_arg``."""
    return _section_arg(args, section="timeline", key=key, explicit=explicit, default=default)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _voice_clone_speakers_arg(args: argparse.Namespace) -> list[str] | None:
    """Return the speakers selected for voice cloning, or ``None`` if unset.

    A truthy ``args.speakers`` is returned unchanged. Otherwise the
    ``speakers`` entry of the project's ``voice_clone`` section is used; it may
    be a list or a comma-separated string. In both forms every entry is
    stripped and blank entries are dropped, so a name resolves the same way
    whichever form the config uses.
    """
    if args.speakers:
        return args.speakers

    configured = _voice_clone_arg(args, "speakers", None, None)
    if isinstance(configured, list):
        entries = [str(item) for item in configured]
    elif isinstance(configured, str) and configured.strip():
        entries = configured.split(",")
    else:
        return None
    # Strip list entries too: the list form used to keep surrounding
    # whitespace even though blank detection already relied on stripping.
    return [name for name in (entry.strip() for entry in entries) if name]
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _timeline_fps(args: argparse.Namespace, default: float) -> float:
    """Resolve the timeline frame rate as a float.

    Precedence: ``args.fps``, the project's ``audio.fps`` (or
    ``audio.dub_script_fps``), the project's top-level ``fps``, then *default*.
    ``None`` and the empty string count as unset.
    """
    cli_value = getattr(args, "fps", None)
    if cli_value is not None:
        return float(cli_value)

    project = _project(args)
    if project is None:
        return default

    candidates = []
    audio = project.data.get("audio")
    if isinstance(audio, dict):
        candidates.append(audio.get("fps") or audio.get("dub_script_fps"))
    candidates.append(project.data.get("fps"))

    for candidate in candidates:
        if candidate is not None and candidate != "":
            return float(candidate)
    return default
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _audioshake_client(api_key: str):
    """Build an ``AudioShakeClient`` for *api_key*.

    The client class is imported inside the function rather than at module
    import time.
    """
    from unrender.separation.audioshake import AudioShakeClient

    client = AudioShakeClient(api_key=api_key)
    return client
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def _bandit_settings(args: argparse.Namespace):
    """Collect the Bandit options (CLI first, then ``audio`` config) into settings.

    ``bandit_bigshifts`` is converted to ``int`` unless it is unset or empty.
    """
    from unrender.separation.bandit import settings_from_values

    bigshifts = _audio_arg(args, "bandit_bigshifts", args.bandit_bigshifts, None)
    values = {
        "config": _audio_arg(args, "bandit_config", args.bandit_config, None),
        "checkpoint": _audio_arg(args, "bandit_checkpoint", args.bandit_checkpoint, None),
        "device": _audio_arg(args, "bandit_device", args.bandit_device, None),
        "bigshifts": None if bigshifts in (None, "") else int(bigshifts),
    }
    return settings_from_values(**values)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
from unrender.cli.commands.context import _run_paths
|
|
6
|
+
from unrender.exports.artifacts import export_artifacts
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _export_artifacts(args: argparse.Namespace) -> int:
    """Run the export command: pass the run (``--run``) and optional ``--out`` to ``export_artifacts``."""
    run = _run_paths(args, explicit=args.run)
    destination = args.out.expanduser() if args.out else None
    export_artifacts(run, destination)
    return 0
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
from unrender.cli.commands.context import (
|
|
6
|
+
_face_arg,
|
|
7
|
+
_path_arg,
|
|
8
|
+
_run_paths,
|
|
9
|
+
_shots_arg,
|
|
10
|
+
_speaker_db_path,
|
|
11
|
+
)
|
|
12
|
+
from unrender.cli.helpers import threshold
|
|
13
|
+
from unrender.identity.face import build_face_db, match_shots
|
|
14
|
+
from unrender.manifests import load_shot_manifest
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _face_build(args: argparse.Namespace) -> int:
    """Run ``face build`` and return the exit code.

    Calls ``build_face_db`` on the proxy master video (``--video`` or the
    project's ``proxy_master`` path), with tuning values taken from the CLI or
    the project's ``face`` section.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()

    options = {
        "video_path": _path_arg(args, "proxy_master", args.video, required=True),
        "face_db_path": run.face_db,
        "review_dir": run.face_review_dir,
        "interval_sec": float(_face_arg(args, "interval", args.interval, 2.0)),
        # A falsy frame limit (0 or unset) is passed as None.
        "max_frames": args.max_frames or None,
        "threshold": float(_face_arg(args, "threshold", args.threshold, 0.5)),
        "min_cluster_size": int(_face_arg(args, "min_cluster_size", args.min_cluster_size, 5)),
        "min_confidence": float(_face_arg(args, "min_confidence", args.min_confidence, 0.5)),
        "min_face_px": int(_face_arg(args, "min_face_px", args.min_face_px, 36)),
        "force": args.force,
    }
    build_face_db(**options)
    return 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _shots_match(args: argparse.Namespace) -> int:
    """Run ``shots match`` and return the exit code.

    Loads the required shot manifest and calls ``match_shots`` against the
    speaker DB, targeting the run's shot-matches JSON/CSV. Tuning values come
    from the CLI or the project's ``shots`` section.
    """
    run = _run_paths(args, explicit=args.out)
    run.ensure()
    shots = load_shot_manifest(_path_arg(args, "shots", args.shots, required=True))

    options = {
        "shots": shots,
        "speaker_db_path": _speaker_db_path(args, args.speaker_db, run),
        "output_json": run.shot_matches_json,
        "output_csv": run.shot_matches_csv,
        "samples_per_shot": int(_shots_arg(args, "samples_per_shot", args.samples_per_shot, 8)),
        "sim_threshold": threshold(
            _shots_arg(args, "sim_threshold", args.sim_threshold, 0.45), "sim-threshold"
        ),
        "min_confidence": threshold(
            _shots_arg(args, "min_confidence", args.min_confidence, 0.5), "min-confidence"
        ),
        "min_votes": int(_shots_arg(args, "min_votes", args.min_votes, 2)),
        "min_margin": float(_shots_arg(args, "min_margin", args.min_margin, 0.05)),
    }
    match_shots(**options)
    return 0
|