videopython 0.33.4__tar.gz → 0.33.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.33.4 → videopython-0.33.5}/PKG-INFO +1 -1
- {videopython-0.33.4 → videopython-0.33.5}/pyproject.toml +1 -1
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/transcription.py +60 -88
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/video_edit.py +116 -4
- {videopython-0.33.4 → videopython-0.33.5}/.gitignore +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/LICENSE +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/README.md +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/_device.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/config.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/dubber.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/expressiveness.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/loudness.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/pipeline.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/remux.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/dubbing/voice_sample.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/transforms.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/understanding/faces.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/video_analysis/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/video_analysis/analyzer.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/video_analysis/models.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/video_analysis/sampling.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/ai/video_analysis/stages.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/audio/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/audio/analysis.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/audio/audio.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/_dimensions.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/_ffmpeg.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/_video_io.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/description.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/exceptions.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/fonts/DejaVuSans.ttf +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/fonts/LICENSE_DEJAVU +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/fonts/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/image_text.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/base/video.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/__init__.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/effects.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/operation.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/streaming.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/transcription_overlay.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/editing/transforms.py +0 -0
- {videopython-0.33.4 → videopython-0.33.5}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from dataclasses import dataclass
|
|
3
|
+
from dataclasses import dataclass, replace
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
@@ -79,6 +79,38 @@ class TranscriptionSegment:
|
|
|
79
79
|
compression_ratio=data.get("compression_ratio"),
|
|
80
80
|
)
|
|
81
81
|
|
|
82
|
+
@classmethod
def from_words(
    cls,
    words: list[TranscriptionWord],
    *,
    speaker: str | None = None,
    avg_logprob: float | None = None,
    no_speech_prob: float | None = None,
    compression_ratio: float | None = None,
) -> TranscriptionSegment:
    """Build a segment spanning ``words``, deriving start/end/text from them.

    ``start`` is taken from the first word, ``end`` from the last word, and
    ``text`` is every ``word`` joined by single spaces. ``speaker`` and the
    confidence fields are forwarded unchanged: callers re-segmenting *within*
    one known source segment can preserve them, while callers regrouping
    words across segments can simply omit them, leaving ``None``.

    Args:
        words: The words the segment covers, in order. Any iterable is
            accepted; it is materialised into a new list exactly once, so
            the result never aliases the caller's container.
        speaker: Speaker label stored on the segment.
        avg_logprob: Confidence value stored on the segment.
        no_speech_prob: Confidence value stored on the segment.
        compression_ratio: Confidence value stored on the segment.

    Returns:
        A new segment built via ``cls(...)``.

    Raises:
        ValueError: If ``words`` contains no items.
    """
    # Materialise first: the emptiness test and the [0]/[-1] lookups then
    # behave the same for lists, tuples and one-shot iterators, and the copy
    # doubles as the segment's own word list.
    word_list = list(words)
    if not word_list:
        raise ValueError("from_words requires a non-empty word list")
    return cls(
        start=word_list[0].start,
        end=word_list[-1].end,
        text=" ".join(w.word for w in word_list),
        words=word_list,
        speaker=speaker,
        avg_logprob=avg_logprob,
        no_speech_prob=no_speech_prob,
        compression_ratio=compression_ratio,
    )
|
|
113
|
+
|
|
82
114
|
|
|
83
115
|
class Transcription:
|
|
84
116
|
def __init__(
|
|
@@ -124,39 +156,19 @@ class Transcription:
|
|
|
124
156
|
return []
|
|
125
157
|
|
|
126
158
|
current_speaker = words[0].speaker
|
|
127
|
-
current_words = []
|
|
128
|
-
segment_start = words[0].start
|
|
159
|
+
current_words: list[TranscriptionWord] = []
|
|
129
160
|
segments = []
|
|
130
161
|
|
|
131
162
|
for word in words:
|
|
132
163
|
if current_speaker == word.speaker:
|
|
133
164
|
current_words.append(word)
|
|
134
165
|
else:
|
|
135
|
-
|
|
136
|
-
segments.append(
|
|
137
|
-
TranscriptionSegment(
|
|
138
|
-
start=segment_start,
|
|
139
|
-
end=current_words[-1].end,
|
|
140
|
-
text=segment_text.strip(),
|
|
141
|
-
words=current_words.copy(),
|
|
142
|
-
speaker=current_speaker,
|
|
143
|
-
)
|
|
144
|
-
)
|
|
166
|
+
segments.append(TranscriptionSegment.from_words(current_words, speaker=current_speaker))
|
|
145
167
|
current_speaker = word.speaker
|
|
146
168
|
current_words = [word]
|
|
147
|
-
segment_start = word.start
|
|
148
169
|
|
|
149
170
|
if current_words:
|
|
150
|
-
|
|
151
|
-
segments.append(
|
|
152
|
-
TranscriptionSegment(
|
|
153
|
-
start=segment_start,
|
|
154
|
-
end=current_words[-1].end,
|
|
155
|
-
text=segment_text.strip(),
|
|
156
|
-
words=current_words.copy(),
|
|
157
|
-
speaker=current_speaker,
|
|
158
|
-
)
|
|
159
|
-
)
|
|
171
|
+
segments.append(TranscriptionSegment.from_words(current_words, speaker=current_speaker))
|
|
160
172
|
|
|
161
173
|
return segments
|
|
162
174
|
|
|
@@ -190,22 +202,14 @@ class Transcription:
|
|
|
190
202
|
offset_segments = []
|
|
191
203
|
|
|
192
204
|
for segment in self.segments:
|
|
193
|
-
offset_words = [
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
)
|
|
200
|
-
|
|
205
|
+
offset_words = [
|
|
206
|
+
TranscriptionWord(start=w.start + time, end=w.end + time, word=w.word, speaker=w.speaker)
|
|
207
|
+
for w in segment.words
|
|
208
|
+
]
|
|
209
|
+
# ``replace`` carries text, speaker, and confidence fields through a
|
|
210
|
+
# pure timing shift unchanged -- only timestamps move.
|
|
201
211
|
offset_segments.append(
|
|
202
|
-
|
|
203
|
-
start=segment.start + time,
|
|
204
|
-
end=segment.end + time,
|
|
205
|
-
text=segment.text,
|
|
206
|
-
words=offset_words,
|
|
207
|
-
speaker=segment.speaker,
|
|
208
|
-
)
|
|
212
|
+
replace(segment, start=segment.start + time, end=segment.end + time, words=offset_words)
|
|
209
213
|
)
|
|
210
214
|
|
|
211
215
|
return Transcription(segments=offset_segments, language=self.language)
|
|
@@ -245,16 +249,9 @@ class Transcription:
|
|
|
245
249
|
def _flush(words: list[TranscriptionWord]) -> None:
|
|
246
250
|
if not words:
|
|
247
251
|
return
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
start=words[0].start,
|
|
252
|
-
end=words[-1].end,
|
|
253
|
-
text=segment_text,
|
|
254
|
-
words=words.copy(),
|
|
255
|
-
speaker=words[0].speaker,
|
|
256
|
-
)
|
|
257
|
-
)
|
|
252
|
+
# Words here are regrouped across original segments, so the source
|
|
253
|
+
# segments' confidence fields no longer apply -- left as None.
|
|
254
|
+
standardized_segments.append(TranscriptionSegment.from_words(words, speaker=words[0].speaker))
|
|
258
255
|
|
|
259
256
|
if time is not None:
|
|
260
257
|
current_words: list[TranscriptionWord] = []
|
|
@@ -315,18 +312,9 @@ class Transcription:
|
|
|
315
312
|
start_of_sentence = True
|
|
316
313
|
new_words.append(TranscriptionWord(start=word.start, end=word.end, word=token, speaker=word.speaker))
|
|
317
314
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
end=segment.end,
|
|
322
|
-
text=" ".join(w.word for w in new_words),
|
|
323
|
-
words=new_words,
|
|
324
|
-
speaker=segment.speaker,
|
|
325
|
-
avg_logprob=segment.avg_logprob,
|
|
326
|
-
no_speech_prob=segment.no_speech_prob,
|
|
327
|
-
compression_ratio=segment.compression_ratio,
|
|
328
|
-
)
|
|
329
|
-
)
|
|
315
|
+
# Casing-only rewrite: segment boundaries, speaker, and confidence
|
|
316
|
+
# are unchanged; only the tokens (and joined text) differ.
|
|
317
|
+
capitalized_segments.append(replace(segment, text=" ".join(w.word for w in new_words), words=new_words))
|
|
330
318
|
|
|
331
319
|
return Transcription(segments=capitalized_segments, language=self.language)
|
|
332
320
|
|
|
@@ -353,16 +341,17 @@ class Transcription:
|
|
|
353
341
|
for segment in self.segments:
|
|
354
342
|
words = segment.words
|
|
355
343
|
if not words:
|
|
356
|
-
|
|
344
|
+
# Nothing to split; emit a fresh copy so the result never
|
|
345
|
+
# aliases the source segment.
|
|
346
|
+
chunked_segments.append(replace(segment, words=list(segment.words)))
|
|
357
347
|
continue
|
|
358
348
|
for i in range(0, len(words), max_words):
|
|
359
349
|
group = words[i : i + max_words]
|
|
350
|
+
# Splitting *within* one source segment -- its confidence
|
|
351
|
+
# fields still apply, so carry them through.
|
|
360
352
|
chunked_segments.append(
|
|
361
|
-
TranscriptionSegment(
|
|
362
|
-
|
|
363
|
-
end=group[-1].end,
|
|
364
|
-
text=" ".join(w.word for w in group),
|
|
365
|
-
words=list(group),
|
|
353
|
+
TranscriptionSegment.from_words(
|
|
354
|
+
group,
|
|
366
355
|
speaker=segment.speaker,
|
|
367
356
|
avg_logprob=segment.avg_logprob,
|
|
368
357
|
no_speech_prob=segment.no_speech_prob,
|
|
@@ -409,34 +398,17 @@ class Transcription:
|
|
|
409
398
|
if word.speaker == current_speaker:
|
|
410
399
|
current_words.append(word)
|
|
411
400
|
else:
|
|
412
|
-
# Finish current segment
|
|
401
|
+
# Finish current segment (words are regrouped by speaker across the
|
|
402
|
+
# original segments, so segment-level confidence no longer applies -- omitted)
|
|
413
403
|
if current_words:
|
|
414
|
-
|
|
415
|
-
sliced_segments.append(
|
|
416
|
-
TranscriptionSegment(
|
|
417
|
-
start=current_words[0].start,
|
|
418
|
-
end=current_words[-1].end,
|
|
419
|
-
text=segment_text,
|
|
420
|
-
words=current_words.copy(),
|
|
421
|
-
speaker=current_speaker,
|
|
422
|
-
)
|
|
423
|
-
)
|
|
404
|
+
sliced_segments.append(TranscriptionSegment.from_words(current_words, speaker=current_speaker))
|
|
424
405
|
# Start new segment
|
|
425
406
|
current_speaker = word.speaker
|
|
426
407
|
current_words = [word]
|
|
427
408
|
|
|
428
409
|
# Add final segment
|
|
429
410
|
if current_words:
|
|
430
|
-
|
|
431
|
-
sliced_segments.append(
|
|
432
|
-
TranscriptionSegment(
|
|
433
|
-
start=current_words[0].start,
|
|
434
|
-
end=current_words[-1].end,
|
|
435
|
-
text=segment_text,
|
|
436
|
-
words=current_words.copy(),
|
|
437
|
-
speaker=current_speaker,
|
|
438
|
-
)
|
|
439
|
-
)
|
|
411
|
+
sliced_segments.append(TranscriptionSegment.from_words(current_words, speaker=current_speaker))
|
|
440
412
|
|
|
441
413
|
return Transcription(segments=sliced_segments, language=self.language)
|
|
442
414
|
|
|
@@ -24,7 +24,7 @@ import subprocess
|
|
|
24
24
|
import tempfile
|
|
25
25
|
import warnings
|
|
26
26
|
from pathlib import Path
|
|
27
|
-
from typing import Annotated, Any
|
|
27
|
+
from typing import Annotated, Any, Protocol, runtime_checkable
|
|
28
28
|
|
|
29
29
|
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, SerializeAsAny, model_validator
|
|
30
30
|
|
|
@@ -65,6 +65,72 @@ def _resolve_operation(value: Any) -> Operation:
|
|
|
65
65
|
OperationInput = Annotated[SerializeAsAny[Operation], BeforeValidator(_resolve_operation)]
|
|
66
66
|
|
|
67
67
|
|
|
68
|
+
@runtime_checkable
class SegmentRebaseable(Protocol):
    """Structural type for context values that carry a source-absolute timeline.

    A context entry exposing both ``slice(start, end)`` and ``offset(delta)``
    -- e.g. :class:`videopython.base.transcription.Transcription` -- is
    re-based onto each segment's 0-based local timeline by the runner, with no
    per-type wiring. Matching on structure rather than a concrete class keeps
    the context mechanism generic for future time-based context (beat maps,
    scene markers, ...) and avoids a layering dependency from the editing
    layer onto every such type.

    The protocol is ``runtime_checkable``: an ``isinstance`` check only
    verifies that both methods exist, not that their signatures match.
    """

    def slice(self, start: float, end: float) -> SegmentRebaseable | None:
        """Return the portion for ``start``..``end``; ``None`` means nothing remains."""
        ...

    def offset(self, delta: float) -> SegmentRebaseable:
        """Return the value with its timeline shifted by ``delta``."""
        ...


def _rebaseable_keys(context: dict[str, Any] | None) -> set[str]:
    """Return the context keys whose values implement :class:`SegmentRebaseable`.

    ``None`` and an empty mapping both produce an empty set.
    """
    if not context:
        return set()
    return {key for key, value in context.items() if isinstance(value, SegmentRebaseable)}


def _segment_context(
    context: dict[str, Any] | None,
    start: float,
    end: float,
) -> dict[str, Any] | None:
    """Re-base time-based context entries onto a cut segment's local timeline.

    A cut segment is decoded 0-based -- its first frame is ``t=0`` -- but
    context values may carry source-absolute timestamps. Every value
    implementing :class:`SegmentRebaseable` (e.g. a ``Transcription``) is
    sliced with ``slice(start, end)`` and shifted by ``-start`` so segment
    operations (``add_subtitles``, ``silence_removal``) see segment-local
    time. Values that don't implement the protocol pass through untouched.

    Slicing always runs (even for ``start == 0``) so out-of-range entries do
    not bleed in. When ``slice`` returns ``None`` the key is dropped rather
    than passed along, so the consuming operation can raise its own
    "requires ..." error instead of silently doing nothing.

    Scope: per-segment only. ``post_operations`` run on the assembled,
    concatenated timeline; re-basing across a multi-segment concat is
    unsupported and rejected by :meth:`VideoEdit._assert_post_ops_supported`.

    Args:
        context: Runtime context mapping, or ``None``.
        start: Segment start on the source timeline.
        end: Segment end on the source timeline.

    Returns:
        ``context`` itself when it is falsy or holds no re-baseable value;
        otherwise a new dict. The input mapping is never mutated.
    """
    if not context:
        return context
    # Share the helper used by the post-operation guard so both code paths
    # always agree on which entries count as time-based.
    time_based = _rebaseable_keys(context)
    if not time_based:
        return context
    rebased = dict(context)
    for key, value in context.items():
        if key not in time_based:
            continue
        window = value.slice(start, end)
        if window is None:
            # Nothing falls inside this segment: drop the key entirely.
            del rebased[key]
        else:
            rebased[key] = window.offset(-start)
    return rebased
|
|
132
|
+
|
|
133
|
+
|
|
68
134
|
def _apply_with_context(op: Operation, video: Video, context: dict[str, Any] | None) -> Video:
|
|
69
135
|
"""Apply ``op`` to ``video``, threading ``op.requires`` keys from ``context``."""
|
|
70
136
|
if op.requires and context:
|
|
@@ -139,9 +205,14 @@ class SegmentConfig(BaseModel):
|
|
|
139
205
|
)
|
|
140
206
|
|
|
141
207
|
def process(self, video: Video, context: dict[str, Any] | None = None) -> Video:
|
|
142
|
-
"""Apply every operation in this segment to ``video`` in order.
|
|
208
|
+
"""Apply every operation in this segment to ``video`` in order.
|
|
209
|
+
|
|
210
|
+
Time-based context (e.g. ``transcription``) is re-based onto this
|
|
211
|
+
segment's 0-based local timeline before any operation sees it.
|
|
212
|
+
"""
|
|
213
|
+
seg_context = _segment_context(context, self.start, self.end)
|
|
143
214
|
for op in self.operations:
|
|
144
|
-
video = _apply_with_context(op, video,
|
|
215
|
+
video = _apply_with_context(op, video, seg_context)
|
|
145
216
|
return video
|
|
146
217
|
|
|
147
218
|
|
|
@@ -288,11 +359,38 @@ class VideoEdit(BaseModel):
|
|
|
288
359
|
metas.append(source_metadata[key])
|
|
289
360
|
return self._validate(metas, context)
|
|
290
361
|
|
|
362
|
+
def _assert_post_ops_supported(self, context: dict[str, Any] | None) -> None:
    """Fail fast when a post-operation needs time-based context on a multi-segment plan.

    ``post_operations`` run on the assembled, concatenated timeline. A
    source-absolute context value (e.g. a ``Transcription``) is not re-based
    across a multi-segment concat, and handing over the raw value would
    silently mis-time the operation. An actionable error is raised instead.
    Plans with at most one segment, or with no post-operations, are always
    accepted.

    Args:
        context: Runtime context mapping, or ``None``.

    Raises:
        ValueError: If the plan has more than one segment and a
            post-operation's ``requires`` names a time-based context key.
    """
    # Guard: only multi-segment plans that actually have post-operations
    # can be affected.
    if not (len(self.segments) > 1 and self.post_operations):
        return
    time_based = _rebaseable_keys(context)
    if not time_based:
        return
    for post_op in self.post_operations:
        conflicting = sorted(time_based.intersection(post_op.requires))
        if not conflicting:
            continue
        raise ValueError(
            f"post_operation '{post_op.op}' requires time-based context {conflicting}, but the plan "
            f"has {len(self.segments)} segments. post_operations run on the concatenated "
            "timeline and time-based context is not re-based across a multi-segment concat. "
            f"Move '{post_op.op}' into a segment, or use a single-segment plan."
        )
|
|
387
|
+
|
|
291
388
|
def _validate(
|
|
292
389
|
self,
|
|
293
390
|
source_metas: list[VideoMetadata],
|
|
294
391
|
context: dict[str, Any] | None,
|
|
295
392
|
) -> VideoMetadata:
|
|
393
|
+
self._assert_post_ops_supported(context)
|
|
296
394
|
cut_metas: list[VideoMetadata] = []
|
|
297
395
|
for i, (seg, meta) in enumerate(zip(self.segments, source_metas)):
|
|
298
396
|
if seg.end > meta.total_seconds + 1e-3:
|
|
@@ -325,10 +423,11 @@ class VideoEdit(BaseModel):
|
|
|
325
423
|
meta: VideoMetadata,
|
|
326
424
|
context: dict[str, Any] | None,
|
|
327
425
|
) -> VideoMetadata:
|
|
426
|
+
seg_context = _segment_context(context, segment.start, segment.end)
|
|
328
427
|
for op in segment.operations:
|
|
329
428
|
_validate_effect_window(op, meta.total_seconds)
|
|
330
429
|
try:
|
|
331
|
-
meta = _predict_with_context(op, meta,
|
|
430
|
+
meta = _predict_with_context(op, meta, seg_context)
|
|
332
431
|
except (ValueError, TypeError) as e:
|
|
333
432
|
raise ValueError(f"Segment {index}: metadata prediction failed for '{op.op}': {e}") from e
|
|
334
433
|
return meta
|
|
@@ -367,6 +466,7 @@ class VideoEdit(BaseModel):
|
|
|
367
466
|
|
|
368
467
|
def run(self, context: dict[str, Any] | None = None) -> Video:
|
|
369
468
|
"""Execute the plan in memory and return the final ``Video``."""
|
|
469
|
+
self._assert_post_ops_supported(context)
|
|
370
470
|
target_fps, target_w, target_h = self._matching_targets_from_disk()
|
|
371
471
|
videos = [
|
|
372
472
|
segment.process(segment.load(fps=target_fps, width=target_w, height=target_h), context)
|
|
@@ -393,6 +493,7 @@ class VideoEdit(BaseModel):
|
|
|
393
493
|
isn't streamable. Memory usage is O(1) w.r.t. video length for fully
|
|
394
494
|
streamable pipelines.
|
|
395
495
|
"""
|
|
496
|
+
self._assert_post_ops_supported(context)
|
|
396
497
|
output_path = Path(output_path).with_suffix(f".{format}")
|
|
397
498
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
398
499
|
|
|
@@ -412,6 +513,11 @@ class VideoEdit(BaseModel):
|
|
|
412
513
|
plan = plans[0]
|
|
413
514
|
total_frames = round((plan.end_second - plan.start_second) * plan.output_fps)
|
|
414
515
|
for op in self.post_operations:
|
|
516
|
+
if op.requires:
|
|
517
|
+
# Same reason as the per-segment guard: no runtime context
|
|
518
|
+
# in the streaming path. (Multi-segment + requires already
|
|
519
|
+
# raised by _assert_post_ops_supported.)
|
|
520
|
+
return self._run_to_file_eager(output_path, format, preset, crf, context)
|
|
415
521
|
if not isinstance(op, Effect) or not op.streamable:
|
|
416
522
|
return self._run_to_file_eager(output_path, format, preset, crf, context)
|
|
417
523
|
start_f, end_f = _effect_frame_range(op, plan.output_fps, total_frames)
|
|
@@ -477,6 +583,12 @@ class VideoEdit(BaseModel):
|
|
|
477
583
|
|
|
478
584
|
effect_schedule: list[EffectScheduleEntry] = []
|
|
479
585
|
for op in segment.operations:
|
|
586
|
+
if op.requires:
|
|
587
|
+
# Streaming schedules effects by frame range with no runtime
|
|
588
|
+
# context, so it can't supply -- let alone re-base onto the
|
|
589
|
+
# segment's local timeline -- anything an op `requires`. Defer
|
|
590
|
+
# to the eager path, where _segment_context handles re-basing.
|
|
591
|
+
return None
|
|
480
592
|
if isinstance(op, Effect):
|
|
481
593
|
if not op.streamable:
|
|
482
594
|
return None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|