videopython 0.31.3__tar.gz → 0.32.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.31.3 → videopython-0.32.0}/PKG-INFO +21 -8
- {videopython-0.31.3 → videopython-0.32.0}/README.md +20 -7
- {videopython-0.31.3 → videopython-0.32.0}/pyproject.toml +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/dubber.py +2 -2
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/models.py +2 -2
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/pipeline.py +7 -7
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/quality.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/remux.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/timing.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/generation/audio.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/generation/qwen3.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/generation/translation.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/transforms.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/understanding/audio.py +2 -2
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/understanding/separation.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/video_analysis.py +3 -3
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython}/audio/audio.py +5 -5
- videopython-0.32.0/src/videopython/base/__init__.py +62 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/base/_dimensions.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/base/_video_io.py +1 -1
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/base/description.py +4 -2
- {videopython-0.31.3/src/videopython/base/text → videopython-0.32.0/src/videopython/base}/image_text.py +3 -2
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/base/video.py +1 -1
- videopython-0.32.0/src/videopython/editing/__init__.py +61 -0
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/effects.py +3 -3
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/operation.py +1 -1
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/streaming.py +2 -2
- videopython-0.31.3/src/videopython/base/text/overlay.py → videopython-0.32.0/src/videopython/editing/transcription_overlay.py +3 -3
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/transforms.py +3 -3
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/editing/video_edit.py +6 -6
- videopython-0.31.3/src/videopython/base/__init__.py +0 -128
- videopython-0.31.3/src/videopython/base/scene.py +0 -456
- videopython-0.31.3/src/videopython/base/text/__init__.py +0 -13
- videopython-0.31.3/src/videopython/editing/__init__.py +0 -6
- {videopython-0.31.3 → videopython-0.32.0}/.gitignore +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/LICENSE +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/__init__.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/_device.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/understanding/faces.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython}/audio/__init__.py +0 -0
- {videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython}/audio/analysis.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/base/_ffmpeg.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/base/exceptions.py +0 -0
- {videopython-0.31.3/src/videopython/base/text → videopython-0.32.0/src/videopython/base}/transcription.py +0 -0
- {videopython-0.31.3 → videopython-0.32.0}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.31.3
|
|
3
|
+
Version: 0.32.0
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -91,7 +91,8 @@ Every editing primitive is an `Operation` subclass — a Pydantic model
|
|
|
91
91
|
whose fields ARE the JSON wire format. Apply one to a `Video`:
|
|
92
92
|
|
|
93
93
|
```python
|
|
94
|
-
from videopython.base import Video
|
|
94
|
+
from videopython.base import Video
|
|
95
|
+
from videopython.editing import CutSeconds, Resize, Fade
|
|
95
96
|
|
|
96
97
|
video = Video.from_path("raw.mp4")
|
|
97
98
|
video = CutSeconds(start=10, end=25).apply(video)
|
|
@@ -141,7 +142,7 @@ instead if you want the result back in memory as a `Video`.
|
|
|
141
142
|
|
|
142
143
|
```python
|
|
143
144
|
from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
|
|
144
|
-
from videopython.
|
|
145
|
+
from videopython.editing import Resize
|
|
145
146
|
|
|
146
147
|
image = TextToImage().generate_image("A cinematic mountain sunrise")
|
|
147
148
|
video = ImageToVideo().generate_video(image=image)
|
|
@@ -182,7 +183,7 @@ Every registered op exposes its own Pydantic schema, so an agent can
|
|
|
182
183
|
introspect what's available without hardcoded lists:
|
|
183
184
|
|
|
184
185
|
```python
|
|
185
|
-
from videopython.
|
|
186
|
+
from videopython.editing import Operation, OpCategory
|
|
186
187
|
|
|
187
188
|
for op_id, cls in Operation.registry().items():
|
|
188
189
|
print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
|
|
@@ -205,18 +206,30 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https
|
|
|
205
206
|
|
|
206
207
|
## Features
|
|
207
208
|
|
|
208
|
-
### `videopython.base` -
|
|
209
|
+
### `videopython.base` - data containers + I/O (no AI dependencies)
|
|
209
210
|
|
|
210
211
|
| Area | Highlights |
|
|
211
212
|
|---|---|
|
|
212
213
|
| **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
|
|
214
|
+
| **Text rendering** | `ImageText` - generic PIL text-on-image primitive |
|
|
215
|
+
| **Transcription** | `Transcription`, `TranscriptionSegment`, `TranscriptionWord` - data classes returned by transcription backends |
|
|
216
|
+
| **Result types** | `BoundingBox`, `DetectedFace`, `FaceTrack`, `SceneBoundary`, `AudioEvent`, `MotionInfo`, ... - shared by editing and AI |
|
|
217
|
+
|
|
218
|
+
### `videopython.audio` - audio data container
|
|
219
|
+
|
|
220
|
+
| Area | Highlights |
|
|
221
|
+
|---|---|
|
|
222
|
+
| **Audio** | `Audio`, `AudioMetadata` - load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
|
|
223
|
+
|
|
224
|
+
### `videopython.editing` - editing primitives + plan runner
|
|
225
|
+
|
|
226
|
+
| Area | Highlights |
|
|
227
|
+
|---|---|
|
|
213
228
|
| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
|
|
214
229
|
| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
|
|
215
230
|
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
|
|
216
231
|
| **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
|
|
217
|
-
| **
|
|
218
|
-
| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
|
|
219
|
-
| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
|
|
232
|
+
| **Subtitles** | `TranscriptionOverlay` - animated word-by-word subtitle rendering |
|
|
220
233
|
|
|
221
234
|
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
|
|
222
235
|
|
|
@@ -42,7 +42,8 @@ Every editing primitive is an `Operation` subclass — a Pydantic model
|
|
|
42
42
|
whose fields ARE the JSON wire format. Apply one to a `Video`:
|
|
43
43
|
|
|
44
44
|
```python
|
|
45
|
-
from videopython.base import Video
|
|
45
|
+
from videopython.base import Video
|
|
46
|
+
from videopython.editing import CutSeconds, Resize, Fade
|
|
46
47
|
|
|
47
48
|
video = Video.from_path("raw.mp4")
|
|
48
49
|
video = CutSeconds(start=10, end=25).apply(video)
|
|
@@ -92,7 +93,7 @@ instead if you want the result back in memory as a `Video`.
|
|
|
92
93
|
|
|
93
94
|
```python
|
|
94
95
|
from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
|
|
95
|
-
from videopython.
|
|
96
|
+
from videopython.editing import Resize
|
|
96
97
|
|
|
97
98
|
image = TextToImage().generate_image("A cinematic mountain sunrise")
|
|
98
99
|
video = ImageToVideo().generate_video(image=image)
|
|
@@ -133,7 +134,7 @@ Every registered op exposes its own Pydantic schema, so an agent can
|
|
|
133
134
|
introspect what's available without hardcoded lists:
|
|
134
135
|
|
|
135
136
|
```python
|
|
136
|
-
from videopython.
|
|
137
|
+
from videopython.editing import Operation, OpCategory
|
|
137
138
|
|
|
138
139
|
for op_id, cls in Operation.registry().items():
|
|
139
140
|
print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
|
|
@@ -156,18 +157,30 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https
|
|
|
156
157
|
|
|
157
158
|
## Features
|
|
158
159
|
|
|
159
|
-
### `videopython.base` -
|
|
160
|
+
### `videopython.base` - data containers + I/O (no AI dependencies)
|
|
160
161
|
|
|
161
162
|
| Area | Highlights |
|
|
162
163
|
|---|---|
|
|
163
164
|
| **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
|
|
165
|
+
| **Text rendering** | `ImageText` - generic PIL text-on-image primitive |
|
|
166
|
+
| **Transcription** | `Transcription`, `TranscriptionSegment`, `TranscriptionWord` - data classes returned by transcription backends |
|
|
167
|
+
| **Result types** | `BoundingBox`, `DetectedFace`, `FaceTrack`, `SceneBoundary`, `AudioEvent`, `MotionInfo`, ... - shared by editing and AI |
|
|
168
|
+
|
|
169
|
+
### `videopython.audio` - audio data container
|
|
170
|
+
|
|
171
|
+
| Area | Highlights |
|
|
172
|
+
|---|---|
|
|
173
|
+
| **Audio** | `Audio`, `AudioMetadata` - load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
|
|
174
|
+
|
|
175
|
+
### `videopython.editing` - editing primitives + plan runner
|
|
176
|
+
|
|
177
|
+
| Area | Highlights |
|
|
178
|
+
|---|---|
|
|
164
179
|
| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
|
|
165
180
|
| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
|
|
166
181
|
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
|
|
167
182
|
| **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
|
|
168
|
-
| **
|
|
169
|
-
| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
|
|
170
|
-
| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
|
|
183
|
+
| **Subtitles** | `TranscriptionOverlay` - animated word-by-word subtitle rendering |
|
|
171
184
|
|
|
172
185
|
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
|
|
173
186
|
|
|
@@ -218,7 +218,7 @@ class VideoDubber:
|
|
|
218
218
|
source transcription. The output video is written to ``output_path``.
|
|
219
219
|
"""
|
|
220
220
|
from videopython.ai.dubbing.remux import replace_audio_stream_from_audio
|
|
221
|
-
from videopython.
|
|
221
|
+
from videopython.audio import Audio
|
|
222
222
|
|
|
223
223
|
input_path = Path(input_path)
|
|
224
224
|
output_path = Path(output_path)
|
|
@@ -292,7 +292,7 @@ class VideoDubber:
|
|
|
292
292
|
video_duration = video.total_seconds
|
|
293
293
|
|
|
294
294
|
if video_duration > speech_duration:
|
|
295
|
-
from videopython.base.transforms import CutSeconds
|
|
295
|
+
from videopython.editing.transforms import CutSeconds
|
|
296
296
|
|
|
297
297
|
output_video = CutSeconds(start=0, end=speech_duration).apply(video)
|
|
298
298
|
else:
|
|
@@ -5,8 +5,8 @@ from __future__ import annotations
|
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
6
|
from typing import TYPE_CHECKING, Any
|
|
7
7
|
|
|
8
|
-
from videopython.
|
|
9
|
-
from videopython.base.
|
|
8
|
+
from videopython.audio import Audio
|
|
9
|
+
from videopython.base.transcription import Transcription, TranscriptionSegment
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
from videopython.ai.dubbing.quality import TranscriptQuality
|
|
@@ -22,8 +22,8 @@ from videopython.ai.generation.translation import (
|
|
|
22
22
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
24
24
|
from videopython.ai.dubbing.models import TranslatedSegment
|
|
25
|
-
from videopython.
|
|
26
|
-
from videopython.base.
|
|
25
|
+
from videopython.audio import Audio
|
|
26
|
+
from videopython.base.transcription import Transcription
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
TranslatorChoice = Literal["auto", "marian", "qwen3"]
|
|
@@ -41,7 +41,7 @@ def _peak_match(target: Audio, reference: Audio) -> Audio:
|
|
|
41
41
|
Used as the fallback when LUFS measurement isn't viable (clip < 0.4s
|
|
42
42
|
or silent input). The new ``Audio`` shares no buffer with ``target``.
|
|
43
43
|
"""
|
|
44
|
-
from videopython.
|
|
44
|
+
from videopython.audio import Audio as _Audio
|
|
45
45
|
|
|
46
46
|
target_peak = float(np.max(np.abs(target.data))) if target.data.size else 0.0
|
|
47
47
|
reference_peak = float(np.max(np.abs(reference.data))) if reference.data.size else 0.0
|
|
@@ -71,7 +71,7 @@ def _loudness_match(target: Audio, reference: Audio) -> Audio:
|
|
|
71
71
|
are clamped to 0.99 — BS.1770 has no peak ceiling and a sufficiently
|
|
72
72
|
quiet source can demand gain that would otherwise clip.
|
|
73
73
|
"""
|
|
74
|
-
from videopython.
|
|
74
|
+
from videopython.audio import Audio as _Audio
|
|
75
75
|
|
|
76
76
|
target_dur = target.metadata.duration_seconds
|
|
77
77
|
ref_dur = reference.metadata.duration_seconds
|
|
@@ -427,7 +427,7 @@ class LocalDubbingPipeline:
|
|
|
427
427
|
every candidate is rejected, so the dub continues with the best
|
|
428
428
|
sample we have rather than silently dropping the speaker.
|
|
429
429
|
"""
|
|
430
|
-
from videopython.
|
|
430
|
+
from videopython.audio import Audio
|
|
431
431
|
|
|
432
432
|
voice_samples: dict[str, Audio] = {}
|
|
433
433
|
|
|
@@ -558,7 +558,7 @@ class LocalDubbingPipeline:
|
|
|
558
558
|
can use ``Audio.from_path(path)`` to avoid loading video frames.
|
|
559
559
|
transcription: Optional pre-computed Transcription object. When provided,
|
|
560
560
|
the internal Whisper transcription step is skipped (saving time and VRAM).
|
|
561
|
-
Must be a ``videopython.base.text.transcription.Transcription`` instance
|
|
561
|
+
Must be a ``videopython.base.transcription.Transcription`` instance
|
|
562
562
|
with populated ``segments``. Speaker labels on the supplied transcription
|
|
563
563
|
drive per-speaker voice cloning. If the supplied transcription has no
|
|
564
564
|
speakers and ``enable_diarization=True``, pyannote is run standalone on
|
|
@@ -805,7 +805,7 @@ class LocalDubbingPipeline:
|
|
|
805
805
|
source_audio: Source audio track to revoice. Callers with a ``Video``
|
|
806
806
|
object should pass ``video.audio``.
|
|
807
807
|
"""
|
|
808
|
-
from videopython.
|
|
808
|
+
from videopython.audio import Audio
|
|
809
809
|
|
|
810
810
|
def report_progress(stage: str, progress: float) -> None:
|
|
811
811
|
if progress_callback:
|
|
@@ -24,7 +24,7 @@ from dataclasses import dataclass, field
|
|
|
24
24
|
from typing import TYPE_CHECKING, Any, Literal
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
|
-
from videopython.base.
|
|
27
|
+
from videopython.base.transcription import Transcription
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
# Tuned conservatively to favor "warn" over "reject"; first-week production
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import TYPE_CHECKING, Any
|
|
6
6
|
|
|
7
7
|
from videopython.ai._device import log_device_initialization, release_device_memory, select_device
|
|
8
|
-
from videopython.
|
|
8
|
+
from videopython.audio import Audio, AudioMetadata
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from pathlib import Path
|
|
@@ -27,7 +27,7 @@ from videopython.ai.generation.translation import (
|
|
|
27
27
|
MarianTranslator,
|
|
28
28
|
_is_translatable_text,
|
|
29
29
|
)
|
|
30
|
-
from videopython.base.
|
|
30
|
+
from videopython.base.transcription import TranscriptionSegment
|
|
31
31
|
|
|
32
32
|
# Imported under TYPE_CHECKING only — qwen3 sits below videopython.ai.dubbing
|
|
33
33
|
# in the import order (pipeline.py imports Qwen3Translator), so a top-level
|
|
@@ -17,7 +17,7 @@ from __future__ import annotations
|
|
|
17
17
|
from typing import TYPE_CHECKING, Any, Callable, Protocol, runtime_checkable
|
|
18
18
|
|
|
19
19
|
from videopython.ai._device import log_device_initialization, release_device_memory, select_device
|
|
20
|
-
from videopython.base.
|
|
20
|
+
from videopython.base.transcription import TranscriptionSegment
|
|
21
21
|
|
|
22
22
|
# Imported under TYPE_CHECKING to avoid a circular dep through
|
|
23
23
|
# videopython.ai.dubbing (the dubbing pipeline imports both
|
|
@@ -12,8 +12,8 @@ from tqdm import tqdm
|
|
|
12
12
|
|
|
13
13
|
from videopython.ai.understanding.faces import FaceTracker
|
|
14
14
|
from videopython.base._dimensions import floor_to_even
|
|
15
|
-
from videopython.base.operation import OpCategory, Operation
|
|
16
15
|
from videopython.base.video import Video
|
|
16
|
+
from videopython.editing.operation import OpCategory, Operation
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
@@ -6,9 +6,9 @@ import logging
|
|
|
6
6
|
from typing import Any, Literal
|
|
7
7
|
|
|
8
8
|
from videopython.ai._device import log_device_initialization, release_device_memory, select_device
|
|
9
|
-
from videopython.
|
|
9
|
+
from videopython.audio import Audio
|
|
10
10
|
from videopython.base.description import AudioClassification, AudioEvent
|
|
11
|
-
from videopython.base.
|
|
11
|
+
from videopython.base.transcription import Transcription, TranscriptionSegment, TranscriptionWord
|
|
12
12
|
from videopython.base.video import Video
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
@@ -7,7 +7,7 @@ from typing import Any
|
|
|
7
7
|
|
|
8
8
|
from videopython.ai._device import log_device_initialization, release_device_memory, select_device
|
|
9
9
|
from videopython.ai.dubbing.models import SeparatedAudio
|
|
10
|
-
from videopython.
|
|
10
|
+
from videopython.audio import Audio, AudioMetadata
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -25,8 +25,8 @@ from videopython.ai.understanding import (
|
|
|
25
25
|
SemanticSceneDetector,
|
|
26
26
|
)
|
|
27
27
|
from videopython.ai.understanding.faces import FaceTracker
|
|
28
|
+
from videopython.audio import Audio
|
|
28
29
|
from videopython.base import _ffmpeg
|
|
29
|
-
from videopython.base.audio import Audio
|
|
30
30
|
from videopython.base.description import (
|
|
31
31
|
AudioClassification,
|
|
32
32
|
AudioEvent,
|
|
@@ -35,7 +35,7 @@ from videopython.base.description import (
|
|
|
35
35
|
SceneDescription,
|
|
36
36
|
)
|
|
37
37
|
from videopython.base.exceptions import FFmpegProbeError
|
|
38
|
-
from videopython.base.
|
|
38
|
+
from videopython.base.transcription import Transcription
|
|
39
39
|
from videopython.base.video import Video, VideoMetadata, extract_frames_at_times
|
|
40
40
|
|
|
41
41
|
__all__ = ["VideoAnalysis", "VideoAnalysisConfig", "VideoAnalyzer"]
|
|
@@ -949,7 +949,7 @@ class VideoAnalyzer:
|
|
|
949
949
|
return None
|
|
950
950
|
if source_path is not None:
|
|
951
951
|
return Video.from_path(str(source_path), start_second=start_second, end_second=end_second)
|
|
952
|
-
from videopython.base.transforms import CutSeconds
|
|
952
|
+
from videopython.editing.transforms import CutSeconds
|
|
953
953
|
|
|
954
954
|
return CutSeconds(start=start_second, end=end_second).apply(_require_video(video))
|
|
955
955
|
|
{videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython}/audio/audio.py
RENAMED
|
@@ -13,7 +13,7 @@ from videopython.base import _ffmpeg
|
|
|
13
13
|
from videopython.base.exceptions import AudioLoadError, FFmpegProbeError
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
|
-
from videopython.base.audio.analysis import AudioLevels, AudioSegment, AudioSegmentType, SilentSegment
|
|
16
|
+
from videopython.audio.analysis import AudioLevels, AudioSegment, AudioSegmentType, SilentSegment
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
@dataclass
|
|
@@ -879,7 +879,7 @@ class Audio:
|
|
|
879
879
|
>>> levels = audio.get_levels()
|
|
880
880
|
>>> print(f"Peak: {levels.db_peak:.1f} dB")
|
|
881
881
|
"""
|
|
882
|
-
from videopython.base.audio.analysis import AudioLevels
|
|
882
|
+
from videopython.audio.analysis import AudioLevels
|
|
883
883
|
|
|
884
884
|
segment = self.slice(start_seconds, end_seconds)
|
|
885
885
|
data = segment.data.flatten() if segment.metadata.channels == 2 else segment.data
|
|
@@ -947,7 +947,7 @@ class Audio:
|
|
|
947
947
|
>>> for seg in silent_segments:
|
|
948
948
|
... print(f"Silence: {seg.start:.2f}s - {seg.end:.2f}s")
|
|
949
949
|
"""
|
|
950
|
-
from videopython.base.audio.analysis import SilentSegment
|
|
950
|
+
from videopython.audio.analysis import SilentSegment
|
|
951
951
|
|
|
952
952
|
levels_over_time = self.get_levels_over_time(window_seconds=window_seconds, hop_seconds=window_seconds / 2)
|
|
953
953
|
|
|
@@ -1027,7 +1027,7 @@ class Audio:
|
|
|
1027
1027
|
>>> for seg in segments:
|
|
1028
1028
|
... print(f"{seg.start:.1f}-{seg.end:.1f}s: {seg.segment_type.value}")
|
|
1029
1029
|
"""
|
|
1030
|
-
from videopython.base.audio.analysis import AudioSegment
|
|
1030
|
+
from videopython.audio.analysis import AudioSegment
|
|
1031
1031
|
|
|
1032
1032
|
hop_length = segment_length * (1 - overlap)
|
|
1033
1033
|
segments = []
|
|
@@ -1064,7 +1064,7 @@ class Audio:
|
|
|
1064
1064
|
Returns:
|
|
1065
1065
|
Tuple of (AudioSegmentType, confidence)
|
|
1066
1066
|
"""
|
|
1067
|
-
from videopython.base.audio.analysis import AudioSegmentType
|
|
1067
|
+
from videopython.audio.analysis import AudioSegmentType
|
|
1068
1068
|
|
|
1069
1069
|
data = segment.to_mono().data
|
|
1070
1070
|
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from .description import (
|
|
2
|
+
AudioClassification,
|
|
3
|
+
AudioEvent,
|
|
4
|
+
BoundingBox,
|
|
5
|
+
DetectedFace,
|
|
6
|
+
DetectedObject,
|
|
7
|
+
DetectedText,
|
|
8
|
+
FaceTrack,
|
|
9
|
+
MotionInfo,
|
|
10
|
+
SceneBoundary,
|
|
11
|
+
SceneDescription,
|
|
12
|
+
)
|
|
13
|
+
from .exceptions import (
|
|
14
|
+
AudioError,
|
|
15
|
+
AudioLoadError,
|
|
16
|
+
OutOfBoundsError,
|
|
17
|
+
TextRenderError,
|
|
18
|
+
TransformError,
|
|
19
|
+
VideoError,
|
|
20
|
+
VideoLoadError,
|
|
21
|
+
VideoMetadataError,
|
|
22
|
+
VideoPythonError,
|
|
23
|
+
)
|
|
24
|
+
from .image_text import AnchorPoint, ImageText, TextAlign
|
|
25
|
+
from .transcription import Transcription, TranscriptionSegment, TranscriptionWord
|
|
26
|
+
from .video import FrameIterator, Video, VideoMetadata
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# Core
|
|
30
|
+
"Video",
|
|
31
|
+
"VideoMetadata",
|
|
32
|
+
"FrameIterator",
|
|
33
|
+
# Exceptions
|
|
34
|
+
"VideoPythonError",
|
|
35
|
+
"VideoError",
|
|
36
|
+
"VideoLoadError",
|
|
37
|
+
"VideoMetadataError",
|
|
38
|
+
"AudioError",
|
|
39
|
+
"AudioLoadError",
|
|
40
|
+
"TransformError",
|
|
41
|
+
"TextRenderError",
|
|
42
|
+
"OutOfBoundsError",
|
|
43
|
+
# Text rendering primitives
|
|
44
|
+
"ImageText",
|
|
45
|
+
"TextAlign",
|
|
46
|
+
"AnchorPoint",
|
|
47
|
+
# Transcription data classes
|
|
48
|
+
"Transcription",
|
|
49
|
+
"TranscriptionSegment",
|
|
50
|
+
"TranscriptionWord",
|
|
51
|
+
# Detection / scene / motion result types (consumed by ai/, editing/)
|
|
52
|
+
"BoundingBox",
|
|
53
|
+
"DetectedObject",
|
|
54
|
+
"DetectedFace",
|
|
55
|
+
"DetectedText",
|
|
56
|
+
"FaceTrack",
|
|
57
|
+
"AudioEvent",
|
|
58
|
+
"AudioClassification",
|
|
59
|
+
"MotionInfo",
|
|
60
|
+
"SceneBoundary",
|
|
61
|
+
"SceneDescription",
|
|
62
|
+
]
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
Centralises the libx264+yuv420p even-dimension constraint and the
|
|
4
4
|
two "round to even" calculations that previously lived (with subtly
|
|
5
5
|
different semantics) in ``base/video.py``, ``ai/transforms.py``, and
|
|
6
|
-
``base/transforms.py``.
|
|
6
|
+
``editing/transforms.py``.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -20,9 +20,9 @@ from typing import Literal, get_args
|
|
|
20
20
|
|
|
21
21
|
import numpy as np
|
|
22
22
|
|
|
23
|
+
from videopython.audio import Audio
|
|
23
24
|
from videopython.base import _ffmpeg
|
|
24
25
|
from videopython.base._dimensions import require_even
|
|
25
|
-
from videopython.base.audio import Audio
|
|
26
26
|
from videopython.base.exceptions import (
|
|
27
27
|
AudioLoadError,
|
|
28
28
|
FFmpegRunError,
|
|
@@ -22,8 +22,10 @@ __all__ = [
|
|
|
22
22
|
class SceneBoundary:
|
|
23
23
|
"""Timing information for a detected scene.
|
|
24
24
|
|
|
25
|
-
A lightweight structure representing scene boundaries
|
|
26
|
-
|
|
25
|
+
A lightweight structure representing scene boundaries returned by
|
|
26
|
+
scene detectors (e.g. ``videopython.ai.SemanticSceneDetector``). This
|
|
27
|
+
is a backbone type — higher-level scene analysis lives in orchestration
|
|
28
|
+
packages.
|
|
27
29
|
|
|
28
30
|
Attributes:
|
|
29
31
|
start: Scene start time in seconds
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
"""PIL-backed text rendering primitive.
|
|
2
2
|
|
|
3
3
|
``ImageText`` is a general-purpose text renderer used both by the
|
|
4
|
-
``TranscriptionOverlay`` subtitle effect
|
|
5
|
-
directly by AI image
|
|
4
|
+
``TranscriptionOverlay`` subtitle effect
|
|
5
|
+
(``editing/transcription_overlay.py``) and directly by AI image
|
|
6
|
+
generation helpers (``ai/understanding/image.py``).
|
|
6
7
|
``TextAlign`` and ``AnchorPoint`` are its argument enums.
|
|
7
8
|
"""
|
|
8
9
|
|
|
@@ -7,9 +7,9 @@ from typing import Generator
|
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
|
+
from videopython.audio import Audio
|
|
10
11
|
from videopython.base import _ffmpeg, _video_io
|
|
11
12
|
from videopython.base._video_io import ALLOWED_VIDEO_FORMATS, ALLOWED_VIDEO_PRESETS
|
|
12
|
-
from videopython.base.audio import Audio
|
|
13
13
|
from videopython.base.exceptions import FFmpegProbeError, VideoMetadataError
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from .effects import (
|
|
2
|
+
Blur,
|
|
3
|
+
ColorGrading,
|
|
4
|
+
Effect,
|
|
5
|
+
Fade,
|
|
6
|
+
FullImageOverlay,
|
|
7
|
+
KenBurns,
|
|
8
|
+
TextOverlay,
|
|
9
|
+
Vignette,
|
|
10
|
+
VolumeAdjust,
|
|
11
|
+
Zoom,
|
|
12
|
+
)
|
|
13
|
+
from .operation import FilterCtx, OpCategory, Operation, TimeRange
|
|
14
|
+
from .transcription_overlay import TranscriptionOverlay
|
|
15
|
+
from .transforms import (
|
|
16
|
+
Crop,
|
|
17
|
+
CropMode,
|
|
18
|
+
CutFrames,
|
|
19
|
+
CutSeconds,
|
|
20
|
+
FreezeFrame,
|
|
21
|
+
ResampleFPS,
|
|
22
|
+
Resize,
|
|
23
|
+
Reverse,
|
|
24
|
+
SilenceRemoval,
|
|
25
|
+
SpeedChange,
|
|
26
|
+
)
|
|
27
|
+
from .video_edit import SegmentConfig, VideoEdit
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
# Operation foundation
|
|
31
|
+
"Operation",
|
|
32
|
+
"Effect",
|
|
33
|
+
"TimeRange",
|
|
34
|
+
"OpCategory",
|
|
35
|
+
"FilterCtx",
|
|
36
|
+
# Transforms
|
|
37
|
+
"CutFrames",
|
|
38
|
+
"CutSeconds",
|
|
39
|
+
"Resize",
|
|
40
|
+
"ResampleFPS",
|
|
41
|
+
"Crop",
|
|
42
|
+
"CropMode",
|
|
43
|
+
"SpeedChange",
|
|
44
|
+
"Reverse",
|
|
45
|
+
"FreezeFrame",
|
|
46
|
+
"SilenceRemoval",
|
|
47
|
+
# Effects
|
|
48
|
+
"FullImageOverlay",
|
|
49
|
+
"Blur",
|
|
50
|
+
"Zoom",
|
|
51
|
+
"ColorGrading",
|
|
52
|
+
"Vignette",
|
|
53
|
+
"KenBurns",
|
|
54
|
+
"Fade",
|
|
55
|
+
"VolumeAdjust",
|
|
56
|
+
"TextOverlay",
|
|
57
|
+
"TranscriptionOverlay",
|
|
58
|
+
# Plan runner
|
|
59
|
+
"VideoEdit",
|
|
60
|
+
"SegmentConfig",
|
|
61
|
+
]
|
{videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/effects.py
RENAMED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
An ``Effect`` is an ``Operation`` that preserves video shape and frame count.
|
|
4
4
|
Subclasses override :meth:`Effect._apply` for in-memory execution and may
|
|
5
5
|
additionally override :meth:`Effect.streaming_init` / :meth:`Effect.process_frame`
|
|
6
|
-
for bounded-memory streaming via ``base/streaming.py``.
|
|
6
|
+
for bounded-memory streaming via ``editing/streaming.py``.
|
|
7
7
|
|
|
8
8
|
Effects that need to modify audio (``Fade``, ``VolumeAdjust``) override
|
|
9
9
|
:meth:`Effect.apply` directly so the audio splice can stay coherent with the
|
|
@@ -24,10 +24,10 @@ from pydantic import Field, PrivateAttr, model_validator
|
|
|
24
24
|
from tqdm import tqdm
|
|
25
25
|
|
|
26
26
|
from videopython.base.description import BoundingBox
|
|
27
|
-
from videopython.
|
|
27
|
+
from videopython.editing.operation import Effect
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
30
|
-
from videopython.
|
|
30
|
+
from videopython.audio import Audio
|
|
31
31
|
from videopython.base.video import Video
|
|
32
32
|
|
|
33
33
|
logger = logging.getLogger(__name__)
|
{videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/operation.py
RENAMED
|
@@ -193,7 +193,7 @@ class Effect(Operation):
|
|
|
193
193
|
|
|
194
194
|
Subclasses override :meth:`_apply` for in-memory execution and may
|
|
195
195
|
additionally override :meth:`streaming_init` / :meth:`process_frame` for
|
|
196
|
-
bounded-memory streaming via ``base/streaming.py``. The base
|
|
196
|
+
bounded-memory streaming via ``editing/streaming.py``. The base
|
|
197
197
|
:meth:`apply` resolves :attr:`window`, slices the video, runs
|
|
198
198
|
``_apply`` on the slice, splices the result back, and asserts the
|
|
199
199
|
shape-preserving invariant.
|
{videopython-0.31.3/src/videopython/base → videopython-0.32.0/src/videopython/editing}/streaming.py
RENAMED
|
@@ -18,11 +18,11 @@ from typing import get_args
|
|
|
18
18
|
import numpy as np
|
|
19
19
|
from tqdm import tqdm
|
|
20
20
|
|
|
21
|
+
from videopython.audio import Audio
|
|
21
22
|
from videopython.base import _ffmpeg
|
|
22
23
|
from videopython.base._dimensions import require_even
|
|
23
|
-
from videopython.base.audio import Audio
|
|
24
|
-
from videopython.base.effects import Effect
|
|
25
24
|
from videopython.base.video import ALLOWED_VIDEO_FORMATS, ALLOWED_VIDEO_PRESETS, FrameIterator
|
|
25
|
+
from videopython.editing.effects import Effect
|
|
26
26
|
|
|
27
27
|
logger = logging.getLogger(__name__)
|
|
28
28
|
|
|
@@ -16,10 +16,10 @@ from PIL import Image
|
|
|
16
16
|
from pydantic import Field, PrivateAttr
|
|
17
17
|
from tqdm import tqdm
|
|
18
18
|
|
|
19
|
-
from videopython.base.
|
|
20
|
-
from videopython.base.
|
|
21
|
-
from videopython.base.text.transcription import Transcription, TranscriptionSegment
|
|
19
|
+
from videopython.base.image_text import AnchorPoint, ImageText, TextAlign
|
|
20
|
+
from videopython.base.transcription import Transcription, TranscriptionSegment
|
|
22
21
|
from videopython.base.video import Video
|
|
22
|
+
from videopython.editing.operation import Effect
|
|
23
23
|
|
|
24
24
|
__all__ = ["TranscriptionOverlay"]
|
|
25
25
|
|