videopython 0.29.1__tar.gz → 0.31.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.29.1 → videopython-0.31.0}/PKG-INFO +73 -45
- {videopython-0.29.1 → videopython-0.31.0}/README.md +72 -44
- {videopython-0.29.1 → videopython-0.31.0}/pyproject.toml +2 -1
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/__init__.py +1 -7
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/__init__.py +0 -3
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/dubber.py +4 -13
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/models.py +1 -2
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/pipeline.py +16 -148
- videopython-0.31.0/src/videopython/ai/transforms.py +193 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/faces.py +4 -5
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/temporal.py +1 -17
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/video_analysis.py +15 -13
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/__init__.py +7 -43
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/description.py +18 -18
- videopython-0.31.0/src/videopython/base/effects.py +765 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/exceptions.py +0 -12
- videopython-0.31.0/src/videopython/base/operation.py +269 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/streaming.py +7 -4
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/text/overlay.py +73 -105
- videopython-0.31.0/src/videopython/base/transforms.py +612 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/video.py +3 -514
- videopython-0.31.0/src/videopython/editing/__init__.py +6 -0
- videopython-0.31.0/src/videopython/editing/video_edit.py +539 -0
- videopython-0.29.1/src/videopython/ai/dubbing/cache.py +0 -325
- videopython-0.29.1/src/videopython/ai/registry.py +0 -33
- videopython-0.29.1/src/videopython/ai/swapping/__init__.py +0 -46
- videopython-0.29.1/src/videopython/ai/swapping/inpainter.py +0 -264
- videopython-0.29.1/src/videopython/ai/swapping/models.py +0 -221
- videopython-0.29.1/src/videopython/ai/swapping/segmenter.py +0 -577
- videopython-0.29.1/src/videopython/ai/swapping/swapper.py +0 -524
- videopython-0.29.1/src/videopython/ai/transforms.py +0 -531
- videopython-0.29.1/src/videopython/base/combine.py +0 -61
- videopython-0.29.1/src/videopython/base/effects.py +0 -1046
- videopython-0.29.1/src/videopython/base/progress.py +0 -63
- videopython-0.29.1/src/videopython/base/registry.py +0 -817
- videopython-0.29.1/src/videopython/base/transforms.py +0 -916
- videopython-0.29.1/src/videopython/base/transitions.py +0 -200
- videopython-0.29.1/src/videopython/base/utils.py +0 -6
- videopython-0.29.1/src/videopython/editing/__init__.py +0 -11
- videopython-0.29.1/src/videopython/editing/multicam.py +0 -398
- videopython-0.29.1/src/videopython/editing/premiere_xml.py +0 -313
- videopython-0.29.1/src/videopython/editing/video_edit.py +0 -1384
- {videopython-0.29.1 → videopython-0.31.0}/.gitignore +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/LICENSE +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/__init__.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/_device.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/remux.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/audio/__init__.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/audio/analysis.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/audio/audio.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/scene.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/text/__init__.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/text/transcription.py +0 -0
- {videopython-0.29.1 → videopython-0.31.0}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.29.1
|
|
3
|
+
Version: 0.31.0
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -85,22 +85,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
|
|
|
85
85
|
|
|
86
86
|
## Quick Start
|
|
87
87
|
|
|
88
|
-
###
|
|
88
|
+
### Imperative editing
|
|
89
|
+
|
|
90
|
+
Every editing primitive is an `Operation` subclass — a Pydantic model
|
|
91
|
+
whose fields ARE the JSON wire format. Apply one to a `Video`:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from videopython.base import Video, CutSeconds, Resize, Fade
|
|
95
|
+
|
|
96
|
+
video = Video.from_path("raw.mp4")
|
|
97
|
+
video = CutSeconds(start=10, end=25).apply(video)
|
|
98
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
99
|
+
video = Fade(mode="in", duration=0.5).apply(video)
|
|
100
|
+
video.save("output.mp4")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Concatenate clips with `+` (must share fps + dimensions):
|
|
89
104
|
|
|
90
105
|
```python
|
|
91
|
-
|
|
92
|
-
from videopython.base import FadeTransition
|
|
93
|
-
|
|
94
|
-
intro = Video.from_path("intro.mp4").resize(1080, 1920)
|
|
95
|
-
clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
|
|
96
|
-
final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
|
|
97
|
-
final = final.add_audio_from_file("music.mp3")
|
|
98
|
-
final.save("output.mp4")
|
|
106
|
+
combined = video_a + video_b
|
|
99
107
|
```
|
|
100
108
|
|
|
101
109
|
### JSON editing plans
|
|
102
110
|
|
|
103
|
-
Define multi-segment edits as JSON
|
|
111
|
+
Define multi-segment edits as JSON — the format LLM-driven workflows
|
|
112
|
+
generate against. `VideoEdit.json_schema()` returns the schema:
|
|
104
113
|
|
|
105
114
|
```python
|
|
106
115
|
from videopython.editing import VideoEdit
|
|
@@ -110,68 +119,89 @@ plan = {
|
|
|
110
119
|
"source": "raw.mp4",
|
|
111
120
|
"start": 10.0,
|
|
112
121
|
"end": 20.0,
|
|
113
|
-
"
|
|
114
|
-
{"op": "resize", "
|
|
115
|
-
{"op": "
|
|
122
|
+
"operations": [
|
|
123
|
+
{"op": "resize", "width": 1080, "height": 1920},
|
|
124
|
+
{"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
|
|
125
|
+
{"op": "fade", "mode": "in", "duration": 0.5,
|
|
126
|
+
"window": {"stop": 0.5}},
|
|
116
127
|
],
|
|
117
128
|
}],
|
|
118
|
-
"post_effects": [
|
|
119
|
-
{"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
|
|
120
|
-
],
|
|
121
129
|
}
|
|
122
130
|
|
|
123
131
|
edit = VideoEdit.from_dict(plan)
|
|
124
|
-
edit.validate()
|
|
125
|
-
|
|
126
|
-
final.save("output.mp4")
|
|
132
|
+
edit.validate() # dry-run via metadata, no frames loaded
|
|
133
|
+
edit.run_to_file("output.mp4") # stream to disk, ~constant memory
|
|
127
134
|
```
|
|
128
135
|
|
|
136
|
+
`run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
|
|
137
|
+
so memory stays bounded even for hour-long sources. Use `edit.run()`
|
|
138
|
+
instead if you want the result back in memory as a `Video`.
|
|
139
|
+
|
|
129
140
|
### AI generation
|
|
130
141
|
|
|
131
142
|
```python
|
|
132
143
|
from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
|
|
144
|
+
from videopython.base import Resize
|
|
133
145
|
|
|
134
146
|
image = TextToImage().generate_image("A cinematic mountain sunrise")
|
|
135
|
-
video = ImageToVideo().generate_video(image=image)
|
|
147
|
+
video = ImageToVideo().generate_video(image=image)
|
|
136
148
|
audio = TextToSpeech().generate_audio("Welcome to videopython.")
|
|
149
|
+
|
|
150
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
137
151
|
video.add_audio(audio).save("ai_video.mp4")
|
|
138
152
|
```
|
|
139
153
|
|
|
140
154
|
## LLM & AI Agent Integration
|
|
141
155
|
|
|
142
|
-
|
|
156
|
+
The library is built for LLM-driven editing. Two surfaces matter:
|
|
143
157
|
|
|
144
|
-
**
|
|
158
|
+
**1. Plan schema for tool / structured-output calls.**
|
|
159
|
+
`VideoEdit.json_schema()` returns a JSON Schema covering segments,
|
|
160
|
+
`post_operations`, and a discriminated union over every registered
|
|
161
|
+
`Operation`. Drop it into any LLM API:
|
|
145
162
|
|
|
146
163
|
```python
|
|
147
164
|
from videopython.editing import VideoEdit
|
|
148
165
|
|
|
149
166
|
schema = VideoEdit.json_schema()
|
|
150
|
-
#
|
|
151
|
-
#
|
|
167
|
+
# Anthropic: tools=[{"name": "edit", "input_schema": schema}]
|
|
168
|
+
# OpenAI: tools=[{"type": "function",
|
|
169
|
+
# "function": {"name": "edit", "parameters": schema}}]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Validate the LLM's output without touching the filesystem, then run it:
|
|
152
173
|
|
|
174
|
+
```python
|
|
153
175
|
edit = VideoEdit.from_dict(plan)
|
|
154
|
-
edit.validate()
|
|
155
|
-
|
|
156
|
-
final.save("output.mp4")
|
|
176
|
+
edit.validate() # catches bad ops, time ranges, fps mismatches
|
|
177
|
+
edit.run_to_file("output.mp4")
|
|
157
178
|
```
|
|
158
179
|
|
|
159
|
-
**Operation discovery
|
|
180
|
+
**2. Operation discovery for agent loops.**
|
|
181
|
+
Every registered op exposes its own Pydantic schema, so an agent can
|
|
182
|
+
introspect what's available without hardcoded lists:
|
|
160
183
|
|
|
161
184
|
```python
|
|
162
|
-
from videopython.base import
|
|
185
|
+
from videopython.base import Operation, OpCategory
|
|
163
186
|
|
|
164
|
-
|
|
165
|
-
|
|
187
|
+
for op_id, cls in Operation.registry().items():
|
|
188
|
+
print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
|
|
166
189
|
|
|
167
|
-
|
|
168
|
-
print(spec.description) # LLM-friendly docstring
|
|
169
|
-
print(spec.to_json_schema()) # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
|
|
190
|
+
schema = Operation.get("color_adjust").model_json_schema() # per-op schema
|
|
170
191
|
```
|
|
171
192
|
|
|
172
|
-
|
|
193
|
+
Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
|
|
194
|
+
nullability) flow through to the schema, so LLMs that support
|
|
195
|
+
constrained generation produce valid parameters on the first try.
|
|
196
|
+
|
|
197
|
+
For ops that need side-channel data (e.g. `silence_removal` and
|
|
198
|
+
`add_subtitles` need a `Transcription`), pass it via `context`:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
edit.run(context={"transcription": my_transcription})
|
|
202
|
+
```
|
|
173
203
|
|
|
174
|
-
Docs: [Editing Plans](https://videopython.com/api/editing/) | [
|
|
204
|
+
Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
|
|
175
205
|
|
|
176
206
|
## Features
|
|
177
207
|
|
|
@@ -180,16 +210,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
|
|
|
180
210
|
| Area | Highlights |
|
|
181
211
|
|---|---|
|
|
182
212
|
| **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
|
|
183
|
-
| **
|
|
184
|
-
| **
|
|
185
|
-
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change,
|
|
186
|
-
| **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
|
|
213
|
+
| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
|
|
214
|
+
| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
|
|
215
|
+
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
|
|
187
216
|
| **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
|
|
188
217
|
| **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
|
|
189
218
|
| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
|
|
190
219
|
| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
|
|
191
220
|
|
|
192
|
-
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [
|
|
221
|
+
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
|
|
193
222
|
|
|
194
223
|
### `videopython.ai` - local AI features (install with `[ai]`)
|
|
195
224
|
|
|
@@ -199,11 +228,10 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
|
|
|
199
228
|
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
|
|
200
229
|
| **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
|
|
201
230
|
| **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
|
|
202
|
-
| **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
|
|
231
|
+
| **Transforms** | `FaceTrackingCrop` |
|
|
203
232
|
| **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
|
|
204
|
-
| **Object swapping** | `ObjectSwapper` - detect, segment, and inpaint objects in video |
|
|
205
233
|
|
|
206
|
-
API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
|
|
234
|
+
API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
|
|
207
235
|
|
|
208
236
|
## Examples
|
|
209
237
|
|
|
@@ -36,22 +36,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
|
|
|
36
36
|
|
|
37
37
|
## Quick Start
|
|
38
38
|
|
|
39
|
-
###
|
|
39
|
+
### Imperative editing
|
|
40
|
+
|
|
41
|
+
Every editing primitive is an `Operation` subclass — a Pydantic model
|
|
42
|
+
whose fields ARE the JSON wire format. Apply one to a `Video`:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from videopython.base import Video, CutSeconds, Resize, Fade
|
|
46
|
+
|
|
47
|
+
video = Video.from_path("raw.mp4")
|
|
48
|
+
video = CutSeconds(start=10, end=25).apply(video)
|
|
49
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
50
|
+
video = Fade(mode="in", duration=0.5).apply(video)
|
|
51
|
+
video.save("output.mp4")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Concatenate clips with `+` (must share fps + dimensions):
|
|
40
55
|
|
|
41
56
|
```python
|
|
42
|
-
|
|
43
|
-
from videopython.base import FadeTransition
|
|
44
|
-
|
|
45
|
-
intro = Video.from_path("intro.mp4").resize(1080, 1920)
|
|
46
|
-
clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
|
|
47
|
-
final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
|
|
48
|
-
final = final.add_audio_from_file("music.mp3")
|
|
49
|
-
final.save("output.mp4")
|
|
57
|
+
combined = video_a + video_b
|
|
50
58
|
```
|
|
51
59
|
|
|
52
60
|
### JSON editing plans
|
|
53
61
|
|
|
54
|
-
Define multi-segment edits as JSON
|
|
62
|
+
Define multi-segment edits as JSON — the format LLM-driven workflows
|
|
63
|
+
generate against. `VideoEdit.json_schema()` returns the schema:
|
|
55
64
|
|
|
56
65
|
```python
|
|
57
66
|
from videopython.editing import VideoEdit
|
|
@@ -61,68 +70,89 @@ plan = {
|
|
|
61
70
|
"source": "raw.mp4",
|
|
62
71
|
"start": 10.0,
|
|
63
72
|
"end": 20.0,
|
|
64
|
-
"
|
|
65
|
-
{"op": "resize", "
|
|
66
|
-
{"op": "
|
|
73
|
+
"operations": [
|
|
74
|
+
{"op": "resize", "width": 1080, "height": 1920},
|
|
75
|
+
{"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
|
|
76
|
+
{"op": "fade", "mode": "in", "duration": 0.5,
|
|
77
|
+
"window": {"stop": 0.5}},
|
|
67
78
|
],
|
|
68
79
|
}],
|
|
69
|
-
"post_effects": [
|
|
70
|
-
{"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
|
|
71
|
-
],
|
|
72
80
|
}
|
|
73
81
|
|
|
74
82
|
edit = VideoEdit.from_dict(plan)
|
|
75
|
-
edit.validate()
|
|
76
|
-
|
|
77
|
-
final.save("output.mp4")
|
|
83
|
+
edit.validate() # dry-run via metadata, no frames loaded
|
|
84
|
+
edit.run_to_file("output.mp4") # stream to disk, ~constant memory
|
|
78
85
|
```
|
|
79
86
|
|
|
87
|
+
`run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
|
|
88
|
+
so memory stays bounded even for hour-long sources. Use `edit.run()`
|
|
89
|
+
instead if you want the result back in memory as a `Video`.
|
|
90
|
+
|
|
80
91
|
### AI generation
|
|
81
92
|
|
|
82
93
|
```python
|
|
83
94
|
from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
|
|
95
|
+
from videopython.base import Resize
|
|
84
96
|
|
|
85
97
|
image = TextToImage().generate_image("A cinematic mountain sunrise")
|
|
86
|
-
video = ImageToVideo().generate_video(image=image)
|
|
98
|
+
video = ImageToVideo().generate_video(image=image)
|
|
87
99
|
audio = TextToSpeech().generate_audio("Welcome to videopython.")
|
|
100
|
+
|
|
101
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
88
102
|
video.add_audio(audio).save("ai_video.mp4")
|
|
89
103
|
```
|
|
90
104
|
|
|
91
105
|
## LLM & AI Agent Integration
|
|
92
106
|
|
|
93
|
-
|
|
107
|
+
The library is built for LLM-driven editing. Two surfaces matter:
|
|
94
108
|
|
|
95
|
-
**
|
|
109
|
+
**1. Plan schema for tool / structured-output calls.**
|
|
110
|
+
`VideoEdit.json_schema()` returns a JSON Schema covering segments,
|
|
111
|
+
`post_operations`, and a discriminated union over every registered
|
|
112
|
+
`Operation`. Drop it into any LLM API:
|
|
96
113
|
|
|
97
114
|
```python
|
|
98
115
|
from videopython.editing import VideoEdit
|
|
99
116
|
|
|
100
117
|
schema = VideoEdit.json_schema()
|
|
101
|
-
#
|
|
102
|
-
#
|
|
118
|
+
# Anthropic: tools=[{"name": "edit", "input_schema": schema}]
|
|
119
|
+
# OpenAI: tools=[{"type": "function",
|
|
120
|
+
# "function": {"name": "edit", "parameters": schema}}]
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Validate the LLM's output without touching the filesystem, then run it:
|
|
103
124
|
|
|
125
|
+
```python
|
|
104
126
|
edit = VideoEdit.from_dict(plan)
|
|
105
|
-
edit.validate()
|
|
106
|
-
|
|
107
|
-
final.save("output.mp4")
|
|
127
|
+
edit.validate() # catches bad ops, time ranges, fps mismatches
|
|
128
|
+
edit.run_to_file("output.mp4")
|
|
108
129
|
```
|
|
109
130
|
|
|
110
|
-
**Operation discovery
|
|
131
|
+
**2. Operation discovery for agent loops.**
|
|
132
|
+
Every registered op exposes its own Pydantic schema, so an agent can
|
|
133
|
+
introspect what's available without hardcoded lists:
|
|
111
134
|
|
|
112
135
|
```python
|
|
113
|
-
from videopython.base import
|
|
136
|
+
from videopython.base import Operation, OpCategory
|
|
114
137
|
|
|
115
|
-
|
|
116
|
-
|
|
138
|
+
for op_id, cls in Operation.registry().items():
|
|
139
|
+
print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
|
|
117
140
|
|
|
118
|
-
|
|
119
|
-
print(spec.description) # LLM-friendly docstring
|
|
120
|
-
print(spec.to_json_schema()) # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
|
|
141
|
+
schema = Operation.get("color_adjust").model_json_schema() # per-op schema
|
|
121
142
|
```
|
|
122
143
|
|
|
123
|
-
|
|
144
|
+
Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
|
|
145
|
+
nullability) flow through to the schema, so LLMs that support
|
|
146
|
+
constrained generation produce valid parameters on the first try.
|
|
147
|
+
|
|
148
|
+
For ops that need side-channel data (e.g. `silence_removal` and
|
|
149
|
+
`add_subtitles` need a `Transcription`), pass it via `context`:
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
edit.run(context={"transcription": my_transcription})
|
|
153
|
+
```
|
|
124
154
|
|
|
125
|
-
Docs: [Editing Plans](https://videopython.com/api/editing/) | [
|
|
155
|
+
Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
|
|
126
156
|
|
|
127
157
|
## Features
|
|
128
158
|
|
|
@@ -131,16 +161,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
|
|
|
131
161
|
| Area | Highlights |
|
|
132
162
|
|---|---|
|
|
133
163
|
| **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
|
|
134
|
-
| **
|
|
135
|
-
| **
|
|
136
|
-
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change,
|
|
137
|
-
| **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
|
|
164
|
+
| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
|
|
165
|
+
| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
|
|
166
|
+
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
|
|
138
167
|
| **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
|
|
139
168
|
| **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
|
|
140
169
|
| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
|
|
141
170
|
| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
|
|
142
171
|
|
|
143
|
-
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [
|
|
172
|
+
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
|
|
144
173
|
|
|
145
174
|
### `videopython.ai` - local AI features (install with `[ai]`)
|
|
146
175
|
|
|
@@ -150,11 +179,10 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
|
|
|
150
179
|
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
|
|
151
180
|
| **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
|
|
152
181
|
| **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
|
|
153
|
-
| **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
|
|
182
|
+
| **Transforms** | `FaceTrackingCrop` |
|
|
154
183
|
| **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
|
|
155
|
-
| **Object swapping** | `ObjectSwapper` - detect, segment, and inpaint objects in video |
|
|
156
184
|
|
|
157
|
-
API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
|
|
185
|
+
API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
|
|
158
186
|
|
|
159
187
|
## Examples
|
|
160
188
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "videopython"
|
|
3
|
-
version = "0.29.1"
|
|
3
|
+
version = "0.31.0"
|
|
4
4
|
description = "Minimal video generation and processing library."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
|
|
@@ -136,6 +136,7 @@ Documentation = "https://videopython.com"
|
|
|
136
136
|
|
|
137
137
|
[tool.mypy]
|
|
138
138
|
mypy_path = "src/stubs"
|
|
139
|
+
plugins = ["pydantic.mypy"]
|
|
139
140
|
|
|
140
141
|
[[tool.mypy.overrides]]
|
|
141
142
|
module = [
|
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
from videopython.ai import registry as _ai_registry # noqa: F401
|
|
2
|
-
|
|
3
1
|
from .generation import ImageToVideo, TextToImage, TextToMusic, TextToSpeech, TextToVideo
|
|
4
|
-
from .swapping import ObjectSwapper
|
|
5
|
-
from .transforms import FaceTrackingCrop, SplitScreenComposite
|
|
2
|
+
from .transforms import FaceTrackingCrop
|
|
6
3
|
from .understanding import (
|
|
7
4
|
AudioClassifier,
|
|
8
5
|
AudioToText,
|
|
@@ -27,9 +24,6 @@ __all__ = [
|
|
|
27
24
|
"SemanticSceneDetector",
|
|
28
25
|
# Transforms (AI-powered)
|
|
29
26
|
"FaceTrackingCrop",
|
|
30
|
-
"SplitScreenComposite",
|
|
31
|
-
# Swapping
|
|
32
|
-
"ObjectSwapper",
|
|
33
27
|
# Video analysis
|
|
34
28
|
"VideoAnalysis",
|
|
35
29
|
"VideoAnalysisConfig",
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Local video dubbing functionality."""
|
|
2
2
|
|
|
3
|
-
from videopython.ai.dubbing.cache import DubCache, dub_cache_clear
|
|
4
3
|
from videopython.ai.dubbing.dubber import VideoDubber
|
|
5
4
|
from videopython.ai.dubbing.models import (
|
|
6
5
|
DubbingResult,
|
|
@@ -26,7 +25,5 @@ __all__ = [
|
|
|
26
25
|
"TranscriptQuality",
|
|
27
26
|
"assess_transcript",
|
|
28
27
|
"UnsupportedLanguageError",
|
|
29
|
-
"DubCache",
|
|
30
|
-
"dub_cache_clear",
|
|
31
28
|
"Expressiveness",
|
|
32
29
|
]
|
|
@@ -55,13 +55,6 @@ class VideoDubber:
|
|
|
55
55
|
See :class:`videopython.ai.generation.qwen3.Qwen3Translator`
|
|
56
56
|
for tradeoffs (Qwen3 is slower on CPU but produces
|
|
57
57
|
context-aware, length-budgeted output).
|
|
58
|
-
cache_dir: When set, persist transcription, translated segments,
|
|
59
|
-
and per-segment TTS WAVs under this directory and skip stages
|
|
60
|
-
whose inputs already match a cache entry. Use to resume crashed
|
|
61
|
-
long runs or to iterate on dub configuration without paying
|
|
62
|
-
transcription cost each time. ``None`` (default) disables
|
|
63
|
-
caching. Cache grows unbounded; clear via
|
|
64
|
-
:func:`videopython.ai.dubbing.cache.dub_cache_clear`.
|
|
65
58
|
"""
|
|
66
59
|
|
|
67
60
|
def __init__(
|
|
@@ -75,7 +68,6 @@ class VideoDubber:
|
|
|
75
68
|
vocabulary: list[str] | None = None,
|
|
76
69
|
strict_quality: bool = False,
|
|
77
70
|
translator: TranslatorChoice = "auto",
|
|
78
|
-
cache_dir: str | Path | None = None,
|
|
79
71
|
):
|
|
80
72
|
self.device = device
|
|
81
73
|
self.low_memory = low_memory
|
|
@@ -86,16 +78,14 @@ class VideoDubber:
|
|
|
86
78
|
self.vocabulary = vocabulary
|
|
87
79
|
self.strict_quality = strict_quality
|
|
88
80
|
self.translator = translator
|
|
89
|
-
self.cache_dir = cache_dir
|
|
90
81
|
self._local_pipeline: Any = None
|
|
91
82
|
requested = device.lower() if isinstance(device, str) else "auto"
|
|
92
83
|
logger.info(
|
|
93
|
-
"VideoDubber initialized with device=%s low_memory=%s whisper_model=%s translator=%s
|
|
84
|
+
"VideoDubber initialized with device=%s low_memory=%s whisper_model=%s translator=%s",
|
|
94
85
|
requested,
|
|
95
86
|
low_memory,
|
|
96
87
|
whisper_model,
|
|
97
88
|
translator,
|
|
98
|
-
cache_dir,
|
|
99
89
|
)
|
|
100
90
|
|
|
101
91
|
def _init_local_pipeline(self) -> None:
|
|
@@ -111,7 +101,6 @@ class VideoDubber:
|
|
|
111
101
|
vocabulary=self.vocabulary,
|
|
112
102
|
strict_quality=self.strict_quality,
|
|
113
103
|
translator=self.translator,
|
|
114
|
-
cache_dir=self.cache_dir,
|
|
115
104
|
)
|
|
116
105
|
|
|
117
106
|
def dub(
|
|
@@ -303,7 +292,9 @@ class VideoDubber:
|
|
|
303
292
|
video_duration = video.total_seconds
|
|
304
293
|
|
|
305
294
|
if video_duration > speech_duration:
|
|
306
|
-
|
|
295
|
+
from videopython.base.transforms import CutSeconds
|
|
296
|
+
|
|
297
|
+
output_video = CutSeconds(start=0, end=speech_duration).apply(video)
|
|
307
298
|
else:
|
|
308
299
|
output_video = video
|
|
309
300
|
|
|
@@ -41,8 +41,7 @@ class Expressiveness:
|
|
|
41
41
|
def as_kwargs(self) -> dict[str, float]:
|
|
42
42
|
"""Knobs as a dict, dropping ``None`` entries.
|
|
43
43
|
|
|
44
|
-
Suitable for ``**``-expansion into Chatterbox
|
|
45
|
-
:meth:`DubCache.tts_key`.
|
|
44
|
+
Suitable for ``**``-expansion into Chatterbox.
|
|
46
45
|
"""
|
|
47
46
|
return {
|
|
48
47
|
name: value
|