videopython 0.30.0__tar.gz → 0.31.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.30.0 → videopython-0.31.1}/PKG-INFO +72 -43
- {videopython-0.30.0 → videopython-0.31.1}/README.md +71 -42
- {videopython-0.30.0 → videopython-0.31.1}/pyproject.toml +2 -1
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/__init__.py +1 -4
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/dubber.py +3 -1
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/remux.py +11 -8
- videopython-0.31.1/src/videopython/ai/transforms.py +189 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/understanding/faces.py +4 -5
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/video_analysis.py +7 -16
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/__init__.py +7 -38
- videopython-0.31.1/src/videopython/base/_dimensions.py +41 -0
- videopython-0.31.1/src/videopython/base/_ffmpeg.py +152 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/audio/audio.py +16 -34
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/description.py +18 -18
- videopython-0.31.1/src/videopython/base/effects.py +765 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/exceptions.py +18 -12
- videopython-0.31.1/src/videopython/base/operation.py +269 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/streaming.py +33 -47
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/text/overlay.py +68 -103
- videopython-0.31.1/src/videopython/base/transforms.py +613 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/video.py +64 -674
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/editing/__init__.py +0 -3
- videopython-0.31.1/src/videopython/editing/video_edit.py +539 -0
- videopython-0.30.0/src/videopython/ai/registry.py +0 -33
- videopython-0.30.0/src/videopython/ai/transforms.py +0 -533
- videopython-0.30.0/src/videopython/base/effects.py +0 -1049
- videopython-0.30.0/src/videopython/base/registry.py +0 -808
- videopython-0.30.0/src/videopython/base/transforms.py +0 -919
- videopython-0.30.0/src/videopython/base/transitions.py +0 -200
- videopython-0.30.0/src/videopython/editing/multicam.py +0 -398
- videopython-0.30.0/src/videopython/editing/video_edit.py +0 -1384
- {videopython-0.30.0 → videopython-0.31.1}/.gitignore +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/LICENSE +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/__init__.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/_device.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/pipeline.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/audio/__init__.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/audio/analysis.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/scene.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/text/__init__.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/base/text/transcription.py +0 -0
- {videopython-0.30.0 → videopython-0.31.1}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.30.0
|
|
3
|
+
Version: 0.31.1
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -85,22 +85,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
|
|
|
85
85
|
|
|
86
86
|
## Quick Start
|
|
87
87
|
|
|
88
|
-
###
|
|
88
|
+
### Imperative editing
|
|
89
|
+
|
|
90
|
+
Every editing primitive is an `Operation` subclass — a Pydantic model
|
|
91
|
+
whose fields ARE the JSON wire format. Apply one to a `Video`:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from videopython.base import Video, CutSeconds, Resize, Fade
|
|
95
|
+
|
|
96
|
+
video = Video.from_path("raw.mp4")
|
|
97
|
+
video = CutSeconds(start=10, end=25).apply(video)
|
|
98
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
99
|
+
video = Fade(mode="in", duration=0.5).apply(video)
|
|
100
|
+
video.save("output.mp4")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Concatenate clips with `+` (must share fps + dimensions):
|
|
89
104
|
|
|
90
105
|
```python
|
|
91
|
-
|
|
92
|
-
from videopython.base import FadeTransition
|
|
93
|
-
|
|
94
|
-
intro = Video.from_path("intro.mp4").resize(1080, 1920)
|
|
95
|
-
clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
|
|
96
|
-
final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
|
|
97
|
-
final = final.add_audio_from_file("music.mp3")
|
|
98
|
-
final.save("output.mp4")
|
|
106
|
+
combined = video_a + video_b
|
|
99
107
|
```
|
|
100
108
|
|
|
101
109
|
### JSON editing plans
|
|
102
110
|
|
|
103
|
-
Define multi-segment edits as JSON
|
|
111
|
+
Define multi-segment edits as JSON — the format LLM-driven workflows
|
|
112
|
+
generate against. `VideoEdit.json_schema()` returns the schema:
|
|
104
113
|
|
|
105
114
|
```python
|
|
106
115
|
from videopython.editing import VideoEdit
|
|
@@ -110,68 +119,89 @@ plan = {
|
|
|
110
119
|
"source": "raw.mp4",
|
|
111
120
|
"start": 10.0,
|
|
112
121
|
"end": 20.0,
|
|
113
|
-
"
|
|
114
|
-
{"op": "resize", "
|
|
115
|
-
{"op": "
|
|
122
|
+
"operations": [
|
|
123
|
+
{"op": "resize", "width": 1080, "height": 1920},
|
|
124
|
+
{"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
|
|
125
|
+
{"op": "fade", "mode": "in", "duration": 0.5,
|
|
126
|
+
"window": {"stop": 0.5}},
|
|
116
127
|
],
|
|
117
128
|
}],
|
|
118
|
-
"post_effects": [
|
|
119
|
-
{"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
|
|
120
|
-
],
|
|
121
129
|
}
|
|
122
130
|
|
|
123
131
|
edit = VideoEdit.from_dict(plan)
|
|
124
|
-
edit.validate()
|
|
125
|
-
|
|
126
|
-
final.save("output.mp4")
|
|
132
|
+
edit.validate() # dry-run via metadata, no frames loaded
|
|
133
|
+
edit.run_to_file("output.mp4") # stream to disk, ~constant memory
|
|
127
134
|
```
|
|
128
135
|
|
|
136
|
+
`run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
|
|
137
|
+
so memory stays bounded even for hour-long sources. Use `edit.run()`
|
|
138
|
+
instead if you want the result back in memory as a `Video`.
|
|
139
|
+
|
|
129
140
|
### AI generation
|
|
130
141
|
|
|
131
142
|
```python
|
|
132
143
|
from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
|
|
144
|
+
from videopython.base import Resize
|
|
133
145
|
|
|
134
146
|
image = TextToImage().generate_image("A cinematic mountain sunrise")
|
|
135
|
-
video = ImageToVideo().generate_video(image=image)
|
|
147
|
+
video = ImageToVideo().generate_video(image=image)
|
|
136
148
|
audio = TextToSpeech().generate_audio("Welcome to videopython.")
|
|
149
|
+
|
|
150
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
137
151
|
video.add_audio(audio).save("ai_video.mp4")
|
|
138
152
|
```
|
|
139
153
|
|
|
140
154
|
## LLM & AI Agent Integration
|
|
141
155
|
|
|
142
|
-
|
|
156
|
+
The library is built for LLM-driven editing. Two surfaces matter:
|
|
143
157
|
|
|
144
|
-
**
|
|
158
|
+
**1. Plan schema for tool / structured-output calls.**
|
|
159
|
+
`VideoEdit.json_schema()` returns a JSON Schema covering segments,
|
|
160
|
+
`post_operations`, and a discriminated union over every registered
|
|
161
|
+
`Operation`. Drop it into any LLM API:
|
|
145
162
|
|
|
146
163
|
```python
|
|
147
164
|
from videopython.editing import VideoEdit
|
|
148
165
|
|
|
149
166
|
schema = VideoEdit.json_schema()
|
|
150
|
-
#
|
|
151
|
-
#
|
|
167
|
+
# Anthropic: tools=[{"name": "edit", "input_schema": schema}]
|
|
168
|
+
# OpenAI: tools=[{"type": "function",
|
|
169
|
+
# "function": {"name": "edit", "parameters": schema}}]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Validate the LLM's output without touching the filesystem, then run it:
|
|
152
173
|
|
|
174
|
+
```python
|
|
153
175
|
edit = VideoEdit.from_dict(plan)
|
|
154
|
-
edit.validate()
|
|
155
|
-
|
|
156
|
-
final.save("output.mp4")
|
|
176
|
+
edit.validate() # catches bad ops, time ranges, fps mismatches
|
|
177
|
+
edit.run_to_file("output.mp4")
|
|
157
178
|
```
|
|
158
179
|
|
|
159
|
-
**Operation discovery
|
|
180
|
+
**2. Operation discovery for agent loops.**
|
|
181
|
+
Every registered op exposes its own Pydantic schema, so an agent can
|
|
182
|
+
introspect what's available without hardcoded lists:
|
|
160
183
|
|
|
161
184
|
```python
|
|
162
|
-
from videopython.base import
|
|
185
|
+
from videopython.base import Operation, OpCategory
|
|
163
186
|
|
|
164
|
-
|
|
165
|
-
|
|
187
|
+
for op_id, cls in Operation.registry().items():
|
|
188
|
+
print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
|
|
166
189
|
|
|
167
|
-
|
|
168
|
-
print(spec.description) # LLM-friendly docstring
|
|
169
|
-
print(spec.to_json_schema()) # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
|
|
190
|
+
schema = Operation.get("color_adjust").model_json_schema() # per-op schema
|
|
170
191
|
```
|
|
171
192
|
|
|
172
|
-
|
|
193
|
+
Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
|
|
194
|
+
nullability) flow through to the schema, so LLMs that support
|
|
195
|
+
constrained generation produce valid parameters on the first try.
|
|
196
|
+
|
|
197
|
+
For ops that need side-channel data (e.g. `silence_removal` and
|
|
198
|
+
`add_subtitles` need a `Transcription`), pass it via `context`:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
edit.run(context={"transcription": my_transcription})
|
|
202
|
+
```
|
|
173
203
|
|
|
174
|
-
Docs: [Editing Plans](https://videopython.com/api/editing/) | [
|
|
204
|
+
Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
|
|
175
205
|
|
|
176
206
|
## Features
|
|
177
207
|
|
|
@@ -180,16 +210,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
|
|
|
180
210
|
| Area | Highlights |
|
|
181
211
|
|---|---|
|
|
182
212
|
| **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
|
|
183
|
-
| **
|
|
184
|
-
| **
|
|
185
|
-
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change,
|
|
186
|
-
| **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
|
|
213
|
+
| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
|
|
214
|
+
| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
|
|
215
|
+
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
|
|
187
216
|
| **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
|
|
188
217
|
| **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
|
|
189
218
|
| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
|
|
190
219
|
| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
|
|
191
220
|
|
|
192
|
-
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [
|
|
221
|
+
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
|
|
193
222
|
|
|
194
223
|
### `videopython.ai` - local AI features (install with `[ai]`)
|
|
195
224
|
|
|
@@ -199,7 +228,7 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
|
|
|
199
228
|
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
|
|
200
229
|
| **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
|
|
201
230
|
| **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
|
|
202
|
-
| **Transforms** | `FaceTrackingCrop
|
|
231
|
+
| **Transforms** | `FaceTrackingCrop` |
|
|
203
232
|
| **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
|
|
204
233
|
|
|
205
234
|
API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
|
|
@@ -36,22 +36,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
|
|
|
36
36
|
|
|
37
37
|
## Quick Start
|
|
38
38
|
|
|
39
|
-
###
|
|
39
|
+
### Imperative editing
|
|
40
|
+
|
|
41
|
+
Every editing primitive is an `Operation` subclass — a Pydantic model
|
|
42
|
+
whose fields ARE the JSON wire format. Apply one to a `Video`:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from videopython.base import Video, CutSeconds, Resize, Fade
|
|
46
|
+
|
|
47
|
+
video = Video.from_path("raw.mp4")
|
|
48
|
+
video = CutSeconds(start=10, end=25).apply(video)
|
|
49
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
50
|
+
video = Fade(mode="in", duration=0.5).apply(video)
|
|
51
|
+
video.save("output.mp4")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Concatenate clips with `+` (must share fps + dimensions):
|
|
40
55
|
|
|
41
56
|
```python
|
|
42
|
-
|
|
43
|
-
from videopython.base import FadeTransition
|
|
44
|
-
|
|
45
|
-
intro = Video.from_path("intro.mp4").resize(1080, 1920)
|
|
46
|
-
clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
|
|
47
|
-
final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
|
|
48
|
-
final = final.add_audio_from_file("music.mp3")
|
|
49
|
-
final.save("output.mp4")
|
|
57
|
+
combined = video_a + video_b
|
|
50
58
|
```
|
|
51
59
|
|
|
52
60
|
### JSON editing plans
|
|
53
61
|
|
|
54
|
-
Define multi-segment edits as JSON
|
|
62
|
+
Define multi-segment edits as JSON — the format LLM-driven workflows
|
|
63
|
+
generate against. `VideoEdit.json_schema()` returns the schema:
|
|
55
64
|
|
|
56
65
|
```python
|
|
57
66
|
from videopython.editing import VideoEdit
|
|
@@ -61,68 +70,89 @@ plan = {
|
|
|
61
70
|
"source": "raw.mp4",
|
|
62
71
|
"start": 10.0,
|
|
63
72
|
"end": 20.0,
|
|
64
|
-
"
|
|
65
|
-
{"op": "resize", "
|
|
66
|
-
{"op": "
|
|
73
|
+
"operations": [
|
|
74
|
+
{"op": "resize", "width": 1080, "height": 1920},
|
|
75
|
+
{"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
|
|
76
|
+
{"op": "fade", "mode": "in", "duration": 0.5,
|
|
77
|
+
"window": {"stop": 0.5}},
|
|
67
78
|
],
|
|
68
79
|
}],
|
|
69
|
-
"post_effects": [
|
|
70
|
-
{"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
|
|
71
|
-
],
|
|
72
80
|
}
|
|
73
81
|
|
|
74
82
|
edit = VideoEdit.from_dict(plan)
|
|
75
|
-
edit.validate()
|
|
76
|
-
|
|
77
|
-
final.save("output.mp4")
|
|
83
|
+
edit.validate() # dry-run via metadata, no frames loaded
|
|
84
|
+
edit.run_to_file("output.mp4") # stream to disk, ~constant memory
|
|
78
85
|
```
|
|
79
86
|
|
|
87
|
+
`run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
|
|
88
|
+
so memory stays bounded even for hour-long sources. Use `edit.run()`
|
|
89
|
+
instead if you want the result back in memory as a `Video`.
|
|
90
|
+
|
|
80
91
|
### AI generation
|
|
81
92
|
|
|
82
93
|
```python
|
|
83
94
|
from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
|
|
95
|
+
from videopython.base import Resize
|
|
84
96
|
|
|
85
97
|
image = TextToImage().generate_image("A cinematic mountain sunrise")
|
|
86
|
-
video = ImageToVideo().generate_video(image=image)
|
|
98
|
+
video = ImageToVideo().generate_video(image=image)
|
|
87
99
|
audio = TextToSpeech().generate_audio("Welcome to videopython.")
|
|
100
|
+
|
|
101
|
+
video = Resize(width=1080, height=1920).apply(video)
|
|
88
102
|
video.add_audio(audio).save("ai_video.mp4")
|
|
89
103
|
```
|
|
90
104
|
|
|
91
105
|
## LLM & AI Agent Integration
|
|
92
106
|
|
|
93
|
-
|
|
107
|
+
The library is built for LLM-driven editing. Two surfaces matter:
|
|
94
108
|
|
|
95
|
-
**
|
|
109
|
+
**1. Plan schema for tool / structured-output calls.**
|
|
110
|
+
`VideoEdit.json_schema()` returns a JSON Schema covering segments,
|
|
111
|
+
`post_operations`, and a discriminated union over every registered
|
|
112
|
+
`Operation`. Drop it into any LLM API:
|
|
96
113
|
|
|
97
114
|
```python
|
|
98
115
|
from videopython.editing import VideoEdit
|
|
99
116
|
|
|
100
117
|
schema = VideoEdit.json_schema()
|
|
101
|
-
#
|
|
102
|
-
#
|
|
118
|
+
# Anthropic: tools=[{"name": "edit", "input_schema": schema}]
|
|
119
|
+
# OpenAI: tools=[{"type": "function",
|
|
120
|
+
# "function": {"name": "edit", "parameters": schema}}]
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Validate the LLM's output without touching the filesystem, then run it:
|
|
103
124
|
|
|
125
|
+
```python
|
|
104
126
|
edit = VideoEdit.from_dict(plan)
|
|
105
|
-
edit.validate()
|
|
106
|
-
|
|
107
|
-
final.save("output.mp4")
|
|
127
|
+
edit.validate() # catches bad ops, time ranges, fps mismatches
|
|
128
|
+
edit.run_to_file("output.mp4")
|
|
108
129
|
```
|
|
109
130
|
|
|
110
|
-
**Operation discovery
|
|
131
|
+
**2. Operation discovery for agent loops.**
|
|
132
|
+
Every registered op exposes its own Pydantic schema, so an agent can
|
|
133
|
+
introspect what's available without hardcoded lists:
|
|
111
134
|
|
|
112
135
|
```python
|
|
113
|
-
from videopython.base import
|
|
136
|
+
from videopython.base import Operation, OpCategory
|
|
114
137
|
|
|
115
|
-
|
|
116
|
-
|
|
138
|
+
for op_id, cls in Operation.registry().items():
|
|
139
|
+
print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
|
|
117
140
|
|
|
118
|
-
|
|
119
|
-
print(spec.description) # LLM-friendly docstring
|
|
120
|
-
print(spec.to_json_schema()) # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
|
|
141
|
+
schema = Operation.get("color_adjust").model_json_schema() # per-op schema
|
|
121
142
|
```
|
|
122
143
|
|
|
123
|
-
|
|
144
|
+
Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
|
|
145
|
+
nullability) flow through to the schema, so LLMs that support
|
|
146
|
+
constrained generation produce valid parameters on the first try.
|
|
147
|
+
|
|
148
|
+
For ops that need side-channel data (e.g. `silence_removal` and
|
|
149
|
+
`add_subtitles` need a `Transcription`), pass it via `context`:
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
edit.run(context={"transcription": my_transcription})
|
|
153
|
+
```
|
|
124
154
|
|
|
125
|
-
Docs: [Editing Plans](https://videopython.com/api/editing/) | [
|
|
155
|
+
Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
|
|
126
156
|
|
|
127
157
|
## Features
|
|
128
158
|
|
|
@@ -131,16 +161,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
|
|
|
131
161
|
| Area | Highlights |
|
|
132
162
|
|---|---|
|
|
133
163
|
| **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
|
|
134
|
-
| **
|
|
135
|
-
| **
|
|
136
|
-
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change,
|
|
137
|
-
| **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
|
|
164
|
+
| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
|
|
165
|
+
| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
|
|
166
|
+
| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
|
|
138
167
|
| **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
|
|
139
168
|
| **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
|
|
140
169
|
| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
|
|
141
170
|
| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
|
|
142
171
|
|
|
143
|
-
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [
|
|
172
|
+
API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
|
|
144
173
|
|
|
145
174
|
### `videopython.ai` - local AI features (install with `[ai]`)
|
|
146
175
|
|
|
@@ -150,7 +179,7 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
|
|
|
150
179
|
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
|
|
151
180
|
| **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
|
|
152
181
|
| **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
|
|
153
|
-
| **Transforms** | `FaceTrackingCrop
|
|
182
|
+
| **Transforms** | `FaceTrackingCrop` |
|
|
154
183
|
| **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
|
|
155
184
|
|
|
156
185
|
API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "videopython"
|
|
3
|
-
version = "0.30.0"
|
|
3
|
+
version = "0.31.1"
|
|
4
4
|
description = "Minimal video generation and processing library."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
|
|
@@ -136,6 +136,7 @@ Documentation = "https://videopython.com"
|
|
|
136
136
|
|
|
137
137
|
[tool.mypy]
|
|
138
138
|
mypy_path = "src/stubs"
|
|
139
|
+
plugins = ["pydantic.mypy"]
|
|
139
140
|
|
|
140
141
|
[[tool.mypy.overrides]]
|
|
141
142
|
module = [
|
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
from videopython.ai import registry as _ai_registry # noqa: F401
|
|
2
|
-
|
|
3
1
|
from .generation import ImageToVideo, TextToImage, TextToMusic, TextToSpeech, TextToVideo
|
|
4
|
-
from .transforms import FaceTrackingCrop, SplitScreenComposite
|
|
2
|
+
from .transforms import FaceTrackingCrop
|
|
5
3
|
from .understanding import (
|
|
6
4
|
AudioClassifier,
|
|
7
5
|
AudioToText,
|
|
@@ -26,7 +24,6 @@ __all__ = [
|
|
|
26
24
|
"SemanticSceneDetector",
|
|
27
25
|
# Transforms (AI-powered)
|
|
28
26
|
"FaceTrackingCrop",
|
|
29
|
-
"SplitScreenComposite",
|
|
30
27
|
# Video analysis
|
|
31
28
|
"VideoAnalysis",
|
|
32
29
|
"VideoAnalysisConfig",
|
|
@@ -292,7 +292,9 @@ class VideoDubber:
|
|
|
292
292
|
video_duration = video.total_seconds
|
|
293
293
|
|
|
294
294
|
if video_duration > speech_duration:
|
|
295
|
-
|
|
295
|
+
from videopython.base.transforms import CutSeconds
|
|
296
|
+
|
|
297
|
+
output_video = CutSeconds(start=0, end=speech_duration).apply(video)
|
|
296
298
|
else:
|
|
297
299
|
output_video = video
|
|
298
300
|
|
|
@@ -4,13 +4,15 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import io
|
|
6
6
|
import logging
|
|
7
|
-
import subprocess
|
|
8
7
|
import wave
|
|
9
8
|
from pathlib import Path
|
|
10
9
|
from typing import TYPE_CHECKING
|
|
11
10
|
|
|
12
11
|
import numpy as np
|
|
13
12
|
|
|
13
|
+
from videopython.base import _ffmpeg
|
|
14
|
+
from videopython.base.exceptions import FFmpegRunError
|
|
15
|
+
|
|
14
16
|
if TYPE_CHECKING:
|
|
15
17
|
from videopython.base.audio import Audio
|
|
16
18
|
|
|
@@ -95,9 +97,10 @@ def replace_audio_stream(
|
|
|
95
97
|
]
|
|
96
98
|
|
|
97
99
|
logger.info("replace_audio_stream: %s + %s -> %s", video_path, audio_path, output_path)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
100
|
+
try:
|
|
101
|
+
_ffmpeg.run(cmd)
|
|
102
|
+
except FFmpegRunError as e:
|
|
103
|
+
raise RemuxError(str(e)) from e
|
|
101
104
|
|
|
102
105
|
|
|
103
106
|
def replace_audio_stream_from_audio(
|
|
@@ -175,7 +178,7 @@ def replace_audio_stream_from_audio(
|
|
|
175
178
|
len(wav_bytes),
|
|
176
179
|
output_path,
|
|
177
180
|
)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
raise RemuxError(
|
|
181
|
+
try:
|
|
182
|
+
_ffmpeg.run(cmd, stdin=wav_bytes)
|
|
183
|
+
except FFmpegRunError as e:
|
|
184
|
+
raise RemuxError(str(e)) from e
|