videopython 0.29.1__tar.gz → 0.31.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. {videopython-0.29.1 → videopython-0.31.0}/PKG-INFO +73 -45
  2. {videopython-0.29.1 → videopython-0.31.0}/README.md +72 -44
  3. {videopython-0.29.1 → videopython-0.31.0}/pyproject.toml +2 -1
  4. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/__init__.py +1 -7
  5. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/__init__.py +0 -3
  6. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/dubber.py +4 -13
  7. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/models.py +1 -2
  8. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/pipeline.py +16 -148
  9. videopython-0.31.0/src/videopython/ai/transforms.py +193 -0
  10. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/faces.py +4 -5
  11. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/temporal.py +1 -17
  12. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/video_analysis.py +15 -13
  13. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/__init__.py +7 -43
  14. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/description.py +18 -18
  15. videopython-0.31.0/src/videopython/base/effects.py +765 -0
  16. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/exceptions.py +0 -12
  17. videopython-0.31.0/src/videopython/base/operation.py +269 -0
  18. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/streaming.py +7 -4
  19. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/text/overlay.py +73 -105
  20. videopython-0.31.0/src/videopython/base/transforms.py +612 -0
  21. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/video.py +3 -514
  22. videopython-0.31.0/src/videopython/editing/__init__.py +6 -0
  23. videopython-0.31.0/src/videopython/editing/video_edit.py +539 -0
  24. videopython-0.29.1/src/videopython/ai/dubbing/cache.py +0 -325
  25. videopython-0.29.1/src/videopython/ai/registry.py +0 -33
  26. videopython-0.29.1/src/videopython/ai/swapping/__init__.py +0 -46
  27. videopython-0.29.1/src/videopython/ai/swapping/inpainter.py +0 -264
  28. videopython-0.29.1/src/videopython/ai/swapping/models.py +0 -221
  29. videopython-0.29.1/src/videopython/ai/swapping/segmenter.py +0 -577
  30. videopython-0.29.1/src/videopython/ai/swapping/swapper.py +0 -524
  31. videopython-0.29.1/src/videopython/ai/transforms.py +0 -531
  32. videopython-0.29.1/src/videopython/base/combine.py +0 -61
  33. videopython-0.29.1/src/videopython/base/effects.py +0 -1046
  34. videopython-0.29.1/src/videopython/base/progress.py +0 -63
  35. videopython-0.29.1/src/videopython/base/registry.py +0 -817
  36. videopython-0.29.1/src/videopython/base/transforms.py +0 -916
  37. videopython-0.29.1/src/videopython/base/transitions.py +0 -200
  38. videopython-0.29.1/src/videopython/base/utils.py +0 -6
  39. videopython-0.29.1/src/videopython/editing/__init__.py +0 -11
  40. videopython-0.29.1/src/videopython/editing/multicam.py +0 -398
  41. videopython-0.29.1/src/videopython/editing/premiere_xml.py +0 -313
  42. videopython-0.29.1/src/videopython/editing/video_edit.py +0 -1384
  43. {videopython-0.29.1 → videopython-0.31.0}/.gitignore +0 -0
  44. {videopython-0.29.1 → videopython-0.31.0}/LICENSE +0 -0
  45. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/__init__.py +0 -0
  46. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/_device.py +0 -0
  47. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/quality.py +0 -0
  48. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/remux.py +0 -0
  49. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/dubbing/timing.py +0 -0
  50. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/__init__.py +0 -0
  51. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/audio.py +0 -0
  52. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/image.py +0 -0
  53. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/qwen3.py +0 -0
  54. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/translation.py +0 -0
  55. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/generation/video.py +0 -0
  56. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/__init__.py +0 -0
  57. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/audio.py +0 -0
  58. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/image.py +0 -0
  59. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/ai/understanding/separation.py +0 -0
  60. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/audio/__init__.py +0 -0
  61. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/audio/analysis.py +0 -0
  62. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/audio/audio.py +0 -0
  63. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/scene.py +0 -0
  64. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/text/__init__.py +0 -0
  65. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/base/text/transcription.py +0 -0
  66. {videopython-0.29.1 → videopython-0.31.0}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.29.1
3
+ Version: 0.31.0
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -85,22 +85,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
85
85
 
86
86
  ## Quick Start
87
87
 
88
- ### Video editing
88
+ ### Imperative editing
89
+
90
+ Every editing primitive is an `Operation` subclass — a Pydantic model
91
+ whose fields ARE the JSON wire format. Apply one to a `Video`:
92
+
93
+ ```python
94
+ from videopython.base import Video, CutSeconds, Resize, Fade
95
+
96
+ video = Video.from_path("raw.mp4")
97
+ video = CutSeconds(start=10, end=25).apply(video)
98
+ video = Resize(width=1080, height=1920).apply(video)
99
+ video = Fade(mode="in", duration=0.5).apply(video)
100
+ video.save("output.mp4")
101
+ ```
102
+
103
+ Concatenate clips with `+` (must share fps + dimensions):
89
104
 
90
105
  ```python
91
- from videopython import Video
92
- from videopython.base import FadeTransition
93
-
94
- intro = Video.from_path("intro.mp4").resize(1080, 1920)
95
- clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
96
- final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
97
- final = final.add_audio_from_file("music.mp3")
98
- final.save("output.mp4")
106
+ combined = video_a + video_b
99
107
  ```
100
108
 
101
109
  ### JSON editing plans
102
110
 
103
- Define multi-segment edits as JSON - useful for LLM-driven workflows. `VideoEdit.json_schema()` returns a schema for plan generation/validation.
111
+ Define multi-segment edits as JSON — the format LLM-driven workflows
112
+ generate against. `VideoEdit.json_schema()` returns the schema:
104
113
 
105
114
  ```python
106
115
  from videopython.editing import VideoEdit
@@ -110,68 +119,89 @@ plan = {
110
119
  "source": "raw.mp4",
111
120
  "start": 10.0,
112
121
  "end": 20.0,
113
- "transforms": [
114
- {"op": "resize", "args": {"height": 1280}},
115
- {"op": "speed_change", "args": {"speed": 1.25}},
122
+ "operations": [
123
+ {"op": "resize", "width": 1080, "height": 1920},
124
+ {"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
125
+ {"op": "fade", "mode": "in", "duration": 0.5,
126
+ "window": {"stop": 0.5}},
116
127
  ],
117
128
  }],
118
- "post_effects": [
119
- {"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
120
- ],
121
129
  }
122
130
 
123
131
  edit = VideoEdit.from_dict(plan)
124
- edit.validate() # dry-run via metadata (no frame loading)
125
- final = edit.run()
126
- final.save("output.mp4")
132
+ edit.validate() # dry-run via metadata, no frames loaded
133
+ edit.run_to_file("output.mp4") # stream to disk, ~constant memory
127
134
  ```
128
135
 
136
+ `run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
137
+ so memory stays bounded even for hour-long sources. Use `edit.run()`
138
+ instead if you want the result back in memory as a `Video`.
139
+
129
140
  ### AI generation
130
141
 
131
142
  ```python
132
143
  from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
144
+ from videopython.base import Resize
133
145
 
134
146
  image = TextToImage().generate_image("A cinematic mountain sunrise")
135
- video = ImageToVideo().generate_video(image=image).resize(1080, 1920)
147
+ video = ImageToVideo().generate_video(image=image)
136
148
  audio = TextToSpeech().generate_audio("Welcome to videopython.")
149
+
150
+ video = Resize(width=1080, height=1920).apply(video)
137
151
  video.add_audio(audio).save("ai_video.mp4")
138
152
  ```
139
153
 
140
154
  ## LLM & AI Agent Integration
141
155
 
142
- videopython is designed to be controlled by LLMs. Every video operation exposes a machine-readable spec with descriptions, parameter types, and value constraints - all available as JSON Schema at runtime.
156
+ The library is built for LLM-driven editing. Two surfaces matter:
143
157
 
144
- **Schema generation** - `VideoEdit.json_schema()` returns a complete JSON Schema describing valid edit plans. Pass it directly as a tool schema or structured-output format to any LLM API:
158
+ **1. Plan schema for tool / structured-output calls.**
159
+ `VideoEdit.json_schema()` returns a JSON Schema covering segments,
160
+ `post_operations`, and a discriminated union over every registered
161
+ `Operation`. Drop it into any LLM API:
145
162
 
146
163
  ```python
147
164
  from videopython.editing import VideoEdit
148
165
 
149
166
  schema = VideoEdit.json_schema()
150
- # Pass `schema` to your LLM as a function/tool definition or response format.
151
- # The LLM generates a plan dict, then:
167
+ # Anthropic: tools=[{"name": "edit", "input_schema": schema}]
168
+ # OpenAI: tools=[{"type": "function",
169
+ # "function": {"name": "edit", "parameters": schema}}]
170
+ ```
171
+
172
+ Validate the LLM's output without touching the filesystem, then run it:
152
173
 
174
+ ```python
153
175
  edit = VideoEdit.from_dict(plan)
154
- edit.validate() # dry-run: checks sources, time ranges, params - no frames loaded
155
- final = edit.run()
156
- final.save("output.mp4")
176
+ edit.validate() # catches bad ops, time ranges, fps mismatches
177
+ edit.run_to_file("output.mp4")
157
178
  ```
158
179
 
159
- **Operation discovery** - the registry lets an LLM (or your code) inspect all available operations, their parameters, and constraints:
180
+ **2. Operation discovery for agent loops.**
181
+ Every registered op exposes its own Pydantic schema, so an agent can
182
+ introspect what's available without hardcoded lists:
160
183
 
161
184
  ```python
162
- from videopython.base import get_operation_specs, get_specs_by_category, OperationCategory
185
+ from videopython.base import Operation, OpCategory
163
186
 
164
- all_ops = get_operation_specs() # all registered operations
165
- transforms = get_specs_by_category(OperationCategory.TRANSFORMATION) # just transforms
187
+ for op_id, cls in Operation.registry().items():
188
+ print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
166
189
 
167
- spec = all_ops["color_adjust"]
168
- print(spec.description) # LLM-friendly docstring
169
- print(spec.to_json_schema()) # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
190
+ schema = Operation.get("color_adjust").model_json_schema() # per-op schema
170
191
  ```
171
192
 
172
- Every operation has LLM-optimized descriptions and rich constraints (`minimum`, `maximum`, `enum`, `exclusive_minimum`, etc.) so models generate valid parameters on the first try.
193
+ Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
194
+ nullability) flow through to the schema, so LLMs that support
195
+ constrained generation produce valid parameters on the first try.
196
+
197
+ For ops that need side-channel data (e.g. `silence_removal` and
198
+ `add_subtitles` need a `Transcription`), pass it via `context`:
199
+
200
+ ```python
201
+ edit.run(context={"transcription": my_transcription})
202
+ ```
173
203
 
174
- Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registry](https://videopython.com/api/registry/)
204
+ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
175
205
 
176
206
  ## Features
177
207
 
@@ -180,16 +210,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
180
210
  | Area | Highlights |
181
211
  |---|---|
182
212
  | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
183
- | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with full JSON Schema generation, dry-run validation, and operation registry |
184
- | **Multicam editing** | `MultiCamEdit`, `CutPoint` - switch between synchronized camera angles with transitions, replace audio with external track |
185
- | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, picture-in-picture, reverse, freeze frame, silence removal |
186
- | **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
213
+ | **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
214
+ | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
215
+ | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
187
216
  | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
188
217
  | **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
189
218
  | **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
190
219
  | **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
191
220
 
192
- API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Transforms](https://videopython.com/api/transforms/) | [Transitions](https://videopython.com/api/transitions/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
221
+ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
193
222
 
194
223
  ### `videopython.ai` - local AI features (install with `[ai]`)
195
224
 
@@ -199,11 +228,10 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
199
228
  | **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
200
229
  | **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
201
230
  | **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
202
- | **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
231
+ | **Transforms** | `FaceTrackingCrop` |
203
232
  | **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
204
- | **Object swapping** | `ObjectSwapper` - detect, segment, and inpaint objects in video |
205
233
 
206
- API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/) | [Object Swapping](https://videopython.com/api/ai/swapping/)
234
+ API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
207
235
 
208
236
  ## Examples
209
237
 
@@ -36,22 +36,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
36
36
 
37
37
  ## Quick Start
38
38
 
39
- ### Video editing
39
+ ### Imperative editing
40
+
41
+ Every editing primitive is an `Operation` subclass — a Pydantic model
42
+ whose fields ARE the JSON wire format. Apply one to a `Video`:
43
+
44
+ ```python
45
+ from videopython.base import Video, CutSeconds, Resize, Fade
46
+
47
+ video = Video.from_path("raw.mp4")
48
+ video = CutSeconds(start=10, end=25).apply(video)
49
+ video = Resize(width=1080, height=1920).apply(video)
50
+ video = Fade(mode="in", duration=0.5).apply(video)
51
+ video.save("output.mp4")
52
+ ```
53
+
54
+ Concatenate clips with `+` (must share fps + dimensions):
40
55
 
41
56
  ```python
42
- from videopython import Video
43
- from videopython.base import FadeTransition
44
-
45
- intro = Video.from_path("intro.mp4").resize(1080, 1920)
46
- clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
47
- final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
48
- final = final.add_audio_from_file("music.mp3")
49
- final.save("output.mp4")
57
+ combined = video_a + video_b
50
58
  ```
51
59
 
52
60
  ### JSON editing plans
53
61
 
54
- Define multi-segment edits as JSON - useful for LLM-driven workflows. `VideoEdit.json_schema()` returns a schema for plan generation/validation.
62
+ Define multi-segment edits as JSON — the format LLM-driven workflows
63
+ generate against. `VideoEdit.json_schema()` returns the schema:
55
64
 
56
65
  ```python
57
66
  from videopython.editing import VideoEdit
@@ -61,68 +70,89 @@ plan = {
61
70
  "source": "raw.mp4",
62
71
  "start": 10.0,
63
72
  "end": 20.0,
64
- "transforms": [
65
- {"op": "resize", "args": {"height": 1280}},
66
- {"op": "speed_change", "args": {"speed": 1.25}},
73
+ "operations": [
74
+ {"op": "resize", "width": 1080, "height": 1920},
75
+ {"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
76
+ {"op": "fade", "mode": "in", "duration": 0.5,
77
+ "window": {"stop": 0.5}},
67
78
  ],
68
79
  }],
69
- "post_effects": [
70
- {"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
71
- ],
72
80
  }
73
81
 
74
82
  edit = VideoEdit.from_dict(plan)
75
- edit.validate() # dry-run via metadata (no frame loading)
76
- final = edit.run()
77
- final.save("output.mp4")
83
+ edit.validate() # dry-run via metadata, no frames loaded
84
+ edit.run_to_file("output.mp4") # stream to disk, ~constant memory
78
85
  ```
79
86
 
87
+ `run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
88
+ so memory stays bounded even for hour-long sources. Use `edit.run()`
89
+ instead if you want the result back in memory as a `Video`.
90
+
80
91
  ### AI generation
81
92
 
82
93
  ```python
83
94
  from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
95
+ from videopython.base import Resize
84
96
 
85
97
  image = TextToImage().generate_image("A cinematic mountain sunrise")
86
- video = ImageToVideo().generate_video(image=image).resize(1080, 1920)
98
+ video = ImageToVideo().generate_video(image=image)
87
99
  audio = TextToSpeech().generate_audio("Welcome to videopython.")
100
+
101
+ video = Resize(width=1080, height=1920).apply(video)
88
102
  video.add_audio(audio).save("ai_video.mp4")
89
103
  ```
90
104
 
91
105
  ## LLM & AI Agent Integration
92
106
 
93
- videopython is designed to be controlled by LLMs. Every video operation exposes a machine-readable spec with descriptions, parameter types, and value constraints - all available as JSON Schema at runtime.
107
+ The library is built for LLM-driven editing. Two surfaces matter:
94
108
 
95
- **Schema generation** - `VideoEdit.json_schema()` returns a complete JSON Schema describing valid edit plans. Pass it directly as a tool schema or structured-output format to any LLM API:
109
+ **1. Plan schema for tool / structured-output calls.**
110
+ `VideoEdit.json_schema()` returns a JSON Schema covering segments,
111
+ `post_operations`, and a discriminated union over every registered
112
+ `Operation`. Drop it into any LLM API:
96
113
 
97
114
  ```python
98
115
  from videopython.editing import VideoEdit
99
116
 
100
117
  schema = VideoEdit.json_schema()
101
- # Pass `schema` to your LLM as a function/tool definition or response format.
102
- # The LLM generates a plan dict, then:
118
+ # Anthropic: tools=[{"name": "edit", "input_schema": schema}]
119
+ # OpenAI: tools=[{"type": "function",
120
+ # "function": {"name": "edit", "parameters": schema}}]
121
+ ```
122
+
123
+ Validate the LLM's output without touching the filesystem, then run it:
103
124
 
125
+ ```python
104
126
  edit = VideoEdit.from_dict(plan)
105
- edit.validate() # dry-run: checks sources, time ranges, params - no frames loaded
106
- final = edit.run()
107
- final.save("output.mp4")
127
+ edit.validate() # catches bad ops, time ranges, fps mismatches
128
+ edit.run_to_file("output.mp4")
108
129
  ```
109
130
 
110
- **Operation discovery** - the registry lets an LLM (or your code) inspect all available operations, their parameters, and constraints:
131
+ **2. Operation discovery for agent loops.**
132
+ Every registered op exposes its own Pydantic schema, so an agent can
133
+ introspect what's available without hardcoded lists:
111
134
 
112
135
  ```python
113
- from videopython.base import get_operation_specs, get_specs_by_category, OperationCategory
136
+ from videopython.base import Operation, OpCategory
114
137
 
115
- all_ops = get_operation_specs() # all registered operations
116
- transforms = get_specs_by_category(OperationCategory.TRANSFORMATION) # just transforms
138
+ for op_id, cls in Operation.registry().items():
139
+ print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
117
140
 
118
- spec = all_ops["color_adjust"]
119
- print(spec.description) # LLM-friendly docstring
120
- print(spec.to_json_schema()) # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
141
+ schema = Operation.get("color_adjust").model_json_schema() # per-op schema
121
142
  ```
122
143
 
123
- Every operation has LLM-optimized descriptions and rich constraints (`minimum`, `maximum`, `enum`, `exclusive_minimum`, etc.) so models generate valid parameters on the first try.
144
+ Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
145
+ nullability) flow through to the schema, so LLMs that support
146
+ constrained generation produce valid parameters on the first try.
147
+
148
+ For ops that need side-channel data (e.g. `silence_removal` and
149
+ `add_subtitles` need a `Transcription`), pass it via `context`:
150
+
151
+ ```python
152
+ edit.run(context={"transcription": my_transcription})
153
+ ```
124
154
 
125
- Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registry](https://videopython.com/api/registry/)
155
+ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
126
156
 
127
157
  ## Features
128
158
 
@@ -131,16 +161,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
131
161
  | Area | Highlights |
132
162
  |---|---|
133
163
  | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
134
- | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with full JSON Schema generation, dry-run validation, and operation registry |
135
- | **Multicam editing** | `MultiCamEdit`, `CutPoint` - switch between synchronized camera angles with transitions, replace audio with external track |
136
- | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, picture-in-picture, reverse, freeze frame, silence removal |
137
- | **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
164
+ | **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
165
+ | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
166
+ | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
138
167
  | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
139
168
  | **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
140
169
  | **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
141
170
  | **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
142
171
 
143
- API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Transforms](https://videopython.com/api/transforms/) | [Transitions](https://videopython.com/api/transitions/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
172
+ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
144
173
 
145
174
  ### `videopython.ai` - local AI features (install with `[ai]`)
146
175
 
@@ -150,11 +179,10 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
150
179
  | **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
151
180
  | **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
152
181
  | **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
153
- | **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
182
+ | **Transforms** | `FaceTrackingCrop` |
154
183
  | **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
155
- | **Object swapping** | `ObjectSwapper` - detect, segment, and inpaint objects in video |
156
184
 
157
- API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/) | [Object Swapping](https://videopython.com/api/ai/swapping/)
185
+ API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)
158
186
 
159
187
  ## Examples
160
188
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.29.1"
3
+ version = "0.31.0"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -136,6 +136,7 @@ Documentation = "https://videopython.com"
136
136
 
137
137
  [tool.mypy]
138
138
  mypy_path = "src/stubs"
139
+ plugins = ["pydantic.mypy"]
139
140
 
140
141
  [[tool.mypy.overrides]]
141
142
  module = [
@@ -1,8 +1,5 @@
1
- from videopython.ai import registry as _ai_registry # noqa: F401
2
-
3
1
  from .generation import ImageToVideo, TextToImage, TextToMusic, TextToSpeech, TextToVideo
4
- from .swapping import ObjectSwapper
5
- from .transforms import FaceTrackingCrop, SplitScreenComposite
2
+ from .transforms import FaceTrackingCrop
6
3
  from .understanding import (
7
4
  AudioClassifier,
8
5
  AudioToText,
@@ -27,9 +24,6 @@ __all__ = [
27
24
  "SemanticSceneDetector",
28
25
  # Transforms (AI-powered)
29
26
  "FaceTrackingCrop",
30
- "SplitScreenComposite",
31
- # Swapping
32
- "ObjectSwapper",
33
27
  # Video analysis
34
28
  "VideoAnalysis",
35
29
  "VideoAnalysisConfig",
@@ -1,6 +1,5 @@
1
1
  """Local video dubbing functionality."""
2
2
 
3
- from videopython.ai.dubbing.cache import DubCache, dub_cache_clear
4
3
  from videopython.ai.dubbing.dubber import VideoDubber
5
4
  from videopython.ai.dubbing.models import (
6
5
  DubbingResult,
@@ -26,7 +25,5 @@ __all__ = [
26
25
  "TranscriptQuality",
27
26
  "assess_transcript",
28
27
  "UnsupportedLanguageError",
29
- "DubCache",
30
- "dub_cache_clear",
31
28
  "Expressiveness",
32
29
  ]
@@ -55,13 +55,6 @@ class VideoDubber:
55
55
  See :class:`videopython.ai.generation.qwen3.Qwen3Translator`
56
56
  for tradeoffs (Qwen3 is slower on CPU but produces
57
57
  context-aware, length-budgeted output).
58
- cache_dir: When set, persist transcription, translated segments,
59
- and per-segment TTS WAVs under this directory and skip stages
60
- whose inputs already match a cache entry. Use to resume crashed
61
- long runs or to iterate on dub configuration without paying
62
- transcription cost each time. ``None`` (default) disables
63
- caching. Cache grows unbounded; clear via
64
- :func:`videopython.ai.dubbing.cache.dub_cache_clear`.
65
58
  """
66
59
 
67
60
  def __init__(
@@ -75,7 +68,6 @@ class VideoDubber:
75
68
  vocabulary: list[str] | None = None,
76
69
  strict_quality: bool = False,
77
70
  translator: TranslatorChoice = "auto",
78
- cache_dir: str | Path | None = None,
79
71
  ):
80
72
  self.device = device
81
73
  self.low_memory = low_memory
@@ -86,16 +78,14 @@ class VideoDubber:
86
78
  self.vocabulary = vocabulary
87
79
  self.strict_quality = strict_quality
88
80
  self.translator = translator
89
- self.cache_dir = cache_dir
90
81
  self._local_pipeline: Any = None
91
82
  requested = device.lower() if isinstance(device, str) else "auto"
92
83
  logger.info(
93
- "VideoDubber initialized with device=%s low_memory=%s whisper_model=%s translator=%s cache_dir=%s",
84
+ "VideoDubber initialized with device=%s low_memory=%s whisper_model=%s translator=%s",
94
85
  requested,
95
86
  low_memory,
96
87
  whisper_model,
97
88
  translator,
98
- cache_dir,
99
89
  )
100
90
 
101
91
  def _init_local_pipeline(self) -> None:
@@ -111,7 +101,6 @@ class VideoDubber:
111
101
  vocabulary=self.vocabulary,
112
102
  strict_quality=self.strict_quality,
113
103
  translator=self.translator,
114
- cache_dir=self.cache_dir,
115
104
  )
116
105
 
117
106
  def dub(
@@ -303,7 +292,9 @@ class VideoDubber:
303
292
  video_duration = video.total_seconds
304
293
 
305
294
  if video_duration > speech_duration:
306
- output_video = video.cut(0, speech_duration)
295
+ from videopython.base.transforms import CutSeconds
296
+
297
+ output_video = CutSeconds(start=0, end=speech_duration).apply(video)
307
298
  else:
308
299
  output_video = video
309
300
 
@@ -41,8 +41,7 @@ class Expressiveness:
41
41
  def as_kwargs(self) -> dict[str, float]:
42
42
  """Knobs as a dict, dropping ``None`` entries.
43
43
 
44
- Suitable for ``**``-expansion into Chatterbox or
45
- :meth:`DubCache.tts_key`.
44
+ Suitable for ``**``-expansion into Chatterbox.
46
45
  """
47
46
  return {
48
47
  name: value