PyPI - videopython - Versions diffs - 0.30.0__tar.gz → 0.31.1__tar.gz - Mend

videopython 0.30.0__tar.gz → 0.31.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

{videopython-0.30.0 → videopython-0.31.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.30.0
+Version: 0.31.1
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://videopython.com
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -85,22 +85,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
 ## Quick Start
-### Video editing
+### Imperative editing
+Every editing primitive is an `Operation` subclass — a Pydantic model
+whose fields ARE the JSON wire format. Apply one to a `Video`:
+```python
+from videopython.base import Video, CutSeconds, Resize, Fade
+video = Video.from_path("raw.mp4")
+video = CutSeconds(start=10, end=25).apply(video)
+video = Resize(width=1080, height=1920).apply(video)
+video = Fade(mode="in", duration=0.5).apply(video)
+video.save("output.mp4")
+```
+Concatenate clips with `+` (both clips must share the same fps and dimensions):
 ```python
-from videopython import Video
-from videopython.base import FadeTransition
-intro = Video.from_path("intro.mp4").resize(1080, 1920)
-clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
-final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
-final = final.add_audio_from_file("music.mp3")
-final.save("output.mp4")
+combined = video_a + video_b
 ```
 ### JSON editing plans
-Define multi-segment edits as JSON - useful for LLM-driven workflows. `VideoEdit.json_schema()` returns a schema for plan generation/validation.
+Define multi-segment edits as JSON — the format LLM-driven workflows
+generate against. `VideoEdit.json_schema()` returns the schema:
 ```python
 from videopython.editing import VideoEdit
@@ -110,68 +119,89 @@ plan = {
         "source": "raw.mp4",
         "start": 10.0,
         "end": 20.0,
-        "transforms": [
-            {"op": "resize", "args": {"height": 1280}},
-            {"op": "speed_change", "args": {"speed": 1.25}},
+        "operations": [
+            {"op": "resize", "width": 1080, "height": 1920},
+            {"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
+            {"op": "fade", "mode": "in", "duration": 0.5,
+             "window": {"stop": 0.5}},
         ],
     }],
-    "post_effects": [
-        {"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
-    ],
 }
 edit = VideoEdit.from_dict(plan)
-edit.validate()   # dry-run via metadata (no frame loading)
-final = edit.run()
-final.save("output.mp4")
+edit.validate()                  # dry-run via metadata, no frames loaded
+edit.run_to_file("output.mp4")   # stream to disk, ~constant memory
 ```
+`run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
+so memory stays bounded even for hour-long sources. Use `edit.run()`
+instead if you want the result back in memory as a `Video`.
 ### AI generation
 ```python
 from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
+from videopython.base import Resize
 image = TextToImage().generate_image("A cinematic mountain sunrise")
-video = ImageToVideo().generate_video(image=image).resize(1080, 1920)
+video = ImageToVideo().generate_video(image=image)
 audio = TextToSpeech().generate_audio("Welcome to videopython.")
+video = Resize(width=1080, height=1920).apply(video)
 video.add_audio(audio).save("ai_video.mp4")
 ```
 ## LLM & AI Agent Integration
-videopython is designed to be controlled by LLMs. Every video operation exposes a machine-readable spec with descriptions, parameter types, and value constraints - all available as JSON Schema at runtime.
+The library is built for LLM-driven editing. Two surfaces matter:
-**Schema generation** - `VideoEdit.json_schema()` returns a complete JSON Schema describing valid edit plans. Pass it directly as a tool schema or structured-output format to any LLM API:
+**1. Plan schema for tool / structured-output calls.**
+`VideoEdit.json_schema()` returns a JSON Schema covering segments,
+`post_operations`, and a discriminated union over every registered
+`Operation`. Drop it into any LLM API:
 ```python
 from videopython.editing import VideoEdit
 schema = VideoEdit.json_schema()
-# Pass `schema` to your LLM as a function/tool definition or response format.
-# The LLM generates a plan dict, then:
+# Anthropic: tools=[{"name": "edit", "input_schema": schema}]
+# OpenAI:    tools=[{"type": "function",
+#                    "function": {"name": "edit", "parameters": schema}}]
+```
+Validate the LLM's output without loading any frames, then run it:
+```python
 edit = VideoEdit.from_dict(plan)
-edit.validate()   # dry-run: checks sources, time ranges, params - no frames loaded
-final = edit.run()
-final.save("output.mp4")
+edit.validate()                  # catches bad ops, time ranges, fps mismatches
+edit.run_to_file("output.mp4")
 ```
-**Operation discovery** - the registry lets an LLM (or your code) inspect all available operations, their parameters, and constraints:
+**2. Operation discovery for agent loops.**
+Every registered op exposes its own Pydantic schema, so an agent can
+introspect what's available without hardcoded lists:
 ```python
-from videopython.base import get_operation_specs, get_specs_by_category, OperationCategory
+from videopython.base import Operation, OpCategory
-all_ops = get_operation_specs()                                    # all registered operations
-transforms = get_specs_by_category(OperationCategory.TRANSFORMATION)  # just transforms
+for op_id, cls in Operation.registry().items():
+    print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
-spec = all_ops["color_adjust"]
-print(spec.description)       # LLM-friendly docstring
-print(spec.to_json_schema())  # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
+schema = Operation.get("color_adjust").model_json_schema()  # per-op schema
 ```
-Every operation has LLM-optimized descriptions and rich constraints (`minimum`, `maximum`, `enum`, `exclusive_minimum`, etc.) so models generate valid parameters on the first try.
+Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
+nullability) flow through to the schema, so LLMs that support
+constrained generation produce valid parameters on the first try.
+For ops that need side-channel data (e.g. `silence_removal` and
+`add_subtitles` need a `Transcription`), pass it via `context`:
+```python
+edit.run(context={"transcription": my_transcription})
+```
-Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registry](https://videopython.com/api/registry/)
+Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
 ## Features
@@ -180,16 +210,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
 | Area | Highlights |
 |---|---|
 | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
-| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with full JSON Schema generation, dry-run validation, and operation registry |
-| **Multicam editing** | `MultiCamEdit`, `CutPoint` - switch between synchronized camera angles with transitions, replace audio with external track |
-| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, picture-in-picture, reverse, freeze frame, silence removal |
-| **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
+| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
+| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
+| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
 | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
 | **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
 | **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
 | **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
-API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Transforms](https://videopython.com/api/transforms/) | [Transitions](https://videopython.com/api/transitions/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
+API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
 ### `videopython.ai` - local AI features (install with `[ai]`)
@@ -199,7 +228,7 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
 | **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
 | **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
 | **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
-| **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
+| **Transforms** | `FaceTrackingCrop` |
 | **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
 API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)

{videopython-0.30.0 → videopython-0.31.1}/README.md RENAMED Viewed

@@ -36,22 +36,31 @@ Python `>=3.10, <3.14`. AI features run locally - no cloud API keys required, bu
 ## Quick Start
-### Video editing
+### Imperative editing
+Every editing primitive is an `Operation` subclass — a Pydantic model
+whose fields ARE the JSON wire format. Apply one to a `Video`:
+```python
+from videopython.base import Video, CutSeconds, Resize, Fade
+video = Video.from_path("raw.mp4")
+video = CutSeconds(start=10, end=25).apply(video)
+video = Resize(width=1080, height=1920).apply(video)
+video = Fade(mode="in", duration=0.5).apply(video)
+video.save("output.mp4")
+```
+Concatenate clips with `+` (both clips must share the same fps and dimensions):
 ```python
-from videopython import Video
-from videopython.base import FadeTransition
-intro = Video.from_path("intro.mp4").resize(1080, 1920)
-clip = Video.from_path("raw.mp4").cut(10, 25).resize(1080, 1920).resample_fps(30)
-final = intro.transition_to(clip, FadeTransition(effect_time_seconds=0.5))
-final = final.add_audio_from_file("music.mp3")
-final.save("output.mp4")
+combined = video_a + video_b
 ```
 ### JSON editing plans
-Define multi-segment edits as JSON - useful for LLM-driven workflows. `VideoEdit.json_schema()` returns a schema for plan generation/validation.
+Define multi-segment edits as JSON — the format LLM-driven workflows
+generate against. `VideoEdit.json_schema()` returns the schema:
 ```python
 from videopython.editing import VideoEdit
@@ -61,68 +70,89 @@ plan = {
         "source": "raw.mp4",
         "start": 10.0,
         "end": 20.0,
-        "transforms": [
-            {"op": "resize", "args": {"height": 1280}},
-            {"op": "speed_change", "args": {"speed": 1.25}},
+        "operations": [
+            {"op": "resize", "width": 1080, "height": 1920},
+            {"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
+            {"op": "fade", "mode": "in", "duration": 0.5,
+             "window": {"stop": 0.5}},
         ],
     }],
-    "post_effects": [
-        {"op": "fade", "args": {"mode": "in", "duration": 0.5}, "apply": {"start": 0.0, "stop": 0.5}},
-    ],
 }
 edit = VideoEdit.from_dict(plan)
-edit.validate()   # dry-run via metadata (no frame loading)
-final = edit.run()
-final.save("output.mp4")
+edit.validate()                  # dry-run via metadata, no frames loaded
+edit.run_to_file("output.mp4")   # stream to disk, ~constant memory
 ```
+`run_to_file()` pipes ffmpeg decode → per-frame effects → ffmpeg encode,
+so memory stays bounded even for hour-long sources. Use `edit.run()`
+instead if you want the result back in memory as a `Video`.
 ### AI generation
 ```python
 from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
+from videopython.base import Resize
 image = TextToImage().generate_image("A cinematic mountain sunrise")
-video = ImageToVideo().generate_video(image=image).resize(1080, 1920)
+video = ImageToVideo().generate_video(image=image)
 audio = TextToSpeech().generate_audio("Welcome to videopython.")
+video = Resize(width=1080, height=1920).apply(video)
 video.add_audio(audio).save("ai_video.mp4")
 ```
 ## LLM & AI Agent Integration
-videopython is designed to be controlled by LLMs. Every video operation exposes a machine-readable spec with descriptions, parameter types, and value constraints - all available as JSON Schema at runtime.
+The library is built for LLM-driven editing. Two surfaces matter:
-**Schema generation** - `VideoEdit.json_schema()` returns a complete JSON Schema describing valid edit plans. Pass it directly as a tool schema or structured-output format to any LLM API:
+**1. Plan schema for tool / structured-output calls.**
+`VideoEdit.json_schema()` returns a JSON Schema covering segments,
+`post_operations`, and a discriminated union over every registered
+`Operation`. Drop it into any LLM API:
 ```python
 from videopython.editing import VideoEdit
 schema = VideoEdit.json_schema()
-# Pass `schema` to your LLM as a function/tool definition or response format.
-# The LLM generates a plan dict, then:
+# Anthropic: tools=[{"name": "edit", "input_schema": schema}]
+# OpenAI:    tools=[{"type": "function",
+#                    "function": {"name": "edit", "parameters": schema}}]
+```
+Validate the LLM's output without loading any frames, then run it:
+```python
 edit = VideoEdit.from_dict(plan)
-edit.validate()   # dry-run: checks sources, time ranges, params - no frames loaded
-final = edit.run()
-final.save("output.mp4")
+edit.validate()                  # catches bad ops, time ranges, fps mismatches
+edit.run_to_file("output.mp4")
 ```
-**Operation discovery** - the registry lets an LLM (or your code) inspect all available operations, their parameters, and constraints:
+**2. Operation discovery for agent loops.**
+Every registered op exposes its own Pydantic schema, so an agent can
+introspect what's available without hardcoded lists:
 ```python
-from videopython.base import get_operation_specs, get_specs_by_category, OperationCategory
+from videopython.base import Operation, OpCategory
-all_ops = get_operation_specs()                                    # all registered operations
-transforms = get_specs_by_category(OperationCategory.TRANSFORMATION)  # just transforms
+for op_id, cls in Operation.registry().items():
+    print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
-spec = all_ops["color_adjust"]
-print(spec.description)       # LLM-friendly docstring
-print(spec.to_json_schema())  # {"brightness": {"type": "number", "minimum": -1, "maximum": 1}, ...}
+schema = Operation.get("color_adjust").model_json_schema()  # per-op schema
 ```
-Every operation has LLM-optimized descriptions and rich constraints (`minimum`, `maximum`, `enum`, `exclusive_minimum`, etc.) so models generate valid parameters on the first try.
+Field constraints (`minimum`, `maximum`, `enum`, `exclusiveMinimum`,
+nullability) flow through to the schema, so LLMs that support
+constrained generation produce valid parameters on the first try.
+For ops that need side-channel data (e.g. `silence_removal` and
+`add_subtitles` need a `Transcription`), pass it via `context`:
+```python
+edit.run(context={"transcription": my_transcription})
+```
-Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registry](https://videopython.com/api/registry/)
+Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [LLM Integration Guide](https://videopython.com/guides/llm-integration/)
 ## Features
@@ -131,16 +161,15 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operation Registr
 | Area | Highlights |
 |---|---|
 | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
-| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with full JSON Schema generation, dry-run validation, and operation registry |
-| **Multicam editing** | `MultiCamEdit`, `CutPoint` - switch between synchronized camera angles with transitions, replace audio with external track |
-| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, picture-in-picture, reverse, freeze frame, silence removal |
-| **Transitions** | `FadeTransition`, `BlurTransition`, `InstantTransition` |
+| **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
+| **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
+| **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
 | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
 | **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
 | **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
 | **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
-API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Transforms](https://videopython.com/api/transforms/) | [Transitions](https://videopython.com/api/transitions/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
+API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
 ### `videopython.ai` - local AI features (install with `[ai]`)
@@ -150,7 +179,7 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
 | **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
 | **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
 | **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
-| **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
+| **Transforms** | `FaceTrackingCrop` |
 | **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
 API docs: [Generation](https://videopython.com/api/ai/generation/) | [Understanding](https://videopython.com/api/ai/understanding/) | [Transforms](https://videopython.com/api/ai/transforms/) | [Dubbing](https://videopython.com/api/ai/dubbing/)

{videopython-0.30.0 → videopython-0.31.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "videopython"
-version = "0.30.0"
+version = "0.31.1"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -136,6 +136,7 @@ Documentation = "https://videopython.com"
 [tool.mypy]
 mypy_path = "src/stubs"
+plugins = ["pydantic.mypy"]
 [[tool.mypy.overrides]]
 module = [

{videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/__init__.py RENAMED Viewed

@@ -1,7 +1,5 @@
-from videopython.ai import registry as _ai_registry  # noqa: F401
 from .generation import ImageToVideo, TextToImage, TextToMusic, TextToSpeech, TextToVideo
-from .transforms import FaceTrackingCrop, SplitScreenComposite
+from .transforms import FaceTrackingCrop
 from .understanding import (
     AudioClassifier,
     AudioToText,
@@ -26,7 +24,6 @@ __all__ = [
     "SemanticSceneDetector",
     # Transforms (AI-powered)
     "FaceTrackingCrop",
-    "SplitScreenComposite",
     # Video analysis
     "VideoAnalysis",
     "VideoAnalysisConfig",

{videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/dubber.py RENAMED Viewed

@@ -292,7 +292,9 @@ class VideoDubber:
         video_duration = video.total_seconds
         if video_duration > speech_duration:
-            output_video = video.cut(0, speech_duration)
+            from videopython.base.transforms import CutSeconds
+            output_video = CutSeconds(start=0, end=speech_duration).apply(video)
         else:
             output_video = video

{videopython-0.30.0 → videopython-0.31.1}/src/videopython/ai/dubbing/remux.py RENAMED Viewed

@@ -4,13 +4,15 @@ from __future__ import annotations
 import io
 import logging
-import subprocess
 import wave
 from pathlib import Path
 from typing import TYPE_CHECKING
 import numpy as np
+from videopython.base import _ffmpeg
+from videopython.base.exceptions import FFmpegRunError
 if TYPE_CHECKING:
     from videopython.base.audio import Audio
@@ -95,9 +97,10 @@ def replace_audio_stream(
     ]
     logger.info("replace_audio_stream: %s + %s -> %s", video_path, audio_path, output_path)
-    result = subprocess.run(cmd, capture_output=True)
-    if result.returncode != 0:
-        raise RemuxError(f"ffmpeg failed (exit {result.returncode}): {result.stderr.decode(errors='replace')}")
+    try:
+        _ffmpeg.run(cmd)
+    except FFmpegRunError as e:
+        raise RemuxError(str(e)) from e
 def replace_audio_stream_from_audio(
@@ -175,7 +178,7 @@ def replace_audio_stream_from_audio(
         len(wav_bytes),
         output_path,
     )
-    process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
-    _, stderr = process.communicate(wav_bytes)
-    if process.returncode != 0:
-        raise RemuxError(f"ffmpeg failed (exit {process.returncode}): {stderr.decode(errors='replace')}")
+    try:
+        _ffmpeg.run(cmd, stdin=wav_bytes)
+    except FFmpegRunError as e:
+        raise RemuxError(str(e)) from e

videopython 0.30.0__tar.gz → 0.31.1__tar.gz

videopython 0.30.0__tar.gz → 0.31.1__tar.gz