PyPI - videopython - Versions diffs - 0.31.2__tar.gz → 0.32.0__tar.gz - Mend

videopython 0.31.2__tar.gz → 0.32.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

{videopython-0.31.2 → videopython-0.32.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.31.2
+Version: 0.32.0
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://videopython.com
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -91,7 +91,8 @@ Every editing primitive is an `Operation` subclass — a Pydantic model
 whose fields ARE the JSON wire format. Apply one to a `Video`:
 ```python
-from videopython.base import Video, CutSeconds, Resize, Fade
+from videopython.base import Video
+from videopython.editing import CutSeconds, Resize, Fade
 video = Video.from_path("raw.mp4")
 video = CutSeconds(start=10, end=25).apply(video)
@@ -141,7 +142,7 @@ instead if you want the result back in memory as a `Video`.
 ```python
 from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
-from videopython.base import Resize
+from videopython.editing import Resize
 image = TextToImage().generate_image("A cinematic mountain sunrise")
 video = ImageToVideo().generate_video(image=image)
@@ -182,7 +183,7 @@ Every registered op exposes its own Pydantic schema, so an agent can
 introspect what's available without hardcoded lists:
 ```python
-from videopython.base import Operation, OpCategory
+from videopython.editing import Operation, OpCategory
 for op_id, cls in Operation.registry().items():
     print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
@@ -205,18 +206,30 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https
 ## Features
-### `videopython.base` - core editing (no AI dependencies)
+### `videopython.base` - data containers + I/O (no AI dependencies)
 | Area | Highlights |
 |---|---|
 | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
+| **Text rendering** | `ImageText` - generic PIL text-on-image primitive |
+| **Transcription** | `Transcription`, `TranscriptionSegment`, `TranscriptionWord` - data classes returned by transcription backends |
+| **Result types** | `BoundingBox`, `DetectedFace`, `FaceTrack`, `SceneBoundary`, `AudioEvent`, `MotionInfo`, ... - shared by editing and AI |
+### `videopython.audio` - audio data container
+| Area | Highlights |
+|---|---|
+| **Audio** | `Audio`, `AudioMetadata` - load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
+### `videopython.editing` - editing primitives + plan runner
+| Area | Highlights |
+|---|---|
 | **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
 | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
 | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
 | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
-| **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
-| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
-| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
+| **Subtitles** | `TranscriptionOverlay` - animated word-by-word subtitle rendering |
 API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)

{videopython-0.31.2 → videopython-0.32.0}/README.md RENAMED Viewed

@@ -42,7 +42,8 @@ Every editing primitive is an `Operation` subclass — a Pydantic model
 whose fields ARE the JSON wire format. Apply one to a `Video`:
 ```python
-from videopython.base import Video, CutSeconds, Resize, Fade
+from videopython.base import Video
+from videopython.editing import CutSeconds, Resize, Fade
 video = Video.from_path("raw.mp4")
 video = CutSeconds(start=10, end=25).apply(video)
@@ -92,7 +93,7 @@ instead if you want the result back in memory as a `Video`.
 ```python
 from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
-from videopython.base import Resize
+from videopython.editing import Resize
 image = TextToImage().generate_image("A cinematic mountain sunrise")
 video = ImageToVideo().generate_video(image=image)
@@ -133,7 +134,7 @@ Every registered op exposes its own Pydantic schema, so an agent can
 introspect what's available without hardcoded lists:
 ```python
-from videopython.base import Operation, OpCategory
+from videopython.editing import Operation, OpCategory
 for op_id, cls in Operation.registry().items():
     print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
@@ -156,18 +157,30 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https
 ## Features
-### `videopython.base` - core editing (no AI dependencies)
+### `videopython.base` - data containers + I/O (no AI dependencies)
 | Area | Highlights |
 |---|---|
 | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
+| **Text rendering** | `ImageText` - generic PIL text-on-image primitive |
+| **Transcription** | `Transcription`, `TranscriptionSegment`, `TranscriptionWord` - data classes returned by transcription backends |
+| **Result types** | `BoundingBox`, `DetectedFace`, `FaceTrack`, `SceneBoundary`, `AudioEvent`, `MotionInfo`, ... - shared by editing and AI |
+### `videopython.audio` - audio data container
+| Area | Highlights |
+|---|---|
+| **Audio** | `Audio`, `AudioMetadata` - load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
+### `videopython.editing` - editing primitives + plan runner
+| Area | Highlights |
+|---|---|
 | **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
 | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
 | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
 | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
-| **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
-| **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
-| **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
+| **Subtitles** | `TranscriptionOverlay` - animated word-by-word subtitle rendering |
 API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)

{videopython-0.31.2 → videopython-0.32.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "videopython"
-version = "0.31.2"
+version = "0.32.0"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/dubber.py RENAMED Viewed

@@ -218,7 +218,7 @@ class VideoDubber:
             source transcription. The output video is written to ``output_path``.
         """
         from videopython.ai.dubbing.remux import replace_audio_stream_from_audio
-        from videopython.base.audio import Audio
+        from videopython.audio import Audio
         input_path = Path(input_path)
         output_path = Path(output_path)
@@ -292,7 +292,7 @@ class VideoDubber:
         video_duration = video.total_seconds
         if video_duration > speech_duration:
-            from videopython.base.transforms import CutSeconds
+            from videopython.editing.transforms import CutSeconds
             output_video = CutSeconds(start=0, end=speech_duration).apply(video)
         else:

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/models.py RENAMED Viewed

@@ -5,8 +5,8 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any
-from videopython.base.audio import Audio
-from videopython.base.text.transcription import Transcription, TranscriptionSegment
+from videopython.audio import Audio
+from videopython.base.transcription import Transcription, TranscriptionSegment
 if TYPE_CHECKING:
     from videopython.ai.dubbing.quality import TranscriptQuality

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/pipeline.py RENAMED Viewed

@@ -22,8 +22,8 @@ from videopython.ai.generation.translation import (
 if TYPE_CHECKING:
     from videopython.ai.dubbing.models import TranslatedSegment
-    from videopython.base.audio import Audio
-    from videopython.base.text.transcription import Transcription
+    from videopython.audio import Audio
+    from videopython.base.transcription import Transcription
 TranslatorChoice = Literal["auto", "marian", "qwen3"]
@@ -41,7 +41,7 @@ def _peak_match(target: Audio, reference: Audio) -> Audio:
     Used as the fallback when LUFS measurement isn't viable (clip < 0.4s
     or silent input). The new ``Audio`` shares no buffer with ``target``.
     """
-    from videopython.base.audio import Audio as _Audio
+    from videopython.audio import Audio as _Audio
     target_peak = float(np.max(np.abs(target.data))) if target.data.size else 0.0
     reference_peak = float(np.max(np.abs(reference.data))) if reference.data.size else 0.0
@@ -71,7 +71,7 @@ def _loudness_match(target: Audio, reference: Audio) -> Audio:
     are clamped to 0.99 — BS.1770 has no peak ceiling and a sufficiently
     quiet source can demand gain that would otherwise clip.
     """
-    from videopython.base.audio import Audio as _Audio
+    from videopython.audio import Audio as _Audio
     target_dur = target.metadata.duration_seconds
     ref_dur = reference.metadata.duration_seconds
@@ -427,7 +427,7 @@ class LocalDubbingPipeline:
         every candidate is rejected, so the dub continues with the best
         sample we have rather than silently dropping the speaker.
         """
-        from videopython.base.audio import Audio
+        from videopython.audio import Audio
         voice_samples: dict[str, Audio] = {}
@@ -558,7 +558,7 @@ class LocalDubbingPipeline:
                 can use ``Audio.from_path(path)`` to avoid loading video frames.
             transcription: Optional pre-computed Transcription object. When provided,
                 the internal Whisper transcription step is skipped (saving time and VRAM).
-                Must be a ``videopython.base.text.transcription.Transcription`` instance
+                Must be a ``videopython.base.transcription.Transcription`` instance
                 with populated ``segments``. Speaker labels on the supplied transcription
                 drive per-speaker voice cloning. If the supplied transcription has no
                 speakers and ``enable_diarization=True``, pyannote is run standalone on
@@ -805,7 +805,7 @@ class LocalDubbingPipeline:
             source_audio: Source audio track to revoice. Callers with a ``Video``
                 object should pass ``video.audio``.
         """
-        from videopython.base.audio import Audio
+        from videopython.audio import Audio
         def report_progress(stage: str, progress: float) -> None:
             if progress_callback:

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/quality.py RENAMED Viewed

@@ -24,7 +24,7 @@ from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Literal
 if TYPE_CHECKING:
-    from videopython.base.text.transcription import Transcription
+    from videopython.base.transcription import Transcription
 # Tuned conservatively to favor "warn" over "reject"; first-week production

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/remux.py RENAMED Viewed

@@ -14,7 +14,7 @@ from videopython.base import _ffmpeg
 from videopython.base.exceptions import FFmpegRunError
 if TYPE_CHECKING:
-    from videopython.base.audio import Audio
+    from videopython.audio import Audio
 logger = logging.getLogger(__name__)

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/timing.py RENAMED Viewed

@@ -6,7 +6,7 @@ from dataclasses import dataclass
 import numpy as np
-from videopython.base.audio import Audio, AudioMetadata
+from videopython.audio import Audio, AudioMetadata
 @dataclass

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/audio.py RENAMED Viewed

@@ -5,7 +5,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any
 from videopython.ai._device import log_device_initialization, release_device_memory, select_device
-from videopython.base.audio import Audio, AudioMetadata
+from videopython.audio import Audio, AudioMetadata
 if TYPE_CHECKING:
     from pathlib import Path

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/qwen3.py RENAMED Viewed

@@ -27,7 +27,7 @@ from videopython.ai.generation.translation import (
     MarianTranslator,
     _is_translatable_text,
 )
-from videopython.base.text.transcription import TranscriptionSegment
+from videopython.base.transcription import TranscriptionSegment
 # Imported under TYPE_CHECKING only — qwen3 sits below videopython.ai.dubbing
 # in the import order (pipeline.py imports Qwen3Translator), so a top-level

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/translation.py RENAMED Viewed

@@ -17,7 +17,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any, Callable, Protocol, runtime_checkable
 from videopython.ai._device import log_device_initialization, release_device_memory, select_device
-from videopython.base.text.transcription import TranscriptionSegment
+from videopython.base.transcription import TranscriptionSegment
 # Imported under TYPE_CHECKING to avoid a circular dep through
 # videopython.ai.dubbing (the dubbing pipeline imports both

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/transforms.py RENAMED Viewed

@@ -12,8 +12,8 @@ from tqdm import tqdm
 from videopython.ai.understanding.faces import FaceTracker
 from videopython.base._dimensions import floor_to_even
-from videopython.base.operation import OpCategory, Operation
 from videopython.base.video import Video
+from videopython.editing.operation import OpCategory, Operation
 logger = logging.getLogger(__name__)

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/audio.py RENAMED Viewed

@@ -6,9 +6,9 @@ import logging
 from typing import Any, Literal
 from videopython.ai._device import log_device_initialization, release_device_memory, select_device
-from videopython.base.audio import Audio
+from videopython.audio import Audio
 from videopython.base.description import AudioClassification, AudioEvent
-from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord
+from videopython.base.transcription import Transcription, TranscriptionSegment, TranscriptionWord
 from videopython.base.video import Video
 logger = logging.getLogger(__name__)

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/separation.py RENAMED Viewed

@@ -7,7 +7,7 @@ from typing import Any
 from videopython.ai._device import log_device_initialization, release_device_memory, select_device
 from videopython.ai.dubbing.models import SeparatedAudio
-from videopython.base.audio import Audio, AudioMetadata
+from videopython.audio import Audio, AudioMetadata
 logger = logging.getLogger(__name__)

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/video_analysis.py RENAMED Viewed

@@ -25,8 +25,8 @@ from videopython.ai.understanding import (
     SemanticSceneDetector,
 )
 from videopython.ai.understanding.faces import FaceTracker
+from videopython.audio import Audio
 from videopython.base import _ffmpeg
-from videopython.base.audio import Audio
 from videopython.base.description import (
     AudioClassification,
     AudioEvent,
@@ -35,7 +35,7 @@ from videopython.base.description import (
     SceneDescription,
 )
 from videopython.base.exceptions import FFmpegProbeError
-from videopython.base.text.transcription import Transcription
+from videopython.base.transcription import Transcription
 from videopython.base.video import Video, VideoMetadata, extract_frames_at_times
 __all__ = ["VideoAnalysis", "VideoAnalysisConfig", "VideoAnalyzer"]
@@ -949,7 +949,7 @@ class VideoAnalyzer:
             return None
         if source_path is not None:
             return Video.from_path(str(source_path), start_second=start_second, end_second=end_second)
-        from videopython.base.transforms import CutSeconds
+        from videopython.editing.transforms import CutSeconds
         return CutSeconds(start=start_second, end=end_second).apply(_require_video(video))

{videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython}/audio/audio.py RENAMED Viewed

@@ -13,7 +13,7 @@ from videopython.base import _ffmpeg
 from videopython.base.exceptions import AudioLoadError, FFmpegProbeError
 if TYPE_CHECKING:
-    from videopython.base.audio.analysis import AudioLevels, AudioSegment, AudioSegmentType, SilentSegment
+    from videopython.audio.analysis import AudioLevels, AudioSegment, AudioSegmentType, SilentSegment
 @dataclass
@@ -879,7 +879,7 @@ class Audio:
             >>> levels = audio.get_levels()
             >>> print(f"Peak: {levels.db_peak:.1f} dB")
         """
-        from videopython.base.audio.analysis import AudioLevels
+        from videopython.audio.analysis import AudioLevels
         segment = self.slice(start_seconds, end_seconds)
         data = segment.data.flatten() if segment.metadata.channels == 2 else segment.data
@@ -947,7 +947,7 @@ class Audio:
             >>> for seg in silent_segments:
             ...     print(f"Silence: {seg.start:.2f}s - {seg.end:.2f}s")
         """
-        from videopython.base.audio.analysis import SilentSegment
+        from videopython.audio.analysis import SilentSegment
         levels_over_time = self.get_levels_over_time(window_seconds=window_seconds, hop_seconds=window_seconds / 2)
@@ -1027,7 +1027,7 @@ class Audio:
             >>> for seg in segments:
             ...     print(f"{seg.start:.1f}-{seg.end:.1f}s: {seg.segment_type.value}")
         """
-        from videopython.base.audio.analysis import AudioSegment
+        from videopython.audio.analysis import AudioSegment
         hop_length = segment_length * (1 - overlap)
         segments = []
@@ -1064,7 +1064,7 @@ class Audio:
         Returns:
             Tuple of (AudioSegmentType, confidence)
         """
-        from videopython.base.audio.analysis import AudioSegmentType
+        from videopython.audio.analysis import AudioSegmentType
         data = segment.to_mono().data

videopython-0.32.0/src/videopython/base/__init__.py ADDED Viewed

@@ -0,0 +1,62 @@
+from .description import (
+    AudioClassification,
+    AudioEvent,
+    BoundingBox,
+    DetectedFace,
+    DetectedObject,
+    DetectedText,
+    FaceTrack,
+    MotionInfo,
+    SceneBoundary,
+    SceneDescription,
+)
+from .exceptions import (
+    AudioError,
+    AudioLoadError,
+    OutOfBoundsError,
+    TextRenderError,
+    TransformError,
+    VideoError,
+    VideoLoadError,
+    VideoMetadataError,
+    VideoPythonError,
+)
+from .image_text import AnchorPoint, ImageText, TextAlign
+from .transcription import Transcription, TranscriptionSegment, TranscriptionWord
+from .video import FrameIterator, Video, VideoMetadata
+__all__ = [
+    # Core
+    "Video",
+    "VideoMetadata",
+    "FrameIterator",
+    # Exceptions
+    "VideoPythonError",
+    "VideoError",
+    "VideoLoadError",
+    "VideoMetadataError",
+    "AudioError",
+    "AudioLoadError",
+    "TransformError",
+    "TextRenderError",
+    "OutOfBoundsError",
+    # Text rendering primitives
+    "ImageText",
+    "TextAlign",
+    "AnchorPoint",
+    # Transcription data classes
+    "Transcription",
+    "TranscriptionSegment",
+    "TranscriptionWord",
+    # Detection / scene / motion result types (consumed by ai/, editing/)
+    "BoundingBox",
+    "DetectedObject",
+    "DetectedFace",
+    "DetectedText",
+    "FaceTrack",
+    "AudioEvent",
+    "AudioClassification",
+    "MotionInfo",
+    "SceneBoundary",
+    "SceneDescription",
+]

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/_dimensions.py RENAMED Viewed

@@ -3,7 +3,7 @@
 Centralises the libx264+yuv420p even-dimension constraint and the
 two "round to even" calculations that previously lived (with subtly
 different semantics) in ``base/video.py``, ``ai/transforms.py``, and
-``base/transforms.py``.
+``editing/transforms.py``.
 """
 from __future__ import annotations

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/_video_io.py RENAMED Viewed

@@ -20,9 +20,9 @@ from typing import Literal, get_args
 import numpy as np
+from videopython.audio import Audio
 from videopython.base import _ffmpeg
 from videopython.base._dimensions import require_even
-from videopython.base.audio import Audio
 from videopython.base.exceptions import (
     AudioLoadError,
     FFmpegRunError,

{videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/description.py RENAMED Viewed

@@ -22,8 +22,10 @@ __all__ = [
 class SceneBoundary:
     """Timing information for a detected scene.
-    A lightweight structure representing scene boundaries detected by SceneDetector.
-    This is a backbone type - higher-level scene analysis belongs in orchestration packages.
+    A lightweight structure representing scene boundaries returned by
+    scene detectors (e.g. ``videopython.ai.SemanticSceneDetector``). This
+    is a backbone type — higher-level scene analysis lives in orchestration
+    packages.
     Attributes:
         start: Scene start time in seconds

videopython 0.31.2__tar.gz → 0.32.0__tar.gz

videopython 0.31.2__tar.gz → 0.32.0__tar.gz