videopython 0.25.4__tar.gz → 0.25.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {videopython-0.25.4 → videopython-0.25.6}/PKG-INFO +7 -14
  2. {videopython-0.25.4 → videopython-0.25.6}/README.md +1 -1
  3. {videopython-0.25.4 → videopython-0.25.6}/pyproject.toml +35 -20
  4. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/dubber.py +10 -1
  5. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/pipeline.py +28 -14
  6. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/timing.py +2 -2
  7. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/audio.py +27 -48
  8. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/translation.py +7 -3
  9. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/effects.py +21 -33
  10. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/transitions.py +2 -1
  11. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/multicam.py +26 -13
  12. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/premiere_xml.py +3 -3
  13. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/video_edit.py +11 -12
  14. {videopython-0.25.4 → videopython-0.25.6}/.gitignore +0 -0
  15. {videopython-0.25.4 → videopython-0.25.6}/LICENSE +0 -0
  16. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/__init__.py +0 -0
  17. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/__init__.py +0 -0
  18. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/_device.py +0 -0
  19. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/__init__.py +0 -0
  20. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/models.py +0 -0
  21. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/__init__.py +0 -0
  22. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/image.py +0 -0
  23. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/video.py +0 -0
  24. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/registry.py +0 -0
  25. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/swapping/__init__.py +0 -0
  26. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/swapping/inpainter.py +0 -0
  27. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/swapping/models.py +0 -0
  28. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/swapping/segmenter.py +0 -0
  29. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/swapping/swapper.py +0 -0
  30. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/transforms.py +0 -0
  31. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/understanding/__init__.py +0 -0
  32. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/understanding/audio.py +0 -0
  33. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/understanding/image.py +0 -0
  34. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/understanding/separation.py +0 -0
  35. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/understanding/temporal.py +0 -0
  36. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/video_analysis.py +0 -0
  37. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/__init__.py +0 -0
  38. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/audio/__init__.py +0 -0
  39. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/audio/analysis.py +0 -0
  40. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/audio/audio.py +0 -0
  41. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/combine.py +0 -0
  42. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/description.py +0 -0
  43. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/exceptions.py +0 -0
  44. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/progress.py +0 -0
  45. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/registry.py +0 -0
  46. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/scene.py +0 -0
  47. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/text/__init__.py +0 -0
  48. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/text/overlay.py +0 -0
  49. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/text/transcription.py +0 -0
  50. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/transforms.py +0 -0
  51. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/utils.py +0 -0
  52. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/video.py +0 -0
  53. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/__init__.py +0 -0
  54. {videopython-0.25.4 → videopython-0.25.6}/src/videopython/py.typed +0 -0
{videopython-0.25.4 → videopython-0.25.6}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.25.4
+Version: 0.25.6
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://videopython.com
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -21,13 +21,12 @@ Requires-Dist: numpy>=1.25.2
 Requires-Dist: opencv-python-headless>=4.9.0.80
 Requires-Dist: pillow>=12.1.1
 Requires-Dist: pydantic>=2.8.0
-Requires-Dist: torchcodec>=0.9.1
 Requires-Dist: tqdm>=4.66.3
 Provides-Extra: ai
 Requires-Dist: accelerate>=0.29.2; extra == 'ai'
-Requires-Dist: coqui-tts>=0.24.0; extra == 'ai'
+Requires-Dist: chatterbox-tts>=0.1.7; extra == 'ai'
 Requires-Dist: demucs>=4.0.0; extra == 'ai'
-Requires-Dist: diffusers>=0.26.3; extra == 'ai'
+Requires-Dist: diffusers>=0.30.0; extra == 'ai'
 Requires-Dist: easyocr>=1.7.0; extra == 'ai'
 Requires-Dist: hf-transfer>=0.1.9; extra == 'ai'
 Requires-Dist: numba>=0.61.0; extra == 'ai'
@@ -36,18 +35,12 @@ Requires-Dist: openai-whisper>=20240930; extra == 'ai'
 Requires-Dist: pyannote-audio>=4.0.0; extra == 'ai'
 Requires-Dist: scikit-learn>=1.3.0; extra == 'ai'
 Requires-Dist: scipy>=1.10.0; extra == 'ai'
-Requires-Dist: torch>=2.1.0; extra == 'ai'
+Requires-Dist: sentencepiece>=0.1.99; extra == 'ai'
+Requires-Dist: torch>=2.8.0; extra == 'ai'
+Requires-Dist: torchaudio>=2.8.0; extra == 'ai'
 Requires-Dist: transformers>=5.2.0; extra == 'ai'
 Requires-Dist: transnetv2-pytorch>=1.0.5; extra == 'ai'
 Requires-Dist: ultralytics>=8.0.0; extra == 'ai'
-Provides-Extra: dev
-Requires-Dist: mypy>=1.8.0; extra == 'dev'
-Requires-Dist: pre-commit>=3.8.0; extra == 'dev'
-Requires-Dist: pytest-cov>=6.1.1; extra == 'dev'
-Requires-Dist: pytest>=7.4.0; extra == 'dev'
-Requires-Dist: ruff>=0.1.14; extra == 'dev'
-Requires-Dist: types-pillow>=10.2.0.20240213; extra == 'dev'
-Requires-Dist: types-tqdm>=4.66.0.20240106; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # videopython
@@ -133,7 +126,7 @@ final.save("output.mp4")
 from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
 
 image = TextToImage().generate_image("A cinematic mountain sunrise")
-video = ImageToVideo().generate_video(image=image, fps=24).resize(1080, 1920)
+video = ImageToVideo().generate_video(image=image).resize(1080, 1920)
 audio = TextToSpeech().generate_audio("Welcome to videopython.")
 video.add_audio(audio).save("ai_video.mp4")
 ```
{videopython-0.25.4 → videopython-0.25.6}/README.md
@@ -81,7 +81,7 @@ final.save("output.mp4")
 from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
 
 image = TextToImage().generate_image("A cinematic mountain sunrise")
-video = ImageToVideo().generate_video(image=image, fps=24).resize(1080, 1920)
+video = ImageToVideo().generate_video(image=image).resize(1080, 1920)
 audio = TextToSpeech().generate_audio("Welcome to videopython.")
 video.add_audio(audio).save("ai_video.mp4")
 ```
{videopython-0.25.4 → videopython-0.25.6}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "videopython"
-version = "0.25.4"
+version = "0.25.6"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -35,7 +35,6 @@ dependencies = [
     "numpy>=1.25.2",
     "opencv-python-headless>=4.9.0.80",
     "pillow>=12.1.1",
-    "torchcodec>=0.9.1",
     "tqdm>=4.66.3",
     "pydantic>=2.8.0",
 ]
@@ -56,9 +55,10 @@ dev = [
 ]
 ai = [
     "accelerate>=0.29.2",
-    "diffusers>=0.26.3",
+    "diffusers>=0.30.0",
     "hf-transfer>=0.1.9",
-    "torch>=2.1.0",
+    "torch>=2.8.0",
+    "torchaudio>=2.8.0",
     "transformers>=5.2.0",
     "openai-whisper>=20240930",
     "pyannote-audio>=4.0.0",
@@ -72,28 +72,22 @@ ai = [
     # Audio classification (AST via transformers - no separate dep needed)
     # Scene detection
     "transnetv2-pytorch>=1.0.5",
-    # Voice cloning TTS (coqui-tts is the maintained fork of TTS)
-    "coqui-tts>=0.24.0",
+    # Voice cloning TTS (Chatterbox Multilingual by Resemble AI)
+    "chatterbox-tts>=0.1.7",
+    # Translation (Marian MT tokenizer requires sentencepiece)
+    "sentencepiece>=0.1.99",
     # Audio source separation
     "demucs>=4.0.0",
 ]
 
 # Required for pip install videopython[ai] - pip uses optional-dependencies, not dependency-groups
 [project.optional-dependencies]
-dev = [
-    "pre-commit>=3.8.0",
-    "ruff>=0.1.14",
-    "mypy>=1.8.0",
-    "pytest>=7.4.0",
-    "types-Pillow>=10.2.0.20240213",
-    "types-tqdm>=4.66.0.20240106",
-    "pytest-cov>=6.1.1",
-]
 ai = [
     "accelerate>=0.29.2",
-    "diffusers>=0.26.3",
+    "diffusers>=0.30.0",
     "hf-transfer>=0.1.9",
-    "torch>=2.1.0",
+    "torch>=2.8.0",
+    "torchaudio>=2.8.0",
     "transformers>=5.2.0",
     "openai-whisper>=20240930",
     "pyannote-audio>=4.0.0",
@@ -107,8 +101,10 @@ ai = [
     # Audio classification (AST via transformers - no separate dep needed)
     # Scene detection
     "transnetv2-pytorch>=1.0.5",
-    # Voice cloning TTS (coqui-tts is the maintained fork of TTS)
-    "coqui-tts>=0.24.0",
+    # Voice cloning TTS (Chatterbox Multilingual by Resemble AI)
+    "chatterbox-tts>=0.1.7",
+    # Translation (Marian MT tokenizer requires sentencepiece)
+    "sentencepiece>=0.1.99",
     # Audio source separation
     "demucs>=4.0.0",
 ]
@@ -130,13 +126,32 @@ module = [
     "easyocr", "easyocr.*",
     "transformers", "transformers.*",
     "transnetv2_pytorch", "transnetv2_pytorch.*",
-    "TTS", "TTS.*", "coqui", "coqui.*",
+    "chatterbox", "chatterbox.*",
     "demucs", "demucs.*",
+    "huggingface_hub", "huggingface_hub.*",
     "pyannote", "pyannote.*",
     "cv2", "cv2.*",
 ]
 ignore_missing_imports = true
 
+[tool.uv]
+# chatterbox-tts 0.1.7 pins strict versions of torch, torchaudio, numpy, and
+# diffusers that conflict with pyannote-audio (torch>=2.8) and CogVideoX
+# (diffusers>=0.30). Override to let the resolver pick compatible versions.
+# The ai dependency floors are aligned with these overrides to keep pip and uv
+# resolving similar versions.
+override-dependencies = [
+    "torch>=2.8.0", "torchaudio>=2.8.0", "numpy>=2.0.0", "diffusers>=0.30.0",
+    # ultralytics depends on opencv-python which conflicts with our
+    # opencv-python-headless (both provide cv2). Exclude opencv-python so
+    # only the headless variant is installed.
+    "opencv-python ; sys_platform == '_'",
+]
+# Pin minimum versions for transitive deps with known vulnerabilities.
+# Pygments 2.20.0 has a security fix but breaks mkdocs (passes None to html.escape).
+# Keep pygments<2.20.0 until a compatible release is available.
+constraint-dependencies = ["requests>=2.33.0", "pygments>=2.19.2,<2.20.0"]
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/dubber.py
@@ -34,9 +34,15 @@ class VideoDubber:
         source_lang: str | None = None,
         preserve_background: bool = True,
         voice_clone: bool = True,
+        enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
     ) -> DubbingResult:
-        """Dub a video into a target language."""
+        """Dub a video into a target language.
+
+        Args:
+            enable_diarization: Enable speaker diarization to clone each speaker's
+                voice separately. Requires additional VRAM for the diarization model.
+        """
         if self._local_pipeline is None:
             self._init_local_pipeline()
 
@@ -46,6 +52,7 @@ class VideoDubber:
             source_lang=source_lang,
             preserve_background=preserve_background,
             voice_clone=voice_clone,
+            enable_diarization=enable_diarization,
             progress_callback=progress_callback,
         )
 
@@ -56,6 +63,7 @@ class VideoDubber:
         source_lang: str | None = None,
         preserve_background: bool = True,
         voice_clone: bool = True,
+        enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
     ) -> Video:
         """Dub a video and return a new video with the dubbed audio."""
@@ -65,6 +73,7 @@
             source_lang=source_lang,
             preserve_background=preserve_background,
             voice_clone=voice_clone,
+            enable_diarization=enable_diarization,
             progress_callback=progress_callback,
         )
         return video.add_audio(result.dubbed_audio, overlay=False)
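The new `enable_diarization` flag threads through both public entry points above. A minimal sketch of calling it follows; the method name `dub` is a hypothetical stand-in (the hunks only show parameter lists), while the keyword arguments and `Video.from_path` are confirmed elsewhere in this diff:

```python
# Hedged sketch: `dub` is an assumed method name; only the parameters are
# confirmed by the hunks above. Video.from_path(...) appears in multicam.py.
from videopython.ai.dubbing.dubber import VideoDubber
from videopython.base.video import Video

video = Video.from_path("interview.mp4")
result = VideoDubber().dub(
    video,
    target_lang="de",
    enable_diarization=True,  # new in 0.25.6: clone each detected speaker's voice
)
```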
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/pipeline.py
@@ -23,16 +23,19 @@ class LocalDubbingPipeline:
         logger.info("LocalDubbingPipeline initialized with device=%s", requested)
 
         self._transcriber: Any = None
+        self._transcriber_diarization: bool | None = None
         self._translator: Any = None
         self._tts: Any = None
+        self._tts_voice_clone: bool | None = None
+        self._tts_language: str | None = None
         self._separator: Any = None
         self._synchronizer: TimingSynchronizer | None = None
 
-    def _init_transcriber(self) -> None:
+    def _init_transcriber(self, enable_diarization: bool = False) -> None:
         """Initialize the transcription model."""
         from videopython.ai.understanding.audio import AudioToText
 
-        self._transcriber = AudioToText(device=self.device)
+        self._transcriber = AudioToText(device=self.device, enable_diarization=enable_diarization)
 
     def _init_translator(self) -> None:
         """Initialize the translation model."""
@@ -40,17 +43,18 @@
 
         self._translator = TextTranslator(device=self.device)
 
-    def _init_tts(self, voice_clone: bool = False) -> None:
+    def _init_tts(self, voice_clone: bool = False, language: str = "en") -> None:
         """Initialize the text-to-speech model."""
         from videopython.ai.generation.audio import TextToSpeech
 
         if voice_clone:
             self._tts = TextToSpeech(
-                model_size="xtts",
+                model_size="chatterbox",
                 device=self.device,
+                language=language,
             )
         else:
-            self._tts = TextToSpeech(device=self.device)
+            self._tts = TextToSpeech(device=self.device, language=language)
 
     def _init_separator(self) -> None:
         """Initialize the audio separator."""
@@ -108,6 +112,7 @@
         source_lang: str | None = None,
         preserve_background: bool = True,
         voice_clone: bool = True,
+        enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
     ) -> DubbingResult:
         """Process a video through the local dubbing pipeline."""
@@ -118,8 +123,9 @@
             progress_callback(stage, progress)
 
         report_progress("Transcribing audio", 0.05)
-        if self._transcriber is None:
-            self._init_transcriber()
+        if self._transcriber is None or self._transcriber_diarization != enable_diarization:
+            self._init_transcriber(enable_diarization=enable_diarization)
+            self._transcriber_diarization = enable_diarization
 
         source_audio = video.audio
         transcription = self._transcriber.transcribe(source_audio)
@@ -133,7 +139,7 @@
             target_lang=target_lang,
         )
 
-        detected_lang = source_lang or "en"
+        detected_lang = source_lang or transcription.language or "en"
 
         separated_audio: SeparatedAudio | None = None
         vocal_audio = source_audio
@@ -162,14 +168,19 @@
         )
 
         report_progress("Generating dubbed speech", 0.50)
-        if self._tts is None:
-            self._init_tts(voice_clone=voice_clone)
+        if self._tts is None or self._tts_voice_clone != voice_clone or self._tts_language != target_lang:
+            self._init_tts(voice_clone=voice_clone, language=target_lang)
+            self._tts_voice_clone = voice_clone
+            self._tts_language = target_lang
 
         dubbed_segments: list[Audio] = []
         target_durations: list[float] = []
         start_times: list[float] = []
 
         for i, segment in enumerate(translated_segments):
+            if segment.duration < 0.1:
+                continue
+
             progress = 0.50 + (0.30 * (i / len(translated_segments)))
             report_progress(f"Generating speech ({i + 1}/{len(translated_segments)})", progress)
 
@@ -235,8 +246,9 @@
         original_duration = source_audio.metadata.duration_seconds
 
         report_progress("Analyzing audio", 0.05)
-        if self._transcriber is None:
-            self._init_transcriber()
+        if self._transcriber is None or self._transcriber_diarization is not False:
+            self._init_transcriber(enable_diarization=False)
+            self._transcriber_diarization = False
 
         transcription = self._transcriber.transcribe(source_audio)
 
@@ -264,8 +276,10 @@
         voice_sample = vocal_audio.slice(0, sample_duration)
 
         report_progress("Generating speech", 0.60)
-        if self._tts is None:
-            self._init_tts(voice_clone=True)
+        if self._tts is None or self._tts_voice_clone is not True or self._tts_language != "en":
+            self._init_tts(voice_clone=True, language="en")
+            self._tts_voice_clone = True
+            self._tts_language = "en"
 
         generated_speech = self._tts.generate_audio(text, voice_sample=voice_sample)
         speech_duration = generated_speech.metadata.duration_seconds
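Each lazily-initialized model now carries the configuration that produced it, so a changed flag or target language triggers a rebuild instead of silently reusing a stale model. The pattern from these hunks, reduced to a standalone sketch (names here are illustrative, not from the package):

```python
# Cache the config that produced the model alongside the model itself, and
# rebuild only when the requested config differs from the cached one.
class LazyTTS:
    def __init__(self) -> None:
        self._tts: object | None = None
        self._voice_clone: bool | None = None
        self._language: str | None = None

    def get(self, voice_clone: bool, language: str) -> object:
        if self._tts is None or self._voice_clone != voice_clone or self._language != language:
            self._tts = ("tts-model", voice_clone, language)  # stand-in for the real load
            self._voice_clone = voice_clone
            self._language = language
        return self._tts
```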
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/dubbing/timing.py
@@ -79,8 +79,8 @@ class TimingSynchronizer:
         """
         original_duration = audio.metadata.duration_seconds
 
-        if original_duration <= 0:
-            # Empty audio, return as-is
+        if original_duration <= 0 or target_duration <= 0:
+            # Empty audio or zero-length target, return as-is
             return audio, TimingAdjustment(
                 segment_index=segment_index,
                 original_duration=original_duration,
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/audio.py
@@ -11,10 +11,13 @@ from videopython.base.audio import Audio, AudioMetadata
 class TextToSpeech:
     """Generates speech audio from text using local models.
 
-    Supports Bark (`base`, `small`) and XTTS voice cloning (`xtts`).
+    Supports Bark (`base`, `small`) for general TTS and Chatterbox Multilingual
+    (`chatterbox`) for multilingual voice cloning.
     """
 
-    SUPPORTED_LOCAL_MODELS: list[str] = ["base", "small", "xtts"]
+    SUPPORTED_LOCAL_MODELS: list[str] = ["base", "small", "chatterbox"]
+
+    CHATTERBOX_SAMPLE_RATE: int = 24000
 
     def __init__(
         self,
@@ -32,7 +35,7 @@ class TextToSpeech:
         self.language = language
         self._model: Any = None
         self._processor: Any = None
-        self._xtts_model: Any = None
+        self._chatterbox_model: Any = None
 
     def _init_local(self) -> None:
         """Initialize local Bark model."""
@@ -51,43 +54,14 @@ class TextToSpeech:
             resolved_device=device,
         )
 
-    def _patch_xtts_load_audio(self) -> None:
-        """Patch XTTS load_audio to avoid torchcodec dependency issues."""
-        import TTS.tts.models.xtts as xtts_module
-
-        def load_audio_soundfile(audiopath: str, sampling_rate: int):
-            import soundfile as sf  # type: ignore[import-untyped]
-            import torch
-            import torchaudio.functional as F  # type: ignore[import-untyped]
-
-            audio_np, sr = sf.read(audiopath, dtype="float32")
-
-            audio = torch.from_numpy(audio_np)
-            if audio.dim() == 1:
-                audio = audio.unsqueeze(0)
-            else:
-                audio = audio.T
-
-            if audio.size(0) != 1:
-                audio = torch.mean(audio, dim=0, keepdim=True)
-
-            if sr != sampling_rate:
-                audio = F.resample(audio, sr, sampling_rate)
-
-            return audio
-
-        xtts_module.load_audio = load_audio_soundfile
-
-    def _init_xtts(self) -> None:
-        """Initialize XTTS-v2 model for voice cloning."""
-        from TTS.api import TTS
-
-        self._patch_xtts_load_audio()
+    def _init_chatterbox(self) -> None:
+        """Initialize Chatterbox Multilingual model for voice cloning."""
+        from chatterbox.mtl_tts import ChatterboxMultilingualTTS  # type: ignore[import-untyped]
 
         requested_device = self.device
         device = select_device(self.device, mps_allowed=False)
 
-        self._xtts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+        self._chatterbox_model = ChatterboxMultilingualTTS.from_pretrained(device=device)
         self.device = device
         log_device_initialization(
             "TextToSpeech",
@@ -120,28 +94,32 @@ class TextToSpeech:
         )
         return Audio(audio_data, metadata)
 
-    def _generate_xtts(self, text: str, voice_sample: Audio) -> Audio:
-        """Generate speech using XTTS-v2 with voice cloning."""
+    def _generate_chatterbox(self, text: str, voice_sample: Audio) -> Audio:
+        """Generate speech using Chatterbox Multilingual with voice cloning."""
         import tempfile
         from pathlib import Path
 
         import numpy as np
 
-        if self._xtts_model is None:
-            self._init_xtts()
+        if self._chatterbox_model is None:
+            self._init_chatterbox()
 
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             voice_sample.save(f.name)
             speaker_wav_path = Path(f.name)
 
         try:
-            audio_list = self._xtts_model.tts(
+            wav = self._chatterbox_model.generate(
                 text=text,
-                speaker_wav=str(speaker_wav_path),
-                language=self.language,
+                language_id=self.language,
+                audio_prompt_path=str(speaker_wav_path),
             )
-            audio_data = np.array(audio_list, dtype=np.float32)
-            sample_rate = 24000
+
+            audio_data = wav.cpu().float().numpy().squeeze()
+            if audio_data.ndim == 0:
+                audio_data = np.array([audio_data], dtype=np.float32)
+
+            sample_rate = self.CHATTERBOX_SAMPLE_RATE
 
             metadata = AudioMetadata(
                 sample_rate=sample_rate,
@@ -163,12 +141,13 @@
         """Generate speech audio from text."""
         effective_voice = voice_preset or self.voice
 
-        if self.model_size == "xtts" or voice_sample is not None:
+        if self.model_size == "chatterbox" or voice_sample is not None:
             if voice_sample is None:
                 raise ValueError(
-                    "voice_sample is required for XTTS voice cloning. Provide an Audio sample of the voice to clone."
+                    "voice_sample is required for Chatterbox voice cloning. "
+                    "Provide an Audio sample of the voice to clone."
                 )
-            return self._generate_xtts(text, voice_sample)
+            return self._generate_chatterbox(text, voice_sample)
 
         return self._generate_local(text, effective_voice)
 
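Putting the new Chatterbox path together: the constructor arguments and `generate_audio` call below match the hunks above and the pipeline's `_init_tts`; the input file name and device choice are illustrative assumptions.

```python
# Sketch of voice cloning with the new "chatterbox" model. Video.from_path,
# .audio, .slice() and .save() all appear elsewhere in this diff.
from videopython.ai.generation.audio import TextToSpeech
from videopython.base.video import Video

# Use the first 10 seconds of an existing recording as the cloning prompt.
voice_sample = Video.from_path("speaker.mp4").audio.slice(0, 10)

tts = TextToSpeech(model_size="chatterbox", device="cuda", language="fr")
audio = tts.generate_audio("Bonjour et bienvenue.", voice_sample=voice_sample)
audio.save("cloned.wav")  # 24 kHz output, per CHATTERBOX_SAMPLE_RATE above
```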
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/ai/generation/translation.py
@@ -61,15 +61,15 @@ class TextTranslator:
         return f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
 
     def _init_local(self, source_lang: str, target_lang: str) -> None:
-        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import MarianMTModel, MarianTokenizer  # type: ignore[attr-defined]
 
         model_name = self._get_local_model_name(source_lang, target_lang)
 
         requested_device = self.device
         device = select_device(self.device, mps_allowed=True)
 
-        self._tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self._model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
+        self._tokenizer = MarianTokenizer.from_pretrained(model_name)
+        self._model = MarianMTModel.from_pretrained(model_name).to(device)
         self.device = device
         log_device_initialization(
             "TextTranslator",
@@ -103,6 +103,8 @@
             return text
 
         effective_source = source_lang or "en"
+        if effective_source == target_lang:
+            return text
         return self._translate_local(text, target_lang, effective_source)
 
     def translate_batch(
@@ -118,6 +120,8 @@
             return []
 
         effective_source = source_lang or "en"
+        if effective_source == target_lang:
+            return list(texts)
         if self._model is None or self._current_lang_pair != (effective_source, target_lang):
             self._init_local(effective_source, target_lang)
 
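With the new short-circuit, translating into the source language returns the input untouched instead of loading a Marian model. A sketch, assuming the parameter names visible in the hunk bodies (`texts`, `target_lang`, `source_lang` usage is shown; the exact signature is not):

```python
# Hedged sketch of the same-language fast path; constructor arguments beyond
# `device` are assumptions.
from videopython.ai.generation.translation import TextTranslator

translator = TextTranslator(device="cpu")
# Same source and target: returned as-is, no model download or load.
print(translator.translate_batch(["hello", "world"], target_lang="en", source_lang="en"))
# ['hello', 'world']
```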
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/effects.py
@@ -32,6 +32,22 @@ __all__ = [
 ]
 
 
+def _resolve_time_range(start: float | None, stop: float | None, total_seconds: float) -> tuple[float, float]:
+    """Clamp and validate an effect time range against the video duration.
+
+    Returns resolved (start, stop) in seconds.
+    """
+    start_s = start if start is not None else 0
+    stop_s = stop if stop is not None else total_seconds
+    stop_s = min(stop_s, total_seconds)
+    start_s = min(start_s, total_seconds)
+    if start_s < 0:
+        raise ValueError(f"Effect start must be non-negative, got {start_s}!")
+    if stop_s < start_s:
+        raise ValueError(f"Effect stop ({stop_s}) must be >= start ({start_s})!")
+    return start_s, stop_s
+
+
 class Effect(ABC):
     """Abstract class for effect on frames of video.
 
@@ -54,20 +70,10 @@
             Only set when the effect should end before the video does.
         """
         original_shape = video.video_shape
-        start = start if start is not None else 0
-        stop = stop if stop is not None else video.total_seconds
-        # Clamp to video duration (frame rounding can make stop slightly exceed
-        # actual duration after segment assembly).
-        stop = min(stop, video.total_seconds)
-        start = min(start, video.total_seconds)
-        # Check for start and stop correctness
-        if start < 0:
-            raise ValueError(f"Effect start must be non-negative, got {start}!")
-        if stop < start:
-            raise ValueError(f"Effect stop ({stop}) must be >= start ({start})!")
+        start_s, stop_s = _resolve_time_range(start, stop, video.total_seconds)
         # Apply effect on video slice
-        effect_start_frame = round(start * video.fps)
-        effect_end_frame = round(stop * video.fps)
+        effect_start_frame = round(start_s * video.fps)
+        effect_end_frame = round(stop_s * video.fps)
         video_with_effect = self._apply(video[effect_start_frame:effect_end_frame])
         old_audio = video.audio
         video = Video.from_frames(
@@ -601,16 +607,7 @@ class Fade(Effect):
             Only set when the effect should end before the video does.
         """
         original_shape = video.video_shape
-        start_s = start if start is not None else 0
-        stop_s = stop if stop is not None else video.total_seconds
-        # Clamp to video duration (frame rounding can make stop slightly exceed
-        # actual duration after segment assembly).
-        stop_s = min(stop_s, video.total_seconds)
-        start_s = min(start_s, video.total_seconds)
-        if start_s < 0:
-            raise ValueError(f"Effect start must be non-negative, got {start_s}!")
-        if stop_s < start_s:
-            raise ValueError(f"Effect stop ({stop_s}) must be >= start ({start_s})!")
+        start_s, stop_s = _resolve_time_range(start, stop, video.total_seconds)
 
         effect_start_frame = round(start_s * video.fps)
         effect_end_frame = round(stop_s * video.fps)
@@ -689,16 +686,7 @@ class AudioEffect(Effect):
             stop: Stop time in seconds. Omit to apply until the end.
                 Only set when the effect should end before the video does.
         """
-        start_s = start if start is not None else 0
-        stop_s = stop if stop is not None else video.total_seconds
-        # Clamp to video duration (frame rounding can make stop slightly exceed
-        # actual duration after segment assembly).
-        stop_s = min(stop_s, video.total_seconds)
-        start_s = min(start_s, video.total_seconds)
-        if start_s < 0:
-            raise ValueError(f"Effect start must be non-negative, got {start_s}!")
-        if stop_s < start_s:
-            raise ValueError(f"Effect stop ({stop_s}) must be >= start ({start_s})!")
+        start_s, stop_s = _resolve_time_range(start, stop, video.total_seconds)
         video.audio = self._apply_audio(video.audio, start_s, stop_s, video.fps)
         return video
 
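The three duplicated validation blocks in `Effect`, `Fade`, and `AudioEffect` now share `_resolve_time_range`, whose full definition appears in the first hunk. Its behavior, exercised directly:

```python
# Behavior of the extracted helper, taken from its definition above.
from videopython.base.effects import _resolve_time_range  # private; imported only to illustrate

assert _resolve_time_range(None, None, 10.0) == (0, 10.0)   # omitted bounds -> full range
assert _resolve_time_range(2.0, 99.0, 10.0) == (2.0, 10.0)  # stop clamped to the duration
```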
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/base/transitions.py
@@ -49,8 +49,9 @@ class Transition(ABC):
         return _TRANSITION_REGISTRY[transition_type]._from_dict(data)
 
     @classmethod
+    @abstractmethod
     def _from_dict(cls, data: dict[str, Any]) -> "Transition":
-        raise NotImplementedError
+        pass
 
 
 class InstantTransition(Transition):
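Stacking `@classmethod` over `@abstractmethod` moves the failure from call time (`raise NotImplementedError`) to instantiation time. The pattern in isolation, with hypothetical class names:

```python
# A subclass that forgets to implement the abstract classmethod cannot be
# instantiated at all, instead of failing only when _from_dict is called.
from abc import ABC, abstractmethod
from typing import Any

class Base(ABC):
    @classmethod
    @abstractmethod
    def _from_dict(cls, data: dict[str, Any]) -> "Base": ...

class Concrete(Base):
    @classmethod
    def _from_dict(cls, data: dict[str, Any]) -> "Concrete":
        return cls()

Concrete._from_dict({})  # OK; instantiating an incomplete subclass raises TypeError
```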
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/multicam.py
@@ -118,14 +118,12 @@ class MultiCamEdit:
 
         # Cache source metadata for validate() and run()
         self._source_meta = first
-        self._source_duration = first.total_seconds
+        self._source_duration = min(m.total_seconds for m in meta_list)
         self._source_metas = metas
 
         # Build per-camera time ranges (cut start, cut end) from the timeline
         camera_ranges: dict[str, list[tuple[float, float]]] = {}
-        for i, cut in enumerate(self.cuts):
-            start = cut.time
-            end = self.cuts[i + 1].time if i + 1 < len(self.cuts) else self._source_duration
+        for cut, start, end in self._cut_ranges():
             camera_ranges.setdefault(cut.camera, []).append((start, end))
 
         # Validate adjusted seek positions per source
@@ -146,20 +144,20 @@
                 f"exceeds source duration ({source_dur}s)"
             )
 
-    def run(self) -> Video:
-        """Execute the multicam edit and return the final video."""
-        source_duration = self._source_duration
-
-        # Build time ranges: each segment runs from its cut time to the next cut time
-        segments: list[tuple[CutPoint, float, float]] = []
+    def _cut_ranges(self) -> list[tuple[CutPoint, float, float]]:
+        """Build (cut, start_time, end_time) for each segment in the timeline."""
+        ranges: list[tuple[CutPoint, float, float]] = []
         for i, cut in enumerate(self.cuts):
             start = cut.time
-            end = self.cuts[i + 1].time if i + 1 < len(self.cuts) else source_duration
-            segments.append((cut, start, end))
+            end = self.cuts[i + 1].time if i + 1 < len(self.cuts) else self._source_duration
+            ranges.append((cut, start, end))
+        return ranges
 
+    def run(self) -> Video:
+        """Execute the multicam edit and return the final video."""
         # Load and join segments
         result: Video | None = None
-        for i, (cut, start, end) in enumerate(segments):
+        for i, (cut, start, end) in enumerate(self._cut_ranges()):
             source_path = self.sources[cut.camera]
             offset = self.source_offsets.get(cut.camera, 0.0)
             segment = Video.from_path(str(source_path), start_second=start - offset, end_second=end - offset)
@@ -185,6 +183,21 @@
 
         return result
 
+    @property
+    def source_meta(self) -> VideoMetadata:
+        """Metadata of the reference source (first listed)."""
+        return self._source_meta
+
+    @property
+    def source_duration(self) -> float:
+        """Timeline duration in seconds (minimum across all sources)."""
+        return self._source_duration
+
+    @property
+    def source_metas(self) -> dict[str, VideoMetadata]:
+        """Per-camera metadata keyed by source name."""
+        return dict(self._source_metas)
+
     def validate(self) -> VideoMetadata:
         """Validate the plan and predict output metadata without loading frames."""
         total_seconds = self._source_duration
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/premiere_xml.py
@@ -173,9 +173,9 @@ def to_premiere_xml(edit: MultiCamEdit) -> str:
     """
    from videopython.base.transitions import FadeTransition
 
-    meta = edit._source_meta
+    meta = edit.source_meta
     fps = meta.fps
-    source_duration = edit._source_duration
+    source_duration = edit.source_duration
     total_frames = _seconds_to_frames(source_duration, fps)
 
     def frames(s: float) -> int:
@@ -244,7 +244,7 @@ def to_premiere_xml(edit: MultiCamEdit) -> str:
 
         file_id = f"file-{camera}"
         if file_id not in defined_file_ids:
-            src_meta = edit._source_metas[camera]
+            src_meta = edit.source_metas[camera]
             src_dur_frames = _seconds_to_frames(src_meta.total_seconds, fps)
             _build_file_element(
                 ci,
{videopython-0.25.4 → videopython-0.25.6}/src/videopython/editing/video_edit.py
@@ -664,9 +664,9 @@ def _normalize_effect_apply_args(apply_args: Mapping[str, Any], location: str) -
     """
     normalized = dict(apply_args)
     if "start" in normalized:
-        normalized["start"] = _coerce_optional_number_at_location(normalized["start"], f"{location}.start")
+        normalized["start"] = _coerce_optional_number(normalized["start"], "start", location=f"{location}.start")
     if "stop" in normalized:
-        normalized["stop"] = _coerce_optional_number_at_location(normalized["stop"], f"{location}.stop")
+        normalized["stop"] = _coerce_optional_number(normalized["stop"], "stop", location=f"{location}.stop")
     return normalized
 
 
@@ -1030,6 +1030,12 @@ def _predict_crop_metadata(meta: VideoMetadata, args: Mapping[str, Any]) -> Vide
 
 
 def _crop_value_to_pixels(value: Any, dimension: int) -> int:
+    """Convert a crop value to pixels.
+
+    Float values in the range (0, 1] are treated as fractions of *dimension*
+    (e.g. 0.5 means 50%). All other numeric values (including integers) are
+    treated as absolute pixel counts.
+    """
     if isinstance(value, bool) or not isinstance(value, (int, float)):
         raise ValueError("crop values must be numeric")
     if isinstance(value, float) and 0 < value <= 1:
@@ -1088,17 +1094,10 @@ def _require_number(value: Any, location: str) -> float
     return float(value)
 
 
-def _coerce_optional_number(value: Any, param_name: str) -> float | None:
+def _coerce_optional_number(value: Any, param_name: str, *, location: str | None = None) -> float | None:
     if value is None:
         return None
     if isinstance(value, bool) or not isinstance(value, (int, float)):
-        raise ValueError(f"Effect apply parameter '{param_name}' must be a number")
-    return float(value)
-
-
-def _coerce_optional_number_at_location(value: Any, location: str) -> float | None:
-    if value is None:
-        return None
-    if isinstance(value, bool) or not isinstance(value, (int, float)):
-        raise ValueError(f"{location} must be a number")
+        label = location if location is not None else f"Effect apply parameter '{param_name}'"
+        raise ValueError(f"{label} must be a number")
     return float(value)
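The two coercion helpers are merged behind a keyword-only `location` label, as the full definition above shows. Exercising both error-label styles (importing the private helper purely for illustration):

```python
# Both call styles the merged helper now supports.
from videopython.editing.video_edit import _coerce_optional_number

print(_coerce_optional_number(1, "start"))    # 1.0
print(_coerce_optional_number(None, "stop"))  # None
try:
    _coerce_optional_number("x", "stop", location="effects[0].stop")
except ValueError as e:
    print(e)  # effects[0].stop must be a number
```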