videopython 0.36.1__tar.gz → 0.38.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.36.1 → videopython-0.38.0}/PKG-INFO +6 -4
- {videopython-0.36.1 → videopython-0.38.0}/README.md +5 -3
- {videopython-0.36.1 → videopython-0.38.0}/pyproject.toml +1 -1
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/__init__.py +5 -0
- videopython-0.38.0/src/videopython/ai/effects.py +112 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/understanding/__init__.py +2 -0
- videopython-0.38.0/src/videopython/ai/understanding/objects.py +145 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/__init__.py +14 -0
- videopython-0.38.0/src/videopython/base/draw_detections.py +164 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/exceptions.py +39 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/effects.py +6 -1
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/operation.py +114 -11
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/transforms.py +43 -4
- videopython-0.38.0/src/videopython/editing/video_edit.py +1353 -0
- videopython-0.36.1/src/videopython/editing/video_edit.py +0 -857
- {videopython-0.36.1 → videopython-0.38.0}/.gitignore +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/LICENSE +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/_device.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/config.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/dubber.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/expressiveness.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/loudness.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/pipeline.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/remux.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/dubbing/voice_sample.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/transforms.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/understanding/faces.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/video_analysis/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/video_analysis/analyzer.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/video_analysis/models.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/video_analysis/sampling.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/ai/video_analysis/stages.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/audio/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/audio/analysis.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/audio/audio.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/_dimensions.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/_ffmpeg.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/_video_io.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/description.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/Anton-OFL.txt +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/Anton-Regular.ttf +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/BebasNeue-OFL.txt +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/BebasNeue-Regular.ttf +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/DejaVuSans.ttf +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/LICENSE_DEJAVU +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/Lato-Bold.ttf +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/Lato-OFL.txt +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/Poppins-Bold.ttf +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/Poppins-OFL.txt +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/fonts/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/image_text.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/transcription.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/base/video.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/__init__.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/_easing.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/streaming.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/editing/transcription_overlay.py +0 -0
- {videopython-0.36.1 → videopython-0.38.0}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.36.1
|
|
3
|
+
Version: 0.38.0
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -109,16 +109,18 @@ video.add_audio(audio).save("ai_video.mp4")
|
|
|
109
109
|
|
|
110
110
|
## LLM & AI Agent Integration
|
|
111
111
|
|
|
112
|
-
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API.
|
|
112
|
+
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API. Pass `strict=True` for a provider strict-mode grammar that prevents simple bound violations at decode time.
|
|
113
113
|
|
|
114
|
-
|
|
114
|
+
The plan parses permissively (shape only) and owns numeric bounds at validation, so a refine loop converges fast: `edit.check(meta)` collects **every** structured `PlanError` in one pass, `edit.repair(meta)` auto-clamps the mechanical violations (window/timestamp overruns, negatives) with a reported changelog, and `edit.normalize_dimensions(meta, target)` makes heterogeneous segments concat-compatible by construction. `edit.validate()` still raises a typed `PlanValidationError` (a `ValueError` with structured `.errors`) for the single-error path.
|
|
115
|
+
|
|
116
|
+
See the [LLM Integration Guide](https://videopython.com/guides/llm-integration/) for end-to-end examples, the collect/repair/normalize refine loop, and operation discovery patterns.
|
|
115
117
|
|
|
116
118
|
## Features
|
|
117
119
|
|
|
118
120
|
- **`videopython.base`** — `Video`, `VideoMetadata`, `FrameIterator`, `ImageText`, `Transcription`, and shared result types (`BoundingBox`, `FaceTrack`, `SceneBoundary`, ...). No AI dependencies.
|
|
119
121
|
- **`videopython.audio`** — `Audio` with overlay, concat, normalize, time-stretch, silence detection, segment classification.
|
|
120
122
|
- **`videopython.editing`** — `Operation`/`Effect` foundation, `VideoEdit` plan runner with JSON Schema + streaming execution. Transforms (cut, resize, crop, fps, speed, reverse, freeze, silence removal) and effects (blur, zoom, color grading, vignette, Ken Burns, fade, overlays, animated subtitles).
|
|
121
|
-
- **`videopython.ai`** *(install with `[ai]`)* — generation (`TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic`), understanding (`AudioToText`, `AudioClassifier`, `SceneVLM`, `FaceTracker`, `SemanticSceneDetector`), `FaceTrackingCrop` transform, and the full-pipeline `VideoAnalyzer`.
|
|
123
|
+
- **`videopython.ai`** *(install with `[ai]`)* — generation (`TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic`), understanding (`AudioToText`, `AudioClassifier`, `SceneVLM`, `FaceTracker`, `ObjectDetector`, `SemanticSceneDetector`), the `FaceTrackingCrop` transform, the `ObjectDetectionOverlay` effect (per-frame bounding boxes + labels), and the full-pipeline `VideoAnalyzer`.
|
|
122
124
|
- **`videopython.ai.dubbing`** — `VideoDubber` for voice-cloned revoicing with timing sync.
|
|
123
125
|
|
|
124
126
|
## Examples
|
|
@@ -60,16 +60,18 @@ video.add_audio(audio).save("ai_video.mp4")
|
|
|
60
60
|
|
|
61
61
|
## LLM & AI Agent Integration
|
|
62
62
|
|
|
63
|
-
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API.
|
|
63
|
+
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API. Pass `strict=True` for a provider strict-mode grammar that prevents simple bound violations at decode time.
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
The plan parses permissively (shape only) and owns numeric bounds at validation, so a refine loop converges fast: `edit.check(meta)` collects **every** structured `PlanError` in one pass, `edit.repair(meta)` auto-clamps the mechanical violations (window/timestamp overruns, negatives) with a reported changelog, and `edit.normalize_dimensions(meta, target)` makes heterogeneous segments concat-compatible by construction. `edit.validate()` still raises a typed `PlanValidationError` (a `ValueError` with structured `.errors`) for the single-error path.
|
|
66
|
+
|
|
67
|
+
See the [LLM Integration Guide](https://videopython.com/guides/llm-integration/) for end-to-end examples, the collect/repair/normalize refine loop, and operation discovery patterns.
|
|
66
68
|
|
|
67
69
|
## Features
|
|
68
70
|
|
|
69
71
|
- **`videopython.base`** — `Video`, `VideoMetadata`, `FrameIterator`, `ImageText`, `Transcription`, and shared result types (`BoundingBox`, `FaceTrack`, `SceneBoundary`, ...). No AI dependencies.
|
|
70
72
|
- **`videopython.audio`** — `Audio` with overlay, concat, normalize, time-stretch, silence detection, segment classification.
|
|
71
73
|
- **`videopython.editing`** — `Operation`/`Effect` foundation, `VideoEdit` plan runner with JSON Schema + streaming execution. Transforms (cut, resize, crop, fps, speed, reverse, freeze, silence removal) and effects (blur, zoom, color grading, vignette, Ken Burns, fade, overlays, animated subtitles).
|
|
72
|
-
- **`videopython.ai`** *(install with `[ai]`)* — generation (`TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic`), understanding (`AudioToText`, `AudioClassifier`, `SceneVLM`, `FaceTracker`, `SemanticSceneDetector`), `FaceTrackingCrop` transform, and the full-pipeline `VideoAnalyzer`.
|
|
74
|
+
- **`videopython.ai`** *(install with `[ai]`)* — generation (`TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic`), understanding (`AudioToText`, `AudioClassifier`, `SceneVLM`, `FaceTracker`, `ObjectDetector`, `SemanticSceneDetector`), the `FaceTrackingCrop` transform, the `ObjectDetectionOverlay` effect (per-frame bounding boxes + labels), and the full-pipeline `VideoAnalyzer`.
|
|
73
75
|
- **`videopython.ai.dubbing`** — `VideoDubber` for voice-cloned revoicing with timing sync.
|
|
74
76
|
|
|
75
77
|
## Examples
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
from .effects import ObjectDetectionOverlay
|
|
1
2
|
from .generation import ImageToVideo, TextToImage, TextToMusic, TextToSpeech, TextToVideo
|
|
2
3
|
from .transforms import FaceTrackingCrop
|
|
3
4
|
from .understanding import (
|
|
4
5
|
AudioClassifier,
|
|
5
6
|
AudioToText,
|
|
6
7
|
FaceTracker,
|
|
8
|
+
ObjectDetector,
|
|
7
9
|
SceneVLM,
|
|
8
10
|
SemanticSceneDetector,
|
|
9
11
|
)
|
|
@@ -20,10 +22,13 @@ __all__ = [
|
|
|
20
22
|
"AudioToText",
|
|
21
23
|
"AudioClassifier",
|
|
22
24
|
"FaceTracker",
|
|
25
|
+
"ObjectDetector",
|
|
23
26
|
"SceneVLM",
|
|
24
27
|
"SemanticSceneDetector",
|
|
25
28
|
# Transforms (AI-powered)
|
|
26
29
|
"FaceTrackingCrop",
|
|
30
|
+
# Effects (AI-powered)
|
|
31
|
+
"ObjectDetectionOverlay",
|
|
27
32
|
# Video analysis
|
|
28
33
|
"VideoAnalysis",
|
|
29
34
|
"VideoAnalysisConfig",
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""AI-powered video effects that require object detection.
|
|
2
|
+
|
|
3
|
+
Effects here are real :class:`~videopython.editing.operation.Effect` subclasses
|
|
4
|
+
(shape-preserving, streamable) that physically live in ``videopython.ai`` so the
|
|
5
|
+
``videopython.editing`` layer keeps no AI dependency -- the same direction
|
|
6
|
+
``FaceTrackingCrop`` imports ``Operation``. The pixel work is delegated to the
|
|
7
|
+
AI-free renderer in :mod:`videopython.base.draw_detections`.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import ClassVar, Literal
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from pydantic import Field, PrivateAttr
|
|
16
|
+
|
|
17
|
+
from videopython.ai.understanding.objects import ObjectDetector
|
|
18
|
+
from videopython.base.description import DetectedObject
|
|
19
|
+
from videopython.base.draw_detections import DetectionStyle, draw_detections
|
|
20
|
+
from videopython.editing.operation import Effect
|
|
21
|
+
|
|
22
|
+
__all__ = ["ObjectDetectionOverlay"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ObjectDetectionOverlay(Effect):
    """Detect objects per frame and overlay labelled bounding boxes.

    Runs a YOLOv8-COCO detector and composites tidy, colour-coded boxes with
    class labels (and optional confidence) onto every frame in the window.

    Detection runs on a ``detection_interval`` cadence in the streaming path and
    boxes are held between detections, so the cost is *compute*-bound, not
    *memory*-bound: ``"streamable"`` here means bounded memory, not bounded
    compute. On long clips, cap cost with ``window`` (limit the time range),
    a larger ``detection_interval``, a ``class_filter``, and/or the smaller
    ``model_size``. Because only ``streaming_init`` and ``process_frame`` are
    overridden, the base ``Effect._apply`` replays the identical contract for
    in-memory execution, so eager and streaming results cannot drift.
    """

    # NOTE: the class docstring and every ``description=`` string below are kept
    # verbatim on purpose -- they are part of the generated JSON schema.

    # Discriminator value identifying this operation in a serialized plan.
    op: Literal["object_detection_overlay"] = "object_detection_overlay"
    streamable: ClassVar[bool] = True

    # --- Detection / rendering options exposed on the wire -------------------
    confidence_threshold: float = Field(0.5, ge=0, le=1, description="Minimum detection confidence to draw a box, 0-1.")
    class_filter: list[str] | None = Field(
        None,
        description='Only draw these COCO class names, e.g. ["person", "car", "dog"]. Null draws all classes.',
    )
    show_confidence: bool = Field(True, description="Append the detection confidence as a percentage to each label.")
    box_color: tuple[int, int, int] | None = Field(
        None,
        description="Fixed box color as [R, G, B] (0-255) for every box, or null for distinct per-class colors.",
    )
    line_thickness: float = Field(
        0.003, gt=0, description="Box stroke width as a fraction of the frame's longer side (~3px at 1080p)."
    )
    label_font_size: float = Field(
        0.022, gt=0, description="Label text height as a fraction of the frame's longer side (~24px at 1080p)."
    )
    detection_interval: int = Field(
        2,
        ge=1,
        description="Run detection every Nth frame and reuse the last result in between. Higher is faster.",
    )
    model_size: Literal["n", "s", "m"] = Field(
        "n",
        description=(
            "YOLOv8 model size: 'n' (nano, fastest), 's' (small), 'm' (medium, most accurate). "
            "Larger detects better but is slower."
        ),
    )
    backend: Literal["cpu", "gpu", "auto"] = Field(
        "auto",
        description="Detection device: 'cpu', 'gpu', or 'auto'.",
        json_schema_extra={"llm_hidden": True},
    )

    # --- Per-run private state (never serialized) ----------------------------
    # Detector instance, created on first use by ``_init_detector``.
    _detector: ObjectDetector | None = PrivateAttr(default=None)
    # Detections from the most recent detection frame, redrawn on held frames.
    _last: list[DetectedObject] = PrivateAttr(default_factory=list)

    def _style(self) -> DetectionStyle:
        """Map this effect's fields onto the renderer's ``DetectionStyle``."""
        renderer_style = DetectionStyle(
            min_confidence=self.confidence_threshold,
            show_confidence=self.show_confidence,
            box_color=self.box_color,
            line_thickness=self.line_thickness,
            label_font_size=self.label_font_size,
        )
        return renderer_style

    def _init_detector(self) -> None:
        """Create the detector on first call; later calls are no-ops.

        This is the single place the detector is constructed, which makes it
        the one method tests need to patch.
        """
        if self._detector is not None:
            return
        weights = f"yolov8{self.model_size}.pt"
        wanted_classes = tuple(self.class_filter or ())
        self._detector = ObjectDetector(
            model_name=weights,
            confidence_threshold=self.confidence_threshold,
            class_filter=wanted_classes,
            backend=self.backend,
        )

    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
        """Reset held detections and make sure a detector exists before frames arrive."""
        self._last = []
        self._init_detector()

    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
        """Return ``frame`` with the current detections drawn on it.

        Detection runs when ``frame_index`` is a multiple of
        ``detection_interval``; every other frame redraws the held result.
        """
        detector = self._detector
        if detector is None:
            self._init_detector()
            detector = self._detector
        assert detector is not None
        # frame_index is 0-based within the effect's window, so frame 0 always
        # detects; intermediate frames reuse the last result.
        is_detection_frame = frame_index % self.detection_interval == 0
        if is_detection_frame:
            self._last = detector.detect(frame)
        return draw_detections(frame, self._last, self._style())
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
from .audio import AudioClassifier, AudioToText
|
|
2
2
|
from .faces import FaceTracker
|
|
3
3
|
from .image import SceneVLM
|
|
4
|
+
from .objects import ObjectDetector
|
|
4
5
|
from .temporal import SemanticSceneDetector
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
"AudioToText",
|
|
8
9
|
"AudioClassifier",
|
|
9
10
|
"FaceTracker",
|
|
11
|
+
"ObjectDetector",
|
|
10
12
|
"SceneVLM",
|
|
11
13
|
"SemanticSceneDetector",
|
|
12
14
|
]
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""General object detection for the understanding layer.
|
|
2
|
+
|
|
3
|
+
``ObjectDetector`` is the object-detection counterpart to the face detector in
|
|
4
|
+
``faces.py``: a lazy YOLOv8-COCO wrapper returning
|
|
5
|
+
:class:`~videopython.base.description.DetectedObject` with normalized bounding
|
|
6
|
+
boxes. It mirrors ``_FaceDetector`` (lazy init, device selection, ``detect`` /
|
|
7
|
+
``detect_batch``) so the two share one mental model. Consumed by
|
|
8
|
+
``videopython.ai.effects.ObjectDetectionOverlay``; usable directly for any
|
|
9
|
+
per-frame object analysis.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
from typing import Any, Literal
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from videopython.ai._device import select_device
|
|
20
|
+
from videopython.base.description import BoundingBox, DetectedObject
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
__all__ = ["ObjectDetector"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ObjectDetector:
    """YOLOv8-COCO object detector that loads its model on first use.

    Detections are returned as ``DetectedObject`` instances whose bounding
    boxes are divided by the source image size. The Ultralytics weights
    (default ``yolov8n.pt``) are loaded the first time ``detect`` or
    ``detect_batch`` runs, and class names are read from the loaded model.
    Results are gated by ``confidence_threshold`` and, when ``class_filter``
    is non-empty, limited to the listed class names.
    """

    DEFAULT_CONFIDENCE_THRESHOLD = 0.5

    def __init__(
        self,
        model_name: str = "yolov8n.pt",
        confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
        class_filter: tuple[str, ...] = (),
        backend: Literal["cpu", "gpu", "auto"] = "auto",
    ):
        """Store the configuration; no model is loaded here.

        Args:
            model_name: Ultralytics COCO model id or path (e.g. ``yolov8n.pt``,
                ``yolov8s.pt``, ``yolov8m.pt``).
            confidence_threshold: Minimum detection confidence in ``[0, 1]``.
            class_filter: When non-empty, only these class names are kept.
            backend: Detection device - ``"cpu"``, ``"gpu"``, or ``"auto"``.
        """
        # Public configuration.
        self.model_name = model_name
        self.confidence_threshold = confidence_threshold
        self.class_filter = class_filter
        self.backend: Literal["cpu", "gpu", "auto"] = backend
        # Lazily populated state.
        self._resolved_device: Literal["cpu", "cuda"] | None = None
        self._yolo_model: Any = None
        self._class_names: dict[int, str] = {}
        logger.info("ObjectDetector initialized with model=%s backend=%s", model_name, backend)

    def _resolve_device(self) -> Literal["cpu", "cuda"]:
        """Work out the execution device once and cache the answer.

        Raises:
            ValueError: If ``backend`` is ``"gpu"`` but CUDA is not selected.
        """
        if self._resolved_device is None:
            device: Literal["cpu", "cuda"]
            if self.backend == "cpu":
                device = "cpu"
            else:
                available = select_device(None, mps_allowed=False)
                has_cuda = available == "cuda"
                # An explicit GPU request must not silently fall back to CPU.
                if self.backend == "gpu" and not has_cuda:
                    raise ValueError("GPU backend requested but CUDA is not available.")
                device = "cuda" if has_cuda else "cpu"
            self._resolved_device = device
        return self._resolved_device

    def execution_device(self) -> Literal["cpu", "cuda"]:
        """Return the resolved execution device for this detector."""
        return self._resolve_device()

    def _init_yolo(self) -> None:
        """Load the YOLO model, capture its class names, and move it to CUDA if selected."""
        # Imported here so constructing a detector does not require ultralytics.
        from ultralytics import YOLO

        model = YOLO(self.model_name)
        self._yolo_model = model
        self._class_names = dict(model.names)

        if self._resolve_device() == "cuda":
            self._yolo_model.to("cuda")

    def _loaded_model(self) -> Any:
        """Return the YOLO model, loading it via ``_init_yolo`` when it is still unset."""
        if self._yolo_model is None:
            self._init_yolo()
        assert self._yolo_model is not None
        return self._yolo_model

    def _objects_from_yolo_result(self, result: Any) -> list[DetectedObject]:
        """Convert one YOLO result into detections sorted by descending confidence."""
        boxes = result.boxes
        if boxes is None:
            return []

        frame_h, frame_w = result.orig_shape
        allowed = self.class_filter
        found: list[DetectedObject] = []
        for idx in range(len(boxes)):
            class_id = int(boxes.cls[idx])
            # Fall back to the numeric id when the model has no name for it.
            name = self._class_names.get(class_id, str(class_id))
            if allowed and name not in allowed:
                continue

            left, top, right, bottom = boxes.xyxy[idx].tolist()
            region = BoundingBox(
                x=left / frame_w,
                y=top / frame_h,
                width=(right - left) / frame_w,
                height=(bottom - top) / frame_h,
            )
            found.append(
                DetectedObject(
                    label=name,
                    confidence=float(boxes.conf[idx]),
                    bounding_box=region,
                )
            )
        return sorted(found, key=lambda obj: obj.confidence, reverse=True)

    def detect(self, image: np.ndarray) -> list[DetectedObject]:
        """Detect objects in a single ``(H, W, 3)`` frame."""
        model = self._loaded_model()
        results = model(image, conf=self.confidence_threshold, verbose=False)
        return self._objects_from_yolo_result(results[0]) if results else []

    def detect_batch(self, images: list[np.ndarray] | np.ndarray) -> list[list[DetectedObject]]:
        """Detect objects in a batch of frames (list or stacked ``(N, H, W, 3)``)."""
        if isinstance(images, np.ndarray):
            # A 4-D array is split along its first axis; any other array is one frame.
            images = list(images) if images.ndim == 4 else [images]
        if not images:
            return []

        model = self._loaded_model()
        results = model(images, conf=self.confidence_threshold, verbose=False)
        return [self._objects_from_yolo_result(item) for item in results]
|
|
@@ -10,10 +10,15 @@ from .description import (
|
|
|
10
10
|
SceneBoundary,
|
|
11
11
|
SceneDescription,
|
|
12
12
|
)
|
|
13
|
+
from .draw_detections import DetectionStyle, class_color, draw_detections
|
|
13
14
|
from .exceptions import (
|
|
14
15
|
AudioError,
|
|
15
16
|
AudioLoadError,
|
|
16
17
|
OutOfBoundsError,
|
|
18
|
+
PlanError,
|
|
19
|
+
PlanErrorCode,
|
|
20
|
+
PlanRepair,
|
|
21
|
+
PlanValidationError,
|
|
17
22
|
TextRenderError,
|
|
18
23
|
TransformError,
|
|
19
24
|
VideoError,
|
|
@@ -40,10 +45,19 @@ __all__ = [
|
|
|
40
45
|
"TransformError",
|
|
41
46
|
"TextRenderError",
|
|
42
47
|
"OutOfBoundsError",
|
|
48
|
+
# Structured plan validation / repair
|
|
49
|
+
"PlanError",
|
|
50
|
+
"PlanErrorCode",
|
|
51
|
+
"PlanValidationError",
|
|
52
|
+
"PlanRepair",
|
|
43
53
|
# Text rendering primitives
|
|
44
54
|
"ImageText",
|
|
45
55
|
"TextAlign",
|
|
46
56
|
"AnchorPoint",
|
|
57
|
+
# Detection overlay renderer (AI-free)
|
|
58
|
+
"draw_detections",
|
|
59
|
+
"DetectionStyle",
|
|
60
|
+
"class_color",
|
|
47
61
|
# Transcription data classes
|
|
48
62
|
"Transcription",
|
|
49
63
|
"TranscriptionSegment",
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Pure, AI-free renderer for object-detection overlays.
|
|
2
|
+
|
|
3
|
+
Draws labelled bounding boxes onto a frame from a list of
|
|
4
|
+
:class:`~videopython.base.description.DetectedObject`. This module has **no AI
|
|
5
|
+
dependencies** -- it is the single source of truth for how detections look, so
|
|
6
|
+
it can be unit-tested with synthetic detections and reused by any detector. The
|
|
7
|
+
AI side (``videopython.ai``) only produces the ``DetectedObject`` list and calls
|
|
8
|
+
:func:`draw_detections`.
|
|
9
|
+
|
|
10
|
+
Visuals: a resolution-scaled box stroke plus a label chip filled in the box's
|
|
11
|
+
own colour (so chip and box read as one unit) with anti-aliased text. Colours
|
|
12
|
+
are deterministic per class via :func:`class_color`, so the same class is the
|
|
13
|
+
same colour in every frame and across runs.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import colorsys
|
|
19
|
+
import hashlib
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
from PIL import Image, ImageDraw
|
|
24
|
+
|
|
25
|
+
from videopython.base.description import DetectedObject
|
|
26
|
+
from videopython.base.fonts import load_font
|
|
27
|
+
|
|
28
|
+
__all__ = ["DetectionStyle", "class_color", "draw_detections"]
|
|
29
|
+
|
|
30
|
+
# Hand-picked Material-palette hues for common COCO classes so busy scenes read
# clearly. Any class not listed gets a deterministic colour from ``class_color``.
_RESERVED_COLORS: dict[str, tuple[int, int, int]] = {
    "person": (76, 175, 80),  # green
    "bicycle": (0, 188, 212),  # cyan
    "car": (33, 150, 243),  # blue
    "motorcycle": (156, 39, 176),  # purple
    "bus": (255, 193, 7),  # amber
    "truck": (255, 87, 34),  # deep orange
    "cat": (233, 30, 99),  # pink
    "dog": (255, 152, 0),  # orange
}


def class_color(label: str) -> tuple[int, int, int]:
    """Return a deterministic RGB colour for a class label.

    Labels listed in ``_RESERVED_COLORS`` get their reserved hue; every other
    label maps ``md5(label) -> HSV hue`` at fixed saturation/value. ``md5``
    (not the salted built-in ``hash``) is used so colours are stable across
    processes and test runs.

    Args:
        label: Class name to colour.

    Returns:
        An ``(R, G, B)`` tuple of ints in ``0..255``.
    """
    reserved = _RESERVED_COLORS.get(label)
    if reserved is not None:
        return reserved
    # The digest only picks a hue, so flag it as non-security use: FIPS-enforcing
    # builds otherwise refuse to construct an md5 hash and raise ValueError.
    digest = hashlib.md5(label.encode("utf-8"), usedforsecurity=False).hexdigest()
    hue = (int(digest, 16) % 360) / 360.0
    r, g, b = colorsys.hsv_to_rgb(hue, 0.7, 0.95)
    return int(r * 255), int(g * 255), int(b * 255)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass(frozen=True)
class DetectionStyle:
    """Styling for :func:`draw_detections`.

    Lengths expressed as a fraction of the frame's longer side are
    resolution-independent: the same style reads consistently at 1080p and 4k.

    Instances are frozen (immutable). ``draw_detections`` multiplies
    ``line_thickness`` and ``label_font_size`` by ``max(height, width)`` and
    rounds them, with a floor of 1 px for the stroke and 8 px for the label
    text. It uses ``box_color`` for every box when set and falls back to
    :func:`class_color` per label otherwise, and skips any detection whose
    confidence is below ``min_confidence``.
    """

    box_color: tuple[int, int, int] | None = None
    """Fixed ``(R, G, B)`` for every box, or ``None`` for per-class colours."""
    line_thickness: float = 0.003
    """Box stroke width as a fraction of ``max(height, width)`` (~3px at 1080p)."""
    show_confidence: bool = True
    """Append the confidence as a whole-number percent to each label."""
    label_font_size: float = 0.022
    """Label text height as a fraction of ``max(height, width)`` (~24px at 1080p)."""
    label_text_color: tuple[int, int, int] = (255, 255, 255)
    """Colour of the label text drawn on the chip."""
    label_bg_alpha: int = 200
    """Opacity (0-255) of the label chip background."""
    min_confidence: float = 0.0
    """Detections below this confidence are skipped."""
    font: str | None = None
    """Bundled font name or path; ``None`` uses the default font."""
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def draw_detections(
    frame: np.ndarray,
    detections: list[DetectedObject],
    style: DetectionStyle = DetectionStyle(),
) -> np.ndarray:
    """Return ``frame`` with ``detections`` drawn as labelled boxes.

    Shape-preserving: the result is the same ``(H, W, 3)`` ``uint8`` array
    layout as the input. An empty ``detections`` list (or one where every
    entry lacks a ``bounding_box`` or falls below ``style.min_confidence``)
    is a no-op that returns the *input array itself*, not a copy; only when
    at least one detection is drawn is a new array allocated.

    Boxes are clamped to the frame, so off-frame coordinates clip cleanly
    instead of raising. Boxes with a negative ``width``/``height`` have their
    corners reordered after clamping for the same reason. Label chips flip
    inside the box when they would overflow the top edge and clamp
    horizontally so they never leave the frame.

    Args:
        frame: Source frame as ``(H, W, 3)`` ``uint8`` (RGB).
        detections: Objects to draw; each uses its normalized ``bounding_box``.
        style: Visual styling (colours, stroke width, label options).

    Returns:
        ``frame`` itself when nothing is drawn; otherwise a new ``(H, W, 3)``
        ``uint8`` frame with the overlays composited on.
    """
    if not detections:
        return frame

    h, w = frame.shape[:2]
    scale = max(h, w)
    thickness = max(1, round(style.line_thickness * scale))
    font_px = max(8, round(style.label_font_size * scale))
    font = load_font(style.font, font_px)
    # Chip padding depends only on the stroke width, so compute it once.
    pad = max(2, thickness)

    # Everything is drawn on a transparent overlay and composited at the end,
    # so the semi-transparent chip background blends with the frame.
    canvas = Image.new("RGBA", (w, h), (0, 0, 0, 0))
    draw = ImageDraw.Draw(canvas)

    drew_any = False
    for det in detections:
        box = det.bounding_box
        if box is None or det.confidence < style.min_confidence:
            continue
        drew_any = True
        color = style.box_color or class_color(det.label)

        # Normalized box -> pixel corners, clamped to the frame.
        xa = max(0, min(w - 1, int(box.x * w)))
        ya = max(0, min(h - 1, int(box.y * h)))
        xb = max(0, min(w - 1, int((box.x + box.width) * w)))
        yb = max(0, min(h - 1, int((box.y + box.height) * h)))
        # ImageDraw.rectangle requires x1 >= x0 and y1 >= y0; order the
        # corners so an inverted box (negative width/height) cannot raise.
        x0, x1 = min(xa, xb), max(xa, xb)
        y0, y1 = min(ya, yb), max(ya, yb)
        draw.rectangle((x0, y0, x1, y1), outline=(*color, 255), width=thickness)

        text = det.label.title()
        if style.show_confidence:
            text = f"{text} {det.confidence * 100:.0f}%"

        tb = draw.textbbox((0, 0), text, font=font)
        text_w, text_h = tb[2] - tb[0], tb[3] - tb[1]
        chip_w, chip_h = text_w + 2 * pad, text_h + 2 * pad

        # Flip the chip inside the box when it would overflow the top edge,
        # and clamp horizontally so it never leaves the frame.
        chip_y = y0 - chip_h if y0 - chip_h >= 0 else y0
        chip_x = max(0, min(x0, w - chip_w))
        draw.rectangle(
            (chip_x, chip_y, chip_x + chip_w, chip_y + chip_h),
            fill=(*color, style.label_bg_alpha),
        )
        # Subtract the text bbox origin so the glyphs sit exactly ``pad``
        # pixels inside the chip regardless of the font's bearing offsets.
        draw.text(
            (chip_x + pad - tb[0], chip_y + pad - tb[1]),
            text,
            font=font,
            fill=(*style.label_text_color, 255),
        )

    if not drew_any:
        return frame

    out = Image.fromarray(frame).convert("RGBA")
    out.alpha_composite(canvas)
    return np.array(out.convert("RGB"), dtype=np.uint8)
|
|
@@ -85,12 +85,26 @@ class PlanErrorCode(str, Enum):
|
|
|
85
85
|
instead of substring-matching the human message text.
|
|
86
86
|
"""
|
|
87
87
|
|
|
88
|
+
# Segment range vs source / shape.
|
|
88
89
|
SEGMENT_END_EXCEEDS_SOURCE = "segment_end_exceeds_source"
|
|
90
|
+
SEGMENT_NEGATIVE = "segment_negative"
|
|
91
|
+
SEGMENT_RANGE = "segment_range"
|
|
92
|
+
# Effect windows.
|
|
89
93
|
EFFECT_WINDOW_EXCEEDS_DURATION = "effect_window_exceeds_duration"
|
|
94
|
+
WINDOW_NEGATIVE = "window_negative"
|
|
95
|
+
WINDOW_ORDER = "window_order"
|
|
96
|
+
# Operation-level, metadata-relative checks.
|
|
90
97
|
CUT_EXCEEDS_DURATION = "cut_exceeds_duration"
|
|
98
|
+
OP_TIMESTAMP_OUT_OF_RANGE = "op_timestamp_out_of_range"
|
|
99
|
+
CROP_EXCEEDS_SOURCE = "crop_exceeds_source"
|
|
100
|
+
DEGENERATE_DURATION = "degenerate_duration"
|
|
101
|
+
SOURCE_UNREADABLE = "source_unreadable"
|
|
102
|
+
OP_PREDICTION_FAILED = "op_prediction_failed"
|
|
103
|
+
# Assembly / structural.
|
|
91
104
|
UNKNOWN_OP = "unknown_op"
|
|
92
105
|
CONCAT_MISMATCH = "concat_mismatch"
|
|
93
106
|
SUBTITLE_UNFITTABLE = "subtitle_unfittable"
|
|
107
|
+
POST_OP_REQUIRES_CONTEXT = "post_op_requires_context"
|
|
94
108
|
|
|
95
109
|
|
|
96
110
|
@dataclass
|
|
@@ -110,12 +124,37 @@ class PlanError:
|
|
|
110
124
|
predicted_duration: float | None = None
|
|
111
125
|
|
|
112
126
|
|
|
127
|
+
@dataclass
class PlanRepair:
    """One entry in the changelog produced by a repair/normalize pass.

    :meth:`VideoEdit.repair` and :meth:`VideoEdit.normalize_dimensions`
    return these records to describe each modification they applied to a
    plan, so a consumer can surface "we trimmed your effect to fit" style
    wording keyed on the violation class.

    Fields:
        location: Path into the plan where the change happened, e.g.
            ``'segments[0].operations[1]'``.
        field: Name of the changed field, e.g. ``'window.stop'``,
            ``'timestamp'`` or ``'dimensions'``.
        old: Value before the repair -- a ``float`` for numeric clamps, a
            ``str`` for composite values such as ``'768x432'``.
        new: Value after the repair, with the same conventions as ``old``.
        code: The :class:`PlanErrorCode` of the violation that was repaired.
    """

    # Where in the plan the change was made.
    location: str
    # Which field at that location was changed.
    field: str
    # Before/after values of the changed field.
    old: float | str | None
    new: float | str | None
    # Violation class that triggered the repair.
    code: PlanErrorCode
|
|
146
|
+
|
|
147
|
+
|
|
113
148
|
class PlanValidationError(ValueError):
|
|
114
149
|
"""Typed plan-validation failure carrying structured :class:`PlanError`s.
|
|
115
150
|
|
|
116
151
|
Subclasses ``ValueError`` so ``str(e)`` stays byte-identical to the bare
|
|
117
152
|
``ValueError`` prose emitted before this type existed -- existing
|
|
118
153
|
``pytest.raises(match=...)`` and consumer substring fallbacks keep working.
|
|
154
|
+
|
|
155
|
+
``str(e)`` is the first error's human message; ``.errors`` carries every
|
|
156
|
+
structured :class:`PlanError`. The non-raising :meth:`VideoEdit.check`
|
|
157
|
+
returns the same ``PlanError`` list directly.
|
|
119
158
|
"""
|
|
120
159
|
|
|
121
160
|
def __init__(self, message: str, errors: list[PlanError]):
|
|
@@ -29,6 +29,7 @@ from pydantic import Field, PrivateAttr, model_validator
|
|
|
29
29
|
from tqdm import tqdm
|
|
30
30
|
|
|
31
31
|
from videopython.base.description import BoundingBox
|
|
32
|
+
from videopython.base.exceptions import PlanError, PlanErrorCode, PlanValidationError
|
|
32
33
|
from videopython.base.fonts import load_font
|
|
33
34
|
from videopython.editing._easing import ease, ease_out
|
|
34
35
|
from videopython.editing.operation import Effect
|
|
@@ -860,7 +861,11 @@ class ImageOverlay(_AnchoredOverlay):
|
|
|
860
861
|
with Image.open(self.source) as im:
|
|
861
862
|
im.verify()
|
|
862
863
|
except (OSError, ValueError) as exc:
|
|
863
|
-
|
|
864
|
+
message = f"image_overlay source {str(self.source)!r} is not a readable image: {exc}"
|
|
865
|
+
raise PlanValidationError(
|
|
866
|
+
message,
|
|
867
|
+
[PlanError(code=PlanErrorCode.SOURCE_UNREADABLE, op=self.op, field="source")],
|
|
868
|
+
) from exc
|
|
864
869
|
return meta
|
|
865
870
|
|
|
866
871
|
def _rasterize_svg(self, target_w: int) -> np.ndarray:
|