videopython 0.33.1__tar.gz → 0.33.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. videopython-0.33.3/PKG-INFO +133 -0
  2. videopython-0.33.3/README.md +84 -0
  3. {videopython-0.33.1 → videopython-0.33.3}/pyproject.toml +6 -1
  4. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/generation/audio.py +1 -1
  5. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/generation/translation.py +1 -1
  6. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/understanding/audio.py +2 -2
  7. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/understanding/faces.py +11 -16
  8. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/understanding/image.py +2 -2
  9. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/audio/audio.py +4 -4
  10. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/_ffmpeg.py +5 -5
  11. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/_video_io.py +1 -1
  12. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/description.py +21 -20
  13. videopython-0.33.3/src/videopython/base/fonts/DejaVuSans.ttf +0 -0
  14. videopython-0.33.3/src/videopython/base/fonts/LICENSE_DEJAVU +99 -0
  15. videopython-0.33.3/src/videopython/base/fonts/__init__.py +58 -0
  16. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/image_text.py +22 -22
  17. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/transcription.py +10 -8
  18. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/video.py +2 -2
  19. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/__init__.py +20 -0
  20. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/effects.py +651 -8
  21. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/operation.py +4 -5
  22. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/streaming.py +8 -2
  23. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/transcription_overlay.py +4 -1
  24. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/transforms.py +2 -2
  25. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/editing/video_edit.py +2 -2
  26. videopython-0.33.1/PKG-INFO +0 -258
  27. videopython-0.33.1/README.md +0 -209
  28. {videopython-0.33.1 → videopython-0.33.3}/.gitignore +0 -0
  29. {videopython-0.33.1 → videopython-0.33.3}/LICENSE +0 -0
  30. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/__init__.py +0 -0
  31. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/__init__.py +0 -0
  32. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/_device.py +0 -0
  33. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/__init__.py +0 -0
  34. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/config.py +0 -0
  35. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/dubber.py +0 -0
  36. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/expressiveness.py +0 -0
  37. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/loudness.py +0 -0
  38. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/models.py +0 -0
  39. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/pipeline.py +0 -0
  40. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/quality.py +0 -0
  41. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/remux.py +0 -0
  42. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/timing.py +0 -0
  43. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/dubbing/voice_sample.py +0 -0
  44. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/generation/__init__.py +0 -0
  45. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/generation/image.py +0 -0
  46. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/generation/qwen3.py +0 -0
  47. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/generation/video.py +0 -0
  48. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/transforms.py +0 -0
  49. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/understanding/__init__.py +0 -0
  50. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/understanding/separation.py +0 -0
  51. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/understanding/temporal.py +0 -0
  52. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/video_analysis/__init__.py +0 -0
  53. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/video_analysis/analyzer.py +0 -0
  54. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/video_analysis/models.py +0 -0
  55. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/video_analysis/sampling.py +0 -0
  56. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/ai/video_analysis/stages.py +0 -0
  57. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/audio/__init__.py +0 -0
  58. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/audio/analysis.py +0 -0
  59. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/__init__.py +0 -0
  60. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/_dimensions.py +0 -0
  61. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/base/exceptions.py +0 -0
  62. {videopython-0.33.1 → videopython-0.33.3}/src/videopython/py.typed +0 -0
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: videopython
3
+ Version: 0.33.3
4
+ Summary: Minimal video generation and processing library.
5
+ Project-URL: Homepage, https://videopython.com
6
+ Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
7
+ Project-URL: Documentation, https://videopython.com
8
+ Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
9
+ License: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Requires-Python: <3.14,>=3.10
20
+ Requires-Dist: numpy>=1.25.2
21
+ Requires-Dist: opencv-python-headless>=4.9.0.80
22
+ Requires-Dist: pillow>=12.1.1
23
+ Requires-Dist: pydantic>=2.8.0
24
+ Requires-Dist: tqdm>=4.66.3
25
+ Provides-Extra: ai
26
+ Requires-Dist: accelerate>=0.29.2; extra == 'ai'
27
+ Requires-Dist: chatterbox-tts>=0.1.7; extra == 'ai'
28
+ Requires-Dist: demucs>=4.0.0; extra == 'ai'
29
+ Requires-Dist: diffusers>=0.30.0; extra == 'ai'
30
+ Requires-Dist: hf-transfer>=0.1.9; extra == 'ai'
31
+ Requires-Dist: imagehash>=4.3; extra == 'ai'
32
+ Requires-Dist: llama-cpp-python>=0.3.0; extra == 'ai'
33
+ Requires-Dist: numba>=0.61.0; extra == 'ai'
34
+ Requires-Dist: ollama>=0.4.5; extra == 'ai'
35
+ Requires-Dist: openai-whisper>=20240930; extra == 'ai'
36
+ Requires-Dist: pyannote-audio>=4.0.0; extra == 'ai'
37
+ Requires-Dist: pyloudnorm>=0.1.1; extra == 'ai'
38
+ Requires-Dist: qwen-vl-utils>=0.0.10; extra == 'ai'
39
+ Requires-Dist: scikit-learn>=1.3.0; extra == 'ai'
40
+ Requires-Dist: scipy>=1.10.0; extra == 'ai'
41
+ Requires-Dist: sentencepiece>=0.1.99; extra == 'ai'
42
+ Requires-Dist: silero-vad>=5.1; extra == 'ai'
43
+ Requires-Dist: torch>=2.8.0; extra == 'ai'
44
+ Requires-Dist: torchaudio>=2.8.0; extra == 'ai'
45
+ Requires-Dist: transformers>=5.2.0; extra == 'ai'
46
+ Requires-Dist: transnetv2-pytorch>=1.0.5; extra == 'ai'
47
+ Requires-Dist: ultralytics>=8.0.0; extra == 'ai'
48
+ Description-Content-Type: text/markdown
49
+
50
+ # videopython
51
+
52
+ [![PyPI](https://img.shields.io/pypi/v/videopython)](https://pypi.org/project/videopython/)
53
+ [![Python](https://img.shields.io/pypi/pyversions/videopython)](https://pypi.org/project/videopython/)
54
+ [![License](https://img.shields.io/github/license/BartWojtowicz/videopython)](LICENSE)
55
+
56
+ Minimal, LLM-friendly Python library for programmatic video editing, processing, and AI video workflows.
57
+
58
+ Full documentation: [videopython.com](https://videopython.com)
59
+
60
+ > **Disclaimer:** This project started as a hand-written hobby project, but most of the code is now produced by LLM agents. Humans still drive direction, approve changes, and own design decisions.
61
+
62
+ ## Installation
63
+
64
+ ```bash
65
+ # Install FFmpeg first (macOS: brew install ffmpeg | Debian: apt-get install ffmpeg)
66
+ pip install videopython # core video/audio editing
67
+ pip install "videopython[ai]" # + local AI features (GPU recommended)
68
+ ```
69
+
70
+ Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
71
+
72
+ ## Quick Start
73
+
74
+ ### JSON editing plans
75
+
76
+ A `VideoEdit` is a multi-segment plan, defined as a dict (or JSON), validated and executed against the source files:
77
+
78
+ ```python
79
+ from videopython.editing import VideoEdit
80
+
81
+ edit = VideoEdit.from_dict({
82
+ "segments": [{
83
+ "source": "raw.mp4",
84
+ "start": 10.0,
85
+ "end": 20.0,
86
+ "operations": [
87
+ {"op": "resize", "width": 1080, "height": 1920},
88
+ {"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
89
+ {"op": "fade", "mode": "in", "duration": 0.5},
90
+ ],
91
+ }],
92
+ })
93
+ edit.validate() # dry-run via metadata, no frames loaded
94
+ edit.run_to_file("output.mp4") # streams ffmpeg decode → effects → encode
95
+ ```
96
+
97
+ `run_to_file()` streams ffmpeg decode → per-frame effects → encode, so memory stays bounded even for hour-long sources. Use `edit.run()` to get a `Video` back in memory instead.
98
+
99
+ ### AI generation
100
+
101
+ ```python
102
+ from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
103
+
104
+ image = TextToImage().generate_image("A cinematic mountain sunrise")
105
+ video = ImageToVideo().generate_video(image=image)
106
+ audio = TextToSpeech().generate_audio("Welcome to videopython.")
107
+ video.add_audio(audio).save("ai_video.mp4")
108
+ ```
109
+
110
+ ## LLM & AI Agent Integration
111
+
112
+ Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every registered `Operation` — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API. Then `edit.validate()` dry-runs the plan via metadata before any frames are loaded, so a failed LLM output can be fed back as an error and retried cheaply.
113
+
114
+ See the [LLM Integration Guide](https://videopython.com/guides/llm-integration/) for end-to-end examples, validation error loops, and operation discovery patterns.
115
+
116
+ ## Features
117
+
118
+ - **`videopython.base`** — `Video`, `VideoMetadata`, `FrameIterator`, `ImageText`, `Transcription`, and shared result types (`BoundingBox`, `FaceTrack`, `SceneBoundary`, ...). No AI dependencies.
119
+ - **`videopython.audio`** — `Audio` with overlay, concat, normalize, time-stretch, silence detection, segment classification.
120
+ - **`videopython.editing`** — `Operation`/`Effect` foundation, `VideoEdit` plan runner with JSON Schema + streaming execution. Transforms (cut, resize, crop, fps, speed, reverse, freeze, silence removal) and effects (blur, zoom, color grading, vignette, Ken Burns, fade, overlays, animated subtitles).
121
+ - **`videopython.ai`** *(install with `[ai]`)* — generation (`TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic`), understanding (`AudioToText`, `AudioClassifier`, `SceneVLM`, `FaceTracker`, `SemanticSceneDetector`), `FaceTrackingCrop` transform, and the full-pipeline `VideoAnalyzer`.
122
+ - **`videopython.ai.dubbing`** — `VideoDubber` for voice-cloned revoicing with timing sync.
123
+
124
+ ## Examples
125
+
126
+ - [Social Media Clip](https://videopython.com/examples/social-clip/)
127
+ - [AI-Generated Video](https://videopython.com/examples/ai-video/)
128
+ - [Auto-Subtitles](https://videopython.com/examples/auto-subtitles/)
129
+ - [Processing Large Videos](https://videopython.com/examples/large-videos/)
130
+
131
+ ## Development
132
+
133
+ See [`DEVELOPMENT.md`](DEVELOPMENT.md) for local setup, testing, and contribution workflow.
@@ -0,0 +1,84 @@
1
+ # videopython
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/videopython)](https://pypi.org/project/videopython/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/videopython)](https://pypi.org/project/videopython/)
5
+ [![License](https://img.shields.io/github/license/BartWojtowicz/videopython)](LICENSE)
6
+
7
+ Minimal, LLM-friendly Python library for programmatic video editing, processing, and AI video workflows.
8
+
9
+ Full documentation: [videopython.com](https://videopython.com)
10
+
11
+ > **Disclaimer:** This project started as a hand-written hobby project, but most of the code is now produced by LLM agents. Humans still drive direction, approve changes, and own design decisions.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ # Install FFmpeg first (macOS: brew install ffmpeg | Debian: apt-get install ffmpeg)
17
+ pip install videopython # core video/audio editing
18
+ pip install "videopython[ai]" # + local AI features (GPU recommended)
19
+ ```
20
+
21
+ Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
22
+
23
+ ## Quick Start
24
+
25
+ ### JSON editing plans
26
+
27
+ A `VideoEdit` is a multi-segment plan, defined as a dict (or JSON), validated and executed against the source files:
28
+
29
+ ```python
30
+ from videopython.editing import VideoEdit
31
+
32
+ edit = VideoEdit.from_dict({
33
+ "segments": [{
34
+ "source": "raw.mp4",
35
+ "start": 10.0,
36
+ "end": 20.0,
37
+ "operations": [
38
+ {"op": "resize", "width": 1080, "height": 1920},
39
+ {"op": "color_adjust", "saturation": 1.15, "contrast": 1.05},
40
+ {"op": "fade", "mode": "in", "duration": 0.5},
41
+ ],
42
+ }],
43
+ })
44
+ edit.validate() # dry-run via metadata, no frames loaded
45
+ edit.run_to_file("output.mp4") # streams ffmpeg decode → effects → encode
46
+ ```
47
+
48
+ `run_to_file()` streams ffmpeg decode → per-frame effects → encode, so memory stays bounded even for hour-long sources. Use `edit.run()` to get a `Video` back in memory instead.
49
+
50
+ ### AI generation
51
+
52
+ ```python
53
+ from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
54
+
55
+ image = TextToImage().generate_image("A cinematic mountain sunrise")
56
+ video = ImageToVideo().generate_video(image=image)
57
+ audio = TextToSpeech().generate_audio("Welcome to videopython.")
58
+ video.add_audio(audio).save("ai_video.mp4")
59
+ ```
60
+
61
+ ## LLM & AI Agent Integration
62
+
63
+ Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every registered `Operation` — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API. Then `edit.validate()` dry-runs the plan via metadata before any frames are loaded, so a failed LLM output can be fed back as an error and retried cheaply.
64
+
65
+ See the [LLM Integration Guide](https://videopython.com/guides/llm-integration/) for end-to-end examples, validation error loops, and operation discovery patterns.
66
+
67
+ ## Features
68
+
69
+ - **`videopython.base`** — `Video`, `VideoMetadata`, `FrameIterator`, `ImageText`, `Transcription`, and shared result types (`BoundingBox`, `FaceTrack`, `SceneBoundary`, ...). No AI dependencies.
70
+ - **`videopython.audio`** — `Audio` with overlay, concat, normalize, time-stretch, silence detection, segment classification.
71
+ - **`videopython.editing`** — `Operation`/`Effect` foundation, `VideoEdit` plan runner with JSON Schema + streaming execution. Transforms (cut, resize, crop, fps, speed, reverse, freeze, silence removal) and effects (blur, zoom, color grading, vignette, Ken Burns, fade, overlays, animated subtitles).
72
+ - **`videopython.ai`** *(install with `[ai]`)* — generation (`TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic`), understanding (`AudioToText`, `AudioClassifier`, `SceneVLM`, `FaceTracker`, `SemanticSceneDetector`), `FaceTrackingCrop` transform, and the full-pipeline `VideoAnalyzer`.
73
+ - **`videopython.ai.dubbing`** — `VideoDubber` for voice-cloned revoicing with timing sync.
74
+
75
+ ## Examples
76
+
77
+ - [Social Media Clip](https://videopython.com/examples/social-clip/)
78
+ - [AI-Generated Video](https://videopython.com/examples/ai-video/)
79
+ - [Auto-Subtitles](https://videopython.com/examples/auto-subtitles/)
80
+ - [Processing Large Videos](https://videopython.com/examples/large-videos/)
81
+
82
+ ## Development
83
+
84
+ See [`DEVELOPMENT.md`](DEVELOPMENT.md) for local setup, testing, and contribution workflow.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.33.1"
3
+ version = "0.33.3"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -137,6 +137,9 @@ Documentation = "https://videopython.com"
137
137
  [tool.mypy]
138
138
  mypy_path = "src/stubs"
139
139
  plugins = ["pydantic.mypy"]
140
+ warn_unused_ignores = true
141
+ warn_redundant_casts = true
142
+ disallow_any_generics = true
140
143
 
141
144
  [[tool.mypy.overrides]]
142
145
  module = [
@@ -183,9 +186,11 @@ build-backend = "hatchling.build"
183
186
 
184
187
  [tool.hatch.build.targets.wheel]
185
188
  packages = ["src/videopython"]
189
+ artifacts = ["src/videopython/base/fonts/*.ttf", "src/videopython/base/fonts/LICENSE_DEJAVU"]
186
190
 
187
191
  [tool.hatch.build.targets.sdist]
188
192
  include = ["src/videopython", "src/videopython/py.typed"]
193
+ artifacts = ["src/videopython/base/fonts/*.ttf", "src/videopython/base/fonts/LICENSE_DEJAVU"]
189
194
 
190
195
  [tool.pytest.ini_options]
191
196
  pythonpath = ["src/"]
@@ -33,7 +33,7 @@ class TextToSpeech:
33
33
  self._model: Any = None
34
34
 
35
35
  def _init_local(self) -> None:
36
- from chatterbox.mtl_tts import ChatterboxMultilingualTTS # type: ignore[import-untyped]
36
+ from chatterbox.mtl_tts import ChatterboxMultilingualTTS
37
37
 
38
38
  requested_device = self.device
39
39
  device = select_device(self.device, mps_allowed=False)
@@ -170,7 +170,7 @@ class MarianTranslator:
170
170
  return f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
171
171
 
172
172
  def _init_local(self, source_lang: str, target_lang: str) -> None:
173
- from transformers import MarianMTModel, MarianTokenizer # type: ignore[attr-defined]
173
+ from transformers import MarianMTModel, MarianTokenizer
174
174
 
175
175
  model_name = self._get_local_model_name(source_lang, target_lang)
176
176
 
@@ -188,7 +188,7 @@ class AudioToText:
188
188
  def _init_diarization(self) -> None:
189
189
  """Initialize pyannote speaker diarization pipeline."""
190
190
  import torch
191
- from pyannote.audio import Pipeline # type: ignore[import-untyped]
191
+ from pyannote.audio import Pipeline
192
192
 
193
193
  self._diarization_pipeline = Pipeline.from_pretrained(self.PYANNOTE_DIARIZATION_MODEL)
194
194
  self._diarization_pipeline.to(torch.device(self.device))
@@ -214,7 +214,7 @@ class AudioToText:
214
214
  self._vad_model = None
215
215
  release_device_memory(self.device)
216
216
 
217
- def _process_transcription_result(self, transcription_result: dict) -> Transcription:
217
+ def _process_transcription_result(self, transcription_result: dict[str, Any]) -> Transcription:
218
218
  """Process raw transcription result into a Transcription object."""
219
219
  transcription_segments = []
220
220
  for segment in transcription_result["segments"]:
@@ -237,7 +237,7 @@ class FaceTracker:
237
237
 
238
238
  def _select_face(
239
239
  self,
240
- faces: list,
240
+ faces: list[DetectedFace],
241
241
  frame_width: int,
242
242
  frame_height: int,
243
243
  ) -> tuple[float, float, float, float] | None:
@@ -251,29 +251,24 @@ class FaceTracker:
251
251
  Returns:
252
252
  Tuple of (center_x, center_y, width, height) in normalized coords, or None.
253
253
  """
254
- if not faces:
254
+ faces_with_box = [(f, f.bounding_box) for f in faces if f.bounding_box is not None]
255
+ if not faces_with_box:
255
256
  return None
256
257
 
257
258
  if self.selection_strategy == "largest":
258
- face = faces[0]
259
+ _, bbox = faces_with_box[0]
259
260
  elif self.selection_strategy == "centered":
260
261
  frame_center = (0.5, 0.5)
261
- face = min(
262
- faces,
263
- key=lambda f: (
264
- (f.bounding_box.center[0] - frame_center[0]) ** 2
265
- + (f.bounding_box.center[1] - frame_center[1]) ** 2
266
- ),
262
+ _, bbox = min(
263
+ faces_with_box,
264
+ key=lambda fb: ((fb[1].center[0] - frame_center[0]) ** 2 + (fb[1].center[1] - frame_center[1]) ** 2),
267
265
  )
268
266
  elif self.selection_strategy == "index":
269
- if self.face_index < len(faces):
270
- face = faces[self.face_index]
271
- else:
272
- face = faces[0]
267
+ idx = self.face_index if self.face_index < len(faces_with_box) else 0
268
+ _, bbox = faces_with_box[idx]
273
269
  else:
274
- face = faces[0]
270
+ _, bbox = faces_with_box[0]
275
271
 
276
- bbox = face.bounding_box
277
272
  return (bbox.center[0], bbox.center[1], bbox.width, bbox.height)
278
273
 
279
274
  def detect_and_track(
@@ -407,7 +402,7 @@ class FaceTracker:
407
402
 
408
403
  sampled_frames = [frames[i] for i in sample_indices]
409
404
 
410
- sampled_detections: list[list] = []
405
+ sampled_detections: list[list[DetectedFace]] = []
411
406
  for batch_start in range(0, len(sampled_frames), self.batch_size):
412
407
  batch_end = min(batch_start + self.batch_size, len(sampled_frames))
413
408
  batch = sampled_frames[batch_start:batch_end]
@@ -151,7 +151,7 @@ class SceneVLM:
151
151
  def _init_local(self) -> None:
152
152
  """Initialize local Qwen3.5 model."""
153
153
  import torch
154
- from transformers import AutoModelForImageTextToText, AutoProcessor # type: ignore[attr-defined]
154
+ from transformers import AutoModelForImageTextToText, AutoProcessor
155
155
 
156
156
  t0 = time.perf_counter()
157
157
  requested_device = self.device
@@ -275,7 +275,7 @@ class SceneVLM:
275
275
  def _generate_from_message_batch(self, messages_batch: list[list[dict[str, Any]]]) -> list[str]:
276
276
  """Run batch generation for one or more multimodal chat messages."""
277
277
  import torch
278
- from qwen_vl_utils import process_vision_info # type: ignore
278
+ from qwen_vl_utils import process_vision_info
279
279
 
280
280
  if self._model is None:
281
281
  self._init_local()
@@ -5,7 +5,7 @@ import subprocess
5
5
  import wave
6
6
  from dataclasses import dataclass
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  import numpy as np
11
11
 
@@ -69,7 +69,7 @@ class Audio:
69
69
  return bool(np.all(np.abs(self.data) < 1e-7))
70
70
 
71
71
  @staticmethod
72
- def _get_ffmpeg_info(file_path: Path) -> dict:
72
+ def _get_ffmpeg_info(file_path: Path) -> dict[str, Any]:
73
73
  """Get audio metadata using ffprobe"""
74
74
  try:
75
75
  info = _ffmpeg.probe(file_path)
@@ -483,7 +483,7 @@ class Audio:
483
483
  if first.metadata.channels == 1:
484
484
  output = np.zeros(total_samples, dtype=np.float32)
485
485
  else:
486
- output = np.zeros((total_samples, 2), dtype=np.float32) # type: ignore
486
+ output = np.zeros((total_samples, 2), dtype=np.float32)
487
487
 
488
488
  # Copy non-crossfaded portions
489
489
  crossfade_start = len(first.data) - crossfade_samples
@@ -761,7 +761,7 @@ class Audio:
761
761
  if base.metadata.channels == 1:
762
762
  output = np.zeros(total_length, dtype=np.float32)
763
763
  else:
764
- output = np.zeros((total_length, 2), dtype=np.float32) # type: ignore
764
+ output = np.zeros((total_length, 2), dtype=np.float32)
765
765
 
766
766
  # Copy base audio
767
767
  output[: len(base.data)] = base.data
@@ -13,7 +13,7 @@ import json
13
13
  import subprocess
14
14
  from contextlib import contextmanager
15
15
  from pathlib import Path
16
- from typing import Iterator, Sequence
16
+ from typing import Any, Iterator, Sequence
17
17
 
18
18
  from videopython.base.exceptions import FFmpegProbeError, FFmpegRunError
19
19
 
@@ -44,7 +44,7 @@ def run(cmd: Sequence[str], *, stdin: bytes | None = None) -> bytes:
44
44
  return result.stdout
45
45
 
46
46
 
47
- def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict:
47
+ def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict[str, Any]:
48
48
  """Run ffprobe and return the parsed JSON payload.
49
49
 
50
50
  Args:
@@ -76,7 +76,7 @@ def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict:
76
76
  raise FFmpegProbeError(f"Error parsing ffprobe output: {e}") from e
77
77
 
78
78
 
79
- def _terminate(proc: subprocess.Popen, *, timeout: float = 5) -> None:
79
+ def _terminate(proc: subprocess.Popen[bytes], *, timeout: float = 5) -> None:
80
80
  """Terminate a still-running process, escalating to kill after ``timeout``."""
81
81
  if proc.poll() is None:
82
82
  proc.terminate()
@@ -88,7 +88,7 @@ def _terminate(proc: subprocess.Popen, *, timeout: float = 5) -> None:
88
88
 
89
89
 
90
90
  @contextmanager
91
- def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subprocess.Popen]:
91
+ def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subprocess.Popen[bytes]]:
92
92
  """Context manager wrapping an ffmpeg decode process.
93
93
 
94
94
  Yields a Popen with ``stdout=PIPE`` and ``stderr=DEVNULL``. Callers
@@ -116,7 +116,7 @@ def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subproces
116
116
 
117
117
 
118
118
  @contextmanager
119
- def popen_encode(cmd: Sequence[str]) -> Iterator[subprocess.Popen]:
119
+ def popen_encode(cmd: Sequence[str]) -> Iterator[subprocess.Popen[bytes]]:
120
120
  """Context manager wrapping an ffmpeg encode process via stdin pipe.
121
121
 
122
122
  Yields a Popen with ``stdin=PIPE``, ``stdout=DEVNULL``, and
@@ -173,7 +173,7 @@ def decode_video(
173
173
  if frames_read == 0:
174
174
  raise ValueError("No frames were read from the video")
175
175
 
176
- frames = frames[:frames_read] # type: ignore
176
+ frames = frames[:frames_read]
177
177
 
178
178
  try:
179
179
  audio = Audio.from_path(path)
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass, field
4
+ from typing import Any
4
5
 
5
6
  from pydantic import BaseModel, ConfigDict, Field
6
7
 
@@ -49,7 +50,7 @@ class SceneBoundary:
49
50
  """Number of frames in this scene."""
50
51
  return self.end_frame - self.start_frame
51
52
 
52
- def to_dict(self) -> dict:
53
+ def to_dict(self) -> dict[str, Any]:
53
54
  """Convert to dictionary for JSON serialization."""
54
55
  return {
55
56
  "start": self.start,
@@ -59,7 +60,7 @@ class SceneBoundary:
59
60
  }
60
61
 
61
62
  @classmethod
62
- def from_dict(cls, data: dict) -> "SceneBoundary":
63
+ def from_dict(cls, data: dict[str, Any]) -> "SceneBoundary":
63
64
  """Create SceneBoundary from dictionary."""
64
65
  return cls(
65
66
  start=data["start"],
@@ -95,12 +96,12 @@ class BoundingBox(BaseModel):
95
96
  """Area of the bounding box (normalized)."""
96
97
  return self.width * self.height
97
98
 
98
- def to_dict(self) -> dict:
99
+ def to_dict(self) -> dict[str, Any]:
99
100
  """Backwards-compat alias for ``model_dump()``."""
100
101
  return self.model_dump()
101
102
 
102
103
  @classmethod
103
- def from_dict(cls, data: dict) -> BoundingBox:
104
+ def from_dict(cls, data: dict[str, Any]) -> BoundingBox:
104
105
  """Backwards-compat alias for ``model_validate(data)``."""
105
106
  return cls.model_validate(data)
106
107
 
@@ -119,7 +120,7 @@ class DetectedObject:
119
120
  confidence: float
120
121
  bounding_box: BoundingBox | None = None
121
122
 
122
- def to_dict(self) -> dict:
123
+ def to_dict(self) -> dict[str, Any]:
123
124
  """Convert to dictionary for JSON serialization."""
124
125
  return {
125
126
  "label": self.label,
@@ -128,7 +129,7 @@ class DetectedObject:
128
129
  }
129
130
 
130
131
  @classmethod
131
- def from_dict(cls, data: dict) -> DetectedObject:
132
+ def from_dict(cls, data: dict[str, Any]) -> DetectedObject:
132
133
  """Create DetectedObject from dictionary."""
133
134
  return cls(
134
135
  label=data["label"],
@@ -160,7 +161,7 @@ class DetectedFace:
160
161
  """Area of the face bounding box (normalized), or None if no bounding box."""
161
162
  return self.bounding_box.area if self.bounding_box else None
162
163
 
163
- def to_dict(self) -> dict:
164
+ def to_dict(self) -> dict[str, Any]:
164
165
  """Convert to dictionary for JSON serialization."""
165
166
  return {
166
167
  "bounding_box": self.bounding_box.to_dict() if self.bounding_box else None,
@@ -168,7 +169,7 @@ class DetectedFace:
168
169
  }
169
170
 
170
171
  @classmethod
171
- def from_dict(cls, data: dict) -> DetectedFace:
172
+ def from_dict(cls, data: dict[str, Any]) -> DetectedFace:
172
173
  """Create DetectedFace from dictionary."""
173
174
  return cls(
174
175
  bounding_box=BoundingBox.from_dict(data["bounding_box"]) if data.get("bounding_box") else None,
@@ -190,7 +191,7 @@ class DetectedText:
190
191
  confidence: float
191
192
  bounding_box: BoundingBox | None = None
192
193
 
193
- def to_dict(self) -> dict:
194
+ def to_dict(self) -> dict[str, Any]:
194
195
  """Convert to dictionary for JSON serialization."""
195
196
  return {
196
197
  "text": self.text,
@@ -199,7 +200,7 @@ class DetectedText:
199
200
  }
200
201
 
201
202
  @classmethod
202
- def from_dict(cls, data: dict) -> "DetectedText":
203
+ def from_dict(cls, data: dict[str, Any]) -> "DetectedText":
203
204
  """Create DetectedText from dictionary."""
204
205
  return cls(
205
206
  text=data["text"],
@@ -229,7 +230,7 @@ class AudioEvent:
229
230
  """Duration of the audio event in seconds."""
230
231
  return self.end - self.start
231
232
 
232
- def to_dict(self) -> dict:
233
+ def to_dict(self) -> dict[str, Any]:
233
234
  """Convert to dictionary for JSON serialization."""
234
235
  return {
235
236
  "start": self.start,
@@ -239,7 +240,7 @@ class AudioEvent:
239
240
  }
240
241
 
241
242
  @classmethod
242
- def from_dict(cls, data: dict) -> AudioEvent:
243
+ def from_dict(cls, data: dict[str, Any]) -> AudioEvent:
243
244
  """Create AudioEvent from dictionary."""
244
245
  return cls(
245
246
  start=data["start"],
@@ -261,7 +262,7 @@ class AudioClassification:
261
262
  events: list[AudioEvent]
262
263
  clip_predictions: dict[str, float] = field(default_factory=dict)
263
264
 
264
- def to_dict(self) -> dict:
265
+ def to_dict(self) -> dict[str, Any]:
265
266
  """Convert to dictionary for JSON serialization."""
266
267
  return {
267
268
  "events": [event.to_dict() for event in self.events],
@@ -269,7 +270,7 @@ class AudioClassification:
269
270
  }
270
271
 
271
272
  @classmethod
272
- def from_dict(cls, data: dict) -> "AudioClassification":
273
+ def from_dict(cls, data: dict[str, Any]) -> "AudioClassification":
273
274
  """Create AudioClassification from dictionary."""
274
275
  return cls(
275
276
  events=[AudioEvent.from_dict(event) for event in data.get("events", [])],
@@ -306,7 +307,7 @@ class MotionInfo:
306
307
  """Check if this frame has significant motion."""
307
308
  return self.motion_type != "static"
308
309
 
309
- def to_dict(self) -> dict:
310
+ def to_dict(self) -> dict[str, Any]:
310
311
  """Convert to dictionary for JSON serialization."""
311
312
  return {
312
313
  "motion_type": self.motion_type,
@@ -315,7 +316,7 @@ class MotionInfo:
315
316
  }
316
317
 
317
318
  @classmethod
318
- def from_dict(cls, data: dict) -> MotionInfo:
319
+ def from_dict(cls, data: dict[str, Any]) -> MotionInfo:
319
320
  """Create MotionInfo from dictionary."""
320
321
  return cls(
321
322
  motion_type=data["motion_type"],
@@ -344,7 +345,7 @@ class SceneDescription:
344
345
  subjects: list[str] = field(default_factory=list)
345
346
  shot_type: str | None = None
346
347
 
347
- def to_dict(self) -> dict:
348
+ def to_dict(self) -> dict[str, Any]:
348
349
  return {
349
350
  "caption": self.caption,
350
351
  "subjects": list(self.subjects),
@@ -352,7 +353,7 @@ class SceneDescription:
352
353
  }
353
354
 
354
355
  @classmethod
355
- def from_dict(cls, data: dict) -> "SceneDescription":
356
+ def from_dict(cls, data: dict[str, Any]) -> "SceneDescription":
356
357
  return cls(
357
358
  caption=str(data["caption"]),
358
359
  subjects=[str(s) for s in data.get("subjects", [])],
@@ -386,7 +387,7 @@ class FaceTrack:
386
387
  """Number of frames in this track."""
387
388
  return len(self.frame_indices)
388
389
 
389
- def to_dict(self) -> dict:
390
+ def to_dict(self) -> dict[str, Any]:
390
391
  return {
391
392
  "track_id": self.track_id,
392
393
  "frame_indices": list(self.frame_indices),
@@ -395,7 +396,7 @@ class FaceTrack:
395
396
  }
396
397
 
397
398
  @classmethod
398
- def from_dict(cls, data: dict) -> "FaceTrack":
399
+ def from_dict(cls, data: dict[str, Any]) -> "FaceTrack":
399
400
  return cls(
400
401
  track_id=int(data["track_id"]),
401
402
  frame_indices=[int(i) for i in data.get("frame_indices", [])],