videopython 0.33.0__tar.gz → 0.33.2__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (57)
  1. {videopython-0.33.0 → videopython-0.33.2}/PKG-INFO +1 -1
  2. {videopython-0.33.0 → videopython-0.33.2}/pyproject.toml +4 -1
  3. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/generation/audio.py +14 -9
  4. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/generation/image.py +6 -1
  5. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/generation/translation.py +2 -2
  6. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/generation/video.py +21 -13
  7. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/understanding/audio.py +11 -2
  8. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/understanding/faces.py +11 -16
  9. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/understanding/image.py +4 -13
  10. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/understanding/temporal.py +12 -6
  11. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/audio/audio.py +4 -4
  12. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/_ffmpeg.py +5 -5
  13. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/_video_io.py +1 -1
  14. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/description.py +21 -20
  15. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/transcription.py +10 -8
  16. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/video.py +2 -2
  17. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/__init__.py +20 -0
  18. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/effects.py +649 -2
  19. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/operation.py +4 -5
  20. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/streaming.py +8 -2
  21. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/transforms.py +2 -2
  22. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/video_edit.py +2 -2
  23. {videopython-0.33.0 → videopython-0.33.2}/.gitignore +0 -0
  24. {videopython-0.33.0 → videopython-0.33.2}/LICENSE +0 -0
  25. {videopython-0.33.0 → videopython-0.33.2}/README.md +0 -0
  26. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/__init__.py +0 -0
  27. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/__init__.py +0 -0
  28. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/_device.py +0 -0
  29. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/__init__.py +0 -0
  30. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/config.py +0 -0
  31. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/dubber.py +0 -0
  32. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/expressiveness.py +0 -0
  33. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/loudness.py +0 -0
  34. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/models.py +0 -0
  35. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/pipeline.py +0 -0
  36. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/quality.py +0 -0
  37. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/remux.py +0 -0
  38. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/timing.py +0 -0
  39. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/dubbing/voice_sample.py +0 -0
  40. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/generation/__init__.py +0 -0
  41. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/generation/qwen3.py +0 -0
  42. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/transforms.py +0 -0
  43. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/understanding/__init__.py +0 -0
  44. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/understanding/separation.py +0 -0
  45. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/video_analysis/__init__.py +0 -0
  46. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/video_analysis/analyzer.py +0 -0
  47. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/video_analysis/models.py +0 -0
  48. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/video_analysis/sampling.py +0 -0
  49. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/ai/video_analysis/stages.py +0 -0
  50. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/audio/__init__.py +0 -0
  51. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/audio/analysis.py +0 -0
  52. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/__init__.py +0 -0
  53. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/_dimensions.py +0 -0
  54. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/exceptions.py +0 -0
  55. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/base/image_text.py +0 -0
  56. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/editing/transcription_overlay.py +0 -0
  57. {videopython-0.33.0 → videopython-0.33.2}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.33.0
3
+ Version: 0.33.2
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.33.0"
3
+ version = "0.33.2"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -137,6 +137,9 @@ Documentation = "https://videopython.com"
137
137
  [tool.mypy]
138
138
  mypy_path = "src/stubs"
139
139
  plugins = ["pydantic.mypy"]
140
+ warn_unused_ignores = true
141
+ warn_redundant_casts = true
142
+ disallow_any_generics = true
140
143
 
141
144
  [[tool.mypy.overrides]]
142
145
  module = [
@@ -32,8 +32,8 @@ class TextToSpeech:
32
32
  self.language = language
33
33
  self._model: Any = None
34
34
 
35
- def _init_model(self) -> None:
36
- from chatterbox.mtl_tts import ChatterboxMultilingualTTS # type: ignore[import-untyped]
35
+ def _init_local(self) -> None:
36
+ from chatterbox.mtl_tts import ChatterboxMultilingualTTS
37
37
 
38
38
  requested_device = self.device
39
39
  device = select_device(self.device, mps_allowed=False)
@@ -83,7 +83,7 @@ class TextToSpeech:
83
83
  import numpy as np
84
84
 
85
85
  if self._model is None:
86
- self._init_model()
86
+ self._init_local()
87
87
 
88
88
  speaker_wav_path: Path | None = None
89
89
  cleanup_path = False
@@ -149,7 +149,6 @@ class TextToMusic:
149
149
  self.device = device
150
150
  self._processor: Any = None
151
151
  self._model: Any = None
152
- self._device: str | None = None
153
152
 
154
153
  def _init_local(self) -> None:
155
154
  """Initialize local MusicGen model."""
@@ -160,17 +159,17 @@ class TextToMusic:
160
159
  os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
161
160
 
162
161
  requested_device = self.device
163
- self._device = select_device(self.device, mps_allowed=True)
162
+ device = select_device(self.device, mps_allowed=True)
164
163
 
165
164
  model_name = "facebook/musicgen-small"
166
165
  self._processor = AutoProcessor.from_pretrained(model_name)
167
166
  self._model = MusicgenForConditionalGeneration.from_pretrained(model_name)
168
- self._model.to(self._device)
169
- self.device = self._device
167
+ self._model.to(device)
168
+ self.device = device
170
169
  log_device_initialization(
171
170
  "TextToMusic",
172
171
  requested_device=requested_device,
173
- resolved_device=self._device,
172
+ resolved_device=device,
174
173
  )
175
174
 
176
175
  def generate_audio(self, text: str, max_new_tokens: int = 256) -> Audio:
@@ -179,7 +178,7 @@ class TextToMusic:
179
178
  self._init_local()
180
179
 
181
180
  inputs = self._processor(text=[text], padding=True, return_tensors="pt")
182
- inputs = {k: v.to(self._device) if hasattr(v, "to") else v for k, v in inputs.items()}
181
+ inputs = {k: v.to(self.device) if hasattr(v, "to") else v for k, v in inputs.items()}
183
182
  audio_values = self._model.generate(**inputs, max_new_tokens=max_new_tokens)
184
183
  sampling_rate = self._model.config.audio_encoder.sampling_rate
185
184
 
@@ -193,3 +192,9 @@ class TextToMusic:
193
192
  frame_count=len(audio_data),
194
193
  )
195
194
  return Audio(audio_data, metadata)
195
+
196
+ def unload(self) -> None:
197
+ """Release the MusicGen model so the next generate_audio() re-initializes."""
198
+ self._model = None
199
+ self._processor = None
200
+ release_device_memory(self.device)
@@ -6,7 +6,7 @@ from typing import Any
6
6
 
7
7
  from PIL import Image
8
8
 
9
- from videopython.ai._device import log_device_initialization, select_device
9
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
10
10
 
11
11
 
12
12
  class TextToImage:
@@ -49,3 +49,8 @@ class TextToImage:
49
49
  if self._pipeline is None:
50
50
  self._init_local()
51
51
  return self._pipeline(prompt=prompt).images[0]
52
+
53
+ def unload(self) -> None:
54
+ """Release the diffusion pipeline so the next generate_image() re-initializes."""
55
+ self._pipeline = None
56
+ release_device_memory(self.device)
@@ -170,7 +170,7 @@ class MarianTranslator:
170
170
  return f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
171
171
 
172
172
  def _init_local(self, source_lang: str, target_lang: str) -> None:
173
- from transformers import MarianMTModel, MarianTokenizer # type: ignore[attr-defined]
173
+ from transformers import MarianMTModel, MarianTokenizer
174
174
 
175
175
  model_name = self._get_local_model_name(source_lang, target_lang)
176
176
 
@@ -181,7 +181,7 @@ class MarianTranslator:
181
181
  self._model = MarianMTModel.from_pretrained(model_name).to(device)
182
182
  self.device = device
183
183
  log_device_initialization(
184
- "TextTranslator",
184
+ "MarianTranslator",
185
185
  requested_device=requested_device,
186
186
  resolved_device=device,
187
187
  )
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any
6
6
 
7
7
  import numpy as np
8
8
 
9
- from videopython.ai._device import log_device_initialization, select_device
9
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
10
10
  from videopython.base.video import Video
11
11
 
12
12
  if TYPE_CHECKING:
@@ -29,22 +29,21 @@ class TextToVideo:
29
29
  def __init__(self, device: str | None = None):
30
30
  self.device = device
31
31
  self._pipeline: Any = None
32
- self._device: str | None = None
33
32
 
34
33
  def _init_local(self) -> None:
35
34
  from diffusers import CogVideoXPipeline
36
35
 
37
36
  requested_device = self.device
38
- self._device, dtype = _get_torch_device_and_dtype(self.device)
37
+ device, dtype = _get_torch_device_and_dtype(self.device)
39
38
 
40
39
  model_name = "THUDM/CogVideoX1.5-5B"
41
40
  self._pipeline = CogVideoXPipeline.from_pretrained(model_name, torch_dtype=dtype)
42
- self._pipeline.to(self._device)
43
- self.device = self._device
41
+ self._pipeline.to(device)
42
+ self.device = device
44
43
  log_device_initialization(
45
44
  "TextToVideo",
46
45
  requested_device=requested_device,
47
- resolved_device=self._device,
46
+ resolved_device=device,
48
47
  )
49
48
 
50
49
  def generate_video(
@@ -65,11 +64,16 @@ class TextToVideo:
65
64
  num_inference_steps=num_steps,
66
65
  num_frames=num_frames,
67
66
  guidance_scale=guidance_scale,
68
- generator=torch.Generator(device=self._device).manual_seed(42),
67
+ generator=torch.Generator(device=self.device).manual_seed(42),
69
68
  ).frames[0]
70
69
  video_frames = np.asarray(video_frames, dtype=np.uint8)
71
70
  return Video.from_frames(video_frames, fps=16.0)
72
71
 
72
+ def unload(self) -> None:
73
+ """Release the diffusion pipeline so the next generate_video() re-initializes."""
74
+ self._pipeline = None
75
+ release_device_memory(self.device)
76
+
73
77
 
74
78
  class ImageToVideo:
75
79
  """Generates videos from static images using local video diffusion."""
@@ -77,22 +81,21 @@ class ImageToVideo:
77
81
  def __init__(self, device: str | None = None):
78
82
  self.device = device
79
83
  self._pipeline: Any = None
80
- self._device: str | None = None
81
84
 
82
85
  def _init_local(self) -> None:
83
86
  from diffusers import CogVideoXImageToVideoPipeline
84
87
 
85
88
  requested_device = self.device
86
- self._device, dtype = _get_torch_device_and_dtype(self.device)
89
+ device, dtype = _get_torch_device_and_dtype(self.device)
87
90
 
88
91
  model_name = "THUDM/CogVideoX1.5-5B-I2V"
89
92
  self._pipeline = CogVideoXImageToVideoPipeline.from_pretrained(model_name, torch_dtype=dtype)
90
- self._pipeline.to(self._device)
91
- self.device = self._device
93
+ self._pipeline.to(device)
94
+ self.device = device
92
95
  log_device_initialization(
93
96
  "ImageToVideo",
94
97
  requested_device=requested_device,
95
- resolved_device=self._device,
98
+ resolved_device=device,
96
99
  )
97
100
 
98
101
  def generate_video(
@@ -115,7 +118,12 @@ class ImageToVideo:
115
118
  num_inference_steps=num_steps,
116
119
  num_frames=num_frames,
117
120
  guidance_scale=guidance_scale,
118
- generator=torch.Generator(device=self._device).manual_seed(42),
121
+ generator=torch.Generator(device=self.device).manual_seed(42),
119
122
  ).frames[0]
120
123
  video_frames = np.asarray(video_frames, dtype=np.uint8)
121
124
  return Video.from_frames(video_frames, fps=16.0)
125
+
126
+ def unload(self) -> None:
127
+ """Release the diffusion pipeline so the next generate_video() re-initializes."""
128
+ self._pipeline = None
129
+ release_device_memory(self.device)
@@ -188,7 +188,7 @@ class AudioToText:
188
188
  def _init_diarization(self) -> None:
189
189
  """Initialize pyannote speaker diarization pipeline."""
190
190
  import torch
191
- from pyannote.audio import Pipeline # type: ignore[import-untyped]
191
+ from pyannote.audio import Pipeline
192
192
 
193
193
  self._diarization_pipeline = Pipeline.from_pretrained(self.PYANNOTE_DIARIZATION_MODEL)
194
194
  self._diarization_pipeline.to(torch.device(self.device))
@@ -214,7 +214,7 @@ class AudioToText:
214
214
  self._vad_model = None
215
215
  release_device_memory(self.device)
216
216
 
217
- def _process_transcription_result(self, transcription_result: dict) -> Transcription:
217
+ def _process_transcription_result(self, transcription_result: dict[str, Any]) -> Transcription:
218
218
  """Process raw transcription result into a Transcription object."""
219
219
  transcription_segments = []
220
220
  for segment in transcription_result["segments"]:
@@ -520,6 +520,15 @@ class AudioClassifier:
520
520
 
521
521
  self._labels = [self._model.config.id2label[i] for i in range(len(self._model.config.id2label))]
522
522
 
523
+ def unload(self) -> None:
524
+ """Release the AST model so the next classify() re-initializes.
525
+
526
+ Used by low-memory dubbing to free VRAM between pipeline stages.
527
+ """
528
+ self._model = None
529
+ self._processor = None
530
+ release_device_memory(self.device)
531
+
523
532
  def _merge_events(self, events: list[AudioEvent], gap_threshold: float = 0.5) -> list[AudioEvent]:
524
533
  """Merge consecutive events of the same class."""
525
534
  if not events:
@@ -237,7 +237,7 @@ class FaceTracker:
237
237
 
238
238
  def _select_face(
239
239
  self,
240
- faces: list,
240
+ faces: list[DetectedFace],
241
241
  frame_width: int,
242
242
  frame_height: int,
243
243
  ) -> tuple[float, float, float, float] | None:
@@ -251,29 +251,24 @@ class FaceTracker:
251
251
  Returns:
252
252
  Tuple of (center_x, center_y, width, height) in normalized coords, or None.
253
253
  """
254
- if not faces:
254
+ faces_with_box = [(f, f.bounding_box) for f in faces if f.bounding_box is not None]
255
+ if not faces_with_box:
255
256
  return None
256
257
 
257
258
  if self.selection_strategy == "largest":
258
- face = faces[0]
259
+ _, bbox = faces_with_box[0]
259
260
  elif self.selection_strategy == "centered":
260
261
  frame_center = (0.5, 0.5)
261
- face = min(
262
- faces,
263
- key=lambda f: (
264
- (f.bounding_box.center[0] - frame_center[0]) ** 2
265
- + (f.bounding_box.center[1] - frame_center[1]) ** 2
266
- ),
262
+ _, bbox = min(
263
+ faces_with_box,
264
+ key=lambda fb: ((fb[1].center[0] - frame_center[0]) ** 2 + (fb[1].center[1] - frame_center[1]) ** 2),
267
265
  )
268
266
  elif self.selection_strategy == "index":
269
- if self.face_index < len(faces):
270
- face = faces[self.face_index]
271
- else:
272
- face = faces[0]
267
+ idx = self.face_index if self.face_index < len(faces_with_box) else 0
268
+ _, bbox = faces_with_box[idx]
273
269
  else:
274
- face = faces[0]
270
+ _, bbox = faces_with_box[0]
275
271
 
276
- bbox = face.bounding_box
277
272
  return (bbox.center[0], bbox.center[1], bbox.width, bbox.height)
278
273
 
279
274
  def detect_and_track(
@@ -407,7 +402,7 @@ class FaceTracker:
407
402
 
408
403
  sampled_frames = [frames[i] for i in sample_indices]
409
404
 
410
- sampled_detections: list[list] = []
405
+ sampled_detections: list[list[DetectedFace]] = []
411
406
  for batch_start in range(0, len(sampled_frames), self.batch_size):
412
407
  batch_end = min(batch_start + self.batch_size, len(sampled_frames))
413
408
  batch = sampled_frames[batch_start:batch_end]
@@ -11,7 +11,7 @@ from typing import Any, Literal
11
11
  import numpy as np
12
12
  from PIL import Image
13
13
 
14
- from videopython.ai._device import log_device_initialization, select_device
14
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
15
15
  from videopython.base.description import SceneDescription
16
16
 
17
17
  logger = logging.getLogger(__name__)
@@ -151,7 +151,7 @@ class SceneVLM:
151
151
  def _init_local(self) -> None:
152
152
  """Initialize local Qwen3.5 model."""
153
153
  import torch
154
- from transformers import AutoModelForImageTextToText, AutoProcessor # type: ignore[attr-defined]
154
+ from transformers import AutoModelForImageTextToText, AutoProcessor
155
155
 
156
156
  t0 = time.perf_counter()
157
157
  requested_device = self.device
@@ -190,16 +190,7 @@ class SceneVLM:
190
190
  """
191
191
  self._model = None
192
192
  self._processor = None
193
- try:
194
- import gc
195
-
196
- import torch
197
-
198
- gc.collect()
199
- if torch.cuda.is_available():
200
- torch.cuda.empty_cache()
201
- except ImportError:
202
- pass
193
+ release_device_memory(self.device)
203
194
 
204
195
  def _downscale_image(self, img: Image.Image) -> Image.Image:
205
196
  """Downscale image to fit within max_image_pixels budget, preserving aspect ratio."""
@@ -284,7 +275,7 @@ class SceneVLM:
284
275
  def _generate_from_message_batch(self, messages_batch: list[list[dict[str, Any]]]) -> list[str]:
285
276
  """Run batch generation for one or more multimodal chat messages."""
286
277
  import torch
287
- from qwen_vl_utils import process_vision_info # type: ignore
278
+ from qwen_vl_utils import process_vision_info
288
279
 
289
280
  if self._model is None:
290
281
  self._init_local()
@@ -9,7 +9,7 @@ from __future__ import annotations
9
9
  from pathlib import Path
10
10
  from typing import TYPE_CHECKING, Any
11
11
 
12
- from videopython.ai._device import log_device_initialization, select_device
12
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
13
13
  from videopython.base.description import SceneBoundary
14
14
 
15
15
  if TYPE_CHECKING:
@@ -56,26 +56,32 @@ class SemanticSceneDetector:
56
56
 
57
57
  self.threshold = threshold
58
58
  self.min_scene_length = min_scene_length
59
- self._device: str | None = device
59
+ self.device: str | None = device
60
60
  self._model: Any = None
61
61
 
62
- def _load_model(self) -> None:
62
+ def _init_local(self) -> None:
63
63
  """Load the TransNetV2 model with pretrained weights."""
64
64
  if self._model is not None:
65
65
  return
66
66
 
67
67
  from transnetv2_pytorch import TransNetV2
68
68
 
69
- requested_device = self._device
70
- device = select_device(self._device, mps_allowed=True)
69
+ requested_device = self.device
70
+ device = select_device(self.device, mps_allowed=True)
71
71
  log_device_initialization(
72
72
  "SemanticSceneDetector",
73
73
  requested_device=requested_device,
74
74
  resolved_device=device,
75
75
  )
76
+ self.device = device
76
77
  self._model = TransNetV2(device=device)
77
78
  self._model.eval()
78
79
 
80
+ def unload(self) -> None:
81
+ """Release the TransNetV2 model so the next call re-initializes."""
82
+ self._model = None
83
+ release_device_memory(self.device)
84
+
79
85
  def detect(self, video: Video) -> list[SceneBoundary]:
80
86
  """Detect scenes in a video using ML-based boundary detection.
81
87
 
@@ -114,7 +120,7 @@ class SemanticSceneDetector:
114
120
  Returns:
115
121
  List of SceneBoundary objects representing detected scenes.
116
122
  """
117
- self._load_model()
123
+ self._init_local()
118
124
 
119
125
  # Use TransNetV2's detect_scenes which handles everything internally
120
126
  raw_scenes = self._model.detect_scenes(str(path), threshold=self.threshold)
@@ -5,7 +5,7 @@ import subprocess
5
5
  import wave
6
6
  from dataclasses import dataclass
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  import numpy as np
11
11
 
@@ -69,7 +69,7 @@ class Audio:
69
69
  return bool(np.all(np.abs(self.data) < 1e-7))
70
70
 
71
71
  @staticmethod
72
- def _get_ffmpeg_info(file_path: Path) -> dict:
72
+ def _get_ffmpeg_info(file_path: Path) -> dict[str, Any]:
73
73
  """Get audio metadata using ffprobe"""
74
74
  try:
75
75
  info = _ffmpeg.probe(file_path)
@@ -483,7 +483,7 @@ class Audio:
483
483
  if first.metadata.channels == 1:
484
484
  output = np.zeros(total_samples, dtype=np.float32)
485
485
  else:
486
- output = np.zeros((total_samples, 2), dtype=np.float32) # type: ignore
486
+ output = np.zeros((total_samples, 2), dtype=np.float32)
487
487
 
488
488
  # Copy non-crossfaded portions
489
489
  crossfade_start = len(first.data) - crossfade_samples
@@ -761,7 +761,7 @@ class Audio:
761
761
  if base.metadata.channels == 1:
762
762
  output = np.zeros(total_length, dtype=np.float32)
763
763
  else:
764
- output = np.zeros((total_length, 2), dtype=np.float32) # type: ignore
764
+ output = np.zeros((total_length, 2), dtype=np.float32)
765
765
 
766
766
  # Copy base audio
767
767
  output[: len(base.data)] = base.data
@@ -13,7 +13,7 @@ import json
13
13
  import subprocess
14
14
  from contextlib import contextmanager
15
15
  from pathlib import Path
16
- from typing import Iterator, Sequence
16
+ from typing import Any, Iterator, Sequence
17
17
 
18
18
  from videopython.base.exceptions import FFmpegProbeError, FFmpegRunError
19
19
 
@@ -44,7 +44,7 @@ def run(cmd: Sequence[str], *, stdin: bytes | None = None) -> bytes:
44
44
  return result.stdout
45
45
 
46
46
 
47
- def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict:
47
+ def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict[str, Any]:
48
48
  """Run ffprobe and return the parsed JSON payload.
49
49
 
50
50
  Args:
@@ -76,7 +76,7 @@ def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict:
76
76
  raise FFmpegProbeError(f"Error parsing ffprobe output: {e}") from e
77
77
 
78
78
 
79
- def _terminate(proc: subprocess.Popen, *, timeout: float = 5) -> None:
79
+ def _terminate(proc: subprocess.Popen[bytes], *, timeout: float = 5) -> None:
80
80
  """Terminate a still-running process, escalating to kill after ``timeout``."""
81
81
  if proc.poll() is None:
82
82
  proc.terminate()
@@ -88,7 +88,7 @@ def _terminate(proc: subprocess.Popen, *, timeout: float = 5) -> None:
88
88
 
89
89
 
90
90
  @contextmanager
91
- def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subprocess.Popen]:
91
+ def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subprocess.Popen[bytes]]:
92
92
  """Context manager wrapping an ffmpeg decode process.
93
93
 
94
94
  Yields a Popen with ``stdout=PIPE`` and ``stderr=DEVNULL``. Callers
@@ -116,7 +116,7 @@ def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subproces
116
116
 
117
117
 
118
118
  @contextmanager
119
- def popen_encode(cmd: Sequence[str]) -> Iterator[subprocess.Popen]:
119
+ def popen_encode(cmd: Sequence[str]) -> Iterator[subprocess.Popen[bytes]]:
120
120
  """Context manager wrapping an ffmpeg encode process via stdin pipe.
121
121
 
122
122
  Yields a Popen with ``stdin=PIPE``, ``stdout=DEVNULL``, and
@@ -173,7 +173,7 @@ def decode_video(
173
173
  if frames_read == 0:
174
174
  raise ValueError("No frames were read from the video")
175
175
 
176
- frames = frames[:frames_read] # type: ignore
176
+ frames = frames[:frames_read]
177
177
 
178
178
  try:
179
179
  audio = Audio.from_path(path)