videopython 0.31.2__tar.gz → 0.32.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {videopython-0.31.2 → videopython-0.32.0}/PKG-INFO +21 -8
  2. {videopython-0.31.2 → videopython-0.32.0}/README.md +20 -7
  3. {videopython-0.31.2 → videopython-0.32.0}/pyproject.toml +1 -1
  4. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/dubber.py +2 -2
  5. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/models.py +2 -2
  6. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/pipeline.py +7 -7
  7. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/quality.py +1 -1
  8. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/remux.py +1 -1
  9. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/timing.py +1 -1
  10. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/audio.py +1 -1
  11. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/qwen3.py +1 -1
  12. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/translation.py +1 -1
  13. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/transforms.py +1 -1
  14. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/audio.py +2 -2
  15. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/separation.py +1 -1
  16. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/video_analysis.py +3 -3
  17. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython}/audio/audio.py +5 -5
  18. videopython-0.32.0/src/videopython/base/__init__.py +62 -0
  19. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/_dimensions.py +1 -1
  20. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/_video_io.py +1 -1
  21. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/description.py +4 -2
  22. videopython-0.31.2/src/videopython/base/text/overlay.py → videopython-0.32.0/src/videopython/base/image_text.py +8 -149
  23. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/video.py +1 -1
  24. videopython-0.32.0/src/videopython/editing/__init__.py +61 -0
  25. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython/editing}/effects.py +3 -3
  26. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython/editing}/operation.py +1 -1
  27. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython/editing}/streaming.py +2 -2
  28. videopython-0.32.0/src/videopython/editing/transcription_overlay.py +160 -0
  29. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython/editing}/transforms.py +3 -3
  30. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/editing/video_edit.py +6 -6
  31. videopython-0.31.2/src/videopython/base/__init__.py +0 -128
  32. videopython-0.31.2/src/videopython/base/scene.py +0 -456
  33. videopython-0.31.2/src/videopython/base/text/__init__.py +0 -12
  34. videopython-0.31.2/src/videopython/editing/__init__.py +0 -6
  35. {videopython-0.31.2 → videopython-0.32.0}/.gitignore +0 -0
  36. {videopython-0.31.2 → videopython-0.32.0}/LICENSE +0 -0
  37. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/__init__.py +0 -0
  38. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/__init__.py +0 -0
  39. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/_device.py +0 -0
  40. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/dubbing/__init__.py +0 -0
  41. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/__init__.py +0 -0
  42. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/image.py +0 -0
  43. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/generation/video.py +0 -0
  44. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/__init__.py +0 -0
  45. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/faces.py +0 -0
  46. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/image.py +0 -0
  47. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/ai/understanding/temporal.py +0 -0
  48. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython}/audio/__init__.py +0 -0
  49. {videopython-0.31.2/src/videopython/base → videopython-0.32.0/src/videopython}/audio/analysis.py +0 -0
  50. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/_ffmpeg.py +0 -0
  51. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/base/exceptions.py +0 -0
  52. {videopython-0.31.2/src/videopython/base/text → videopython-0.32.0/src/videopython/base}/transcription.py +0 -0
  53. {videopython-0.31.2 → videopython-0.32.0}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.31.2
3
+ Version: 0.32.0
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -91,7 +91,8 @@ Every editing primitive is an `Operation` subclass — a Pydantic model
91
91
  whose fields ARE the JSON wire format. Apply one to a `Video`:
92
92
 
93
93
  ```python
94
- from videopython.base import Video, CutSeconds, Resize, Fade
94
+ from videopython.base import Video
95
+ from videopython.editing import CutSeconds, Resize, Fade
95
96
 
96
97
  video = Video.from_path("raw.mp4")
97
98
  video = CutSeconds(start=10, end=25).apply(video)
@@ -141,7 +142,7 @@ instead if you want the result back in memory as a `Video`.
141
142
 
142
143
  ```python
143
144
  from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
144
- from videopython.base import Resize
145
+ from videopython.editing import Resize
145
146
 
146
147
  image = TextToImage().generate_image("A cinematic mountain sunrise")
147
148
  video = ImageToVideo().generate_video(image=image)
@@ -182,7 +183,7 @@ Every registered op exposes its own Pydantic schema, so an agent can
182
183
  introspect what's available without hardcoded lists:
183
184
 
184
185
  ```python
185
- from videopython.base import Operation, OpCategory
186
+ from videopython.editing import Operation, OpCategory
186
187
 
187
188
  for op_id, cls in Operation.registry().items():
188
189
      print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
@@ -205,18 +206,30 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https
205
206
 
206
207
  ## Features
207
208
 
208
- ### `videopython.base` - core editing (no AI dependencies)
209
+ ### `videopython.base` - data containers + I/O (no AI dependencies)
209
210
 
210
211
  | Area | Highlights |
211
212
  |---|---|
212
213
  | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
214
+ | **Text rendering** | `ImageText` - generic PIL text-on-image primitive |
215
+ | **Transcription** | `Transcription`, `TranscriptionSegment`, `TranscriptionWord` - data classes returned by transcription backends |
216
+ | **Result types** | `BoundingBox`, `DetectedFace`, `FaceTrack`, `SceneBoundary`, `AudioEvent`, `MotionInfo`, ... - shared by editing and AI |
217
+
218
+ ### `videopython.audio` - audio data container
219
+
220
+ | Area | Highlights |
221
+ |---|---|
222
+ | **Audio** | `Audio`, `AudioMetadata` - load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
223
+
224
+ ### `videopython.editing` - editing primitives + plan runner
225
+
226
+ | Area | Highlights |
227
+ |---|---|
213
228
  | **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
214
229
  | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
215
230
  | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
216
231
  | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
217
- | **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
218
- | **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
219
- | **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
232
+ | **Subtitles** | `TranscriptionOverlay` - animated word-by-word subtitle rendering |
220
233
 
221
234
  API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
222
235
 
@@ -42,7 +42,8 @@ Every editing primitive is an `Operation` subclass — a Pydantic model
42
42
  whose fields ARE the JSON wire format. Apply one to a `Video`:
43
43
 
44
44
  ```python
45
- from videopython.base import Video, CutSeconds, Resize, Fade
45
+ from videopython.base import Video
46
+ from videopython.editing import CutSeconds, Resize, Fade
46
47
 
47
48
  video = Video.from_path("raw.mp4")
48
49
  video = CutSeconds(start=10, end=25).apply(video)
@@ -92,7 +93,7 @@ instead if you want the result back in memory as a `Video`.
92
93
 
93
94
  ```python
94
95
  from videopython.ai import TextToImage, ImageToVideo, TextToSpeech
95
- from videopython.base import Resize
96
+ from videopython.editing import Resize
96
97
 
97
98
  image = TextToImage().generate_image("A cinematic mountain sunrise")
98
99
  video = ImageToVideo().generate_video(image=image)
@@ -133,7 +134,7 @@ Every registered op exposes its own Pydantic schema, so an agent can
133
134
  introspect what's available without hardcoded lists:
134
135
 
135
136
  ```python
136
- from videopython.base import Operation, OpCategory
137
+ from videopython.editing import Operation, OpCategory
137
138
 
138
139
  for op_id, cls in Operation.registry().items():
139
140
      print(f"{op_id}: {(cls.__doc__ or '').splitlines()[0]}")
@@ -156,18 +157,30 @@ Docs: [Editing Plans](https://videopython.com/api/editing/) | [Operations](https
156
157
 
157
158
  ## Features
158
159
 
159
- ### `videopython.base` - core editing (no AI dependencies)
160
+ ### `videopython.base` - data containers + I/O (no AI dependencies)
160
161
 
161
162
  | Area | Highlights |
162
163
  |---|---|
163
164
  | **Video I/O** | `Video`, `VideoMetadata`, `FrameIterator` - load, save, inspect |
165
+ | **Text rendering** | `ImageText` - generic PIL text-on-image primitive |
166
+ | **Transcription** | `Transcription`, `TranscriptionSegment`, `TranscriptionWord` - data classes returned by transcription backends |
167
+ | **Result types** | `BoundingBox`, `DetectedFace`, `FaceTrack`, `SceneBoundary`, `AudioEvent`, `MotionInfo`, ... - shared by editing and AI |
168
+
169
+ ### `videopython.audio` - audio data container
170
+
171
+ | Area | Highlights |
172
+ |---|---|
173
+ | **Audio** | `Audio`, `AudioMetadata` - load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
174
+
175
+ ### `videopython.editing` - editing primitives + plan runner
176
+
177
+ | Area | Highlights |
178
+ |---|---|
164
179
  | **Operation foundation** | `Operation`, `Effect`, `TimeRange`, `OpCategory` - Pydantic base + auto-registry + discriminated-union schema |
165
180
  | **Editing plans** | `VideoEdit`, `SegmentConfig` - JSON/LLM-friendly multi-segment plans with JSON Schema generation, dry-run validation, and streaming `run_to_file` |
166
181
  | **Transforms** | Cut (time/frame), resize, crop, FPS resampling, speed change, reverse, freeze frame, silence removal |
167
182
  | **Effects** | Blur, zoom, color grading, vignette, Ken Burns, image overlay, fade, text overlay, volume adjust |
168
- | **Audio** | Load/save, overlay, concat, normalize, time-stretch, silence detection, segment classification |
169
- | **Text** | Transcription data classes, `TranscriptionOverlay` for subtitle rendering |
170
- | **Scene detection** | Histogram-based scene boundaries (`detect`, `detect_streaming`, `detect_parallel`) |
183
+ | **Subtitles** | `TranscriptionOverlay` - animated word-by-word subtitle rendering |
171
184
 
172
185
  API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopython.com/api/core/video/) | [Audio](https://videopython.com/api/core/audio/) | [Editing Plans](https://videopython.com/api/editing/) | [Operations](https://videopython.com/api/operations/) | [Transforms](https://videopython.com/api/transforms/) | [Effects](https://videopython.com/api/effects/) | [Text](https://videopython.com/api/text/)
173
186
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.31.2"
3
+ version = "0.32.0"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -218,7 +218,7 @@ class VideoDubber:
218
218
  source transcription. The output video is written to ``output_path``.
219
219
  """
220
220
  from videopython.ai.dubbing.remux import replace_audio_stream_from_audio
221
- from videopython.base.audio import Audio
221
+ from videopython.audio import Audio
222
222
 
223
223
  input_path = Path(input_path)
224
224
  output_path = Path(output_path)
@@ -292,7 +292,7 @@ class VideoDubber:
292
292
  video_duration = video.total_seconds
293
293
 
294
294
  if video_duration > speech_duration:
295
- from videopython.base.transforms import CutSeconds
295
+ from videopython.editing.transforms import CutSeconds
296
296
 
297
297
  output_video = CutSeconds(start=0, end=speech_duration).apply(video)
298
298
  else:
@@ -5,8 +5,8 @@ from __future__ import annotations
5
5
  from dataclasses import dataclass, field
6
6
  from typing import TYPE_CHECKING, Any
7
7
 
8
- from videopython.base.audio import Audio
9
- from videopython.base.text.transcription import Transcription, TranscriptionSegment
8
+ from videopython.audio import Audio
9
+ from videopython.base.transcription import Transcription, TranscriptionSegment
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  from videopython.ai.dubbing.quality import TranscriptQuality
@@ -22,8 +22,8 @@ from videopython.ai.generation.translation import (
22
22
 
23
23
  if TYPE_CHECKING:
24
24
  from videopython.ai.dubbing.models import TranslatedSegment
25
- from videopython.base.audio import Audio
26
- from videopython.base.text.transcription import Transcription
25
+ from videopython.audio import Audio
26
+ from videopython.base.transcription import Transcription
27
27
 
28
28
 
29
29
  TranslatorChoice = Literal["auto", "marian", "qwen3"]
@@ -41,7 +41,7 @@ def _peak_match(target: Audio, reference: Audio) -> Audio:
41
41
  Used as the fallback when LUFS measurement isn't viable (clip < 0.4s
42
42
  or silent input). The new ``Audio`` shares no buffer with ``target``.
43
43
  """
44
- from videopython.base.audio import Audio as _Audio
44
+ from videopython.audio import Audio as _Audio
45
45
 
46
46
  target_peak = float(np.max(np.abs(target.data))) if target.data.size else 0.0
47
47
  reference_peak = float(np.max(np.abs(reference.data))) if reference.data.size else 0.0
@@ -71,7 +71,7 @@ def _loudness_match(target: Audio, reference: Audio) -> Audio:
71
71
  are clamped to 0.99 — BS.1770 has no peak ceiling and a sufficiently
72
72
  quiet source can demand gain that would otherwise clip.
73
73
  """
74
- from videopython.base.audio import Audio as _Audio
74
+ from videopython.audio import Audio as _Audio
75
75
 
76
76
  target_dur = target.metadata.duration_seconds
77
77
  ref_dur = reference.metadata.duration_seconds
@@ -427,7 +427,7 @@ class LocalDubbingPipeline:
427
427
  every candidate is rejected, so the dub continues with the best
428
428
  sample we have rather than silently dropping the speaker.
429
429
  """
430
- from videopython.base.audio import Audio
430
+ from videopython.audio import Audio
431
431
 
432
432
  voice_samples: dict[str, Audio] = {}
433
433
 
@@ -558,7 +558,7 @@ class LocalDubbingPipeline:
558
558
  can use ``Audio.from_path(path)`` to avoid loading video frames.
559
559
  transcription: Optional pre-computed Transcription object. When provided,
560
560
  the internal Whisper transcription step is skipped (saving time and VRAM).
561
- Must be a ``videopython.base.text.transcription.Transcription`` instance
561
+ Must be a ``videopython.base.transcription.Transcription`` instance
562
562
  with populated ``segments``. Speaker labels on the supplied transcription
563
563
  drive per-speaker voice cloning. If the supplied transcription has no
564
564
  speakers and ``enable_diarization=True``, pyannote is run standalone on
@@ -805,7 +805,7 @@ class LocalDubbingPipeline:
805
805
  source_audio: Source audio track to revoice. Callers with a ``Video``
806
806
  object should pass ``video.audio``.
807
807
  """
808
- from videopython.base.audio import Audio
808
+ from videopython.audio import Audio
809
809
 
810
810
  def report_progress(stage: str, progress: float) -> None:
811
811
  if progress_callback:
@@ -24,7 +24,7 @@ from dataclasses import dataclass, field
24
24
  from typing import TYPE_CHECKING, Any, Literal
25
25
 
26
26
  if TYPE_CHECKING:
27
- from videopython.base.text.transcription import Transcription
27
+ from videopython.base.transcription import Transcription
28
28
 
29
29
 
30
30
  # Tuned conservatively to favor "warn" over "reject"; first-week production
@@ -14,7 +14,7 @@ from videopython.base import _ffmpeg
14
14
  from videopython.base.exceptions import FFmpegRunError
15
15
 
16
16
  if TYPE_CHECKING:
17
- from videopython.base.audio import Audio
17
+ from videopython.audio import Audio
18
18
 
19
19
  logger = logging.getLogger(__name__)
20
20
 
@@ -6,7 +6,7 @@ from dataclasses import dataclass
6
6
 
7
7
  import numpy as np
8
8
 
9
- from videopython.base.audio import Audio, AudioMetadata
9
+ from videopython.audio import Audio, AudioMetadata
10
10
 
11
11
 
12
12
  @dataclass
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
7
  from videopython.ai._device import log_device_initialization, release_device_memory, select_device
8
- from videopython.base.audio import Audio, AudioMetadata
8
+ from videopython.audio import Audio, AudioMetadata
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from pathlib import Path
@@ -27,7 +27,7 @@ from videopython.ai.generation.translation import (
27
27
  MarianTranslator,
28
28
  _is_translatable_text,
29
29
  )
30
- from videopython.base.text.transcription import TranscriptionSegment
30
+ from videopython.base.transcription import TranscriptionSegment
31
31
 
32
32
  # Imported under TYPE_CHECKING only — qwen3 sits below videopython.ai.dubbing
33
33
  # in the import order (pipeline.py imports Qwen3Translator), so a top-level
@@ -17,7 +17,7 @@ from __future__ import annotations
17
17
  from typing import TYPE_CHECKING, Any, Callable, Protocol, runtime_checkable
18
18
 
19
19
  from videopython.ai._device import log_device_initialization, release_device_memory, select_device
20
- from videopython.base.text.transcription import TranscriptionSegment
20
+ from videopython.base.transcription import TranscriptionSegment
21
21
 
22
22
  # Imported under TYPE_CHECKING to avoid a circular dep through
23
23
  # videopython.ai.dubbing (the dubbing pipeline imports both
@@ -12,8 +12,8 @@ from tqdm import tqdm
12
12
 
13
13
  from videopython.ai.understanding.faces import FaceTracker
14
14
  from videopython.base._dimensions import floor_to_even
15
- from videopython.base.operation import OpCategory, Operation
16
15
  from videopython.base.video import Video
16
+ from videopython.editing.operation import OpCategory, Operation
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -6,9 +6,9 @@ import logging
6
6
  from typing import Any, Literal
7
7
 
8
8
  from videopython.ai._device import log_device_initialization, release_device_memory, select_device
9
- from videopython.base.audio import Audio
9
+ from videopython.audio import Audio
10
10
  from videopython.base.description import AudioClassification, AudioEvent
11
- from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord
11
+ from videopython.base.transcription import Transcription, TranscriptionSegment, TranscriptionWord
12
12
  from videopython.base.video import Video
13
13
 
14
14
  logger = logging.getLogger(__name__)
@@ -7,7 +7,7 @@ from typing import Any
7
7
 
8
8
  from videopython.ai._device import log_device_initialization, release_device_memory, select_device
9
9
  from videopython.ai.dubbing.models import SeparatedAudio
10
- from videopython.base.audio import Audio, AudioMetadata
10
+ from videopython.audio import Audio, AudioMetadata
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -25,8 +25,8 @@ from videopython.ai.understanding import (
25
25
  SemanticSceneDetector,
26
26
  )
27
27
  from videopython.ai.understanding.faces import FaceTracker
28
+ from videopython.audio import Audio
28
29
  from videopython.base import _ffmpeg
29
- from videopython.base.audio import Audio
30
30
  from videopython.base.description import (
31
31
  AudioClassification,
32
32
  AudioEvent,
@@ -35,7 +35,7 @@ from videopython.base.description import (
35
35
  SceneDescription,
36
36
  )
37
37
  from videopython.base.exceptions import FFmpegProbeError
38
- from videopython.base.text.transcription import Transcription
38
+ from videopython.base.transcription import Transcription
39
39
  from videopython.base.video import Video, VideoMetadata, extract_frames_at_times
40
40
 
41
41
  __all__ = ["VideoAnalysis", "VideoAnalysisConfig", "VideoAnalyzer"]
@@ -949,7 +949,7 @@ class VideoAnalyzer:
949
949
  return None
950
950
  if source_path is not None:
951
951
  return Video.from_path(str(source_path), start_second=start_second, end_second=end_second)
952
- from videopython.base.transforms import CutSeconds
952
+ from videopython.editing.transforms import CutSeconds
953
953
 
954
954
  return CutSeconds(start=start_second, end=end_second).apply(_require_video(video))
955
955
 
@@ -13,7 +13,7 @@ from videopython.base import _ffmpeg
13
13
  from videopython.base.exceptions import AudioLoadError, FFmpegProbeError
14
14
 
15
15
  if TYPE_CHECKING:
16
- from videopython.base.audio.analysis import AudioLevels, AudioSegment, AudioSegmentType, SilentSegment
16
+ from videopython.audio.analysis import AudioLevels, AudioSegment, AudioSegmentType, SilentSegment
17
17
 
18
18
 
19
19
  @dataclass
@@ -879,7 +879,7 @@ class Audio:
879
879
  >>> levels = audio.get_levels()
880
880
  >>> print(f"Peak: {levels.db_peak:.1f} dB")
881
881
  """
882
- from videopython.base.audio.analysis import AudioLevels
882
+ from videopython.audio.analysis import AudioLevels
883
883
 
884
884
  segment = self.slice(start_seconds, end_seconds)
885
885
  data = segment.data.flatten() if segment.metadata.channels == 2 else segment.data
@@ -947,7 +947,7 @@ class Audio:
947
947
  >>> for seg in silent_segments:
948
948
  ... print(f"Silence: {seg.start:.2f}s - {seg.end:.2f}s")
949
949
  """
950
- from videopython.base.audio.analysis import SilentSegment
950
+ from videopython.audio.analysis import SilentSegment
951
951
 
952
952
  levels_over_time = self.get_levels_over_time(window_seconds=window_seconds, hop_seconds=window_seconds / 2)
953
953
 
@@ -1027,7 +1027,7 @@ class Audio:
1027
1027
  >>> for seg in segments:
1028
1028
  ... print(f"{seg.start:.1f}-{seg.end:.1f}s: {seg.segment_type.value}")
1029
1029
  """
1030
- from videopython.base.audio.analysis import AudioSegment
1030
+ from videopython.audio.analysis import AudioSegment
1031
1031
 
1032
1032
  hop_length = segment_length * (1 - overlap)
1033
1033
  segments = []
@@ -1064,7 +1064,7 @@ class Audio:
1064
1064
  Returns:
1065
1065
  Tuple of (AudioSegmentType, confidence)
1066
1066
  """
1067
- from videopython.base.audio.analysis import AudioSegmentType
1067
+ from videopython.audio.analysis import AudioSegmentType
1068
1068
 
1069
1069
  data = segment.to_mono().data
1070
1070
 
@@ -0,0 +1,62 @@
1
+ from .description import (
2
+ AudioClassification,
3
+ AudioEvent,
4
+ BoundingBox,
5
+ DetectedFace,
6
+ DetectedObject,
7
+ DetectedText,
8
+ FaceTrack,
9
+ MotionInfo,
10
+ SceneBoundary,
11
+ SceneDescription,
12
+ )
13
+ from .exceptions import (
14
+ AudioError,
15
+ AudioLoadError,
16
+ OutOfBoundsError,
17
+ TextRenderError,
18
+ TransformError,
19
+ VideoError,
20
+ VideoLoadError,
21
+ VideoMetadataError,
22
+ VideoPythonError,
23
+ )
24
+ from .image_text import AnchorPoint, ImageText, TextAlign
25
+ from .transcription import Transcription, TranscriptionSegment, TranscriptionWord
26
+ from .video import FrameIterator, Video, VideoMetadata
27
+
28
+ __all__ = [
29
+ # Core
30
+ "Video",
31
+ "VideoMetadata",
32
+ "FrameIterator",
33
+ # Exceptions
34
+ "VideoPythonError",
35
+ "VideoError",
36
+ "VideoLoadError",
37
+ "VideoMetadataError",
38
+ "AudioError",
39
+ "AudioLoadError",
40
+ "TransformError",
41
+ "TextRenderError",
42
+ "OutOfBoundsError",
43
+ # Text rendering primitives
44
+ "ImageText",
45
+ "TextAlign",
46
+ "AnchorPoint",
47
+ # Transcription data classes
48
+ "Transcription",
49
+ "TranscriptionSegment",
50
+ "TranscriptionWord",
51
+ # Detection / scene / motion result types (consumed by ai/, editing/)
52
+ "BoundingBox",
53
+ "DetectedObject",
54
+ "DetectedFace",
55
+ "DetectedText",
56
+ "FaceTrack",
57
+ "AudioEvent",
58
+ "AudioClassification",
59
+ "MotionInfo",
60
+ "SceneBoundary",
61
+ "SceneDescription",
62
+ ]
@@ -3,7 +3,7 @@
3
3
  Centralises the libx264+yuv420p even-dimension constraint and the
4
4
  two "round to even" calculations that previously lived (with subtly
5
5
  different semantics) in ``base/video.py``, ``ai/transforms.py``, and
6
- ``base/transforms.py``.
6
+ ``editing/transforms.py``.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -20,9 +20,9 @@ from typing import Literal, get_args
20
20
 
21
21
  import numpy as np
22
22
 
23
+ from videopython.audio import Audio
23
24
  from videopython.base import _ffmpeg
24
25
  from videopython.base._dimensions import require_even
25
- from videopython.base.audio import Audio
26
26
  from videopython.base.exceptions import (
27
27
  AudioLoadError,
28
28
  FFmpegRunError,
@@ -22,8 +22,10 @@ __all__ = [
22
22
  class SceneBoundary:
23
23
  """Timing information for a detected scene.
24
24
 
25
- A lightweight structure representing scene boundaries detected by SceneDetector.
26
- This is a backbone type - higher-level scene analysis belongs in orchestration packages.
25
+ A lightweight structure representing scene boundaries returned by
26
+ scene detectors (e.g. ``videopython.ai.SemanticSceneDetector``). This
27
+ is a backbone type — higher-level scene analysis lives in orchestration
28
+ packages.
27
29
 
28
30
  Attributes:
29
31
  start: Scene start time in seconds