videopython 0.23.3__tar.gz → 0.24.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {videopython-0.23.3 → videopython-0.24.0}/PKG-INFO +1 -1
  2. {videopython-0.23.3 → videopython-0.24.0}/pyproject.toml +1 -1
  3. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/edit.py +86 -45
  4. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/video.py +31 -15
  5. {videopython-0.23.3 → videopython-0.24.0}/.gitignore +0 -0
  6. {videopython-0.23.3 → videopython-0.24.0}/LICENSE +0 -0
  7. {videopython-0.23.3 → videopython-0.24.0}/README.md +0 -0
  8. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/__init__.py +0 -0
  9. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/__init__.py +0 -0
  10. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/_device.py +0 -0
  11. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/dubbing/__init__.py +0 -0
  12. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/dubbing/dubber.py +0 -0
  13. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/dubbing/models.py +0 -0
  14. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/dubbing/pipeline.py +0 -0
  15. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/dubbing/timing.py +0 -0
  16. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/generation/__init__.py +0 -0
  17. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/generation/audio.py +0 -0
  18. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/generation/image.py +0 -0
  19. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/generation/translation.py +0 -0
  20. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/generation/video.py +0 -0
  21. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/registry.py +0 -0
  22. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/swapping/__init__.py +0 -0
  23. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/swapping/inpainter.py +0 -0
  24. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/swapping/models.py +0 -0
  25. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/swapping/segmenter.py +0 -0
  26. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/swapping/swapper.py +0 -0
  27. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/transforms.py +0 -0
  28. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/understanding/__init__.py +0 -0
  29. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/understanding/audio.py +0 -0
  30. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/understanding/image.py +0 -0
  31. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/understanding/separation.py +0 -0
  32. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/understanding/temporal.py +0 -0
  33. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/ai/video_analysis.py +0 -0
  34. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/__init__.py +0 -0
  35. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/audio/__init__.py +0 -0
  36. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/audio/analysis.py +0 -0
  37. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/audio/audio.py +0 -0
  38. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/combine.py +0 -0
  39. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/description.py +0 -0
  40. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/effects.py +0 -0
  41. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/exceptions.py +0 -0
  42. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/progress.py +0 -0
  43. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/registry.py +0 -0
  44. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/scene.py +0 -0
  45. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/text/__init__.py +0 -0
  46. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/text/overlay.py +0 -0
  47. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/text/transcription.py +0 -0
  48. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/transforms.py +0 -0
  49. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/transitions.py +0 -0
  50. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/base/utils.py +0 -0
  51. {videopython-0.23.3 → videopython-0.24.0}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.23.3
3
+ Version: 0.24.0
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.23.3"
3
+ version = "0.24.0"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -84,19 +84,27 @@ class SegmentConfig:
84
84
  f"SegmentConfig.effect_records must contain Effect operations, got {type(record.operation)}"
85
85
  )
86
86
 
87
- def process_segment(self, context: dict[str, Any] | None = None) -> Video:
88
- """Load the segment and apply transforms then effects.
87
+ def load_segment(
88
+ self,
89
+ fps: float | None = None,
90
+ width: int | None = None,
91
+ height: int | None = None,
92
+ ) -> Video:
93
+ """Load the raw segment from disk (cut only, no transforms or effects).
89
94
 
90
- Args:
91
- context: Optional side-channel data for context-dependent operations.
92
- Operations whose registry spec has a ``requires_transcript`` tag
93
- receive ``context["transcription"]`` as a keyword argument.
95
+ Optional fps/width/height are applied during decoding via ffmpeg filters.
94
96
  """
95
- video = Video.from_path(
97
+ return Video.from_path(
96
98
  str(self.source_video),
97
99
  start_second=self.start_second,
98
100
  end_second=self.end_second,
101
+ fps=fps,
102
+ width=width,
103
+ height=height,
99
104
  )
105
+
106
+ def apply_operations(self, video: Video, context: dict[str, Any] | None = None) -> Video:
107
+ """Apply per-segment transforms and effects to a loaded video."""
100
108
  for record in self.transform_records:
101
109
  video = _apply_transform_with_context(record, video, context)
102
110
  for record in self.effect_records:
@@ -111,6 +119,10 @@ class SegmentConfig:
111
119
  )
112
120
  return video
113
121
 
122
+ def process_segment(self, context: dict[str, Any] | None = None) -> Video:
123
+ """Load the segment and apply transforms then effects."""
124
+ return self.apply_operations(self.load_segment(), context)
125
+
114
126
 
115
127
  class VideoEdit:
116
128
  """Represents a complete multi-segment video editing plan."""
@@ -120,12 +132,16 @@ class VideoEdit:
120
132
  segments: Sequence[SegmentConfig],
121
133
  post_transform_records: Sequence[_StepRecord] | None = None,
122
134
  post_effect_records: Sequence[_StepRecord] | None = None,
135
+ match_to_lowest_fps: bool = True,
136
+ match_to_lowest_resolution: bool = True,
123
137
  ):
124
138
  if not segments:
125
139
  raise ValueError("VideoEdit requires at least one segment")
126
140
  self.segments: tuple[SegmentConfig, ...] = tuple(segments)
127
141
  self.post_transform_records: tuple[_StepRecord, ...] = tuple(post_transform_records or ())
128
142
  self.post_effect_records: tuple[_StepRecord, ...] = tuple(post_effect_records or ())
143
+ self.match_to_lowest_fps: bool = match_to_lowest_fps
144
+ self.match_to_lowest_resolution: bool = match_to_lowest_resolution
129
145
 
130
146
  for record in self.post_transform_records:
131
147
  if not isinstance(record.operation, Transformation):
@@ -183,6 +199,8 @@ class VideoEdit:
183
199
  segments=segments,
184
200
  post_transform_records=post_transform_records,
185
201
  post_effect_records=post_effect_records,
202
+ match_to_lowest_fps=data.get("match_to_lowest_fps", True),
203
+ match_to_lowest_resolution=data.get("match_to_lowest_resolution", True),
186
204
  )
187
205
 
188
206
  def to_dict(self) -> dict[str, Any]:
@@ -191,11 +209,16 @@ class VideoEdit:
191
209
  Serialization uses `_StepRecord` snapshots as the source of truth. Mutating
192
210
  live operation objects after parsing/construction does not affect output.
193
211
  """
194
- return {
212
+ result: dict[str, Any] = {
195
213
  "segments": [self._segment_to_dict(segment) for segment in self.segments],
196
214
  "post_transforms": [_step_to_dict(record, include_apply=False) for record in self.post_transform_records],
197
215
  "post_effects": [_step_to_dict(record, include_apply=True) for record in self.post_effect_records],
198
216
  }
217
+ if not self.match_to_lowest_fps:
218
+ result["match_to_lowest_fps"] = False
219
+ if not self.match_to_lowest_resolution:
220
+ result["match_to_lowest_resolution"] = False
221
+ return result
199
222
 
200
223
  @classmethod
201
224
  def json_schema(cls) -> dict[str, Any]:
@@ -282,9 +305,12 @@ class VideoEdit:
282
305
  Operations whose registry spec has a ``requires_transcript`` tag
283
306
  use ``context["transcription"]`` for metadata prediction.
284
307
  """
285
- segment_metas: list[VideoMetadata] = []
286
- for i, segment in enumerate(self.segments):
287
- segment_metas.append(self._validate_segment(i, segment, context))
308
+ source_metas = [self._validate_source_meta(i, seg) for i, seg in enumerate(self.segments)]
309
+ source_metas = self._match_metas(source_metas)
310
+ segment_metas = [
311
+ self._apply_segment_meta_ops(i, seg, meta, context)
312
+ for i, (seg, meta) in enumerate(zip(self.segments, source_metas))
313
+ ]
288
314
  return self._validate_assembled(segment_metas, context)
289
315
 
290
316
  def validate_with_metadata(
@@ -315,14 +341,19 @@ class VideoEdit:
315
341
  else:
316
342
  meta_map = source_metadata
317
343
 
318
- segment_metas: list[VideoMetadata] = []
344
+ source_metas: list[VideoMetadata] = []
319
345
  for i, segment in enumerate(self.segments):
320
346
  source_key = str(segment.source_video)
321
347
  if source_key not in meta_map:
322
348
  raise ValueError(
323
349
  f"Segment {i}: no metadata provided for source '{source_key}'. Available keys: {sorted(meta_map)}"
324
350
  )
325
- segment_metas.append(self._validate_segment_with_metadata(i, segment, meta_map[source_key], context))
351
+ source_metas.append(self._validate_source_meta(i, segment, meta_map[source_key]))
352
+ source_metas = self._match_metas(source_metas)
353
+ segment_metas = [
354
+ self._apply_segment_meta_ops(i, seg, meta, context)
355
+ for i, (seg, meta) in enumerate(zip(self.segments, source_metas))
356
+ ]
326
357
  return self._validate_assembled(segment_metas, context)
327
358
 
328
359
  def _validate_assembled(
@@ -373,9 +404,10 @@ class VideoEdit:
373
404
  "effects": [_step_to_dict(record, include_apply=True) for record in segment.effect_records],
374
405
  }
375
406
 
376
- def _validate_segment(
377
- self, index: int, segment: SegmentConfig, runtime_context: dict[str, Any] | None = None
407
+ def _validate_source_meta(
408
+ self, index: int, segment: SegmentConfig, source_meta: VideoMetadata | None = None
378
409
  ) -> VideoMetadata:
410
+ """Validate segment bounds and return cut source metadata (no transforms/effects)."""
379
411
  ctx = f"Segment {index}"
380
412
  if segment.start_second < 0:
381
413
  raise ValueError(f"{ctx}: start_second ({segment.start_second}) must be >= 0")
@@ -383,42 +415,22 @@ class VideoEdit:
383
415
  raise ValueError(
384
416
  f"{ctx}: end_second ({segment.end_second}) must be > start_second ({segment.start_second})"
385
417
  )
386
-
387
- meta = VideoMetadata.from_path(str(segment.source_video))
418
+ meta = source_meta if source_meta is not None else VideoMetadata.from_path(str(segment.source_video))
388
419
  if segment.end_second > meta.total_seconds:
389
420
  raise ValueError(
390
421
  f"{ctx}: end_second ({segment.end_second}) exceeds source duration ({meta.total_seconds}s)"
391
422
  )
392
- meta = meta.cut(segment.start_second, segment.end_second)
423
+ return meta.cut(segment.start_second, segment.end_second)
393
424
 
394
- for record in segment.transform_records:
395
- meta = _predict_transform_metadata(
396
- meta, record.op_id, record.args, context=f"{ctx} ({record.op_id})", runtime_context=runtime_context
397
- )
398
- for record in segment.effect_records:
399
- _validate_effect_bounds(record, meta.total_seconds, context=ctx)
400
- return meta
401
-
402
- def _validate_segment_with_metadata(
425
+ def _apply_segment_meta_ops(
403
426
  self,
404
427
  index: int,
405
428
  segment: SegmentConfig,
406
- source_meta: VideoMetadata,
429
+ meta: VideoMetadata,
407
430
  runtime_context: dict[str, Any] | None = None,
408
431
  ) -> VideoMetadata:
432
+ """Apply per-segment transform/effect metadata predictions."""
409
433
  ctx = f"Segment {index}"
410
- if segment.start_second < 0:
411
- raise ValueError(f"{ctx}: start_second ({segment.start_second}) must be >= 0")
412
- if segment.end_second <= segment.start_second:
413
- raise ValueError(
414
- f"{ctx}: end_second ({segment.end_second}) must be > start_second ({segment.start_second})"
415
- )
416
- if segment.end_second > source_meta.total_seconds:
417
- raise ValueError(
418
- f"{ctx}: end_second ({segment.end_second}) exceeds source duration ({source_meta.total_seconds}s)"
419
- )
420
- meta = source_meta.cut(segment.start_second, segment.end_second)
421
-
422
434
  for record in segment.transform_records:
423
435
  meta = _predict_transform_metadata(
424
436
  meta, record.op_id, record.args, context=f"{ctx} ({record.op_id})", runtime_context=runtime_context
@@ -427,12 +439,41 @@ class VideoEdit:
427
439
  _validate_effect_bounds(record, meta.total_seconds, context=ctx)
428
440
  return meta
429
441
 
442
+ def _match_metas(self, metas: list[VideoMetadata]) -> list[VideoMetadata]:
443
+ """Apply matching to source metadata list."""
444
+ if len(metas) <= 1:
445
+ return metas
446
+ if self.match_to_lowest_fps:
447
+ min_fps = min(m.fps for m in metas)
448
+ metas = [m.resample_fps(min_fps) if m.fps != min_fps else m for m in metas]
449
+ if self.match_to_lowest_resolution:
450
+ min_w = min(m.width for m in metas)
451
+ min_h = min(m.height for m in metas)
452
+ metas = [m.resize(width=min_w, height=min_h) if (m.width, m.height) != (min_w, min_h) else m for m in metas]
453
+ return metas
454
+
430
455
  def _assemble_segments(self, context: dict[str, Any] | None = None) -> Video:
431
- result: Video | None = None
432
- for segment in self.segments:
433
- video = segment.process_segment(context)
434
- result = video if result is None else result + video
435
- assert result is not None
456
+ # Compute matching targets from source metadata before loading.
457
+ target_fps, target_w, target_h = None, None, None
458
+ if len(self.segments) > 1 and (self.match_to_lowest_fps or self.match_to_lowest_resolution):
459
+ source_metas = [VideoMetadata.from_path(str(seg.source_video)) for seg in self.segments]
460
+ if self.match_to_lowest_fps:
461
+ target_fps = min(m.fps for m in source_metas)
462
+ if self.match_to_lowest_resolution:
463
+ target_w = min(m.width for m in source_metas)
464
+ target_h = min(m.height for m in source_metas)
465
+
466
+ # Load segments with matching applied via ffmpeg, then apply per-segment ops.
467
+ videos = [
468
+ segment.apply_operations(
469
+ segment.load_segment(fps=target_fps, width=target_w, height=target_h),
470
+ context,
471
+ )
472
+ for segment in self.segments
473
+ ]
474
+ result = videos[0]
475
+ for video in videos[1:]:
476
+ result = result + video
436
477
  return result
437
478
 
438
479
 
@@ -781,15 +781,22 @@ class Video:
781
781
 
782
782
  @classmethod
783
783
  def from_path(
784
- cls, path: str, read_batch_size: int = 100, start_second: float | None = None, end_second: float | None = None
784
+ cls,
785
+ path: str,
786
+ read_batch_size: int = 100,
787
+ start_second: float | None = None,
788
+ end_second: float | None = None,
789
+ fps: float | None = None,
790
+ width: int | None = None,
791
+ height: int | None = None,
785
792
  ) -> Video:
786
793
  try:
787
794
  # Get video metadata using VideoMetadata.from_path
788
795
  metadata = VideoMetadata.from_path(path)
789
796
 
790
- width = metadata.width
791
- height = metadata.height
792
- fps = metadata.fps
797
+ out_width = width if width is not None else metadata.width
798
+ out_height = height if height is not None else metadata.height
799
+ out_fps = fps if fps is not None else metadata.fps
793
800
  total_duration = metadata.total_seconds
794
801
 
795
802
  # Validate time bounds
@@ -809,8 +816,8 @@ class Video:
809
816
  elif start_second is not None:
810
817
  segment_duration = total_duration - start_second
811
818
 
812
- estimated_frames = int(segment_duration * fps)
813
- estimated_bytes = estimated_frames * height * width * 3
819
+ estimated_frames = int(segment_duration * out_fps)
820
+ estimated_bytes = estimated_frames * out_height * out_width * 3
814
821
  estimated_gb = estimated_bytes / (1024**3)
815
822
  if estimated_gb > 10:
816
823
  warnings.warn(
@@ -836,6 +843,15 @@ class Video:
836
843
  elif end_second is not None:
837
844
  ffmpeg_cmd.extend(["-t", str(end_second)])
838
845
 
846
+ # Apply video filters for resize and fps resampling
847
+ vf_filters: list[str] = []
848
+ if width is not None or height is not None:
849
+ vf_filters.append(f"scale={out_width}:{out_height}")
850
+ if fps is not None and fps != metadata.fps:
851
+ vf_filters.append(f"fps={out_fps}")
852
+ if vf_filters:
853
+ ffmpeg_cmd.extend(["-vf", ",".join(vf_filters)])
854
+
839
855
  # Output format settings - removed problematic -vsync 0
840
856
  ffmpeg_cmd.extend(
841
857
  [
@@ -861,7 +877,7 @@ class Video:
861
877
  )
862
878
 
863
879
  # Calculate frame size in bytes
864
- frame_size = width * height * 3 # 3 bytes per pixel for RGB
880
+ frame_size = out_width * out_height * 3 # 3 bytes per pixel for RGB
865
881
 
866
882
  # Estimate frame count for pre-allocation
867
883
  if start_second is not None and end_second is not None:
@@ -874,10 +890,10 @@ class Video:
874
890
  estimated_duration = total_duration
875
891
 
876
892
  # Add buffer to handle frame rate variations and rounding
877
- estimated_frames = int(estimated_duration * fps * FRAME_BUFFER_MULTIPLIER) + FRAME_BUFFER_PADDING
893
+ estimated_frames = int(estimated_duration * out_fps * FRAME_BUFFER_MULTIPLIER) + FRAME_BUFFER_PADDING
878
894
 
879
895
  # Pre-allocate numpy array
880
- frames = np.empty((estimated_frames, height, width, 3), dtype=np.uint8)
896
+ frames = np.empty((estimated_frames, out_height, out_width, 3), dtype=np.uint8)
881
897
  frames_read = 0
882
898
 
883
899
  try:
@@ -896,20 +912,20 @@ class Video:
896
912
  batch_frames = np.frombuffer(batch_data, dtype=np.uint8)
897
913
 
898
914
  # Calculate how many complete frames we got
899
- complete_frames = len(batch_frames) // (height * width * 3)
915
+ complete_frames = len(batch_frames) // (out_height * out_width * 3)
900
916
 
901
917
  if complete_frames == 0:
902
918
  break
903
919
 
904
920
  # Only keep complete frames
905
- complete_data = batch_frames[: complete_frames * height * width * 3]
906
- batch_frames_array = complete_data.reshape(complete_frames, height, width, 3)
921
+ complete_data = batch_frames[: complete_frames * out_height * out_width * 3]
922
+ batch_frames_array = complete_data.reshape(complete_frames, out_height, out_width, 3)
907
923
 
908
924
  # Check if we have room in pre-allocated array
909
925
  if frames_read + complete_frames > estimated_frames:
910
926
  # Need to expand array - this should be rare with our buffer
911
927
  new_size = max(estimated_frames * 2, frames_read + complete_frames + 100)
912
- new_frames = np.empty((new_size, height, width, 3), dtype=np.uint8)
928
+ new_frames = np.empty((new_size, out_height, out_width, 3), dtype=np.uint8)
913
929
  new_frames[:frames_read] = frames[:frames_read]
914
930
  frames = new_frames
915
931
  estimated_frames = new_size
@@ -954,10 +970,10 @@ class Video:
954
970
  except (AudioLoadError, FileNotFoundError, subprocess.CalledProcessError):
955
971
  warnings.warn(f"No audio found for `{path}`, adding silent track.")
956
972
  # Create silent audio based on actual frames read
957
- segment_duration = frames_read / fps
973
+ segment_duration = frames_read / out_fps
958
974
  audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)
959
975
 
960
- return cls(frames=frames, fps=fps, audio=audio)
976
+ return cls(frames=frames, fps=out_fps, audio=audio)
961
977
 
962
978
  except VideoMetadataError:
963
979
  raise
File without changes
File without changes
File without changes