videopython 0.34.0__tar.gz → 0.35.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {videopython-0.34.0 → videopython-0.35.0}/PKG-INFO +4 -4
  2. {videopython-0.34.0 → videopython-0.35.0}/README.md +1 -1
  3. {videopython-0.34.0 → videopython-0.35.0}/pyproject.toml +4 -4
  4. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/image_text.py +274 -97
  5. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/__init__.py +2 -0
  6. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/effects.py +217 -1
  7. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/operation.py +12 -1
  8. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/transcription_overlay.py +12 -1
  9. {videopython-0.34.0 → videopython-0.35.0}/.gitignore +0 -0
  10. {videopython-0.34.0 → videopython-0.35.0}/LICENSE +0 -0
  11. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/__init__.py +0 -0
  12. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/__init__.py +0 -0
  13. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/_device.py +0 -0
  14. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/__init__.py +0 -0
  15. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/config.py +0 -0
  16. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/dubber.py +0 -0
  17. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/expressiveness.py +0 -0
  18. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/loudness.py +0 -0
  19. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/models.py +0 -0
  20. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/pipeline.py +0 -0
  21. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/quality.py +0 -0
  22. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/remux.py +0 -0
  23. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/timing.py +0 -0
  24. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/voice_sample.py +0 -0
  25. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/__init__.py +0 -0
  26. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/audio.py +0 -0
  27. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/image.py +0 -0
  28. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/qwen3.py +0 -0
  29. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/translation.py +0 -0
  30. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/video.py +0 -0
  31. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/transforms.py +0 -0
  32. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/__init__.py +0 -0
  33. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/audio.py +0 -0
  34. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/faces.py +0 -0
  35. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/image.py +0 -0
  36. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/separation.py +0 -0
  37. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/temporal.py +0 -0
  38. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/__init__.py +0 -0
  39. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/analyzer.py +0 -0
  40. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/models.py +0 -0
  41. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/sampling.py +0 -0
  42. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/stages.py +0 -0
  43. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/audio/__init__.py +0 -0
  44. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/audio/analysis.py +0 -0
  45. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/audio/audio.py +0 -0
  46. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/__init__.py +0 -0
  47. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/_dimensions.py +0 -0
  48. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/_ffmpeg.py +0 -0
  49. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/_video_io.py +0 -0
  50. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/description.py +0 -0
  51. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/exceptions.py +0 -0
  52. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/fonts/DejaVuSans.ttf +0 -0
  53. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/fonts/LICENSE_DEJAVU +0 -0
  54. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/fonts/__init__.py +0 -0
  55. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/transcription.py +0 -0
  56. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/video.py +0 -0
  57. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/streaming.py +0 -0
  58. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/transforms.py +0 -0
  59. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/video_edit.py +0 -0
  60. {videopython-0.34.0 → videopython-0.35.0}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.34.0
3
+ Version: 0.35.0
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -12,15 +12,15 @@ Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Operating System :: OS Independent
14
14
  Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.10
16
15
  Classifier: Programming Language :: Python :: 3.11
17
16
  Classifier: Programming Language :: Python :: 3.12
18
17
  Classifier: Programming Language :: Python :: 3.13
19
- Requires-Python: <3.14,>=3.10
18
+ Requires-Python: <3.14,>=3.11
20
19
  Requires-Dist: numpy>=1.25.2
21
20
  Requires-Dist: opencv-python-headless>=4.9.0.80
22
21
  Requires-Dist: pillow>=12.1.1
23
22
  Requires-Dist: pydantic>=2.8.0
23
+ Requires-Dist: resvg-py>=0.3.2
24
24
  Requires-Dist: tqdm>=4.66.3
25
25
  Provides-Extra: ai
26
26
  Requires-Dist: accelerate>=0.29.2; extra == 'ai'
@@ -67,7 +67,7 @@ pip install videopython # core video/audio editing
67
67
  pip install "videopython[ai]" # + local AI features (GPU recommended)
68
68
  ```
69
69
 
70
- Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
70
+ Python `>=3.11, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
71
71
 
72
72
  ## Quick Start
73
73
 
@@ -18,7 +18,7 @@ pip install videopython # core video/audio editing
18
18
  pip install "videopython[ai]" # + local AI features (GPU recommended)
19
19
  ```
20
20
 
21
- Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
21
+ Python `>=3.11, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
22
22
 
23
23
  ## Quick Start
24
24
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.34.0"
3
+ version = "0.35.0"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -9,7 +9,7 @@ authors = [
9
9
  ]
10
10
  license = { text = "Apache-2.0" }
11
11
  readme = "README.md"
12
- requires-python = ">=3.10, <3.14"
12
+ requires-python = ">=3.11, <3.14"
13
13
  keywords = [
14
14
  "python",
15
15
  "videopython",
@@ -24,7 +24,6 @@ keywords = [
24
24
  classifiers = [
25
25
  "License :: OSI Approved :: Apache Software License",
26
26
  "Programming Language :: Python :: 3",
27
- "Programming Language :: Python :: 3.10",
28
27
  "Programming Language :: Python :: 3.11",
29
28
  "Programming Language :: Python :: 3.12",
30
29
  "Programming Language :: Python :: 3.13",
@@ -35,6 +34,7 @@ dependencies = [
35
34
  "numpy>=1.25.2",
36
35
  "opencv-python-headless>=4.9.0.80",
37
36
  "pillow>=12.1.1",
37
+ "resvg-py>=0.3.2",
38
38
  "tqdm>=4.66.3",
39
39
  "pydantic>=2.8.0",
40
40
  ]
@@ -203,7 +203,7 @@ markers = [
203
203
 
204
204
  [tool.ruff]
205
205
  line-length = 120
206
- target-version = "py310"
206
+ target-version = "py311"
207
207
 
208
208
  [tool.ruff.lint]
209
209
  select = [
@@ -96,6 +96,17 @@ class TextBoxRect:
96
96
  callers short-circuit such boxes (nothing to draw). ``width`` mirrors the
97
97
  resolved ``box_width`` and may be a float when an absolute >1 value was
98
98
  passed, matching legacy behaviour.
99
+
100
+ ``content_width`` is the widest a rendered line actually gets -- worst
101
+ case over the animated highlight when ``highlight_size_multiplier > 1``.
102
+
103
+ There are two independent notions of "fitting" here. ``fits`` is
104
+ box-vs-image *only* -- the legacy contract that gates
105
+ :meth:`write_text_box`'s ``OutOfBoundsError`` -- and does **not** imply
106
+ the content fits the box: legacy callers intentionally overflow the box
107
+ while staying inside the image. A caller that needs the content inside
108
+ the box (subtitles, where the box is frame-clamped) must additionally
109
+ check ``content_width <= width`` itself.
99
110
  """
100
111
 
101
112
  x: float
@@ -104,6 +115,27 @@ class TextBoxRect:
104
115
  height: int
105
116
  fits: bool
106
117
  lines: tuple[str, ...]
118
+ content_width: int = 0
119
+
120
+
121
+ @dataclass(frozen=True)
122
+ class _WordPlacement:
123
+ """One word's resolved font/size and pixel offset within a highlighted line.
124
+
125
+ ``dx``/``dy`` are offsets from the line's left/top. Produced once by
126
+ :meth:`ImageText._layout_highlighted_line` and consumed by both the
127
+ measurer and the renderer, so the box reserved by ``measure_text_box``
128
+ and the pixels drawn by ``write_text_box`` cannot disagree.
129
+ """
130
+
131
+ word: str
132
+ font_filename: str | None
133
+ font_size: int
134
+ width: int
135
+ height: int
136
+ dx: int
137
+ dy: int
138
+ is_highlighted: bool
107
139
 
108
140
 
109
141
  class ImageText:
@@ -614,16 +646,22 @@ class ImageText:
614
646
  font_size: int = 11,
615
647
  anchor: AnchorPoint = AnchorPoint.TOP_LEFT,
616
648
  margin: MarginType = 0,
649
+ highlight_size_multiplier: float = 1.0,
650
+ highlight_bold_font: str | None = None,
617
651
  ) -> TextBoxRect:
618
652
  """Measure where a wrapped text box would land, without drawing it.
619
653
 
620
654
  Pure: resolves margins/box-width/position, wraps the text, applies the
621
655
  anchor, and bounds-checks against the image — the exact math
622
- :meth:`write_text_box` used to do inline. Highlighting and per-line
623
- alignment (``place``) do not change the box envelope, so they are not
624
- parameters here; this intentionally preserves the pre-existing
625
- behaviour that an enlarged highlighted word is *not* accounted for in
626
- the fit check.
656
+ :meth:`write_text_box` used to do inline.
657
+
658
+ ``highlight_size_multiplier > 1`` makes the measurement worst-case for
659
+ an *animated* highlight (any word may be the enlarged one over the
660
+ cue's lifetime): wrapping reserves room so even an enlarged word keeps
661
+ its line within ``box_width``, and ``height`` uses each line's tallest
662
+ possible highlighted variant. With the default ``1.0`` the result is
663
+ byte-identical to the plain base-font measurement, so existing callers
664
+ and ``place`` (alignment) are unaffected.
627
665
 
628
666
  Returns:
629
667
  A :class:`TextBoxRect`. ``fits`` is ``False`` when the box would
@@ -655,15 +693,32 @@ class ImageText:
655
693
  # Calculate initial position based on margin and anchor before splitting text
656
694
  x_pos, y_pos = self._convert_position(xy, margin_top, margin_left, available_width, available_height)
657
695
 
658
- # Split text into lines that fit within box_width
696
+ # Wrap at the real box width (same as the renderer).
659
697
  lines = self._split_lines_by_width(text, font_filename, font_size, int(box_width))
660
698
 
661
- # Calculate total height of all lines
662
- lines_height = sum(self.get_text_dimensions(font_filename, font_size, line)[1] for line in lines)
699
+ # Per-line extent. With an animated highlight any word may be the
700
+ # enlarged one over the cue's lifetime, so each line contributes the
701
+ # widest/tallest variant it could ever render as.
702
+ # ``_highlighted_line_max_extent`` derives that envelope from the same
703
+ # per-word geometry the renderer uses (single source of truth).
704
+ hl_mult = max(1.0, highlight_size_multiplier)
705
+ content_width = 0
706
+ lines_height = 0
707
+ for line in lines:
708
+ if hl_mult > 1.0:
709
+ line_w, line_h = self._highlighted_line_max_extent(
710
+ line, font_filename, font_size, hl_mult, highlight_bold_font
711
+ )
712
+ else:
713
+ line_w, line_h = self.get_text_dimensions(font_filename, font_size, line)
714
+ content_width = max(content_width, line_w)
715
+ lines_height += line_h
663
716
  if lines_height == 0:
664
717
  # No renderable lines (e.g. whitespace-only text); position is the
665
718
  # unadjusted insertion point and the box trivially "fits".
666
- return TextBoxRect(x=x_pos, y=y_pos, width=box_width, height=0, fits=True, lines=tuple(lines))
719
+ return TextBoxRect(
720
+ x=x_pos, y=y_pos, width=box_width, height=0, fits=True, lines=tuple(lines), content_width=0
721
+ )
667
722
 
668
723
  # Final position calculation based on anchor point
669
724
  if anchor in AnchorPoint.center_anchors():
@@ -682,7 +737,15 @@ class ImageText:
682
737
  or x_pos + box_width > self.image_size[1]
683
738
  or y_pos + lines_height > self.image_size[0]
684
739
  )
685
- return TextBoxRect(x=x_pos, y=y_pos, width=box_width, height=lines_height, fits=fits, lines=tuple(lines))
740
+ return TextBoxRect(
741
+ x=x_pos,
742
+ y=y_pos,
743
+ width=box_width,
744
+ height=lines_height,
745
+ fits=fits,
746
+ lines=tuple(lines),
747
+ content_width=content_width,
748
+ )
686
749
 
687
750
  def write_text_box(
688
751
  self,
@@ -761,7 +824,11 @@ class ImageText:
761
824
  if highlight_word_index is not None and highlight_color is None:
762
825
  highlight_color = text_color
763
826
 
764
- # Measure (single source of truth for box geometry), then render.
827
+ # Measure (single source of truth for box geometry), then render. When
828
+ # a word will be highlighted, measure worst-case so the box reserves
829
+ # room for the enlarged word -- otherwise stay byte-identical to the
830
+ # plain base-font measurement.
831
+ measure_mult = highlight_size_multiplier if highlight_word_index is not None else 1.0
765
832
  rect = self.measure_text_box(
766
833
  text=text,
767
834
  font_filename=font_filename,
@@ -770,6 +837,8 @@ class ImageText:
770
837
  font_size=font_size,
771
838
  anchor=anchor,
772
839
  margin=margin,
840
+ highlight_size_multiplier=measure_mult,
841
+ highlight_bold_font=highlight_bold_font,
773
842
  )
774
843
  lines = list(rect.lines)
775
844
  if rect.height == 0:
@@ -783,56 +852,53 @@ class ImageText:
783
852
  f"Text box with size ({box_width}x{lines_height}) at position ({x_pos}, {y_pos}) is out of bounds!"
784
853
  )
785
854
 
786
- # Write lines
855
+ # Write lines. The line that holds the highlighted word is positioned
856
+ # and advanced by its *true* (enlarged) extent via the shared
857
+ # ``_highlighted_line_size`` -- the same numbers ``measure_text_box``
858
+ # reserved -- so an enlarged word can never push the line out of the
859
+ # box (hence out of the frame) regardless of alignment.
787
860
  current_text_height = y_pos
788
861
  word_index_offset = 0 # Track global word index across lines
789
862
  for line in lines:
790
- line_dimensions = self.get_text_dimensions(font_filename, font_size, line)
863
+ line_words = line.split()
864
+ hl_local_index = -1
865
+ if highlight_word_index is not None:
866
+ line_end_word_index = word_index_offset + len(line_words) - 1
867
+ if word_index_offset <= highlight_word_index <= line_end_word_index:
868
+ hl_local_index = highlight_word_index - word_index_offset
791
869
 
792
- # Calculate horizontal position based on alignment
870
+ if hl_local_index >= 0:
871
+ line_w, line_h = self._highlighted_line_size(
872
+ line, font_filename, font_size, hl_local_index, highlight_size_multiplier, highlight_bold_font
873
+ )
874
+ else:
875
+ line_w, line_h = self.get_text_dimensions(font_filename, font_size, line)
876
+
877
+ # Calculate horizontal position based on alignment (true line width)
793
878
  if place == TextAlign.LEFT:
794
879
  x_left = x_pos
795
880
  elif place == TextAlign.RIGHT:
796
- x_left = x_pos + box_width - line_dimensions[0]
881
+ x_left = x_pos + box_width - line_w
797
882
  elif place == TextAlign.CENTER:
798
- x_left = int(x_pos + ((box_width - line_dimensions[0]) / 2))
883
+ x_left = int(x_pos + ((box_width - line_w) / 2))
799
884
  else:
800
885
  valid_places = [e.value for e in TextAlign]
801
886
  raise ValueError(f"Place '{place}' is not supported. Must be one of: {', '.join(valid_places)}")
802
887
 
803
- # Check if highlighting is needed for this line
804
- if highlight_word_index is not None:
805
- line_words = line.split()
806
- line_start_word_index = word_index_offset
807
- line_end_word_index = word_index_offset + len(line_words) - 1
808
-
809
- # Check if the highlighted word is in this line
810
- if line_start_word_index <= highlight_word_index <= line_end_word_index:
811
- self._write_line_with_highlight(
812
- line=line,
813
- font_filename=font_filename,
814
- font_size=font_size,
815
- font_border_size=font_border_size,
816
- text_color=text_color,
817
- highlight_color=highlight_color or (255, 255, 255),
818
- highlight_size_multiplier=highlight_size_multiplier,
819
- highlight_word_local_index=highlight_word_index - line_start_word_index,
820
- highlight_bold_font=highlight_bold_font,
821
- x_left=int(x_left),
822
- y_top=int(current_text_height),
823
- )
824
- else:
825
- # Write normal line without highlighting
826
- self.write_text(
827
- text=line,
828
- font_filename=font_filename,
829
- xy=(x_left, current_text_height),
830
- font_size=font_size,
831
- font_border_size=font_border_size,
832
- color=text_color,
833
- )
834
-
835
- word_index_offset += len(line_words)
888
+ if hl_local_index >= 0:
889
+ self._write_line_with_highlight(
890
+ line=line,
891
+ font_filename=font_filename,
892
+ font_size=font_size,
893
+ font_border_size=font_border_size,
894
+ text_color=text_color,
895
+ highlight_color=highlight_color or (255, 255, 255),
896
+ highlight_size_multiplier=highlight_size_multiplier,
897
+ highlight_word_local_index=hl_local_index,
898
+ highlight_bold_font=highlight_bold_font,
899
+ x_left=int(x_left),
900
+ y_top=int(current_text_height),
901
+ )
836
902
  else:
837
903
  # Write normal line without highlighting
838
904
  self.write_text(
@@ -844,8 +910,9 @@ class ImageText:
844
910
  color=text_color,
845
911
  )
846
912
 
847
- # Increment vertical position for next line
848
- current_text_height += line_dimensions[1]
913
+ word_index_offset += len(line_words)
914
+ # Increment vertical position for next line (true line height)
915
+ current_text_height += line_h
849
916
 
850
917
  # Add background color for the text if specified
851
918
  if background_color is not None:
@@ -921,6 +988,148 @@ class ImageText:
921
988
 
922
989
  return (int(x_pos + box_width), int(current_text_height))
923
990
 
991
+ def _highlight_font(
992
+ self,
993
+ font_filename: str | None,
994
+ font_size: int,
995
+ highlight_size_multiplier: float,
996
+ highlight_bold_font: str | None,
997
+ ) -> tuple[str | None, int, int, int]:
998
+ """Resolve the enlarged-word basics once.
999
+
1000
+ Returns ``(font_file, font_size, baseline_offset, space_width)`` -- the
1001
+ single definition of the highlight constants, shared by the per-word
1002
+ layout (render / exact-size path) and the worst-case extent (measure
1003
+ path) so those paths cannot drift apart on the fundamentals.
1004
+ """
1005
+ hl_font_size = int(font_size * highlight_size_multiplier)
1006
+ hl_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
1007
+ baseline_offset = self._get_font_baseline_offset(font_filename, font_size, hl_font_file, hl_font_size)
1008
+ space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
1009
+ return hl_font_file, hl_font_size, baseline_offset, space_width
1010
+
1011
+ def _layout_highlighted_line(
1012
+ self,
1013
+ line: str,
1014
+ font_filename: str | None,
1015
+ font_size: int,
1016
+ highlight_word_local_index: int,
1017
+ highlight_size_multiplier: float,
1018
+ highlight_bold_font: str | None,
1019
+ ) -> list[_WordPlacement]:
1020
+ """Per-word placement for ``line`` with one word enlarged.
1021
+
1022
+ The single source of truth for the highlighted-line advance (enlarged
1023
+ font size, bold-font swap, base-size inter-word space, baseline
1024
+ offset). Both :meth:`_highlighted_line_size` (measuring the line that
1025
+ actually owns the highlight) and :meth:`_write_line_with_highlight`
1026
+ (rendering it) consume this list, so the reserved box and the drawn
1027
+ pixels agree by construction.
1028
+
1029
+ Reached only for the line that owns the highlighted word, so
1030
+ ``highlight_word_local_index`` is in range; degenerate inputs are
1031
+ handled by the callers' own guards.
1032
+ """
1033
+ words = line.split()
1034
+ hl_font_file, hl_font_size, baseline_offset, space_width = self._highlight_font(
1035
+ font_filename, font_size, highlight_size_multiplier, highlight_bold_font
1036
+ )
1037
+ placements: list[_WordPlacement] = []
1038
+ dx = 0
1039
+ for i, word in enumerate(words):
1040
+ is_hl = i == highlight_word_local_index
1041
+ wf = hl_font_file if is_hl else font_filename
1042
+ ws = hl_font_size if is_hl else font_size
1043
+ w, h = self.get_text_dimensions(wf, ws, word)
1044
+ placements.append(
1045
+ _WordPlacement(
1046
+ word=word,
1047
+ font_filename=wf,
1048
+ font_size=ws,
1049
+ width=w,
1050
+ height=h,
1051
+ dx=dx,
1052
+ dy=baseline_offset if is_hl else 0,
1053
+ is_highlighted=is_hl,
1054
+ )
1055
+ )
1056
+ dx += w
1057
+ if i < len(words) - 1:
1058
+ dx += space_width
1059
+ return placements
1060
+
1061
+ def _highlighted_line_size(
1062
+ self,
1063
+ line: str,
1064
+ font_filename: str | None,
1065
+ font_size: int,
1066
+ highlight_word_local_index: int,
1067
+ highlight_size_multiplier: float,
1068
+ highlight_bold_font: str | None,
1069
+ ) -> tuple[int, int]:
1070
+ """Rendered (width, height) of ``line`` with one *specific* word enlarged.
1071
+
1072
+ A reduction of the shared :meth:`_layout_highlighted_line`, so it is
1073
+ exact w.r.t. the renderer by construction. Used to position/advance
1074
+ the line that owns the highlighted word. ``highlight_word_local_index``
1075
+ out of range falls back to the plain line size -- exactly what the
1076
+ renderer's own guard ends up drawing.
1077
+ """
1078
+ words = line.split()
1079
+ if not words:
1080
+ return (0, 0)
1081
+ if not (0 <= highlight_word_local_index < len(words)):
1082
+ return self.get_text_dimensions(font_filename, font_size, line)
1083
+ placements = self._layout_highlighted_line(
1084
+ line, font_filename, font_size, highlight_word_local_index, highlight_size_multiplier, highlight_bold_font
1085
+ )
1086
+ width = max(p.dx + p.width for p in placements)
1087
+ # ``min(0, ...)`` / ``max(0, ...)`` stay defensive for a *shrinking*
1088
+ # highlight (multiplier < 1 -> negative baseline offset, the word
1089
+ # rides above the line). The subtitle measure path clamps the
1090
+ # multiplier to >= 1 so there ``top`` is always 0, but
1091
+ # ``write_text_box`` forwards the raw multiplier, so keep the floor.
1092
+ top = min([0, *(p.dy for p in placements)])
1093
+ bottom = max([0, *(p.dy + p.height for p in placements)])
1094
+ return (width, bottom - top)
1095
+
1096
+ def _highlighted_line_max_extent(
1097
+ self,
1098
+ line: str,
1099
+ font_filename: str | None,
1100
+ font_size: int,
1101
+ highlight_size_multiplier: float,
1102
+ highlight_bold_font: str | None,
1103
+ ) -> tuple[int, int]:
1104
+ """Worst-case (width, height) over *any* word being the enlarged one.
1105
+
1106
+ Equal to ``max`` of :meth:`_highlighted_line_size` across every word
1107
+ position -- the envelope an animated highlight needs -- but in a
1108
+ single O(words) pass instead of O(words^2): only *which* word is
1109
+ enlarged varies, so the base metrics are shared and the extremes are
1110
+ closed-form. Uses the same :meth:`_highlight_font` constants as the
1111
+ layout, so this envelope can never under-reserve what the renderer
1112
+ draws (it over-reserves only in the safe direction).
1113
+ """
1114
+ words = line.split()
1115
+ if not words:
1116
+ return self.get_text_dimensions(font_filename, font_size, line)
1117
+ hl_font_file, hl_font_size, baseline_offset, space_width = self._highlight_font(
1118
+ font_filename, font_size, highlight_size_multiplier, highlight_bold_font
1119
+ )
1120
+ base = [self.get_text_dimensions(font_filename, font_size, w) for w in words]
1121
+ enlarged = [self.get_text_dimensions(hl_font_file, hl_font_size, w) for w in words]
1122
+ # width_k = (sum of base widths + spaces) - base_w[k] + enlarged_w[k];
1123
+ # the worst k just maximizes the (enlarged - base) swap.
1124
+ base_total = sum(w for w, _ in base) + space_width * (len(words) - 1)
1125
+ width = base_total + max(ew - bw for (bw, _), (ew, _) in zip(base, enlarged))
1126
+ # Non-highlighted words sit at dy=0, the enlarged one at
1127
+ # dy=baseline_offset; the worst line is the tallest base word vs. the
1128
+ # tallest enlarged word lifted by the baseline offset.
1129
+ top = min(0, baseline_offset)
1130
+ bottom = max([0, *(h for _, h in base), baseline_offset + max(h for _, h in enlarged)])
1131
+ return (width, bottom - top)
1132
+
924
1133
  def _write_line_with_highlight(
925
1134
  self,
926
1135
  line: str,
@@ -936,7 +1145,11 @@ class ImageText:
936
1145
  y_top: int,
937
1146
  ) -> None:
938
1147
  """
939
- Write a line of text with one word highlighted using word-by-word rendering with baseline alignment.
1148
+ Write a line of text with one word highlighted, word-by-word with baseline alignment.
1149
+
1150
+ Draws the placements from the shared :meth:`_layout_highlighted_line`,
1151
+ so every pixel lands exactly where :meth:`measure_text_box` reserved
1152
+ room for it (measurement and rendering use the same geometry).
940
1153
 
941
1154
  Args:
942
1155
  line: The text line to render
@@ -951,58 +1164,22 @@ class ImageText:
951
1164
  x_left: Left x position for the line
952
1165
  y_top: Top y position for the line
953
1166
  """
954
- # Split line into words
955
1167
  words = line.split()
956
1168
  if highlight_word_local_index >= len(words):
957
- return # Safety check
958
-
959
- # Calculate highlighted font size and determine font files
960
- highlight_font_size = int(font_size * highlight_size_multiplier)
961
- highlight_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
1169
+ return # Safety check: nothing to draw (matches the measure fallback)
962
1170
 
963
- # Calculate baseline offset for highlighted words (using the appropriate font files)
964
- baseline_offset = self._get_font_baseline_offset(
965
- font_filename, font_size, highlight_font_file, highlight_font_size
966
- )
967
-
968
- # Render words one by one with proper spacing
969
- current_x = x_left
970
-
971
- for i, word in enumerate(words):
972
- # Determine if this is the highlighted word
973
- is_highlighted = i == highlight_word_local_index
974
-
975
- # Choose font file, size, and color based on highlighting
976
- word_font_file = highlight_font_file if is_highlighted else font_filename
977
- word_font_size = highlight_font_size if is_highlighted else font_size
978
- word_color = highlight_color if is_highlighted else text_color
979
-
980
- # Calculate y position with baseline alignment
981
- word_y = y_top
982
- if is_highlighted:
983
- word_y += baseline_offset
984
-
985
- # Render the word
1171
+ for p in self._layout_highlighted_line(
1172
+ line, font_filename, font_size, highlight_word_local_index, highlight_size_multiplier, highlight_bold_font
1173
+ ):
986
1174
  self.write_text(
987
- text=word,
988
- font_filename=word_font_file,
989
- xy=(current_x, word_y),
990
- font_size=word_font_size,
1175
+ text=p.word,
1176
+ font_filename=p.font_filename,
1177
+ xy=(x_left + p.dx, y_top + p.dy),
1178
+ font_size=p.font_size,
991
1179
  font_border_size=font_border_size,
992
- color=word_color,
1180
+ color=highlight_color if p.is_highlighted else text_color,
993
1181
  )
994
1182
 
995
- # Calculate the width of this word for spacing
996
- word_width = self.get_text_dimensions(word_font_file, word_font_size, word)[0]
997
-
998
- # Update current_x for next word (add word width plus space)
999
- current_x += word_width
1000
-
1001
- # Add space between words (except after the last word)
1002
- if i < len(words) - 1:
1003
- space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
1004
- current_x += space_width
1005
-
1006
1183
  def _find_smallest_bounding_rect(self, mask: np.ndarray) -> tuple[int, int, int, int]:
1007
1184
  """
1008
1185
  Find the smallest bounding rectangle containing non-zero values in the mask.
@@ -8,6 +8,7 @@ from .effects import (
8
8
  Flash,
9
9
  FullImageOverlay,
10
10
  Glitch,
11
+ ImageOverlay,
11
12
  Kaleidoscope,
12
13
  KenBurns,
13
14
  MirrorFlip,
@@ -56,6 +57,7 @@ __all__ = [
56
57
  "SilenceRemoval",
57
58
  # Effects
58
59
  "FullImageOverlay",
60
+ "ImageOverlay",
59
61
  "Blur",
60
62
  "Zoom",
61
63
  "ColorGrading",
@@ -14,6 +14,7 @@ audio after ``_apply`` returns.
14
14
  from __future__ import annotations
15
15
 
16
16
  import logging
17
+ from io import BytesIO
17
18
  from pathlib import Path
18
19
  from typing import TYPE_CHECKING, Any, ClassVar, Literal
19
20
 
@@ -29,13 +30,14 @@ from videopython.editing.operation import Effect
29
30
 
30
31
  if TYPE_CHECKING:
31
32
  from videopython.audio import Audio
32
- from videopython.base.video import Video
33
+ from videopython.base.video import Video, VideoMetadata
33
34
 
34
35
  logger = logging.getLogger(__name__)
35
36
 
36
37
  __all__ = [
37
38
  "Effect",
38
39
  "FullImageOverlay",
40
+ "ImageOverlay",
39
41
  "Blur",
40
42
  "Zoom",
41
43
  "ColorGrading",
@@ -771,6 +773,220 @@ class TextOverlay(Effect):
771
773
  return video
772
774
 
773
775
 
776
class ImageOverlay(Effect):
    """Composites a scaled image at an anchored position on every frame in the window.

    A resolution-independent watermark / logo / brand mark. Unlike
    :class:`FullImageOverlay` (full-frame only, raises on size mismatch), the
    image is scaled to a fraction of the frame *width* and placed at an
    anchored normalized position, so one config works across 1080p / 4k /
    vertical / square. Loaded just-in-time from ``source`` so the op stays
    JSON-serialisable. Off-frame or oversized placement clips to a partial
    paste or a no-op -- the same contract as :class:`TextOverlay`, never an
    error; only an unreadable ``source`` is rejected (in ``predict_metadata``).

    ``source`` may be a raster image (PNG/JPEG/WebP) or an SVG (detected by the
    ``.svg`` extension). An SVG is rasterised by ``resvg`` *at the exact target
    pixel width* -- crisp at any frame size, not a blurry upscale of a
    fixed-size bitmap -- with a transparent background and no remote-resource
    fetching (the local path only; no SSRF). SVGs containing text depend on the
    fonts available at render time.
    """

    op: Literal["image_overlay"] = "image_overlay"
    streamable: ClassVar[bool] = True

    source: Path = Field(
        description=(
            "Path to an image file: a raster RGB/RGBA image (PNG/JPEG/WebP) or "
            "an SVG (`.svg`, rasterised at the target resolution). Loaded at "
            "apply time; kept JSON-serialisable as a path."
        ),
    )
    scale: float = Field(
        0.15,
        gt=0,
        le=1,
        description=(
            "Overlay width as a fraction of frame width (0-1). Height follows "
            "the image's aspect ratio. Resolution-independent."
        ),
    )
    opacity: float = Field(
        1.0,
        ge=0,
        le=1,
        description="Multiplies the image's own alpha. 0 = fully transparent, 1 = use the image alpha unchanged.",
    )
    position: tuple[float, float] = Field(
        (0.95, 0.95),
        description=(
            "Where to place the overlay as normalized (x, y) coordinates. "
            "(0, 0) = top-left corner, (1, 1) = bottom-right corner."
        ),
    )
    anchor: Literal["center", "top_left", "top_center", "bottom_center", "bottom_left", "bottom_right"] = Field(
        "bottom_right",
        description="Which point of the overlay box sits at the position coordinate.",
    )

    # Decoded raster source (RGBA, uint8), filled lazily by ``_load_overlay``.
    _overlay_rgba: np.ndarray | None = PrivateAttr(default=None)
    # SVG rasterisations keyed by target pixel width, filled by ``_rasterize_svg``.
    _svg_cache: dict[int, np.ndarray] = PrivateAttr(default_factory=dict)
    # Streaming state, populated by ``streaming_init`` and read by ``process_frame``.
    _stream_noop: bool = PrivateAttr(default=False)
    _stream_alpha: np.ndarray | None = PrivateAttr(default=None)
    _stream_rgb: np.ndarray | None = PrivateAttr(default=None)
    _stream_dst: tuple[int, int, int, int] = PrivateAttr(default=(0, 0, 0, 0))

    @model_validator(mode="after")
    def _validate_position(self) -> ImageOverlay:
        """Reject a ``position`` whose x or y lies outside ``[0, 1]``."""
        if not (0.0 <= self.position[0] <= 1.0 and 0.0 <= self.position[1] <= 1.0):
            raise ValueError("position values must be in range [0, 1]")
        return self

    def _is_svg(self) -> bool:
        """Return True when ``source`` has a ``.svg`` extension (case-insensitive)."""
        return self.source.suffix.lower() == ".svg"

    def predict_metadata(self, meta: VideoMetadata, **_context: Any) -> VideoMetadata:
        """Reject only a missing/unreadable ``source`` (see :meth:`Operation.predict_metadata`).

        An unreadable source is the one failure ``run()`` cannot survive -- it
        would raise mid-stream after expensive frame decode -- so it is caught
        at ``validate()`` time, symmetric with ``TranscriptionOverlay``.
        Geometry (oversized / off-frame) is deliberately *not* checked here: it
        clips to a valid no-op like :class:`TextOverlay`, so rejecting it would
        break that contract and the parity with the op this is modeled on. Both
        checks are cheap (a header ``verify()`` / a 1px SVG parse, no full
        decode), so ``validate()`` stays frame-free.

        Args:
            meta: Metadata of the incoming video; returned unchanged.
            **_context: Extra keyword context, ignored by this op.

        Returns:
            ``meta`` itself -- the overlay does not change the video's shape.

        Raises:
            ValueError: If ``source`` cannot be opened or verified as an image.
        """
        try:
            if self._is_svg():
                import resvg_py

                resvg_py.svg_to_bytes(svg_path=str(self.source), width=1)
            else:
                with Image.open(self.source) as im:
                    im.verify()
        # Pillow's ``verify()`` reports some corrupt files (e.g. a PNG with a
        # bad chunk checksum) as ``SyntaxError`` rather than ``OSError``, so it
        # is normalised to the same ``ValueError`` as every other bad source.
        except (OSError, ValueError, SyntaxError) as exc:
            raise ValueError(f"image_overlay source {str(self.source)!r} is not a readable image: {exc}") from exc
        return meta

    def _rasterize_svg(self, target_w: int) -> np.ndarray:
        """Return the SVG rendered at ``target_w`` pixels wide as an RGBA uint8 array.

        Results are memoised per width in ``_svg_cache``.
        """
        cached = self._svg_cache.get(target_w)
        if cached is not None:
            return cached
        # Lazy import: only when an SVG source is actually used. resvg renders
        # at the exact target width (height proportional to the viewBox) with a
        # transparent background and never fetches remote resources.
        import resvg_py

        png = resvg_py.svg_to_bytes(svg_path=str(self.source), width=target_w)
        with Image.open(BytesIO(bytes(png))) as rendered:
            arr = np.array(rendered.convert("RGBA"), dtype=np.uint8)
        self._svg_cache[target_w] = arr
        return arr

    def _load_overlay(self) -> np.ndarray:
        """Return the raster source decoded to RGBA uint8, decoding it at most once."""
        if self._overlay_rgba is not None:
            return self._overlay_rgba
        # Context manager so the underlying file handle is released
        # deterministically instead of whenever the image is garbage-collected.
        with Image.open(self.source) as img:
            rgba = np.array(img.convert("RGBA"), dtype=np.uint8)
        self._overlay_rgba = rgba
        return rgba

    def _compute_position(self, frame_width: int, frame_height: int, img_w: int, img_h: int) -> tuple[int, int]:
        """Return the overlay's top-left pixel ``(x, y)`` for the configured anchor.

        The result may be negative or exceed the frame; clipping is done by
        ``_blend_params``.
        """
        # Copied verbatim from TextOverlay: ImageOverlay's anchor Literal is
        # deliberately the same set, so the geometry is shared by construction.
        px = int(self.position[0] * frame_width)
        py = int(self.position[1] * frame_height)

        if self.anchor == "center":
            return px - img_w // 2, py - img_h // 2
        if self.anchor == "top_left":
            return px, py
        if self.anchor == "top_center":
            return px - img_w // 2, py
        if self.anchor == "bottom_center":
            return px - img_w // 2, py - img_h
        if self.anchor == "bottom_left":
            return px, py - img_h
        # bottom_right
        return px - img_w, py - img_h

    def _resized_overlay(self, frame_w: int) -> np.ndarray:
        """Return the overlay as RGBA uint8, ``round(scale * frame_w)`` pixels wide (at least 1)."""
        target_w = max(1, round(self.scale * frame_w))
        if self._is_svg():
            # Rasterise the vector at the target size (crisp) rather than
            # upscaling a fixed bitmap. resvg derives height from the viewBox.
            return self._rasterize_svg(target_w)
        overlay = self._load_overlay()
        src_h, src_w = overlay.shape[:2]
        # Height follows the source aspect ratio, never collapsing below 1px.
        target_h = max(1, round(target_w * src_h / src_w))
        if (target_w, target_h) == (src_w, src_h):
            return overlay
        resized = Image.fromarray(overlay).resize((target_w, target_h), Image.LANCZOS)
        return np.array(resized, dtype=np.uint8)

    def _blend_params(
        self, frame_w: int, frame_h: int
    ) -> tuple[np.ndarray, np.ndarray, tuple[int, int, int, int]] | None:
        """Placement + blend inputs shared by the eager and streaming paths.

        Single source of truth so the two paths cannot drift -- the
        eager/stream parity-hole class of bug fixed in 0.34.1. Returns ``None``
        when the overlay lands fully off-frame (the effect is a no-op).

        Otherwise returns ``(alpha, rgb, (dst_y, dst_x, paste_h, paste_w))``:
        ``alpha`` is the clipped region's alpha channel scaled to 0-1 and
        multiplied by ``opacity`` (float32, trailing axis of length 1), ``rgb``
        is the clipped colour channels as float32, and the tuple is the
        destination window inside the frame.
        """
        overlay = self._resized_overlay(frame_w)
        oh, ow = overlay.shape[:2]
        x, y = self._compute_position(frame_w, frame_h, ow, oh)

        # Clip the overlay rectangle against the frame: src_* is the offset
        # into the overlay, dst_* the offset into the frame.
        src_x = max(0, -x)
        src_y = max(0, -y)
        dst_x = max(0, x)
        dst_y = max(0, y)
        paste_w = min(ow - src_x, frame_w - dst_x)
        paste_h = min(oh - src_y, frame_h - dst_y)

        if paste_w <= 0 or paste_h <= 0:
            return None

        region = overlay[src_y : src_y + paste_h, src_x : src_x + paste_w]
        alpha = (region[:, :, 3:4].astype(np.float32) / 255.0) * self.opacity
        rgb = region[:, :, :3].astype(np.float32)
        return alpha, rgb, (dst_y, dst_x, paste_h, paste_w)

    @staticmethod
    def _blend_into(
        frame: np.ndarray,
        alpha: np.ndarray,
        rgb: np.ndarray,
        dst: tuple[int, int, int, int],
    ) -> None:
        """Alpha-blend ``rgb`` over the ``dst`` window of ``frame`` in place.

        The one blend implementation used by both ``process_frame`` and
        ``_apply`` so the streaming and eager outputs stay identical.
        """
        dy, dx, ph, pw = dst
        region = frame[dy : dy + ph, dx : dx + pw]
        blended = (rgb * alpha + region.astype(np.float32) * (1.0 - alpha)).astype(np.uint8)
        frame[dy : dy + ph, dx : dx + pw] = blended

    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
        """Precompute the blend inputs once for a stream of ``width`` x ``height`` frames."""
        params = self._blend_params(width, height)
        if params is None:
            self._stream_noop = True
            return
        self._stream_noop = False
        self._stream_alpha, self._stream_rgb, self._stream_dst = params

    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
        """Blend the overlay into ``frame`` in place and return it.

        Raises:
            RuntimeError: If called before ``streaming_init`` prepared the
                blend inputs.
        """
        if self._stream_noop:
            return frame
        # Explicit check rather than ``assert`` so the guard survives ``python -O``.
        if self._stream_alpha is None or self._stream_rgb is None:
            raise RuntimeError("ImageOverlay.process_frame called before streaming_init")
        self._blend_into(frame, self._stream_alpha, self._stream_rgb, self._stream_dst)
        return frame

    def _apply(self, video: Video) -> Video:
        """Blend the overlay into every frame of ``video`` in place and return it."""
        frame_h, frame_w = video.frame_shape[:2]
        params = self._blend_params(frame_w, frame_h)
        if params is None:
            return video
        alpha, rgb, dst = params

        logger.info("Applying image overlay...")
        for frame in tqdm(video.frames, desc="Image overlay"):
            self._blend_into(frame, alpha, rgb, dst)
        return video
988
+
989
+
774
990
  class Shake(Effect):
775
991
  """Per-frame camera shake: jitters every frame by a random or rhythmic offset.
776
992
 
@@ -175,7 +175,18 @@ class Operation(BaseModel):
175
175
  raise NotImplementedError(f"{type(self).__name__}.apply not implemented")
176
176
 
177
177
  def predict_metadata(self, meta: VideoMetadata) -> VideoMetadata:
      """Predict output metadata from input metadata. Default: identity.

      Run during ``VideoEdit.validate()``'s dry-run, before any frames are
      decoded. Beyond predicting shape, this is the fail-fast gate, and it
      has one contract: **reject exactly the plans that would otherwise crash
      or do unrecoverable / expensive work in** :meth:`apply` **/** ``run()``;
      anything ``run()`` can absorb by graceful degradation is NOT rejected.
      ``TranscriptionOverlay`` rejects un-fittable subtitles (they used to
      crash mid-render); ``TextOverlay``/``ImageOverlay`` do not reject
      off-frame geometry (it clips to a valid no-op). Keep the check
      metadata-cheap -- no frame decode.

      Args:
          meta: Metadata describing the video this operation would receive.

      Returns:
          The metadata the operation would produce. This base implementation
          returns ``meta`` unchanged.
      """
      # NOTE(review): the ``ImageOverlay`` override is declared with an extra
      # ``**_context`` parameter -- confirm whether callers pass keyword
      # context and whether this base signature should accept it as well.
      return meta
180
191
 
181
192
  def to_ffmpeg_filter(self, ctx: FilterCtx) -> str | None:
@@ -320,6 +320,11 @@ class TranscriptionOverlay(Effect):
320
320
  the fit search and the renderer, so they never diverge. Margin math
321
321
  comes from ``ImageText.available_region`` (one source of truth with
322
322
  ``measure_text_box``).
323
+
324
+ The highlight multiplier is threaded in so the measurement is
325
+ worst-case for the animated word enlargement: a cue that fits at base
326
+ size but overflows once a word is highlighted is rejected here (and
327
+ auto-shrunk by ``_resolve_layout``) instead of crashing mid-render.
323
328
  """
324
329
  rect = img_text.measure_text_box(
325
330
  text=text,
@@ -329,13 +334,19 @@ class TranscriptionOverlay(Effect):
329
334
  font_size=font_px,
330
335
  anchor=cfg.anchor,
331
336
  margin=cfg.margin,
337
+ highlight_size_multiplier=cfg.style.highlight_size_multiplier,
338
+ highlight_bold_font=self.highlight_bold_font,
332
339
  )
333
340
  if rect.height == 0:
334
341
  return None
335
342
  box_w = int(rect.width)
336
343
  box_h = rect.height
337
344
  left, top, avail_w, avail_h = img_text.available_region(cfg.margin)
338
- fits = box_w <= avail_w and box_h <= avail_h
345
+ # The box must fit the drawable area, AND the worst-case rendered line
346
+ # (incl. the enlarged highlighted word, or an unbreakable long word)
347
+ # must fit the box -- else the centered line spills off-frame at draw
348
+ # time. Failing this shrinks the font in ``_resolve_layout``.
349
+ fits = box_w <= avail_w and box_h <= avail_h and rect.content_width <= box_w
339
350
  x = min(max(int(round(rect.x)), left), left + avail_w - box_w)
340
351
  y = min(max(int(round(rect.y)), top), top + avail_h - box_h)
341
352
  return _CueBox(x=x, y=y, box_w=box_w, height=box_h, fits=fits)
File without changes
File without changes