videopython 0.34.0__tar.gz → 0.35.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.34.0 → videopython-0.35.0}/PKG-INFO +4 -4
- {videopython-0.34.0 → videopython-0.35.0}/README.md +1 -1
- {videopython-0.34.0 → videopython-0.35.0}/pyproject.toml +4 -4
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/image_text.py +274 -97
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/__init__.py +2 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/effects.py +217 -1
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/operation.py +12 -1
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/transcription_overlay.py +12 -1
- {videopython-0.34.0 → videopython-0.35.0}/.gitignore +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/LICENSE +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/_device.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/config.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/dubber.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/expressiveness.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/loudness.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/pipeline.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/remux.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/dubbing/voice_sample.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/transforms.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/faces.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/analyzer.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/models.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/sampling.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/ai/video_analysis/stages.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/audio/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/audio/analysis.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/audio/audio.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/_dimensions.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/_ffmpeg.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/_video_io.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/description.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/exceptions.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/fonts/DejaVuSans.ttf +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/fonts/LICENSE_DEJAVU +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/fonts/__init__.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/transcription.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/video.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/streaming.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/transforms.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/video_edit.py +0 -0
- {videopython-0.34.0 → videopython-0.35.0}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.34.0
|
|
3
|
+
Version: 0.35.0
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -12,15 +12,15 @@ Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
|
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
-
Requires-Python: <3.14,>=3.10
|
|
18
|
+
Requires-Python: <3.14,>=3.11
|
|
20
19
|
Requires-Dist: numpy>=1.25.2
|
|
21
20
|
Requires-Dist: opencv-python-headless>=4.9.0.80
|
|
22
21
|
Requires-Dist: pillow>=12.1.1
|
|
23
22
|
Requires-Dist: pydantic>=2.8.0
|
|
23
|
+
Requires-Dist: resvg-py>=0.3.2
|
|
24
24
|
Requires-Dist: tqdm>=4.66.3
|
|
25
25
|
Provides-Extra: ai
|
|
26
26
|
Requires-Dist: accelerate>=0.29.2; extra == 'ai'
|
|
@@ -67,7 +67,7 @@ pip install videopython # core video/audio editing
|
|
|
67
67
|
pip install "videopython[ai]" # + local AI features (GPU recommended)
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
-
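Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.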
|
|
70
|
+
Python `>=3.11, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
|
|
71
71
|
|
|
72
72
|
## Quick Start
|
|
73
73
|
|
|
@@ -18,7 +18,7 @@ pip install videopython # core video/audio editing
|
|
|
18
18
|
pip install "videopython[ai]" # + local AI features (GPU recommended)
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
-
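Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.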
|
|
21
|
+
Python `>=3.11, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
|
|
22
22
|
|
|
23
23
|
## Quick Start
|
|
24
24
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "videopython"
|
|
3
|
-
version = "0.34.0"
|
|
3
|
+
version = "0.35.0"
|
|
4
4
|
description = "Minimal video generation and processing library."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
|
|
@@ -9,7 +9,7 @@ authors = [
|
|
|
9
9
|
]
|
|
10
10
|
license = { text = "Apache-2.0" }
|
|
11
11
|
readme = "README.md"
|
|
12
|
-
requires-python = ">=3.10, <3.14"
|
|
12
|
+
requires-python = ">=3.11, <3.14"
|
|
13
13
|
keywords = [
|
|
14
14
|
"python",
|
|
15
15
|
"videopython",
|
|
@@ -24,7 +24,6 @@ keywords = [
|
|
|
24
24
|
classifiers = [
|
|
25
25
|
"License :: OSI Approved :: Apache Software License",
|
|
26
26
|
"Programming Language :: Python :: 3",
|
|
27
|
-
"Programming Language :: Python :: 3.10",
|
|
28
27
|
"Programming Language :: Python :: 3.11",
|
|
29
28
|
"Programming Language :: Python :: 3.12",
|
|
30
29
|
"Programming Language :: Python :: 3.13",
|
|
@@ -35,6 +34,7 @@ dependencies = [
|
|
|
35
34
|
"numpy>=1.25.2",
|
|
36
35
|
"opencv-python-headless>=4.9.0.80",
|
|
37
36
|
"pillow>=12.1.1",
|
|
37
|
+
"resvg-py>=0.3.2",
|
|
38
38
|
"tqdm>=4.66.3",
|
|
39
39
|
"pydantic>=2.8.0",
|
|
40
40
|
]
|
|
@@ -203,7 +203,7 @@ markers = [
|
|
|
203
203
|
|
|
204
204
|
[tool.ruff]
|
|
205
205
|
line-length = 120
|
|
206
|
-
target-version = "py310"
|
|
206
|
+
target-version = "py311"
|
|
207
207
|
|
|
208
208
|
[tool.ruff.lint]
|
|
209
209
|
select = [
|
|
@@ -96,6 +96,17 @@ class TextBoxRect:
|
|
|
96
96
|
callers short-circuit such boxes (nothing to draw). ``width`` mirrors the
|
|
97
97
|
resolved ``box_width`` and may be a float when an absolute >1 value was
|
|
98
98
|
passed, matching legacy behaviour.
|
|
99
|
+
|
|
100
|
+
``content_width`` is the widest a rendered line actually gets -- worst
|
|
101
|
+
case over the animated highlight when ``highlight_size_multiplier > 1``.
|
|
102
|
+
|
|
103
|
+
There are two independent notions of "fitting" here. ``fits`` is
|
|
104
|
+
box-vs-image *only* -- the legacy contract that gates
|
|
105
|
+
:meth:`write_text_box`'s ``OutOfBoundsError`` -- and does **not** imply
|
|
106
|
+
the content fits the box: legacy callers intentionally overflow the box
|
|
107
|
+
while staying inside the image. A caller that needs the content inside
|
|
108
|
+
the box (subtitles, where the box is frame-clamped) must additionally
|
|
109
|
+
check ``content_width <= width`` itself.
|
|
99
110
|
"""
|
|
100
111
|
|
|
101
112
|
x: float
|
|
@@ -104,6 +115,27 @@ class TextBoxRect:
|
|
|
104
115
|
height: int
|
|
105
116
|
fits: bool
|
|
106
117
|
lines: tuple[str, ...]
|
|
118
|
+
content_width: int = 0
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dataclass(frozen=True)
|
|
122
|
+
class _WordPlacement:
|
|
123
|
+
"""One word's resolved font/size and pixel offset within a highlighted line.
|
|
124
|
+
|
|
125
|
+
``dx``/``dy`` are offsets from the line's left/top. Produced once by
|
|
126
|
+
:meth:`ImageText._layout_highlighted_line` and consumed by both the
|
|
127
|
+
measurer and the renderer, so the box reserved by ``measure_text_box``
|
|
128
|
+
and the pixels drawn by ``write_text_box`` cannot disagree.
|
|
129
|
+
"""
|
|
130
|
+
|
|
131
|
+
word: str
|
|
132
|
+
font_filename: str | None
|
|
133
|
+
font_size: int
|
|
134
|
+
width: int
|
|
135
|
+
height: int
|
|
136
|
+
dx: int
|
|
137
|
+
dy: int
|
|
138
|
+
is_highlighted: bool
|
|
107
139
|
|
|
108
140
|
|
|
109
141
|
class ImageText:
|
|
@@ -614,16 +646,22 @@ class ImageText:
|
|
|
614
646
|
font_size: int = 11,
|
|
615
647
|
anchor: AnchorPoint = AnchorPoint.TOP_LEFT,
|
|
616
648
|
margin: MarginType = 0,
|
|
649
|
+
highlight_size_multiplier: float = 1.0,
|
|
650
|
+
highlight_bold_font: str | None = None,
|
|
617
651
|
) -> TextBoxRect:
|
|
618
652
|
"""Measure where a wrapped text box would land, without drawing it.
|
|
619
653
|
|
|
620
654
|
Pure: resolves margins/box-width/position, wraps the text, applies the
|
|
621
655
|
anchor, and bounds-checks against the image — the exact math
|
|
622
|
-
:meth:`write_text_box` used to do inline.
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
656
|
+
:meth:`write_text_box` used to do inline.
|
|
657
|
+
|
|
658
|
+
``highlight_size_multiplier > 1`` makes the measurement worst-case for
|
|
659
|
+
an *animated* highlight (any word may be the enlarged one over the
|
|
660
|
+
cue's lifetime): wrapping reserves room so even an enlarged word keeps
|
|
661
|
+
its line within ``box_width``, and ``height`` uses each line's tallest
|
|
662
|
+
possible highlighted variant. With the default ``1.0`` the result is
|
|
663
|
+
byte-identical to the plain base-font measurement, so existing callers
|
|
664
|
+
and ``place`` (alignment) are unaffected.
|
|
627
665
|
|
|
628
666
|
Returns:
|
|
629
667
|
A :class:`TextBoxRect`. ``fits`` is ``False`` when the box would
|
|
@@ -655,15 +693,32 @@ class ImageText:
|
|
|
655
693
|
# Calculate initial position based on margin and anchor before splitting text
|
|
656
694
|
x_pos, y_pos = self._convert_position(xy, margin_top, margin_left, available_width, available_height)
|
|
657
695
|
|
|
658
|
-
#
|
|
696
|
+
# Wrap at the real box width (same as the renderer).
|
|
659
697
|
lines = self._split_lines_by_width(text, font_filename, font_size, int(box_width))
|
|
660
698
|
|
|
661
|
-
#
|
|
662
|
-
|
|
699
|
+
# Per-line extent. With an animated highlight any word may be the
|
|
700
|
+
# enlarged one over the cue's lifetime, so each line contributes the
|
|
701
|
+
# widest/tallest variant it could ever render as.
|
|
702
|
+
# ``_highlighted_line_max_extent`` derives that envelope from the same
|
|
703
|
+
# per-word geometry the renderer uses (single source of truth).
|
|
704
|
+
hl_mult = max(1.0, highlight_size_multiplier)
|
|
705
|
+
content_width = 0
|
|
706
|
+
lines_height = 0
|
|
707
|
+
for line in lines:
|
|
708
|
+
if hl_mult > 1.0:
|
|
709
|
+
line_w, line_h = self._highlighted_line_max_extent(
|
|
710
|
+
line, font_filename, font_size, hl_mult, highlight_bold_font
|
|
711
|
+
)
|
|
712
|
+
else:
|
|
713
|
+
line_w, line_h = self.get_text_dimensions(font_filename, font_size, line)
|
|
714
|
+
content_width = max(content_width, line_w)
|
|
715
|
+
lines_height += line_h
|
|
663
716
|
if lines_height == 0:
|
|
664
717
|
# No renderable lines (e.g. whitespace-only text); position is the
|
|
665
718
|
# unadjusted insertion point and the box trivially "fits".
|
|
666
|
-
return TextBoxRect(
|
|
719
|
+
return TextBoxRect(
|
|
720
|
+
x=x_pos, y=y_pos, width=box_width, height=0, fits=True, lines=tuple(lines), content_width=0
|
|
721
|
+
)
|
|
667
722
|
|
|
668
723
|
# Final position calculation based on anchor point
|
|
669
724
|
if anchor in AnchorPoint.center_anchors():
|
|
@@ -682,7 +737,15 @@ class ImageText:
|
|
|
682
737
|
or x_pos + box_width > self.image_size[1]
|
|
683
738
|
or y_pos + lines_height > self.image_size[0]
|
|
684
739
|
)
|
|
685
|
-
return TextBoxRect(
|
|
740
|
+
return TextBoxRect(
|
|
741
|
+
x=x_pos,
|
|
742
|
+
y=y_pos,
|
|
743
|
+
width=box_width,
|
|
744
|
+
height=lines_height,
|
|
745
|
+
fits=fits,
|
|
746
|
+
lines=tuple(lines),
|
|
747
|
+
content_width=content_width,
|
|
748
|
+
)
|
|
686
749
|
|
|
687
750
|
def write_text_box(
|
|
688
751
|
self,
|
|
@@ -761,7 +824,11 @@ class ImageText:
|
|
|
761
824
|
if highlight_word_index is not None and highlight_color is None:
|
|
762
825
|
highlight_color = text_color
|
|
763
826
|
|
|
764
|
-
# Measure (single source of truth for box geometry), then render.
|
|
827
|
+
# Measure (single source of truth for box geometry), then render. When
|
|
828
|
+
# a word will be highlighted, measure worst-case so the box reserves
|
|
829
|
+
# room for the enlarged word -- otherwise stay byte-identical to the
|
|
830
|
+
# plain base-font measurement.
|
|
831
|
+
measure_mult = highlight_size_multiplier if highlight_word_index is not None else 1.0
|
|
765
832
|
rect = self.measure_text_box(
|
|
766
833
|
text=text,
|
|
767
834
|
font_filename=font_filename,
|
|
@@ -770,6 +837,8 @@ class ImageText:
|
|
|
770
837
|
font_size=font_size,
|
|
771
838
|
anchor=anchor,
|
|
772
839
|
margin=margin,
|
|
840
|
+
highlight_size_multiplier=measure_mult,
|
|
841
|
+
highlight_bold_font=highlight_bold_font,
|
|
773
842
|
)
|
|
774
843
|
lines = list(rect.lines)
|
|
775
844
|
if rect.height == 0:
|
|
@@ -783,56 +852,53 @@ class ImageText:
|
|
|
783
852
|
f"Text box with size ({box_width}x{lines_height}) at position ({x_pos}, {y_pos}) is out of bounds!"
|
|
784
853
|
)
|
|
785
854
|
|
|
786
|
-
# Write lines
|
|
855
|
+
# Write lines. The line that holds the highlighted word is positioned
|
|
856
|
+
# and advanced by its *true* (enlarged) extent via the shared
|
|
857
|
+
# ``_highlighted_line_size`` -- the same numbers ``measure_text_box``
|
|
858
|
+
# reserved -- so an enlarged word can never push the line out of the
|
|
859
|
+
# box (hence out of the frame) regardless of alignment.
|
|
787
860
|
current_text_height = y_pos
|
|
788
861
|
word_index_offset = 0 # Track global word index across lines
|
|
789
862
|
for line in lines:
|
|
790
|
-
|
|
863
|
+
line_words = line.split()
|
|
864
|
+
hl_local_index = -1
|
|
865
|
+
if highlight_word_index is not None:
|
|
866
|
+
line_end_word_index = word_index_offset + len(line_words) - 1
|
|
867
|
+
if word_index_offset <= highlight_word_index <= line_end_word_index:
|
|
868
|
+
hl_local_index = highlight_word_index - word_index_offset
|
|
791
869
|
|
|
792
|
-
|
|
870
|
+
if hl_local_index >= 0:
|
|
871
|
+
line_w, line_h = self._highlighted_line_size(
|
|
872
|
+
line, font_filename, font_size, hl_local_index, highlight_size_multiplier, highlight_bold_font
|
|
873
|
+
)
|
|
874
|
+
else:
|
|
875
|
+
line_w, line_h = self.get_text_dimensions(font_filename, font_size, line)
|
|
876
|
+
|
|
877
|
+
# Calculate horizontal position based on alignment (true line width)
|
|
793
878
|
if place == TextAlign.LEFT:
|
|
794
879
|
x_left = x_pos
|
|
795
880
|
elif place == TextAlign.RIGHT:
|
|
796
|
-
x_left = x_pos + box_width -
|
|
881
|
+
x_left = x_pos + box_width - line_w
|
|
797
882
|
elif place == TextAlign.CENTER:
|
|
798
|
-
x_left = int(x_pos + ((box_width -
|
|
883
|
+
x_left = int(x_pos + ((box_width - line_w) / 2))
|
|
799
884
|
else:
|
|
800
885
|
valid_places = [e.value for e in TextAlign]
|
|
801
886
|
raise ValueError(f"Place '{place}' is not supported. Must be one of: {', '.join(valid_places)}")
|
|
802
887
|
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
highlight_color=highlight_color or (255, 255, 255),
|
|
818
|
-
highlight_size_multiplier=highlight_size_multiplier,
|
|
819
|
-
highlight_word_local_index=highlight_word_index - line_start_word_index,
|
|
820
|
-
highlight_bold_font=highlight_bold_font,
|
|
821
|
-
x_left=int(x_left),
|
|
822
|
-
y_top=int(current_text_height),
|
|
823
|
-
)
|
|
824
|
-
else:
|
|
825
|
-
# Write normal line without highlighting
|
|
826
|
-
self.write_text(
|
|
827
|
-
text=line,
|
|
828
|
-
font_filename=font_filename,
|
|
829
|
-
xy=(x_left, current_text_height),
|
|
830
|
-
font_size=font_size,
|
|
831
|
-
font_border_size=font_border_size,
|
|
832
|
-
color=text_color,
|
|
833
|
-
)
|
|
834
|
-
|
|
835
|
-
word_index_offset += len(line_words)
|
|
888
|
+
if hl_local_index >= 0:
|
|
889
|
+
self._write_line_with_highlight(
|
|
890
|
+
line=line,
|
|
891
|
+
font_filename=font_filename,
|
|
892
|
+
font_size=font_size,
|
|
893
|
+
font_border_size=font_border_size,
|
|
894
|
+
text_color=text_color,
|
|
895
|
+
highlight_color=highlight_color or (255, 255, 255),
|
|
896
|
+
highlight_size_multiplier=highlight_size_multiplier,
|
|
897
|
+
highlight_word_local_index=hl_local_index,
|
|
898
|
+
highlight_bold_font=highlight_bold_font,
|
|
899
|
+
x_left=int(x_left),
|
|
900
|
+
y_top=int(current_text_height),
|
|
901
|
+
)
|
|
836
902
|
else:
|
|
837
903
|
# Write normal line without highlighting
|
|
838
904
|
self.write_text(
|
|
@@ -844,8 +910,9 @@ class ImageText:
|
|
|
844
910
|
color=text_color,
|
|
845
911
|
)
|
|
846
912
|
|
|
847
|
-
|
|
848
|
-
|
|
913
|
+
word_index_offset += len(line_words)
|
|
914
|
+
# Increment vertical position for next line (true line height)
|
|
915
|
+
current_text_height += line_h
|
|
849
916
|
|
|
850
917
|
# Add background color for the text if specified
|
|
851
918
|
if background_color is not None:
|
|
@@ -921,6 +988,148 @@ class ImageText:
|
|
|
921
988
|
|
|
922
989
|
return (int(x_pos + box_width), int(current_text_height))
|
|
923
990
|
|
|
991
|
+
def _highlight_font(
|
|
992
|
+
self,
|
|
993
|
+
font_filename: str | None,
|
|
994
|
+
font_size: int,
|
|
995
|
+
highlight_size_multiplier: float,
|
|
996
|
+
highlight_bold_font: str | None,
|
|
997
|
+
) -> tuple[str | None, int, int, int]:
|
|
998
|
+
"""Resolve the enlarged-word basics once.
|
|
999
|
+
|
|
1000
|
+
Returns ``(font_file, font_size, baseline_offset, space_width)`` -- the
|
|
1001
|
+
single definition of the highlight constants, shared by the per-word
|
|
1002
|
+
layout (render / exact-size path) and the worst-case extent (measure
|
|
1003
|
+
path) so those paths cannot drift apart on the fundamentals.
|
|
1004
|
+
"""
|
|
1005
|
+
hl_font_size = int(font_size * highlight_size_multiplier)
|
|
1006
|
+
hl_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
|
|
1007
|
+
baseline_offset = self._get_font_baseline_offset(font_filename, font_size, hl_font_file, hl_font_size)
|
|
1008
|
+
space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
|
|
1009
|
+
return hl_font_file, hl_font_size, baseline_offset, space_width
|
|
1010
|
+
|
|
1011
|
+
def _layout_highlighted_line(
|
|
1012
|
+
self,
|
|
1013
|
+
line: str,
|
|
1014
|
+
font_filename: str | None,
|
|
1015
|
+
font_size: int,
|
|
1016
|
+
highlight_word_local_index: int,
|
|
1017
|
+
highlight_size_multiplier: float,
|
|
1018
|
+
highlight_bold_font: str | None,
|
|
1019
|
+
) -> list[_WordPlacement]:
|
|
1020
|
+
"""Per-word placement for ``line`` with one word enlarged.
|
|
1021
|
+
|
|
1022
|
+
The single source of truth for the highlighted-line advance (enlarged
|
|
1023
|
+
font size, bold-font swap, base-size inter-word space, baseline
|
|
1024
|
+
offset). Both :meth:`_highlighted_line_size` (measuring the line that
|
|
1025
|
+
actually owns the highlight) and :meth:`_write_line_with_highlight`
|
|
1026
|
+
(rendering it) consume this list, so the reserved box and the drawn
|
|
1027
|
+
pixels agree by construction.
|
|
1028
|
+
|
|
1029
|
+
Reached only for the line that owns the highlighted word, so
|
|
1030
|
+
``highlight_word_local_index`` is in range; degenerate inputs are
|
|
1031
|
+
handled by the callers' own guards.
|
|
1032
|
+
"""
|
|
1033
|
+
words = line.split()
|
|
1034
|
+
hl_font_file, hl_font_size, baseline_offset, space_width = self._highlight_font(
|
|
1035
|
+
font_filename, font_size, highlight_size_multiplier, highlight_bold_font
|
|
1036
|
+
)
|
|
1037
|
+
placements: list[_WordPlacement] = []
|
|
1038
|
+
dx = 0
|
|
1039
|
+
for i, word in enumerate(words):
|
|
1040
|
+
is_hl = i == highlight_word_local_index
|
|
1041
|
+
wf = hl_font_file if is_hl else font_filename
|
|
1042
|
+
ws = hl_font_size if is_hl else font_size
|
|
1043
|
+
w, h = self.get_text_dimensions(wf, ws, word)
|
|
1044
|
+
placements.append(
|
|
1045
|
+
_WordPlacement(
|
|
1046
|
+
word=word,
|
|
1047
|
+
font_filename=wf,
|
|
1048
|
+
font_size=ws,
|
|
1049
|
+
width=w,
|
|
1050
|
+
height=h,
|
|
1051
|
+
dx=dx,
|
|
1052
|
+
dy=baseline_offset if is_hl else 0,
|
|
1053
|
+
is_highlighted=is_hl,
|
|
1054
|
+
)
|
|
1055
|
+
)
|
|
1056
|
+
dx += w
|
|
1057
|
+
if i < len(words) - 1:
|
|
1058
|
+
dx += space_width
|
|
1059
|
+
return placements
|
|
1060
|
+
|
|
1061
|
+
def _highlighted_line_size(
|
|
1062
|
+
self,
|
|
1063
|
+
line: str,
|
|
1064
|
+
font_filename: str | None,
|
|
1065
|
+
font_size: int,
|
|
1066
|
+
highlight_word_local_index: int,
|
|
1067
|
+
highlight_size_multiplier: float,
|
|
1068
|
+
highlight_bold_font: str | None,
|
|
1069
|
+
) -> tuple[int, int]:
|
|
1070
|
+
"""Rendered (width, height) of ``line`` with one *specific* word enlarged.
|
|
1071
|
+
|
|
1072
|
+
A reduction of the shared :meth:`_layout_highlighted_line`, so it is
|
|
1073
|
+
exact w.r.t. the renderer by construction. Used to position/advance
|
|
1074
|
+
the line that owns the highlighted word. ``highlight_word_local_index``
|
|
1075
|
+
out of range falls back to the plain line size -- exactly what the
|
|
1076
|
+
renderer's own guard ends up drawing.
|
|
1077
|
+
"""
|
|
1078
|
+
words = line.split()
|
|
1079
|
+
if not words:
|
|
1080
|
+
return (0, 0)
|
|
1081
|
+
if not (0 <= highlight_word_local_index < len(words)):
|
|
1082
|
+
return self.get_text_dimensions(font_filename, font_size, line)
|
|
1083
|
+
placements = self._layout_highlighted_line(
|
|
1084
|
+
line, font_filename, font_size, highlight_word_local_index, highlight_size_multiplier, highlight_bold_font
|
|
1085
|
+
)
|
|
1086
|
+
width = max(p.dx + p.width for p in placements)
|
|
1087
|
+
# ``min(0, ...)`` / ``max(0, ...)`` stay defensive for a *shrinking*
|
|
1088
|
+
# highlight (multiplier < 1 -> negative baseline offset, the word
|
|
1089
|
+
# rides above the line). The subtitle measure path clamps the
|
|
1090
|
+
# multiplier to >= 1 so there ``top`` is always 0, but
|
|
1091
|
+
# ``write_text_box`` forwards the raw multiplier, so keep the floor.
|
|
1092
|
+
top = min([0, *(p.dy for p in placements)])
|
|
1093
|
+
bottom = max([0, *(p.dy + p.height for p in placements)])
|
|
1094
|
+
return (width, bottom - top)
|
|
1095
|
+
|
|
1096
|
+
def _highlighted_line_max_extent(
|
|
1097
|
+
self,
|
|
1098
|
+
line: str,
|
|
1099
|
+
font_filename: str | None,
|
|
1100
|
+
font_size: int,
|
|
1101
|
+
highlight_size_multiplier: float,
|
|
1102
|
+
highlight_bold_font: str | None,
|
|
1103
|
+
) -> tuple[int, int]:
|
|
1104
|
+
"""Worst-case (width, height) over *any* word being the enlarged one.
|
|
1105
|
+
|
|
1106
|
+
Equal to ``max`` of :meth:`_highlighted_line_size` across every word
|
|
1107
|
+
position -- the envelope an animated highlight needs -- but in a
|
|
1108
|
+
single O(words) pass instead of O(words^2): only *which* word is
|
|
1109
|
+
enlarged varies, so the base metrics are shared and the extremes are
|
|
1110
|
+
closed-form. Uses the same :meth:`_highlight_font` constants as the
|
|
1111
|
+
layout, so this envelope can never under-reserve what the renderer
|
|
1112
|
+
draws (it over-reserves only in the safe direction).
|
|
1113
|
+
"""
|
|
1114
|
+
words = line.split()
|
|
1115
|
+
if not words:
|
|
1116
|
+
return self.get_text_dimensions(font_filename, font_size, line)
|
|
1117
|
+
hl_font_file, hl_font_size, baseline_offset, space_width = self._highlight_font(
|
|
1118
|
+
font_filename, font_size, highlight_size_multiplier, highlight_bold_font
|
|
1119
|
+
)
|
|
1120
|
+
base = [self.get_text_dimensions(font_filename, font_size, w) for w in words]
|
|
1121
|
+
enlarged = [self.get_text_dimensions(hl_font_file, hl_font_size, w) for w in words]
|
|
1122
|
+
# width_k = (sum of base widths + spaces) - base_w[k] + enlarged_w[k];
|
|
1123
|
+
# the worst k just maximizes the (enlarged - base) swap.
|
|
1124
|
+
base_total = sum(w for w, _ in base) + space_width * (len(words) - 1)
|
|
1125
|
+
width = base_total + max(ew - bw for (bw, _), (ew, _) in zip(base, enlarged))
|
|
1126
|
+
# Non-highlighted words sit at dy=0, the enlarged one at
|
|
1127
|
+
# dy=baseline_offset; the worst line is the tallest base word vs. the
|
|
1128
|
+
# tallest enlarged word lifted by the baseline offset.
|
|
1129
|
+
top = min(0, baseline_offset)
|
|
1130
|
+
bottom = max([0, *(h for _, h in base), baseline_offset + max(h for _, h in enlarged)])
|
|
1131
|
+
return (width, bottom - top)
|
|
1132
|
+
|
|
924
1133
|
def _write_line_with_highlight(
|
|
925
1134
|
self,
|
|
926
1135
|
line: str,
|
|
@@ -936,7 +1145,11 @@ class ImageText:
|
|
|
936
1145
|
y_top: int,
|
|
937
1146
|
) -> None:
|
|
938
1147
|
"""
|
|
939
|
-
Write a line of text with one word highlighted
|
|
1148
|
+
Write a line of text with one word highlighted, word-by-word with baseline alignment.
|
|
1149
|
+
|
|
1150
|
+
Draws the placements from the shared :meth:`_layout_highlighted_line`,
|
|
1151
|
+
so every pixel lands exactly where :meth:`measure_text_box` reserved
|
|
1152
|
+
room for it (measurement and rendering use the same geometry).
|
|
940
1153
|
|
|
941
1154
|
Args:
|
|
942
1155
|
line: The text line to render
|
|
@@ -951,58 +1164,22 @@ class ImageText:
|
|
|
951
1164
|
x_left: Left x position for the line
|
|
952
1165
|
y_top: Top y position for the line
|
|
953
1166
|
"""
|
|
954
|
-
# Split line into words
|
|
955
1167
|
words = line.split()
|
|
956
1168
|
if highlight_word_local_index >= len(words):
|
|
957
|
-
return # Safety check
|
|
958
|
-
|
|
959
|
-
# Calculate highlighted font size and determine font files
|
|
960
|
-
highlight_font_size = int(font_size * highlight_size_multiplier)
|
|
961
|
-
highlight_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
|
|
1169
|
+
return # Safety check: nothing to draw (matches the measure fallback)
|
|
962
1170
|
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
)
|
|
967
|
-
|
|
968
|
-
# Render words one by one with proper spacing
|
|
969
|
-
current_x = x_left
|
|
970
|
-
|
|
971
|
-
for i, word in enumerate(words):
|
|
972
|
-
# Determine if this is the highlighted word
|
|
973
|
-
is_highlighted = i == highlight_word_local_index
|
|
974
|
-
|
|
975
|
-
# Choose font file, size, and color based on highlighting
|
|
976
|
-
word_font_file = highlight_font_file if is_highlighted else font_filename
|
|
977
|
-
word_font_size = highlight_font_size if is_highlighted else font_size
|
|
978
|
-
word_color = highlight_color if is_highlighted else text_color
|
|
979
|
-
|
|
980
|
-
# Calculate y position with baseline alignment
|
|
981
|
-
word_y = y_top
|
|
982
|
-
if is_highlighted:
|
|
983
|
-
word_y += baseline_offset
|
|
984
|
-
|
|
985
|
-
# Render the word
|
|
1171
|
+
for p in self._layout_highlighted_line(
|
|
1172
|
+
line, font_filename, font_size, highlight_word_local_index, highlight_size_multiplier, highlight_bold_font
|
|
1173
|
+
):
|
|
986
1174
|
self.write_text(
|
|
987
|
-
text=word,
|
|
988
|
-
font_filename=
|
|
989
|
-
xy=(
|
|
990
|
-
font_size=
|
|
1175
|
+
text=p.word,
|
|
1176
|
+
font_filename=p.font_filename,
|
|
1177
|
+
xy=(x_left + p.dx, y_top + p.dy),
|
|
1178
|
+
font_size=p.font_size,
|
|
991
1179
|
font_border_size=font_border_size,
|
|
992
|
-
color=
|
|
1180
|
+
color=highlight_color if p.is_highlighted else text_color,
|
|
993
1181
|
)
|
|
994
1182
|
|
|
995
|
-
# Calculate the width of this word for spacing
|
|
996
|
-
word_width = self.get_text_dimensions(word_font_file, word_font_size, word)[0]
|
|
997
|
-
|
|
998
|
-
# Update current_x for next word (add word width plus space)
|
|
999
|
-
current_x += word_width
|
|
1000
|
-
|
|
1001
|
-
# Add space between words (except after the last word)
|
|
1002
|
-
if i < len(words) - 1:
|
|
1003
|
-
space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
|
|
1004
|
-
current_x += space_width
|
|
1005
|
-
|
|
1006
1183
|
def _find_smallest_bounding_rect(self, mask: np.ndarray) -> tuple[int, int, int, int]:
|
|
1007
1184
|
"""
|
|
1008
1185
|
Find the smallest bounding rectangle containing non-zero values in the mask.
|
|
@@ -8,6 +8,7 @@ from .effects import (
|
|
|
8
8
|
Flash,
|
|
9
9
|
FullImageOverlay,
|
|
10
10
|
Glitch,
|
|
11
|
+
ImageOverlay,
|
|
11
12
|
Kaleidoscope,
|
|
12
13
|
KenBurns,
|
|
13
14
|
MirrorFlip,
|
|
@@ -56,6 +57,7 @@ __all__ = [
|
|
|
56
57
|
"SilenceRemoval",
|
|
57
58
|
# Effects
|
|
58
59
|
"FullImageOverlay",
|
|
60
|
+
"ImageOverlay",
|
|
59
61
|
"Blur",
|
|
60
62
|
"Zoom",
|
|
61
63
|
"ColorGrading",
|
|
@@ -14,6 +14,7 @@ audio after ``_apply`` returns.
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import logging
|
|
17
|
+
from io import BytesIO
|
|
17
18
|
from pathlib import Path
|
|
18
19
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
19
20
|
|
|
@@ -29,13 +30,14 @@ from videopython.editing.operation import Effect
|
|
|
29
30
|
|
|
30
31
|
if TYPE_CHECKING:
|
|
31
32
|
from videopython.audio import Audio
|
|
32
|
-
from videopython.base.video import Video
|
|
33
|
+
from videopython.base.video import Video, VideoMetadata
|
|
33
34
|
|
|
34
35
|
logger = logging.getLogger(__name__)
|
|
35
36
|
|
|
36
37
|
__all__ = [
|
|
37
38
|
"Effect",
|
|
38
39
|
"FullImageOverlay",
|
|
40
|
+
"ImageOverlay",
|
|
39
41
|
"Blur",
|
|
40
42
|
"Zoom",
|
|
41
43
|
"ColorGrading",
|
|
@@ -771,6 +773,220 @@ class TextOverlay(Effect):
|
|
|
771
773
|
return video
|
|
772
774
|
|
|
773
775
|
|
|
776
|
+
class ImageOverlay(Effect):
    """Composites a scaled image at an anchored position on every frame in the window.

    A resolution-independent watermark / logo / brand mark. Unlike
    :class:`FullImageOverlay` (full-frame only, raises on size mismatch), the
    image is scaled to a fraction of the frame *width* and placed at an
    anchored normalized position, so one config works across 1080p / 4k /
    vertical / square. Loaded just-in-time from ``source`` so the op stays
    JSON-serialisable. Off-frame or oversized placement clips to a partial
    paste or a no-op -- the same contract as :class:`TextOverlay`, never an
    error; only an unreadable ``source`` is rejected (in ``predict_metadata``).

    ``source`` may be a raster image (PNG/JPEG/WebP) or an SVG (detected by the
    ``.svg`` extension). An SVG is rasterised by ``resvg`` *at the exact target
    pixel width* -- crisp at any frame size, not a blurry upscale of a
    fixed-size bitmap -- with a transparent background and no remote-resource
    fetching (the local path only; no SSRF). SVGs containing text depend on the
    fonts available at render time.

    Blended pixel values are rounded to the nearest integer (not truncated), so
    compositing a colour onto itself leaves it unchanged.
    """

    op: Literal["image_overlay"] = "image_overlay"
    streamable: ClassVar[bool] = True

    source: Path = Field(
        description=(
            "Path to an image file: a raster RGB/RGBA image (PNG/JPEG/WebP) or "
            "an SVG (`.svg`, rasterised at the target resolution). Loaded at "
            "apply time; kept JSON-serialisable as a path."
        ),
    )
    scale: float = Field(
        0.15,
        gt=0,
        le=1,
        description=(
            "Overlay width as a fraction of frame width (0-1). Height follows "
            "the image's aspect ratio. Resolution-independent."
        ),
    )
    opacity: float = Field(
        1.0,
        ge=0,
        le=1,
        description="Multiplies the image's own alpha. 0 = fully transparent, 1 = use the image alpha unchanged.",
    )
    position: tuple[float, float] = Field(
        (0.95, 0.95),
        description=(
            "Where to place the overlay as normalized (x, y) coordinates. "
            "(0, 0) = top-left corner, (1, 1) = bottom-right corner."
        ),
    )
    anchor: Literal["center", "top_left", "top_center", "bottom_center", "bottom_left", "bottom_right"] = Field(
        "bottom_right",
        description="Which point of the overlay box sits at the position coordinate.",
    )

    # Decoded raster source (H, W, 4 uint8), cached after the first load.
    _overlay_rgba: np.ndarray | None = PrivateAttr(default=None)
    # Rasterised SVG arrays keyed by the target pixel width they were rendered at.
    _svg_cache: dict[int, np.ndarray] = PrivateAttr(default_factory=dict)
    # Streaming state prepared by ``streaming_init`` and read by ``process_frame``.
    _stream_noop: bool = PrivateAttr(default=False)
    _stream_alpha: np.ndarray | None = PrivateAttr(default=None)
    _stream_rgb: np.ndarray | None = PrivateAttr(default=None)
    _stream_dst: tuple[int, int, int, int] = PrivateAttr(default=(0, 0, 0, 0))

    @model_validator(mode="after")
    def _validate_position(self) -> ImageOverlay:
        """Reject a ``position`` whose x or y lies outside ``[0, 1]``."""
        if not (0.0 <= self.position[0] <= 1.0 and 0.0 <= self.position[1] <= 1.0):
            raise ValueError("position values must be in range [0, 1]")
        return self

    def _is_svg(self) -> bool:
        """Return True when ``source`` has a ``.svg`` extension (case-insensitive)."""
        return self.source.suffix.lower() == ".svg"

    def predict_metadata(self, meta: VideoMetadata, **_context: Any) -> VideoMetadata:
        """Reject only a missing/unreadable ``source`` (see :meth:`Operation.predict_metadata`).

        An unreadable source is the one failure ``run()`` cannot survive -- it
        would raise mid-stream after expensive frame decode -- so it is caught
        at ``validate()`` time, symmetric with ``TranscriptionOverlay``.
        Geometry (oversized / off-frame) is deliberately *not* checked here: it
        clips to a valid no-op like :class:`TextOverlay`, so rejecting it would
        break that contract and the parity with the op this is modeled on. Both
        checks are cheap (a header ``verify()`` / a 1px SVG parse, no full
        decode), so ``validate()`` stays frame-free.

        Args:
            meta: Metadata of the incoming video; returned unchanged.
            **_context: Accepted and ignored.

        Returns:
            ``meta`` itself -- the overlay never changes the video's shape.

        Raises:
            ValueError: If ``source`` cannot be opened or parsed as an image.
        """
        try:
            if self._is_svg():
                import resvg_py

                resvg_py.svg_to_bytes(svg_path=str(self.source), width=1)
            else:
                with Image.open(self.source) as im:
                    im.verify()
        except (OSError, ValueError) as exc:
            raise ValueError(f"image_overlay source {str(self.source)!r} is not a readable image: {exc}") from exc
        return meta

    def _rasterize_svg(self, target_w: int) -> np.ndarray:
        """Render the SVG ``source`` at ``target_w`` pixels wide as an RGBA uint8 array.

        Results are cached per width, so repeated calls for the same frame size
        rasterise only once.
        """
        cached = self._svg_cache.get(target_w)
        if cached is not None:
            return cached
        # Lazy import: only when an SVG source is actually used. resvg renders
        # at the exact target width (height proportional to the viewBox) with a
        # transparent background and never fetches remote resources.
        import resvg_py

        png = resvg_py.svg_to_bytes(svg_path=str(self.source), width=target_w)
        with Image.open(BytesIO(bytes(png))) as img:
            arr = np.array(img.convert("RGBA"), dtype=np.uint8)
        self._svg_cache[target_w] = arr
        return arr

    def _load_overlay(self) -> np.ndarray:
        """Decode the raster ``source`` to an RGBA uint8 array, caching the result."""
        if self._overlay_rgba is not None:
            return self._overlay_rgba
        # Context manager so the underlying file handle is closed
        # deterministically instead of whenever the Image is garbage-collected.
        with Image.open(self.source) as img:
            self._overlay_rgba = np.array(img.convert("RGBA"), dtype=np.uint8)
        return self._overlay_rgba

    def _compute_position(self, frame_width: int, frame_height: int, img_w: int, img_h: int) -> tuple[int, int]:
        """Return the overlay's top-left pixel ``(x, y)`` for the configured anchor.

        The result may be negative or extend past the frame; clipping is done
        by the caller.
        """
        # Copied verbatim from TextOverlay: ImageOverlay's anchor Literal is
        # deliberately the same set, so the geometry is shared by construction.
        px = int(self.position[0] * frame_width)
        py = int(self.position[1] * frame_height)

        if self.anchor == "center":
            return px - img_w // 2, py - img_h // 2
        if self.anchor == "top_left":
            return px, py
        if self.anchor == "top_center":
            return px - img_w // 2, py
        if self.anchor == "bottom_center":
            return px - img_w // 2, py - img_h
        if self.anchor == "bottom_left":
            return px, py - img_h
        # bottom_right
        return px - img_w, py - img_h

    def _resized_overlay(self, frame_w: int) -> np.ndarray:
        """Return the overlay as RGBA uint8, scaled to ``scale * frame_w`` pixels wide."""
        target_w = max(1, round(self.scale * frame_w))
        if self._is_svg():
            # Rasterise the vector at the target size (crisp) rather than
            # upscaling a fixed bitmap. resvg derives height from the viewBox.
            return self._rasterize_svg(target_w)
        overlay = self._load_overlay()
        src_h, src_w = overlay.shape[:2]
        # Height follows the source aspect ratio; never collapse below 1px.
        target_h = max(1, round(target_w * src_h / src_w))
        if (target_w, target_h) == (src_w, src_h):
            return overlay
        resized = Image.fromarray(overlay).resize((target_w, target_h), Image.LANCZOS)
        return np.array(resized, dtype=np.uint8)

    def _blend_params(
        self, frame_w: int, frame_h: int
    ) -> tuple[np.ndarray, np.ndarray, tuple[int, int, int, int]] | None:
        """Placement + blend inputs shared by the eager and streaming paths.

        Single source of truth so the two paths cannot drift -- the
        eager/stream parity-hole class of bug fixed in 0.34.1.

        Returns:
            ``(alpha, rgb, (dst_y, dst_x, paste_h, paste_w))`` where ``alpha``
            is float32 in ``[0, 1]`` with a trailing channel axis of size 1,
            ``rgb`` is the float32 colour of the visible overlay region, and
            the tuple is the destination rectangle in the frame. ``None`` when
            the effect is a no-op: the overlay lands fully off-frame, or the
            visible region is fully transparent (e.g. ``opacity == 0``).
        """
        overlay = self._resized_overlay(frame_w)
        oh, ow = overlay.shape[:2]
        x, y = self._compute_position(frame_w, frame_h, ow, oh)

        # Clip the overlay rectangle against the frame: a negative origin
        # skips that many source pixels; the paste size is bounded by both the
        # remaining overlay and the remaining frame.
        src_x = max(0, -x)
        src_y = max(0, -y)
        dst_x = max(0, x)
        dst_y = max(0, y)
        paste_w = min(ow - src_x, frame_w - dst_x)
        paste_h = min(oh - src_y, frame_h - dst_y)

        if paste_w <= 0 or paste_h <= 0:
            return None

        region = overlay[src_y : src_y + paste_h, src_x : src_x + paste_w]
        alpha = (region[:, :, 3:4].astype(np.float32) / 255.0) * self.opacity
        if not alpha.any():
            # Nothing visible to composite: blending would reproduce the frame
            # exactly, so skip the per-frame float work altogether.
            return None
        rgb = region[:, :, :3].astype(np.float32)
        return alpha, rgb, (dst_y, dst_x, paste_h, paste_w)

    @staticmethod
    def _composite(frame: np.ndarray, alpha: np.ndarray, rgb: np.ndarray, dst: tuple[int, int, int, int]) -> None:
        """Alpha-blend ``rgb`` into ``frame`` in place over the ``dst`` rectangle.

        The one blend routine used by both ``process_frame`` and ``_apply``.
        """
        dy, dx, ph, pw = dst
        region = frame[dy : dy + ph, dx : dx + pw]
        blended = rgb * alpha + region.astype(np.float32) * (1.0 - alpha)
        # Round to nearest before the integer cast: a bare astype() truncates,
        # so float32 error (e.g. 254.99998) would darken pixels by one level.
        frame[dy : dy + ph, dx : dx + pw] = np.rint(blended).astype(np.uint8)

    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
        """Precompute the blend inputs once for the streaming path.

        Only ``width`` and ``height`` are used; ``total_frames`` and ``fps``
        are accepted but unused.
        """
        params = self._blend_params(width, height)
        if params is None:
            self._stream_noop = True
            # Drop any arrays left over from a previous initialisation.
            self._stream_alpha = None
            self._stream_rgb = None
            return
        self._stream_noop = False
        self._stream_alpha, self._stream_rgb, self._stream_dst = params

    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
        """Composite the overlay onto ``frame`` in place and return it.

        ``streaming_init`` must have been called first. ``frame_index`` is
        unused: the overlay is identical on every frame.
        """
        if self._stream_noop:
            return frame
        # Invariant established by streaming_init (also narrows the Optional types).
        assert self._stream_alpha is not None and self._stream_rgb is not None
        self._composite(frame, self._stream_alpha, self._stream_rgb, self._stream_dst)
        return frame

    def _apply(self, video: Video) -> Video:
        """Composite the overlay onto every frame of ``video`` in place (eager path)."""
        frame_h, frame_w = video.frame_shape[:2]
        params = self._blend_params(frame_w, frame_h)
        if params is None:
            return video
        alpha, rgb, dst = params

        logger.info("Applying image overlay...")
        for frame in tqdm(video.frames, desc="Image overlay"):
            self._composite(frame, alpha, rgb, dst)
        return video
|
|
988
|
+
|
|
989
|
+
|
|
774
990
|
class Shake(Effect):
|
|
775
991
|
"""Per-frame camera shake: jitters every frame by a random or rhythmic offset.
|
|
776
992
|
|
|
@@ -175,7 +175,18 @@ class Operation(BaseModel):
|
|
|
175
175
|
raise NotImplementedError(f"{type(self).__name__}.apply not implemented")
|
|
176
176
|
|
|
177
177
|
def predict_metadata(self, meta: VideoMetadata) -> VideoMetadata:
    """Predict output metadata from input metadata. Default: identity.

    Run during ``VideoEdit.validate()``'s dry-run, before any frames are
    decoded. Beyond predicting shape, this is the fail-fast gate, and it
    has one contract: **reject exactly the plans that would otherwise crash
    or do unrecoverable / expensive work in** :meth:`apply` **/** ``run()``;
    anything ``run()`` can absorb by graceful degradation is NOT rejected.
    ``TranscriptionOverlay`` rejects un-fittable subtitles (they used to
    crash mid-render); ``TextOverlay``/``ImageOverlay`` do not reject
    off-frame geometry (it clips to a valid no-op). Keep the check
    metadata-cheap -- no frame decode.

    Args:
        meta: Metadata describing the video this operation would receive.

    Returns:
        The predicted output metadata. This base implementation returns
        ``meta`` itself, unchanged.
    """
    return meta
|
|
180
191
|
|
|
181
192
|
def to_ffmpeg_filter(self, ctx: FilterCtx) -> str | None:
|
|
@@ -320,6 +320,11 @@ class TranscriptionOverlay(Effect):
|
|
|
320
320
|
the fit search and the renderer, so they never diverge. Margin math
|
|
321
321
|
comes from ``ImageText.available_region`` (one source of truth with
|
|
322
322
|
``measure_text_box``).
|
|
323
|
+
|
|
324
|
+
The highlight multiplier is threaded in so the measurement is
|
|
325
|
+
worst-case for the animated word enlargement: a cue that fits at base
|
|
326
|
+
size but overflows once a word is highlighted is rejected here (and
|
|
327
|
+
auto-shrunk by ``_resolve_layout``) instead of crashing mid-render.
|
|
323
328
|
"""
|
|
324
329
|
rect = img_text.measure_text_box(
|
|
325
330
|
text=text,
|
|
@@ -329,13 +334,19 @@ class TranscriptionOverlay(Effect):
|
|
|
329
334
|
font_size=font_px,
|
|
330
335
|
anchor=cfg.anchor,
|
|
331
336
|
margin=cfg.margin,
|
|
337
|
+
highlight_size_multiplier=cfg.style.highlight_size_multiplier,
|
|
338
|
+
highlight_bold_font=self.highlight_bold_font,
|
|
332
339
|
)
|
|
333
340
|
if rect.height == 0:
|
|
334
341
|
return None
|
|
335
342
|
box_w = int(rect.width)
|
|
336
343
|
box_h = rect.height
|
|
337
344
|
left, top, avail_w, avail_h = img_text.available_region(cfg.margin)
|
|
338
|
-
|
|
345
|
+
# The box must fit the drawable area, AND the worst-case rendered line
|
|
346
|
+
# (incl. the enlarged highlighted word, or an unbreakable long word)
|
|
347
|
+
# must fit the box -- else the centered line spills off-frame at draw
|
|
348
|
+
# time. Failing this shrinks the font in ``_resolve_layout``.
|
|
349
|
+
fits = box_w <= avail_w and box_h <= avail_h and rect.content_width <= box_w
|
|
339
350
|
x = min(max(int(round(rect.x)), left), left + avail_w - box_w)
|
|
340
351
|
y = min(max(int(round(rect.y)), top), top + avail_h - box_h)
|
|
341
352
|
return _CueBox(x=x, y=y, box_w=box_w, height=box_h, fits=fits)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|