PyPI - videopython - Versions diffs - 0.34.0__tar.gz → 0.35.0__tar.gz - Mend

videopython 0.34.0.tar.gz → 0.35.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

{videopython-0.34.0 → videopython-0.35.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.34.0
+Version: 0.35.0
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://videopython.com
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -12,15 +12,15 @@ Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Python: <3.14,>=3.10
+Requires-Python: <3.14,>=3.11
 Requires-Dist: numpy>=1.25.2
 Requires-Dist: opencv-python-headless>=4.9.0.80
 Requires-Dist: pillow>=12.1.1
 Requires-Dist: pydantic>=2.8.0
+Requires-Dist: resvg-py>=0.3.2
 Requires-Dist: tqdm>=4.66.3
 Provides-Extra: ai
 Requires-Dist: accelerate>=0.29.2; extra == 'ai'
@@ -67,7 +67,7 @@ pip install videopython          # core video/audio editing
 pip install "videopython[ai]"    # + local AI features (GPU recommended)
 ```
-Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
+Python `>=3.11, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
 ## Quick Start

{videopython-0.34.0 → videopython-0.35.0}/README.md RENAMED Viewed

@@ -18,7 +18,7 @@ pip install videopython          # core video/audio editing
 pip install "videopython[ai]"    # + local AI features (GPU recommended)
 ```
-Python `>=3.10, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
+Python `>=3.11, <3.14`. AI features run locally — no cloud API keys required, but model weights are downloaded on first use.
 ## Quick Start

{videopython-0.34.0 → videopython-0.35.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "videopython"
-version = "0.34.0"
+version = "0.35.0"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -9,7 +9,7 @@ authors = [
 ]
 license = { text = "Apache-2.0" }
 readme = "README.md"
-requires-python = ">=3.10, <3.14"
+requires-python = ">=3.11, <3.14"
 keywords = [
     "python",
     "videopython",
@@ -24,7 +24,6 @@ keywords = [
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
@@ -35,6 +34,7 @@ dependencies = [
     "numpy>=1.25.2",
     "opencv-python-headless>=4.9.0.80",
     "pillow>=12.1.1",
+    "resvg-py>=0.3.2",
     "tqdm>=4.66.3",
     "pydantic>=2.8.0",
 ]
@@ -203,7 +203,7 @@ markers = [
 [tool.ruff]
 line-length = 120
-target-version = "py310"
+target-version = "py311"
 [tool.ruff.lint]
 select = [

{videopython-0.34.0 → videopython-0.35.0}/src/videopython/base/image_text.py RENAMED Viewed

@@ -96,6 +96,17 @@ class TextBoxRect:
     callers short-circuit such boxes (nothing to draw). ``width`` mirrors the
     resolved ``box_width`` and may be a float when an absolute >1 value was
     passed, matching legacy behaviour.
+    ``content_width`` is the widest a rendered line actually gets -- worst
+    case over the animated highlight when ``highlight_size_multiplier > 1``.
+    There are two independent notions of "fitting" here. ``fits`` is
+    box-vs-image *only* -- the legacy contract that gates
+    :meth:`write_text_box`'s ``OutOfBoundsError`` -- and does **not** imply
+    the content fits the box: legacy callers intentionally overflow the box
+    while staying inside the image. A caller that needs the content inside
+    the box (subtitles, where the box is frame-clamped) must additionally
+    check ``content_width <= width`` itself.
     """
     x: float
@@ -104,6 +115,27 @@ class TextBoxRect:
     height: int
     fits: bool
     lines: tuple[str, ...]
+    content_width: int = 0
+@dataclass(frozen=True)
+class _WordPlacement:
+    """One word's resolved font/size and pixel offset within a highlighted line.
+    ``dx``/``dy`` are offsets from the line's left/top. Produced once by
+    :meth:`ImageText._layout_highlighted_line` and consumed by both the
+    measurer and the renderer, so the box reserved by ``measure_text_box``
+    and the pixels drawn by ``write_text_box`` cannot disagree.
+    """
+    word: str
+    font_filename: str | None
+    font_size: int
+    width: int
+    height: int
+    dx: int
+    dy: int
+    is_highlighted: bool
 class ImageText:
@@ -614,16 +646,22 @@ class ImageText:
         font_size: int = 11,
         anchor: AnchorPoint = AnchorPoint.TOP_LEFT,
         margin: MarginType = 0,
+        highlight_size_multiplier: float = 1.0,
+        highlight_bold_font: str | None = None,
     ) -> TextBoxRect:
         """Measure where a wrapped text box would land, without drawing it.
         Pure: resolves margins/box-width/position, wraps the text, applies the
         anchor, and bounds-checks against the image — the exact math
-        :meth:`write_text_box` used to do inline. Highlighting and per-line
-        alignment (``place``) do not change the box envelope, so they are not
-        parameters here; this intentionally preserves the pre-existing
-        behaviour that an enlarged highlighted word is *not* accounted for in
-        the fit check.
+        :meth:`write_text_box` used to do inline.
+        ``highlight_size_multiplier > 1`` makes the measurement worst-case for
+        an *animated* highlight (any word may be the enlarged one over the
+        cue's lifetime): wrapping reserves room so even an enlarged word keeps
+        its line within ``box_width``, and ``height`` uses each line's tallest
+        possible highlighted variant. With the default ``1.0`` the result is
+        byte-identical to the plain base-font measurement, so existing callers
+        and ``place`` (alignment) are unaffected.
         Returns:
             A :class:`TextBoxRect`. ``fits`` is ``False`` when the box would
@@ -655,15 +693,32 @@ class ImageText:
         # Calculate initial position based on margin and anchor before splitting text
         x_pos, y_pos = self._convert_position(xy, margin_top, margin_left, available_width, available_height)
-        # Split text into lines that fit within box_width
+        # Wrap at the real box width (same as the renderer).
         lines = self._split_lines_by_width(text, font_filename, font_size, int(box_width))
-        # Calculate total height of all lines
-        lines_height = sum(self.get_text_dimensions(font_filename, font_size, line)[1] for line in lines)
+        # Per-line extent. With an animated highlight any word may be the
+        # enlarged one over the cue's lifetime, so each line contributes the
+        # widest/tallest variant it could ever render as.
+        # ``_highlighted_line_max_extent`` derives that envelope from the same
+        # per-word geometry the renderer uses (single source of truth).
+        hl_mult = max(1.0, highlight_size_multiplier)
+        content_width = 0
+        lines_height = 0
+        for line in lines:
+            if hl_mult > 1.0:
+                line_w, line_h = self._highlighted_line_max_extent(
+                    line, font_filename, font_size, hl_mult, highlight_bold_font
+                )
+            else:
+                line_w, line_h = self.get_text_dimensions(font_filename, font_size, line)
+            content_width = max(content_width, line_w)
+            lines_height += line_h
         if lines_height == 0:
             # No renderable lines (e.g. whitespace-only text); position is the
             # unadjusted insertion point and the box trivially "fits".
-            return TextBoxRect(x=x_pos, y=y_pos, width=box_width, height=0, fits=True, lines=tuple(lines))
+            return TextBoxRect(
+                x=x_pos, y=y_pos, width=box_width, height=0, fits=True, lines=tuple(lines), content_width=0
+            )
         # Final position calculation based on anchor point
         if anchor in AnchorPoint.center_anchors():
@@ -682,7 +737,15 @@ class ImageText:
             or x_pos + box_width > self.image_size[1]
             or y_pos + lines_height > self.image_size[0]
         )
-        return TextBoxRect(x=x_pos, y=y_pos, width=box_width, height=lines_height, fits=fits, lines=tuple(lines))
+        return TextBoxRect(
+            x=x_pos,
+            y=y_pos,
+            width=box_width,
+            height=lines_height,
+            fits=fits,
+            lines=tuple(lines),
+            content_width=content_width,
+        )
     def write_text_box(
         self,
@@ -761,7 +824,11 @@ class ImageText:
         if highlight_word_index is not None and highlight_color is None:
             highlight_color = text_color
-        # Measure (single source of truth for box geometry), then render.
+        # Measure (single source of truth for box geometry), then render. When
+        # a word will be highlighted, measure worst-case so the box reserves
+        # room for the enlarged word -- otherwise stay byte-identical to the
+        # plain base-font measurement.
+        measure_mult = highlight_size_multiplier if highlight_word_index is not None else 1.0
         rect = self.measure_text_box(
             text=text,
             font_filename=font_filename,
@@ -770,6 +837,8 @@ class ImageText:
             font_size=font_size,
             anchor=anchor,
             margin=margin,
+            highlight_size_multiplier=measure_mult,
+            highlight_bold_font=highlight_bold_font,
         )
         lines = list(rect.lines)
         if rect.height == 0:
@@ -783,56 +852,53 @@ class ImageText:
                 f"Text box with size ({box_width}x{lines_height}) at position ({x_pos}, {y_pos}) is out of bounds!"
             )
-        # Write lines
+        # Write lines. The line that holds the highlighted word is positioned
+        # and advanced by its *true* (enlarged) extent via the shared
+        # ``_highlighted_line_size`` -- the same numbers ``measure_text_box``
+        # reserved -- so an enlarged word can never push the line out of the
+        # box (hence out of the frame) regardless of alignment.
         current_text_height = y_pos
         word_index_offset = 0  # Track global word index across lines
         for line in lines:
-            line_dimensions = self.get_text_dimensions(font_filename, font_size, line)
+            line_words = line.split()
+            hl_local_index = -1
+            if highlight_word_index is not None:
+                line_end_word_index = word_index_offset + len(line_words) - 1
+                if word_index_offset <= highlight_word_index <= line_end_word_index:
+                    hl_local_index = highlight_word_index - word_index_offset
-            # Calculate horizontal position based on alignment
+            if hl_local_index >= 0:
+                line_w, line_h = self._highlighted_line_size(
+                    line, font_filename, font_size, hl_local_index, highlight_size_multiplier, highlight_bold_font
+                )
+            else:
+                line_w, line_h = self.get_text_dimensions(font_filename, font_size, line)
+            # Calculate horizontal position based on alignment (true line width)
             if place == TextAlign.LEFT:
                 x_left = x_pos
             elif place == TextAlign.RIGHT:
-                x_left = x_pos + box_width - line_dimensions[0]
+                x_left = x_pos + box_width - line_w
             elif place == TextAlign.CENTER:
-                x_left = int(x_pos + ((box_width - line_dimensions[0]) / 2))
+                x_left = int(x_pos + ((box_width - line_w) / 2))
             else:
                 valid_places = [e.value for e in TextAlign]
                 raise ValueError(f"Place '{place}' is not supported. Must be one of: {', '.join(valid_places)}")
-            # Check if highlighting is needed for this line
-            if highlight_word_index is not None:
-                line_words = line.split()
-                line_start_word_index = word_index_offset
-                line_end_word_index = word_index_offset + len(line_words) - 1
-                # Check if the highlighted word is in this line
-                if line_start_word_index <= highlight_word_index <= line_end_word_index:
-                    self._write_line_with_highlight(
-                        line=line,
-                        font_filename=font_filename,
-                        font_size=font_size,
-                        font_border_size=font_border_size,
-                        text_color=text_color,
-                        highlight_color=highlight_color or (255, 255, 255),
-                        highlight_size_multiplier=highlight_size_multiplier,
-                        highlight_word_local_index=highlight_word_index - line_start_word_index,
-                        highlight_bold_font=highlight_bold_font,
-                        x_left=int(x_left),
-                        y_top=int(current_text_height),
-                    )
-                else:
-                    # Write normal line without highlighting
-                    self.write_text(
-                        text=line,
-                        font_filename=font_filename,
-                        xy=(x_left, current_text_height),
-                        font_size=font_size,
-                        font_border_size=font_border_size,
-                        color=text_color,
-                    )
-                word_index_offset += len(line_words)
+            if hl_local_index >= 0:
+                self._write_line_with_highlight(
+                    line=line,
+                    font_filename=font_filename,
+                    font_size=font_size,
+                    font_border_size=font_border_size,
+                    text_color=text_color,
+                    highlight_color=highlight_color or (255, 255, 255),
+                    highlight_size_multiplier=highlight_size_multiplier,
+                    highlight_word_local_index=hl_local_index,
+                    highlight_bold_font=highlight_bold_font,
+                    x_left=int(x_left),
+                    y_top=int(current_text_height),
+                )
             else:
                 # Write normal line without highlighting
                 self.write_text(
@@ -844,8 +910,9 @@ class ImageText:
                     color=text_color,
                 )
-            # Increment vertical position for next line
-            current_text_height += line_dimensions[1]
+            word_index_offset += len(line_words)
+            # Increment vertical position for next line (true line height)
+            current_text_height += line_h
         # Add background color for the text if specified
         if background_color is not None:
@@ -921,6 +988,148 @@ class ImageText:
         return (int(x_pos + box_width), int(current_text_height))
+    def _highlight_font(
+        self,
+        font_filename: str | None,
+        font_size: int,
+        highlight_size_multiplier: float,
+        highlight_bold_font: str | None,
+    ) -> tuple[str | None, int, int, int]:
+        """Resolve the enlarged-word basics once.
+        Returns ``(font_file, font_size, baseline_offset, space_width)`` -- the
+        single definition of the highlight constants, shared by the per-word
+        layout (render / exact-size path) and the worst-case extent (measure
+        path) so those paths cannot drift apart on the fundamentals.
+        """
+        hl_font_size = int(font_size * highlight_size_multiplier)
+        hl_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
+        baseline_offset = self._get_font_baseline_offset(font_filename, font_size, hl_font_file, hl_font_size)
+        space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
+        return hl_font_file, hl_font_size, baseline_offset, space_width
+    def _layout_highlighted_line(
+        self,
+        line: str,
+        font_filename: str | None,
+        font_size: int,
+        highlight_word_local_index: int,
+        highlight_size_multiplier: float,
+        highlight_bold_font: str | None,
+    ) -> list[_WordPlacement]:
+        """Per-word placement for ``line`` with one word enlarged.
+        The single source of truth for the highlighted-line advance (enlarged
+        font size, bold-font swap, base-size inter-word space, baseline
+        offset). Both :meth:`_highlighted_line_size` (measuring the line that
+        actually owns the highlight) and :meth:`_write_line_with_highlight`
+        (rendering it) consume this list, so the reserved box and the drawn
+        pixels agree by construction.
+        Reached only for the line that owns the highlighted word, so
+        ``highlight_word_local_index`` is in range; degenerate inputs are
+        handled by the callers' own guards.
+        """
+        words = line.split()
+        hl_font_file, hl_font_size, baseline_offset, space_width = self._highlight_font(
+            font_filename, font_size, highlight_size_multiplier, highlight_bold_font
+        )
+        placements: list[_WordPlacement] = []
+        dx = 0
+        for i, word in enumerate(words):
+            is_hl = i == highlight_word_local_index
+            wf = hl_font_file if is_hl else font_filename
+            ws = hl_font_size if is_hl else font_size
+            w, h = self.get_text_dimensions(wf, ws, word)
+            placements.append(
+                _WordPlacement(
+                    word=word,
+                    font_filename=wf,
+                    font_size=ws,
+                    width=w,
+                    height=h,
+                    dx=dx,
+                    dy=baseline_offset if is_hl else 0,
+                    is_highlighted=is_hl,
+                )
+            )
+            dx += w
+            if i < len(words) - 1:
+                dx += space_width
+        return placements
+    def _highlighted_line_size(
+        self,
+        line: str,
+        font_filename: str | None,
+        font_size: int,
+        highlight_word_local_index: int,
+        highlight_size_multiplier: float,
+        highlight_bold_font: str | None,
+    ) -> tuple[int, int]:
+        """Rendered (width, height) of ``line`` with one *specific* word enlarged.
+        A reduction of the shared :meth:`_layout_highlighted_line`, so it is
+        exact w.r.t. the renderer by construction. Used to position/advance
+        the line that owns the highlighted word. ``highlight_word_local_index``
+        out of range falls back to the plain line size -- exactly what the
+        renderer's own guard ends up drawing.
+        """
+        words = line.split()
+        if not words:
+            return (0, 0)
+        if not (0 <= highlight_word_local_index < len(words)):
+            return self.get_text_dimensions(font_filename, font_size, line)
+        placements = self._layout_highlighted_line(
+            line, font_filename, font_size, highlight_word_local_index, highlight_size_multiplier, highlight_bold_font
+        )
+        width = max(p.dx + p.width for p in placements)
+        # ``min(0, ...)`` / ``max(0, ...)`` stay defensive for a *shrinking*
+        # highlight (multiplier < 1 -> negative baseline offset, the word
+        # rides above the line). The subtitle measure path clamps the
+        # multiplier to >= 1 so there ``top`` is always 0, but
+        # ``write_text_box`` forwards the raw multiplier, so keep the floor.
+        top = min([0, *(p.dy for p in placements)])
+        bottom = max([0, *(p.dy + p.height for p in placements)])
+        return (width, bottom - top)
+    def _highlighted_line_max_extent(
+        self,
+        line: str,
+        font_filename: str | None,
+        font_size: int,
+        highlight_size_multiplier: float,
+        highlight_bold_font: str | None,
+    ) -> tuple[int, int]:
+        """Worst-case (width, height) over *any* word being the enlarged one.
+        Equal to ``max`` of :meth:`_highlighted_line_size` across every word
+        position -- the envelope an animated highlight needs -- but in a
+        single O(words) pass instead of O(words^2): only *which* word is
+        enlarged varies, so the base metrics are shared and the extremes are
+        closed-form. Uses the same :meth:`_highlight_font` constants as the
+        layout, so this envelope can never under-reserve what the renderer
+        draws (it over-reserves only in the safe direction).
+        """
+        words = line.split()
+        if not words:
+            return self.get_text_dimensions(font_filename, font_size, line)
+        hl_font_file, hl_font_size, baseline_offset, space_width = self._highlight_font(
+            font_filename, font_size, highlight_size_multiplier, highlight_bold_font
+        )
+        base = [self.get_text_dimensions(font_filename, font_size, w) for w in words]
+        enlarged = [self.get_text_dimensions(hl_font_file, hl_font_size, w) for w in words]
+        # width_k = (sum of base widths + spaces) - base_w[k] + enlarged_w[k];
+        # the worst k just maximizes the (enlarged - base) swap.
+        base_total = sum(w for w, _ in base) + space_width * (len(words) - 1)
+        width = base_total + max(ew - bw for (bw, _), (ew, _) in zip(base, enlarged))
+        # Non-highlighted words sit at dy=0, the enlarged one at
+        # dy=baseline_offset; the worst line is the tallest base word vs. the
+        # tallest enlarged word lifted by the baseline offset.
+        top = min(0, baseline_offset)
+        bottom = max([0, *(h for _, h in base), baseline_offset + max(h for _, h in enlarged)])
+        return (width, bottom - top)
     def _write_line_with_highlight(
         self,
         line: str,
@@ -936,7 +1145,11 @@ class ImageText:
         y_top: int,
     ) -> None:
         """
-        Write a line of text with one word highlighted using word-by-word rendering with baseline alignment.
+        Write a line of text with one word highlighted, word-by-word with baseline alignment.
+        Draws the placements from the shared :meth:`_layout_highlighted_line`,
+        so every pixel lands exactly where :meth:`measure_text_box` reserved
+        room for it (measurement and rendering use the same geometry).
         Args:
             line: The text line to render
@@ -951,58 +1164,22 @@ class ImageText:
             x_left: Left x position for the line
             y_top: Top y position for the line
         """
-        # Split line into words
         words = line.split()
         if highlight_word_local_index >= len(words):
-            return  # Safety check
-        # Calculate highlighted font size and determine font files
-        highlight_font_size = int(font_size * highlight_size_multiplier)
-        highlight_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
+            return  # Safety check: nothing to draw (matches the measure fallback)
-        # Calculate baseline offset for highlighted words (using the appropriate font files)
-        baseline_offset = self._get_font_baseline_offset(
-            font_filename, font_size, highlight_font_file, highlight_font_size
-        )
-        # Render words one by one with proper spacing
-        current_x = x_left
-        for i, word in enumerate(words):
-            # Determine if this is the highlighted word
-            is_highlighted = i == highlight_word_local_index
-            # Choose font file, size, and color based on highlighting
-            word_font_file = highlight_font_file if is_highlighted else font_filename
-            word_font_size = highlight_font_size if is_highlighted else font_size
-            word_color = highlight_color if is_highlighted else text_color
-            # Calculate y position with baseline alignment
-            word_y = y_top
-            if is_highlighted:
-                word_y += baseline_offset
-            # Render the word
+        for p in self._layout_highlighted_line(
+            line, font_filename, font_size, highlight_word_local_index, highlight_size_multiplier, highlight_bold_font
+        ):
             self.write_text(
-                text=word,
-                font_filename=word_font_file,
-                xy=(current_x, word_y),
-                font_size=word_font_size,
+                text=p.word,
+                font_filename=p.font_filename,
+                xy=(x_left + p.dx, y_top + p.dy),
+                font_size=p.font_size,
                 font_border_size=font_border_size,
-                color=word_color,
+                color=highlight_color if p.is_highlighted else text_color,
             )
-            # Calculate the width of this word for spacing
-            word_width = self.get_text_dimensions(word_font_file, word_font_size, word)[0]
-            # Update current_x for next word (add word width plus space)
-            current_x += word_width
-            # Add space between words (except after the last word)
-            if i < len(words) - 1:
-                space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
-                current_x += space_width
     def _find_smallest_bounding_rect(self, mask: np.ndarray) -> tuple[int, int, int, int]:
         """
         Find the smallest bounding rectangle containing non-zero values in the mask.

{videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/__init__.py RENAMED Viewed

@@ -8,6 +8,7 @@ from .effects import (
     Flash,
     FullImageOverlay,
     Glitch,
+    ImageOverlay,
     Kaleidoscope,
     KenBurns,
     MirrorFlip,
@@ -56,6 +57,7 @@ __all__ = [
     "SilenceRemoval",
     # Effects
     "FullImageOverlay",
+    "ImageOverlay",
     "Blur",
     "Zoom",
     "ColorGrading",

{videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/effects.py RENAMED Viewed

@@ -14,6 +14,7 @@ audio after ``_apply`` returns.
 from __future__ import annotations
 import logging
+from io import BytesIO
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar, Literal
@@ -29,13 +30,14 @@ from videopython.editing.operation import Effect
 if TYPE_CHECKING:
     from videopython.audio import Audio
-    from videopython.base.video import Video
+    from videopython.base.video import Video, VideoMetadata
 logger = logging.getLogger(__name__)
 __all__ = [
     "Effect",
     "FullImageOverlay",
+    "ImageOverlay",
     "Blur",
     "Zoom",
     "ColorGrading",
@@ -771,6 +773,220 @@ class TextOverlay(Effect):
         return video
+class ImageOverlay(Effect):
+    """Composites a scaled image at an anchored position on every frame in the window.
+    A resolution-independent watermark / logo / brand mark. Unlike
+    :class:`FullImageOverlay` (full-frame only, raises on size mismatch), the
+    image is scaled to a fraction of the frame *width* and placed at an
+    anchored normalized position, so one config works across 1080p / 4k /
+    vertical / square. Loaded just-in-time from ``source`` so the op stays
+    JSON-serialisable. Off-frame or oversized placement clips to a partial
+    paste or a no-op -- the same contract as :class:`TextOverlay`, never an
+    error; only an unreadable ``source`` is rejected (in ``predict_metadata``).
+    ``source`` may be a raster image (PNG/JPEG/WebP) or an SVG (detected by the
+    ``.svg`` extension). An SVG is rasterised by ``resvg`` *at the exact target
+    pixel width* -- crisp at any frame size, not a blurry upscale of a
+    fixed-size bitmap -- with a transparent background and no remote-resource
+    fetching (the local path only; no SSRF). SVGs containing text depend on the
+    fonts available at render time.
+    """
+    op: Literal["image_overlay"] = "image_overlay"
+    streamable: ClassVar[bool] = True
+    source: Path = Field(
+        description=(
+            "Path to an image file: a raster RGB/RGBA image (PNG/JPEG/WebP) or "
+            "an SVG (`.svg`, rasterised at the target resolution). Loaded at "
+            "apply time; kept JSON-serialisable as a path."
+        ),
+    )
+    scale: float = Field(
+        0.15,
+        gt=0,
+        le=1,
+        description=(
+            "Overlay width as a fraction of frame width (0-1). Height follows "
+            "the image's aspect ratio. Resolution-independent."
+        ),
+    )
+    opacity: float = Field(
+        1.0,
+        ge=0,
+        le=1,
+        description="Multiplies the image's own alpha. 0 = fully transparent, 1 = use the image alpha unchanged.",
+    )
+    position: tuple[float, float] = Field(
+        (0.95, 0.95),
+        description=(
+            "Where to place the overlay as normalized (x, y) coordinates. "
+            "(0, 0) = top-left corner, (1, 1) = bottom-right corner."
+        ),
+    )
+    anchor: Literal["center", "top_left", "top_center", "bottom_center", "bottom_left", "bottom_right"] = Field(
+        "bottom_right",
+        description="Which point of the overlay box sits at the position coordinate.",
+    )
+    _overlay_rgba: np.ndarray | None = PrivateAttr(default=None)
+    _svg_cache: dict[int, np.ndarray] = PrivateAttr(default_factory=dict)
+    _stream_noop: bool = PrivateAttr(default=False)
+    _stream_alpha: np.ndarray | None = PrivateAttr(default=None)
+    _stream_rgb: np.ndarray | None = PrivateAttr(default=None)
+    _stream_dst: tuple[int, int, int, int] = PrivateAttr(default=(0, 0, 0, 0))
+    @model_validator(mode="after")
+    def _validate_position(self) -> ImageOverlay:
+        if not (0.0 <= self.position[0] <= 1.0 and 0.0 <= self.position[1] <= 1.0):
+            raise ValueError("position values must be in range [0, 1]")
+        return self
+    def _is_svg(self) -> bool:
+        return self.source.suffix.lower() == ".svg"
+    def predict_metadata(self, meta: VideoMetadata, **_context: Any) -> VideoMetadata:
+        """Reject only a missing/unreadable ``source`` (see :meth:`Operation.predict_metadata`).
+        An unreadable source is the one failure ``run()`` cannot survive -- it
+        would raise mid-stream after expensive frame decode -- so it is caught
+        at ``validate()`` time, symmetric with ``TranscriptionOverlay``.
+        Geometry (oversized / off-frame) is deliberately *not* checked here: it
+        clips to a valid no-op like :class:`TextOverlay`, so rejecting it would
+        break that contract and the parity with the op this is modeled on. Both
+        checks are cheap (a header ``verify()`` / a 1px SVG parse, no full
+        decode), so ``validate()`` stays frame-free.
+        """
+        try:
+            if self._is_svg():
+                import resvg_py
+                resvg_py.svg_to_bytes(svg_path=str(self.source), width=1)
+            else:
+                with Image.open(self.source) as im:
+                    im.verify()
+        except (OSError, ValueError) as exc:
+            raise ValueError(f"image_overlay source {str(self.source)!r} is not a readable image: {exc}") from exc
+        return meta
+    def _rasterize_svg(self, target_w: int) -> np.ndarray:
+        cached = self._svg_cache.get(target_w)
+        if cached is not None:
+            return cached
+        # Lazy import: only when an SVG source is actually used. resvg renders
+        # at the exact target width (height proportional to the viewBox) with a
+        # transparent background and never fetches remote resources.
+        import resvg_py
+        png = resvg_py.svg_to_bytes(svg_path=str(self.source), width=target_w)
+        arr = np.array(Image.open(BytesIO(bytes(png))).convert("RGBA"), dtype=np.uint8)
+        self._svg_cache[target_w] = arr
+        return arr
+    def _load_overlay(self) -> np.ndarray:
+        if self._overlay_rgba is not None:
+            return self._overlay_rgba
+        img = Image.open(self.source).convert("RGBA")
+        self._overlay_rgba = np.array(img, dtype=np.uint8)
+        return self._overlay_rgba
+    def _compute_position(self, frame_width: int, frame_height: int, img_w: int, img_h: int) -> tuple[int, int]:
+        # Copied verbatim from TextOverlay: ImageOverlay's anchor Literal is
+        # deliberately the same set, so the geometry is shared by construction.
+        px = int(self.position[0] * frame_width)
+        py = int(self.position[1] * frame_height)
+        if self.anchor == "center":
+            return px - img_w // 2, py - img_h // 2
+        if self.anchor == "top_left":
+            return px, py
+        if self.anchor == "top_center":
+            return px - img_w // 2, py
+        if self.anchor == "bottom_center":
+            return px - img_w // 2, py - img_h
+        if self.anchor == "bottom_left":
+            return px, py - img_h
+        # bottom_right
+        return px - img_w, py - img_h
+    def _resized_overlay(self, frame_w: int) -> np.ndarray:
+        target_w = max(1, round(self.scale * frame_w))
+        if self._is_svg():
+            # Rasterise the vector at the target size (crisp) rather than
+            # upscaling a fixed bitmap. resvg derives height from the viewBox.
+            return self._rasterize_svg(target_w)
+        overlay = self._load_overlay()
+        src_h, src_w = overlay.shape[:2]
+        target_h = max(1, round(target_w * src_h / src_w))
+        if (target_w, target_h) == (src_w, src_h):
+            return overlay
+        resized = Image.fromarray(overlay).resize((target_w, target_h), Image.LANCZOS)
+        return np.array(resized, dtype=np.uint8)
+    def _blend_params(
+        self, frame_w: int, frame_h: int
+    ) -> tuple[np.ndarray, np.ndarray, tuple[int, int, int, int]] | None:
+        """Placement + blend inputs shared by the eager and streaming paths.
+        Single source of truth so the two paths cannot drift -- the
+        eager/stream parity-hole class of bug fixed in 0.34.1. Returns ``None``
+        when the overlay lands fully off-frame (the effect is a no-op).
+        """
+        overlay = self._resized_overlay(frame_w)
+        oh, ow = overlay.shape[:2]
+        x, y = self._compute_position(frame_w, frame_h, ow, oh)
+        src_x = max(0, -x)
+        src_y = max(0, -y)
+        dst_x = max(0, x)
+        dst_y = max(0, y)
+        paste_w = min(ow - src_x, frame_w - dst_x)
+        paste_h = min(oh - src_y, frame_h - dst_y)
+        if paste_w <= 0 or paste_h <= 0:
+            return None
+        region = overlay[src_y : src_y + paste_h, src_x : src_x + paste_w]
+        alpha = (region[:, :, 3:4].astype(np.float32) / 255.0) * self.opacity
+        rgb = region[:, :, :3].astype(np.float32)
+        return alpha, rgb, (dst_y, dst_x, paste_h, paste_w)
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        params = self._blend_params(width, height)
+        if params is None:
+            self._stream_noop = True
+            return
+        self._stream_noop = False
+        self._stream_alpha, self._stream_rgb, self._stream_dst = params
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        if self._stream_noop:
+            return frame
+        assert self._stream_alpha is not None and self._stream_rgb is not None
+        dy, dx, ph, pw = self._stream_dst
+        region = frame[dy : dy + ph, dx : dx + pw]
+        blended = (
+            self._stream_rgb * self._stream_alpha + region.astype(np.float32) * (1.0 - self._stream_alpha)
+        ).astype(np.uint8)
+        frame[dy : dy + ph, dx : dx + pw] = blended
+        return frame
+    def _apply(self, video: Video) -> Video:
+        frame_h, frame_w = video.frame_shape[:2]
+        params = self._blend_params(frame_w, frame_h)
+        if params is None:
+            return video
+        alpha, rgb, (dy, dx, ph, pw) = params
+        logger.info("Applying image overlay...")
+        for frame in tqdm(video.frames, desc="Image overlay"):
+            region = frame[dy : dy + ph, dx : dx + pw]
+            blended = (rgb * alpha + region.astype(np.float32) * (1.0 - alpha)).astype(np.uint8)
+            frame[dy : dy + ph, dx : dx + pw] = blended
+        return video
 class Shake(Effect):
     """Per-frame camera shake: jitters every frame by a random or rhythmic offset.

{videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/operation.py RENAMED Viewed

@@ -175,7 +175,18 @@ class Operation(BaseModel):
         raise NotImplementedError(f"{type(self).__name__}.apply not implemented")
     def predict_metadata(self, meta: VideoMetadata) -> VideoMetadata:
-        """Predict output metadata from input metadata. Default: identity."""
+        """Predict output metadata from input metadata. Default: identity.
+        Run during ``VideoEdit.validate()``'s dry-run, before any frames are
+        decoded. Beyond predicting shape, this is the fail-fast gate, and it
+        has one contract: **reject exactly the plans that would otherwise crash
+        or do unrecoverable / expensive work in** :meth:`apply` **/** ``run()``;
+        anything ``run()`` can absorb by graceful degradation is NOT rejected.
+        ``TranscriptionOverlay`` rejects un-fittable subtitles (they used to
+        crash mid-render); ``TextOverlay``/``ImageOverlay`` do not reject
+        off-frame geometry (it clips to a valid no-op). Keep the check
+        metadata-cheap -- no frame decode.
+        """
         return meta
     def to_ffmpeg_filter(self, ctx: FilterCtx) -> str | None:

{videopython-0.34.0 → videopython-0.35.0}/src/videopython/editing/transcription_overlay.py RENAMED Viewed

@@ -320,6 +320,11 @@ class TranscriptionOverlay(Effect):
         the fit search and the renderer, so they never diverge. Margin math
         comes from ``ImageText.available_region`` (one source of truth with
         ``measure_text_box``).
+        The highlight multiplier is threaded in so the measurement is
+        worst-case for the animated word enlargement: a cue that fits at base
+        size but overflows once a word is highlighted is rejected here (and
+        auto-shrunk by ``_resolve_layout``) instead of crashing mid-render.
         """
         rect = img_text.measure_text_box(
             text=text,
@@ -329,13 +334,19 @@ class TranscriptionOverlay(Effect):
             font_size=font_px,
             anchor=cfg.anchor,
             margin=cfg.margin,
+            highlight_size_multiplier=cfg.style.highlight_size_multiplier,
+            highlight_bold_font=self.highlight_bold_font,
         )
         if rect.height == 0:
             return None
         box_w = int(rect.width)
         box_h = rect.height
         left, top, avail_w, avail_h = img_text.available_region(cfg.margin)
-        fits = box_w <= avail_w and box_h <= avail_h
+        # The box must fit the drawable area, AND the worst-case rendered line
+        # (incl. the enlarged highlighted word, or an unbreakable long word)
+        # must fit the box -- else the centered line spills off-frame at draw
+        # time. Failing this shrinks the font in ``_resolve_layout``.
+        fits = box_w <= avail_w and box_h <= avail_h and rect.content_width <= box_w
         x = min(max(int(round(rect.x)), left), left + avail_w - box_w)
         y = min(max(int(round(rect.y)), top), top + avail_h - box_h)
         return _CueBox(x=x, y=y, box_w=box_w, height=box_h, fits=fits)