videopython 0.37.0__tar.gz → 0.38.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.37.0 → videopython-0.38.0}/PKG-INFO +5 -3
- {videopython-0.37.0 → videopython-0.38.0}/README.md +4 -2
- {videopython-0.37.0 → videopython-0.38.0}/pyproject.toml +1 -1
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/__init__.py +9 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/exceptions.py +39 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/effects.py +6 -1
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/operation.py +114 -11
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/transforms.py +43 -4
- videopython-0.38.0/src/videopython/editing/video_edit.py +1353 -0
- videopython-0.37.0/src/videopython/editing/video_edit.py +0 -857
- {videopython-0.37.0 → videopython-0.38.0}/.gitignore +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/LICENSE +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/_device.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/config.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/dubber.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/expressiveness.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/loudness.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/pipeline.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/remux.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/dubbing/voice_sample.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/effects.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/transforms.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/faces.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/objects.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/video_analysis/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/video_analysis/analyzer.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/video_analysis/models.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/video_analysis/sampling.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/ai/video_analysis/stages.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/audio/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/audio/analysis.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/audio/audio.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/_dimensions.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/_ffmpeg.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/_video_io.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/description.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/draw_detections.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/Anton-OFL.txt +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/Anton-Regular.ttf +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/BebasNeue-OFL.txt +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/BebasNeue-Regular.ttf +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/DejaVuSans.ttf +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/LICENSE_DEJAVU +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/Lato-Bold.ttf +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/Lato-OFL.txt +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/Poppins-Bold.ttf +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/Poppins-OFL.txt +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/fonts/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/image_text.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/transcription.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/base/video.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/__init__.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/_easing.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/streaming.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/editing/transcription_overlay.py +0 -0
- {videopython-0.37.0 → videopython-0.38.0}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.37.0
|
|
3
|
+
Version: 0.38.0
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -109,9 +109,11 @@ video.add_audio(audio).save("ai_video.mp4")
|
|
|
109
109
|
|
|
110
110
|
## LLM & AI Agent Integration
|
|
111
111
|
|
|
112
|
-
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API.
|
|
112
|
+
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API. Pass `strict=True` for a provider strict-mode grammar that prevents simple bound violations at decode time.
|
|
113
113
|
|
|
114
|
-
|
|
114
|
+
The plan parses permissively (shape only) and owns numeric bounds at validation, so a refine loop converges fast: `edit.check(meta)` collects **every** structured `PlanError` in one pass, `edit.repair(meta)` auto-clamps the mechanical violations (window/timestamp overruns, negatives) with a reported changelog, and `edit.normalize_dimensions(meta, target)` makes heterogeneous segments concat-compatible by construction. `edit.validate()` still raises a typed `PlanValidationError` (a `ValueError` with structured `.errors`) for the single-error path.
|
|
115
|
+
|
|
116
|
+
See the [LLM Integration Guide](https://videopython.com/guides/llm-integration/) for end-to-end examples, the collect/repair/normalize refine loop, and operation discovery patterns.
|
|
115
117
|
|
|
116
118
|
## Features
|
|
117
119
|
|
|
@@ -60,9 +60,11 @@ video.add_audio(audio).save("ai_video.mp4")
|
|
|
60
60
|
|
|
61
61
|
## LLM & AI Agent Integration
|
|
62
62
|
|
|
63
|
-
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API.
|
|
63
|
+
Every operation is a Pydantic model whose fields ARE the JSON wire format. `VideoEdit.json_schema()` returns a JSON Schema with a discriminated union over every LLM-exposed `Operation` (server-only ops like `image_overlay` are excluded by default) — pass it straight to Anthropic tool use, OpenAI function calling, or any structured-output API. Pass `strict=True` for a provider strict-mode grammar that prevents simple bound violations at decode time.
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
The plan parses permissively (shape only) and owns numeric bounds at validation, so a refine loop converges fast: `edit.check(meta)` collects **every** structured `PlanError` in one pass, `edit.repair(meta)` auto-clamps the mechanical violations (window/timestamp overruns, negatives) with a reported changelog, and `edit.normalize_dimensions(meta, target)` makes heterogeneous segments concat-compatible by construction. `edit.validate()` still raises a typed `PlanValidationError` (a `ValueError` with structured `.errors`) for the single-error path.
|
|
66
|
+
|
|
67
|
+
See the [LLM Integration Guide](https://videopython.com/guides/llm-integration/) for end-to-end examples, the collect/repair/normalize refine loop, and operation discovery patterns.
|
|
66
68
|
|
|
67
69
|
## Features
|
|
68
70
|
|
|
@@ -15,6 +15,10 @@ from .exceptions import (
|
|
|
15
15
|
AudioError,
|
|
16
16
|
AudioLoadError,
|
|
17
17
|
OutOfBoundsError,
|
|
18
|
+
PlanError,
|
|
19
|
+
PlanErrorCode,
|
|
20
|
+
PlanRepair,
|
|
21
|
+
PlanValidationError,
|
|
18
22
|
TextRenderError,
|
|
19
23
|
TransformError,
|
|
20
24
|
VideoError,
|
|
@@ -41,6 +45,11 @@ __all__ = [
|
|
|
41
45
|
"TransformError",
|
|
42
46
|
"TextRenderError",
|
|
43
47
|
"OutOfBoundsError",
|
|
48
|
+
# Structured plan validation / repair
|
|
49
|
+
"PlanError",
|
|
50
|
+
"PlanErrorCode",
|
|
51
|
+
"PlanValidationError",
|
|
52
|
+
"PlanRepair",
|
|
44
53
|
# Text rendering primitives
|
|
45
54
|
"ImageText",
|
|
46
55
|
"TextAlign",
|
|
@@ -85,12 +85,26 @@ class PlanErrorCode(str, Enum):
|
|
|
85
85
|
instead of substring-matching the human message text.
|
|
86
86
|
"""
|
|
87
87
|
|
|
88
|
+
# Segment range vs source / shape.
|
|
88
89
|
SEGMENT_END_EXCEEDS_SOURCE = "segment_end_exceeds_source"
|
|
90
|
+
SEGMENT_NEGATIVE = "segment_negative"
|
|
91
|
+
SEGMENT_RANGE = "segment_range"
|
|
92
|
+
# Effect windows.
|
|
89
93
|
EFFECT_WINDOW_EXCEEDS_DURATION = "effect_window_exceeds_duration"
|
|
94
|
+
WINDOW_NEGATIVE = "window_negative"
|
|
95
|
+
WINDOW_ORDER = "window_order"
|
|
96
|
+
# Operation-level, metadata-relative checks.
|
|
90
97
|
CUT_EXCEEDS_DURATION = "cut_exceeds_duration"
|
|
98
|
+
OP_TIMESTAMP_OUT_OF_RANGE = "op_timestamp_out_of_range"
|
|
99
|
+
CROP_EXCEEDS_SOURCE = "crop_exceeds_source"
|
|
100
|
+
DEGENERATE_DURATION = "degenerate_duration"
|
|
101
|
+
SOURCE_UNREADABLE = "source_unreadable"
|
|
102
|
+
OP_PREDICTION_FAILED = "op_prediction_failed"
|
|
103
|
+
# Assembly / structural.
|
|
91
104
|
UNKNOWN_OP = "unknown_op"
|
|
92
105
|
CONCAT_MISMATCH = "concat_mismatch"
|
|
93
106
|
SUBTITLE_UNFITTABLE = "subtitle_unfittable"
|
|
107
|
+
POST_OP_REQUIRES_CONTEXT = "post_op_requires_context"
|
|
94
108
|
|
|
95
109
|
|
|
96
110
|
@dataclass
|
|
@@ -110,12 +124,37 @@ class PlanError:
|
|
|
110
124
|
predicted_duration: float | None = None
|
|
111
125
|
|
|
112
126
|
|
|
127
|
+
@dataclass
|
|
128
|
+
class PlanRepair:
|
|
129
|
+
"""A single change a repair/normalize pass made to a plan.
|
|
130
|
+
|
|
131
|
+
The structured changelog returned by :meth:`VideoEdit.repair` and
|
|
132
|
+
:meth:`VideoEdit.normalize_dimensions`. ``location`` is a path into the
|
|
133
|
+
plan (e.g. ``'segments[0].operations[1]'``); ``field`` is the changed
|
|
134
|
+
field (``'window.stop'``, ``'timestamp'``, ``'dimensions'``, ...). ``old``
|
|
135
|
+
and ``new`` carry the before/after values -- a ``float`` for numeric
|
|
136
|
+
clamps, a ``str`` for composite values like ``'768x432'``. ``code`` is the
|
|
137
|
+
:class:`PlanErrorCode` of the violation that was repaired, so a consumer
|
|
138
|
+
can surface "we trimmed your effect to fit" wording keyed on the class.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
location: str
|
|
142
|
+
field: str
|
|
143
|
+
old: float | str | None
|
|
144
|
+
new: float | str | None
|
|
145
|
+
code: PlanErrorCode
|
|
146
|
+
|
|
147
|
+
|
|
113
148
|
class PlanValidationError(ValueError):
|
|
114
149
|
"""Typed plan-validation failure carrying structured :class:`PlanError`s.
|
|
115
150
|
|
|
116
151
|
Subclasses ``ValueError`` so ``str(e)`` stays byte-identical to the bare
|
|
117
152
|
``ValueError`` prose emitted before this type existed -- existing
|
|
118
153
|
``pytest.raises(match=...)`` and consumer substring fallbacks keep working.
|
|
154
|
+
|
|
155
|
+
``str(e)`` is the first error's human message; ``.errors`` carries every
|
|
156
|
+
structured :class:`PlanError`. The non-raising :meth:`VideoEdit.check`
|
|
157
|
+
returns the same ``PlanError`` list directly.
|
|
119
158
|
"""
|
|
120
159
|
|
|
121
160
|
def __init__(self, message: str, errors: list[PlanError]):
|
|
@@ -29,6 +29,7 @@ from pydantic import Field, PrivateAttr, model_validator
|
|
|
29
29
|
from tqdm import tqdm
|
|
30
30
|
|
|
31
31
|
from videopython.base.description import BoundingBox
|
|
32
|
+
from videopython.base.exceptions import PlanError, PlanErrorCode, PlanValidationError
|
|
32
33
|
from videopython.base.fonts import load_font
|
|
33
34
|
from videopython.editing._easing import ease, ease_out
|
|
34
35
|
from videopython.editing.operation import Effect
|
|
@@ -860,7 +861,11 @@ class ImageOverlay(_AnchoredOverlay):
|
|
|
860
861
|
with Image.open(self.source) as im:
|
|
861
862
|
im.verify()
|
|
862
863
|
except (OSError, ValueError) as exc:
|
|
863
|
-
|
|
864
|
+
message = f"image_overlay source {str(self.source)!r} is not a readable image: {exc}"
|
|
865
|
+
raise PlanValidationError(
|
|
866
|
+
message,
|
|
867
|
+
[PlanError(code=PlanErrorCode.SOURCE_UNREADABLE, op=self.op, field="source")],
|
|
868
|
+
) from exc
|
|
864
869
|
return meta
|
|
865
870
|
|
|
866
871
|
def _rasterize_svg(self, target_w: int) -> np.ndarray:
|
|
@@ -30,12 +30,13 @@ Subclass contract::
|
|
|
30
30
|
|
|
31
31
|
from __future__ import annotations
|
|
32
32
|
|
|
33
|
+
import copy
|
|
33
34
|
from dataclasses import dataclass
|
|
34
35
|
from enum import Enum
|
|
35
|
-
from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Literal, Union, get_args, get_origin
|
|
36
|
+
from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Literal, NamedTuple, Union, get_args, get_origin
|
|
36
37
|
|
|
37
38
|
import numpy as np
|
|
38
|
-
from pydantic import BaseModel, ConfigDict, Discriminator, Field, TypeAdapter, model_validator
|
|
39
|
+
from pydantic import BaseModel, ConfigDict, Discriminator, Field, TypeAdapter
|
|
39
40
|
from tqdm import tqdm
|
|
40
41
|
|
|
41
42
|
if TYPE_CHECKING:
|
|
@@ -44,6 +45,7 @@ if TYPE_CHECKING:
|
|
|
44
45
|
__all__ = [
|
|
45
46
|
"OpCategory",
|
|
46
47
|
"TimeRange",
|
|
48
|
+
"BoundedTimeField",
|
|
47
49
|
"FilterCtx",
|
|
48
50
|
"Operation",
|
|
49
51
|
"Effect",
|
|
@@ -63,18 +65,36 @@ class TimeRange(BaseModel):
|
|
|
63
65
|
|
|
64
66
|
Either endpoint may be ``None``, meaning "from the beginning" / "to the
|
|
65
67
|
end" respectively. Used by :class:`Effect.window` and elsewhere.
|
|
68
|
+
|
|
69
|
+
Parsing is deliberately permissive: ``start``/``stop`` are plain floats
|
|
70
|
+
with no ``ge=0`` or ordering constraint. The plan skeleton accepts the
|
|
71
|
+
*shape*; the numeric bounds (``>= 0``, ``stop >= start``, in-duration) are
|
|
72
|
+
owned by :meth:`VideoEdit.validate` / :meth:`VideoEdit.check`, which report
|
|
73
|
+
them as structured, collectable, repairable :class:`PlanError`s instead of
|
|
74
|
+
aborting at ``from_dict``. :meth:`Effect._resolved_window` still clamps at
|
|
75
|
+
run time, so a plan run without validation degrades rather than crashes.
|
|
66
76
|
"""
|
|
67
77
|
|
|
68
78
|
model_config = ConfigDict(extra="forbid", frozen=True)
|
|
69
79
|
|
|
70
|
-
start: float | None = Field(None, ge=0, description="Start time in seconds. None means 0.")
|
|
71
|
-
stop: float | None = Field(None, ge=0, description="Stop time in seconds. None means end of video.")
|
|
80
|
+
start: float | None = Field(None, description="Start time in seconds. None means 0.")
|
|
81
|
+
stop: float | None = Field(None, description="Stop time in seconds. None means end of video.")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class BoundedTimeField(NamedTuple):
|
|
85
|
+
"""Declares a time-valued (seconds) op field that :meth:`VideoEdit.repair` clamps.
|
|
86
|
+
|
|
87
|
+
``name`` is the field; the lower bound is always ``0``. ``exclusive_end``
|
|
88
|
+
distinguishes how the upper bound is enforced so repair clamps exactly what
|
|
89
|
+
validation rejects: ``False`` permits the clip duration (reject ``value >
|
|
90
|
+
total_seconds``, clamp to the duration); ``True`` is for a field that indexes
|
|
91
|
+
a frame and so must be *strictly* less than the duration (reject ``value >=
|
|
92
|
+
total_seconds``, clamp to the last addressable frame ``(frame_count - 1) /
|
|
93
|
+
fps``) -- e.g. ``freeze_frame.timestamp``.
|
|
94
|
+
"""
|
|
72
95
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if self.start is not None and self.stop is not None and self.stop < self.start:
|
|
76
|
-
raise ValueError(f"TimeRange.stop ({self.stop}) must be >= start ({self.start})")
|
|
77
|
-
return self
|
|
96
|
+
name: str
|
|
97
|
+
exclusive_end: bool
|
|
78
98
|
|
|
79
99
|
|
|
80
100
|
@dataclass(frozen=True)
|
|
@@ -117,6 +137,60 @@ def _strip_llm_hidden(schema: dict[str, Any]) -> dict[str, Any]:
|
|
|
117
137
|
return schema
|
|
118
138
|
|
|
119
139
|
|
|
140
|
+
def _to_strict_schema(schema: dict[str, Any]) -> dict[str, Any]:
|
|
141
|
+
"""Rewrite a generated JSON schema into a provider strict-mode grammar.
|
|
142
|
+
|
|
143
|
+
Strict structured-output modes (OpenAI/OpenRouter ``json_schema``) require:
|
|
144
|
+
every object closed (``additionalProperties: false``); every declared
|
|
145
|
+
property listed in ``required``; and unions expressed as ``anyOf`` without a
|
|
146
|
+
``discriminator`` keyword. The ``default`` keyword (which strict mode
|
|
147
|
+
rejects, and which is moot once every field is required) is dropped. Numeric
|
|
148
|
+
constraints already emitted by Pydantic are kept verbatim.
|
|
149
|
+
|
|
150
|
+
Optionality is taken verbatim from what Pydantic emitted, *not* synthesized:
|
|
151
|
+
strict mode represents an optional field as a nullable required field, and
|
|
152
|
+
Pydantic already encodes exactly that -- an ``Optional`` field carries a
|
|
153
|
+
``{"type": "null"}`` branch while a defaulted-but-non-``Optional`` field
|
|
154
|
+
(e.g. ``operations: list = []``, ``match_to_lowest_fps: bool = True``) does
|
|
155
|
+
not. So we force every property into ``required`` without adding null
|
|
156
|
+
branches: synthesizing null for a non-``Optional`` field would let a grammar
|
|
157
|
+
emit a null the Pydantic model then rejects -- reintroducing the very
|
|
158
|
+
re-prompt strict mode exists to remove. The union discriminator ``op`` is a
|
|
159
|
+
defaulted ``const`` and is likewise kept required and non-nullable for free.
|
|
160
|
+
|
|
161
|
+
Returns a new schema; the input is not mutated. Pydantic ``$ref``/``$defs``
|
|
162
|
+
indirection is left intact (providers resolve it); the per-``$defs`` object
|
|
163
|
+
bodies are rewritten in place of their definitions.
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
def walk(node: Any) -> Any:
|
|
167
|
+
if isinstance(node, list):
|
|
168
|
+
return [walk(item) for item in node]
|
|
169
|
+
if not isinstance(node, dict):
|
|
170
|
+
return node
|
|
171
|
+
|
|
172
|
+
out = {k: walk(v) for k, v in node.items()}
|
|
173
|
+
|
|
174
|
+
# A discriminated union: Pydantic emits `oneOf` + `discriminator`.
|
|
175
|
+
# Strict mode wants a plain `anyOf` of variants and no discriminator.
|
|
176
|
+
if "oneOf" in out:
|
|
177
|
+
out["anyOf"] = out.pop("oneOf")
|
|
178
|
+
# Drop keywords strict mode rejects (or that are moot once everything is
|
|
179
|
+
# required): the discriminator tag, `default`, custom `format`s like
|
|
180
|
+
# "path", and any `$schema`/`$id` envelope.
|
|
181
|
+
for key in ("discriminator", "default", "format", "$schema", "$id"):
|
|
182
|
+
out.pop(key, None)
|
|
183
|
+
|
|
184
|
+
# Close every object and require all of its properties. Nullability is
|
|
185
|
+
# left exactly as Pydantic emitted it (see the docstring) -- no synthesis.
|
|
186
|
+
if isinstance(out.get("properties"), dict):
|
|
187
|
+
out["additionalProperties"] = False
|
|
188
|
+
out["required"] = list(out["properties"].keys())
|
|
189
|
+
return out
|
|
190
|
+
|
|
191
|
+
return walk(copy.deepcopy(schema))
|
|
192
|
+
|
|
193
|
+
|
|
120
194
|
class Operation(BaseModel):
|
|
121
195
|
"""Pydantic base for every editing primitive.
|
|
122
196
|
|
|
@@ -137,6 +211,15 @@ class Operation(BaseModel):
|
|
|
137
211
|
streamable: ClassVar[bool] = False
|
|
138
212
|
requires: ClassVar[tuple[str, ...]] = ()
|
|
139
213
|
llm_exposed: ClassVar[bool] = True
|
|
214
|
+
time_fields: ClassVar[tuple[BoundedTimeField, ...]] = ()
|
|
215
|
+
"""Time-valued (seconds) fields :meth:`VideoEdit.repair` may clamp into range.
|
|
216
|
+
|
|
217
|
+
Declaring a :class:`BoundedTimeField` here lets ``repair`` clamp an
|
|
218
|
+
out-of-range timestamp (e.g. ``freeze_frame.timestamp`` past the clip end)
|
|
219
|
+
without per-op special-casing -- the repair pass reads the declaration,
|
|
220
|
+
clamps to ``[0, bound]``, and records a :class:`PlanRepair`. Empty by
|
|
221
|
+
default; ops with no time-valued params declare nothing.
|
|
222
|
+
"""
|
|
140
223
|
|
|
141
224
|
_registry: ClassVar[dict[str, type[Operation]]] = {}
|
|
142
225
|
|
|
@@ -196,7 +279,7 @@ class Operation(BaseModel):
|
|
|
196
279
|
raise KeyError(f"Unknown op_id {op_id!r}. Known ops: [{known}]") from exc
|
|
197
280
|
|
|
198
281
|
@classmethod
|
|
199
|
-
def json_schema(cls, include_server_only: bool = False) -> dict[str, Any]:
|
|
282
|
+
def json_schema(cls, include_server_only: bool = False, *, strict: bool = False) -> dict[str, Any]:
|
|
200
283
|
"""Discriminated-union JSON schema over registered Operations.
|
|
201
284
|
|
|
202
285
|
``op`` is the discriminator tag. This is the LLM-facing schema for
|
|
@@ -204,13 +287,33 @@ class Operation(BaseModel):
|
|
|
204
287
|
LLM-exposed ops (:meth:`llm_registry`); pass ``include_server_only=True``
|
|
205
288
|
to build the union from the full :meth:`registry`. Fields marked
|
|
206
289
|
``llm_hidden`` (advanced overrides like raw font paths) are stripped.
|
|
290
|
+
|
|
291
|
+
With ``strict=True`` the schema is rewritten for use as a provider
|
|
292
|
+
structured-output **grammar** (OpenAI/OpenRouter ``json_schema`` strict
|
|
293
|
+
mode): every object is closed (``additionalProperties: false``), every
|
|
294
|
+
property is listed in ``required`` with its optionality kept exactly as
|
|
295
|
+
Pydantic emitted it (an ``Optional`` field keeps its nullable branch; a
|
|
296
|
+
defaulted non-``Optional`` field -- including the ``op`` discriminator --
|
|
297
|
+
stays required and non-nullable), and the discriminated union is
|
|
298
|
+
expressed as a plain ``anyOf`` of closed variants (``discriminator``,
|
|
299
|
+
``default``, custom ``format``, and ``$schema`` -- all unsupported or moot
|
|
300
|
+
in strict mode -- are dropped). Numeric constraints
|
|
301
|
+
(``minimum``/``maximum``/``exclusiveMinimum``) are preserved, so an
|
|
302
|
+
entire class of bound violations becomes impossible at decode time.
|
|
303
|
+
|
|
304
|
+
Note: the strict result is a *root-level* ``anyOf`` union -- an embeddable
|
|
305
|
+
schema fragment, not a submittable strict root (providers require the root
|
|
306
|
+
to be a closed object). It is consumed inside
|
|
307
|
+
:meth:`VideoEdit.json_schema(strict=True) <VideoEdit.json_schema>`, which
|
|
308
|
+
*is* a submittable object root; use that to constrain a whole plan.
|
|
207
309
|
"""
|
|
208
310
|
source = Operation._registry if include_server_only else cls.llm_registry()
|
|
209
311
|
if not source:
|
|
210
312
|
return {"type": "object"}
|
|
211
313
|
ops = sorted(source.values(), key=lambda c: c.__name__)
|
|
212
314
|
annotated = Annotated[Union[tuple(ops)], Discriminator("op")] # type: ignore[valid-type] # noqa: UP007
|
|
213
|
-
|
|
315
|
+
schema = _strip_llm_hidden(TypeAdapter(annotated).json_schema())
|
|
316
|
+
return _to_strict_schema(schema) if strict else schema
|
|
214
317
|
|
|
215
318
|
@classmethod
|
|
216
319
|
def llm_json_schema(cls) -> dict[str, Any]:
|
|
@@ -19,7 +19,7 @@ from tqdm import tqdm
|
|
|
19
19
|
from videopython.base._dimensions import floor_to_even, round_to_even
|
|
20
20
|
from videopython.base.exceptions import PlanError, PlanErrorCode, PlanValidationError
|
|
21
21
|
from videopython.base.video import Video
|
|
22
|
-
from videopython.editing.operation import FilterCtx, OpCategory, Operation
|
|
22
|
+
from videopython.editing.operation import BoundedTimeField, FilterCtx, OpCategory, Operation
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
from videopython.base.transcription import Transcription
|
|
@@ -281,7 +281,19 @@ class Crop(Operation):
|
|
|
281
281
|
def predict_metadata(self, meta: VideoMetadata) -> VideoMetadata:
|
|
282
282
|
_, _, cw, ch = self._resolve_box(meta.width, meta.height)
|
|
283
283
|
if cw > meta.width or ch > meta.height:
|
|
284
|
-
|
|
284
|
+
message = f"Crop {cw}x{ch} exceeds source {meta.width}x{meta.height}"
|
|
285
|
+
raise PlanValidationError(
|
|
286
|
+
message,
|
|
287
|
+
[
|
|
288
|
+
PlanError(
|
|
289
|
+
code=PlanErrorCode.CROP_EXCEEDS_SOURCE,
|
|
290
|
+
op=self.op,
|
|
291
|
+
field="width" if cw > meta.width else "height",
|
|
292
|
+
value=float(cw if cw > meta.width else ch),
|
|
293
|
+
limit=float(meta.width if cw > meta.width else meta.height),
|
|
294
|
+
)
|
|
295
|
+
],
|
|
296
|
+
)
|
|
285
297
|
if self.mode == CropMode.CENTER:
|
|
286
298
|
# Mirror apply()'s `mid - cw//2 : mid + cw//2` slice, which
|
|
287
299
|
# produces 2 * (cw // 2) pixels — odd targets round down.
|
|
@@ -368,7 +380,18 @@ class SpeedChange(Operation):
|
|
|
368
380
|
def predict_metadata(self, meta: VideoMetadata) -> VideoMetadata:
|
|
369
381
|
new_count = self._new_frame_count(meta.frame_count)
|
|
370
382
|
if new_count == 0:
|
|
371
|
-
|
|
383
|
+
message = f"Speed {self.speed}x would result in 0 frames!"
|
|
384
|
+
raise PlanValidationError(
|
|
385
|
+
message,
|
|
386
|
+
[
|
|
387
|
+
PlanError(
|
|
388
|
+
code=PlanErrorCode.DEGENERATE_DURATION,
|
|
389
|
+
op=self.op,
|
|
390
|
+
field="speed",
|
|
391
|
+
value=self.speed,
|
|
392
|
+
)
|
|
393
|
+
],
|
|
394
|
+
)
|
|
372
395
|
from videopython.base.video import VideoMetadata as _Meta
|
|
373
396
|
|
|
374
397
|
return _Meta(
|
|
@@ -400,6 +423,9 @@ class FreezeFrame(Operation):
|
|
|
400
423
|
|
|
401
424
|
op: Literal["freeze_frame"] = "freeze_frame"
|
|
402
425
|
category: ClassVar[OpCategory] = OpCategory.TRANSFORM
|
|
426
|
+
# `timestamp` indexes a frame, so it must be strictly < the clip duration;
|
|
427
|
+
# repair clamps an out-of-range value to the last frame.
|
|
428
|
+
time_fields: ClassVar[tuple[BoundedTimeField, ...]] = (BoundedTimeField("timestamp", exclusive_end=True),)
|
|
403
429
|
|
|
404
430
|
timestamp: float = Field(ge=0, description="Time in seconds at which to capture the frame.")
|
|
405
431
|
duration: float = Field(2.0, gt=0, description="How long to hold the frozen frame, in seconds.")
|
|
@@ -453,7 +479,20 @@ class FreezeFrame(Operation):
|
|
|
453
479
|
|
|
454
480
|
def predict_metadata(self, meta: VideoMetadata) -> VideoMetadata:
|
|
455
481
|
if self.timestamp >= meta.total_seconds:
|
|
456
|
-
|
|
482
|
+
message = f"timestamp ({self.timestamp}) must be less than video duration ({meta.total_seconds})"
|
|
483
|
+
raise PlanValidationError(
|
|
484
|
+
message,
|
|
485
|
+
[
|
|
486
|
+
PlanError(
|
|
487
|
+
code=PlanErrorCode.OP_TIMESTAMP_OUT_OF_RANGE,
|
|
488
|
+
op=self.op,
|
|
489
|
+
field="timestamp",
|
|
490
|
+
value=self.timestamp,
|
|
491
|
+
limit=meta.total_seconds,
|
|
492
|
+
predicted_duration=meta.total_seconds,
|
|
493
|
+
)
|
|
494
|
+
],
|
|
495
|
+
)
|
|
457
496
|
freeze_count = round(self.duration * meta.fps)
|
|
458
497
|
if self.position in ("after", "before"):
|
|
459
498
|
new_count = meta.frame_count + freeze_count
|