video2flow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- video2flow-0.1.0/PKG-INFO +69 -0
- video2flow-0.1.0/README.md +44 -0
- video2flow-0.1.0/pyproject.toml +41 -0
- video2flow-0.1.0/setup.cfg +4 -0
- video2flow-0.1.0/src/video2flow/__init__.py +3 -0
- video2flow-0.1.0/src/video2flow/cli.py +52 -0
- video2flow-0.1.0/src/video2flow/core.py +56 -0
- video2flow-0.1.0/src/video2flow/describe.py +120 -0
- video2flow-0.1.0/src/video2flow.egg-info/PKG-INFO +69 -0
- video2flow-0.1.0/src/video2flow.egg-info/SOURCES.txt +13 -0
- video2flow-0.1.0/src/video2flow.egg-info/dependency_links.txt +1 -0
- video2flow-0.1.0/src/video2flow.egg-info/entry_points.txt +2 -0
- video2flow-0.1.0/src/video2flow.egg-info/requires.txt +4 -0
- video2flow-0.1.0/src/video2flow.egg-info/top_level.txt +1 -0
- video2flow-0.1.0/tests/test_video2flow.py +38 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: video2flow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extract video frames as descriptive text flows for LLM consumption
|
|
5
|
+
Author-email: deepstrain <dev@massiron.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/massiron/video2flow
|
|
8
|
+
Project-URL: Repository, https://github.com/massiron/video2flow
|
|
9
|
+
Project-URL: Documentation, https://github.com/massiron/video2flow#readme
|
|
10
|
+
Keywords: video,frames,llm,vision,description
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: opencv-python>=4.8
|
|
22
|
+
Requires-Dist: typer>=0.9
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
Requires-Dist: pillow>=10.0
|
|
25
|
+
|
|
26
|
+
# video2flow
|
|
27
|
+
|
|
28
|
+
**Extract video frames as descriptive text flows for LLM consumption.**
|
|
29
|
+
|
|
30
|
+
Claude, GPT-4o, Gemini and other multimodal models can see images but not
|
|
31
|
+
video. `video2flow` bridges the gap: extract frames → generate timestamped
|
|
32
|
+
descriptions → feed the text flow to any LLM.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install video2flow
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Extract frames from a video
|
|
44
|
+
video2flow extract video.mp4 -o frames/ --fps 1
|
|
45
|
+
|
|
46
|
+
# Quick description (without vision API)
|
|
47
|
+
video2flow describe video.mp4 --max-frames 10
|
|
48
|
+
|
|
49
|
+
# Full pipeline
|
|
50
|
+
video2flow pipeline video.mp4 -o video_flow/
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## LLM Integration
|
|
54
|
+
|
|
55
|
+
Pass the output JSON to any LLM:
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
import json
|
|
59
|
+
flow = json.loads(open("video_flow/flow.json").read())
|
|
60
|
+
prompt = flow["usage"]["example_prompt"]
|
|
61
|
+
# Then: response = llm.invoke(prompt)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
For detailed vision understanding, send image files directly to a
|
|
65
|
+
multimodal model alongside the flow transcript.
|
|
66
|
+
|
|
67
|
+
## License
|
|
68
|
+
|
|
69
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# video2flow
|
|
2
|
+
|
|
3
|
+
**Extract video frames as descriptive text flows for LLM consumption.**
|
|
4
|
+
|
|
5
|
+
Claude, GPT-4o, Gemini and other multimodal models can see images but not
|
|
6
|
+
video. `video2flow` bridges the gap: extract frames → generate timestamped
|
|
7
|
+
descriptions → feed the text flow to any LLM.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install video2flow
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Extract frames from a video
|
|
19
|
+
video2flow extract video.mp4 -o frames/ --fps 1
|
|
20
|
+
|
|
21
|
+
# Quick description (without vision API)
|
|
22
|
+
video2flow describe video.mp4 --max-frames 10
|
|
23
|
+
|
|
24
|
+
# Full pipeline
|
|
25
|
+
video2flow pipeline video.mp4 -o video_flow/
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## LLM Integration
|
|
29
|
+
|
|
30
|
+
Pass the output JSON to any LLM:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import json
|
|
34
|
+
flow = json.loads(open("video_flow/flow.json").read())
|
|
35
|
+
prompt = flow["usage"]["example_prompt"]
|
|
36
|
+
# Then: response = llm.invoke(prompt)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
For detailed vision understanding, send image files directly to a
|
|
40
|
+
multimodal model alongside the flow transcript.
|
|
41
|
+
|
|
42
|
+
## License
|
|
43
|
+
|
|
44
|
+
MIT
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=75.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "video2flow"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Extract video frames as descriptive text flows for LLM consumption"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{name = "deepstrain", email = "dev@massiron.com"}]
|
|
13
|
+
keywords = ["video", "frames", "llm", "vision", "description"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
dependencies = [
|
|
26
|
+
"opencv-python>=4.8",
|
|
27
|
+
"typer>=0.9",
|
|
28
|
+
"rich>=13.0",
|
|
29
|
+
"pillow>=10.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/massiron/video2flow"
|
|
34
|
+
Repository = "https://github.com/massiron/video2flow"
|
|
35
|
+
Documentation = "https://github.com/massiron/video2flow#readme"
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
video2flow = "video2flow.cli:app"
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["src"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""video2flow CLI — Typer-based command line interface."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from video2flow.core import extract_frames
|
|
8
|
+
from video2flow.describe import describe_frames, describe_video
|
|
9
|
+
|
|
10
|
+
# Root Typer application; the @app.command() functions in this module register on it.
app = typer.Typer(help="video2flow — Video frame description for LLMs")
|
|
11
|
+
# Shared Rich console that the commands in this module print their status lines to.
console = Console()
|
|
12
|
+
|
|
13
|
+
@app.command()
def extract(
    video: Path = typer.Argument(..., help="Input video file"),
    output: Path = typer.Option(Path("frames"), "--output", "-o", help="Output directory"),
    fps: float = typer.Option(1.0, "--fps", help="Frames per second to extract"),
    max_frames: int = typer.Option(0, "--max-frames", "-n", help="Maximum frames (0 = unlimited)"),
):
    """Extract frames from a video file."""
    # Delegate the actual work to the core extractor; it returns a summary dict.
    summary = extract_frames(video, output, fps=fps, max_frames=max_frames)

    # Report how many frames were written and the absolute destination path.
    saved_count = summary["frame_count"]
    destination = output.resolve()
    console.print(f"[green]OK[/green] Extracted {saved_count} frames -> [bold]{destination}[/bold]")
|
|
23
|
+
|
|
24
|
+
@app.command()
def describe(
    video: Path = typer.Argument(..., help="Input video file"),
    fps: float = typer.Option(1.0, "--fps", help="Frames per second to sample"),
    max_frames: int = typer.Option(10, "--max-frames", "-n", help="Maximum frames to describe"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output JSON file"),
    provider: str = typer.Option("none", "--provider", "-p", help="Vision provider (none = filename-only descriptions)"),
):
    """Extract frames and generate textual descriptions for LLM consumption."""
    result = describe_video(video, fps=fps, max_frames=max_frames, provider=provider)

    # Without --output, just print the plain-text transcript to the console.
    if output is None:
        console.print(result["transcript"])
        return

    import json

    # Create missing parent directories, mirroring what describe_frames does
    # for its own output file, so "-o out/dir/flow.json" works on a fresh tree.
    output.parent.mkdir(parents=True, exist_ok=True)

    # The payload is serialized with ensure_ascii=False, so it may contain
    # non-ASCII characters; pin UTF-8 instead of relying on the locale encoding.
    output.write_text(
        json.dumps(result, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    console.print(f"[green]OK[/green] Descriptions saved -> [bold]{output.resolve()}[/bold]")
|
|
40
|
+
|
|
41
|
+
@app.command()
def pipeline(
    video: Path = typer.Argument(..., help="Input video file"),
    output: Path = typer.Option(Path("video_flow"), "--output", "-o", help="Output directory"),
    fps: float = typer.Option(1.0, "--fps", help="Frames per second"),
    max_frames: int = typer.Option(0, "--max-frames", "-n", help="Maximum frames"),
):
    """Full pipeline: extract frames + generate description flow."""
    # Frames go into <output>/frames and the description JSON into <output>/flow.json.
    frames_dir = output / "frames"
    flow_file = output / "flow.json"

    extraction = extract_frames(video, frames_dir, fps=fps, max_frames=max_frames)
    flow = describe_frames(frames_dir, flow_file)

    destination = output.resolve()
    console.print(f"[green]OK[/green] Pipeline complete -> [bold]{destination}[/bold]")
    console.print(f" Frames: {extraction['frame_count']}, Segments: {flow['segment_count']}")
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""video2flow core — Extract frames from video files."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def extract_frames(
    video: Path,
    output: Path,
    fps: float = 1.0,
    max_frames: int = 0,
) -> dict:
    """Sample frames from a video file and save them as JPEG images.

    Frames are written to ``output`` as ``frame_<index>_<timestamp>s.jpg``,
    where ``<index>`` is the running count of saved frames and ``<timestamp>``
    is the frame's position in seconds derived from the container's FPS.

    Args:
        video: Path to the input video file.
        output: Directory to write frames into (created if missing).
        fps: Target number of frames to keep per second of video.
        max_frames: Upper bound on saved frames; ``0`` (or less) means no limit.

    Returns:
        A dict with the keys ``video``, ``total_frames``, ``video_fps``,
        ``duration_s``, ``extracted_fps``, ``frame_count`` and ``output_dir``.

    Raises:
        ValueError: If ``fps`` is not positive.
        FileNotFoundError: If ``video`` does not exist.
        RuntimeError: If OpenCV cannot open the video.
        OSError: If a frame cannot be written to ``output``.
    """
    # Validate cheap preconditions before touching the filesystem or OpenCV.
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    video = Path(video).resolve()
    if not video.exists():
        raise FileNotFoundError(f"Video not found: {video}")

    # Only create the output directory once we know there is a video to read.
    output = Path(output).resolve()
    output.mkdir(parents=True, exist_ok=True)

    import cv2

    cap = cv2.VideoCapture(str(video))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video: {video}")

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Some containers report 0 FPS; guard every division that uses it.
        fps_known = video_fps > 0
        duration = total_frames / video_fps if fps_known else 0

        # Keep every ``interval``-th decoded frame (at least every frame).
        interval = max(1, int(video_fps / fps))

        frame_count = 0
        saved = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % interval == 0:
                ts = frame_count / video_fps if fps_known else 0.0
                name = f"frame_{saved:06d}_{ts:06.2f}s.jpg"
                path = output / name
                written = cv2.imwrite(str(path), frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
                if not written:
                    # imwrite signals failure through its return value; do not
                    # count a frame that never reached the disk.
                    raise OSError(f"Failed to write frame: {path}")
                saved += 1
                # Stop as soon as the quota is met instead of decoding further
                # frames only to discard them.
                if max_frames > 0 and saved >= max_frames:
                    break
            frame_count += 1
    finally:
        # Release the capture handle even when reading or writing fails.
        cap.release()

    return {
        "video": str(video),
        "total_frames": total_frames,
        "video_fps": round(video_fps, 2),
        "duration_s": round(duration, 2),
        "extracted_fps": fps,
        "frame_count": saved,
        "output_dir": str(output),
    }
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""video2flow describe — Generate text descriptions of video frames for LLMs."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def describe_frames(frame_dir: Path, output: Optional[Path] = None) -> dict:
    """Build a timestamped description flow for the frame images in a directory.

    Every ``*.jpg`` and ``*.png`` file directly inside ``frame_dir`` becomes
    one segment (all JPEGs in name order, followed by all PNGs in name order).
    The timestamp of a segment is parsed from the file name; the description
    contains the frame index, the formatted timestamp and the image size.

    Args:
        frame_dir: Directory containing the frame images.
        output: Optional path of a JSON file to write the result to; missing
            parent directories are created.

    Returns:
        A dict with ``video_source`` (the parent of ``frame_dir``),
        ``total_segments``, ``segment_count``, ``duration_s``, ``segments``,
        ``transcript`` and a ``usage`` section holding an example prompt.

    Raises:
        FileNotFoundError: If ``frame_dir`` does not exist.
    """
    frame_dir = Path(frame_dir)
    if not frame_dir.exists():
        raise FileNotFoundError(f"Frame directory not found: {frame_dir}")

    frames = sorted(frame_dir.glob("*.jpg")) + sorted(frame_dir.glob("*.png"))
    segments = []

    for i, fp in enumerate(frames):
        ts = _parse_timestamp(fp.stem)
        size = fp.stat().st_size
        dimension = _get_image_dimension(fp)
        # Format once and reuse for both the segment field and the description.
        stamp = _format_time(ts)

        segments.append({
            "segment": i,
            "file": fp.name,
            "timestamp_s": ts,
            "timestamp_str": stamp,
            "size_bytes": size,
            "description": f"[Frame {i}] at {stamp} — {dimension}",
            "hint": (
                "Pass this image to a vision-capable LLM (Claude 3.5+, GPT-4o, "
                "Gemini 2.0) for detailed scene description."
            ),
        })

    transcript = "\n".join(
        f"[{s['timestamp_str']}] {s['description']}" for s in segments
    )

    # JPEG and PNG frames are listed as two separately sorted runs, so the last
    # segment is not necessarily the latest one; take the maximum timestamp.
    duration = max((s["timestamp_s"] for s in segments), default=0)

    result = {
        "video_source": str(frame_dir.parent),
        "total_segments": len(segments),
        "segment_count": len(segments),
        "duration_s": duration,
        "segments": segments,
        "transcript": transcript,
        "usage": {
            "description": (
                "Pass the 'segments' array to any LLM. Each segment contains "
                "a timestamp and filename. For detailed vision understanding, "
                "send the image files directly to a multimodal model."
            ),
            "example_prompt": (
                "You are analyzing a video. Below is the frame sequence:\n"
                + transcript
            ),
        },
    }

    if output:
        output = Path(output)
        output.parent.mkdir(parents=True, exist_ok=True)
        # The JSON keeps non-ASCII characters verbatim (ensure_ascii=False), so
        # the file must be written as UTF-8 rather than in the locale encoding.
        output.write_text(
            json.dumps(result, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )

    return result
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def describe_video(
    video: Path,
    fps: float = 1.0,
    max_frames: int = 10,
    provider: str = "none",
) -> dict:
    """Extract frames from a video into a scratch directory and describe them.

    The frames are written to a temporary directory that is removed again
    before this function returns, so only the metadata survives. The
    extraction summary is attached to the result under ``"extraction"``.

    Args:
        video: Path to the input video file.
        fps: Frames per second to sample.
        max_frames: Maximum number of frames to extract.
        provider: Accepted for the caller's convenience; this function does
            not read it.

    Returns:
        The dict produced by ``describe_frames`` plus an ``"extraction"`` key.
    """
    from video2flow.core import extract_frames

    import shutil
    import tempfile

    scratch = Path(tempfile.mkdtemp(prefix="v2f_"))
    try:
        extraction = extract_frames(
            video=video,
            output=scratch / "frames",
            fps=fps,
            max_frames=max_frames,
        )
        flow = describe_frames(Path(extraction["output_dir"]))
        flow["extraction"] = extraction
    finally:
        # Best-effort cleanup: never let a failed delete mask the real outcome.
        shutil.rmtree(scratch, ignore_errors=True)
    return flow
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _parse_timestamp(stem: str) -> float:
    """Return the timestamp in seconds encoded in a frame file stem.

    The stem is split on ``_`` and the first token of the form ``<number>s``
    (for example ``005.50s``) is returned as a float. Tokens that merely
    contain an ``s`` somewhere, or whose number is not finite, are ignored.

    Args:
        stem: File name without extension, e.g. ``frame_000005_005.50s``.

    Returns:
        The parsed number of seconds, or ``0.0`` when no token matches.
    """
    import math

    for token in stem.split("_"):
        # Only a trailing "s" marks a seconds value; stripping every "s" from
        # the token would turn unrelated names like "1s2" into numbers.
        if not token.endswith("s"):
            continue
        try:
            value = float(token[:-1])
        except ValueError:
            continue
        # float() also accepts "inf"/"nan", which cannot be formatted as a time.
        if math.isfinite(value):
            return value
    return 0.0
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _format_time(seconds: float) -> str:
    """Format a number of seconds as ``HH:MM:SS``.

    The fractional part is truncated; each field is zero-padded to at least
    two digits.
    """
    whole = int(seconds)
    hours = whole // 3600
    minutes = (whole % 3600) // 60
    secs = whole % 60
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _get_image_dimension(path: Path) -> str:
    """Return the image size as ``"<width>x<height>"``.

    Any failure (including Pillow not being importable or the file not being
    a readable image) yields the string ``"unknown"`` instead of an exception.
    """
    try:
        # Imported inside the try so an import failure is also reported as "unknown".
        from PIL import Image

        with Image.open(path) as image:
            width, height = image.size
    except Exception:
        return "unknown"
    return f"{width}x{height}"
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: video2flow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extract video frames as descriptive text flows for LLM consumption
|
|
5
|
+
Author-email: deepstrain <dev@massiron.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/massiron/video2flow
|
|
8
|
+
Project-URL: Repository, https://github.com/massiron/video2flow
|
|
9
|
+
Project-URL: Documentation, https://github.com/massiron/video2flow#readme
|
|
10
|
+
Keywords: video,frames,llm,vision,description
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: opencv-python>=4.8
|
|
22
|
+
Requires-Dist: typer>=0.9
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
Requires-Dist: pillow>=10.0
|
|
25
|
+
|
|
26
|
+
# video2flow
|
|
27
|
+
|
|
28
|
+
**Extract video frames as descriptive text flows for LLM consumption.**
|
|
29
|
+
|
|
30
|
+
Claude, GPT-4o, Gemini and other multimodal models can see images but not
|
|
31
|
+
video. `video2flow` bridges the gap: extract frames → generate timestamped
|
|
32
|
+
descriptions → feed the text flow to any LLM.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install video2flow
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Extract frames from a video
|
|
44
|
+
video2flow extract video.mp4 -o frames/ --fps 1
|
|
45
|
+
|
|
46
|
+
# Quick description (without vision API)
|
|
47
|
+
video2flow describe video.mp4 --max-frames 10
|
|
48
|
+
|
|
49
|
+
# Full pipeline
|
|
50
|
+
video2flow pipeline video.mp4 -o video_flow/
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## LLM Integration
|
|
54
|
+
|
|
55
|
+
Pass the output JSON to any LLM:
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
import json
|
|
59
|
+
flow = json.loads(open("video_flow/flow.json").read())
|
|
60
|
+
prompt = flow["usage"]["example_prompt"]
|
|
61
|
+
# Then: response = llm.invoke(prompt)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
For detailed vision understanding, send image files directly to a
|
|
65
|
+
multimodal model alongside the flow transcript.
|
|
66
|
+
|
|
67
|
+
## License
|
|
68
|
+
|
|
69
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/video2flow/__init__.py
|
|
4
|
+
src/video2flow/cli.py
|
|
5
|
+
src/video2flow/core.py
|
|
6
|
+
src/video2flow/describe.py
|
|
7
|
+
src/video2flow.egg-info/PKG-INFO
|
|
8
|
+
src/video2flow.egg-info/SOURCES.txt
|
|
9
|
+
src/video2flow.egg-info/dependency_links.txt
|
|
10
|
+
src/video2flow.egg-info/entry_points.txt
|
|
11
|
+
src/video2flow.egg-info/requires.txt
|
|
12
|
+
src/video2flow.egg-info/top_level.txt
|
|
13
|
+
tests/test_video2flow.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
video2flow
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Tests for video2flow."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from video2flow.describe import describe_frames, _parse_timestamp, _format_time
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_parse_timestamp():
    """Timestamps embedded in extractor-style frame stems are parsed as seconds."""
    expected_by_stem = {
        "frame_000000_000.00s": 0.0,
        "frame_000005_005.50s": 5.5,
    }
    for stem, expected in expected_by_stem.items():
        assert _parse_timestamp(stem) == expected
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_format_time():
    """Seconds are rendered as zero-padded HH:MM:SS."""
    cases = [
        (0, "00:00:00"),
        (3661, "01:01:01"),
    ]
    for seconds, expected in cases:
        assert _format_time(seconds) == expected
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_describe_frames_empty_dir():
    """A frame directory that does not exist raises FileNotFoundError."""
    import pytest

    with tempfile.TemporaryDirectory() as tmp:
        missing_dir = Path(tmp) / "nonexistent"
        with pytest.raises(FileNotFoundError):
            describe_frames(missing_dir)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_describe_frames_with_images():
    """Two frame files in a directory produce two segments and a usable flow."""
    frame_names = (
        "frame_000000_000.00s.jpg",
        "frame_000001_001.00s.jpg",
    )
    with tempfile.TemporaryDirectory() as tmp:
        frame_dir = Path(tmp) / "frames"
        frame_dir.mkdir()
        # The content is not a real image; only the file names matter here.
        for frame_name in frame_names:
            (frame_dir / frame_name).write_text("fake")

        flow = describe_frames(frame_dir)

        assert flow["segment_count"] == 2
        assert len(flow["segments"]) == 2
        assert flow["transcript"] != ""
        assert "example_prompt" in flow["usage"]
|