video2flow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
video2flow/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """video2flow — Extract video frames as descriptive text flows for LLM consumption."""
2
+
3
+ __version__ = "0.1.0"
video2flow/cli.py ADDED
@@ -0,0 +1,52 @@
1
+ """video2flow CLI — Typer-based command line interface."""
2
+
3
+ import typer
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ from rich.console import Console
7
+ from video2flow.core import extract_frames
8
+ from video2flow.describe import describe_frames, describe_video
9
+
10
+ app = typer.Typer(help="video2flow — Video frame description for LLMs")
11
+ console = Console()
12
+
13
@app.command()
def extract(
    video: Path = typer.Argument(..., help="Input video file"),
    output: Path = typer.Option(Path("frames"), "--output", "-o", help="Output directory"),
    fps: float = typer.Option(1.0, "--fps", help="Frames per second to extract"),
    max_frames: int = typer.Option(0, "--max-frames", "-n", help="Maximum frames (0 = unlimited)"),
):
    """Extract frames from a video file."""
    # The docstring above doubles as the CLI help text, so it is kept short.
    # Delegate the actual work to the core extractor, then report how many
    # frames it says were saved and where they were written.
    summary = extract_frames(video, output, fps=fps, max_frames=max_frames)
    saved = summary["frame_count"]
    destination = output.resolve()
    console.print(f"[green]OK[/green] Extracted {saved} frames -> [bold]{destination}[/bold]")
23
+
24
@app.command()
def describe(
    video: Path = typer.Argument(..., help="Input video file"),
    fps: float = typer.Option(1.0, "--fps", help="Frames per second to sample"),
    max_frames: int = typer.Option(10, "--max-frames", "-n", help="Maximum frames to describe"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Output JSON file"),
    provider: str = typer.Option("none", "--provider", "-p", help="Vision provider (none = filename-only descriptions)"),
):
    """Extract frames and generate textual descriptions for LLM consumption."""
    # The docstring above doubles as the CLI help text, so it is kept short.
    result = describe_video(video, fps=fps, max_frames=max_frames, provider=provider)

    # Without --output the transcript is simply printed to the console.
    if output is None:
        console.print(result["transcript"])
        return

    import json

    # Create missing parent directories so a nested --output path works.
    output.parent.mkdir(parents=True, exist_ok=True)
    # The payload is dumped with ensure_ascii=False, so it can contain
    # non-ASCII characters; write it explicitly as UTF-8 instead of relying
    # on the platform's locale encoding.
    output.write_text(
        json.dumps(result, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    console.print(f"[green]OK[/green] Descriptions saved -> [bold]{output.resolve()}[/bold]")
40
+
41
@app.command()
def pipeline(
    video: Path = typer.Argument(..., help="Input video file"),
    output: Path = typer.Option(Path("video_flow"), "--output", "-o", help="Output directory"),
    fps: float = typer.Option(1.0, "--fps", help="Frames per second"),
    max_frames: int = typer.Option(0, "--max-frames", "-n", help="Maximum frames"),
):
    """Full pipeline: extract frames + generate description flow."""
    # The docstring above doubles as the CLI help text, so it is kept short.
    # Step 1: write the sampled frames into <output>/frames.
    extraction = extract_frames(video, output / "frames", fps=fps, max_frames=max_frames)
    # Step 2: describe every frame found there and store the flow as
    # <output>/flow.json.
    flow = describe_frames(output / "frames", output / "flow.json")

    destination = output.resolve()
    console.print(f"[green]OK[/green] Pipeline complete -> [bold]{destination}[/bold]")
    console.print(f" Frames: {extraction['frame_count']}, Segments: {flow['segment_count']}")
video2flow/core.py ADDED
@@ -0,0 +1,56 @@
1
+ """video2flow core — Extract frames from video files."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+
7
def extract_frames(
    video: Path,
    output: Path,
    fps: float = 1.0,
    max_frames: int = 0,
) -> dict:
    """Sample frames from a video and save them as JPEG files.

    Every ``interval``-th decoded frame is written to ``output`` as
    ``frame_<index>_<timestamp>s.jpg``, where ``interval`` is the source
    frame rate divided by ``fps`` (truncated, at least 1).

    Args:
        video: Path to the input video file.
        output: Directory that receives the JPEG files; created if missing.
        fps: Target number of frames to keep per second of video. Must be
            greater than zero.
        max_frames: Upper bound on the number of saved frames; values
            ``<= 0`` mean no limit.

    Returns:
        A dict with the keys ``video``, ``total_frames``, ``video_fps``,
        ``duration_s``, ``extracted_fps``, ``frame_count`` and
        ``output_dir``.

    Raises:
        ValueError: If ``fps`` is not greater than zero.
        FileNotFoundError: If ``video`` does not exist.
        RuntimeError: If the video cannot be opened or a frame cannot be
            written.
    """
    # Reject a non-positive rate up front; it would otherwise divide by zero
    # (fps == 0) or be silently treated as "keep every frame" (fps < 0).
    if fps <= 0:
        raise ValueError(f"fps must be greater than 0, got {fps}")

    video = Path(video).resolve()
    if not video.exists():
        raise FileNotFoundError(f"Video not found: {video}")

    # Only create the output directory once the input is known to exist, so
    # a mistyped video path does not leave an empty directory behind.
    output = Path(output).resolve()
    output.mkdir(parents=True, exist_ok=True)

    import cv2

    cap = cv2.VideoCapture(str(video))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video: {video}")

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # When no usable frame rate is reported, keep every frame and fall
        # back to zero for all time-based values instead of dividing by it.
        has_fps = video_fps > 0
        duration = total_frames / video_fps if has_fps else 0
        interval = max(1, int(video_fps / fps)) if has_fps else 1

        frame_count = 0
        saved = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % interval == 0:
                ts = frame_count / video_fps if has_fps else 0.0
                name = f"frame_{saved:06d}_{ts:06.2f}s.jpg"
                path = output / name
                # imwrite reports failure through its return value; surface
                # it so the returned frame_count matches the files on disk.
                if not cv2.imwrite(str(path), frame, [cv2.IMWRITE_JPEG_QUALITY, 85]):
                    raise RuntimeError(f"Cannot write frame: {path}")
                saved += 1
                # Stop as soon as the limit is reached rather than decoding
                # further frames up to the next sampling point.
                if max_frames > 0 and saved >= max_frames:
                    break
            frame_count += 1
    finally:
        # Release the capture handle even when an error interrupts the loop.
        cap.release()

    return {
        "video": str(video),
        "total_frames": total_frames,
        "video_fps": round(video_fps, 2),
        "duration_s": round(duration, 2),
        "extracted_fps": fps,
        "frame_count": saved,
        "output_dir": str(output),
    }
video2flow/describe.py ADDED
@@ -0,0 +1,120 @@
1
+ """video2flow describe — Generate text descriptions of video frames for LLMs."""
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+
9
def describe_frames(frame_dir: Path, output: Optional[Path] = None) -> dict:
    """Build a timestamped text description for every frame in a directory.

    All ``*.jpg`` files (sorted by name) followed by all ``*.png`` files
    (sorted by name) in ``frame_dir`` become one segment each. The timestamp
    is parsed from the file name; the description only carries the frame
    index, the formatted time and the image dimensions.

    Args:
        frame_dir: Directory containing the frame images.
        output: Optional JSON file to write the result to. Missing parent
            directories are created.

    Returns:
        A dict with the keys ``video_source`` (the parent directory of
        ``frame_dir``), ``total_segments``, ``segment_count``,
        ``duration_s``, ``segments``, ``transcript`` and ``usage``.

    Raises:
        FileNotFoundError: If ``frame_dir`` does not exist.
    """
    frame_dir = Path(frame_dir)
    if not frame_dir.exists():
        raise FileNotFoundError(f"Frame directory not found: {frame_dir}")

    frames = sorted(frame_dir.glob("*.jpg")) + sorted(frame_dir.glob("*.png"))
    segments = []

    for i, fp in enumerate(frames):
        ts = _parse_timestamp(fp.stem)
        # Format once and reuse for both the segment field and description.
        time_str = _format_time(ts)
        size = fp.stat().st_size
        dimension = _get_image_dimension(fp)

        segments.append({
            "segment": i,
            "file": fp.name,
            "timestamp_s": ts,
            "timestamp_str": time_str,
            "size_bytes": size,
            "description": f"[Frame {i}] at {time_str} — {dimension}",
            "hint": (
                "Pass this image to a vision-capable LLM (Claude 3.5+, GPT-4o, "
                "Gemini 2.0) for detailed scene description."
            ),
        })

    transcript = "\n".join(
        f"[{s['timestamp_str']}] {s['description']}" for s in segments
    )

    result = {
        "video_source": str(frame_dir.parent),
        "total_segments": len(segments),
        "segment_count": len(segments),
        # JPG and PNG frames are listed one group after the other, so the
        # last segment is not necessarily the latest one; take the maximum.
        "duration_s": max((s["timestamp_s"] for s in segments), default=0),
        "segments": segments,
        "transcript": transcript,
        "usage": {
            "description": (
                "Pass the 'segments' array to any LLM. Each segment contains "
                "a timestamp and filename. For detailed vision understanding, "
                "send the image files directly to a multimodal model."
            ),
            "example_prompt": (
                "You are analyzing a video. Below is the frame sequence:\n"
                + transcript
            ),
        },
    }

    if output:
        output = Path(output)
        output.parent.mkdir(parents=True, exist_ok=True)
        # The payload is dumped with ensure_ascii=False and contains
        # non-ASCII text, so write it explicitly as UTF-8 rather than in the
        # platform's locale encoding.
        output.write_text(
            json.dumps(result, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )

    return result
67
+
68
+
69
def describe_video(
    video: Path,
    fps: float = 1.0,
    max_frames: int = 10,
    provider: str = "none",
) -> dict:
    """Extract frames from a video into a temp directory and describe them.

    Args:
        video: Path to the input video file.
        fps: Frames per second to sample.
        max_frames: Maximum number of frames to extract and describe.
        provider: Accepted for interface compatibility; this function does
            not currently read it.

    Returns:
        The dict produced by ``describe_frames`` with ``video_source`` set
        to the video path reported by the extractor, plus an ``extraction``
        key holding the extractor's summary. The frames live in a temporary
        directory that is removed before returning, so ``file`` entries and
        ``extraction["output_dir"]`` refer to paths that no longer exist.
    """
    import shutil
    import tempfile

    from video2flow.core import extract_frames

    tmpdir = Path(tempfile.mkdtemp(prefix="v2f_"))

    try:
        frames = extract_frames(
            video=video,
            output=tmpdir / "frames",
            fps=fps,
            max_frames=max_frames,
        )
        frame_dir = Path(frames["output_dir"])
        result = describe_frames(frame_dir)
        # describe_frames derives video_source from the frame directory's
        # parent, which here is the throwaway temp directory; report the
        # actual video instead.
        result["video_source"] = frames["video"]
        result["extraction"] = frames
        return result
    finally:
        # Best-effort cleanup: never let a failed removal mask the result or
        # the original exception.
        shutil.rmtree(tmpdir, ignore_errors=True)
94
+
95
+
96
+ def _parse_timestamp(stem: str) -> float:
97
+ parts = stem.split("_")
98
+ for p in parts:
99
+ if "s" in p:
100
+ try:
101
+ return float(p.replace("s", ""))
102
+ except ValueError:
103
+ pass
104
+ return 0.0
105
+
106
+
107
+ def _format_time(seconds: float) -> str:
108
+ h, r = divmod(int(seconds), 3600)
109
+ m, s = divmod(r, 60)
110
+ return f"{h:02d}:{m:02d}:{s:02d}"
111
+
112
+
113
+ def _get_image_dimension(path: Path) -> str:
114
+ try:
115
+ from PIL import Image
116
+ with Image.open(path) as img:
117
+ w, h = img.size
118
+ return f"{w}x{h}"
119
+ except Exception:
120
+ return "unknown"
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.4
2
+ Name: video2flow
3
+ Version: 0.1.0
4
+ Summary: Extract video frames as descriptive text flows for LLM consumption
5
+ Author-email: deepstrain <dev@massiron.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/massiron/video2flow
8
+ Project-URL: Repository, https://github.com/massiron/video2flow
9
+ Project-URL: Documentation, https://github.com/massiron/video2flow#readme
10
+ Keywords: video,frames,llm,vision,description
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: opencv-python>=4.8
22
+ Requires-Dist: typer>=0.9
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: pillow>=10.0
25
+
26
+ # video2flow
27
+
28
+ **Extract video frames as descriptive text flows for LLM consumption.**
29
+
30
+ Claude, GPT-4o, Gemini and other multimodal models can see images but not
31
+ video. `video2flow` bridges the gap: extract frames → generate timestamped
32
+ descriptions → feed the text flow to any LLM.
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install video2flow
38
+ ```
39
+
40
+ ## Usage
41
+
42
+ ```bash
43
+ # Extract frames from a video
44
+ video2flow extract video.mp4 -o frames/ --fps 1
45
+
46
+ # Quick description (without vision API)
47
+ video2flow describe video.mp4 --max-frames 10
48
+
49
+ # Full pipeline
50
+ video2flow pipeline video.mp4 -o video_flow/
51
+ ```
52
+
53
+ ## LLM Integration
54
+
55
+ Pass the output JSON to any LLM:
56
+
57
+ ```python
58
+ import json
59
+ flow = json.loads(open("video_flow/flow.json").read())
60
+ prompt = flow["usage"]["example_prompt"]
61
+ # Then: response = llm.invoke(prompt)
62
+ ```
63
+
64
+ For detailed vision understanding, send image files directly to a
65
+ multimodal model alongside the flow transcript.
66
+
67
+ ## License
68
+
69
+ MIT
@@ -0,0 +1,9 @@
1
+ video2flow/__init__.py,sha256=LYpwThh9fFSzb0oY-jPXSz2AgNp5xi9MPcepkpMObUY,112
2
+ video2flow/cli.py,sha256=QtZ7zHJWPBMymsDhI1qGJ7hgkG-179LlejojWvALJEU,2645
3
+ video2flow/core.py,sha256=2K7_XHpw3Vf6aQAM8YMcKfgZPjLD46wzKLKSCXySbWM,1539
4
+ video2flow/describe.py,sha256=rf9GLdgjGukKZ3QQUd3CrbYYrw4TKqcLtAvTwmu3xbI,3451
5
+ video2flow-0.1.0.dist-info/METADATA,sha256=UNLU2Xcgwo1qi9GgnUVY2XR6BCp0ItdkOqnaroS3LrE,2009
6
+ video2flow-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ video2flow-0.1.0.dist-info/entry_points.txt,sha256=B_QVW8KWUlmi6Oa7rCEGkT9y_YqQzt5Rwru4jxClFLs,50
8
+ video2flow-0.1.0.dist-info/top_level.txt,sha256=3NdZV1zggioOwjiMf9dj1328hO0yzklj7HHKCX8l8a8,11
9
+ video2flow-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ video2flow = video2flow.cli:app
@@ -0,0 +1 @@
1
+ video2flow