vision-agent 0.2.126__py3-none-any.whl → 0.2.127__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1518,7 +1518,9 @@ def save_video(
1518
1518
  raise ValueError(f"fps must be greater than 0 got {fps}")
1519
1519
 
1520
1520
  if output_video_path is None:
1521
- output_video_path = tempfile.NamedTemporaryFile(delete=False).name
1521
+ output_video_path = tempfile.NamedTemporaryFile(
1522
+ delete=False, suffix=".mp4"
1523
+ ).name
1522
1524
 
1523
1525
  output_video_path = video_writer(frames, fps, output_video_path)
1524
1526
  _save_video_to_result(output_video_path)
@@ -5,6 +5,7 @@ from functools import lru_cache
5
5
  from typing import List, Optional, Tuple
6
6
 
7
7
  import cv2
8
+ import av # type: ignore
8
9
  import numpy as np
9
10
  from decord import VideoReader # type: ignore
10
11
 
@@ -43,18 +44,36 @@ def play_video(video_base64: str) -> None:
43
44
  cv2.destroyAllWindows()
44
45
 
45
46
 
47
+ def _resize_frame(frame: np.ndarray) -> np.ndarray:
48
+ height, width = frame.shape[:2]
49
+ new_width = width - (width % 2)
50
+ new_height = height - (height % 2)
51
+ return cv2.resize(frame, (new_width, new_height))
52
+
53
+
46
54
  def video_writer(
47
55
  frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
48
56
  ) -> str:
49
57
  if filename is None:
50
58
  filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
51
-
52
- fourcc = cv2.VideoWriter_fourcc(*"mp4v") # type: ignore
59
+ container = av.open(filename, mode="w")
60
+ stream = container.add_stream("h264", rate=fps)
53
61
  height, width = frames[0].shape[:2]
54
- writer = cv2.VideoWriter(filename, fourcc, fps, (width, height))
62
+ stream.height = height - (height % 2)
63
+ stream.width = width - (width % 2)
64
+ stream.pix_fmt = "yuv420p"
55
65
  for frame in frames:
56
- writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
57
- writer.release()
66
+ # Remove the alpha channel (convert RGBA to RGB)
67
+ frame_rgb = frame[:, :, :3]
68
+ # Resize the frame to make dimensions divisible by 2
69
+ frame_rgb = _resize_frame(frame_rgb)
70
+ av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
71
+ for packet in stream.encode(av_frame):
72
+ container.mux(packet)
73
+
74
+ for packet in stream.encode():
75
+ container.mux(packet)
76
+ container.close()
58
77
  return filename
59
78
 
60
79
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.126
3
+ Version: 0.2.127
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Requires-Dist: anthropic (>=0.31.0,<0.32.0)
13
+ Requires-Dist: av (>=11.0.0,<12.0.0)
13
14
  Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
14
15
  Requires-Dist: e2b-code-interpreter (==0.0.11a37)
15
16
  Requires-Dist: eva-decord (>=0.6.1,<0.7.0)
@@ -18,7 +18,7 @@ vision_agent/tools/__init__.py,sha256=T8Hi5aHf4J2QJDoPRvu5fxbiqMpAY-1Gi2EFIhJbf3
18
18
  vision_agent/tools/meta_tools.py,sha256=KeGiw2OtY8ARpGbtWjoNAoO1dwevt7LbCupaJX61MkE,18929
19
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
20
  vision_agent/tools/tool_utils.py,sha256=62NVlojPMf9MuJ-3yJEcrB3mzmOxN2HrNQzzjVa-FZg,7527
21
- vision_agent/tools/tools.py,sha256=Y6BTLFoueLtjId2qG06UyZwCQA_TTA6uFxPkxzhRI50,65396
21
+ vision_agent/tools/tools.py,sha256=sO0J-ts2CsJnf2UPcvxvmowE_G0X3f1iSChnS-cnPlk,65433
22
22
  vision_agent/tools/tools_types.py,sha256=rLpCUODPY0yI65SLOTJOxfHFfqWM3WjOq-AYX25Chjk,2356
23
23
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
24
24
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -26,8 +26,8 @@ vision_agent/utils/execute.py,sha256=gc4R_0BKUrZyhiKvIxOpYuzQPYVWQEqxr3ANy1lJAw4
26
26
  vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7GJrg,10746
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
- vision_agent/utils/video.py,sha256=oM3sdQVGGI3xwrCN2GKt9otzDb0SPW-JUo5SABxTVl4,3847
30
- vision_agent-0.2.126.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.126.dist-info/METADATA,sha256=4O_OuQh5yhJ8unzNtfU4E_0RNykXxkbdjkiGPAXi9Ek,12258
32
- vision_agent-0.2.126.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.126.dist-info/RECORD,,
29
+ vision_agent/utils/video.py,sha256=oDTCuTv1dFMYvwqis7y0frt9U2iDF9KGN1g21bOVjvE,4528
30
+ vision_agent-0.2.127.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.127.dist-info/METADATA,sha256=r3fKbSB79F3MsBsOTV0z054Qno3DTpf3Pa-xwkdIgD0,12295
32
+ vision_agent-0.2.127.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.127.dist-info/RECORD,,