vision-agent 0.2.237__py3-none-any.whl → 0.2.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -96,13 +96,24 @@ class Config(BaseModel):
96
96
  }
97
97
  )
98
98
 
99
+ # for get_tool_for_task
100
+ od_judge: Type[LMM] = Field(default=AnthropicLMM)
101
+ od_judge_kwargs: dict = Field(
102
+ default_factory=lambda: {
103
+ "model_name": "claude-3-5-sonnet-20241022",
104
+ "temperature": 0.0,
105
+ "image_size": 512,
106
+ }
107
+ )
108
+
99
109
  # for suggestions module
100
- suggester: Type[LMM] = Field(default=AnthropicLMM)
110
+ suggester: Type[LMM] = Field(default=OpenAILMM)
101
111
  suggester_kwargs: dict = Field(
102
112
  default_factory=lambda: {
103
- "model_name": "claude-3-5-sonnet-20241022",
113
+ "model_name": "o1",
104
114
  "temperature": 1.0,
105
- "image_size": 768,
115
+ "image_detail": "high",
116
+ "image_size": 1024,
106
117
  }
107
118
  )
108
119
 
@@ -143,6 +154,9 @@ class Config(BaseModel):
143
154
  def create_tool_chooser(self) -> LMM:
144
155
  return self.tool_chooser(**self.tool_chooser_kwargs)
145
156
 
157
+ def create_od_judge(self) -> LMM:
158
+ return self.od_judge(**self.od_judge_kwargs)
159
+
146
160
  def create_suggester(self) -> LMM:
147
161
  return self.suggester(**self.suggester_kwargs)
148
162
 
@@ -2,6 +2,7 @@ from .sim import (
2
2
  AzureSim,
3
3
  OllamaSim,
4
4
  Sim,
5
+ StellaSim,
5
6
  get_tool_recommender,
6
7
  load_cached_sim,
7
8
  load_sim,
@@ -368,6 +368,15 @@ def get_tool_for_task(
368
368
  tool_tester = CONFIG.create_tool_tester()
369
369
  tool_chooser = CONFIG.create_tool_chooser()
370
370
 
371
+ if isinstance(images, list):
372
+ if len(images) > 0 and isinstance(images[0], dict):
373
+ if all(["frame" in image for image in images]):
374
+ images = [image["frame"] for image in images]
375
+ else:
376
+ raise ValueError(
377
+ f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
378
+ )
379
+
371
380
  if isinstance(images, list):
372
381
  images = {"image": images}
373
382
 
@@ -410,6 +419,15 @@ def get_tool_for_task_human_reviewer(
410
419
  # NOTE: this will have the same documentation as get_tool_for_task
411
420
  tool_tester = CONFIG.create_tool_tester()
412
421
 
422
+ if isinstance(images, list):
423
+ if len(images) > 0 and isinstance(images[0], dict):
424
+ if all(["frame" in image for image in images]):
425
+ images = [image["frame"] for image in images]
426
+ else:
427
+ raise ValueError(
428
+ f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
429
+ )
430
+
413
431
  if isinstance(images, list):
414
432
  images = {"image": images}
415
433
 
@@ -424,6 +442,9 @@ def get_tool_for_task_human_reviewer(
424
442
  Image.fromarray(image).save(image_path)
425
443
  image_paths.append(image_path)
426
444
 
445
+ # run no more than 3 images or else it overloads the LLM
446
+ image_paths = image_paths[:3]
447
+
427
448
  tools = [
428
449
  t.__name__
429
450
  for t in get_tools()
@@ -2804,7 +2804,7 @@ def save_video(
2804
2804
  else:
2805
2805
  Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)
2806
2806
 
2807
- output_video_path = video_writer(frames, fps, output_video_path)
2807
+ output_video_path = video_writer(frames, fps, filename=output_video_path)
2808
2808
  _save_video_to_result(output_video_path)
2809
2809
  return output_video_path
2810
2810
 
@@ -1,5 +1,5 @@
1
- import base64
2
1
  import logging
2
+ import os
3
3
  import tempfile
4
4
  from functools import lru_cache
5
5
  from typing import List, Optional, Tuple
@@ -15,37 +15,6 @@ _DEFAULT_VIDEO_FPS = 24
15
15
  _DEFAULT_INPUT_FPS = 1.0
16
16
 
17
17
 
18
- def play_video(video_base64: str) -> None:
19
- """Play a video file"""
20
- video_data = base64.b64decode(video_base64)
21
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
22
- temp_video.write(video_data)
23
- temp_video_path = temp_video.name
24
-
25
- cap = cv2.VideoCapture(temp_video_path)
26
- if not cap.isOpened():
27
- _LOGGER.error("Error: Could not open video.")
28
- return
29
-
30
- # Display the first frame and wait for any key press to start the video
31
- ret, frame = cap.read()
32
- if ret:
33
- cv2.imshow("Video Player", frame)
34
- _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
35
- cv2.waitKey(0) # Wait for any key press
36
-
37
- while cap.isOpened():
38
- ret, frame = cap.read()
39
- if not ret:
40
- break
41
- cv2.imshow("Video Player", frame)
42
- # Press 'q' to exit the video
43
- if cv2.waitKey(200) & 0xFF == ord("q"):
44
- break
45
- cap.release()
46
- cv2.destroyAllWindows()
47
-
48
-
49
18
  def _resize_frame(frame: np.ndarray) -> np.ndarray:
50
19
  height, width = frame.shape[:2]
51
20
  new_width = width - (width % 2)
@@ -57,12 +26,15 @@ def video_writer(
57
26
  frames: List[np.ndarray],
58
27
  fps: float = _DEFAULT_INPUT_FPS,
59
28
  filename: Optional[str] = None,
29
+ file_ext: str = ".mp4",
60
30
  ) -> str:
31
+ tempf = None
61
32
  if isinstance(fps, str):
62
33
  # fps could be a string when it's passed in from a web endpoint deployment
63
34
  fps = float(fps)
64
35
  if filename is None:
65
- filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
36
+ tempf = tempfile.NamedTemporaryFile(delete=False, suffix=file_ext)
37
+ filename = tempf.name
66
38
  container = av.open(filename, mode="w")
67
39
  stream = container.add_stream("h264", rate=fps)
68
40
  height, width = frames[0].shape[:2]
@@ -82,6 +54,9 @@ def video_writer(
82
54
  for packet in stream.encode():
83
55
  container.mux(packet)
84
56
  container.close()
57
+ # On Windows the temp file must be closed manually; `with NamedTemporaryFile(delete=True)` cannot be used here
58
+ if tempf is not None:
59
+ tempf.close()
85
60
  return filename
86
61
 
87
62
 
@@ -98,11 +73,11 @@ def frames_to_bytes(
98
73
  if isinstance(fps, str):
99
74
  # fps could be a string when it's passed in from a web endpoint deployment
100
75
  fps = float(fps)
101
- with tempfile.NamedTemporaryFile(delete=True, suffix=file_ext) as temp_file:
102
- video_writer(frames, fps, temp_file.name)
103
-
104
- with open(temp_file.name, "rb") as f:
105
- buffer_bytes = f.read()
76
+ filename = video_writer(frames, fps, file_ext=file_ext)
77
+ # TODO: look into memory-mapped files to avoid reading the entire file into memory
78
+ with open(filename, "rb") as f:
79
+ buffer_bytes = f.read()
80
+ os.unlink(filename)
106
81
  return buffer_bytes
107
82
 
108
83
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.237
3
+ Version: 0.2.239
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -15,7 +15,6 @@ Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
15
15
  Requires-Dist: e2b-code-interpreter (==0.0.11a37)
16
16
  Requires-Dist: flake8 (>=7.0.0,<8.0.0)
17
17
  Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
18
- Requires-Dist: langsmith (>=0.1.58,<0.2.0)
19
18
  Requires-Dist: libcst (>=1.5.0,<2.0.0)
20
19
  Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
21
20
  Requires-Dist: nbclient (>=0.10.0,<0.11.0)
@@ -21,7 +21,7 @@ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,
21
21
  vision_agent/clients/landing_public_api.py,sha256=Vz9lldtNbaJRWzT7T8-uQrC-dMnt47LIsDrxHgoVdEw,1492
22
22
  vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
23
23
  vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
24
- vision_agent/configs/anthropic_openai_config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
24
+ vision_agent/configs/anthropic_openai_config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
25
25
  vision_agent/configs/config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
26
26
  vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
27
27
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,13 +32,13 @@ vision_agent/models/__init__.py,sha256=qAdygB-0EsmxMHNzYTPNM6tAF8Fym95gm9bsHJafd
32
32
  vision_agent/models/agent_types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
33
33
  vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
34
34
  vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
35
- vision_agent/sim/__init__.py,sha256=XYL4BKCB-pamJzCR1y2d5lC8FL64WGu0oEzWzLfguAQ,120
35
+ vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
36
36
  vision_agent/sim/sim.py,sha256=VSU_1rYd4ifvF45xKWBEYugxdeeEQVpj0QL6rjx49i4,9801
37
37
  vision_agent/tools/__init__.py,sha256=T-MPNBVbvWtfo71hobaZsdYzQ52oyymolk_OAb2Pq_g,2463
38
38
  vision_agent/tools/meta_tools.py,sha256=-heMwGkx0hX_9zUp1dgBqsJpVnl6Y6tErMsjFy0dwLM,28652
39
- vision_agent/tools/planner_tools.py,sha256=iXyHjTBIWeQOCfcdQNufoQXfipHu_H38DIoK375FdnA,18492
39
+ vision_agent/tools/planner_tools.py,sha256=orBTdJQz2NKoLuX9WE6XixaYuG305xz0UBYvZOiuquQ,19474
40
40
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
41
- vision_agent/tools/tools.py,sha256=-xg5Msq5ZtHgaISpHnbq5rJ5MIERwfH6wPHg6KpaYjg,111457
41
+ vision_agent/tools/tools.py,sha256=hhQYqypvBDfcel1p4bfZHZfOZom3plnxGPHwo2T52Ls,111466
42
42
  vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
43
43
  vision_agent/utils/agent.py,sha256=QGKcbzpAjcVj0958bXYLv07-d2i1GU7-bXVG7bTGRMA,14619
44
44
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -47,9 +47,9 @@ vision_agent/utils/image_utils.py,sha256=bJM2mEvB6E__M9pxi74yQYzAiZ7mu3KE2ptyVrp
47
47
  vision_agent/utils/tools.py,sha256=USZL0MKsiJgqA8RFiYRTcj_Kn2FVYKLHK4wIk0gP1Ow,7694
48
48
  vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8Wis,2523
49
49
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
50
- vision_agent/utils/video.py,sha256=Dt9_pqGgr63gmpurzisnpF6d9tr65-zxS1CccXdVuxk,6458
50
+ vision_agent/utils/video.py,sha256=0LsmH0sDaBWhvtV15CCJgqKxWzwDDos7Sv2wOd7wyzQ,5610
51
51
  vision_agent/utils/video_tracking.py,sha256=GM9qfeawqhmZVWoKrzw5-NETd4gEo7ImMfWtBnhC3bw,12086
52
- vision_agent-0.2.237.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
- vision_agent-0.2.237.dist-info/METADATA,sha256=MkwC7kWf1f5E1ArMWdjNx_GGNgFwfWQtHbfyDzHN8EM,5755
54
- vision_agent-0.2.237.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
- vision_agent-0.2.237.dist-info/RECORD,,
52
+ vision_agent-0.2.239.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
+ vision_agent-0.2.239.dist-info/METADATA,sha256=yC90fdYSDqbLrHHIU6OTm96QhNJ-39buRPoVgIxnDzM,5712
54
+ vision_agent-0.2.239.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
+ vision_agent-0.2.239.dist-info/RECORD,,