vision-agent 0.2.237__py3-none-any.whl → 0.2.238__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.
@@ -96,13 +96,24 @@ class Config(BaseModel):
96
96
  }
97
97
  )
98
98
 
99
+ # for get_tool_for_task
100
+ od_judge: Type[LMM] = Field(default=AnthropicLMM)
101
+ od_judge_kwargs: dict = Field(
102
+ default_factory=lambda: {
103
+ "model_name": "claude-3-5-sonnet-20241022",
104
+ "temperature": 0.0,
105
+ "image_size": 512,
106
+ }
107
+ )
108
+
99
109
  # for suggestions module
100
- suggester: Type[LMM] = Field(default=AnthropicLMM)
110
+ suggester: Type[LMM] = Field(default=OpenAILMM)
101
111
  suggester_kwargs: dict = Field(
102
112
  default_factory=lambda: {
103
- "model_name": "claude-3-5-sonnet-20241022",
113
+ "model_name": "o1",
104
114
  "temperature": 1.0,
105
- "image_size": 768,
115
+ "image_detail": "high",
116
+ "image_size": 1024,
106
117
  }
107
118
  )
108
119
 
@@ -143,6 +154,9 @@ class Config(BaseModel):
143
154
  def create_tool_chooser(self) -> LMM:
144
155
  return self.tool_chooser(**self.tool_chooser_kwargs)
145
156
 
157
+ def create_od_judge(self) -> LMM:
158
+ return self.od_judge(**self.od_judge_kwargs)
159
+
146
160
  def create_suggester(self) -> LMM:
147
161
  return self.suggester(**self.suggester_kwargs)
148
162
 
@@ -2,6 +2,7 @@ from .sim import (
2
2
  AzureSim,
3
3
  OllamaSim,
4
4
  Sim,
5
+ StellaSim,
5
6
  get_tool_recommender,
6
7
  load_cached_sim,
7
8
  load_sim,
@@ -368,6 +368,15 @@ def get_tool_for_task(
368
368
  tool_tester = CONFIG.create_tool_tester()
369
369
  tool_chooser = CONFIG.create_tool_chooser()
370
370
 
371
+ if isinstance(images, list):
372
+ if len(images) > 0 and isinstance(images[0], dict):
373
+ if all(["frame" in image for image in images]):
374
+ images = [image["frame"] for image in images]
375
+ else:
376
+ raise ValueError(
377
+ f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
378
+ )
379
+
371
380
  if isinstance(images, list):
372
381
  images = {"image": images}
373
382
 
@@ -410,6 +419,15 @@ def get_tool_for_task_human_reviewer(
410
419
  # NOTE: this will have the same documentation as get_tool_for_task
411
420
  tool_tester = CONFIG.create_tool_tester()
412
421
 
422
+ if isinstance(images, list):
423
+ if len(images) > 0 and isinstance(images[0], dict):
424
+ if all(["frame" in image for image in images]):
425
+ images = [image["frame"] for image in images]
426
+ else:
427
+ raise ValueError(
428
+ f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
429
+ )
430
+
413
431
  if isinstance(images, list):
414
432
  images = {"image": images}
415
433
 
@@ -424,6 +442,9 @@ def get_tool_for_task_human_reviewer(
424
442
  Image.fromarray(image).save(image_path)
425
443
  image_paths.append(image_path)
426
444
 
445
+ # run no more than 3 images or else it overloads the LLM
446
+ image_paths = image_paths[:3]
447
+
427
448
  tools = [
428
449
  t.__name__
429
450
  for t in get_tools()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.237
3
+ Version: 0.2.238
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -15,7 +15,6 @@ Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
15
15
  Requires-Dist: e2b-code-interpreter (==0.0.11a37)
16
16
  Requires-Dist: flake8 (>=7.0.0,<8.0.0)
17
17
  Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
18
- Requires-Dist: langsmith (>=0.1.58,<0.2.0)
19
18
  Requires-Dist: libcst (>=1.5.0,<2.0.0)
20
19
  Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
21
20
  Requires-Dist: nbclient (>=0.10.0,<0.11.0)
@@ -21,7 +21,7 @@ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,
21
21
  vision_agent/clients/landing_public_api.py,sha256=Vz9lldtNbaJRWzT7T8-uQrC-dMnt47LIsDrxHgoVdEw,1492
22
22
  vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
23
23
  vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
24
- vision_agent/configs/anthropic_openai_config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
24
+ vision_agent/configs/anthropic_openai_config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
25
25
  vision_agent/configs/config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
26
26
  vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
27
27
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,11 +32,11 @@ vision_agent/models/__init__.py,sha256=qAdygB-0EsmxMHNzYTPNM6tAF8Fym95gm9bsHJafd
32
32
  vision_agent/models/agent_types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
33
33
  vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
34
34
  vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
35
- vision_agent/sim/__init__.py,sha256=XYL4BKCB-pamJzCR1y2d5lC8FL64WGu0oEzWzLfguAQ,120
35
+ vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
36
36
  vision_agent/sim/sim.py,sha256=VSU_1rYd4ifvF45xKWBEYugxdeeEQVpj0QL6rjx49i4,9801
37
37
  vision_agent/tools/__init__.py,sha256=T-MPNBVbvWtfo71hobaZsdYzQ52oyymolk_OAb2Pq_g,2463
38
38
  vision_agent/tools/meta_tools.py,sha256=-heMwGkx0hX_9zUp1dgBqsJpVnl6Y6tErMsjFy0dwLM,28652
39
- vision_agent/tools/planner_tools.py,sha256=iXyHjTBIWeQOCfcdQNufoQXfipHu_H38DIoK375FdnA,18492
39
+ vision_agent/tools/planner_tools.py,sha256=orBTdJQz2NKoLuX9WE6XixaYuG305xz0UBYvZOiuquQ,19474
40
40
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
41
41
  vision_agent/tools/tools.py,sha256=-xg5Msq5ZtHgaISpHnbq5rJ5MIERwfH6wPHg6KpaYjg,111457
42
42
  vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
@@ -49,7 +49,7 @@ vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8W
49
49
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
50
50
  vision_agent/utils/video.py,sha256=Dt9_pqGgr63gmpurzisnpF6d9tr65-zxS1CccXdVuxk,6458
51
51
  vision_agent/utils/video_tracking.py,sha256=GM9qfeawqhmZVWoKrzw5-NETd4gEo7ImMfWtBnhC3bw,12086
52
- vision_agent-0.2.237.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
- vision_agent-0.2.237.dist-info/METADATA,sha256=MkwC7kWf1f5E1ArMWdjNx_GGNgFwfWQtHbfyDzHN8EM,5755
54
- vision_agent-0.2.237.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
- vision_agent-0.2.237.dist-info/RECORD,,
52
+ vision_agent-0.2.238.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
+ vision_agent-0.2.238.dist-info/METADATA,sha256=VnupHm4Iav889sO4JPGeWYM7902KwPKaJYem81_EDCk,5712
54
+ vision_agent-0.2.238.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
+ vision_agent-0.2.238.dist-info/RECORD,,