vision-agent 0.2.229__py3-none-any.whl → 0.2.231__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -222,7 +222,7 @@ def sam2(
     ret = _sam2(image, detections, image_size)
     _display_tool_trace(
         sam2.__name__,
-        {},
+        {"detections": detections},
        ret["display_data"],
        ret["files"],
    )
@@ -314,18 +314,29 @@ def od_sam2_video_tracking(
 
     # Process each segment and collect detections
     detections_per_segment: List[Any] = []
-    for segment_index, segment in enumerate(segments):
-        segment_detections = process_segment(
-            segment_frames=segment,
-            od_model=od_model,
-            prompt=prompt,
-            fine_tune_id=fine_tune_id,
-            chunk_length=chunk_length,
-            image_size=image_size,
-            segment_index=segment_index,
-            object_detection_tool=_apply_object_detection,
-        )
-        detections_per_segment.append(segment_detections)
+    with ThreadPoolExecutor() as executor:
+        futures = {
+            executor.submit(
+                process_segment,
+                segment_frames=segment,
+                od_model=od_model,
+                prompt=prompt,
+                fine_tune_id=fine_tune_id,
+                chunk_length=chunk_length,
+                image_size=image_size,
+                segment_index=segment_index,
+                object_detection_tool=_apply_object_detection,
+            ): segment_index
+            for segment_index, segment in enumerate(segments)
+        }
+
+        for future in as_completed(futures):
+            segment_index = futures[future]
+            detections_per_segment.append((segment_index, future.result()))
+
+    detections_per_segment = [
+        x[1] for x in sorted(detections_per_segment, key=lambda x: x[0])
+    ]
 
     merged_detections = merge_segments(detections_per_segment)
     post_processed = post_process(merged_detections, image_size)
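The new version of `od_sam2_video_tracking` submits each segment to a thread pool and then sorts the completed results back into segment order before merging. A minimal sketch of that pattern, with a hypothetical `work` function standing in for `process_segment`:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def work(segment_index: int, segment: list) -> str:
    # stand-in for the real per-segment detection call
    return f"detections for segment {segment_index} ({len(segment)} frames)"

segments = [["f0", "f1"], ["f2", "f3"], ["f4"]]

results = []
with ThreadPoolExecutor() as executor:
    # map each future back to the index of the segment it processes
    futures = {executor.submit(work, i, seg): i for i, seg in enumerate(segments)}
    for future in as_completed(futures):
        # futures finish in arbitrary order, so keep the index with the result
        results.append((futures[future], future.result()))

# restore the original segment order before merging downstream
ordered = [r for _, r in sorted(results, key=lambda x: x[0])]
print(ordered)
```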
@@ -390,7 +401,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": normalize_bbox(bbox["bounding_box"], image_size),
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -398,7 +409,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": bbox["bounding_box"],
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -582,7 +593,7 @@ def owlv2_sam2_video_tracking(
     )
     _display_tool_trace(
         owlv2_sam2_video_tracking.__name__,
-        {},
+        {"prompt": prompt, "chunk_length": chunk_length},
        ret["display_data"],
        ret["files"],
    )
@@ -595,14 +606,14 @@ def owlv2_sam2_video_tracking(
 def florence2_object_detection(
     prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
 ) -> List[Dict[str, Any]]:
-    """'florence2_object_detection' is a tool that can detect multiple
-    objects given a text prompt which can be object names or caption. You
-    can optionally separate the object names in the text with commas. It returns a list
-    of bounding boxes with normalized coordinates, label names and associated
-    confidence scores of 1.0.
+    """'florence2_object_detection' is a tool that can detect multiple objects given a
+    text prompt which can be object names or caption. You can optionally separate the
+    object names in the text with commas. It returns a list of bounding boxes with
+    normalized coordinates, label names and associated confidence scores of 1.0.
 
     Parameters:
-        prompt (str): The prompt to ground to the image.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to used to detect objects
         fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
             fine-tuned model ID here to use it.
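The added docstring guidance asks callers to use mutually exclusive category names in the prompt. A hypothetical call following that guidance (the image path is illustrative, and this assumes `florence2_object_detection` is importable from `vision_agent.tools` the same way the README imports other tools):

```python
import vision_agent.tools as T

image = T.load_image("street.png")
# exclusive categories: each object should match at most one label
dets = T.florence2_object_detection("person, car, bicycle", image)
# avoid overlapping labels like "person, athlete", which can label the same object twice
print(dets)
```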
@@ -681,7 +692,8 @@ def florence2_sam2_instance_segmentation(
     1.0.
 
     Parameters:
-        prompt (str): The prompt to ground to the image.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to ground the prompt to.
         fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
             fine-tuned model ID here to use it.
@@ -769,7 +781,8 @@ def florence2_sam2_video_tracking(
     is useful for tracking and counting without duplicating counts.
 
     Parameters:
-        prompt (str): The prompt to ground to the video.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run florence2 to find
             new objects.
@@ -1679,7 +1692,7 @@ def video_temporal_localization(
     prompt: str,
     frames: List[np.ndarray],
     model: str = "qwen2vl",
-    chunk_length_frames: Optional[int] = 2,
+    chunk_length_frames: int = 2,
 ) -> List[float]:
     """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
     value selected for the video. It can detect multiple objects independently per
@@ -1693,7 +1706,7 @@ def video_temporal_localization(
         frames (List[np.ndarray]): The reference frames used for the question
         model (str): The model to use for the inference. Valid values are
             'qwen2vl', 'gpt4o'.
-        chunk_length_frames (Optional[int]): length of each chunk in frames
+        chunk_length_frames (int): length of each chunk in frames
 
     Returns:
         List[float]: A list of floats with a value of 1.0 if the objects to be found
@@ -1712,8 +1725,7 @@ def video_temporal_localization(
         "model": model,
         "function_name": "video_temporal_localization",
     }
-    if chunk_length_frames is not None:
-        payload["chunk_length_frames"] = chunk_length_frames
+    payload["chunk_length_frames"] = chunk_length_frames
 
     data = send_inference_request(
         payload, "video-temporal-localization", files=files, v2=True
@@ -1724,7 +1736,13 @@ def video_temporal_localization(
         data,
         files,
     )
-    return [cast(float, value) for value in data]
+    chunked_data = [cast(float, value) for value in data]
+
+    full_data = []
+    for value in chunked_data:
+        full_data.extend([value] * chunk_length_frames)
+
+    return full_data[: len(frames)]
 
 
 def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
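`video_temporal_localization` now expands each chunk-level score to every frame in that chunk and truncates to the number of input frames, so the returned list lines up one-to-one with `frames`. A small worked example of that expansion (the scores and frame count are illustrative):

```python
chunk_length_frames = 2
frames = list(range(5))          # pretend we have 5 frames
chunked_data = [1.0, 0.0, 1.0]   # one score per chunk from the model

full_data = []
for value in chunked_data:
    # repeat the chunk score for every frame covered by that chunk
    full_data.extend([value] * chunk_length_frames)

# trim any overshoot from the last partial chunk
print(full_data[: len(frames)])  # [1.0, 1.0, 0.0, 0.0, 1.0]
```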
@@ -2148,7 +2166,7 @@ def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any
     return response
 
 
-# agentic od tools
+# Agentic OD Tools
 
 
 def _agentic_object_detection(
@@ -2644,7 +2662,7 @@ def save_image(image: np.ndarray, file_path: str) -> None:
 
 
 def save_video(
-    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 1
+    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 5
 ) -> str:
     """'save_video' is a utility function that saves a list of frames as a mp4 video file on disk.
 
vision_agent/utils/sim.py CHANGED
@@ -98,10 +98,12 @@ class Sim:
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(
-                    self.emb_call,
-                    x,
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
                 )
             )
 
@@ -141,7 +143,9 @@ class Sim:
 
         df_load = pd.read_csv(load_dir / "df.csv")
         if platform.system() == "Windows":
-            df_load["doc"] = df_load["doc"].apply(lambda x: x.replace("\r", ""))
+            df_load = df_load.assign(
+                doc=df_load.doc.apply(lambda x: x.replace("\r", ""))
+            )
         return df.equals(df_load)  # type: ignore
 
     @lru_cache(maxsize=256)
@@ -166,7 +170,9 @@ class Sim:
             self.emb_call,
             query,
         )
-        self.df["sim"] = self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
+        self.df = self.df.assign(
+            sim=self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
+        )
         res = self.df.sort_values("sim", ascending=False).head(k)
         if thresh is not None:
             res = res[res.sim > thresh]
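These `sim.py` changes swap direct column assignment (`df["col"] = ...`) for `DataFrame.assign`, which returns a new frame instead of mutating one in place (a pattern that sidesteps pandas SettingWithCopyWarning when the frame is derived from another). A minimal sketch of the two styles on toy data:

```python
import pandas as pd

df = pd.DataFrame({"doc": ["alpha", "beta"]})

# in-place column assignment mutates df directly
df["doc_len"] = df["doc"].apply(len)

# assign returns a new DataFrame with the added column; rebinding the name
# keeps the update without touching whatever df was derived from
df = df.assign(doc_upper=df["doc"].apply(str.upper))
print(df)
```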
@@ -214,8 +220,13 @@ class AzureSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(self.emb_call, x)
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -245,8 +256,13 @@ class OllamaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(emb_call, x)
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -267,8 +283,13 @@ class StellaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df["embs"] = self.df[sim_key].apply(
-                lambda x: get_embedding(emb_call, x)
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
     @staticmethod
vision_agent-0.2.231.dist-info/METADATA ADDED
@@ -0,0 +1,148 @@
+Metadata-Version: 2.1
+Name: vision-agent
+Version: 0.2.231
+Summary: Toolset for Vision Agent
+Author: Landing AI
+Author-email: dev@landing.ai
+Requires-Python: >=3.9,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: anthropic (>=0.31.0,<0.32.0)
+Requires-Dist: av (>=11.0.0,<12.0.0)
+Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
+Requires-Dist: e2b-code-interpreter (==0.0.11a37)
+Requires-Dist: flake8 (>=7.0.0,<8.0.0)
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: langsmith (>=0.1.58,<0.2.0)
+Requires-Dist: libcst (>=1.5.0,<2.0.0)
+Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
+Requires-Dist: nbclient (>=0.10.0,<0.11.0)
+Requires-Dist: nbformat (>=5.10.4,<6.0.0)
+Requires-Dist: numpy (>=1.21.0,<2.0.0)
+Requires-Dist: openai (>=1.0.0,<2.0.0)
+Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
+Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
+Requires-Dist: pandas (>=2.0.0,<3.0.0)
+Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
+Requires-Dist: pydantic (==2.7.4)
+Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
+Requires-Dist: pytube (==15.0.0)
+Requires-Dist: requests (>=2.0.0,<3.0.0)
+Requires-Dist: rich (>=13.7.1,<14.0.0)
+Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
+Requires-Dist: scipy (>=1.13.0,<1.14.0)
+Requires-Dist: tabulate (>=0.9.0,<0.10.0)
+Requires-Dist: tenacity (>=8.3.0,<9.0.0)
+Requires-Dist: tqdm (>=4.64.0,<5.0.0)
+Requires-Dist: typing_extensions (>=4.0.0,<5.0.0)
+Project-URL: Homepage, https://landing.ai
+Project-URL: documentation, https://github.com/landing-ai/vision-agent
+Project-URL: repository, https://github.com/landing-ai/vision-agent
+Description-Content-Type: text/markdown
+
+<div align="center">
+<picture>
+<source media="(prefers-color-scheme: dark)" srcset="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_light.svg?raw=true">
+<source media="(prefers-color-scheme: light)" srcset="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_dark.svg?raw=true">
+<img alt="VisionAgent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_light.svg?raw=true">
+</picture>
+
+[![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
+![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
+[![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
+![version](https://img.shields.io/pypi/pyversions/vision-agent)
+</div>
+
+## VisionAgent
+VisionAgent is a library that helps you utilize agent frameworks to generate code to
+solve your vision task. Check out our discord for updates and roadmaps! The fastest
+way to test out VisionAgent is to use our web application which you can find [here](https://va.landing.ai/).
+
+## Installation
+```bash
+pip install vision-agent
+```
+
+```bash
+export ANTHROPIC_API_KEY="your-api-key"
+export OPENAI_API_KEY="your-api-key"
+```
+
+> **_NOTE:_** We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance for VisionAgent. If you want to use a different LLM provider or only one, see 'Using Other LLM Providers' below.
+
+## Documentation
+
+[VisionAgent Library Docs](https://landing-ai.github.io/vision-agent/)
+
+## Examples
+### Counting cans in an image
+You can run VisionAgent in a local Jupyter Notebook [Counting cans in an image](https://github.com/landing-ai/vision-agent/blob/main/examples/notebooks/counting_cans.ipynb)
+
+### Generating code
+You can use VisionAgent to generate code to count the number of people in an image:
+```python
+from vision_agent.agent import VisionAgentCoderV2
+from vision_agent.agent.types import AgentMessage
+
+agent = VisionAgentCoderV2(verbose=True)
+code_context = agent.generate_code(
+    [
+        AgentMessage(
+            role="user",
+            content="Count the number of people in this image",
+            media=["people.png"]
+        )
+    ]
+)
+
+with open("generated_code.py", "w") as f:
+    f.write(code_context.code + "\n" + code_context.test)
+```
+
+### Using the tools directly
+VisionAgent produces code that utilizes our tools. You can also use the tools directly.
+For example if you wanted to detect people in an image and visualize the results:
+```python
+import vision_agent.tools as T
+import matplotlib.pyplot as plt
+
+image = T.load_image("people.png")
+dets = T.countgd_object_detection("person", image)
+# visualize the countgd bounding boxes on the image
+viz = T.overlay_bounding_boxes(image, dets)
+
+# save the visualization to a file
+T.save_image(viz, "people_detected.png")
+
+# display the visualization
+plt.imshow(viz)
+plt.show()
+```
+
+You can also use the tools for running on video files:
+```python
+import vision_agent.tools as T
+
+frames_and_ts = T.extract_frames_and_timestamps("people.mp4")
+# extract the frames from the frames_and_ts list
+frames = [f["frame"] for f in frames_and_ts]
+
+# run the countgd tracking on the frames
+tracks = T.countgd_sam2_video_tracking("person", frames)
+# visualize the countgd tracking results on the frames and save the video
+viz = T.overlay_segmentation_masks(frames, tracks)
+T.save_video(viz, "people_detected.mp4")
+```
+
+## Using Other LLM Providers
+You can use other LLM providers by changing `config.py` in the `vision_agent/configs`
+directory. For example to change to Anthropic simply just run:
+```bash
+cp vision_agent/configs/anthropic_config.py vision_agent/configs/config.py
+```
+
+> **_NOTE:_** VisionAgent moves fast and we are constantly updating and changing the library. If you have any questions or need help, please reach out to us on our discord channel.
+
vision_agent-0.2.231.dist-info/RECORD ADDED
@@ -0,0 +1,52 @@
+vision_agent/.sim_tools/df.csv,sha256=XdcgkjC7CjF_CoJnXmFkYOPUBwHemiwsauh62b1eh1M,42472
+vision_agent/.sim_tools/embs.npy,sha256=YJe8EcKVNmeX_75CS2T1sbY-sUS_1HQAMT-34zc18a0,254080
+vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
+vision_agent/agent/README.md,sha256=Q4w7FWw38qaWosQYAZ7NqWx8Q5XzuWrlv7nLhjUd1-8,5527
+vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs4,617
+vision_agent/agent/agent.py,sha256=_1tHWAs7Jm5tqDzEcPfCRvJV3uRRveyh4n9_9pd6I1w,1565
+vision_agent/agent/agent_utils.py,sha256=IXxN9XruaeNTreUrdztb3kWJhimpsdH6hjv6xT4jg1Q,14062
+vision_agent/agent/types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
+vision_agent/agent/vision_agent.py,sha256=fH9NOLk7twL1fPr9vLSqkaYhah-gfDWfTOVF2FfMyzI,23461
+vision_agent/agent/vision_agent_coder.py,sha256=flUxOibyGZK19BCSK5mhaD3HjCxHw6c6FtKom6N2q1E,27359
+vision_agent/agent/vision_agent_coder_prompts.py,sha256=_kkPLezUVnBXieNPlxMQab_6J6P7F-aa6ItF5NhZZsM,12281
+vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=idmSMfxebPULqqvllz3gqRzGDchEvS5dkGngvBs4PGo,4872
+vision_agent/agent/vision_agent_coder_v2.py,sha256=ZR2PQoMqNM6yK3vn_0rrCJf_EplRKye7t7bVjyl51ls,16476
+vision_agent/agent/vision_agent_planner.py,sha256=fFzjNkZBKkh8Y_oS06ATI4qz31xmIJvixb_tV1kX8KA,18590
+vision_agent/agent/vision_agent_planner_prompts.py,sha256=rYRdJthc-sQN57VgCBKrF09Sd73BSxcBdjNe6C4WNZ8,6837
+vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=5xTx93lNpoyT4eAD9jicwDyDAkuW7eQqicr17zCjrQw,33337
+vision_agent/agent/vision_agent_planner_v2.py,sha256=7hBQdg9y4oCLDiQ54Kh12_uIMywedKKNPWiKPRA01cQ,20568
+vision_agent/agent/vision_agent_prompts.py,sha256=KaJwYPUP7_GvQsCPPs6Fdawmi3AQWmWajBUuzj7gTG4,13812
+vision_agent/agent/vision_agent_prompts_v2.py,sha256=AW_bW1boGiCLyLFd3h4GQenfDACttQagDHwpBkSW4Xo,2518
+vision_agent/agent/vision_agent_v2.py,sha256=335VT0hk0jkB14y4W3cJo5ueEu1wY_jjN-R_m2xaQ30,10752
+vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
+vision_agent/clients/landing_public_api.py,sha256=lU2ev6E8NICmR8DMUljuGcVFy5VNJQ4WQkWC8WnnJEc,1503
+vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
+vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
+vision_agent/configs/anthropic_openai_config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
+vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
+vision_agent/lmm/__init__.py,sha256=xk2Rn8Zgpy2xwYaOGHzy4tXxnxo2aj6SkpNjeJ8yxcY,111
+vision_agent/lmm/lmm.py,sha256=arwfYPWme_RxCxSpEQ0ZkpHO22GFPCwVeoSvXqLPOAk,19288
+vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
+vision_agent/tools/__init__.py,sha256=zopUrANPx7p0NGy6BxmEaYhDrj8DX8w7BLfgmCbz-mU,2897
+vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
+vision_agent/tools/planner_tools.py,sha256=Mk3N-I-Qs4ezeyv8EL9BxdxmJG5oWiH5bFkvgwJKB0s,14660
+vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
+vision_agent/tools/tool_utils.py,sha256=xJRWF96Ge9RvhhVHrOtifjUYoc4HIJ2y7c2VOQ2Lp8s,10152
+vision_agent/tools/tools.py,sha256=3B3xWFVA3qfAO6ySSQ2yUPUAiTrgJomL48hLO_VP6RQ,106015
+vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
+vision_agent/utils/__init__.py,sha256=QKk4zVjMwGxQI0MQ-aZZA50N-qItxRY4EB9CwQkZ2HY,185
+vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
+vision_agent/utils/execute.py,sha256=vOEP5Ys7S2lc0_7pOJbgk7OaWi85hrCNu9_8Bo3zk6I,29356
+vision_agent/utils/image_utils.py,sha256=z_ONgcza125B10NkoGwPOzXnL470bpTWZbkB16NeeH0,12188
+vision_agent/utils/sim.py,sha256=DYya76dYVtifFyXilMLxBzGgyfyeqhEwU4RJ4894lCI,9796
+vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
+vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
+vision_agent/utils/video_tracking.py,sha256=wK5dOutqV2t2aeaxedstCBa7xy-NNQE0-QZqKu1QUds,9498
+vision_agent-0.2.231.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.231.dist-info/METADATA,sha256=N8t9F4hZ4bgyZeDhrVepMZzO5dtRmzRB8VI6fq1fFAA,5760
+vision_agent-0.2.231.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.231.dist-info/RECORD,,