vision-agent 0.2.229__py3-none-any.whl → 0.2.231__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/.sim_tools/df.csv +10 -8
- vision_agent/agent/agent_utils.py +10 -9
- vision_agent/agent/types.py +1 -0
- vision_agent/agent/vision_agent.py +3 -4
- vision_agent/agent/vision_agent_coder_prompts.py +6 -6
- vision_agent/agent/vision_agent_coder_v2.py +41 -26
- vision_agent/agent/vision_agent_planner_prompts.py +6 -6
- vision_agent/agent/vision_agent_planner_prompts_v2.py +16 -50
- vision_agent/agent/vision_agent_planner_v2.py +11 -12
- vision_agent/agent/vision_agent_prompts.py +11 -11
- vision_agent/agent/vision_agent_prompts_v2.py +18 -3
- vision_agent/agent/vision_agent_v2.py +29 -30
- vision_agent/configs/__init__.py +1 -0
- vision_agent/configs/anthropic_config.py +150 -0
- vision_agent/configs/anthropic_openai_config.py +150 -0
- vision_agent/configs/config.py +150 -0
- vision_agent/configs/openai_config.py +160 -0
- vision_agent/lmm/__init__.py +1 -1
- vision_agent/lmm/lmm.py +63 -9
- vision_agent/tools/__init__.py +4 -4
- vision_agent/tools/planner_tools.py +74 -48
- vision_agent/tools/tool_utils.py +3 -0
- vision_agent/tools/tools.py +49 -31
- vision_agent/utils/sim.py +33 -12
- vision_agent-0.2.231.dist-info/METADATA +148 -0
- vision_agent-0.2.231.dist-info/RECORD +52 -0
- vision_agent-0.2.229.dist-info/METADATA +0 -562
- vision_agent-0.2.229.dist-info/RECORD +0 -47
- {vision_agent-0.2.229.dist-info → vision_agent-0.2.231.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.229.dist-info → vision_agent-0.2.231.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -222,7 +222,7 @@ def sam2(
     ret = _sam2(image, detections, image_size)
     _display_tool_trace(
         sam2.__name__,
-        {},
+        {"detections": detections},
         ret["display_data"],
         ret["files"],
     )
@@ -314,18 +314,29 @@ def od_sam2_video_tracking(
 
     # Process each segment and collect detections
     detections_per_segment: List[Any] = []
-
-
-
-
-
-
-
-
-
-
-
-
+    with ThreadPoolExecutor() as executor:
+        futures = {
+            executor.submit(
+                process_segment,
+                segment_frames=segment,
+                od_model=od_model,
+                prompt=prompt,
+                fine_tune_id=fine_tune_id,
+                chunk_length=chunk_length,
+                image_size=image_size,
+                segment_index=segment_index,
+                object_detection_tool=_apply_object_detection,
+            ): segment_index
+            for segment_index, segment in enumerate(segments)
+        }
+
+        for future in as_completed(futures):
+            segment_index = futures[future]
+            detections_per_segment.append((segment_index, future.result()))
+
+    detections_per_segment = [
+        x[1] for x in sorted(detections_per_segment, key=lambda x: x[0])
+    ]
 
     merged_detections = merge_segments(detections_per_segment)
     post_processed = post_process(merged_detections, image_size)
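
The new block fans segment processing out to a thread pool; because `as_completed` yields futures in whatever order they finish, each future is keyed back to its segment index and the results are re-sorted before merging. A minimal, self-contained sketch of that pattern (`work` here is a hypothetical stand-in for `process_segment`):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Tuple

def work(segment: List[int]) -> int:
    # placeholder for per-segment object detection
    return sum(segment)

segments = [[1, 2], [3, 4], [5, 6]]
indexed_results: List[Tuple[int, int]] = []

with ThreadPoolExecutor() as executor:
    # map each future back to the index of the segment it was submitted for
    futures = {executor.submit(work, seg): i for i, seg in enumerate(segments)}
    for future in as_completed(futures):
        indexed_results.append((futures[future], future.result()))

# as_completed yields in completion order, so restore submission order
ordered = [r for _, r in sorted(indexed_results, key=lambda x: x[0])]
assert ordered == [3, 7, 11]
```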
@@ -390,7 +401,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": normalize_bbox(bbox["bounding_box"], image_size),
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -398,7 +409,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": bbox["bounding_box"],
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -582,7 +593,7 @@ def owlv2_sam2_video_tracking(
     )
     _display_tool_trace(
         owlv2_sam2_video_tracking.__name__,
-        {},
+        {"prompt": prompt, "chunk_length": chunk_length},
         ret["display_data"],
         ret["files"],
     )
@@ -595,14 +606,14 @@ def owlv2_sam2_video_tracking(
 
 def florence2_object_detection(
     prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
 ) -> List[Dict[str, Any]]:
-    """'florence2_object_detection' is a tool that can detect multiple
-
-
-
-    confidence scores of 1.0.
+    """'florence2_object_detection' is a tool that can detect multiple objects given a
+    text prompt which can be object names or caption. You can optionally separate the
+    object names in the text with commas. It returns a list of bounding boxes with
+    normalized coordinates, label names and associated confidence scores of 1.0.
 
     Parameters:
-        prompt (str): The prompt to ground to the image.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to used to detect objects
         fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
             fine-tuned model ID here to use it.
@@ -681,7 +692,8 @@ def florence2_sam2_instance_segmentation(
     1.0.
 
     Parameters:
-        prompt (str): The prompt to ground to the image.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to ground the prompt to.
         fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
             fine-tuned model ID here to use it.
@@ -769,7 +781,8 @@ def florence2_sam2_video_tracking(
     is useful for tracking and counting without duplicating counts.
 
     Parameters:
-        prompt (str): The prompt to ground to the
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run florence2 to find
             new objects.
@@ -1679,7 +1692,7 @@ def video_temporal_localization(
     prompt: str,
     frames: List[np.ndarray],
     model: str = "qwen2vl",
-    chunk_length_frames:
+    chunk_length_frames: int = 2,
 ) -> List[float]:
     """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
     value selected for the video. It can detect multiple objects independently per
@@ -1693,7 +1706,7 @@ def video_temporal_localization(
         frames (List[np.ndarray]): The reference frames used for the question
         model (str): The model to use for the inference. Valid values are
             'qwen2vl', 'gpt4o'.
-        chunk_length_frames (
+        chunk_length_frames (int): length of each chunk in frames
 
     Returns:
         List[float]: A list of floats with a value of 1.0 if the objects to be found
@@ -1712,8 +1725,7 @@ def video_temporal_localization(
         "model": model,
         "function_name": "video_temporal_localization",
     }
-
-    payload["chunk_length_frames"] = chunk_length_frames
+    payload["chunk_length_frames"] = chunk_length_frames
 
     data = send_inference_request(
         payload, "video-temporal-localization", files=files, v2=True
@@ -1724,7 +1736,13 @@ def video_temporal_localization(
         data,
         files,
     )
-
+    chunked_data = [cast(float, value) for value in data]
+
+    full_data = []
+    for value in chunked_data:
+        full_data.extend([value] * chunk_length_frames)
+
+    return full_data[: len(frames)]
 
 
 def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
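
Note the change in return shape: the service returns one score per chunk of `chunk_length_frames` frames, and the function now repeats each chunk score so the output aligns one-to-one with the input frames, truncating any overshoot from a short final chunk. A worked sketch with made-up scores:

```python
# hypothetical per-chunk scores from the video-temporal-localization endpoint
chunked_data = [1.0, 0.0, 1.0]
chunk_length_frames = 2
num_frames = 5  # len(frames); the last chunk covered only one frame

full_data: list = []
for value in chunked_data:
    full_data.extend([value] * chunk_length_frames)

# 6 expanded scores, truncated back to the 5 input frames
assert full_data[:num_frames] == [1.0, 1.0, 0.0, 0.0, 1.0]
```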
@@ -2148,7 +2166,7 @@ def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any
     return response
 
 
-#
+# Agentic OD Tools
 
 
 def _agentic_object_detection(
@@ -2644,7 +2662,7 @@ def save_image(image: np.ndarray, file_path: str) -> None:
 
 
 def save_video(
-    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float =
+    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 5
 ) -> str:
     """'save_video' is a utility function that saves a list of frames as a mp4 video file on disk.
 
vision_agent/utils/sim.py
CHANGED
@@ -98,10 +98,12 @@ class Sim:
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
-
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
                 )
             )
 
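
The recurring change across `sim.py` is swapping direct column mutation for `DataFrame.assign`, which builds the `embs` column functionally and sidesteps pandas chained-assignment pitfalls. A minimal sketch of the pattern, with a hypothetical `fake_embed` standing in for `get_embedding(self.emb_call, x)`:

```python
import pandas as pd

def fake_embed(text: str) -> list:
    # placeholder embedding: the real code calls the embedding model here
    return [float(len(text)), 0.0]

df = pd.DataFrame({"doc": ["detect cars", "segment people"]})
# assign returns a new frame with the embs column added
df = df.assign(embs=df["doc"].apply(fake_embed))
print(df.embs.tolist())
```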
@@ -141,7 +143,9 @@ class Sim:
 
         df_load = pd.read_csv(load_dir / "df.csv")
         if platform.system() == "Windows":
-            df_load
+            df_load = df_load.assign(
+                doc=df_load.doc.apply(lambda x: x.replace("\r", ""))
+            )
         return df.equals(df_load)  # type: ignore
 
     @lru_cache(maxsize=256)
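
The Windows branch guards against CRLF round-tripping: a CSV written on Windows can reload with stray `"\r"` characters in string cells, making an otherwise identical frame compare unequal. A small sketch of the normalization:

```python
import pandas as pd

# simulate a frame reloaded from a CSV written with CRLF line endings
df_load = pd.DataFrame({"doc": ["first doc\r", "second doc"]})
df_load = df_load.assign(doc=df_load.doc.apply(lambda x: x.replace("\r", "")))
assert df_load.doc.tolist() == ["first doc", "second doc"]
```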
@@ -166,7 +170,9 @@ class Sim:
             self.emb_call,
             query,
         )
-        self.df
+        self.df = self.df.assign(
+            sim=self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
+        )
         res = self.df.sort_values("sim", ascending=False).head(k)
         if thresh is not None:
             res = res[res.sim > thresh]
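
This lookup scores every stored embedding against the query with cosine similarity (`1 - cosine` distance from `scipy`), sorts descending, and applies the optional threshold. A self-contained sketch of that ranking step, using toy 2-D embeddings:

```python
import pandas as pd
from scipy.spatial.distance import cosine

df = pd.DataFrame({
    "doc": ["a", "b", "c"],
    "embs": [[1.0, 0.0], [0.7, 0.7], [0.0, 1.0]],
})
query_embedding = [1.0, 0.0]

df = df.assign(sim=df.embs.apply(lambda x: 1 - cosine(x, query_embedding)))
res = df.sort_values("sim", ascending=False).head(2)
res = res[res.sim > 0.5]  # optional threshold, like `thresh` above
print(res.doc.tolist())   # ['a', 'b']
```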
@@ -214,8 +220,13 @@ class AzureSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -245,8 +256,13 @@ class OllamaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -267,8 +283,13 @@ class StellaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
     @staticmethod
vision_agent-0.2.231.dist-info/METADATA
ADDED
@@ -0,0 +1,148 @@
+Metadata-Version: 2.1
+Name: vision-agent
+Version: 0.2.231
+Summary: Toolset for Vision Agent
+Author: Landing AI
+Author-email: dev@landing.ai
+Requires-Python: >=3.9,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: anthropic (>=0.31.0,<0.32.0)
+Requires-Dist: av (>=11.0.0,<12.0.0)
+Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
+Requires-Dist: e2b-code-interpreter (==0.0.11a37)
+Requires-Dist: flake8 (>=7.0.0,<8.0.0)
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: langsmith (>=0.1.58,<0.2.0)
+Requires-Dist: libcst (>=1.5.0,<2.0.0)
+Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
+Requires-Dist: nbclient (>=0.10.0,<0.11.0)
+Requires-Dist: nbformat (>=5.10.4,<6.0.0)
+Requires-Dist: numpy (>=1.21.0,<2.0.0)
+Requires-Dist: openai (>=1.0.0,<2.0.0)
+Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
+Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
+Requires-Dist: pandas (>=2.0.0,<3.0.0)
+Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
+Requires-Dist: pydantic (==2.7.4)
+Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
+Requires-Dist: pytube (==15.0.0)
+Requires-Dist: requests (>=2.0.0,<3.0.0)
+Requires-Dist: rich (>=13.7.1,<14.0.0)
+Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
+Requires-Dist: scipy (>=1.13.0,<1.14.0)
+Requires-Dist: tabulate (>=0.9.0,<0.10.0)
+Requires-Dist: tenacity (>=8.3.0,<9.0.0)
+Requires-Dist: tqdm (>=4.64.0,<5.0.0)
+Requires-Dist: typing_extensions (>=4.0.0,<5.0.0)
+Project-URL: Homepage, https://landing.ai
+Project-URL: documentation, https://github.com/landing-ai/vision-agent
+Project-URL: repository, https://github.com/landing-ai/vision-agent
+Description-Content-Type: text/markdown
+
+<div align="center">
+    <picture>
+        <source media="(prefers-color-scheme: dark)" srcset="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_light.svg?raw=true">
+        <source media="(prefers-color-scheme: light)" srcset="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_dark.svg?raw=true">
+        <img alt="VisionAgent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_light.svg?raw=true">
+    </picture>
+
+[](https://discord.gg/wPdN8RCYew)
+
+[](https://badge.fury.io/py/vision-agent)
+
+</div>
+
+## VisionAgent
+VisionAgent is a library that helps you utilize agent frameworks to generate code to
+solve your vision task. Check out our discord for updates and roadmaps! The fastest
+way to test out VisionAgent is to use our web application which you can find [here](https://va.landing.ai/).
+
+## Installation
+```bash
+pip install vision-agent
+```
+
+```bash
+export ANTHROPIC_API_KEY="your-api-key"
+export OPENAI_API_KEY="your-api-key"
+```
+
+> **_NOTE:_** We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance for VisionAgent. If you want to use a different LLM provider or only one, see 'Using Other LLM Providers' below.
+
+## Documentation
+
+[VisionAgent Library Docs](https://landing-ai.github.io/vision-agent/)
+
+## Examples
+### Counting cans in an image
+You can run VisionAgent in a local Jupyter Notebook [Counting cans in an image](https://github.com/landing-ai/vision-agent/blob/main/examples/notebooks/counting_cans.ipynb)
+
+### Generating code
+You can use VisionAgent to generate code to count the number of people in an image:
+```python
+from vision_agent.agent import VisionAgentCoderV2
+from vision_agent.agent.types import AgentMessage
+
+agent = VisionAgentCoderV2(verbose=True)
+code_context = agent.generate_code(
+    [
+        AgentMessage(
+            role="user",
+            content="Count the number of people in this image",
+            media=["people.png"]
+        )
+    ]
+)
+
+with open("generated_code.py", "w") as f:
+    f.write(code_context.code + "\n" + code_context.test)
+```
+
+### Using the tools directly
+VisionAgent produces code that utilizes our tools. You can also use the tools directly.
+For example if you wanted to detect people in an image and visualize the results:
+```python
+import vision_agent.tools as T
+import matplotlib.pyplot as plt
+
+image = T.load_image("people.png")
+dets = T.countgd_object_detection("person", image)
+# visualize the countgd bounding boxes on the image
+viz = T.overlay_bounding_boxes(image, dets)
+
+# save the visualization to a file
+T.save_image(viz, "people_detected.png")
+
+# display the visualization
+plt.imshow(viz)
+plt.show()
+```
+
+You can also use the tools for running on video files:
+```python
+import vision_agent.tools as T
+
+frames_and_ts = T.extract_frames_and_timestamps("people.mp4")
+# extract the frames from the frames_and_ts list
+frames = [f["frame"] for f in frames_and_ts]
+
+# run the countgd tracking on the frames
+tracks = T.countgd_sam2_video_tracking("person", frames)
+# visualize the countgd tracking results on the frames and save the video
+viz = T.overlay_segmentation_masks(frames, tracks)
+T.save_video(viz, "people_detected.mp4")
+```
+
+## Using Other LLM Providers
+You can use other LLM providers by changing `config.py` in the `vision_agent/configs`
+directory. For example to change to Anthropic simply just run:
+```bash
+cp vision_agent/configs/anthropic_config.py vision_agent/configs/config.py
+```
+
+> **_NOTE:_** VisionAgent moves fast and we are constantly updating and changing the library. If you have any questions or need help, please reach out to us on our discord channel.
+
vision_agent-0.2.231.dist-info/RECORD
ADDED
@@ -0,0 +1,52 @@
+vision_agent/.sim_tools/df.csv,sha256=XdcgkjC7CjF_CoJnXmFkYOPUBwHemiwsauh62b1eh1M,42472
+vision_agent/.sim_tools/embs.npy,sha256=YJe8EcKVNmeX_75CS2T1sbY-sUS_1HQAMT-34zc18a0,254080
+vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
+vision_agent/agent/README.md,sha256=Q4w7FWw38qaWosQYAZ7NqWx8Q5XzuWrlv7nLhjUd1-8,5527
+vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs4,617
+vision_agent/agent/agent.py,sha256=_1tHWAs7Jm5tqDzEcPfCRvJV3uRRveyh4n9_9pd6I1w,1565
+vision_agent/agent/agent_utils.py,sha256=IXxN9XruaeNTreUrdztb3kWJhimpsdH6hjv6xT4jg1Q,14062
+vision_agent/agent/types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
+vision_agent/agent/vision_agent.py,sha256=fH9NOLk7twL1fPr9vLSqkaYhah-gfDWfTOVF2FfMyzI,23461
+vision_agent/agent/vision_agent_coder.py,sha256=flUxOibyGZK19BCSK5mhaD3HjCxHw6c6FtKom6N2q1E,27359
+vision_agent/agent/vision_agent_coder_prompts.py,sha256=_kkPLezUVnBXieNPlxMQab_6J6P7F-aa6ItF5NhZZsM,12281
+vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=idmSMfxebPULqqvllz3gqRzGDchEvS5dkGngvBs4PGo,4872
+vision_agent/agent/vision_agent_coder_v2.py,sha256=ZR2PQoMqNM6yK3vn_0rrCJf_EplRKye7t7bVjyl51ls,16476
+vision_agent/agent/vision_agent_planner.py,sha256=fFzjNkZBKkh8Y_oS06ATI4qz31xmIJvixb_tV1kX8KA,18590
+vision_agent/agent/vision_agent_planner_prompts.py,sha256=rYRdJthc-sQN57VgCBKrF09Sd73BSxcBdjNe6C4WNZ8,6837
+vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=5xTx93lNpoyT4eAD9jicwDyDAkuW7eQqicr17zCjrQw,33337
+vision_agent/agent/vision_agent_planner_v2.py,sha256=7hBQdg9y4oCLDiQ54Kh12_uIMywedKKNPWiKPRA01cQ,20568
+vision_agent/agent/vision_agent_prompts.py,sha256=KaJwYPUP7_GvQsCPPs6Fdawmi3AQWmWajBUuzj7gTG4,13812
+vision_agent/agent/vision_agent_prompts_v2.py,sha256=AW_bW1boGiCLyLFd3h4GQenfDACttQagDHwpBkSW4Xo,2518
+vision_agent/agent/vision_agent_v2.py,sha256=335VT0hk0jkB14y4W3cJo5ueEu1wY_jjN-R_m2xaQ30,10752
+vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
+vision_agent/clients/landing_public_api.py,sha256=lU2ev6E8NICmR8DMUljuGcVFy5VNJQ4WQkWC8WnnJEc,1503
+vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
+vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
+vision_agent/configs/anthropic_openai_config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
+vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
+vision_agent/lmm/__init__.py,sha256=xk2Rn8Zgpy2xwYaOGHzy4tXxnxo2aj6SkpNjeJ8yxcY,111
+vision_agent/lmm/lmm.py,sha256=arwfYPWme_RxCxSpEQ0ZkpHO22GFPCwVeoSvXqLPOAk,19288
+vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
+vision_agent/tools/__init__.py,sha256=zopUrANPx7p0NGy6BxmEaYhDrj8DX8w7BLfgmCbz-mU,2897
+vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
+vision_agent/tools/planner_tools.py,sha256=Mk3N-I-Qs4ezeyv8EL9BxdxmJG5oWiH5bFkvgwJKB0s,14660
+vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
+vision_agent/tools/tool_utils.py,sha256=xJRWF96Ge9RvhhVHrOtifjUYoc4HIJ2y7c2VOQ2Lp8s,10152
+vision_agent/tools/tools.py,sha256=3B3xWFVA3qfAO6ySSQ2yUPUAiTrgJomL48hLO_VP6RQ,106015
+vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
+vision_agent/utils/__init__.py,sha256=QKk4zVjMwGxQI0MQ-aZZA50N-qItxRY4EB9CwQkZ2HY,185
+vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
+vision_agent/utils/execute.py,sha256=vOEP5Ys7S2lc0_7pOJbgk7OaWi85hrCNu9_8Bo3zk6I,29356
+vision_agent/utils/image_utils.py,sha256=z_ONgcza125B10NkoGwPOzXnL470bpTWZbkB16NeeH0,12188
+vision_agent/utils/sim.py,sha256=DYya76dYVtifFyXilMLxBzGgyfyeqhEwU4RJ4894lCI,9796
+vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
+vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
+vision_agent/utils/video_tracking.py,sha256=wK5dOutqV2t2aeaxedstCBa7xy-NNQE0-QZqKu1QUds,9498
+vision_agent-0.2.231.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.231.dist-info/METADATA,sha256=N8t9F4hZ4bgyZeDhrVepMZzO5dtRmzRB8VI6fq1fFAA,5760
+vision_agent-0.2.231.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.231.dist-info/RECORD,,
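
Each RECORD row follows the wheel-standard shape `path,sha256=<digest>,size`, where the digest is the urlsafe base64 of the file's SHA-256 with trailing `=` padding stripped (per PEP 376/427). A sketch of how one entry could be recomputed for verification (the path is illustrative):

```python
import base64
import hashlib

def record_hash(path: str) -> str:
    # digest encoded exactly as wheel RECORD expects: urlsafe b64, no padding
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

# e.g. record_hash("vision_agent/utils/sim.py") should return
# "sha256=DYya76dYVtifFyXilMLxBzGgyfyeqhEwU4RJ4894lCI" for the packaged file
```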