vision-agent 0.2.161__py3-none-any.whl → 0.2.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
1
+ USER_REQ = """
2
+ ## User Request
3
+ {user_request}
4
+ """
5
+
6
+ PLAN = """
7
+ **Context**:
8
+ {context}
9
+
10
+ **Tools Available**:
11
+ {tool_desc}
12
+
13
+ **Previous Feedback**:
14
+ {feedback}
15
+
16
+ **Instructions**:
17
+ 1. Based on the context and tools you have available, create a plan of subtasks to achieve the user request.
18
+ 2. For each subtask, be sure to include the tool(s) you want to use to accomplish that subtask.
19
+ 3. Output three different plans, each utilizing a different strategy or set of tools, ordering them from most likely to least likely to succeed.
20
+
21
+ Output a list of jsons in the following format:
22
+
23
+ ```json
24
+ {{
25
+ "plan1":
26
+ {{
27
+ "thoughts": str # your thought process for choosing this plan
28
+ "instructions": [
29
+ str # what you should do in this task associated with a tool
30
+ ]
31
+ }},
32
+ "plan2": ...,
33
+ "plan3": ...
34
+ }}
35
+ ```
36
+ """
37
+
38
+ TEST_PLANS = """
39
+ **Role**: You are a software programmer responsible for testing different tools.
40
+
41
+ **Task**: Your responsibility is to take a set of several plans and test the different tools for each plan.
42
+
43
+ **Documentation**:
44
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`.
45
+
46
+ {docstring}
47
+
48
+ **Plans**:
49
+ {plans}
50
+
51
+ **Previous Attempts**:
52
+ {previous_attempts}
53
+
54
+ **Examples**:
55
+ --- EXAMPLE1 ---
56
+ plan1:
57
+ - Load the image from the provided file path 'image.jpg'.
58
+ - Use the 'owl_v2_image' tool with the prompt 'person' to detect and count the number of people in the image.
59
+ plan2:
60
+ - Load the image from the provided file path 'image.jpg'.
61
+ - Use the 'florence2_sam2_image' tool with the prompt 'person' to detect and count the number of people in the image.
62
+ - Count the number of detected objects labeled as 'person'.
63
+ plan3:
64
+ - Load the image from the provided file path 'image.jpg'.
65
+ - Use the 'countgd_counting' tool to count the dominant foreground object, which in this case is people.
66
+
67
+ ```python
68
+ from vision_agent.tools import load_image, owl_v2_image, florence2_sam2_image, countgd_counting
69
+ image = load_image("image.jpg")
70
+ owl_v2_out = owl_v2_image("person", image)
71
+
72
+ f2s2_out = florence2_sam2_image("person", image)
73
+ # strip out the masks from the output because they don't provide useful information when printed
74
+ f2s2_out = [{{k: v for k, v in o.items() if k != "mask"}} for o in f2s2_out]
75
+
76
+ cgd_out = countgd_counting(image)
77
+
78
+ final_out = {{"owl_v2_image": owl_v2_out, "florence2_sam2_image": f2s2_out, "countgd_counting": cgd_out}}
79
+ print(final_out)
80
+ --- END EXAMPLE1 ---
81
+
82
+ --- EXAMPLE2 ---
83
+ plan1:
84
+ - Extract frames from 'video.mp4' at 10 FPS using the 'extract_frames_and_timestamps' tool.
85
+ - Use the 'owl_v2_video' tool with the prompt 'person' to detect where the people are in the video.
86
+ plan2:
87
+ - Extract frames from 'video.mp4' at 10 FPS using the 'extract_frames_and_timestamps' tool.
88
+ - Use the 'florence2_phrase_grounding' tool with the prompt 'person' to detect where the people are in the video.
89
+ plan3:
90
+ - Extract frames from 'video.mp4' at 10 FPS using the 'extract_frames_and_timestamps' tool.
91
+ - Use the 'florence2_sam2_video_tracking' tool with the prompt 'person' to detect where the people are in the video.
92
+
93
+
94
+ ```python
95
+ import numpy as np
96
+ from vision_agent.tools import extract_frames_and_timestamps, owl_v2_video, florence2_phrase_grounding, florence2_sam2_video_tracking
97
+
98
+ # sample at 1 FPS and use the first 10 frames to reduce processing time
99
+ frames = extract_frames_and_timestamps("video.mp4", 1)
100
+ frames = [f["frame"] for f in frames][:10]
101
+
102
+ # strip arrays from the output to make it easier to read
103
+ def remove_arrays(o):
104
+ if isinstance(o, list):
105
+ return [remove_arrays(e) for e in o]
106
+ elif isinstance(o, dict):
107
+ return {{k: remove_arrays(v) for k, v in o.items()}}
108
+ elif isinstance(o, np.ndarray):
109
+ return "array: " + str(o.shape)
110
+ else:
111
+ return o
112
+
113
+ # return the counts of each label per frame to help determine the stability of the model results
114
+ def get_counts(preds):
115
+ counts = {{}}
116
+ for i, pred_frame in enumerate(preds):
117
+ counts_i = {{}}
118
+ for pred in pred_frame:
119
+ label = pred["label"].split(":")[1] if ":" in pred["label"] else pred["label"]
120
+ counts_i[label] = counts_i.get(label, 0) + 1
121
+ counts[f"frame_{{i}}"] = counts_i
122
+ return counts
123
+
124
+
125
+ # plan1
126
+ owl_v2_out = owl_v2_video("person", frames)
127
+ owl_v2_counts = get_counts(owl_v2_out)
128
+
129
+ # plan2
130
+ florence2_out = [florence2_phrase_grounding("person", f) for f in frames]
131
+ florence2_counts = get_counts(florence2_out)
132
+
133
+ # plan3
134
+ f2s2_tracking_out = florence2_sam2_video_tracking("person", frames)
135
+ f2s2_out = remove_arrays(f2s2_tracking_out)
136
+ f2s2_counts = get_counts(f2s2_tracking_out)
137
+
138
+ final_out = {{
139
+ "owl_v2_video": owl_v2_out,
140
+ "florence2_phrase_grounding": florence2_out,
141
+ "florence2_sam2_video_tracking": f2s2_out,
142
+ }}
143
+
144
+ counts = {{
145
+ "owl_v2_video": owl_v2_counts,
146
+ "florence2_phrase_grounding": florence2_counts,
147
+ "florence2_sam2_video_tracking": f2s2_counts,
148
+ }}
149
+
150
+ print(final_out)
151
+ print(labels_and_scores)
152
+ print(counts)
153
+ ```
154
+ --- END EXAMPLE2 ---
155
+
156
+ **Instructions**:
157
+ 1. Write a program to load the media and call each tool and print its output along with other relevant information.
158
+ 2. Create a dictionary where the keys are the tool name and the values are the tool outputs. Remove numpy arrays from the printed dictionary.
159
+ 3. Your test case MUST run only on the given images which are {media}
160
+ 4. Print this final dictionary.
161
+ 5. For video input, sample at 1 FPS and use the first 10 frames only to reduce processing time.
162
+ """
163
+
164
+ PREVIOUS_FAILED = """
165
+ **Previous Failed Attempts**:
166
+ You previously ran this code:
167
+ ```python
168
+ {code}
169
+ ```
170
+
171
+ But got the following error or no stdout:
172
+ {error}
173
+ """
174
+
175
+ PICK_PLAN = """
176
+ **Role**: You are an advanced AI model that can understand the user request and construct plans to accomplish it.
177
+
178
+ **Task**: Your responsibility is to pick the best plan from the three plans provided.
179
+
180
+ **Context**:
181
+ {context}
182
+
183
+ **Plans**:
184
+ {plans}
185
+
186
+ **Tool Output**:
187
+ {tool_output}
188
+
189
+ **Instructions**:
190
+ 1. Re-read the user request, plans, tool outputs and examine the image.
191
+ 2. Solve the problem yourself given the image and pick the most accurate plan that matches your solution the best.
192
+ 3. Add modifications to improve the plan including: changing a tool, adding thresholds, string matching.
193
+ 4. Output a JSON object with the following format:
194
+ {{
195
+ "predicted_answer": str # the answer you would expect from the best plan
196
+ "thoughts": str # your thought process for choosing the best plan over other plans and any modifications you made
197
+ "best_plan": str # the best plan you have chosen, must be `plan1`, `plan2`, or `plan3`
198
+ }}
199
+ """
@@ -1,6 +1,5 @@
1
1
  from typing import Callable, List, Optional
2
2
 
3
- from .meta_tools import META_TOOL_DOCSTRING, Artifacts
4
3
  from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
5
4
  from .tool_utils import get_tool_descriptions_by_names
6
5
  from .tools import (
@@ -11,9 +11,12 @@ from typing import Any, Dict, List, Optional, Union
11
11
 
12
12
  import numpy as np
13
13
  from IPython.display import display
14
+ from redbaron import RedBaron # type: ignore
14
15
 
15
16
  import vision_agent as va
17
+ from vision_agent.agent.agent_utils import extract_json
16
18
  from vision_agent.clients.landing_public_api import LandingPublicAPI
19
+ from vision_agent.lmm import AnthropicLMM
17
20
  from vision_agent.lmm.types import Message
18
21
  from vision_agent.tools.tool_utils import get_tool_documentation
19
22
  from vision_agent.tools.tools import TOOL_DESCRIPTIONS
@@ -22,8 +25,6 @@ from vision_agent.utils.execute import Execution, MimeType
22
25
  from vision_agent.utils.image_utils import convert_to_b64, numpy_to_bytes
23
26
  from vision_agent.utils.video import frames_to_bytes
24
27
 
25
- # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
26
-
27
28
  CURRENT_FILE = None
28
29
  CURRENT_LINE = 0
29
30
  DEFAULT_WINDOW_SIZE = 100
@@ -152,6 +153,9 @@ class Artifacts:
152
153
  return name in self.artifacts
153
154
 
154
155
 
156
+ # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
157
+
158
+
155
159
  def format_lines(lines: List[str], start_idx: int) -> str:
156
160
  output = ""
157
161
  for i, line in enumerate(lines):
@@ -338,6 +342,85 @@ def edit_code_artifact(
338
342
  return open_code_artifact(artifacts, name, cur_line)
339
343
 
340
344
 
345
+ def generate_vision_plan(
346
+ artifacts: Artifacts,
347
+ name: str,
348
+ chat: str,
349
+ media: List[str],
350
+ test_multi_plan: bool = True,
351
+ custom_tool_names: Optional[List[str]] = None,
352
+ ) -> str:
353
+ """Generates a plan to solve vision based tasks.
354
+
355
+ Parameters:
356
+ artifacts (Artifacts): The artifacts object to save the plan to.
357
+ name (str): The name of the artifact to save the plan context to.
358
+ chat (str): The chat message from the user.
359
+ media (List[str]): The media files to use.
360
+ test_multi_plan (bool): Do not change this parameter.
361
+ custom_tool_names (Optional[List[str]]): Do not change this parameter.
362
+
363
+ Returns:
364
+ str: The generated plan.
365
+
366
+ Examples
367
+ --------
368
+ >>> generate_vision_plan(artifacts, "plan.json", "Can you detect the dogs in this image?", ["image.jpg"])
369
+ [Start Plan Context]
370
+ plan1: This is a plan to detect dogs in an image
371
+ -load image
372
+ -detect dogs
373
+ -return detections
374
+ [End Plan Context]
375
+ """
376
+
377
+ if ZMQ_PORT is not None:
378
+ agent = va.agent.VisionAgentPlanner(
379
+ report_progress_callback=lambda inp: report_progress_callback(
380
+ int(ZMQ_PORT), inp
381
+ )
382
+ )
383
+ else:
384
+ agent = va.agent.VisionAgentPlanner()
385
+
386
+ fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
387
+ response = agent.generate_plan(
388
+ fixed_chat,
389
+ test_multi_plan=test_multi_plan,
390
+ custom_tool_names=custom_tool_names,
391
+ )
392
+ if response.test_results is not None:
393
+ redisplay_results(response.test_results)
394
+ response.test_results = None
395
+ artifacts[name] = response.model_dump_json()
396
+ media_names = extract_json(
397
+ AnthropicLMM()( # type: ignore
398
+ f"""Extract any media file names from this output in the following JSON format:
399
+ {{"media": ["image1.jpg", "image2.jpg"]}}
400
+
401
+ {artifacts[name]}"""
402
+ )
403
+ )
404
+ if "media" in media_names and isinstance(media_names, dict):
405
+ for media in media_names["media"]:
406
+ if isinstance(media, str):
407
+ with open(media, "rb") as f:
408
+ artifacts[media] = f.read()
409
+
410
+ output_str = f"[Start Plan Context, saved at {name}]"
411
+ for plan in response.plans.keys():
412
+ output_str += f"\n{plan}: {response.plans[plan]['thoughts'].strip()}\n" # type: ignore
413
+ output_str += " -" + "\n -".join(
414
+ e.strip() for e in response.plans[plan]["instructions"]
415
+ )
416
+
417
+ output_str += f"\nbest plan: {response.best_plan}\n"
418
+ output_str += "thoughts: " + response.plan_thoughts.strip() + "\n"
419
+ output_str += "[End Plan Context]"
420
+ print(output_str)
421
+ return output_str
422
+
423
+
341
424
  def generate_vision_code(
342
425
  artifacts: Artifacts,
343
426
  name: str,
@@ -368,7 +451,6 @@ def generate_vision_code(
368
451
  dogs = owl_v2("dog", image)
369
452
  return dogs
370
453
  """
371
-
372
454
  if ZMQ_PORT is not None:
373
455
  agent = va.agent.VisionAgentCoder(
374
456
  report_progress_callback=lambda inp: report_progress_callback(
@@ -379,7 +461,7 @@ def generate_vision_code(
379
461
  agent = va.agent.VisionAgentCoder(verbosity=int(VERBOSITY))
380
462
 
381
463
  fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
382
- response = agent.chat_with_workflow(
464
+ response = agent.generate_code(
383
465
  fixed_chat,
384
466
  test_multi_plan=test_multi_plan,
385
467
  custom_tool_names=custom_tool_names,
@@ -411,7 +493,7 @@ def edit_vision_code(
411
493
  name: str,
412
494
  chat_history: List[str],
413
495
  media: List[str],
414
- customized_tool_names: Optional[List[str]] = None,
496
+ custom_tool_names: Optional[List[str]] = None,
415
497
  ) -> str:
416
498
  """Edits python code to solve a vision based task.
417
499
 
@@ -419,7 +501,7 @@ def edit_vision_code(
419
501
  artifacts (Artifacts): The artifacts object to save the code to.
420
502
  name (str): The file path to the code.
421
503
  chat_history (List[str]): The chat history used to generate the code.
422
- customized_tool_names (Optional[List[str]]): Do not change this parameter.
504
+ custom_tool_names (Optional[List[str]]): Do not change this parameter.
423
505
 
424
506
  Returns:
425
507
  str: The edited code.
@@ -459,10 +541,10 @@ def edit_vision_code(
459
541
  fixed_chat_history.append({"role": "assistant", "content": code})
460
542
  fixed_chat_history.append({"role": "user", "content": chat})
461
543
 
462
- response = agent.chat_with_workflow(
544
+ response = agent.generate_code(
463
545
  fixed_chat_history,
464
546
  test_multi_plan=False,
465
- custom_tool_names=customized_tool_names,
547
+ custom_tool_names=custom_tool_names,
466
548
  )
467
549
  redisplay_results(response["test_result"])
468
550
  code = response["code"]
@@ -625,7 +707,7 @@ def get_diff_with_prompts(name: str, before: str, after: str) -> str:
625
707
  def use_extra_vision_agent_args(
626
708
  code: str,
627
709
  test_multi_plan: bool = True,
628
- customized_tool_names: Optional[List[str]] = None,
710
+ custom_tool_names: Optional[List[str]] = None,
629
711
  ) -> str:
630
712
  """This is for forcing arguments passed by the user to VisionAgent into the
631
713
  VisionAgentCoder call.
@@ -633,36 +715,25 @@ def use_extra_vision_agent_args(
633
715
  Parameters:
634
716
  code (str): The code to edit.
635
717
  test_multi_plan (bool): Do not change this parameter.
636
- customized_tool_names (Optional[List[str]]): Do not change this parameter.
718
+ custom_tool_names (Optional[List[str]]): Do not change this parameter.
637
719
 
638
720
  Returns:
639
721
  str: The edited code.
640
722
  """
641
- generate_pattern = r"generate_vision_code\(\s*([^\)]+)\s*\)"
642
-
643
- def generate_replacer(match: re.Match) -> str:
644
- arg = match.group(1)
645
- out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
646
- if customized_tool_names is not None:
647
- out_str += f", custom_tool_names={customized_tool_names})"
648
- else:
649
- out_str += ")"
650
- return out_str
651
-
652
- edit_pattern = r"edit_vision_code\(\s*([^\)]+)\s*\)"
653
-
654
- def edit_replacer(match: re.Match) -> str:
655
- arg = match.group(1)
656
- out_str = f"edit_vision_code({arg}"
657
- if customized_tool_names is not None:
658
- out_str += f", custom_tool_names={customized_tool_names})"
659
- else:
660
- out_str += ")"
661
- return out_str
662
-
663
- new_code = re.sub(generate_pattern, generate_replacer, code)
664
- new_code = re.sub(edit_pattern, edit_replacer, new_code)
665
- return new_code
723
+ red = RedBaron(code)
724
+ for node in red:
725
+ # seems to always be atomtrailers not call type
726
+ if node.type == "atomtrailers":
727
+ if (
728
+ node.name.value == "generate_vision_code"
729
+ or node.name.value == "edit_vision_code"
730
+ ):
731
+ node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
732
+
733
+ if custom_tool_names is not None:
734
+ node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
735
+ cleaned_code = red.dumps().strip()
736
+ return cleaned_code if isinstance(cleaned_code, str) else code
666
737
 
667
738
 
668
739
  def use_object_detection_fine_tuning(
@@ -748,6 +819,7 @@ META_TOOL_DOCSTRING = get_tool_documentation(
748
819
  open_code_artifact,
749
820
  create_code_artifact,
750
821
  edit_code_artifact,
822
+ generate_vision_plan,
751
823
  generate_vision_code,
752
824
  edit_vision_code,
753
825
  write_media_artifact,
@@ -1923,7 +1923,7 @@ def overlay_bounding_boxes(
1923
1923
  bboxes = bbox_int[i]
1924
1924
  bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
1925
1925
 
1926
- if len(bboxes) > 20:
1926
+ if len(bboxes) > 40:
1927
1927
  pil_image = _plot_counting(pil_image, bboxes, color)
1928
1928
  else:
1929
1929
  width, height = pil_image.size
@@ -2117,7 +2117,7 @@ def _plot_counting(
2117
2117
  colors: Dict[str, Tuple[int, int, int]],
2118
2118
  ) -> Image.Image:
2119
2119
  width, height = image.size
2120
- fontsize = max(10, int(min(width, height) / 80))
2120
+ fontsize = max(12, int(min(width, height) / 40))
2121
2121
  draw = ImageDraw.Draw(image)
2122
2122
  font = ImageFont.truetype(
2123
2123
  str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.161
3
+ Version: 0.2.163
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -27,6 +27,7 @@ Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
27
27
  Requires-Dist: pydantic (==2.7.4)
28
28
  Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
29
29
  Requires-Dist: pytube (==15.0.0)
30
+ Requires-Dist: redbaron (>=0.9.2,<0.10.0)
30
31
  Requires-Dist: requests (>=2.0.0,<3.0.0)
31
32
  Requires-Dist: rich (>=13.7.1,<14.0.0)
32
33
  Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -142,7 +143,7 @@ continuing, for example it may want to execute code and look at the output befor
142
143
  letting the user respond.
143
144
 
144
145
  ### Chatting and Artifacts
145
- If you run `chat_with_code` you will also notice an `Artifact` object. `Artifact`'s
146
+ If you run `chat_with_artifacts` you will also notice an `Artifact` object. `Artifact`'s
146
147
  are a way to sync files between local and remote environments. The agent will read and
147
148
  write to the artifact object, which is just a pickle object, when it wants to save or
148
149
  load files.
@@ -159,7 +160,7 @@ with open("image.png", "rb") as f:
159
160
  artifacts["image.png"] = f.read()
160
161
 
161
162
  agent = va.agent.VisionAgent()
162
- response, artifacts = agent.chat_with_code(
163
+ response, artifacts = agent.chat_with_artifacts(
163
164
  [
164
165
  {
165
166
  "role": "user",
@@ -339,11 +340,11 @@ mode by passing in the verbose argument:
339
340
  ```
340
341
 
341
342
  ### Detailed Usage
342
- You can also have it return more information by calling `chat_with_workflow`. The format
343
+ You can also have it return more information by calling `generate_code`. The format
343
344
  of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
344
345
 
345
346
  ```python
346
- >>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
347
+ >>> results = agent.generate_code([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
347
348
  >>> print(results)
348
349
  {
349
350
  "code": "from vision_agent.tools import ..."
@@ -372,7 +373,7 @@ conv = [
372
373
  "media": ["workers.png"],
373
374
  }
374
375
  ]
375
- result = agent.chat_with_workflow(conv)
376
+ result = agent.generate_code(conv)
376
377
  code = result["code"]
377
378
  conv.append({"role": "assistant", "content": code})
378
379
  conv.append(
@@ -381,7 +382,7 @@ conv.append(
381
382
  "content": "Can you also return the number of workers wearing safety gear?",
382
383
  }
383
384
  )
384
- result = agent.chat_with_workflow(conv)
385
+ result = agent.generate_code(conv)
385
386
  ```
386
387
 
387
388
 
@@ -1,10 +1,12 @@
1
1
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
- vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
2
+ vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
- vision_agent/agent/agent_utils.py,sha256=eIpLz2NunEqEsBBrECJaD34-2uY0bsFNnW-XKfqqohs,2518
5
- vision_agent/agent/vision_agent.py,sha256=x0oSuQk-rcERUUdZp29FB77Ua-eyQD3fLi_UfmsBTV0,20761
6
- vision_agent/agent/vision_agent_coder.py,sha256=2ZoGikn2nakGDfs20XRshZjQUyvbw6l47UhExJAYkqI,38515
7
- vision_agent/agent/vision_agent_coder_prompts.py,sha256=BmbTMhth4v1qLexuoSeyo47QQ0kPQvL1pLbCJHMsWDw,18910
4
+ vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
5
+ vision_agent/agent/vision_agent.py,sha256=MUigVufYML2sYn9Hsngswa77XxlZBgCwQyBfK8tlsio,22551
6
+ vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
7
+ vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
8
+ vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
9
+ vision_agent/agent/vision_agent_planner_prompts.py,sha256=JDARUzko2HZdxkBtcy6wuP9DCCmbqhK_gnVgrjr6l1k,6691
8
10
  vision_agent/agent/vision_agent_prompts.py,sha256=LZ9Bnx7ZFkqbNOMqwfdiWZU4niND9Z1ArcFHNSn_jzA,11187
9
11
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
12
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
@@ -14,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
14
16
  vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
15
17
  vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
16
18
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
17
- vision_agent/tools/__init__.py,sha256=PLVbfTMjKxQlHIRWnq9b785W9a52AXQS_tOa0tkQ0ZY,2420
18
- vision_agent/tools/meta_tools.py,sha256=BF5-fVshLhhpck5lJErKxnfPu9YxudBhR7ar_qA9Mjo,25889
19
+ vision_agent/tools/__init__.py,sha256=50wwisjudmZn7_SEwigTiiDxQ0HXbSIhVI4O8kvE9Es,2365
20
+ vision_agent/tools/meta_tools.py,sha256=MULJrZiTODOAN20TGceLdXcwoSGMNaE7bQbywySITnA,28458
19
21
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
22
  vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
21
- vision_agent/tools/tools.py,sha256=vS1yCk3Fza9eYOTHPFwwroo_ULdw2ztMQMb81x1U5f8,78524
23
+ vision_agent/tools/tools.py,sha256=uWyR4pebTezXx9IWCKX4SL5sB9u_7LdRP0-KWU52zsU,78524
22
24
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
23
25
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
24
26
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
27
29
  vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
28
30
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
31
  vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
30
- vision_agent-0.2.161.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.161.dist-info/METADATA,sha256=81mQ74IJal478wgLMjkneAVg0kE89VVjX9Wa_hL0lMo,17753
32
- vision_agent-0.2.161.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.161.dist-info/RECORD,,
32
+ vision_agent-0.2.163.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
+ vision_agent-0.2.163.dist-info/METADATA,sha256=grneiMhM3Lwzi9ex9JL8A0R5cmpqyOHaaTcLGRfLwWs,17785
34
+ vision_agent-0.2.163.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
+ vision_agent-0.2.163.dist-info/RECORD,,