vision-agent 0.2.161__py3-none-any.whl → 0.2.163__py3-none-any.whl

@@ -0,0 +1,199 @@
+ USER_REQ = """
+ ## User Request
+ {user_request}
+ """
+
+ PLAN = """
+ **Context**:
+ {context}
+
+ **Tools Available**:
+ {tool_desc}
+
+ **Previous Feedback**:
+ {feedback}
+
+ **Instructions**:
+ 1. Based on the context and tools you have available, create a plan of subtasks to achieve the user request.
+ 2. For each subtask, be sure to include the tool(s) you want to use to accomplish that subtask.
+ 3. Output three different plans, each utilizing a different strategy or set of tools, ordering them from most likely to least likely to succeed.
+
+ Output a JSON object in the following format:
+
+ ```json
+ {{
+     "plan1":
+         {{
+             "thoughts": str # your thought process for choosing this plan
+             "instructions": [
+                 str # what you should do in this task associated with a tool
+             ]
+         }},
+     "plan2": ...,
+     "plan3": ...
+ }}
+ ```
+ """
+
+ TEST_PLANS = """
+ **Role**: You are a software programmer responsible for testing different tools.
+
+ **Task**: Your responsibility is to take a set of several plans and test the different tools for each plan.
+
+ **Documentation**:
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`.
+
+ {docstring}
+
+ **Plans**:
+ {plans}
+
+ **Previous Attempts**:
+ {previous_attempts}
+
+ **Examples**:
+ --- EXAMPLE1 ---
+ plan1:
+ - Load the image from the provided file path 'image.jpg'.
+ - Use the 'owl_v2_image' tool with the prompt 'person' to detect and count the number of people in the image.
+ plan2:
+ - Load the image from the provided file path 'image.jpg'.
+ - Use the 'florence2_sam2_image' tool with the prompt 'person' to detect and count the number of people in the image.
+ - Count the number of detected objects labeled as 'person'.
+ plan3:
+ - Load the image from the provided file path 'image.jpg'.
+ - Use the 'countgd_counting' tool to count the dominant foreground object, which in this case is people.
+
+ ```python
+ from vision_agent.tools import load_image, owl_v2_image, florence2_sam2_image, countgd_counting
+ image = load_image("image.jpg")
+ owl_v2_out = owl_v2_image("person", image)
+
+ f2s2_out = florence2_sam2_image("person", image)
+ # strip out the masks from the output because they don't provide useful information when printed
+ f2s2_out = [{{k: v for k, v in o.items() if k != "mask"}} for o in f2s2_out]
+
+ cgd_out = countgd_counting(image)
+
+ final_out = {{"owl_v2_image": owl_v2_out, "florence2_sam2_image": f2s2_out, "countgd_counting": cgd_out}}
+ print(final_out)
+ ```
+ --- END EXAMPLE1 ---
+
+ --- EXAMPLE2 ---
+ plan1:
+ - Extract frames from 'video.mp4' at 10 FPS using the 'extract_frames_and_timestamps' tool.
+ - Use the 'owl_v2_video' tool with the prompt 'person' to detect where the people are in the video.
+ plan2:
+ - Extract frames from 'video.mp4' at 10 FPS using the 'extract_frames_and_timestamps' tool.
+ - Use the 'florence2_phrase_grounding' tool with the prompt 'person' to detect where the people are in the video.
+ plan3:
+ - Extract frames from 'video.mp4' at 10 FPS using the 'extract_frames_and_timestamps' tool.
+ - Use the 'florence2_sam2_video_tracking' tool with the prompt 'person' to detect where the people are in the video.
+
+ ```python
+ import numpy as np
+ from vision_agent.tools import extract_frames_and_timestamps, owl_v2_video, florence2_phrase_grounding, florence2_sam2_video_tracking
+
+ # sample at 1 FPS and use the first 10 frames to reduce processing time
+ frames = extract_frames_and_timestamps("video.mp4", 1)
+ frames = [f["frame"] for f in frames][:10]
+
+ # strip arrays from the output to make it easier to read
+ def remove_arrays(o):
+     if isinstance(o, list):
+         return [remove_arrays(e) for e in o]
+     elif isinstance(o, dict):
+         return {{k: remove_arrays(v) for k, v in o.items()}}
+     elif isinstance(o, np.ndarray):
+         return "array: " + str(o.shape)
+     else:
+         return o
+
+ # return the counts of each label per frame to help determine the stability of the model results
+ def get_counts(preds):
+     counts = {{}}
+     for i, pred_frame in enumerate(preds):
+         counts_i = {{}}
+         for pred in pred_frame:
+             label = pred["label"].split(":")[1] if ":" in pred["label"] else pred["label"]
+             counts_i[label] = counts_i.get(label, 0) + 1
+         counts[f"frame_{{i}}"] = counts_i
+     return counts
+
+
+ # plan1
+ owl_v2_out = owl_v2_video("person", frames)
+ owl_v2_counts = get_counts(owl_v2_out)
+
+ # plan2
+ florence2_out = [florence2_phrase_grounding("person", f) for f in frames]
+ florence2_counts = get_counts(florence2_out)
+
+ # plan3
+ f2s2_tracking_out = florence2_sam2_video_tracking("person", frames)
+ f2s2_tracking_out = remove_arrays(f2s2_tracking_out)
+ f2s2_counts = get_counts(f2s2_tracking_out)
+
+ final_out = {{
+     "owl_v2_video": owl_v2_out,
+     "florence2_phrase_grounding": florence2_out,
+     "florence2_sam2_video_tracking": f2s2_tracking_out,
+ }}
+
+ counts = {{
+     "owl_v2_video": owl_v2_counts,
+     "florence2_phrase_grounding": florence2_counts,
+     "florence2_sam2_video_tracking": f2s2_counts,
+ }}
+
+ print(final_out)
+ print(counts)
+ ```
+ --- END EXAMPLE2 ---
+
+ **Instructions**:
+ 1. Write a program to load the media, call each tool, and print its output along with other relevant information.
+ 2. Create a dictionary where the keys are the tool names and the values are the tool outputs. Remove numpy arrays from the printed dictionary.
+ 3. Your test case MUST run only on the given media files, which are {media}.
+ 4. Print this final dictionary.
+ 5. For video input, sample at 1 FPS and use only the first 10 frames to reduce processing time.
+ """
+
+ PREVIOUS_FAILED = """
+ **Previous Failed Attempts**:
+ You previously ran this code:
+ ```python
+ {code}
+ ```
+
+ But got the following error or no stdout:
+ {error}
+ """
+
+ PICK_PLAN = """
+ **Role**: You are an advanced AI model that can understand the user request and construct plans to accomplish it.
+
+ **Task**: Your responsibility is to pick the best plan from the three plans provided.
+
+ **Context**:
+ {context}
+
+ **Plans**:
+ {plans}
+
+ **Tool Output**:
+ {tool_output}
+
+ **Instructions**:
+ 1. Re-read the user request, plans, and tool outputs, and examine the image.
+ 2. Solve the problem yourself given the image and pick the plan that best matches your solution.
+ 3. Add modifications to improve the plan, such as changing a tool, adding thresholds, or using string matching.
+ 4. Output a JSON object in the following format:
+ {{
+     "predicted_answer": str # the answer you would expect from the best plan
+     "thoughts": str # your thought process for choosing the best plan over other plans and any modifications you made
+     "best_plan": str # the best plan you have chosen, must be `plan1`, `plan2`, or `plan3`
+ }}
+ """
@@ -1,6 +1,5 @@
  from typing import Callable, List, Optional
 
- from .meta_tools import META_TOOL_DOCSTRING, Artifacts
  from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
  from .tool_utils import get_tool_descriptions_by_names
  from .tools import (
@@ -11,9 +11,12 @@ from typing import Any, Dict, List, Optional, Union
 
  import numpy as np
  from IPython.display import display
+ from redbaron import RedBaron  # type: ignore
 
  import vision_agent as va
+ from vision_agent.agent.agent_utils import extract_json
  from vision_agent.clients.landing_public_api import LandingPublicAPI
+ from vision_agent.lmm import AnthropicLMM
  from vision_agent.lmm.types import Message
  from vision_agent.tools.tool_utils import get_tool_documentation
  from vision_agent.tools.tools import TOOL_DESCRIPTIONS
@@ -22,8 +25,6 @@ from vision_agent.utils.execute import Execution, MimeType
  from vision_agent.utils.image_utils import convert_to_b64, numpy_to_bytes
  from vision_agent.utils.video import frames_to_bytes
 
- # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
-
  CURRENT_FILE = None
  CURRENT_LINE = 0
  DEFAULT_WINDOW_SIZE = 100
@@ -152,6 +153,9 @@ class Artifacts:
          return name in self.artifacts
 
 
+ # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
+
+
  def format_lines(lines: List[str], start_idx: int) -> str:
      output = ""
      for i, line in enumerate(lines):
@@ -338,6 +342,85 @@ def edit_code_artifact(
      return open_code_artifact(artifacts, name, cur_line)
 
 
+ def generate_vision_plan(
+     artifacts: Artifacts,
+     name: str,
+     chat: str,
+     media: List[str],
+     test_multi_plan: bool = True,
+     custom_tool_names: Optional[List[str]] = None,
+ ) -> str:
+     """Generates a plan to solve vision based tasks.
+
+     Parameters:
+         artifacts (Artifacts): The artifacts object to save the plan to.
+         name (str): The name of the artifact to save the plan context to.
+         chat (str): The chat message from the user.
+         media (List[str]): The media files to use.
+         test_multi_plan (bool): Do not change this parameter.
+         custom_tool_names (Optional[List[str]]): Do not change this parameter.
+
+     Returns:
+         str: The generated plan.
+
+     Examples
+     --------
+         >>> generate_vision_plan(artifacts, "plan.json", "Can you detect the dogs in this image?", ["image.jpg"])
+         [Start Plan Context]
+         plan1: This is a plan to detect dogs in an image
+         -load image
+         -detect dogs
+         -return detections
+         [End Plan Context]
+     """
+
+     if ZMQ_PORT is not None:
+         agent = va.agent.VisionAgentPlanner(
+             report_progress_callback=lambda inp: report_progress_callback(
+                 int(ZMQ_PORT), inp
+             )
+         )
+     else:
+         agent = va.agent.VisionAgentPlanner()
+
+     fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
+     response = agent.generate_plan(
+         fixed_chat,
+         test_multi_plan=test_multi_plan,
+         custom_tool_names=custom_tool_names,
+     )
+     if response.test_results is not None:
+         redisplay_results(response.test_results)
+     response.test_results = None
+     artifacts[name] = response.model_dump_json()
+     media_names = extract_json(
+         AnthropicLMM()(  # type: ignore
+             f"""Extract any media file names from this output in the following JSON format:
+ {{"media": ["image1.jpg", "image2.jpg"]}}
+
+ {artifacts[name]}"""
+         )
+     )
+     if isinstance(media_names, dict) and "media" in media_names:
+         for media in media_names["media"]:
+             if isinstance(media, str):
+                 with open(media, "rb") as f:
+                     artifacts[media] = f.read()
+
+     output_str = f"[Start Plan Context, saved at {name}]"
+     for plan in response.plans.keys():
+         output_str += f"\n{plan}: {response.plans[plan]['thoughts'].strip()}\n"  # type: ignore
+         output_str += " -" + "\n -".join(
+             e.strip() for e in response.plans[plan]["instructions"]
+         )
+
+     output_str += f"\nbest plan: {response.best_plan}\n"
+     output_str += "thoughts: " + response.plan_thoughts.strip() + "\n"
+     output_str += "[End Plan Context]"
+     print(output_str)
+     return output_str
+
+
  def generate_vision_code(
      artifacts: Artifacts,
      name: str,
@@ -368,7 +451,6 @@ def generate_vision_code(
              dogs = owl_v2("dog", image)
              return dogs
      """
-
      if ZMQ_PORT is not None:
          agent = va.agent.VisionAgentCoder(
              report_progress_callback=lambda inp: report_progress_callback(
@@ -379,7 +461,7 @@ def generate_vision_code(
          agent = va.agent.VisionAgentCoder(verbosity=int(VERBOSITY))
 
      fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
-     response = agent.chat_with_workflow(
+     response = agent.generate_code(
          fixed_chat,
          test_multi_plan=test_multi_plan,
          custom_tool_names=custom_tool_names,
@@ -411,7 +493,7 @@ def edit_vision_code(
      name: str,
      chat_history: List[str],
      media: List[str],
-     customized_tool_names: Optional[List[str]] = None,
+     custom_tool_names: Optional[List[str]] = None,
  ) -> str:
      """Edits python code to solve a vision based task.
 
@@ -419,7 +501,7 @@ def edit_vision_code(
          artifacts (Artifacts): The artifacts object to save the code to.
          name (str): The file path to the code.
          chat_history (List[str]): The chat history used to generate the code.
-         customized_tool_names (Optional[List[str]]): Do not change this parameter.
+         custom_tool_names (Optional[List[str]]): Do not change this parameter.
 
      Returns:
          str: The edited code.
@@ -459,10 +541,10 @@ def edit_vision_code(
      fixed_chat_history.append({"role": "assistant", "content": code})
      fixed_chat_history.append({"role": "user", "content": chat})
 
-     response = agent.chat_with_workflow(
+     response = agent.generate_code(
          fixed_chat_history,
          test_multi_plan=False,
-         custom_tool_names=customized_tool_names,
+         custom_tool_names=custom_tool_names,
      )
      redisplay_results(response["test_result"])
      code = response["code"]
@@ -625,7 +707,7 @@ def get_diff_with_prompts(name: str, before: str, after: str) -> str:
  def use_extra_vision_agent_args(
      code: str,
      test_multi_plan: bool = True,
-     customized_tool_names: Optional[List[str]] = None,
+     custom_tool_names: Optional[List[str]] = None,
  ) -> str:
      """This is for forcing arguments passed by the user to VisionAgent into the
      VisionAgentCoder call.
@@ -633,36 +715,25 @@ def use_extra_vision_agent_args(
      Parameters:
          code (str): The code to edit.
          test_multi_plan (bool): Do not change this parameter.
-         customized_tool_names (Optional[List[str]]): Do not change this parameter.
+         custom_tool_names (Optional[List[str]]): Do not change this parameter.
 
      Returns:
          str: The edited code.
      """
-     generate_pattern = r"generate_vision_code\(\s*([^\)]+)\s*\)"
-
-     def generate_replacer(match: re.Match) -> str:
-         arg = match.group(1)
-         out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
-         if customized_tool_names is not None:
-             out_str += f", custom_tool_names={customized_tool_names})"
-         else:
-             out_str += ")"
-         return out_str
-
-     edit_pattern = r"edit_vision_code\(\s*([^\)]+)\s*\)"
-
-     def edit_replacer(match: re.Match) -> str:
-         arg = match.group(1)
-         out_str = f"edit_vision_code({arg}"
-         if customized_tool_names is not None:
-             out_str += f", custom_tool_names={customized_tool_names})"
-         else:
-             out_str += ")"
-         return out_str
-
-     new_code = re.sub(generate_pattern, generate_replacer, code)
-     new_code = re.sub(edit_pattern, edit_replacer, new_code)
-     return new_code
+     red = RedBaron(code)
+     for node in red:
+         # these calls seem to always parse as atomtrailers nodes, not call nodes
+         if node.type == "atomtrailers":
+             if (
+                 node.name.value == "generate_vision_code"
+                 or node.name.value == "edit_vision_code"
+             ):
+                 node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
+
+                 if custom_tool_names is not None:
+                     node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
+     cleaned_code = red.dumps().strip()
+     return cleaned_code if isinstance(cleaned_code, str) else code
 
 
  def use_object_detection_fine_tuning(
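
The hunk above swaps a regex-based rewrite for a RedBaron (source-level AST) one. A minimal sketch of the transformation it performs (the input string is invented; only the `generate_vision_code` case is shown):

```python
# Illustrative sketch of the rewrite above: append extra keyword arguments
# to generate_vision_code(...) calls using RedBaron.
from redbaron import RedBaron

code = 'generate_vision_code(artifacts, "code.py", "detect dogs", ["image.jpg"])'
red = RedBaron(code)
for node in red:
    # a top-level call like f(...) parses as an atomtrailers node, not a call node
    if node.type == "atomtrailers" and node.name.value == "generate_vision_code":
        # node.value[1] is the call part; appending a string adds an argument
        node.value[1].value.append("test_multi_plan=True")
print(red.dumps())
# generate_vision_code(artifacts, "code.py", "detect dogs", ["image.jpg"], test_multi_plan=True)
```

Unlike the old `[^\)]+` regex, the AST-based approach handles nested parentheses inside the call arguments.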
@@ -748,6 +819,7 @@ META_TOOL_DOCSTRING = get_tool_documentation(
      open_code_artifact,
      create_code_artifact,
      edit_code_artifact,
+     generate_vision_plan,
      generate_vision_code,
      edit_vision_code,
      write_media_artifact,
@@ -1923,7 +1923,7 @@ def overlay_bounding_boxes(
          bboxes = bbox_int[i]
          bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
 
-         if len(bboxes) > 20:
+         if len(bboxes) > 40:
              pil_image = _plot_counting(pil_image, bboxes, color)
          else:
              width, height = pil_image.size
@@ -2117,7 +2117,7 @@ def _plot_counting(
      colors: Dict[str, Tuple[int, int, int]],
  ) -> Image.Image:
      width, height = image.size
-     fontsize = max(10, int(min(width, height) / 80))
+     fontsize = max(12, int(min(width, height) / 40))
      draw = ImageDraw.Draw(image)
      font = ImageFont.truetype(
          str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
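
For scale, a quick check of what the font-size change means for a 1920x1080 image (values computed from the two formulas in the hunk above):

```python
# The counting-overlay font now has a higher floor (12 vs 10) and scales
# twice as fast; for a 1920x1080 image, min(width, height) = 1080.
old_fontsize = max(10, int(1080 / 80))  # 13 px in 0.2.161
new_fontsize = max(12, int(1080 / 40))  # 27 px in 0.2.163
print(old_fontsize, new_fontsize)
```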
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.161
+ Version: 0.2.163
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
@@ -27,6 +27,7 @@ Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
  Requires-Dist: pydantic (==2.7.4)
  Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
  Requires-Dist: pytube (==15.0.0)
+ Requires-Dist: redbaron (>=0.9.2,<0.10.0)
  Requires-Dist: requests (>=2.0.0,<3.0.0)
  Requires-Dist: rich (>=13.7.1,<14.0.0)
  Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -142,7 +143,7 @@ continuing, for example it may want to execute code and look at the output befor
  letting the user respond.
 
  ### Chatting and Artifacts
- If you run `chat_with_code` you will also notice an `Artifact` object. `Artifact`s
+ If you run `chat_with_artifacts` you will also notice an `Artifact` object. `Artifact`s
  are a way to sync files between local and remote environments. The agent will read and
  write to the artifact object, which is just a pickle object, when it wants to save or
  load files.
@@ -159,7 +160,7 @@ with open("image.png", "rb") as f:
      artifacts["image.png"] = f.read()
 
  agent = va.agent.VisionAgent()
- response, artifacts = agent.chat_with_code(
+ response, artifacts = agent.chat_with_artifacts(
      [
          {
              "role": "user",
@@ -339,11 +340,11 @@ mode by passing in the verbose argument:
  ```
 
  ### Detailed Usage
- You can also have it return more information by calling `chat_with_workflow`. The format
+ You can also have it return more information by calling `generate_code`. The format
  of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 
  ```python
- >>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
+ >>> results = agent.generate_code([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
  >>> print(results)
  {
      "code": "from vision_agent.tools import ..."
@@ -372,7 +373,7 @@ conv = [
          "media": ["workers.png"],
      }
  ]
- result = agent.chat_with_workflow(conv)
+ result = agent.generate_code(conv)
  code = result["code"]
  conv.append({"role": "assistant", "content": code})
  conv.append(
@@ -381,7 +382,7 @@ conv.append(
      {
          "role": "user",
          "content": "Can you also return the number of workers wearing safety gear?",
      }
  )
- result = agent.chat_with_workflow(conv)
+ result = agent.generate_code(conv)
  ```
 
 
@@ -1,10 +1,12 @@
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
- vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
+ vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
- vision_agent/agent/agent_utils.py,sha256=eIpLz2NunEqEsBBrECJaD34-2uY0bsFNnW-XKfqqohs,2518
- vision_agent/agent/vision_agent.py,sha256=x0oSuQk-rcERUUdZp29FB77Ua-eyQD3fLi_UfmsBTV0,20761
- vision_agent/agent/vision_agent_coder.py,sha256=2ZoGikn2nakGDfs20XRshZjQUyvbw6l47UhExJAYkqI,38515
- vision_agent/agent/vision_agent_coder_prompts.py,sha256=BmbTMhth4v1qLexuoSeyo47QQ0kPQvL1pLbCJHMsWDw,18910
+ vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
+ vision_agent/agent/vision_agent.py,sha256=MUigVufYML2sYn9Hsngswa77XxlZBgCwQyBfK8tlsio,22551
+ vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
+ vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
+ vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
+ vision_agent/agent/vision_agent_planner_prompts.py,sha256=JDARUzko2HZdxkBtcy6wuP9DCCmbqhK_gnVgrjr6l1k,6691
  vision_agent/agent/vision_agent_prompts.py,sha256=LZ9Bnx7ZFkqbNOMqwfdiWZU4niND9Z1ArcFHNSn_jzA,11187
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
@@ -14,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
  vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
  vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
- vision_agent/tools/__init__.py,sha256=PLVbfTMjKxQlHIRWnq9b785W9a52AXQS_tOa0tkQ0ZY,2420
- vision_agent/tools/meta_tools.py,sha256=BF5-fVshLhhpck5lJErKxnfPu9YxudBhR7ar_qA9Mjo,25889
+ vision_agent/tools/__init__.py,sha256=50wwisjudmZn7_SEwigTiiDxQ0HXbSIhVI4O8kvE9Es,2365
+ vision_agent/tools/meta_tools.py,sha256=MULJrZiTODOAN20TGceLdXcwoSGMNaE7bQbywySITnA,28458
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
  vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
- vision_agent/tools/tools.py,sha256=vS1yCk3Fza9eYOTHPFwwroo_ULdw2ztMQMb81x1U5f8,78524
+ vision_agent/tools/tools.py,sha256=uWyR4pebTezXx9IWCKX4SL5sB9u_7LdRP0-KWU52zsU,78524
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
  vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
  vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
- vision_agent-0.2.161.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- vision_agent-0.2.161.dist-info/METADATA,sha256=81mQ74IJal478wgLMjkneAVg0kE89VVjX9Wa_hL0lMo,17753
- vision_agent-0.2.161.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
- vision_agent-0.2.161.dist-info/RECORD,,
+ vision_agent-0.2.163.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ vision_agent-0.2.163.dist-info/METADATA,sha256=grneiMhM3Lwzi9ex9JL8A0R5cmpqyOHaaTcLGRfLwWs,17785
+ vision_agent-0.2.163.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+ vision_agent-0.2.163.dist-info/RECORD,,