vision-agent 0.0.50__py3-none-any.whl → 0.0.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vision_agent/agent/vision_agent.py

@@ -5,6 +5,7 @@ import tempfile
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
+from PIL import Image
 from tabulate import tabulate
 
 from vision_agent.image_utils import overlay_bboxes, overlay_masks
@@ -288,9 +289,8 @@ def visualize_result(all_tool_results: List[Dict]) -> List[str]:
                 continue
             parameters = [parameters]
         elif isinstance(tool_result["parameters"], list):
-            if (
-                len(tool_result["parameters"]) < 1
-                and "image" not in tool_result["parameters"][0]
+            if len(tool_result["parameters"]) < 1 or (
+                "image" not in tool_result["parameters"][0]
             ):
                 continue
 
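This guard fix matters because Python's `or` short-circuits: with the old `and`, an empty parameter list still evaluated `tool_result["parameters"][0]` and raised an IndexError. A standalone sketch of the difference (the `params` value is hypothetical):

```python
# Hypothetical malformed tool result: an empty parameter list.
params: list = []

# Old guard: `and` evaluates both operands here, so params[0] raises
# IndexError whenever the list is empty.
# if len(params) < 1 and "image" not in params[0]: ...

# New guard: `or` short-circuits on the empty list, so params[0] is
# never evaluated and the malformed result is simply skipped.
if len(params) < 1 or "image" not in params[0]:
    print("skipping malformed tool result")
```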
@@ -304,10 +304,16 @@ def visualize_result(all_tool_results: List[Dict]) -> List[str]:
             # if the call was successful, then we can add the image data
             image = param["image"]
             if image not in image_to_data:
-                image_to_data[image] = {"bboxes": [], "masks": [], "labels": []}
+                image_to_data[image] = {
+                    "bboxes": [],
+                    "masks": [],
+                    "labels": [],
+                    "scores": [],
+                }
 
             image_to_data[image]["bboxes"].extend(call_result["bboxes"])
             image_to_data[image]["labels"].extend(call_result["labels"])
+            image_to_data[image]["scores"].extend(call_result["scores"])
             if "masks" in call_result:
                 image_to_data[image]["masks"].extend(call_result["masks"])
 
@@ -345,7 +351,7 @@ class VisionAgent(Agent):
         task_model: Optional[Union[LLM, LMM]] = None,
         answer_model: Optional[Union[LLM, LMM]] = None,
         reflect_model: Optional[Union[LLM, LMM]] = None,
-        max_retries: int = 2,
+        max_retries: int = 3,
         verbose: bool = False,
         report_progress_callback: Optional[Callable[[str], None]] = None,
     ):
@@ -380,6 +386,7 @@ class VisionAgent(Agent):
         self,
         input: Union[List[Dict[str, str]], str],
         image: Optional[Union[str, Path]] = None,
+        visualize_output: Optional[bool] = False,
     ) -> str:
         """Invoke the vision agent.
 
@@ -393,7 +400,7 @@ class VisionAgent(Agent):
         """
         if isinstance(input, str):
             input = [{"role": "user", "content": input}]
-        return self.chat(input, image=image)
+        return self.chat(input, image=image, visualize_output=visualize_output)
 
     def log_progress(self, description: str) -> None:
         _LOGGER.info(description)
@@ -401,7 +408,10 @@ class VisionAgent(Agent):
             self.report_progress_callback(description)
 
     def chat_with_workflow(
-        self, chat: List[Dict[str, str]], image: Optional[Union[str, Path]] = None
+        self,
+        chat: List[Dict[str, str]],
+        image: Optional[Union[str, Path]] = None,
+        visualize_output: Optional[bool] = False,
     ) -> Tuple[str, List[Dict]]:
         question = chat[0]["content"]
         if image:
@@ -449,31 +459,42 @@ class VisionAgent(Agent):
                 self.answer_model, question, answers, reflections
             )
 
-            visualized_images = visualize_result(all_tool_results)
-            all_tool_results.append({"visualized_images": visualized_images})
+            visualized_output = visualize_result(all_tool_results)
+            all_tool_results.append({"visualized_output": visualized_output})
             reflection = self_reflect(
                 self.reflect_model,
                 question,
                 self.tools,
                 all_tool_results,
                 final_answer,
-                visualized_images[0] if len(visualized_images) > 0 else image,
+                visualized_output[0] if len(visualized_output) > 0 else image,
             )
             self.log_progress(f"Reflection: {reflection}")
             if parse_reflect(reflection):
                 break
             else:
-                reflections += reflection
-        # '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
+                reflections += "\n" + reflection
+        # '<END>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
         self.log_progress(
             f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</<ANSWER>"
         )
+
+        if visualize_output:
+            visualized_output = all_tool_results[-1]["visualized_output"]
+            for image in visualized_output:
+                Image.open(image).show()
+
         return final_answer, all_tool_results
 
     def chat(
-        self, chat: List[Dict[str, str]], image: Optional[Union[str, Path]] = None
+        self,
+        chat: List[Dict[str, str]],
+        image: Optional[Union[str, Path]] = None,
+        visualize_output: Optional[bool] = False,
     ) -> str:
-        answer, _ = self.chat_with_workflow(chat, image=image)
+        answer, _ = self.chat_with_workflow(
+            chat, image=image, visualize_output=visualize_output
+        )
         return answer
 
     def retrieval(
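Taken together, these vision_agent.py hunks thread a new `visualize_output` flag from `__call__` through `chat` and `chat_with_workflow`, then open each annotated image with PIL when the flag is set. A minimal usage sketch, assuming `VisionAgent` is exported from `vision_agent.agent` and the model credentials the package expects are configured:

```python
from vision_agent.agent import VisionAgent

# Hypothetical question and image path; with visualize_output=True the
# agent calls Image.open(...).show() on each file from visualize_result().
agent = VisionAgent(verbose=True)
answer = agent(
    "How many jars are on the table?",
    image="table.jpg",
    visualize_output=True,
)
print(answer)
```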
vision_agent/agent/vision_agent_prompts.py

@@ -1,4 +1,4 @@
-VISION_AGENT_REFLECTION = """You are an advanced reasoning agent that can improve based on self-refection. You will be given a previous reasoning trial in which you were given the user's question, the available tools that the agent has, the decomposed tasks and tools that the agent used to answer the question and the final answer the agent provided. You must determine if the agent's answer was correct or incorrect. If the agent's answer was correct, respond with Finish. If the agent's answer was incorrect, you must diagnose a possible reason for failure or phrasing discrepancy and devise a new, concise, high level plan that aims to mitigate the same failure with the tools available. Use complete sentences.
+VISION_AGENT_REFLECTION = """You are an advanced reasoning agent that can improve based on self-refection. You will be given a previous reasoning trial in which you were given the user's question, the available tools that the agent has, the decomposed tasks and tools that the agent used to answer the question and the final answer the agent provided. You may also receive an image with the visualized bounding boxes or masks with their associated labels and scores from the tools used. You must determine if the agent's answer was correct or incorrect. If the agent's answer was correct, respond with Finish. If the agent's answer was incorrect, you must diagnose a possible reason for failure or phrasing discrepancy and devise a new, concise, concrete plan that aims to mitigate the same failure with the tools available. Do not make vague steps like re-evaluate the threshold, instead make concrete steps like use a threshold of 0.5 or whatever threshold you think would fix this issue. If the task cannot be completed with the existing tools, respond with Finish. Use complete sentences.
 
 User's question: {question}
 
@@ -49,7 +49,6 @@ Output: """
 
 CHOOSE_TOOL = """This is the user's question: {question}
 These are the tools you can select to solve the question:
-
 {tools}
 
 Please note that:
@@ -63,7 +62,6 @@ Output: """
 
 CHOOSE_TOOL_DEPENDS = """This is the user's question: {question}
 These are the tools you can select to solve the question:
-
 {tools}
 
 This is a reflection from a previous failed attempt:
vision_agent/image_utils.py

@@ -1,6 +1,7 @@
 """Utility functions for image processing."""
 
 import base64
+from importlib import resources
 from io import BytesIO
 from pathlib import Path
 from typing import Dict, Tuple, Union
@@ -104,19 +105,28 @@ def overlay_bboxes(
 
     color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(bboxes["labels"])}
 
-    draw = ImageDraw.Draw(image)
-    font = ImageFont.load_default()
     width, height = image.size
+    fontsize = max(12, int(min(width, height) / 40))
+    draw = ImageDraw.Draw(image)
+    font = ImageFont.truetype(
+        str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
+        fontsize,
+    )
     if "bboxes" not in bboxes:
         return image.convert("RGB")
 
-    for label, box in zip(bboxes["labels"], bboxes["bboxes"]):
-        box = [box[0] * width, box[1] * height, box[2] * width, box[3] * height]
-        draw.rectangle(box, outline=color[label], width=3)
-        label = f"{label}"
-        text_box = draw.textbbox((box[0], box[1]), text=label, font=font)
-        draw.rectangle(text_box, fill=color[label])
-        draw.text((text_box[0], text_box[1]), label, fill="black", font=font)
+    for label, box, scores in zip(bboxes["labels"], bboxes["bboxes"], bboxes["scores"]):
+        box = [
+            int(box[0] * width),
+            int(box[1] * height),
+            int(box[2] * width),
+            int(box[3] * height),
+        ]
+        draw.rectangle(box, outline=color[label], width=4)
+        text = f"{label}: {scores:.2f}"
+        text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
+        draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
+        draw.text((box[0], box[1]), text, fill="black", font=font)
     return image.convert("RGB")
 
 
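The rewritten loop implies a payload contract for `overlay_bboxes`: parallel `labels`, `bboxes` (xyxy, normalized to [0, 1]), and now `scores` lists, with the label font scaled to the image. A sketch of that shape plus the sizing heuristic, using made-up values:

```python
# Hypothetical detections matching the zip() in overlay_bboxes: one
# label and one confidence score per normalized xyxy box.
bboxes = {
    "labels": ["person", "car"],
    "bboxes": [[0.10, 0.20, 0.35, 0.90], [0.50, 0.55, 0.95, 0.85]],
    "scores": [0.92, 0.81],
}

# The new font heuristic: at least 12 px, roughly 1/40 of the shorter
# image side, so a 1280x720 image gets an 18 px label font.
width, height = 1280, 720
fontsize = max(12, int(min(width, height) / 40))
assert fontsize == 18
```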
@@ -138,7 +148,9 @@ def overlay_masks(
     elif isinstance(image, np.ndarray):
         image = Image.fromarray(image)
 
-    color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(masks["labels"])}
+    color = {
+        label: COLORS[i % len(COLORS)] for i, label in enumerate(set(masks["labels"]))
+    }
     if "masks" not in masks:
         return image.convert("RGB")
 
vision_agent/tools/tools.py

@@ -53,9 +53,7 @@ class Tool(ABC):
 
 class NoOp(Tool):
     name = "noop_"
-    description = (
-        "'noop_' is a no-op tool that does nothing if you do not need to use a tool."
-    )
+    description = "'noop_' is a no-op tool that does nothing if you do not want answer the question directly and not use a tool."
     usage = {
         "required_parameters": [],
         "examples": [
@@ -85,7 +83,7 @@ class CLIP(Tool):
     _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
 
     name = "clip_"
-    description = "'clip_' is a tool that can classify or tag any image given a set of input classes or tags."
+    description = "'clip_' is a tool that can classify any image given a set of input names or tags. It returns a list of the input names along with their probability scores."
     usage = {
         "required_parameters": [
             {"name": "prompt", "type": "str"},
@@ -163,7 +161,7 @@ class GroundingDINO(Tool):
     _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
 
     name = "grounding_dino_"
-    description = "'grounding_dino_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions."
+    description = "'grounding_dino_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and associated probability scores."
     usage = {
         "required_parameters": [
             {"name": "prompt", "type": "str"},
@@ -179,8 +177,11 @@ class GroundingDINO(Tool):
             "parameters": {"prompt": "car", "image": ""},
         },
         {
-            "scenario": "Can you detect the person on the left? Image name: person.jpg",
-            "parameters": {"prompt": "person on the left", "image": "person.jpg"},
+            "scenario": "Can you detect the person on the left and right? Image name: person.jpg",
+            "parameters": {
+                "prompt": "left person. right person",
+                "image": "person.jpg",
+            },
         },
         {
             "scenario": "Detect the red shirts and green shirst. Image name: shirts.jpg",
@@ -269,7 +270,7 @@ class GroundingSAM(Tool):
     _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
 
     name = "grounding_sam_"
-    description = "'grounding_sam_' is a tool that can detect and segment arbitrary objects with inputs such as category names or referring expressions."
+    description = "'grounding_sam_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and masks file names and associated probability scores."
     usage = {
         "required_parameters": [
             {"name": "prompt", "type": "str"},
@@ -285,8 +286,11 @@ class GroundingSAM(Tool):
             "parameters": {"prompt": "car", "image": ""},
         },
         {
-            "scenario": "Can you segment the person on the left? Image name: person.jpg",
-            "parameters": {"prompt": "person on the left", "image": "person.jpg"},
+            "scenario": "Can you segment the person on the left and right? Image name: person.jpg",
+            "parameters": {
+                "prompt": "left person. right person",
+                "image": "person.jpg",
+            },
         },
         {
             "scenario": "Can you build me a tool that segments red shirts and green shirts? Image name: shirts.jpg",
@@ -370,8 +374,9 @@ class AgentGroundingSAM(GroundingSAM):
         mask_files = []
         for mask in rets["masks"]:
             with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
-                Image.fromarray(mask * 255).save(tmp)
-                mask_files.append(tmp.name)
+                file_name = Path(tmp.name).with_suffix(".mask.png")
+                Image.fromarray(mask * 255).save(file_name)
+                mask_files.append(str(file_name))
         rets["masks"] = mask_files
         return rets
 
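This hunk, and the matching `ExtractFrames` change further down, keep `NamedTemporaryFile` for a collision-free name but save under a derived `*.mask.png` (or `*.frame.png`) path via `Path.with_suffix`, so the artifacts are recognizable on disk. A standalone sketch of that renaming (the temp name is hypothetical):

```python
from pathlib import Path

# with_suffix() swaps the trailing ".png" for the multi-part suffix,
# so /tmp/abc123.png becomes /tmp/abc123.mask.png.
tmp_name = "/tmp/abc123.png"  # hypothetical NamedTemporaryFile name
file_name = Path(tmp_name).with_suffix(".mask.png")
assert str(file_name) == "/tmp/abc123.mask.png"
```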
@@ -380,7 +385,7 @@ class Counter(Tool):
     r"""Counter detects and counts the number of objects in an image given an input such as a category name or referring expression."""
 
     name = "counter_"
-    description = "'counter_' detects and counts the number of objects in an image given an input such as a category name or referring expression."
+    description = "'counter_' detects and counts the number of objects in an image given an input such as a category name or referring expression. It returns a dictionary containing the labels and their counts."
     usage = {
         "required_parameters": [
             {"name": "prompt", "type": "str"},
@@ -400,14 +405,14 @@
 
     def __call__(self, prompt: str, image: Union[str, ImageType]) -> Dict:
         resp = GroundingDINO()(prompt, image)
-        return dict(CounterClass(resp[0]["labels"]))
+        return dict(CounterClass(resp["labels"]))
 
 
 class Crop(Tool):
     r"""Crop crops an image given a bounding box and returns a file name of the cropped image."""
 
     name = "crop_"
-    description = "'crop_' crops an image given a bounding box and returns a file name of the cropped image."
+    description = "'crop_' crops an image given a bounding box and returns a file name of the cropped image. It returns a file with the cropped image."
     usage = {
         "required_parameters": [
             {"name": "bbox", "type": "List[float]"},
@@ -495,9 +500,7 @@ class SegArea(Tool):
 
 class BboxIoU(Tool):
     name = "bbox_iou_"
-    description = (
-        "'bbox_iou_' returns the intersection over union of two bounding boxes."
-    )
+    description = "'bbox_iou_' returns the intersection over union of two bounding boxes. This is a good tool for determining if two objects are overlapping."
     usage = {
         "required_parameters": [
             {"name": "bbox1", "type": "List[int]"},
@@ -591,85 +594,35 @@ class ExtractFrames(Tool):
         )
         for frame, ts in frames:
             with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
-                Image.fromarray(frame).save(tmp)
-                result.append((tmp.name, ts))
+                file_name = Path(tmp.name).with_suffix(".frame.png")
+                Image.fromarray(frame).save(file_name)
+                result.append((str(file_name), ts))
         return result
 
 
-class Add(Tool):
-    r"""Add returns the sum of all the arguments passed to it, normalized to 2 decimal places."""
-
-    name = "add_"
-    description = "'add_' returns the sum of all the arguments passed to it, normalized to 2 decimal places."
-    usage = {
-        "required_parameters": [{"name": "input", "type": "List[int]"}],
-        "examples": [
-            {
-                "scenario": "If you want to calculate 2 + 4",
-                "parameters": {"input": [2, 4]},
-            }
-        ],
-    }
-
-    def __call__(self, input: List[int]) -> float:
-        return round(sum(input), 2)
-
-
-class Subtract(Tool):
-    r"""Subtract returns the difference of all the arguments passed to it, normalized to 2 decimal places."""
-
-    name = "subtract_"
-    description = "'subtract_' returns the difference of all the arguments passed to it, normalized to 2 decimal places."
-    usage = {
-        "required_parameters": [{"name": "input", "type": "List[int]"}],
-        "examples": [
-            {
-                "scenario": "If you want to calculate 4 - 2",
-                "parameters": {"input": [4, 2]},
-            }
-        ],
-    }
-
-    def __call__(self, input: List[int]) -> float:
-        return round(input[0] - input[1], 2)
-
+class Calculator(Tool):
+    r"""Calculator is a tool that can perform basic arithmetic operations."""
 
-class Multiply(Tool):
-    r"""Multiply returns the product of all the arguments passed to it, normalized to 2 decimal places."""
-
-    name = "multiply_"
-    description = "'multiply_' returns the product of all the arguments passed to it, normalized to 2 decimal places."
+    name = "calculator_"
+    description = (
+        "'calculator_' is a tool that can perform basic arithmetic operations."
+    )
     usage = {
-        "required_parameters": [{"name": "input", "type": "List[int]"}],
+        "required_parameters": [{"name": "equation", "type": "str"}],
         "examples": [
             {
-                "scenario": "If you want to calculate 2 * 4",
-                "parameters": {"input": [2, 4]},
-            }
-        ],
-    }
-
-    def __call__(self, input: List[int]) -> float:
-        return round(input[0] * input[1], 2)
-
-
-class Divide(Tool):
-    r"""Divide returns the division of all the arguments passed to it, normalized to 2 decimal places."""
-
-    name = "divide_"
-    description = "'divide_' returns the division of all the arguments passed to it, normalized to 2 decimal places."
-    usage = {
-        "required_parameters": [{"name": "input", "type": "List[int]"}],
-        "examples": [
+                "scenario": "If you want to calculate (2 * 3) + 4",
+                "parameters": {"equation": "2 + 4"},
+            },
             {
-                "scenario": "If you want to calculate 4 / 2",
-                "parameters": {"input": [4, 2]},
-            }
+                "scenario": "If you want to calculate (4 + 2.5) / 2.1",
+                "parameters": {"equation": "(4 + 2.5) / 2.1"},
+            },
         ],
     }
 
-    def __call__(self, input: List[int]) -> float:
-        return round(input[0] / input[1], 2)
+    def __call__(self, equation: str) -> float:
+        return cast(float, round(eval(equation), 2))
 
 
 TOOLS = {
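The four arithmetic tools collapse into one `calculator_` that evaluates an equation string with Python's built-in `eval`, which executes whatever string the model produces. A hypothetical safer variant (not in the package) that walks the AST and allows only numeric literals and arithmetic operators:

```python
import ast
import operator

# Whitelisted arithmetic operators for the hypothetical safe evaluator.
_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.USub: operator.neg,
}

def safe_calc(equation: str) -> float:
    def _eval(node: ast.AST) -> float:
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return float(node.value)
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {equation!r}")

    return round(_eval(ast.parse(equation, mode="eval")), 2)

assert safe_calc("(4 + 2.5) / 2.1") == 3.1
```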
@@ -687,10 +640,7 @@ TOOLS = {
             SegArea,
             BboxIoU,
             SegIoU,
-            Add,
-            Subtract,
-            Multiply,
-            Divide,
+            Calculator,
         ]
     )
     if (hasattr(c, "name") and hasattr(c, "description") and hasattr(c, "usage"))
vision_agent-0.0.52.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.0.50
+Version: 0.0.52
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -103,7 +103,8 @@ the individual steps and tools to get the answer:
     }
 ]],
 "answer": "The jar is located at [0.58, 0.2, 0.72, 0.45].",
-}]
+},
+{"visualize_output": "final_output.png"}]
 ```
 
 ### Tools
vision_agent-0.0.52.dist-info/RECORD

@@ -5,22 +5,24 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
 vision_agent/agent/easytool_prompts.py,sha256=dYzWa_RaiaFSQ-CowoQOcFmjZtBTTljRyA809bLgrvU,4519
 vision_agent/agent/reflexion.py,sha256=wzpptfALNZIh9Q5jgkK3imGL5LWjTW_n_Ypsvxdh07Q,10101
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=uHhcf7srgi5-1sf65PmGVA0aFijit2noEW9nBVqJU2o,19104
-vision_agent/agent/vision_agent_prompts.py,sha256=fYnOT6z7DmuVTfUknUuc6b_vPmO0vgCyVJRQSR5M-G8,6192
+vision_agent/agent/vision_agent.py,sha256=TKseWK3C7kr9GmjQmYgNSBZJHPqd7wTP6BSkwYqJkdY,19765
+vision_agent/agent/vision_agent_prompts.py,sha256=dPg0mLVK_fGJpYK2xXGhm-zuXX1KVZW_zFXyYsspUz8,6567
 vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
 vision_agent/data/data.py,sha256=pgtSGZdAnbQ8oGsuapLtFTMPajnCGDGekEXTnFuBwsY,5122
 vision_agent/emb/__init__.py,sha256=YmCkGrJBtXb6X6Z3lnKiFoQYKXMgHMJp8JJyMLVvqcI,75
 vision_agent/emb/emb.py,sha256=la9lhEzk7jqUCjYYQ5oRgVNSnC9_EJBJIpE_B9c6PJo,1375
-vision_agent/image_utils.py,sha256=_hDikKa40U-2nQufKMRDgU9t-OmwCK9Rb_6O3v1U3nE,4436
+vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
+vision_agent/image_utils.py,sha256=hFdPoRmeVU5jErFr5xaagMQ6Wy7Xbw8H8HXuLGdJIAM,4786
 vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
 vision_agent/llm/llm.py,sha256=tgL6ZtuwZKuxSNiCxJCuP2ETjNMrosdgxXkZJb0_00E,5024
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
 vision_agent/lmm/lmm.py,sha256=LxwxCArp7DfnPbjf_Gl55xBxPwo2Qx8eDp1gCnGYSO0,9535
 vision_agent/tools/__init__.py,sha256=AKN-T659HpwVearRnkCd6wWNoJ6K5kW9gAZwb8IQSLE,235
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tools.py,sha256=bYc3Xeg0wDjpfd8WGxRPCSaGQxUHRLI2PJk-SThqjHY,25644
+vision_agent/tools/tools.py,sha256=h11niI1JiOCOaOFkdHee_AnXegaIK5Al8QMoFdZaJuo,24583
 vision_agent/tools/video.py,sha256=40rscP8YvKN3lhZ4PDcOK4XbdFX2duCRpHY_krmBYKU,7476
-vision_agent-0.0.50.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.0.50.dist-info/METADATA,sha256=AtP5GqtNH0ucnebFbM0ldol0S4qbiIg1rwAvWq2FeCk,6142
-vision_agent-0.0.50.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.0.50.dist-info/RECORD,,
+vision_agent-0.0.52.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.0.52.dist-info/METADATA,sha256=5OBmHCpSDZbvGb_pNU_cOKWI9AdUOhEufDHigk_cm3c,6184
+vision_agent-0.0.52.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.0.52.dist-info/RECORD,,