vision-agent 0.2.27__py3-none-any.whl → 0.2.29__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from pathlib import Path
3
- from typing import Dict, List, Optional, Union
3
+ from typing import Dict, List, Optional, Union, Any
4
4
 
5
5
 
6
6
  class Agent(ABC):
@@ -13,7 +13,7 @@ class Agent(ABC):
13
13
  pass
14
14
 
15
15
  @abstractmethod
16
- def log_progress(self, description: str) -> None:
16
+ def log_progress(self, data: Dict[str, Any]) -> None:
17
17
  """Log the progress of the agent.
18
18
  This is a hook that is intended for reporting the progress of the agent.
19
19
  """
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Dict, List, Optional, Union
6
+ from typing import Dict, List, Optional, Union, Any
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -206,5 +206,5 @@ class AgentCoder(Agent):
206
206
 
207
207
  return f"{IMPORT_HELPER}\n{code}"
208
208
 
209
- def log_progress(self, description: str) -> None:
210
- _LOGGER.info(description)
209
+ def log_progress(self, data: Dict[str, Any]) -> None:
210
+ _LOGGER.info(data)
@@ -451,7 +451,7 @@ class VisionAgent(Agent):
451
451
  reflect_model: Optional[Union[LLM, LMM]] = None,
452
452
  max_retries: int = 2,
453
453
  verbose: bool = False,
454
- report_progress_callback: Optional[Callable[[str], None]] = None,
454
+ report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
455
455
  ):
456
456
  """VisionAgent constructor.
457
457
 
@@ -518,23 +518,23 @@ class VisionAgent(Agent):
518
518
  self_reflection=self_reflection,
519
519
  )
520
520
 
521
- def log_progress(self, description: str) -> None:
522
- _LOGGER.info(description)
521
+ def log_progress(self, data: Dict[str, Any]) -> None:
522
+ _LOGGER.info(data)
523
523
  if self.report_progress_callback:
524
- self.report_progress_callback(description)
524
+ self.report_progress_callback(data)
525
525
 
526
526
  def _report_visualization_via_callback(
527
527
  self, images: Sequence[Union[str, Path]]
528
528
  ) -> None:
529
529
  """This is intended for streaming the visualization images via the callback to the client side."""
530
530
  if self.report_progress_callback:
531
- self.report_progress_callback("<VIZ>")
531
+ self.report_progress_callback({"log": "<VIZ>"})
532
532
  if images:
533
533
  for img in images:
534
534
  self.report_progress_callback(
535
- f"<IMG>base:64{convert_to_b64(img)}</IMG>"
535
+ {"log": f"<IMG>base:64{convert_to_b64(img)}</IMG>"}
536
536
  )
537
- self.report_progress_callback("</VIZ>")
537
+ self.report_progress_callback({"log": "</VIZ>"})
538
538
 
539
539
  def chat_with_workflow(
540
540
  self,
@@ -618,8 +618,8 @@ class VisionAgent(Agent):
618
618
  tool_results["answer"] = answer
619
619
  all_tool_results.append(tool_results)
620
620
 
621
- self.log_progress(f"\tCall Result: {call_results}")
622
- self.log_progress(f"\tAnswer: {answer}")
621
+ self.log_progress({"log": f"\tCall Result: {call_results}"})
622
+ self.log_progress({"log": f"\tAnswer: {answer}"})
623
623
  answers.append({"task": task_str, "answer": answer})
624
624
  task_depend[task["id"]]["answer"] = answer # type: ignore
625
625
  task_depend[task["id"]]["call_result"] = call_results # type: ignore
@@ -644,18 +644,22 @@ class VisionAgent(Agent):
644
644
  final_answer,
645
645
  reflection_images,
646
646
  )
647
- self.log_progress(f"Reflection: {reflection}")
647
+ self.log_progress({"log": f"Reflection: {reflection}"})
648
648
  parsed_reflection = parse_reflect(reflection)
649
649
  if parsed_reflection["Finish"]:
650
650
  break
651
651
  else:
652
652
  reflections += "\n" + parsed_reflection["Reflection"]
653
653
  else:
654
- self.log_progress("Self Reflection skipped based on user request.")
654
+ self.log_progress(
655
+ {"log": "Self Reflection skipped based on user request."}
656
+ )
655
657
  break
656
658
  # '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
657
659
  self.log_progress(
658
- f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
660
+ {
661
+ "log": f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
662
+ }
659
663
  )
660
664
 
661
665
  if visualize_output:
@@ -718,8 +722,10 @@ class VisionAgent(Agent):
718
722
  }
719
723
 
720
724
  self.log_progress(
721
- f"""Going to run the following tool(s) in sequence:
725
+ {
726
+ "log": f"""Going to run the following tool(s) in sequence:
722
727
  {tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
728
+ }
723
729
  )
724
730
 
725
731
  def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
@@ -764,7 +770,9 @@ class VisionAgent(Agent):
764
770
  else:
765
771
  task_list = []
766
772
  self.log_progress(
767
- f"""Planned tasks:
768
- {tabulate(task_list, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
773
+ {
774
+ "log": "Planned tasks:",
775
+ "plan": task_list,
776
+ }
769
777
  )
770
778
  return task_list
@@ -165,7 +165,7 @@ def write_and_exec_code(
165
165
  tool_info: str,
166
166
  exec: Execute,
167
167
  retrieved_ltm: str,
168
- log_progress: Callable[..., str],
168
+ log_progress: Callable[[Dict[str, Any]], None],
169
169
  max_retry: int = 3,
170
170
  verbosity: int = 0,
171
171
  ) -> Tuple[bool, str, str, Dict[str, List[str]]]:
@@ -179,7 +179,23 @@ def write_and_exec_code(
179
179
  success, result = exec.run_isolation(code)
180
180
  if verbosity == 2:
181
181
  _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
182
- log_progress(f"\tCode success: {success}\n\tResult: {str(result)}", code)
182
+ log_progress(
183
+ {
184
+ "log": f"Code success: {success}",
185
+ }
186
+ )
187
+ log_progress(
188
+ {
189
+ "log": "Code:",
190
+ "code": code,
191
+ }
192
+ )
193
+ log_progress(
194
+ {
195
+ "log": "Result:",
196
+ "result": str(result),
197
+ }
198
+ )
183
199
  _LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
184
200
  working_memory: Dict[str, List[str]] = {}
185
201
  while not success and counter < max_retry:
@@ -206,7 +222,18 @@ def write_and_exec_code(
206
222
  _CONSOLE.print(
207
223
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
208
224
  )
209
- log_progress(f"\tDebugging reflection: {reflection}\n\tResult: {result}")
225
+ log_progress(
226
+ {
227
+ "log": "Debugging reflection:",
228
+ "reflection": reflection,
229
+ }
230
+ )
231
+ log_progress(
232
+ {
233
+ "log": "Result:",
234
+ "result": result,
235
+ }
236
+ )
210
237
  _LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
211
238
 
212
239
  if success:
@@ -227,7 +254,7 @@ def run_plan(
227
254
  exec: Execute,
228
255
  code: str,
229
256
  tool_recommender: Sim,
230
- log_progress: Callable[..., str],
257
+ log_progress: Callable[[Dict[str, Any]], None],
231
258
  long_term_memory: Optional[Sim] = None,
232
259
  verbosity: int = 0,
233
260
  ) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
@@ -239,8 +266,7 @@ def run_plan(
239
266
 
240
267
  for task in active_plan:
241
268
  log_progress(
242
- f"""Going to run the following task(s) in sequence:
243
- {tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
269
+ {"log": "Going to run the following task(s) in sequence:", "task": task}
244
270
  )
245
271
  _LOGGER.info(
246
272
  f"""
@@ -250,7 +276,7 @@ def run_plan(
250
276
  tool_info = "\n".join([e["doc"] for e in tools])
251
277
 
252
278
  if verbosity == 2:
253
- log_progress(f"Tools retrieved: {[e['desc'] for e in tools]}")
279
+ log_progress({"log": f"Tools retrieved: {[e['desc'] for e in tools]}"})
254
280
  _LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
255
281
 
256
282
  if long_term_memory is not None:
@@ -282,7 +308,17 @@ def run_plan(
282
308
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
283
309
  )
284
310
 
285
- log_progress(f"\tCode success: {success}\n\tResult: {str(result)}")
311
+ log_progress(
312
+ {
313
+ "log": f"Code success: {success}",
314
+ }
315
+ )
316
+ log_progress(
317
+ {
318
+ "log": "Result:",
319
+ "result": str(result),
320
+ }
321
+ )
286
322
  _LOGGER.info(f"\tCode success: {success} result: {str(result)}")
287
323
 
288
324
  task["success"] = success
@@ -320,7 +356,7 @@ class VisionAgentV2(Agent):
320
356
  tool_recommender: Optional[Sim] = None,
321
357
  long_term_memory: Optional[Sim] = None,
322
358
  verbosity: int = 0,
323
- report_progress_callback: Optional[Callable[..., Any]] = None,
359
+ report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
324
360
  ) -> None:
325
361
  self.planner = OpenAILLM(temperature=0.0, json_mode=True)
326
362
  self.coder = OpenAILLM(temperature=0.0)
@@ -376,8 +412,10 @@ class VisionAgentV2(Agent):
376
412
 
377
413
  user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
378
414
  self.log_progress(
379
- f"""Plan:
380
- {tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
415
+ {
416
+ "log": "Plans:",
417
+ "plan": plan,
418
+ }
381
419
  )
382
420
  _LOGGER.info(
383
421
  f"""Plan:
@@ -412,8 +450,12 @@ class VisionAgentV2(Agent):
412
450
 
413
451
  retries += 1
414
452
 
415
- self.log_progress("The Vision Agent V2 has concluded this chat.")
416
- self.log_progress(f"<ANSWER>Plan success: {success}</ANSWER>")
453
+ self.log_progress(
454
+ {
455
+ "log": f"The Vision Agent V2 has concluded this chat.\nSuccess: {success}",
456
+ "finished": True,
457
+ }
458
+ )
417
459
 
418
460
  return {
419
461
  "code": working_code,
@@ -423,7 +465,7 @@ class VisionAgentV2(Agent):
423
465
  "plan": plan,
424
466
  }
425
467
 
426
- def log_progress(self, description: str, code: Optional[str] = "") -> None:
468
+ def log_progress(self, data: Dict[str, Any]) -> None:
427
469
  if self.report_progress_callback is not None:
428
- self.report_progress_callback(description, code)
470
+ self.report_progress_callback(data)
429
471
  pass
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Union, cast
6
+ from typing import Any, Dict, List, Optional, Union, cast, Callable, no_type_check
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -114,8 +114,10 @@ def write_and_test_code(
114
114
  coder: LLM,
115
115
  tester: LLM,
116
116
  debugger: LLM,
117
+ log_progress: Callable[[Dict[str, Any]], None],
117
118
  verbosity: int = 0,
118
119
  max_retries: int = 3,
120
+ input_media: Optional[Union[str, Path]] = None,
119
121
  ) -> Dict[str, Any]:
120
122
  code = extract_code(
121
123
  coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
@@ -123,18 +125,40 @@ def write_and_test_code(
123
125
  test = extract_code(
124
126
  tester(
125
127
  SIMPLE_TEST.format(
126
- docstring=tool_utils, question=task, code=code, feedback=working_memory
128
+ docstring=tool_utils,
129
+ question=task,
130
+ code=code,
131
+ feedback=working_memory,
132
+ media=input_media,
127
133
  )
128
134
  )
129
135
  )
130
136
 
131
137
  success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
132
138
  if verbosity == 2:
133
- _LOGGER.info("First code and tests:")
139
+ _LOGGER.info("Initial code and tests:")
140
+ log_progress(
141
+ {
142
+ "log": "Code:",
143
+ "code": code,
144
+ }
145
+ )
146
+ log_progress(
147
+ {
148
+ "log": "Test:",
149
+ "code": test,
150
+ }
151
+ )
134
152
  _CONSOLE.print(
135
153
  Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
136
154
  )
137
- _LOGGER.info(f"First result: {result}")
155
+ log_progress(
156
+ {
157
+ "log": "Result:",
158
+ "result": result,
159
+ }
160
+ )
161
+ _LOGGER.info(f"Initial result: {result}")
138
162
 
139
163
  count = 0
140
164
  new_working_memory = []
@@ -156,6 +180,12 @@ def write_and_test_code(
156
180
 
157
181
  success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
158
182
  if verbosity == 2:
183
+ log_progress(
184
+ {
185
+ "log": f"Debug attempt {count + 1}, reflection:",
186
+ "result": fixed_code_and_test["reflections"],
187
+ }
188
+ )
159
189
  _LOGGER.info(
160
190
  f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
161
191
  )
@@ -164,25 +194,36 @@ def write_and_test_code(
164
194
  f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
165
195
  )
166
196
  )
197
+ log_progress(
198
+ {
199
+ "log": "Debug result:",
200
+ "result": result,
201
+ }
202
+ )
167
203
  _LOGGER.info(f"Debug result: {result}")
168
204
  count += 1
169
205
 
170
- if verbosity == 1:
206
+ if verbosity >= 1:
207
+ _LOGGER.info("Final code and tests:")
171
208
  _CONSOLE.print(
172
209
  Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
173
210
  )
174
- _LOGGER.info(f"Result: {result}")
211
+ _LOGGER.info(f"Final Result: {result}")
175
212
 
176
213
  return {
177
214
  "code": code,
178
215
  "test": test,
179
216
  "success": success,
217
+ "test_result": result,
180
218
  "working_memory": new_working_memory,
181
219
  }
182
220
 
183
221
 
184
222
  def retrieve_tools(
185
- plan: List[Dict[str, str]], tool_recommender: Sim, verbosity: int = 0
223
+ plan: List[Dict[str, str]],
224
+ tool_recommender: Sim,
225
+ log_progress: Callable[[Dict[str, Any]], None],
226
+ verbosity: int = 0,
186
227
  ) -> str:
187
228
  tool_info = []
188
229
  tool_desc = []
@@ -191,6 +232,12 @@ def retrieve_tools(
191
232
  tool_info.extend([e["doc"] for e in tools])
192
233
  tool_desc.extend([e["desc"] for e in tools])
193
234
  if verbosity == 2:
235
+ log_progress(
236
+ {
237
+ "log": "Retrieved tools:",
238
+ "tools": tool_desc,
239
+ }
240
+ )
194
241
  _LOGGER.info(f"Tools: {tool_desc}")
195
242
  tool_info_set = set(tool_info)
196
243
  return "\n\n".join(tool_info_set)
@@ -206,6 +253,7 @@ class VisionAgentV3(Agent):
206
253
  debugger: Optional[LLM] = None,
207
254
  tool_recommender: Optional[Sim] = None,
208
255
  verbosity: int = 0,
256
+ report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
209
257
  ) -> None:
210
258
  self.planner = (
211
259
  OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
@@ -222,22 +270,26 @@ class VisionAgentV3(Agent):
222
270
  else tool_recommender
223
271
  )
224
272
  self.verbosity = verbosity
225
- self.max_retries = 3
273
+ self.max_retries = 2
274
+ self.report_progress_callback = report_progress_callback
226
275
 
276
+ @no_type_check
227
277
  def __call__(
228
278
  self,
229
279
  input: Union[List[Dict[str, str]], str],
230
280
  image: Optional[Union[str, Path]] = None,
231
- ) -> str:
281
+ ) -> Dict[str, Any]:
232
282
  if isinstance(input, str):
233
283
  input = [{"role": "user", "content": input}]
234
284
  results = self.chat_with_workflow(input, image)
235
- return results["code"] # type: ignore
285
+ results.pop("working_memory")
286
+ return results
236
287
 
237
288
  def chat_with_workflow(
238
289
  self,
239
290
  chat: List[Dict[str, str]],
240
291
  image: Optional[Union[str, Path]] = None,
292
+ self_reflection: bool = False,
241
293
  ) -> Dict[str, Any]:
242
294
  if len(chat) == 0:
243
295
  raise ValueError("Chat cannot be empty.")
@@ -260,7 +312,14 @@ class VisionAgentV3(Agent):
260
312
  chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
261
313
  )
262
314
  plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
263
- if self.verbosity == 1 or self.verbosity == 2:
315
+ if self.verbosity >= 1:
316
+ self.log_progress(
317
+ {
318
+ "log": "Going to run the following plan(s) in sequence:\n",
319
+ "plan": plan_i,
320
+ }
321
+ )
322
+
264
323
  _LOGGER.info(
265
324
  f"""
266
325
  {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -269,6 +328,7 @@ class VisionAgentV3(Agent):
269
328
  tool_info = retrieve_tools(
270
329
  plan_i,
271
330
  self.tool_recommender,
331
+ self.log_progress,
272
332
  self.verbosity,
273
333
  )
274
334
  results = write_and_test_code(
@@ -279,7 +339,9 @@ class VisionAgentV3(Agent):
279
339
  self.coder,
280
340
  self.tester,
281
341
  self.debugger,
342
+ self.log_progress,
282
343
  verbosity=self.verbosity,
344
+ input_media=image,
283
345
  )
284
346
  success = cast(bool, results["success"])
285
347
  code = cast(str, results["code"])
@@ -287,19 +349,38 @@ class VisionAgentV3(Agent):
287
349
  working_memory.extend(results["working_memory"]) # type: ignore
288
350
  plan.append({"code": code, "test": test, "plan": plan_i})
289
351
 
290
- reflection = reflect(chat, plan_i_str, code, self.planner)
291
- if self.verbosity > 0:
292
- _LOGGER.info(f"Reflection: {reflection}")
293
- feedback = cast(str, reflection["feedback"])
294
- success = cast(bool, reflection["success"])
295
- working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
352
+ if self_reflection:
353
+ reflection = reflect(chat, plan_i_str, code, self.planner)
354
+ if self.verbosity > 0:
355
+ self.log_progress(
356
+ {
357
+ "log": "Reflection:",
358
+ "reflection": reflection,
359
+ }
360
+ )
361
+ _LOGGER.info(f"Reflection: {reflection}")
362
+ feedback = cast(str, reflection["feedback"])
363
+ success = cast(bool, reflection["success"])
364
+ working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
365
+
366
+ retries += 1
367
+
368
+ self.log_progress(
369
+ {
370
+ "log": f"The Vision Agent V3 has concluded this chat.\nSuccess: {success}",
371
+ "finished": True,
372
+ }
373
+ )
296
374
 
297
375
  return {
298
376
  "code": code,
299
377
  "test": test,
378
+ "test_result": results["test_result"],
300
379
  "plan": plan,
301
380
  "working_memory": working_memory,
302
381
  }
303
382
 
304
- def log_progress(self, description: str) -> None:
383
+ def log_progress(self, data: Dict[str, Any]) -> None:
384
+ if self.report_progress_callback is not None:
385
+ self.report_progress_callback(data)
305
386
  pass
@@ -61,6 +61,7 @@ This is the documentation for the functions you have access to. You may call any
61
61
  2. **Algorithm/Method Selection**: Decide on the most efficient way.
62
62
  3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
63
63
  4. **Code Generation**: Translate your pseudocode into executable Python code.
64
+ 5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
64
65
  """
65
66
 
66
67
  TEST = """
@@ -149,7 +150,7 @@ This is the documentation for the functions you have access to. You may call any
149
150
 
150
151
  **Input Code Snippet**:
151
152
  ```python
152
- ### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
153
+ ### Please decide how would you want to generate test cases. Based on incomplete code or completed version.
153
154
  {code}
154
155
  ```
155
156
 
@@ -159,8 +160,12 @@ This is the documentation for the functions you have access to. You may call any
159
160
  **Instructions**:
160
161
  1. Verify the fundamental functionality under normal conditions.
161
162
  2. Ensure each test case is well-documented with comments explaining the scenario it covers.
162
- 3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
163
- 4. DO NOT mock any functions, you must test their functionality as is.
163
+ 3. Your test case MUST run only on the given image which is {media}
164
+ 4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
165
+ 5. DO NOT mock any functions, you must test their functionality as is.
166
+ 6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
167
+ 7. DO NOT import the testing function as it will available in the testing environment.
168
+ 8. Print the output of the function that is being tested.
164
169
  """
165
170
 
166
171
 
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
416
416
  return cast(float, np.min(dist_matrix))
417
417
 
418
418
 
419
- def closest_box_distance(box1: List[float], box2: List[float]) -> float:
419
+ def closest_box_distance(
420
+ box1: List[float], box2: List[float], image_size: Tuple[int, int]
421
+ ) -> float:
420
422
  """'closest_box_distance' calculates the closest distance between two bounding boxes.
421
423
 
422
424
  Parameters:
423
425
  box1 (List[float]): The first bounding box.
424
426
  box2 (List[float]): The second bounding box.
427
+ image_size (Tuple[int, int]): The size of the image given as (height, width).
425
428
 
426
429
  Returns:
427
430
  float: The closest distance between the two bounding boxes.
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
432
435
  141.42
433
436
  """
434
437
 
435
- x11, y11, x12, y12 = box1
436
- x21, y21, x22, y22 = box2
438
+ x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
439
+ x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
437
440
 
438
441
  horizontal_distance = np.max([0, x21 - x12, x11 - x22])
439
442
  vertical_distance = np.max([0, y21 - y12, y11 - y22])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.27
3
+ Version: 0.2.29
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -1,18 +1,18 @@
1
1
  vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
2
2
  vision_agent/agent/__init__.py,sha256=jpmL6z5e4PFfQM21JbSsRwcERRXn58XFmURAMwWeoRM,249
3
- vision_agent/agent/agent.py,sha256=X7kON-g9ePUKumCDaYfQNBX_MEFE-ax5PnRp7-Cc5Wo,529
4
- vision_agent/agent/agent_coder.py,sha256=4iB732bX4wDnPAuyYBk6HWlf4aFq2l9EcL695qfDIXw,7004
3
+ vision_agent/agent/agent.py,sha256=-HblUCrTZdxsP-MlSBzSfCTm5vELLnfb40l2L-bkVHw,538
4
+ vision_agent/agent/agent_coder.py,sha256=d4N0I7QMKOdGoI-0ZRQ3Fp2QusAHbbJNe8AXg1IyRk4,7006
5
5
  vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
6
6
  vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMVg,11511
7
7
  vision_agent/agent/easytool_prompts.py,sha256=Bikw-PPLkm78dwywTlnv32Y1Tw6JMeC-R7oCnXWLcTk,4656
8
8
  vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
9
9
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
10
- vision_agent/agent/vision_agent.py,sha256=pnx7gtTPazR7Dck5_kfZC3S3QWKu4e28YVigzOicOX0,27130
10
+ vision_agent/agent/vision_agent.py,sha256=Rs7O0PXc2J9FlrpBa3UGs5NjqQT51Y507klQf9fC0UY,27281
11
11
  vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
- vision_agent/agent/vision_agent_v2.py,sha256=eQS5w0aURWWCc0x1dqlApep65DKttePR-ZQPSxkWuvw,14487
12
+ vision_agent/agent/vision_agent_v2.py,sha256=t2D1mMUYEv1dFeMrkEUVbDEdArunb7F1ZeYB8qijU2w,15109
13
13
  vision_agent/agent/vision_agent_v2_prompts.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
14
- vision_agent/agent/vision_agent_v3.py,sha256=EGA3zQKVIVdDlZOWwZNgueMnlqKqNwGvSc9v_XM-b34,9696
15
- vision_agent/agent/vision_agent_v3_prompts.py,sha256=LRZBKObeb0Bs48vo7vtB2M8loPO1lQzruH-3IiMS5ts,7484
14
+ vision_agent/agent/vision_agent_v3.py,sha256=MbczVWVaTY6d8OGJl5cQpX55AvfP5d_5F1YBqC29sR8,12123
15
+ vision_agent/agent/vision_agent_v3_prompts.py,sha256=ApubrCj72R7sBsRo43WVnckMVj-vqGxcMkSrC9kF8bM,7999
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
18
18
  vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJx
23
23
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
24
24
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
25
25
  vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
26
- vision_agent/tools/tools_v2.py,sha256=3Bv1xuZFoPjaCb-VixF5Vl3uoyac03571FXUzBI8FBQ,21404
26
+ vision_agent/tools/tools_v2.py,sha256=mio0A1l5QcyRC5IgaD4Trfqg7hFTZ8rOjx1dYivwb4Q,21585
27
27
  vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
28
28
  vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
33
- vision_agent-0.2.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.27.dist-info/METADATA,sha256=rnWYNUve9b4hBvZp5hlNCz_B_7PMb5mhjj_zo6al-O0,9212
35
- vision_agent-0.2.27.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.27.dist-info/RECORD,,
33
+ vision_agent-0.2.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.29.dist-info/METADATA,sha256=WK1rUZk9YdAglrze7ubf4_dx0ipyBRAb980-a_uabLc,9212
35
+ vision_agent-0.2.29.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.29.dist-info/RECORD,,