vision-agent 0.2.28__py3-none-any.whl → 0.2.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Union, cast, Callable
6
+ from typing import Any, Dict, List, Optional, Union, cast, Callable, no_type_check
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -117,6 +117,7 @@ def write_and_test_code(
117
117
  log_progress: Callable[[Dict[str, Any]], None],
118
118
  verbosity: int = 0,
119
119
  max_retries: int = 3,
120
+ input_media: Optional[Union[str, Path]] = None,
120
121
  ) -> Dict[str, Any]:
121
122
  code = extract_code(
122
123
  coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
@@ -124,14 +125,18 @@ def write_and_test_code(
124
125
  test = extract_code(
125
126
  tester(
126
127
  SIMPLE_TEST.format(
127
- docstring=tool_utils, question=task, code=code, feedback=working_memory
128
+ docstring=tool_utils,
129
+ question=task,
130
+ code=code,
131
+ feedback=working_memory,
132
+ media=input_media,
128
133
  )
129
134
  )
130
135
  )
131
136
 
132
137
  success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
133
138
  if verbosity == 2:
134
- _LOGGER.info("First code and tests:")
139
+ _LOGGER.info("Initial code and tests:")
135
140
  log_progress(
136
141
  {
137
142
  "log": "Code:",
@@ -153,7 +158,7 @@ def write_and_test_code(
153
158
  "result": result,
154
159
  }
155
160
  )
156
- _LOGGER.info(f"First result: {result}")
161
+ _LOGGER.info(f"Initial result: {result}")
157
162
 
158
163
  count = 0
159
164
  new_working_memory = []
@@ -198,16 +203,18 @@ def write_and_test_code(
198
203
  _LOGGER.info(f"Debug result: {result}")
199
204
  count += 1
200
205
 
201
- if verbosity == 1:
206
+ if verbosity >= 1:
207
+ _LOGGER.info("Final code and tests:")
202
208
  _CONSOLE.print(
203
209
  Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
204
210
  )
205
- _LOGGER.info(f"Result: {result}")
211
+ _LOGGER.info(f"Final Result: {result}")
206
212
 
207
213
  return {
208
214
  "code": code,
209
215
  "test": test,
210
216
  "success": success,
217
+ "test_result": result,
211
218
  "working_memory": new_working_memory,
212
219
  }
213
220
 
@@ -263,23 +270,26 @@ class VisionAgentV3(Agent):
263
270
  else tool_recommender
264
271
  )
265
272
  self.verbosity = verbosity
266
- self.max_retries = 3
273
+ self.max_retries = 2
267
274
  self.report_progress_callback = report_progress_callback
268
275
 
276
+ @no_type_check
269
277
  def __call__(
270
278
  self,
271
279
  input: Union[List[Dict[str, str]], str],
272
280
  image: Optional[Union[str, Path]] = None,
273
- ) -> str:
281
+ ) -> Dict[str, Any]:
274
282
  if isinstance(input, str):
275
283
  input = [{"role": "user", "content": input}]
276
284
  results = self.chat_with_workflow(input, image)
277
- return results["code"] # type: ignore
285
+ results.pop("working_memory")
286
+ return results
278
287
 
279
288
  def chat_with_workflow(
280
289
  self,
281
290
  chat: List[Dict[str, str]],
282
291
  image: Optional[Union[str, Path]] = None,
292
+ self_reflection: bool = False,
283
293
  ) -> Dict[str, Any]:
284
294
  if len(chat) == 0:
285
295
  raise ValueError("Chat cannot be empty.")
@@ -302,13 +312,14 @@ class VisionAgentV3(Agent):
302
312
  chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
303
313
  )
304
314
  plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
305
- if self.verbosity == 1 or self.verbosity == 2:
315
+ if self.verbosity >= 1:
306
316
  self.log_progress(
307
317
  {
308
318
  "log": "Going to run the following plan(s) in sequence:\n",
309
319
  "plan": plan_i,
310
320
  }
311
321
  )
322
+
312
323
  _LOGGER.info(
313
324
  f"""
314
325
  {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -330,6 +341,7 @@ class VisionAgentV3(Agent):
330
341
  self.debugger,
331
342
  self.log_progress,
332
343
  verbosity=self.verbosity,
344
+ input_media=image,
333
345
  )
334
346
  success = cast(bool, results["success"])
335
347
  code = cast(str, results["code"])
@@ -337,18 +349,21 @@ class VisionAgentV3(Agent):
337
349
  working_memory.extend(results["working_memory"]) # type: ignore
338
350
  plan.append({"code": code, "test": test, "plan": plan_i})
339
351
 
340
- reflection = reflect(chat, plan_i_str, code, self.planner)
341
- if self.verbosity > 0:
342
- self.log_progress(
343
- {
344
- "log": "Reflection:",
345
- "reflection": reflection,
346
- }
347
- )
348
- _LOGGER.info(f"Reflection: {reflection}")
349
- feedback = cast(str, reflection["feedback"])
350
- success = cast(bool, reflection["success"])
351
- working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
352
+ if self_reflection:
353
+ reflection = reflect(chat, plan_i_str, code, self.planner)
354
+ if self.verbosity > 0:
355
+ self.log_progress(
356
+ {
357
+ "log": "Reflection:",
358
+ "reflection": reflection,
359
+ }
360
+ )
361
+ _LOGGER.info(f"Reflection: {reflection}")
362
+ feedback = cast(str, reflection["feedback"])
363
+ success = cast(bool, reflection["success"])
364
+ working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
365
+
366
+ retries += 1
352
367
 
353
368
  self.log_progress(
354
369
  {
@@ -360,6 +375,7 @@ class VisionAgentV3(Agent):
360
375
  return {
361
376
  "code": code,
362
377
  "test": test,
378
+ "test_result": results["test_result"],
363
379
  "plan": plan,
364
380
  "working_memory": working_memory,
365
381
  }
@@ -61,6 +61,7 @@ This is the documentation for the functions you have access to. You may call any
61
61
  2. **Algorithm/Method Selection**: Decide on the most efficient way.
62
62
  3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
63
63
  4. **Code Generation**: Translate your pseudocode into executable Python code.
64
+ 5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
64
65
  """
65
66
 
66
67
  TEST = """
@@ -149,7 +150,7 @@ This is the documentation for the functions you have access to. You may call any
149
150
 
150
151
  **Input Code Snippet**:
151
152
  ```python
152
- ### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
153
+ ### Please decide how you want to generate test cases: based on the incomplete code or on the completed version.
153
154
  {code}
154
155
  ```
155
156
 
@@ -159,8 +160,12 @@ This is the documentation for the functions you have access to. You may call any
159
160
  **Instructions**:
160
161
  1. Verify the fundamental functionality under normal conditions.
161
162
  2. Ensure each test case is well-documented with comments explaining the scenario it covers.
162
- 3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
163
- 4. DO NOT mock any functions, you must test their functionality as is.
163
+ 3. Your test case MUST run only on the given image which is {media}
164
+ 4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
165
+ 5. DO NOT mock any functions, you must test their functionality as is.
166
+ 6. DO NOT assert the output value; run the code, verify that it runs without any errors, and assert only the output format or data structure.
167
+ 7. DO NOT import the function being tested, as it will be available in the testing environment.
168
+ 8. Print the output of the function that is being tested.
164
169
  """
165
170
 
166
171
 
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
416
416
  return cast(float, np.min(dist_matrix))
417
417
 
418
418
 
419
- def closest_box_distance(box1: List[float], box2: List[float]) -> float:
419
+ def closest_box_distance(
420
+ box1: List[float], box2: List[float], image_size: Tuple[int, int]
421
+ ) -> float:
420
422
  """'closest_box_distance' calculates the closest distance between two bounding boxes.
421
423
 
422
424
  Parameters:
423
425
  box1 (List[float]): The first bounding box.
424
426
  box2 (List[float]): The second bounding box.
427
+ image_size (Tuple[int, int]): The size of the image given as (height, width).
425
428
 
426
429
  Returns:
427
430
  float: The closest distance between the two bounding boxes.
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
432
435
  141.42
433
436
  """
434
437
 
435
- x11, y11, x12, y12 = box1
436
- x21, y21, x22, y22 = box2
438
+ x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
439
+ x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
437
440
 
438
441
  horizontal_distance = np.max([0, x21 - x12, x11 - x22])
439
442
  vertical_distance = np.max([0, y21 - y12, y11 - y22])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.28
3
+ Version: 0.2.29
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -11,8 +11,8 @@ vision_agent/agent/vision_agent.py,sha256=Rs7O0PXc2J9FlrpBa3UGs5NjqQT51Y507klQf9
11
11
  vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
12
  vision_agent/agent/vision_agent_v2.py,sha256=t2D1mMUYEv1dFeMrkEUVbDEdArunb7F1ZeYB8qijU2w,15109
13
13
  vision_agent/agent/vision_agent_v2_prompts.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
14
- vision_agent/agent/vision_agent_v3.py,sha256=y6BB2Mv9vj5OT4b829GgVmbiHX24MGMsz0gncgVy-4g,11632
15
- vision_agent/agent/vision_agent_v3_prompts.py,sha256=LRZBKObeb0Bs48vo7vtB2M8loPO1lQzruH-3IiMS5ts,7484
14
+ vision_agent/agent/vision_agent_v3.py,sha256=MbczVWVaTY6d8OGJl5cQpX55AvfP5d_5F1YBqC29sR8,12123
15
+ vision_agent/agent/vision_agent_v3_prompts.py,sha256=ApubrCj72R7sBsRo43WVnckMVj-vqGxcMkSrC9kF8bM,7999
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
18
18
  vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJx
23
23
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
24
24
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
25
25
  vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
26
- vision_agent/tools/tools_v2.py,sha256=3Bv1xuZFoPjaCb-VixF5Vl3uoyac03571FXUzBI8FBQ,21404
26
+ vision_agent/tools/tools_v2.py,sha256=mio0A1l5QcyRC5IgaD4Trfqg7hFTZ8rOjx1dYivwb4Q,21585
27
27
  vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
28
28
  vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
33
- vision_agent-0.2.28.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.28.dist-info/METADATA,sha256=Vnz8N9UhaUvp6ljocoVpXIIuW26Q2Jf1vFQpfdXIXgg,9212
35
- vision_agent-0.2.28.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.28.dist-info/RECORD,,
33
+ vision_agent-0.2.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.29.dist-info/METADATA,sha256=WK1rUZk9YdAglrze7ubf4_dx0ipyBRAb980-a_uabLc,9212
35
+ vision_agent-0.2.29.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.29.dist-info/RECORD,,