vision-agent 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from pathlib import Path
3
- from typing import Dict, List, Optional, Union, Any
3
+ from typing import Any, Dict, List, Optional, Union
4
4
 
5
5
 
6
6
  class Agent(ABC):
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Dict, List, Optional, Union, Any
6
+ from typing import Any, Dict, List, Optional, Union
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Union, cast, Callable
6
+ from typing import Any, Callable, Dict, List, Optional, Union, cast, no_type_check
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -14,6 +14,7 @@ from vision_agent.agent.vision_agent_v3_prompts import (
14
14
  CODE,
15
15
  FEEDBACK,
16
16
  FIX_BUG,
17
+ FULL_TASK,
17
18
  PLAN,
18
19
  REFLECT,
19
20
  SIMPLE_TEST,
@@ -117,6 +118,7 @@ def write_and_test_code(
117
118
  log_progress: Callable[[Dict[str, Any]], None],
118
119
  verbosity: int = 0,
119
120
  max_retries: int = 3,
121
+ input_media: Optional[Union[str, Path]] = None,
120
122
  ) -> Dict[str, Any]:
121
123
  code = extract_code(
122
124
  coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
@@ -124,14 +126,18 @@ def write_and_test_code(
124
126
  test = extract_code(
125
127
  tester(
126
128
  SIMPLE_TEST.format(
127
- docstring=tool_utils, question=task, code=code, feedback=working_memory
129
+ docstring=tool_utils,
130
+ question=task,
131
+ code=code,
132
+ feedback=working_memory,
133
+ media=input_media,
128
134
  )
129
135
  )
130
136
  )
131
137
 
132
138
  success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
133
139
  if verbosity == 2:
134
- _LOGGER.info("First code and tests:")
140
+ _LOGGER.info("Initial code and tests:")
135
141
  log_progress(
136
142
  {
137
143
  "log": "Code:",
@@ -153,7 +159,7 @@ def write_and_test_code(
153
159
  "result": result,
154
160
  }
155
161
  )
156
- _LOGGER.info(f"First result: {result}")
162
+ _LOGGER.info(f"Initial result: {result}")
157
163
 
158
164
  count = 0
159
165
  new_working_memory = []
@@ -198,16 +204,18 @@ def write_and_test_code(
198
204
  _LOGGER.info(f"Debug result: {result}")
199
205
  count += 1
200
206
 
201
- if verbosity == 1:
207
+ if verbosity >= 1:
208
+ _LOGGER.info("Final code and tests:")
202
209
  _CONSOLE.print(
203
210
  Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
204
211
  )
205
- _LOGGER.info(f"Result: {result}")
212
+ _LOGGER.info(f"Final Result: {result}")
206
213
 
207
214
  return {
208
215
  "code": code,
209
216
  "test": test,
210
217
  "success": success,
218
+ "test_result": result,
211
219
  "working_memory": new_working_memory,
212
220
  }
213
221
 
@@ -263,23 +271,26 @@ class VisionAgentV3(Agent):
263
271
  else tool_recommender
264
272
  )
265
273
  self.verbosity = verbosity
266
- self.max_retries = 3
274
+ self.max_retries = 2
267
275
  self.report_progress_callback = report_progress_callback
268
276
 
277
+ @no_type_check
269
278
  def __call__(
270
279
  self,
271
280
  input: Union[List[Dict[str, str]], str],
272
281
  image: Optional[Union[str, Path]] = None,
273
- ) -> str:
282
+ ) -> Dict[str, Any]:
274
283
  if isinstance(input, str):
275
284
  input = [{"role": "user", "content": input}]
276
285
  results = self.chat_with_workflow(input, image)
277
- return results["code"] # type: ignore
286
+ results.pop("working_memory")
287
+ return results
278
288
 
279
289
  def chat_with_workflow(
280
290
  self,
281
291
  chat: List[Dict[str, str]],
282
292
  image: Optional[Union[str, Path]] = None,
293
+ self_reflection: bool = False,
283
294
  ) -> Dict[str, Any]:
284
295
  if len(chat) == 0:
285
296
  raise ValueError("Chat cannot be empty.")
@@ -302,13 +313,14 @@ class VisionAgentV3(Agent):
302
313
  chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
303
314
  )
304
315
  plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
305
- if self.verbosity == 1 or self.verbosity == 2:
316
+ if self.verbosity >= 1:
306
317
  self.log_progress(
307
318
  {
308
319
  "log": "Going to run the following plan(s) in sequence:\n",
309
320
  "plan": plan_i,
310
321
  }
311
322
  )
323
+
312
324
  _LOGGER.info(
313
325
  f"""
314
326
  {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -321,7 +333,7 @@ class VisionAgentV3(Agent):
321
333
  self.verbosity,
322
334
  )
323
335
  results = write_and_test_code(
324
- plan_i_str,
336
+ FULL_TASK.format(user_request=chat[0]["content"], subtasks=plan_i_str),
325
337
  tool_info,
326
338
  UTILITIES_DOCSTRING,
327
339
  format_memory(working_memory),
@@ -330,6 +342,7 @@ class VisionAgentV3(Agent):
330
342
  self.debugger,
331
343
  self.log_progress,
332
344
  verbosity=self.verbosity,
345
+ input_media=image,
333
346
  )
334
347
  success = cast(bool, results["success"])
335
348
  code = cast(str, results["code"])
@@ -337,18 +350,28 @@ class VisionAgentV3(Agent):
337
350
  working_memory.extend(results["working_memory"]) # type: ignore
338
351
  plan.append({"code": code, "test": test, "plan": plan_i})
339
352
 
340
- reflection = reflect(chat, plan_i_str, code, self.planner)
341
- if self.verbosity > 0:
342
- self.log_progress(
343
- {
344
- "log": "Reflection:",
345
- "reflection": reflection,
346
- }
353
+ if self_reflection:
354
+ reflection = reflect(
355
+ chat,
356
+ FULL_TASK.format(
357
+ user_request=chat[0]["content"], subtasks=plan_i_str
358
+ ),
359
+ code,
360
+ self.planner,
347
361
  )
348
- _LOGGER.info(f"Reflection: {reflection}")
349
- feedback = cast(str, reflection["feedback"])
350
- success = cast(bool, reflection["success"])
351
- working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
362
+ if self.verbosity > 0:
363
+ self.log_progress(
364
+ {
365
+ "log": "Reflection:",
366
+ "reflection": reflection,
367
+ }
368
+ )
369
+ _LOGGER.info(f"Reflection: {reflection}")
370
+ feedback = cast(str, reflection["feedback"])
371
+ success = cast(bool, reflection["success"])
372
+ working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
373
+
374
+ retries += 1
352
375
 
353
376
  self.log_progress(
354
377
  {
@@ -360,6 +383,7 @@ class VisionAgentV3(Agent):
360
383
  return {
361
384
  "code": code,
362
385
  "test": test,
386
+ "test_result": results["test_result"],
363
387
  "plan": plan,
364
388
  "working_memory": working_memory,
365
389
  }
@@ -3,6 +3,14 @@ USER_REQ = """
3
3
  {user_request}
4
4
  """
5
5
 
6
+ FULL_TASK = """
7
+ ## User Request
8
+ {user_request}
9
+
10
+ ## Subtasks
11
+ {subtasks}
12
+ """
13
+
6
14
  FEEDBACK = """
7
15
  ## This contains code and feedback from previous runs and is used for providing context so you do not make the same mistake again.
8
16
 
@@ -61,6 +69,7 @@ This is the documentation for the functions you have access to. You may call any
61
69
  2. **Algorithm/Method Selection**: Decide on the most efficient way.
62
70
  3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
63
71
  4. **Code Generation**: Translate your pseudocode into executable Python code.
72
+ 5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
64
73
  """
65
74
 
66
75
  TEST = """
@@ -149,7 +158,7 @@ This is the documentation for the functions you have access to. You may call any
149
158
 
150
159
  **Input Code Snippet**:
151
160
  ```python
152
- ### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
161
+ ### Please decide how you want to generate test cases: based on the incomplete code or on the completed version.
153
162
  {code}
154
163
  ```
155
164
 
@@ -159,13 +168,17 @@ This is the documentation for the functions you have access to. You may call any
159
168
  **Instructions**:
160
169
  1. Verify the fundamental functionality under normal conditions.
161
170
  2. Ensure each test case is well-documented with comments explaining the scenario it covers.
162
- 3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
163
- 4. DO NOT mock any functions, you must test their functionality as is.
171
+ 3. Your test case MUST run only on the given image which is {media}
172
+ 4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
173
+ 5. DO NOT mock any functions, you must test their functionality as is.
174
+ 6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
175
+ 7. DO NOT import the testing function as it will be available in the testing environment.
176
+ 8. Print the output of the function that is being tested.
164
177
  """
165
178
 
166
179
 
167
180
  FIX_BUG = """
168
- **Role** As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so feel free to run !pip install to install missing packages.
181
+ **Role** As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so you can run !pip install to install missing packages.
169
182
 
170
183
  **Instructions**:
171
184
  Please re-complete the code to fix the error message. Here is the previous version:
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
416
416
  return cast(float, np.min(dist_matrix))
417
417
 
418
418
 
419
- def closest_box_distance(box1: List[float], box2: List[float]) -> float:
419
+ def closest_box_distance(
420
+ box1: List[float], box2: List[float], image_size: Tuple[int, int]
421
+ ) -> float:
420
422
  """'closest_box_distance' calculates the closest distance between two bounding boxes.
421
423
 
422
424
  Parameters:
423
425
  box1 (List[float]): The first bounding box.
424
426
  box2 (List[float]): The second bounding box.
427
+ image_size (Tuple[int, int]): The size of the image given as (height, width).
425
428
 
426
429
  Returns:
427
430
  float: The closest distance between the two bounding boxes.
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
432
435
  141.42
433
436
  """
434
437
 
435
- x11, y11, x12, y12 = box1
436
- x21, y21, x22, y22 = box2
438
+ x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
439
+ x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
437
440
 
438
441
  horizontal_distance = np.max([0, x21 - x12, x11 - x22])
439
442
  vertical_distance = np.max([0, y21 - y12, y11 - y22])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.28
3
+ Version: 0.2.30
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -1,7 +1,7 @@
1
1
  vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
2
2
  vision_agent/agent/__init__.py,sha256=jpmL6z5e4PFfQM21JbSsRwcERRXn58XFmURAMwWeoRM,249
3
- vision_agent/agent/agent.py,sha256=-HblUCrTZdxsP-MlSBzSfCTm5vELLnfb40l2L-bkVHw,538
4
- vision_agent/agent/agent_coder.py,sha256=d4N0I7QMKOdGoI-0ZRQ3Fp2QusAHbbJNe8AXg1IyRk4,7006
3
+ vision_agent/agent/agent.py,sha256=4buKL_7PA6q_Ktlf26FxfX0JxRGrL-swYk0xJuYNVz4,538
4
+ vision_agent/agent/agent_coder.py,sha256=4Neo6qM9-J8sJ-PKqSaUHr28SYm43IjEvhDK8BfDosE,7006
5
5
  vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
6
6
  vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMVg,11511
7
7
  vision_agent/agent/easytool_prompts.py,sha256=Bikw-PPLkm78dwywTlnv32Y1Tw6JMeC-R7oCnXWLcTk,4656
@@ -11,8 +11,8 @@ vision_agent/agent/vision_agent.py,sha256=Rs7O0PXc2J9FlrpBa3UGs5NjqQT51Y507klQf9
11
11
  vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
12
  vision_agent/agent/vision_agent_v2.py,sha256=t2D1mMUYEv1dFeMrkEUVbDEdArunb7F1ZeYB8qijU2w,15109
13
13
  vision_agent/agent/vision_agent_v2_prompts.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
14
- vision_agent/agent/vision_agent_v3.py,sha256=y6BB2Mv9vj5OT4b829GgVmbiHX24MGMsz0gncgVy-4g,11632
15
- vision_agent/agent/vision_agent_v3_prompts.py,sha256=LRZBKObeb0Bs48vo7vtB2M8loPO1lQzruH-3IiMS5ts,7484
14
+ vision_agent/agent/vision_agent_v3.py,sha256=jPU__NueKQwFzIoJd0zzg6z9q7IDQa9QPaxt8Qlca98,12403
15
+ vision_agent/agent/vision_agent_v3_prompts.py,sha256=ejedMNDluVYZjHOIXKN98LzX-pOHin2DJhCyZUWULNE,8070
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
18
18
  vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJx
23
23
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
24
24
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
25
25
  vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
26
- vision_agent/tools/tools_v2.py,sha256=3Bv1xuZFoPjaCb-VixF5Vl3uoyac03571FXUzBI8FBQ,21404
26
+ vision_agent/tools/tools_v2.py,sha256=mio0A1l5QcyRC5IgaD4Trfqg7hFTZ8rOjx1dYivwb4Q,21585
27
27
  vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
28
28
  vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
33
- vision_agent-0.2.28.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.28.dist-info/METADATA,sha256=Vnz8N9UhaUvp6ljocoVpXIIuW26Q2Jf1vFQpfdXIXgg,9212
35
- vision_agent-0.2.28.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.28.dist-info/RECORD,,
33
+ vision_agent-0.2.30.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.30.dist-info/METADATA,sha256=uVj7XfG4Hat1Bed9FYM2dipIseooN4AHY-Tl4rSPOIg,9212
35
+ vision_agent-0.2.30.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.30.dist-info/RECORD,,