vision-agent 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/agent.py +1 -1
- vision_agent/agent/agent_coder.py +1 -1
- vision_agent/agent/vision_agent_v3.py +46 -22
- vision_agent/agent/vision_agent_v3_prompts.py +17 -4
- vision_agent/tools/tools_v2.py +6 -3
- {vision_agent-0.2.28.dist-info → vision_agent-0.2.30.dist-info}/METADATA +1 -1
- {vision_agent-0.2.28.dist-info → vision_agent-0.2.30.dist-info}/RECORD +9 -9
- {vision_agent-0.2.28.dist-info → vision_agent-0.2.30.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.28.dist-info → vision_agent-0.2.30.dist-info}/WHEEL +0 -0
vision_agent/agent/vision_agent_v3.py
CHANGED
@@ -3,7 +3,7 @@ import json
|
|
3
3
|
import logging
|
4
4
|
import sys
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Union, cast,
|
6
|
+
from typing import Any, Callable, Dict, List, Optional, Union, cast, no_type_check
|
7
7
|
|
8
8
|
from rich.console import Console
|
9
9
|
from rich.syntax import Syntax
|
@@ -14,6 +14,7 @@ from vision_agent.agent.vision_agent_v3_prompts import (
|
|
14
14
|
CODE,
|
15
15
|
FEEDBACK,
|
16
16
|
FIX_BUG,
|
17
|
+
FULL_TASK,
|
17
18
|
PLAN,
|
18
19
|
REFLECT,
|
19
20
|
SIMPLE_TEST,
|
@@ -117,6 +118,7 @@ def write_and_test_code(
|
|
117
118
|
log_progress: Callable[[Dict[str, Any]], None],
|
118
119
|
verbosity: int = 0,
|
119
120
|
max_retries: int = 3,
|
121
|
+
input_media: Optional[Union[str, Path]] = None,
|
120
122
|
) -> Dict[str, Any]:
|
121
123
|
code = extract_code(
|
122
124
|
coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
|
@@ -124,14 +126,18 @@ def write_and_test_code(
|
|
124
126
|
test = extract_code(
|
125
127
|
tester(
|
126
128
|
SIMPLE_TEST.format(
|
127
|
-
docstring=tool_utils,
|
129
|
+
docstring=tool_utils,
|
130
|
+
question=task,
|
131
|
+
code=code,
|
132
|
+
feedback=working_memory,
|
133
|
+
media=input_media,
|
128
134
|
)
|
129
135
|
)
|
130
136
|
)
|
131
137
|
|
132
138
|
success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
|
133
139
|
if verbosity == 2:
|
134
|
-
_LOGGER.info("
|
140
|
+
_LOGGER.info("Initial code and tests:")
|
135
141
|
log_progress(
|
136
142
|
{
|
137
143
|
"log": "Code:",
|
@@ -153,7 +159,7 @@ def write_and_test_code(
|
|
153
159
|
"result": result,
|
154
160
|
}
|
155
161
|
)
|
156
|
-
_LOGGER.info(f"
|
162
|
+
_LOGGER.info(f"Initial result: {result}")
|
157
163
|
|
158
164
|
count = 0
|
159
165
|
new_working_memory = []
|
@@ -198,16 +204,18 @@ def write_and_test_code(
|
|
198
204
|
_LOGGER.info(f"Debug result: {result}")
|
199
205
|
count += 1
|
200
206
|
|
201
|
-
if verbosity
|
207
|
+
if verbosity >= 1:
|
208
|
+
_LOGGER.info("Final code and tests:")
|
202
209
|
_CONSOLE.print(
|
203
210
|
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
|
204
211
|
)
|
205
|
-
_LOGGER.info(f"Result: {result}")
|
212
|
+
_LOGGER.info(f"Final Result: {result}")
|
206
213
|
|
207
214
|
return {
|
208
215
|
"code": code,
|
209
216
|
"test": test,
|
210
217
|
"success": success,
|
218
|
+
"test_result": result,
|
211
219
|
"working_memory": new_working_memory,
|
212
220
|
}
|
213
221
|
|
@@ -263,23 +271,26 @@ class VisionAgentV3(Agent):
|
|
263
271
|
else tool_recommender
|
264
272
|
)
|
265
273
|
self.verbosity = verbosity
|
266
|
-
self.max_retries =
|
274
|
+
self.max_retries = 2
|
267
275
|
self.report_progress_callback = report_progress_callback
|
268
276
|
|
277
|
+
@no_type_check
|
269
278
|
def __call__(
|
270
279
|
self,
|
271
280
|
input: Union[List[Dict[str, str]], str],
|
272
281
|
image: Optional[Union[str, Path]] = None,
|
273
|
-
) -> str:
|
282
|
+
) -> Dict[str, Any]:
|
274
283
|
if isinstance(input, str):
|
275
284
|
input = [{"role": "user", "content": input}]
|
276
285
|
results = self.chat_with_workflow(input, image)
|
277
|
-
|
286
|
+
results.pop("working_memory")
|
287
|
+
return results
|
278
288
|
|
279
289
|
def chat_with_workflow(
|
280
290
|
self,
|
281
291
|
chat: List[Dict[str, str]],
|
282
292
|
image: Optional[Union[str, Path]] = None,
|
293
|
+
self_reflection: bool = False,
|
283
294
|
) -> Dict[str, Any]:
|
284
295
|
if len(chat) == 0:
|
285
296
|
raise ValueError("Chat cannot be empty.")
|
@@ -302,13 +313,14 @@ class VisionAgentV3(Agent):
|
|
302
313
|
chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
|
303
314
|
)
|
304
315
|
plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
|
305
|
-
if self.verbosity
|
316
|
+
if self.verbosity >= 1:
|
306
317
|
self.log_progress(
|
307
318
|
{
|
308
319
|
"log": "Going to run the following plan(s) in sequence:\n",
|
309
320
|
"plan": plan_i,
|
310
321
|
}
|
311
322
|
)
|
323
|
+
|
312
324
|
_LOGGER.info(
|
313
325
|
f"""
|
314
326
|
{tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
@@ -321,7 +333,7 @@ class VisionAgentV3(Agent):
|
|
321
333
|
self.verbosity,
|
322
334
|
)
|
323
335
|
results = write_and_test_code(
|
324
|
-
plan_i_str,
|
336
|
+
FULL_TASK.format(user_request=chat[0]["content"], subtasks=plan_i_str),
|
325
337
|
tool_info,
|
326
338
|
UTILITIES_DOCSTRING,
|
327
339
|
format_memory(working_memory),
|
@@ -330,6 +342,7 @@ class VisionAgentV3(Agent):
|
|
330
342
|
self.debugger,
|
331
343
|
self.log_progress,
|
332
344
|
verbosity=self.verbosity,
|
345
|
+
input_media=image,
|
333
346
|
)
|
334
347
|
success = cast(bool, results["success"])
|
335
348
|
code = cast(str, results["code"])
|
@@ -337,18 +350,28 @@ class VisionAgentV3(Agent):
|
|
337
350
|
working_memory.extend(results["working_memory"]) # type: ignore
|
338
351
|
plan.append({"code": code, "test": test, "plan": plan_i})
|
339
352
|
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
"
|
345
|
-
|
346
|
-
|
353
|
+
if self_reflection:
|
354
|
+
reflection = reflect(
|
355
|
+
chat,
|
356
|
+
FULL_TASK.format(
|
357
|
+
user_request=chat[0]["content"], subtasks=plan_i_str
|
358
|
+
),
|
359
|
+
code,
|
360
|
+
self.planner,
|
347
361
|
)
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
362
|
+
if self.verbosity > 0:
|
363
|
+
self.log_progress(
|
364
|
+
{
|
365
|
+
"log": "Reflection:",
|
366
|
+
"reflection": reflection,
|
367
|
+
}
|
368
|
+
)
|
369
|
+
_LOGGER.info(f"Reflection: {reflection}")
|
370
|
+
feedback = cast(str, reflection["feedback"])
|
371
|
+
success = cast(bool, reflection["success"])
|
372
|
+
working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
|
373
|
+
|
374
|
+
retries += 1
|
352
375
|
|
353
376
|
self.log_progress(
|
354
377
|
{
|
@@ -360,6 +383,7 @@ class VisionAgentV3(Agent):
|
|
360
383
|
return {
|
361
384
|
"code": code,
|
362
385
|
"test": test,
|
386
|
+
"test_result": results["test_result"],
|
363
387
|
"plan": plan,
|
364
388
|
"working_memory": working_memory,
|
365
389
|
}
|
vision_agent/agent/vision_agent_v3_prompts.py
CHANGED
@@ -3,6 +3,14 @@ USER_REQ = """
|
|
3
3
|
{user_request}
|
4
4
|
"""
|
5
5
|
|
6
|
+
FULL_TASK = """
|
7
|
+
## User Request
|
8
|
+
{user_request}
|
9
|
+
|
10
|
+
## Subtasks
|
11
|
+
{subtasks}
|
12
|
+
"""
|
13
|
+
|
6
14
|
FEEDBACK = """
|
7
15
|
## This contains code and feedback from previous runs and is used for providing context so you do not make the same mistake again.
|
8
16
|
|
@@ -61,6 +69,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
61
69
|
2. **Algorithm/Method Selection**: Decide on the most efficient way.
|
62
70
|
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
|
63
71
|
4. **Code Generation**: Translate your pseudocode into executable Python code.
|
72
|
+
5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
|
64
73
|
"""
|
65
74
|
|
66
75
|
TEST = """
|
@@ -149,7 +158,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
149
158
|
|
150
159
|
**Input Code Snippet**:
|
151
160
|
```python
|
152
|
-
### Please
|
161
|
+
### Please decide how would you want to generate test cases. Based on incomplete code or completed version.
|
153
162
|
{code}
|
154
163
|
```
|
155
164
|
|
@@ -159,13 +168,17 @@ This is the documentation for the functions you have access to. You may call any
|
|
159
168
|
**Instructions**:
|
160
169
|
1. Verify the fundamental functionality under normal conditions.
|
161
170
|
2. Ensure each test case is well-documented with comments explaining the scenario it covers.
|
162
|
-
3.
|
163
|
-
4. DO NOT
|
171
|
+
3. Your test case MUST run only on the given image which is {media}
|
172
|
+
4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
|
173
|
+
5. DO NOT mock any functions, you must test their functionality as is.
|
174
|
+
6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
|
175
|
+
7. DO NOT import the testing function as it will available in the testing environment.
|
176
|
+
8. Print the output of the function that is being tested.
|
164
177
|
"""
|
165
178
|
|
166
179
|
|
167
180
|
FIX_BUG = """
|
168
|
-
**Role** As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so
|
181
|
+
**Role** As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so you can run !pip install to install missing packages.
|
169
182
|
|
170
183
|
**Instructions**:
|
171
184
|
Please re-complete the code to fix the error message. Here is the previous version:
|
vision_agent/tools/tools_v2.py
CHANGED
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
|
|
416
416
|
return cast(float, np.min(dist_matrix))
|
417
417
|
|
418
418
|
|
419
|
-
def closest_box_distance(
|
419
|
+
def closest_box_distance(
|
420
|
+
box1: List[float], box2: List[float], image_size: Tuple[int, int]
|
421
|
+
) -> float:
|
420
422
|
"""'closest_box_distance' calculates the closest distance between two bounding boxes.
|
421
423
|
|
422
424
|
Parameters:
|
423
425
|
box1 (List[float]): The first bounding box.
|
424
426
|
box2 (List[float]): The second bounding box.
|
427
|
+
image_size (Tuple[int, int]): The size of the image given as (height, width).
|
425
428
|
|
426
429
|
Returns:
|
427
430
|
float: The closest distance between the two bounding boxes.
|
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
|
|
432
435
|
141.42
|
433
436
|
"""
|
434
437
|
|
435
|
-
x11, y11, x12, y12 = box1
|
436
|
-
x21, y21, x22, y22 = box2
|
438
|
+
x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
|
439
|
+
x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
|
437
440
|
|
438
441
|
horizontal_distance = np.max([0, x21 - x12, x11 - x22])
|
439
442
|
vertical_distance = np.max([0, y21 - y12, y11 - y22])
|
{vision_agent-0.2.28.dist-info → vision_agent-0.2.30.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
|
2
2
|
vision_agent/agent/__init__.py,sha256=jpmL6z5e4PFfQM21JbSsRwcERRXn58XFmURAMwWeoRM,249
|
3
|
-
vision_agent/agent/agent.py,sha256
|
4
|
-
vision_agent/agent/agent_coder.py,sha256=
|
3
|
+
vision_agent/agent/agent.py,sha256=4buKL_7PA6q_Ktlf26FxfX0JxRGrL-swYk0xJuYNVz4,538
|
4
|
+
vision_agent/agent/agent_coder.py,sha256=4Neo6qM9-J8sJ-PKqSaUHr28SYm43IjEvhDK8BfDosE,7006
|
5
5
|
vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
|
6
6
|
vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMVg,11511
|
7
7
|
vision_agent/agent/easytool_prompts.py,sha256=Bikw-PPLkm78dwywTlnv32Y1Tw6JMeC-R7oCnXWLcTk,4656
|
@@ -11,8 +11,8 @@ vision_agent/agent/vision_agent.py,sha256=Rs7O0PXc2J9FlrpBa3UGs5NjqQT51Y507klQf9
|
|
11
11
|
vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
|
12
12
|
vision_agent/agent/vision_agent_v2.py,sha256=t2D1mMUYEv1dFeMrkEUVbDEdArunb7F1ZeYB8qijU2w,15109
|
13
13
|
vision_agent/agent/vision_agent_v2_prompts.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
|
14
|
-
vision_agent/agent/vision_agent_v3.py,sha256=
|
15
|
-
vision_agent/agent/vision_agent_v3_prompts.py,sha256=
|
14
|
+
vision_agent/agent/vision_agent_v3.py,sha256=jPU__NueKQwFzIoJd0zzg6z9q7IDQa9QPaxt8Qlca98,12403
|
15
|
+
vision_agent/agent/vision_agent_v3_prompts.py,sha256=ejedMNDluVYZjHOIXKN98LzX-pOHin2DJhCyZUWULNE,8070
|
16
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
18
18
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJx
|
|
23
23
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
24
24
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
25
25
|
vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
26
|
-
vision_agent/tools/tools_v2.py,sha256=
|
26
|
+
vision_agent/tools/tools_v2.py,sha256=mio0A1l5QcyRC5IgaD4Trfqg7hFTZ8rOjx1dYivwb4Q,21585
|
27
27
|
vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
|
28
28
|
vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
36
|
-
vision_agent-0.2.
|
33
|
+
vision_agent-0.2.30.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.30.dist-info/METADATA,sha256=uVj7XfG4Hat1Bed9FYM2dipIseooN4AHY-Tl4rSPOIg,9212
|
35
|
+
vision_agent-0.2.30.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.30.dist-info/RECORD,,
|
File without changes
|
File without changes
|