vision-agent 0.2.28__tar.gz → 0.2.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.28 → vision_agent-0.2.29}/PKG-INFO +1 -1
- {vision_agent-0.2.28 → vision_agent-0.2.29}/pyproject.toml +1 -1
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v3.py +38 -22
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v3_prompts.py +8 -3
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/tools/tools_v2.py +6 -3
- {vision_agent-0.2.28 → vision_agent-0.2.29}/LICENSE +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/README.md +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/agent_coder.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/agent_coder_prompts.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/easytool.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/easytool_prompts.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/reflexion.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/reflexion_prompts.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v2_prompts.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/llm/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/llm/llm.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.28 → vision_agent-0.2.29}/vision_agent/utils/video.py +0 -0
@@ -3,7 +3,7 @@ import json
|
|
3
3
|
import logging
|
4
4
|
import sys
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Union, cast, Callable
|
6
|
+
from typing import Any, Dict, List, Optional, Union, cast, Callable, no_type_check
|
7
7
|
|
8
8
|
from rich.console import Console
|
9
9
|
from rich.syntax import Syntax
|
@@ -117,6 +117,7 @@ def write_and_test_code(
|
|
117
117
|
log_progress: Callable[[Dict[str, Any]], None],
|
118
118
|
verbosity: int = 0,
|
119
119
|
max_retries: int = 3,
|
120
|
+
input_media: Optional[Union[str, Path]] = None,
|
120
121
|
) -> Dict[str, Any]:
|
121
122
|
code = extract_code(
|
122
123
|
coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
|
@@ -124,14 +125,18 @@ def write_and_test_code(
|
|
124
125
|
test = extract_code(
|
125
126
|
tester(
|
126
127
|
SIMPLE_TEST.format(
|
127
|
-
docstring=tool_utils,
|
128
|
+
docstring=tool_utils,
|
129
|
+
question=task,
|
130
|
+
code=code,
|
131
|
+
feedback=working_memory,
|
132
|
+
media=input_media,
|
128
133
|
)
|
129
134
|
)
|
130
135
|
)
|
131
136
|
|
132
137
|
success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
|
133
138
|
if verbosity == 2:
|
134
|
-
_LOGGER.info("
|
139
|
+
_LOGGER.info("Initial code and tests:")
|
135
140
|
log_progress(
|
136
141
|
{
|
137
142
|
"log": "Code:",
|
@@ -153,7 +158,7 @@ def write_and_test_code(
|
|
153
158
|
"result": result,
|
154
159
|
}
|
155
160
|
)
|
156
|
-
_LOGGER.info(f"
|
161
|
+
_LOGGER.info(f"Initial result: {result}")
|
157
162
|
|
158
163
|
count = 0
|
159
164
|
new_working_memory = []
|
@@ -198,16 +203,18 @@ def write_and_test_code(
|
|
198
203
|
_LOGGER.info(f"Debug result: {result}")
|
199
204
|
count += 1
|
200
205
|
|
201
|
-
if verbosity
|
206
|
+
if verbosity >= 1:
|
207
|
+
_LOGGER.info("Final code and tests:")
|
202
208
|
_CONSOLE.print(
|
203
209
|
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
|
204
210
|
)
|
205
|
-
_LOGGER.info(f"Result: {result}")
|
211
|
+
_LOGGER.info(f"Final Result: {result}")
|
206
212
|
|
207
213
|
return {
|
208
214
|
"code": code,
|
209
215
|
"test": test,
|
210
216
|
"success": success,
|
217
|
+
"test_result": result,
|
211
218
|
"working_memory": new_working_memory,
|
212
219
|
}
|
213
220
|
|
@@ -263,23 +270,26 @@ class VisionAgentV3(Agent):
|
|
263
270
|
else tool_recommender
|
264
271
|
)
|
265
272
|
self.verbosity = verbosity
|
266
|
-
self.max_retries =
|
273
|
+
self.max_retries = 2
|
267
274
|
self.report_progress_callback = report_progress_callback
|
268
275
|
|
276
|
+
@no_type_check
|
269
277
|
def __call__(
|
270
278
|
self,
|
271
279
|
input: Union[List[Dict[str, str]], str],
|
272
280
|
image: Optional[Union[str, Path]] = None,
|
273
|
-
) -> str:
|
281
|
+
) -> Dict[str, Any]:
|
274
282
|
if isinstance(input, str):
|
275
283
|
input = [{"role": "user", "content": input}]
|
276
284
|
results = self.chat_with_workflow(input, image)
|
277
|
-
|
285
|
+
results.pop("working_memory")
|
286
|
+
return results
|
278
287
|
|
279
288
|
def chat_with_workflow(
|
280
289
|
self,
|
281
290
|
chat: List[Dict[str, str]],
|
282
291
|
image: Optional[Union[str, Path]] = None,
|
292
|
+
self_reflection: bool = False,
|
283
293
|
) -> Dict[str, Any]:
|
284
294
|
if len(chat) == 0:
|
285
295
|
raise ValueError("Chat cannot be empty.")
|
@@ -302,13 +312,14 @@ class VisionAgentV3(Agent):
|
|
302
312
|
chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
|
303
313
|
)
|
304
314
|
plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
|
305
|
-
if self.verbosity
|
315
|
+
if self.verbosity >= 1:
|
306
316
|
self.log_progress(
|
307
317
|
{
|
308
318
|
"log": "Going to run the following plan(s) in sequence:\n",
|
309
319
|
"plan": plan_i,
|
310
320
|
}
|
311
321
|
)
|
322
|
+
|
312
323
|
_LOGGER.info(
|
313
324
|
f"""
|
314
325
|
{tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
@@ -330,6 +341,7 @@ class VisionAgentV3(Agent):
|
|
330
341
|
self.debugger,
|
331
342
|
self.log_progress,
|
332
343
|
verbosity=self.verbosity,
|
344
|
+
input_media=image,
|
333
345
|
)
|
334
346
|
success = cast(bool, results["success"])
|
335
347
|
code = cast(str, results["code"])
|
@@ -337,18 +349,21 @@ class VisionAgentV3(Agent):
|
|
337
349
|
working_memory.extend(results["working_memory"]) # type: ignore
|
338
350
|
plan.append({"code": code, "test": test, "plan": plan_i})
|
339
351
|
|
340
|
-
|
341
|
-
|
342
|
-
self.
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
+
if self_reflection:
|
353
|
+
reflection = reflect(chat, plan_i_str, code, self.planner)
|
354
|
+
if self.verbosity > 0:
|
355
|
+
self.log_progress(
|
356
|
+
{
|
357
|
+
"log": "Reflection:",
|
358
|
+
"reflection": reflection,
|
359
|
+
}
|
360
|
+
)
|
361
|
+
_LOGGER.info(f"Reflection: {reflection}")
|
362
|
+
feedback = cast(str, reflection["feedback"])
|
363
|
+
success = cast(bool, reflection["success"])
|
364
|
+
working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
|
365
|
+
|
366
|
+
retries += 1
|
352
367
|
|
353
368
|
self.log_progress(
|
354
369
|
{
|
@@ -360,6 +375,7 @@ class VisionAgentV3(Agent):
|
|
360
375
|
return {
|
361
376
|
"code": code,
|
362
377
|
"test": test,
|
378
|
+
"test_result": results["test_result"],
|
363
379
|
"plan": plan,
|
364
380
|
"working_memory": working_memory,
|
365
381
|
}
|
@@ -61,6 +61,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
61
61
|
2. **Algorithm/Method Selection**: Decide on the most efficient way.
|
62
62
|
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
|
63
63
|
4. **Code Generation**: Translate your pseudocode into executable Python code.
|
64
|
+
5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
|
64
65
|
"""
|
65
66
|
|
66
67
|
TEST = """
|
@@ -149,7 +150,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
149
150
|
|
150
151
|
**Input Code Snippet**:
|
151
152
|
```python
|
152
|
-
### Please
|
153
|
+
### Please decide how would you want to generate test cases. Based on incomplete code or completed version.
|
153
154
|
{code}
|
154
155
|
```
|
155
156
|
|
@@ -159,8 +160,12 @@ This is the documentation for the functions you have access to. You may call any
|
|
159
160
|
**Instructions**:
|
160
161
|
1. Verify the fundamental functionality under normal conditions.
|
161
162
|
2. Ensure each test case is well-documented with comments explaining the scenario it covers.
|
162
|
-
3.
|
163
|
-
4. DO NOT
|
163
|
+
3. Your test case MUST run only on the given image which is {media}
|
164
|
+
4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
|
165
|
+
5. DO NOT mock any functions, you must test their functionality as is.
|
166
|
+
6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
|
167
|
+
7. DO NOT import the testing function as it will available in the testing environment.
|
168
|
+
8. Print the output of the function that is being tested.
|
164
169
|
"""
|
165
170
|
|
166
171
|
|
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
|
|
416
416
|
return cast(float, np.min(dist_matrix))
|
417
417
|
|
418
418
|
|
419
|
-
def closest_box_distance(
|
419
|
+
def closest_box_distance(
|
420
|
+
box1: List[float], box2: List[float], image_size: Tuple[int, int]
|
421
|
+
) -> float:
|
420
422
|
"""'closest_box_distance' calculates the closest distance between two bounding boxes.
|
421
423
|
|
422
424
|
Parameters:
|
423
425
|
box1 (List[float]): The first bounding box.
|
424
426
|
box2 (List[float]): The second bounding box.
|
427
|
+
image_size (Tuple[int, int]): The size of the image given as (height, width).
|
425
428
|
|
426
429
|
Returns:
|
427
430
|
float: The closest distance between the two bounding boxes.
|
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
|
|
432
435
|
141.42
|
433
436
|
"""
|
434
437
|
|
435
|
-
x11, y11, x12, y12 = box1
|
436
|
-
x21, y21, x22, y22 = box2
|
438
|
+
x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
|
439
|
+
x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
|
437
440
|
|
438
441
|
horizontal_distance = np.max([0, x21 - x12, x11 - x22])
|
439
442
|
vertical_distance = np.max([0, y21 - y12, y11 - y22])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|