vision-agent 0.2.87__tar.gz → 0.2.89__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.87 → vision_agent-0.2.89}/PKG-INFO +1 -1
- {vision_agent-0.2.87 → vision_agent-0.2.89}/pyproject.toml +1 -1
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/vision_agent.py +106 -36
- {vision_agent-0.2.87 → vision_agent-0.2.89}/LICENSE +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/README.md +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/video.py +0 -0
@@ -176,13 +176,29 @@ def pick_plan(
|
|
176
176
|
model: LMM,
|
177
177
|
code_interpreter: CodeInterpreter,
|
178
178
|
test_multi_plan: bool,
|
179
|
+
log_progress: Callable[[Dict[str, Any]], None],
|
179
180
|
verbosity: int = 0,
|
180
181
|
max_retries: int = 3,
|
181
182
|
) -> Tuple[Any, str, str]:
|
182
183
|
if not test_multi_plan:
|
183
184
|
k = list(plans.keys())[0]
|
185
|
+
log_progress(
|
186
|
+
{
|
187
|
+
"type": "logs",
|
188
|
+
"log_content": "Plans created",
|
189
|
+
"status": "completed",
|
190
|
+
"payload": plans[k],
|
191
|
+
}
|
192
|
+
)
|
184
193
|
return plans[k], tool_infos[k], ""
|
185
194
|
|
195
|
+
log_progress(
|
196
|
+
{
|
197
|
+
"type": "logs",
|
198
|
+
"log_content": "Generating code to pick best plan",
|
199
|
+
"status": "started",
|
200
|
+
}
|
201
|
+
)
|
186
202
|
all_tool_info = tool_infos["all"]
|
187
203
|
chat = copy.deepcopy(chat)
|
188
204
|
if chat[-1]["role"] != "user":
|
@@ -194,6 +210,14 @@ def pick_plan(
|
|
194
210
|
)
|
195
211
|
|
196
212
|
code = extract_code(model(prompt))
|
213
|
+
log_progress(
|
214
|
+
{
|
215
|
+
"type": "logs",
|
216
|
+
"log_content": "Executing code to test plan",
|
217
|
+
"code": code,
|
218
|
+
"status": "running",
|
219
|
+
}
|
220
|
+
)
|
197
221
|
tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
|
198
222
|
tool_output_str = ""
|
199
223
|
if len(tool_output.logs.stdout) > 0:
|
@@ -203,6 +227,18 @@ def pick_plan(
|
|
203
227
|
_print_code("Initial code and tests:", code)
|
204
228
|
_LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
|
205
229
|
|
230
|
+
log_progress(
|
231
|
+
{
|
232
|
+
"type": "logs",
|
233
|
+
"log_content": (
|
234
|
+
"Code execution succeed"
|
235
|
+
if tool_output.success
|
236
|
+
else "Code execution failed"
|
237
|
+
),
|
238
|
+
"payload": tool_output.to_json(),
|
239
|
+
"status": "completed" if tool_output.success else "failed",
|
240
|
+
}
|
241
|
+
)
|
206
242
|
# retry if the tool output is empty or code fails
|
207
243
|
count = 0
|
208
244
|
while (not tool_output.success or tool_output_str == "") and count < max_retries:
|
@@ -213,10 +249,33 @@ def pick_plan(
|
|
213
249
|
code=code, error=tool_output.text()
|
214
250
|
),
|
215
251
|
)
|
252
|
+
log_progress(
|
253
|
+
{
|
254
|
+
"type": "logs",
|
255
|
+
"log_content": "Retry running code",
|
256
|
+
"code": code,
|
257
|
+
"status": "running",
|
258
|
+
}
|
259
|
+
)
|
216
260
|
code = extract_code(model(prompt))
|
217
261
|
tool_output = code_interpreter.exec_isolation(
|
218
262
|
DefaultImports.prepend_imports(code)
|
219
263
|
)
|
264
|
+
log_progress(
|
265
|
+
{
|
266
|
+
"type": "logs",
|
267
|
+
"log_content": (
|
268
|
+
"Code execution succeed"
|
269
|
+
if tool_output.success
|
270
|
+
else "Code execution failed"
|
271
|
+
),
|
272
|
+
"code": code,
|
273
|
+
"payload": {
|
274
|
+
"result": tool_output.to_json(),
|
275
|
+
},
|
276
|
+
"status": "completed" if tool_output.success else "failed",
|
277
|
+
}
|
278
|
+
)
|
220
279
|
tool_output_str = ""
|
221
280
|
if len(tool_output.logs.stdout) > 0:
|
222
281
|
tool_output_str = tool_output.logs.stdout[0]
|
@@ -246,14 +305,26 @@ def pick_plan(
|
|
246
305
|
|
247
306
|
plan = best_plan["best_plan"]
|
248
307
|
if plan in plans and plan in tool_infos:
|
249
|
-
|
308
|
+
best_plans = plans[plan]
|
309
|
+
best_tool_infos = tool_infos[plan]
|
250
310
|
else:
|
251
311
|
if verbosity >= 1:
|
252
312
|
_LOGGER.warning(
|
253
313
|
f"Best plan {plan} not found in plans or tool_infos. Using the first plan and tool info."
|
254
314
|
)
|
255
315
|
k = list(plans.keys())[0]
|
256
|
-
|
316
|
+
best_plans = plans[k]
|
317
|
+
best_tool_infos = tool_infos[k]
|
318
|
+
|
319
|
+
log_progress(
|
320
|
+
{
|
321
|
+
"type": "logs",
|
322
|
+
"log_content": "Picked best plan",
|
323
|
+
"status": "complete",
|
324
|
+
"payload": best_plans,
|
325
|
+
}
|
326
|
+
)
|
327
|
+
return best_plans, best_tool_infos, tool_output_str
|
257
328
|
|
258
329
|
|
259
330
|
@traceable
|
@@ -323,7 +394,8 @@ def write_and_test_code(
|
|
323
394
|
) -> Dict[str, Any]:
|
324
395
|
log_progress(
|
325
396
|
{
|
326
|
-
"type": "
|
397
|
+
"type": "log",
|
398
|
+
"log_content": "Generating code",
|
327
399
|
"status": "started",
|
328
400
|
}
|
329
401
|
)
|
@@ -341,10 +413,11 @@ def write_and_test_code(
|
|
341
413
|
|
342
414
|
log_progress(
|
343
415
|
{
|
344
|
-
"type": "
|
416
|
+
"type": "log",
|
417
|
+
"log_content": "Running code",
|
345
418
|
"status": "running",
|
419
|
+
"code": DefaultImports.prepend_imports(code),
|
346
420
|
"payload": {
|
347
|
-
"code": DefaultImports.prepend_imports(code),
|
348
421
|
"test": test,
|
349
422
|
},
|
350
423
|
}
|
@@ -354,10 +427,13 @@ def write_and_test_code(
|
|
354
427
|
)
|
355
428
|
log_progress(
|
356
429
|
{
|
357
|
-
"type": "
|
430
|
+
"type": "log",
|
431
|
+
"log_content": (
|
432
|
+
"Code execution succeed" if result.success else "Code execution failed"
|
433
|
+
),
|
358
434
|
"status": "completed" if result.success else "failed",
|
435
|
+
"code": DefaultImports.prepend_imports(code),
|
359
436
|
"payload": {
|
360
|
-
"code": DefaultImports.prepend_imports(code),
|
361
437
|
"test": test,
|
362
438
|
"result": result.to_json(),
|
363
439
|
},
|
@@ -507,15 +583,8 @@ def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
|
|
507
583
|
def retrieve_tools(
|
508
584
|
plans: Dict[str, List[Dict[str, str]]],
|
509
585
|
tool_recommender: Sim,
|
510
|
-
log_progress: Callable[[Dict[str, Any]], None],
|
511
586
|
verbosity: int = 0,
|
512
|
-
) -> Dict[str, str]:
|
513
|
-
log_progress(
|
514
|
-
{
|
515
|
-
"type": "tools",
|
516
|
-
"status": "started",
|
517
|
-
}
|
518
|
-
)
|
587
|
+
) -> Tuple[Dict[str, str], Dict[str, List[Dict[str, str]]]]:
|
519
588
|
tool_info = []
|
520
589
|
tool_desc = []
|
521
590
|
tool_lists: Dict[str, List[Dict[str, str]]] = {}
|
@@ -526,7 +595,12 @@ def retrieve_tools(
|
|
526
595
|
tool_info.extend([e["doc"] for e in tools])
|
527
596
|
tool_desc.extend([e["desc"] for e in tools])
|
528
597
|
tool_lists[k].extend(
|
529
|
-
{
|
598
|
+
{
|
599
|
+
"plan": task["instructions"] if index == 0 else "",
|
600
|
+
"tool": e["desc"].strip().split()[0],
|
601
|
+
"documentation": e["doc"],
|
602
|
+
}
|
603
|
+
for index, e in enumerate(tools)
|
530
604
|
)
|
531
605
|
|
532
606
|
if verbosity == 2:
|
@@ -540,14 +614,7 @@ def retrieve_tools(
|
|
540
614
|
)
|
541
615
|
all_tools = "\n\n".join(set(tool_info))
|
542
616
|
tool_lists_unique["all"] = all_tools
|
543
|
-
|
544
|
-
{
|
545
|
-
"type": "tools",
|
546
|
-
"status": "completed",
|
547
|
-
"payload": tool_lists[list(plans.keys())[0]],
|
548
|
-
}
|
549
|
-
)
|
550
|
-
return tool_lists_unique
|
617
|
+
return tool_lists_unique, tool_lists
|
551
618
|
|
552
619
|
|
553
620
|
class VisionAgent(Agent):
|
@@ -704,7 +771,8 @@ class VisionAgent(Agent):
|
|
704
771
|
|
705
772
|
self.log_progress(
|
706
773
|
{
|
707
|
-
"type": "
|
774
|
+
"type": "logs",
|
775
|
+
"log_content": "Creating plans",
|
708
776
|
"status": "started",
|
709
777
|
}
|
710
778
|
)
|
@@ -715,27 +783,28 @@ class VisionAgent(Agent):
|
|
715
783
|
self.planner,
|
716
784
|
)
|
717
785
|
|
718
|
-
self.log_progress(
|
719
|
-
{
|
720
|
-
"type": "plans",
|
721
|
-
"status": "completed",
|
722
|
-
"payload": plans[list(plans.keys())[0]],
|
723
|
-
}
|
724
|
-
)
|
725
|
-
|
726
786
|
if self.verbosity >= 1 and test_multi_plan:
|
727
787
|
for p in plans:
|
728
788
|
_LOGGER.info(
|
729
789
|
f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
|
730
790
|
)
|
731
791
|
|
732
|
-
tool_infos = retrieve_tools(
|
792
|
+
tool_infos, tool_lists = retrieve_tools(
|
733
793
|
plans,
|
734
794
|
self.tool_recommender,
|
735
|
-
self.log_progress,
|
736
795
|
self.verbosity,
|
737
796
|
)
|
738
797
|
|
798
|
+
if test_multi_plan:
|
799
|
+
self.log_progress(
|
800
|
+
{
|
801
|
+
"type": "logs",
|
802
|
+
"log_content": "Creating plans",
|
803
|
+
"status": "completed",
|
804
|
+
"payload": tool_lists,
|
805
|
+
}
|
806
|
+
)
|
807
|
+
|
739
808
|
best_plan, best_tool_info, tool_output_str = pick_plan(
|
740
809
|
int_chat,
|
741
810
|
plans,
|
@@ -743,6 +812,7 @@ class VisionAgent(Agent):
|
|
743
812
|
self.coder,
|
744
813
|
code_interpreter,
|
745
814
|
test_multi_plan,
|
815
|
+
self.log_progress,
|
746
816
|
verbosity=self.verbosity,
|
747
817
|
)
|
748
818
|
|
@@ -777,8 +847,8 @@ class VisionAgent(Agent):
|
|
777
847
|
{
|
778
848
|
"type": "final_code",
|
779
849
|
"status": "completed" if success else "failed",
|
850
|
+
"code": DefaultImports.prepend_imports(code),
|
780
851
|
"payload": {
|
781
|
-
"code": DefaultImports.prepend_imports(code),
|
782
852
|
"test": test,
|
783
853
|
"result": execution_result.to_json(),
|
784
854
|
},
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|