vision-agent 0.2.87__tar.gz → 0.2.89__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {vision_agent-0.2.87 → vision_agent-0.2.89}/PKG-INFO +1 -1
  2. {vision_agent-0.2.87 → vision_agent-0.2.89}/pyproject.toml +1 -1
  3. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/vision_agent.py +106 -36
  4. {vision_agent-0.2.87 → vision_agent-0.2.89}/LICENSE +0 -0
  5. {vision_agent-0.2.87 → vision_agent-0.2.89}/README.md +0 -0
  6. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/__init__.py +0 -0
  7. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/__init__.py +0 -0
  8. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/agent.py +0 -0
  9. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/agent/vision_agent_prompts.py +0 -0
  10. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/fonts/__init__.py +0 -0
  11. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  12. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/lmm/__init__.py +0 -0
  13. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/lmm/lmm.py +0 -0
  14. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/__init__.py +0 -0
  15. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/prompts.py +0 -0
  16. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/tool_utils.py +0 -0
  17. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/tools/tools.py +0 -0
  18. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/__init__.py +0 -0
  19. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/exceptions.py +0 -0
  20. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/execute.py +0 -0
  21. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/image_utils.py +0 -0
  22. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/sim.py +0 -0
  23. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/type_defs.py +0 -0
  24. {vision_agent-0.2.87 → vision_agent-0.2.89}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.87
3
+ Version: 0.2.89
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.87"
7
+ version = "0.2.89"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -176,13 +176,29 @@ def pick_plan(
176
176
  model: LMM,
177
177
  code_interpreter: CodeInterpreter,
178
178
  test_multi_plan: bool,
179
+ log_progress: Callable[[Dict[str, Any]], None],
179
180
  verbosity: int = 0,
180
181
  max_retries: int = 3,
181
182
  ) -> Tuple[Any, str, str]:
182
183
  if not test_multi_plan:
183
184
  k = list(plans.keys())[0]
185
+ log_progress(
186
+ {
187
+ "type": "logs",
188
+ "log_content": "Plans created",
189
+ "status": "completed",
190
+ "payload": plans[k],
191
+ }
192
+ )
184
193
  return plans[k], tool_infos[k], ""
185
194
 
195
+ log_progress(
196
+ {
197
+ "type": "logs",
198
+ "log_content": "Generating code to pick best plan",
199
+ "status": "started",
200
+ }
201
+ )
186
202
  all_tool_info = tool_infos["all"]
187
203
  chat = copy.deepcopy(chat)
188
204
  if chat[-1]["role"] != "user":
@@ -194,6 +210,14 @@ def pick_plan(
194
210
  )
195
211
 
196
212
  code = extract_code(model(prompt))
213
+ log_progress(
214
+ {
215
+ "type": "logs",
216
+ "log_content": "Executing code to test plan",
217
+ "code": code,
218
+ "status": "running",
219
+ }
220
+ )
197
221
  tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
198
222
  tool_output_str = ""
199
223
  if len(tool_output.logs.stdout) > 0:
@@ -203,6 +227,18 @@ def pick_plan(
203
227
  _print_code("Initial code and tests:", code)
204
228
  _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
205
229
 
230
+ log_progress(
231
+ {
232
+ "type": "logs",
233
+ "log_content": (
234
+ "Code execution succeed"
235
+ if tool_output.success
236
+ else "Code execution failed"
237
+ ),
238
+ "payload": tool_output.to_json(),
239
+ "status": "completed" if tool_output.success else "failed",
240
+ }
241
+ )
206
242
  # retry if the tool output is empty or code fails
207
243
  count = 0
208
244
  while (not tool_output.success or tool_output_str == "") and count < max_retries:
@@ -213,10 +249,33 @@ def pick_plan(
213
249
  code=code, error=tool_output.text()
214
250
  ),
215
251
  )
252
+ log_progress(
253
+ {
254
+ "type": "logs",
255
+ "log_content": "Retry running code",
256
+ "code": code,
257
+ "status": "running",
258
+ }
259
+ )
216
260
  code = extract_code(model(prompt))
217
261
  tool_output = code_interpreter.exec_isolation(
218
262
  DefaultImports.prepend_imports(code)
219
263
  )
264
+ log_progress(
265
+ {
266
+ "type": "logs",
267
+ "log_content": (
268
+ "Code execution succeed"
269
+ if tool_output.success
270
+ else "Code execution failed"
271
+ ),
272
+ "code": code,
273
+ "payload": {
274
+ "result": tool_output.to_json(),
275
+ },
276
+ "status": "completed" if tool_output.success else "failed",
277
+ }
278
+ )
220
279
  tool_output_str = ""
221
280
  if len(tool_output.logs.stdout) > 0:
222
281
  tool_output_str = tool_output.logs.stdout[0]
@@ -246,14 +305,26 @@ def pick_plan(
246
305
 
247
306
  plan = best_plan["best_plan"]
248
307
  if plan in plans and plan in tool_infos:
249
- return plans[plan], tool_infos[plan], tool_output_str
308
+ best_plans = plans[plan]
309
+ best_tool_infos = tool_infos[plan]
250
310
  else:
251
311
  if verbosity >= 1:
252
312
  _LOGGER.warning(
253
313
  f"Best plan {plan} not found in plans or tool_infos. Using the first plan and tool info."
254
314
  )
255
315
  k = list(plans.keys())[0]
256
- return plans[k], tool_infos[k], tool_output_str
316
+ best_plans = plans[k]
317
+ best_tool_infos = tool_infos[k]
318
+
319
+ log_progress(
320
+ {
321
+ "type": "logs",
322
+ "log_content": "Picked best plan",
323
+ "status": "complete",
324
+ "payload": best_plans,
325
+ }
326
+ )
327
+ return best_plans, best_tool_infos, tool_output_str
257
328
 
258
329
 
259
330
  @traceable
@@ -323,7 +394,8 @@ def write_and_test_code(
323
394
  ) -> Dict[str, Any]:
324
395
  log_progress(
325
396
  {
326
- "type": "code",
397
+ "type": "log",
398
+ "log_content": "Generating code",
327
399
  "status": "started",
328
400
  }
329
401
  )
@@ -341,10 +413,11 @@ def write_and_test_code(
341
413
 
342
414
  log_progress(
343
415
  {
344
- "type": "code",
416
+ "type": "log",
417
+ "log_content": "Running code",
345
418
  "status": "running",
419
+ "code": DefaultImports.prepend_imports(code),
346
420
  "payload": {
347
- "code": DefaultImports.prepend_imports(code),
348
421
  "test": test,
349
422
  },
350
423
  }
@@ -354,10 +427,13 @@ def write_and_test_code(
354
427
  )
355
428
  log_progress(
356
429
  {
357
- "type": "code",
430
+ "type": "log",
431
+ "log_content": (
432
+ "Code execution succeed" if result.success else "Code execution failed"
433
+ ),
358
434
  "status": "completed" if result.success else "failed",
435
+ "code": DefaultImports.prepend_imports(code),
359
436
  "payload": {
360
- "code": DefaultImports.prepend_imports(code),
361
437
  "test": test,
362
438
  "result": result.to_json(),
363
439
  },
@@ -507,15 +583,8 @@ def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
507
583
  def retrieve_tools(
508
584
  plans: Dict[str, List[Dict[str, str]]],
509
585
  tool_recommender: Sim,
510
- log_progress: Callable[[Dict[str, Any]], None],
511
586
  verbosity: int = 0,
512
- ) -> Dict[str, str]:
513
- log_progress(
514
- {
515
- "type": "tools",
516
- "status": "started",
517
- }
518
- )
587
+ ) -> Tuple[Dict[str, str], Dict[str, List[Dict[str, str]]]]:
519
588
  tool_info = []
520
589
  tool_desc = []
521
590
  tool_lists: Dict[str, List[Dict[str, str]]] = {}
@@ -526,7 +595,12 @@ def retrieve_tools(
526
595
  tool_info.extend([e["doc"] for e in tools])
527
596
  tool_desc.extend([e["desc"] for e in tools])
528
597
  tool_lists[k].extend(
529
- {"description": e["desc"], "documentation": e["doc"]} for e in tools
598
+ {
599
+ "plan": task["instructions"] if index == 0 else "",
600
+ "tool": e["desc"].strip().split()[0],
601
+ "documentation": e["doc"],
602
+ }
603
+ for index, e in enumerate(tools)
530
604
  )
531
605
 
532
606
  if verbosity == 2:
@@ -540,14 +614,7 @@ def retrieve_tools(
540
614
  )
541
615
  all_tools = "\n\n".join(set(tool_info))
542
616
  tool_lists_unique["all"] = all_tools
543
- log_progress(
544
- {
545
- "type": "tools",
546
- "status": "completed",
547
- "payload": tool_lists[list(plans.keys())[0]],
548
- }
549
- )
550
- return tool_lists_unique
617
+ return tool_lists_unique, tool_lists
551
618
 
552
619
 
553
620
  class VisionAgent(Agent):
@@ -704,7 +771,8 @@ class VisionAgent(Agent):
704
771
 
705
772
  self.log_progress(
706
773
  {
707
- "type": "plans",
774
+ "type": "logs",
775
+ "log_content": "Creating plans",
708
776
  "status": "started",
709
777
  }
710
778
  )
@@ -715,27 +783,28 @@ class VisionAgent(Agent):
715
783
  self.planner,
716
784
  )
717
785
 
718
- self.log_progress(
719
- {
720
- "type": "plans",
721
- "status": "completed",
722
- "payload": plans[list(plans.keys())[0]],
723
- }
724
- )
725
-
726
786
  if self.verbosity >= 1 and test_multi_plan:
727
787
  for p in plans:
728
788
  _LOGGER.info(
729
789
  f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
730
790
  )
731
791
 
732
- tool_infos = retrieve_tools(
792
+ tool_infos, tool_lists = retrieve_tools(
733
793
  plans,
734
794
  self.tool_recommender,
735
- self.log_progress,
736
795
  self.verbosity,
737
796
  )
738
797
 
798
+ if test_multi_plan:
799
+ self.log_progress(
800
+ {
801
+ "type": "logs",
802
+ "log_content": "Creating plans",
803
+ "status": "completed",
804
+ "payload": tool_lists,
805
+ }
806
+ )
807
+
739
808
  best_plan, best_tool_info, tool_output_str = pick_plan(
740
809
  int_chat,
741
810
  plans,
@@ -743,6 +812,7 @@ class VisionAgent(Agent):
743
812
  self.coder,
744
813
  code_interpreter,
745
814
  test_multi_plan,
815
+ self.log_progress,
746
816
  verbosity=self.verbosity,
747
817
  )
748
818
 
@@ -777,8 +847,8 @@ class VisionAgent(Agent):
777
847
  {
778
848
  "type": "final_code",
779
849
  "status": "completed" if success else "failed",
850
+ "code": DefaultImports.prepend_imports(code),
780
851
  "payload": {
781
- "code": DefaultImports.prepend_imports(code),
782
852
  "test": test,
783
853
  "result": execution_result.to_json(),
784
854
  },
File without changes
File without changes