vision-agent 0.2.27__tar.gz → 0.2.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {vision_agent-0.2.27 → vision_agent-0.2.28}/PKG-INFO +1 -1
  2. {vision_agent-0.2.27 → vision_agent-0.2.28}/pyproject.toml +1 -1
  3. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/agent.py +2 -2
  4. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/agent_coder.py +3 -3
  5. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/vision_agent.py +23 -15
  6. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/vision_agent_v2.py +57 -15
  7. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/vision_agent_v3.py +68 -3
  8. {vision_agent-0.2.27 → vision_agent-0.2.28}/LICENSE +0 -0
  9. {vision_agent-0.2.27 → vision_agent-0.2.28}/README.md +0 -0
  10. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/__init__.py +0 -0
  11. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/__init__.py +0 -0
  12. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/agent_coder_prompts.py +0 -0
  13. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/easytool.py +0 -0
  14. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/easytool_prompts.py +0 -0
  15. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/reflexion.py +0 -0
  16. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/reflexion_prompts.py +0 -0
  17. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/vision_agent_prompts.py +0 -0
  18. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/vision_agent_v2_prompts.py +0 -0
  19. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/agent/vision_agent_v3_prompts.py +0 -0
  20. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/fonts/__init__.py +0 -0
  21. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  22. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/llm/__init__.py +0 -0
  23. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/llm/llm.py +0 -0
  24. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/lmm/__init__.py +0 -0
  25. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/lmm/lmm.py +0 -0
  26. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/tools/__init__.py +0 -0
  27. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/tools/prompts.py +0 -0
  28. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/tools/tool_utils.py +0 -0
  29. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/tools/tools.py +0 -0
  30. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/tools/tools_v2.py +0 -0
  31. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/utils/__init__.py +0 -0
  32. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/utils/image_utils.py +0 -0
  34. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/utils/sim.py +0 -0
  35. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/utils/type_defs.py +0 -0
  36. {vision_agent-0.2.27 → vision_agent-0.2.28}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.27
3
+ Version: 0.2.28
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.27"
7
+ version = "0.2.28"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from pathlib import Path
3
- from typing import Dict, List, Optional, Union
3
+ from typing import Dict, List, Optional, Union, Any
4
4
 
5
5
 
6
6
  class Agent(ABC):
@@ -13,7 +13,7 @@ class Agent(ABC):
13
13
  pass
14
14
 
15
15
  @abstractmethod
16
- def log_progress(self, description: str) -> None:
16
+ def log_progress(self, data: Dict[str, Any]) -> None:
17
17
  """Log the progress of the agent.
18
18
  This is a hook that is intended for reporting the progress of the agent.
19
19
  """
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Dict, List, Optional, Union
6
+ from typing import Dict, List, Optional, Union, Any
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -206,5 +206,5 @@ class AgentCoder(Agent):
206
206
 
207
207
  return f"{IMPORT_HELPER}\n{code}"
208
208
 
209
- def log_progress(self, description: str) -> None:
210
- _LOGGER.info(description)
209
+ def log_progress(self, data: Dict[str, Any]) -> None:
210
+ _LOGGER.info(data)
@@ -451,7 +451,7 @@ class VisionAgent(Agent):
451
451
  reflect_model: Optional[Union[LLM, LMM]] = None,
452
452
  max_retries: int = 2,
453
453
  verbose: bool = False,
454
- report_progress_callback: Optional[Callable[[str], None]] = None,
454
+ report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
455
455
  ):
456
456
  """VisionAgent constructor.
457
457
 
@@ -518,23 +518,23 @@ class VisionAgent(Agent):
518
518
  self_reflection=self_reflection,
519
519
  )
520
520
 
521
- def log_progress(self, description: str) -> None:
522
- _LOGGER.info(description)
521
+ def log_progress(self, data: Dict[str, Any]) -> None:
522
+ _LOGGER.info(data)
523
523
  if self.report_progress_callback:
524
- self.report_progress_callback(description)
524
+ self.report_progress_callback(data)
525
525
 
526
526
  def _report_visualization_via_callback(
527
527
  self, images: Sequence[Union[str, Path]]
528
528
  ) -> None:
529
529
  """This is intended for streaming the visualization images via the callback to the client side."""
530
530
  if self.report_progress_callback:
531
- self.report_progress_callback("<VIZ>")
531
+ self.report_progress_callback({"log": "<VIZ>"})
532
532
  if images:
533
533
  for img in images:
534
534
  self.report_progress_callback(
535
- f"<IMG>base:64{convert_to_b64(img)}</IMG>"
535
+ {"log": f"<IMG>base:64{convert_to_b64(img)}</IMG>"}
536
536
  )
537
- self.report_progress_callback("</VIZ>")
537
+ self.report_progress_callback({"log": "</VIZ>"})
538
538
 
539
539
  def chat_with_workflow(
540
540
  self,
@@ -618,8 +618,8 @@ class VisionAgent(Agent):
618
618
  tool_results["answer"] = answer
619
619
  all_tool_results.append(tool_results)
620
620
 
621
- self.log_progress(f"\tCall Result: {call_results}")
622
- self.log_progress(f"\tAnswer: {answer}")
621
+ self.log_progress({"log": f"\tCall Result: {call_results}"})
622
+ self.log_progress({"log": f"\tAnswer: {answer}"})
623
623
  answers.append({"task": task_str, "answer": answer})
624
624
  task_depend[task["id"]]["answer"] = answer # type: ignore
625
625
  task_depend[task["id"]]["call_result"] = call_results # type: ignore
@@ -644,18 +644,22 @@ class VisionAgent(Agent):
644
644
  final_answer,
645
645
  reflection_images,
646
646
  )
647
- self.log_progress(f"Reflection: {reflection}")
647
+ self.log_progress({"log": f"Reflection: {reflection}"})
648
648
  parsed_reflection = parse_reflect(reflection)
649
649
  if parsed_reflection["Finish"]:
650
650
  break
651
651
  else:
652
652
  reflections += "\n" + parsed_reflection["Reflection"]
653
653
  else:
654
- self.log_progress("Self Reflection skipped based on user request.")
654
+ self.log_progress(
655
+ {"log": "Self Reflection skipped based on user request."}
656
+ )
655
657
  break
656
658
  # '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
657
659
  self.log_progress(
658
- f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
660
+ {
661
+ "log": f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
662
+ }
659
663
  )
660
664
 
661
665
  if visualize_output:
@@ -718,8 +722,10 @@ class VisionAgent(Agent):
718
722
  }
719
723
 
720
724
  self.log_progress(
721
- f"""Going to run the following tool(s) in sequence:
725
+ {
726
+ "log": f"""Going to run the following tool(s) in sequence:
722
727
  {tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
728
+ }
723
729
  )
724
730
 
725
731
  def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
@@ -764,7 +770,9 @@ class VisionAgent(Agent):
764
770
  else:
765
771
  task_list = []
766
772
  self.log_progress(
767
- f"""Planned tasks:
768
- {tabulate(task_list, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
773
+ {
774
+ "log": "Planned tasks:",
775
+ "plan": task_list,
776
+ }
769
777
  )
770
778
  return task_list
@@ -165,7 +165,7 @@ def write_and_exec_code(
165
165
  tool_info: str,
166
166
  exec: Execute,
167
167
  retrieved_ltm: str,
168
- log_progress: Callable[..., str],
168
+ log_progress: Callable[[Dict[str, Any]], None],
169
169
  max_retry: int = 3,
170
170
  verbosity: int = 0,
171
171
  ) -> Tuple[bool, str, str, Dict[str, List[str]]]:
@@ -179,7 +179,23 @@ def write_and_exec_code(
179
179
  success, result = exec.run_isolation(code)
180
180
  if verbosity == 2:
181
181
  _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
182
- log_progress(f"\tCode success: {success}\n\tResult: {str(result)}", code)
182
+ log_progress(
183
+ {
184
+ "log": f"Code success: {success}",
185
+ }
186
+ )
187
+ log_progress(
188
+ {
189
+ "log": "Code:",
190
+ "code": code,
191
+ }
192
+ )
193
+ log_progress(
194
+ {
195
+ "log": "Result:",
196
+ "result": str(result),
197
+ }
198
+ )
183
199
  _LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
184
200
  working_memory: Dict[str, List[str]] = {}
185
201
  while not success and counter < max_retry:
@@ -206,7 +222,18 @@ def write_and_exec_code(
206
222
  _CONSOLE.print(
207
223
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
208
224
  )
209
- log_progress(f"\tDebugging reflection: {reflection}\n\tResult: {result}")
225
+ log_progress(
226
+ {
227
+ "log": "Debugging reflection:",
228
+ "reflection": reflection,
229
+ }
230
+ )
231
+ log_progress(
232
+ {
233
+ "log": "Result:",
234
+ "result": result,
235
+ }
236
+ )
210
237
  _LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
211
238
 
212
239
  if success:
@@ -227,7 +254,7 @@ def run_plan(
227
254
  exec: Execute,
228
255
  code: str,
229
256
  tool_recommender: Sim,
230
- log_progress: Callable[..., str],
257
+ log_progress: Callable[[Dict[str, Any]], None],
231
258
  long_term_memory: Optional[Sim] = None,
232
259
  verbosity: int = 0,
233
260
  ) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
@@ -239,8 +266,7 @@ def run_plan(
239
266
 
240
267
  for task in active_plan:
241
268
  log_progress(
242
- f"""Going to run the following task(s) in sequence:
243
- {tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
269
+ {"log": "Going to run the following task(s) in sequence:", "task": task}
244
270
  )
245
271
  _LOGGER.info(
246
272
  f"""
@@ -250,7 +276,7 @@ def run_plan(
250
276
  tool_info = "\n".join([e["doc"] for e in tools])
251
277
 
252
278
  if verbosity == 2:
253
- log_progress(f"Tools retrieved: {[e['desc'] for e in tools]}")
279
+ log_progress({"log": f"Tools retrieved: {[e['desc'] for e in tools]}"})
254
280
  _LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
255
281
 
256
282
  if long_term_memory is not None:
@@ -282,7 +308,17 @@ def run_plan(
282
308
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
283
309
  )
284
310
 
285
- log_progress(f"\tCode success: {success}\n\tResult: {str(result)}")
311
+ log_progress(
312
+ {
313
+ "log": f"Code success: {success}",
314
+ }
315
+ )
316
+ log_progress(
317
+ {
318
+ "log": "Result:",
319
+ "result": str(result),
320
+ }
321
+ )
286
322
  _LOGGER.info(f"\tCode success: {success} result: {str(result)}")
287
323
 
288
324
  task["success"] = success
@@ -320,7 +356,7 @@ class VisionAgentV2(Agent):
320
356
  tool_recommender: Optional[Sim] = None,
321
357
  long_term_memory: Optional[Sim] = None,
322
358
  verbosity: int = 0,
323
- report_progress_callback: Optional[Callable[..., Any]] = None,
359
+ report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
324
360
  ) -> None:
325
361
  self.planner = OpenAILLM(temperature=0.0, json_mode=True)
326
362
  self.coder = OpenAILLM(temperature=0.0)
@@ -376,8 +412,10 @@ class VisionAgentV2(Agent):
376
412
 
377
413
  user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
378
414
  self.log_progress(
379
- f"""Plan:
380
- {tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
415
+ {
416
+ "log": "Plans:",
417
+ "plan": plan,
418
+ }
381
419
  )
382
420
  _LOGGER.info(
383
421
  f"""Plan:
@@ -412,8 +450,12 @@ class VisionAgentV2(Agent):
412
450
 
413
451
  retries += 1
414
452
 
415
- self.log_progress("The Vision Agent V2 has concluded this chat.")
416
- self.log_progress(f"<ANSWER>Plan success: {success}</ANSWER>")
453
+ self.log_progress(
454
+ {
455
+ "log": f"The Vision Agent V2 has concluded this chat.\nSuccess: {success}",
456
+ "finished": True,
457
+ }
458
+ )
417
459
 
418
460
  return {
419
461
  "code": working_code,
@@ -423,7 +465,7 @@ class VisionAgentV2(Agent):
423
465
  "plan": plan,
424
466
  }
425
467
 
426
- def log_progress(self, description: str, code: Optional[str] = "") -> None:
468
+ def log_progress(self, data: Dict[str, Any]) -> None:
427
469
  if self.report_progress_callback is not None:
428
- self.report_progress_callback(description, code)
470
+ self.report_progress_callback(data)
429
471
  pass
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import sys
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Union, cast
6
+ from typing import Any, Dict, List, Optional, Union, cast, Callable
7
7
 
8
8
  from rich.console import Console
9
9
  from rich.syntax import Syntax
@@ -114,6 +114,7 @@ def write_and_test_code(
114
114
  coder: LLM,
115
115
  tester: LLM,
116
116
  debugger: LLM,
117
+ log_progress: Callable[[Dict[str, Any]], None],
117
118
  verbosity: int = 0,
118
119
  max_retries: int = 3,
119
120
  ) -> Dict[str, Any]:
@@ -131,9 +132,27 @@ def write_and_test_code(
131
132
  success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
132
133
  if verbosity == 2:
133
134
  _LOGGER.info("First code and tests:")
135
+ log_progress(
136
+ {
137
+ "log": "Code:",
138
+ "code": code,
139
+ }
140
+ )
141
+ log_progress(
142
+ {
143
+ "log": "Test:",
144
+ "code": test,
145
+ }
146
+ )
134
147
  _CONSOLE.print(
135
148
  Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
136
149
  )
150
+ log_progress(
151
+ {
152
+ "log": "Result:",
153
+ "result": result,
154
+ }
155
+ )
137
156
  _LOGGER.info(f"First result: {result}")
138
157
 
139
158
  count = 0
@@ -156,6 +175,12 @@ def write_and_test_code(
156
175
 
157
176
  success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
158
177
  if verbosity == 2:
178
+ log_progress(
179
+ {
180
+ "log": f"Debug attempt {count + 1}, reflection:",
181
+ "result": fixed_code_and_test["reflections"],
182
+ }
183
+ )
159
184
  _LOGGER.info(
160
185
  f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
161
186
  )
@@ -164,6 +189,12 @@ def write_and_test_code(
164
189
  f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
165
190
  )
166
191
  )
192
+ log_progress(
193
+ {
194
+ "log": "Debug result:",
195
+ "result": result,
196
+ }
197
+ )
167
198
  _LOGGER.info(f"Debug result: {result}")
168
199
  count += 1
169
200
 
@@ -182,7 +213,10 @@ def write_and_test_code(
182
213
 
183
214
 
184
215
  def retrieve_tools(
185
- plan: List[Dict[str, str]], tool_recommender: Sim, verbosity: int = 0
216
+ plan: List[Dict[str, str]],
217
+ tool_recommender: Sim,
218
+ log_progress: Callable[[Dict[str, Any]], None],
219
+ verbosity: int = 0,
186
220
  ) -> str:
187
221
  tool_info = []
188
222
  tool_desc = []
@@ -191,6 +225,12 @@ def retrieve_tools(
191
225
  tool_info.extend([e["doc"] for e in tools])
192
226
  tool_desc.extend([e["desc"] for e in tools])
193
227
  if verbosity == 2:
228
+ log_progress(
229
+ {
230
+ "log": "Retrieved tools:",
231
+ "tools": tool_desc,
232
+ }
233
+ )
194
234
  _LOGGER.info(f"Tools: {tool_desc}")
195
235
  tool_info_set = set(tool_info)
196
236
  return "\n\n".join(tool_info_set)
@@ -206,6 +246,7 @@ class VisionAgentV3(Agent):
206
246
  debugger: Optional[LLM] = None,
207
247
  tool_recommender: Optional[Sim] = None,
208
248
  verbosity: int = 0,
249
+ report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
209
250
  ) -> None:
210
251
  self.planner = (
211
252
  OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
@@ -223,6 +264,7 @@ class VisionAgentV3(Agent):
223
264
  )
224
265
  self.verbosity = verbosity
225
266
  self.max_retries = 3
267
+ self.report_progress_callback = report_progress_callback
226
268
 
227
269
  def __call__(
228
270
  self,
@@ -261,6 +303,12 @@ class VisionAgentV3(Agent):
261
303
  )
262
304
  plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
263
305
  if self.verbosity == 1 or self.verbosity == 2:
306
+ self.log_progress(
307
+ {
308
+ "log": "Going to run the following plan(s) in sequence:\n",
309
+ "plan": plan_i,
310
+ }
311
+ )
264
312
  _LOGGER.info(
265
313
  f"""
266
314
  {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -269,6 +317,7 @@ class VisionAgentV3(Agent):
269
317
  tool_info = retrieve_tools(
270
318
  plan_i,
271
319
  self.tool_recommender,
320
+ self.log_progress,
272
321
  self.verbosity,
273
322
  )
274
323
  results = write_and_test_code(
@@ -279,6 +328,7 @@ class VisionAgentV3(Agent):
279
328
  self.coder,
280
329
  self.tester,
281
330
  self.debugger,
331
+ self.log_progress,
282
332
  verbosity=self.verbosity,
283
333
  )
284
334
  success = cast(bool, results["success"])
@@ -289,11 +339,24 @@ class VisionAgentV3(Agent):
289
339
 
290
340
  reflection = reflect(chat, plan_i_str, code, self.planner)
291
341
  if self.verbosity > 0:
342
+ self.log_progress(
343
+ {
344
+ "log": "Reflection:",
345
+ "reflection": reflection,
346
+ }
347
+ )
292
348
  _LOGGER.info(f"Reflection: {reflection}")
293
349
  feedback = cast(str, reflection["feedback"])
294
350
  success = cast(bool, reflection["success"])
295
351
  working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
296
352
 
353
+ self.log_progress(
354
+ {
355
+ "log": f"The Vision Agent V3 has concluded this chat.\nSuccess: {success}",
356
+ "finished": True,
357
+ }
358
+ )
359
+
297
360
  return {
298
361
  "code": code,
299
362
  "test": test,
@@ -301,5 +364,7 @@ class VisionAgentV3(Agent):
301
364
  "working_memory": working_memory,
302
365
  }
303
366
 
304
- def log_progress(self, description: str) -> None:
367
+ def log_progress(self, data: Dict[str, Any]) -> None:
368
+ if self.report_progress_callback is not None:
369
+ self.report_progress_callback(data)
305
370
  pass
File without changes
File without changes