vision-agent 0.2.31__tar.gz → 0.2.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {vision_agent-0.2.31 → vision_agent-0.2.32}/PKG-INFO +1 -1
  2. {vision_agent-0.2.31 → vision_agent-0.2.32}/pyproject.toml +1 -1
  3. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/agent_coder.py +9 -3
  4. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/vision_agent.py +13 -4
  5. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/vision_agent_prompts.py +7 -5
  6. {vision_agent-0.2.31 → vision_agent-0.2.32}/LICENSE +0 -0
  7. {vision_agent-0.2.31 → vision_agent-0.2.32}/README.md +0 -0
  8. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/agent_coder_prompts.py +0 -0
  12. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/data_interpreter.py +0 -0
  13. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/data_interpreter_prompts.py +0 -0
  14. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/easytool.py +0 -0
  15. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/easytool_prompts.py +0 -0
  16. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/easytool_v2.py +0 -0
  17. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/easytool_v2_prompts.py +0 -0
  18. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/reflexion.py +0 -0
  19. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/agent/reflexion_prompts.py +0 -0
  20. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/fonts/__init__.py +0 -0
  21. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  22. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/llm/__init__.py +0 -0
  23. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/llm/llm.py +0 -0
  24. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/lmm/__init__.py +0 -0
  25. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/lmm/lmm.py +0 -0
  26. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/tools/__init__.py +0 -0
  27. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/tools/easytool_tools.py +0 -0
  28. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/tools/prompts.py +0 -0
  29. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/tools/tool_utils.py +0 -0
  30. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/tools/tools.py +0 -0
  31. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/utils/__init__.py +0 -0
  32. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/utils/image_utils.py +0 -0
  34. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/utils/sim.py +0 -0
  35. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/utils/type_defs.py +0 -0
  36. {vision_agent-0.2.31 → vision_agent-0.2.32}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.31
3
+ Version: 0.2.32
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.31"
7
+ version = "0.2.32"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -67,11 +67,17 @@ def parse_file_name(s: str) -> str:
67
67
  return "".join([p for p in s.split(" ") if p.endswith(".png")])
68
68
 
69
69
 
70
- def write_program(question: str, feedback: str, model: LLM) -> str:
70
+ def write_program(
71
+ question: str, feedback: str, model: LLM, media: Optional[Union[str, Path]] = None
72
+ ) -> str:
71
73
  prompt = PROGRAM.format(
72
74
  docstring=TOOL_DOCSTRING, question=question, feedback=feedback
73
75
  )
74
- completion = model(prompt)
76
+ if isinstance(model, OpenAILMM):
77
+ completion = model(prompt, images=[media] if media else None)
78
+ else:
79
+ completion = model(prompt)
80
+
75
81
  return preprocess_data(completion)
76
82
 
77
83
 
@@ -168,7 +174,7 @@ class AgentCoder(Agent):
168
174
  code = ""
169
175
  feedback = ""
170
176
  for _ in range(self.max_turns):
171
- code = write_program(question, feedback, self.coder_agent)
177
+ code = write_program(question, feedback, self.coder_agent, media=media)
172
178
  if self.verbose:
173
179
  _CONSOLE.print(
174
180
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
@@ -21,6 +21,7 @@ from vision_agent.agent.vision_agent_prompts import (
21
21
  USER_REQ,
22
22
  )
23
23
  from vision_agent.llm import LLM, OpenAILLM
24
+ from vision_agent.lmm import LMM, OpenAILMM
24
25
  from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF, UTILITIES_DOCSTRING
25
26
  from vision_agent.utils import Execute
26
27
  from vision_agent.utils.sim import Sim
@@ -77,7 +78,8 @@ def write_plan(
77
78
  chat: List[Dict[str, str]],
78
79
  tool_desc: str,
79
80
  working_memory: str,
80
- model: LLM,
81
+ model: Union[LLM, LMM],
82
+ media: Optional[List[Union[str, Path]]] = None,
81
83
  ) -> List[Dict[str, str]]:
82
84
  chat = copy.deepcopy(chat)
83
85
  if chat[-1]["role"] != "user":
@@ -87,7 +89,10 @@ def write_plan(
87
89
  context = USER_REQ.format(user_request=user_request)
88
90
  prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
89
91
  chat[-1]["content"] = prompt
90
- return extract_json(model.chat(chat))["plan"] # type: ignore
92
+ if isinstance(model, OpenAILMM):
93
+ return extract_json(model.chat(chat, images=media))["plan"] # type: ignore
94
+ else:
95
+ return extract_json(model.chat(chat))["plan"] # type: ignore
91
96
 
92
97
 
93
98
  def reflect(
@@ -324,7 +329,7 @@ class VisionAgent(Agent):
324
329
  input = [{"role": "user", "content": input}]
325
330
  results = self.chat_with_workflow(input, media)
326
331
  results.pop("working_memory")
327
- return results["code"] # type: ignore
332
+ return results # type: ignore
328
333
 
329
334
  def chat_with_workflow(
330
335
  self,
@@ -363,7 +368,11 @@ class VisionAgent(Agent):
363
368
 
364
369
  while not success and retries < self.max_retries:
365
370
  plan_i = write_plan(
366
- chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
371
+ chat,
372
+ TOOL_DESCRIPTIONS,
373
+ format_memory(working_memory),
374
+ self.planner,
375
+ media=[media] if media else None,
367
376
  )
368
377
  plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
369
378
  if self.verbosity >= 1:
@@ -169,11 +169,13 @@ This is the documentation for the functions you have access to. You may call any
169
169
  1. Verify the fundamental functionality under normal conditions.
170
170
  2. Ensure each test case is well-documented with comments explaining the scenario it covers.
171
171
  3. Your test case MUST run only on the given image which is {media}
172
- 4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
173
- 5. DO NOT mock any functions, you must test their functionality as is.
174
- 6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
175
- 7. DO NOT import the testing function as it will available in the testing environment.
176
- 8. Print the output of the function that is being tested.
172
+ 4. Your test case MUST run only with the given values which is available in the question - {question}
173
+ 5. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
174
+ 6. DO NOT mock any functions, you must test their functionality as is.
175
+ 7. DO NOT assert the output value, run the code and assert only the output format or data structure.
176
+ 8. DO NOT use try except block to handle the error, let the error be raised if the code is incorrect.
177
+ 9. DO NOT import the testing function as it will available in the testing environment.
178
+ 10. Print the output of the function that is being tested.
177
179
  """
178
180
 
179
181
 
File without changes
File without changes