vision-agent 0.2.31__py3-none-any.whl → 0.2.32__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -67,11 +67,17 @@ def parse_file_name(s: str) -> str:
67
67
  return "".join([p for p in s.split(" ") if p.endswith(".png")])
68
68
 
69
69
 
70
- def write_program(question: str, feedback: str, model: LLM) -> str:
70
+ def write_program(
71
+ question: str, feedback: str, model: LLM, media: Optional[Union[str, Path]] = None
72
+ ) -> str:
71
73
  prompt = PROGRAM.format(
72
74
  docstring=TOOL_DOCSTRING, question=question, feedback=feedback
73
75
  )
74
- completion = model(prompt)
76
+ if isinstance(model, OpenAILMM):
77
+ completion = model(prompt, images=[media] if media else None)
78
+ else:
79
+ completion = model(prompt)
80
+
75
81
  return preprocess_data(completion)
76
82
 
77
83
 
@@ -168,7 +174,7 @@ class AgentCoder(Agent):
168
174
  code = ""
169
175
  feedback = ""
170
176
  for _ in range(self.max_turns):
171
- code = write_program(question, feedback, self.coder_agent)
177
+ code = write_program(question, feedback, self.coder_agent, media=media)
172
178
  if self.verbose:
173
179
  _CONSOLE.print(
174
180
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
@@ -21,6 +21,7 @@ from vision_agent.agent.vision_agent_prompts import (
21
21
  USER_REQ,
22
22
  )
23
23
  from vision_agent.llm import LLM, OpenAILLM
24
+ from vision_agent.lmm import LMM, OpenAILMM
24
25
  from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF, UTILITIES_DOCSTRING
25
26
  from vision_agent.utils import Execute
26
27
  from vision_agent.utils.sim import Sim
@@ -77,7 +78,8 @@ def write_plan(
77
78
  chat: List[Dict[str, str]],
78
79
  tool_desc: str,
79
80
  working_memory: str,
80
- model: LLM,
81
+ model: Union[LLM, LMM],
82
+ media: Optional[List[Union[str, Path]]] = None,
81
83
  ) -> List[Dict[str, str]]:
82
84
  chat = copy.deepcopy(chat)
83
85
  if chat[-1]["role"] != "user":
@@ -87,7 +89,10 @@ def write_plan(
87
89
  context = USER_REQ.format(user_request=user_request)
88
90
  prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
89
91
  chat[-1]["content"] = prompt
90
- return extract_json(model.chat(chat))["plan"] # type: ignore
92
+ if isinstance(model, OpenAILMM):
93
+ return extract_json(model.chat(chat, images=media))["plan"] # type: ignore
94
+ else:
95
+ return extract_json(model.chat(chat))["plan"] # type: ignore
91
96
 
92
97
 
93
98
  def reflect(
@@ -324,7 +329,7 @@ class VisionAgent(Agent):
324
329
  input = [{"role": "user", "content": input}]
325
330
  results = self.chat_with_workflow(input, media)
326
331
  results.pop("working_memory")
327
- return results["code"] # type: ignore
332
+ return results # type: ignore
328
333
 
329
334
  def chat_with_workflow(
330
335
  self,
@@ -363,7 +368,11 @@ class VisionAgent(Agent):
363
368
 
364
369
  while not success and retries < self.max_retries:
365
370
  plan_i = write_plan(
366
- chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
371
+ chat,
372
+ TOOL_DESCRIPTIONS,
373
+ format_memory(working_memory),
374
+ self.planner,
375
+ media=[media] if media else None,
367
376
  )
368
377
  plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
369
378
  if self.verbosity >= 1:
@@ -169,11 +169,13 @@ This is the documentation for the functions you have access to. You may call any
169
169
  1. Verify the fundamental functionality under normal conditions.
170
170
  2. Ensure each test case is well-documented with comments explaining the scenario it covers.
171
171
  3. Your test case MUST run only on the given image which is {media}
172
- 4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
173
- 5. DO NOT mock any functions, you must test their functionality as is.
174
- 6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
175
- 7. DO NOT import the testing function as it will available in the testing environment.
176
- 8. Print the output of the function that is being tested.
172
+ 4. Your test case MUST run only with the given values which is available in the question - {question}
173
+ 5. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
174
+ 6. DO NOT mock any functions, you must test their functionality as is.
175
+ 7. DO NOT assert the output value, run the code and assert only the output format or data structure.
176
+ 8. DO NOT use try except block to handle the error, let the error be raised if the code is incorrect.
177
+ 9. DO NOT import the testing function as it will available in the testing environment.
178
+ 10. Print the output of the function that is being tested.
177
179
  """
178
180
 
179
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.31
3
+ Version: 0.2.32
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -1,7 +1,7 @@
1
1
  vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
2
2
  vision_agent/agent/__init__.py,sha256=iiC5eknTQnv87iSwAoHqBthJ3g2Zm6D0dWbYPDfuQ7A,245
3
3
  vision_agent/agent/agent.py,sha256=TXh93MOwmArNRieOkYrhliq1rf7wIkhxvCdTiGhTqFs,538
4
- vision_agent/agent/agent_coder.py,sha256=MQw8SPeNy1D9tUvB-u60H9ab1eLXnrpV0Ggn7Eq_mIo,6988
4
+ vision_agent/agent/agent_coder.py,sha256=o66bpopKwo1kRDxiW6SujueMvLZ3NC5dawQKFrI3e_w,7171
5
5
  vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
6
6
  vision_agent/agent/data_interpreter.py,sha256=YCREEHWiyTYpKT8hibotylEkx1kF5AH0k9wnmymwPBY,15143
7
7
  vision_agent/agent/data_interpreter_prompts.py,sha256=RDJggOfXwGaEoIcTYGX41ZEayCgYei1AootDOc_SN2g,6134
@@ -11,8 +11,8 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
11
11
  vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
12
  vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
13
13
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
14
- vision_agent/agent/vision_agent.py,sha256=5Bfxif2sqRKS1ZUlQ4yT468EfevI9CQ6V7_Y6xRbbq0,14992
15
- vision_agent/agent/vision_agent_prompts.py,sha256=s6T5UnyrKIAcaKqcMudWQOBCHt6Obn9QpX3QtqiDv2I,8034
14
+ vision_agent/agent/vision_agent.py,sha256=gvGNPSyZ1WpIzBcxz03xAe96cSdVBI0zCYIfrPs9jw0,15326
15
+ vision_agent/agent/vision_agent_prompts.py,sha256=0YbiS59IEWbiE43gCvOqfWrpudIAhTn8FHzXW0Y-Gaw,8201
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
18
18
  vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
@@ -30,7 +30,7 @@ vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOk
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
33
- vision_agent-0.2.31.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.31.dist-info/METADATA,sha256=tsCUD6WuSXUt5XLCmOD89DMzDTAxyrCPiA0cAES85AI,5942
35
- vision_agent-0.2.31.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.31.dist-info/RECORD,,
33
+ vision_agent-0.2.32.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.32.dist-info/METADATA,sha256=okG2Wf1LavPw4agq2iH7tY_rLlrabKgOOCL43pzfEMY,5942
35
+ vision_agent-0.2.32.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.32.dist-info/RECORD,,