vision-agent 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
85
85
  return output
86
86
 
87
87
 
88
+ def _clean_response(response: str) -> str:
89
+ # Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
90
+ # had already executed the code. This function removes the hallucinated response.
91
+ if "<execute_python>" in response:
92
+ end_execute_python = response.find("</execute_python>")
93
+ response = response[: end_execute_python + len("</execute_python>")]
94
+ return response
95
+
96
+
88
97
  def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
89
98
  chat = copy.deepcopy(chat)
90
99
 
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
114
123
  message["media"] = chat[-1]["media"]
115
124
  conv_resp = cast(str, orch([message], stream=False))
116
125
 
126
+ # clean the response first, if we are executing code, do not respond or end
127
+ # conversation before the code has been executed.
128
+ conv_resp = _clean_response(conv_resp)
129
+
117
130
  let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
118
131
  let_user_respond = (
119
132
  "true" in let_user_respond_str.lower() if let_user_respond_str else False
@@ -197,6 +210,51 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
197
210
  return response
198
211
 
199
212
 
213
+ def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
214
+ thoughts = new_format["thinking"] if new_format["thinking"] is not None else ""
215
+ response = new_format["response"] if new_format["response"] is not None else ""
216
+ if new_format["execute_python"] is not None:
217
+ response += (
218
+ f"\n<execute_python>\n{new_format['execute_python']}\n</execute_python>"
219
+ )
220
+ return {
221
+ "thoughts": thoughts,
222
+ "response": response,
223
+ "let_user_respond": new_format["let_user_respond"],
224
+ }
225
+
226
+
227
+ def old_format_to_new_format(old_format_str: str) -> str:
228
+ try:
229
+ old_format = json.loads(old_format_str)
230
+ except json.JSONDecodeError:
231
+ return old_format_str
232
+
233
+ thinking = old_format["thoughts"] if old_format["thoughts"].strip() != "" else None
234
+ let_user_respond = old_format["let_user_respond"]
235
+ if "<execute_python>" in old_format["response"]:
236
+ execute_python = extract_tag(old_format["response"], "execute_python")
237
+ response = (
238
+ old_format["response"]
239
+ .replace(execute_python, "")
240
+ .replace("<execute_python>", "")
241
+ .replace("</execute_python>", "")
242
+ .strip()
243
+ )
244
+ else:
245
+ execute_python = None
246
+ response = old_format["response"]
247
+
248
+ return json.dumps(
249
+ {
250
+ "thinking": thinking,
251
+ "response": response,
252
+ "execute_python": execute_python,
253
+ "let_user_respond": let_user_respond,
254
+ }
255
+ )
256
+
257
+
200
258
  class VisionAgent(Agent):
201
259
  """Vision Agent is an agent that can chat with the user and call tools or other
202
260
  agents to generate code for it. Vision Agent uses python code to execute actions
@@ -361,11 +419,11 @@ class VisionAgent(Agent):
361
419
  (
362
420
  {
363
421
  "role": c["role"],
364
- "content": c["content"],
422
+ "content": old_format_to_new_format(c["content"]), # type: ignore
365
423
  "media": c["media"],
366
424
  }
367
425
  if "media" in c
368
- else {"role": c["role"], "content": c["content"]}
426
+ else {"role": c["role"], "content": old_format_to_new_format(c["content"])} # type: ignore
369
427
  )
370
428
  for c in int_chat
371
429
  ],
@@ -419,13 +477,17 @@ class VisionAgent(Agent):
419
477
  int_chat.append(
420
478
  {
421
479
  "role": "assistant",
422
- "content": json.dumps(add_step_descriptions(response)),
480
+ "content": json.dumps(
481
+ new_format_to_old_format(add_step_descriptions(response))
482
+ ),
423
483
  }
424
484
  )
425
485
  orig_chat.append(
426
486
  {
427
487
  "role": "assistant",
428
- "content": json.dumps(add_step_descriptions(response)),
488
+ "content": json.dumps(
489
+ new_format_to_old_format(add_step_descriptions(response))
490
+ ),
429
491
  }
430
492
  )
431
493
 
@@ -458,7 +520,11 @@ class VisionAgent(Agent):
458
520
  self.streaming_message(
459
521
  {
460
522
  "role": "assistant",
461
- "content": json.dumps(response),
523
+ "content": json.dumps(
524
+ new_format_to_old_format(
525
+ add_step_descriptions(response)
526
+ )
527
+ ),
462
528
  "finished": finished and code_action is None,
463
529
  }
464
530
  )
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
676
676
  for node in red:
677
677
  # seems to always be atomtrailers not call type
678
678
  if node.type == "atomtrailers":
679
+ if node.name.value == "generate_vision_code":
680
+ node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
681
+
679
682
  if (
680
683
  node.name.value == "generate_vision_code"
681
684
  or node.name.value == "edit_vision_code"
682
685
  ):
683
- node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
684
-
685
686
  if custom_tool_names is not None:
686
687
  node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
687
688
  cleaned_code = red.dumps().strip()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.166
3
+ Version: 0.2.168
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
5
- vision_agent/agent/vision_agent.py,sha256=cbY_V3f85_g8JmASa3m2LBX4G6xgsOKX1n7YtCf-C98,23676
5
+ vision_agent/agent/vision_agent.py,sha256=mZpfOGIhPwo96Cr8y9sN6iG9m4npmC_bHeSxtoxtkt8,26217
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
8
8
  vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
17
17
  vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
18
18
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
19
19
  vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
20
- vision_agent/tools/meta_tools.py,sha256=ZF-7z3KT-Su08MvF5OhSm3Taqeu1Ek-EZjFhpN5w1uU,28257
20
+ vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
21
21
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
22
22
  vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
23
23
  vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
29
29
  vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
30
30
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
31
31
  vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
32
- vision_agent-0.2.166.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
- vision_agent-0.2.166.dist-info/METADATA,sha256=e15d4yNaAJvLCViaBUFo_RNHII88W-y9WgJauEFfbyU,18034
34
- vision_agent-0.2.166.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
- vision_agent-0.2.166.dist-info/RECORD,,
32
+ vision_agent-0.2.168.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
+ vision_agent-0.2.168.dist-info/METADATA,sha256=R1REF7QKrhmMCsbDPr9NyeCV7oKFLrc7W90u850y-Rg,18034
34
+ vision_agent-0.2.168.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
+ vision_agent-0.2.168.dist-info/RECORD,,