PyPI - vision-agent - Versions diffs - 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl - Mend

vision-agent 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
     return output
+def _clean_response(response: str) -> str:
+    """Cut the response off right after its first complete <execute_python> block.
+
+    Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
+    had already executed the code. Truncating just after the closing tag removes the
+    hallucinated response.
+
+    Args:
+        response: Raw text returned by the LLM.
+
+    Returns:
+        The text up to and including the first "</execute_python>" that follows the
+        first "<execute_python>". The response is returned unchanged when there is
+        no opening tag or when the block is never closed.
+    """
+    open_tag = "<execute_python>"
+    close_tag = "</execute_python>"
+    start = response.find(open_tag)
+    if start == -1:
+        return response
+    # Look for the closing tag only after the opening one, so a stray closing tag
+    # earlier in the text is not mistaken for the end of the block.
+    end = response.find(close_tag, start + len(open_tag))
+    if end == -1:
+        # Unterminated block: slicing with find()'s -1 would keep only the first
+        # len(close_tag) - 1 characters of the response, so leave it untouched.
+        return response
+    return response[: end + len(close_tag)]
 def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
     chat = copy.deepcopy(chat)
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
         message["media"] = chat[-1]["media"]
     conv_resp = cast(str, orch([message], stream=False))
+    # clean the response first, if we are executing code, do not respond or end
+    # conversation before the code has been executed.
+    conv_resp = _clean_response(conv_resp)
     let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
     let_user_respond = (
         "true" in let_user_respond_str.lower() if let_user_respond_str else False
@@ -197,6 +210,51 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
     return response
+def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
+    """Build an old-format message dict from a new-format one.
+
+    Args:
+        new_format: Dict with the keys "thinking", "response", "execute_python"
+            and "let_user_respond". The first three may be None.
+
+    Returns:
+        A dict with "thoughts" and "response" (None replaced by an empty string)
+        and "let_user_respond" copied through. When "execute_python" is not None,
+        its value is appended to the response wrapped in <execute_python> tags.
+
+    Raises:
+        KeyError: If any of the four keys is missing from ``new_format``.
+    """
+    thinking = new_format["thinking"]
+    thoughts = "" if thinking is None else thinking
+    text = new_format["response"]
+    if text is None:
+        text = ""
+    code = new_format["execute_python"]
+    if code is not None:
+        # The old format carries the code inline in the response text.
+        text += f"\n<execute_python>\n{code}\n</execute_python>"
+    old_format = {"thoughts": thoughts, "response": text}
+    old_format["let_user_respond"] = new_format["let_user_respond"]
+    return old_format
+def old_format_to_new_format(old_format_str: str) -> str:
+    """Convert a JSON-encoded old-format message into the new JSON format.
+
+    The old format is a JSON object with the keys "thoughts", "response" and
+    "let_user_respond", where the response may embed code between <execute_python>
+    tags. The new format moves that code into a separate "execute_python" field
+    and renames "thoughts" to "thinking".
+
+    Args:
+        old_format_str: Message content, possibly a JSON-encoded old-format
+            message.
+
+    Returns:
+        A JSON string with the keys "thinking", "response", "execute_python" and
+        "let_user_respond". Content that is not an old-format message (not JSON,
+        not a JSON object, or missing one of the old-format keys) is returned
+        unchanged.
+    """
+    try:
+        old_format = json.loads(old_format_str)
+    except (json.JSONDecodeError, TypeError):
+        # TypeError covers content that is not str/bytes at all.
+        # NOTE(review): the callers pass c["content"] under "# type: ignore",
+        # which suggests non-string content is possible -- confirm.
+        return old_format_str
+
+    # Ordinary content such as "42" or '{"a": 1}' is valid JSON too; only a JSON
+    # object carrying every old-format key is converted.
+    old_keys = ("thoughts", "response", "let_user_respond")
+    if not isinstance(old_format, dict) or any(k not in old_format for k in old_keys):
+        return old_format_str
+
+    thoughts = old_format["thoughts"]
+    # Blank thoughts become None; any other value is passed through as-is.
+    if isinstance(thoughts, str) and thoughts.strip() == "":
+        thinking = None
+    else:
+        thinking = thoughts
+    let_user_respond = old_format["let_user_respond"]
+
+    response = old_format["response"]
+    execute_python = None
+    if isinstance(response, str) and "<execute_python>" in response:
+        execute_python = extract_tag(response, "execute_python")
+        # NOTE(review): extract_tag seems able to return None (run_conversation
+        # guards against an empty result); presumably that happens when the tag
+        # is never closed. In that case keep the response text untouched rather
+        # than passing None to str.replace.
+        if execute_python is not None:
+            response = (
+                response.replace(execute_python, "")
+                .replace("<execute_python>", "")
+                .replace("</execute_python>", "")
+                .strip()
+            )
+
+    return json.dumps(
+        {
+            "thinking": thinking,
+            "response": response,
+            "execute_python": execute_python,
+            "let_user_respond": let_user_respond,
+        }
+    )
 class VisionAgent(Agent):
     """Vision Agent is an agent that can chat with the user and call tools or other
     agents to generate code for it. Vision Agent uses python code to execute actions
@@ -361,11 +419,11 @@ class VisionAgent(Agent):
                     (
                         {
                             "role": c["role"],
-                            "content": c["content"],
+                            "content": old_format_to_new_format(c["content"]),  # type: ignore
                             "media": c["media"],
                         }
                         if "media" in c
-                        else {"role": c["role"], "content": c["content"]}
+                        else {"role": c["role"], "content": old_format_to_new_format(c["content"])}  # type: ignore
                     )
                     for c in int_chat
                 ],
@@ -419,13 +477,17 @@ class VisionAgent(Agent):
                 int_chat.append(
                     {
                         "role": "assistant",
-                        "content": json.dumps(add_step_descriptions(response)),
+                        "content": json.dumps(
+                            new_format_to_old_format(add_step_descriptions(response))
+                        ),
                     }
                 )
                 orig_chat.append(
                     {
                         "role": "assistant",
-                        "content": json.dumps(add_step_descriptions(response)),
+                        "content": json.dumps(
+                            new_format_to_old_format(add_step_descriptions(response))
+                        ),
                     }
                 )
@@ -458,7 +520,11 @@ class VisionAgent(Agent):
                     self.streaming_message(
                         {
                             "role": "assistant",
-                            "content": json.dumps(response),
+                            "content": json.dumps(
+                                new_format_to_old_format(
+                                    add_step_descriptions(response)
+                                )
+                            ),
                             "finished": finished and code_action is None,
                         }
                     )

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
     for node in red:
         # seems to always be atomtrailers not call type
         if node.type == "atomtrailers":
+            if node.name.value == "generate_vision_code":
+                node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
             if (
                 node.name.value == "generate_vision_code"
                 or node.name.value == "edit_vision_code"
             ):
-                node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
                 if custom_tool_names is not None:
                     node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
     cleaned_code = red.dumps().strip()

{vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.166
+Version: 0.2.168
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
-vision_agent/agent/vision_agent.py,sha256=cbY_V3f85_g8JmASa3m2LBX4G6xgsOKX1n7YtCf-C98,23676
+vision_agent/agent/vision_agent.py,sha256=mZpfOGIhPwo96Cr8y9sN6iG9m4npmC_bHeSxtoxtkt8,26217
 vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
 vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
 vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
-vision_agent/tools/meta_tools.py,sha256=ZF-7z3KT-Su08MvF5OhSm3Taqeu1Ek-EZjFhpN5w1uU,28257
+vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
 vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
 vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
-vision_agent-0.2.166.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.166.dist-info/METADATA,sha256=e15d4yNaAJvLCViaBUFo_RNHII88W-y9WgJauEFfbyU,18034
-vision_agent-0.2.166.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.166.dist-info/RECORD,,
+vision_agent-0.2.168.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.168.dist-info/METADATA,sha256=R1REF7QKrhmMCsbDPr9NyeCV7oKFLrc7W90u850y-Rg,18034
+vision_agent-0.2.168.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.168.dist-info/RECORD,,

{vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.166.dist-info → vision_agent-0.2.168.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl

vision-agent 0.2.166__py3-none-any.whl → 0.2.168__py3-none-any.whl