PyPI - vision-agent - Versions diffs - 0.2.233__tar.gz → 0.2.234__tar.gz - Mend

vision-agent 0.2.233.tar.gz → 0.2.234.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

{vision_agent-0.2.233 → vision_agent-0.2.234}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.233
+Version: 0.2.234
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.233 → vision_agent-0.2.234}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.233"
+version = "0.2.234"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.233 → vision_agent-0.2.234}/vision_agent/agent/vision_agent_planner_prompts_v2.py RENAMED Viewed

@@ -20,7 +20,7 @@ PLAN = """
 3. Only output <finalize_plan> when you are done planning and want to end the planning process. DO NOT output <finalize_plan> with <execute_python> tags, only after OBSERVATION's.
 4. Only load/save files from {media_list} unless you specifically saved the file previously.
 5. Ensure you always call `suggestion` initially and `get_tool_for_task` to get the right tool for the subtask.
-6. Calling `plt.imshow` or `save_image` will display the image to you, use this to visually check your results.
+6. Calling `plt.imshow` or `save_image` will display the image to you so you can check your results. If you see an image after <execute_python> it's generated from your code.
 7. DO NOT hard code the answer into your code, it should be dynamic and work for any similar request.
 8. DO NOT over index on claude35_vqa, if tool output is close to claude35_vqa's output you do not need to improve the tool.
 9. You can only respond in the following format with a single <thinking>, <execute_python> or <finalize_plan> tag:

{vision_agent-0.2.233 → vision_agent-0.2.234}/vision_agent/agent/vision_agent_planner_v2.py RENAMED Viewed

@@ -97,8 +97,7 @@ def run_planning(
     media_list: List[Union[str, Path]],
     model: LMM,
 ) -> str:
-    # only keep last 10 messages for planning
-    planning = get_planning(chat[-10:])
+    planning = get_planning(chat)
     prompt = PLAN.format(
         tool_desc=PLANNING_TOOLS_DOCSTRING,
         examples=f"{EXAMPLE_PLAN1}\n{EXAMPLE_PLAN2}",
@@ -372,7 +371,7 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
                 function_name = response["function_name"]
                 tool_doc = get_tool_documentation(function_name)
                 if "box_threshold" in response:
-                    tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
+                    tool_doc = f"Use the following function with box_threshold={response['box_threshold']}. This tool and its parameters were chosen by the user so do not change them in your planning.\n\n{tool_doc}."
                 new_chat.append(AgentMessage(role="observation", content=tool_doc))
             except (json.JSONDecodeError, KeyError):
                 raise ValueError(f"Invalid JSON in interaction response: {chat_i}")

{vision_agent-0.2.233 → vision_agent-0.2.234}/vision_agent/agent/vision_agent_v2.py RENAMED Viewed

@@ -91,8 +91,6 @@ def maybe_run_action(
     code_interpreter: Optional[CodeInterpreter] = None,
 ) -> Optional[List[AgentMessage]]:
     extracted_chat, final_code = extract_conversation(chat)
-    # only keep last 5 messages to keep context recent and not overwhelm LLM
-    extracted_chat = extracted_chat[-5:]
     if action == "generate_or_edit_vision_code":
         # there's an issue here because coder.generate_code will send it's code_context
         # to the outside user via it's update_callback, but we don't necessarily have
@@ -125,6 +123,7 @@ def maybe_run_action(
             ],
             code="",
         )
         context = coder.generate_code_from_plan(
             extracted_chat, plan_context, code_interpreter=code_interpreter
         )