PyPI - vision-agent - Versions diffs - 0.2.203__tar.gz → 0.2.205__tar.gz - Mend

vision-agent 0.2.203.tar.gz → 0.2.205.tar.gz

Files changed (46) hide show

{vision_agent-0.2.203 → vision_agent-0.2.205}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.203
+Version: 0.2.205
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.203 → vision_agent-0.2.205}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.203"
+version = "0.2.205"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.203 → vision_agent-0.2.205}/vision_agent/agent/vision_agent_planner_v2.py RENAMED Viewed

@@ -367,8 +367,10 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
                 response = json.loads(chat[i + 1].content)
                 function_name = response["function_name"]
                 tool_doc = get_tool_documentation(function_name)
+                if "box_threshold" in response:
+                    tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
                 new_chat.append(AgentMessage(role="observation", content=tool_doc))
-            except json.JSONDecodeError:
+            except (json.JSONDecodeError, KeyError):
                 raise ValueError(f"Invalid JSON in interaction response: {chat_i}")
         else:
             new_chat.append(chat_i)

{vision_agent-0.2.203 → vision_agent-0.2.205}/vision_agent/agent/vision_agent_prompts.py RENAMED Viewed

@@ -280,7 +280,7 @@ def main():
 if __name__ == "__main__":
     main()
 '''
-edit_code_artifact(artifacts, 'streamlit_app.py', CODE, 0, 0)
+edit_code_artifact(artifacts, 'streamlit_app.py', 0, 0, CODE)
 </execute_python>
 OBSERVATION:

{vision_agent-0.2.203 → vision_agent-0.2.205}/vision_agent/tools/planner_tools.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import inspect
 import logging
 import shutil
 import tempfile
@@ -63,12 +64,55 @@ def extract_tool_info(
     return tool, tool_thoughts, tool_docstring, ""
+def replace_box_threshold(code: str, functions: List[str], box_threshold: float) -> str:
+    class ReplaceBoxThresholdTransformer(cst.CSTTransformer):
+        def leave_Call(
+            self, original_node: cst.Call, updated_node: cst.Call
+        ) -> cst.Call:
+            if (
+                isinstance(updated_node.func, cst.Name)
+                and updated_node.func.value in functions
+            ) or (
+                isinstance(updated_node.func, cst.Attribute)
+                and updated_node.func.attr.value in functions
+            ):
+                new_args = []
+                found = False
+                for arg in updated_node.args:
+                    if arg.keyword and arg.keyword.value == "box_threshold":
+                        new_arg = arg.with_changes(value=cst.Float(str(box_threshold)))
+                        new_args.append(new_arg)
+                        found = True
+                    else:
+                        new_args.append(arg)
+                if not found:
+                    new_args.append(
+                        cst.Arg(
+                            keyword=cst.Name("box_threshold"),
+                            value=cst.Float(str(box_threshold)),
+                            equal=cst.AssignEqual(
+                                whitespace_before=cst.SimpleWhitespace(""),
+                                whitespace_after=cst.SimpleWhitespace(""),
+                            ),
+                        )
+                    )
+                return updated_node.with_changes(args=new_args)
+            return updated_node
+    tree = cst.parse_module(code)
+    transformer = ReplaceBoxThresholdTransformer()
+    new_tree = tree.visit(transformer)
+    return new_tree.code
 def run_tool_testing(
     task: str,
     image_paths: List[str],
     lmm: LMM,
     exclude_tools: Optional[List[str]],
     code_interpreter: CodeInterpreter,
+    process_code: Callable[[str], str] = lambda x: x,
 ) -> tuple[str, str, Execution]:
     """Helper function to generate and run tool testing code."""
     query = lmm.generate(CATEGORIZE_TOOL_REQUEST.format(task=task))
@@ -101,6 +145,7 @@ def run_tool_testing(
     code = extract_tag(response, "code")  # type: ignore
     if code is None:
         raise ValueError(f"Could not extract code from response: {response}")
+    code = process_code(code)
     tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
     tool_output_str = tool_output.text(include_results=False).strip()
@@ -119,6 +164,7 @@ def run_tool_testing(
             media=str(image_paths),
         )
         code = extract_code(lmm.generate(prompt, media=image_paths))  # type: ignore
+        code = process_code(code)
         tool_output = code_interpreter.exec_isolation(
             DefaultImports.prepend_imports(code)
         )
@@ -221,36 +267,7 @@ def get_tool_documentation(tool_name: str) -> str:
 def get_tool_for_task_human_reviewer(
     task: str, images: List[np.ndarray], exclude_tools: Optional[List[str]] = None
 ) -> None:
-    # NOTE: this should be the same documentation as get_tool_for_task
-    """Given a task and one or more images this function will find a tool to accomplish
-    the jobs. It prints the tool documentation and thoughts on why it chose the tool.
-    It can produce tools for the following types of tasks:
-        - Object detection and counting
-        - Classification
-        - Segmentation
-        - OCR
-        - VQA
-        - Depth and pose estimation
-        - Video object tracking
-    Wait until the documentation is printed to use the function so you know what the
-    input and output signatures are.
-    Parameters:
-        task: str: The task to accomplish.
-        images: List[np.ndarray]: The images to use for the task.
-        exclude_tools: Optional[List[str]]: A list of tool names to exclude from the
-            recommendations. This is helpful if you are calling get_tool_for_task twice
-            and do not want the same tool recommended.
-    Returns:
-        The tool to use for the task is printed to stdout
-    Examples
-    --------
-        >>> get_tool_for_task("Give me an OCR model that can find 'hot chocolate' in the image", [image])
-    """
+    # NOTE: this will have the same documentation as get_tool_for_task
     lmm = AnthropicLMM()
     with (
@@ -263,8 +280,19 @@ def get_tool_for_task_human_reviewer(
             Image.fromarray(image).save(image_path)
             image_paths.append(image_path)
+        tools = [
+            t.__name__
+            for t in T.TOOLS
+            if inspect.signature(t).parameters.get("box_threshold")  # type: ignore
+        ]
         _, _, tool_output = run_tool_testing(
-            task, image_paths, lmm, exclude_tools, code_interpreter
+            task,
+            image_paths,
+            lmm,
+            exclude_tools,
+            code_interpreter,
+            process_code=lambda x: replace_box_threshold(x, tools, 0.05),
         )
         # need to re-display results for the outer notebook to see them