PyPI - vision-agent - Versions diffs - 0.2.204__tar.gz → 0.2.206__tar.gz - Mend

vision-agent 0.2.204__tar.gz → 0.2.206__tar.gz

Files changed (46)

{vision_agent-0.2.204 → vision_agent-0.2.206}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.204
+Version: 0.2.206
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.204 → vision_agent-0.2.206}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.204"
+version = "0.2.206"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_v2.py RENAMED Viewed

@@ -367,8 +367,10 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
                 response = json.loads(chat[i + 1].content)
                 function_name = response["function_name"]
                 tool_doc = get_tool_documentation(function_name)
+                if "box_threshold" in response:
+                    tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
                 new_chat.append(AgentMessage(role="observation", content=tool_doc))
-            except json.JSONDecodeError:
+            except (json.JSONDecodeError, KeyError):
                 raise ValueError(f"Invalid JSON in interaction response: {chat_i}")
         else:
             new_chat.append(chat_i)

{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/planner_tools.py RENAMED Viewed

@@ -1,6 +1,8 @@
+import inspect
 import logging
 import shutil
 import tempfile
+from functools import lru_cache
 from typing import Any, Callable, Dict, List, Optional, Tuple, cast
 import libcst as cst
@@ -31,15 +33,19 @@ from vision_agent.utils.execute import (
     MimeType,
 )
 from vision_agent.utils.image_utils import convert_to_b64
-from vision_agent.utils.sim import load_cached_sim
+from vision_agent.utils.sim import Sim, load_cached_sim
 TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
-TOOL_RECOMMENDER = load_cached_sim(T.TOOLS_DF)
 _LOGGER = logging.getLogger(__name__)
 EXAMPLES = f"\n{TEST_TOOLS_EXAMPLE1}\n{TEST_TOOLS_EXAMPLE2}\n"
+@lru_cache(maxsize=1)
+def get_tool_recommender() -> Sim:
+    return load_cached_sim(T.TOOLS_DF)
 def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
     return_str = "[get_tool_for_task output]\n"
     if tool_thoughts.strip() != "":
@@ -51,7 +57,7 @@ def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
 def extract_tool_info(
-    tool_choice_context: Dict[str, Any]
+    tool_choice_context: Dict[str, Any],
 ) -> Tuple[Optional[Callable], str, str, str]:
     tool_thoughts = tool_choice_context.get("thoughts", "")
     tool_docstring = ""
@@ -63,12 +69,55 @@ def extract_tool_info(
     return tool, tool_thoughts, tool_docstring, ""
+def replace_box_threshold(code: str, functions: List[str], box_threshold: float) -> str:
+    class ReplaceBoxThresholdTransformer(cst.CSTTransformer):
+        def leave_Call(
+            self, original_node: cst.Call, updated_node: cst.Call
+        ) -> cst.Call:
+            if (
+                isinstance(updated_node.func, cst.Name)
+                and updated_node.func.value in functions
+            ) or (
+                isinstance(updated_node.func, cst.Attribute)
+                and updated_node.func.attr.value in functions
+            ):
+                new_args = []
+                found = False
+                for arg in updated_node.args:
+                    if arg.keyword and arg.keyword.value == "box_threshold":
+                        new_arg = arg.with_changes(value=cst.Float(str(box_threshold)))
+                        new_args.append(new_arg)
+                        found = True
+                    else:
+                        new_args.append(arg)
+                if not found:
+                    new_args.append(
+                        cst.Arg(
+                            keyword=cst.Name("box_threshold"),
+                            value=cst.Float(str(box_threshold)),
+                            equal=cst.AssignEqual(
+                                whitespace_before=cst.SimpleWhitespace(""),
+                                whitespace_after=cst.SimpleWhitespace(""),
+                            ),
+                        )
+                    )
+                return updated_node.with_changes(args=new_args)
+            return updated_node
+    tree = cst.parse_module(code)
+    transformer = ReplaceBoxThresholdTransformer()
+    new_tree = tree.visit(transformer)
+    return new_tree.code
 def run_tool_testing(
     task: str,
     image_paths: List[str],
     lmm: LMM,
     exclude_tools: Optional[List[str]],
     code_interpreter: CodeInterpreter,
+    process_code: Callable[[str], str] = lambda x: x,
 ) -> tuple[str, str, Execution]:
     """Helper function to generate and run tool testing code."""
     query = lmm.generate(CATEGORIZE_TOOL_REQUEST.format(task=task))
@@ -80,7 +129,7 @@ def run_tool_testing(
             f"I need models from the {category.strip()} category of tools. {task}"
         )
-    tool_docs = TOOL_RECOMMENDER.top_k(category, k=10, thresh=0.2)
+    tool_docs = get_tool_recommender().top_k(category, k=10, thresh=0.2)
     if exclude_tools is not None and len(exclude_tools) > 0:
         cleaned_tool_docs = []
         for tool_doc in tool_docs:
@@ -101,6 +150,7 @@ def run_tool_testing(
     code = extract_tag(response, "code")  # type: ignore
     if code is None:
         raise ValueError(f"Could not extract code from response: {response}")
+    code = process_code(code)
     tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
     tool_output_str = tool_output.text(include_results=False).strip()
@@ -119,6 +169,7 @@ def run_tool_testing(
             media=str(image_paths),
         )
         code = extract_code(lmm.generate(prompt, media=image_paths))  # type: ignore
+        code = process_code(code)
         tool_output = code_interpreter.exec_isolation(
             DefaultImports.prepend_imports(code)
         )
@@ -200,7 +251,9 @@ def get_tool_for_task(
                 context=f"<code>\n{code}\n</code>\n<tool_output>\n{tool_output_str}\n</tool_output>",
                 previous_attempts=error_message,
             )
-            tool_choice_context_dict = extract_json(lmm.generate(prompt, media=image_paths))  # type: ignore
+            tool_choice_context_dict = extract_json(
+                lmm.generate(prompt, media=image_paths)  # type: ignore
+            )
             tool, tool_thoughts, tool_docstring, error_message = extract_tool_info(
                 tool_choice_context_dict
             )
@@ -221,36 +274,7 @@ def get_tool_documentation(tool_name: str) -> str:
 def get_tool_for_task_human_reviewer(
     task: str, images: List[np.ndarray], exclude_tools: Optional[List[str]] = None
 ) -> None:
-    # NOTE: this should be the same documentation as get_tool_for_task
-    """Given a task and one or more images this function will find a tool to accomplish
-    the jobs. It prints the tool documentation and thoughts on why it chose the tool.
-    It can produce tools for the following types of tasks:
-        - Object detection and counting
-        - Classification
-        - Segmentation
-        - OCR
-        - VQA
-        - Depth and pose estimation
-        - Video object tracking
-    Wait until the documentation is printed to use the function so you know what the
-    input and output signatures are.
-    Parameters:
-        task: str: The task to accomplish.
-        images: List[np.ndarray]: The images to use for the task.
-        exclude_tools: Optional[List[str]]: A list of tool names to exclude from the
-            recommendations. This is helpful if you are calling get_tool_for_task twice
-            and do not want the same tool recommended.
-    Returns:
-        The tool to use for the task is printed to stdout
-    Examples
-    --------
-        >>> get_tool_for_task("Give me an OCR model that can find 'hot chocolate' in the image", [image])
-    """
+    # NOTE: this will have the same documentation as get_tool_for_task
     lmm = AnthropicLMM()
     with (
@@ -263,8 +287,19 @@ def get_tool_for_task_human_reviewer(
             Image.fromarray(image).save(image_path)
             image_paths.append(image_path)
+        tools = [
+            t.__name__
+            for t in T.TOOLS
+            if inspect.signature(t).parameters.get("box_threshold")  # type: ignore
+        ]
         _, _, tool_output = run_tool_testing(
-            task, image_paths, lmm, exclude_tools, code_interpreter
+            task,
+            image_paths,
+            lmm,
+            exclude_tools,
+            code_interpreter,
+            process_code=lambda x: replace_box_threshold(x, tools, 0.05),
         )
         # need to re-display results for the outer notebook to see them