vision-agent 0.2.204__py3-none-any.whl → 0.2.206__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent_planner_v2.py +3 -1
- vision_agent/tools/planner_tools.py +71 -36
- {vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/METADATA +1 -1
- {vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/RECORD +6 -6
- {vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/WHEEL +0 -0
vision_agent/agent/vision_agent_planner_v2.py

@@ -367,8 +367,10 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
                 response = json.loads(chat[i + 1].content)
                 function_name = response["function_name"]
                 tool_doc = get_tool_documentation(function_name)
+                if "box_threshold" in response:
+                    tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
                 new_chat.append(AgentMessage(role="observation", content=tool_doc))
-            except json.JSONDecodeError:
+            except (json.JSONDecodeError, KeyError):
                 raise ValueError(f"Invalid JSON in interaction response: {chat_i}")
         else:
             new_chat.append(chat_i)
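For context, a hedged sketch of what the resulting observation text looks like when the interaction response carries a box_threshold; the function name and documentation string below are placeholders, not actual vision_agent tool docs:

```python
import json

# Illustrative interaction response and placeholder tool documentation.
raw = '{"function_name": "countgd_object_detection", "box_threshold": 0.1}'
response = json.loads(raw)
tool_doc = "countgd_object_detection(prompt, image, box_threshold=0.23) -> list[dict]"

if "box_threshold" in response:
    tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"

print(tool_doc)
# Use the following function with box_threshold=0.1
#
# countgd_object_detection(prompt, image, box_threshold=0.23) -> list[dict]
```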
vision_agent/tools/planner_tools.py

@@ -1,6 +1,8 @@
+import inspect
 import logging
 import shutil
 import tempfile
+from functools import lru_cache
 from typing import Any, Callable, Dict, List, Optional, Tuple, cast
 
 import libcst as cst
@@ -31,15 +33,19 @@ from vision_agent.utils.execute import (
     MimeType,
 )
 from vision_agent.utils.image_utils import convert_to_b64
-from vision_agent.utils.sim import load_cached_sim
+from vision_agent.utils.sim import Sim, load_cached_sim
 
 TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
-TOOL_RECOMMENDER = load_cached_sim(T.TOOLS_DF)
 
 _LOGGER = logging.getLogger(__name__)
 EXAMPLES = f"\n{TEST_TOOLS_EXAMPLE1}\n{TEST_TOOLS_EXAMPLE2}\n"
 
 
+@lru_cache(maxsize=1)
+def get_tool_recommender() -> Sim:
+    return load_cached_sim(T.TOOLS_DF)
+
+
 def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
     return_str = "[get_tool_for_task output]\n"
     if tool_thoughts.strip() != "":
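The module-level TOOL_RECOMMENDER constant is replaced by an lru_cache(maxsize=1) accessor, so the similarity index is built lazily on first use rather than at import time, and later calls reuse the same object. A minimal sketch of the pattern, with a stand-in for load_cached_sim(T.TOOLS_DF):

```python
from functools import lru_cache

@lru_cache(maxsize=1)
def get_tool_recommender() -> object:
    # Stand-in for load_cached_sim(T.TOOLS_DF); the body runs only once.
    print("building tool similarity index...")
    return object()

first = get_tool_recommender()   # prints "building tool similarity index..."
second = get_tool_recommender()  # cache hit: no print, same object returned
assert first is second
```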
@@ -51,7 +57,7 @@ def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
 
 
 def extract_tool_info(
-    tool_choice_context: Dict[str, Any]
+    tool_choice_context: Dict[str, Any],
 ) -> Tuple[Optional[Callable], str, str, str]:
     tool_thoughts = tool_choice_context.get("thoughts", "")
     tool_docstring = ""
@@ -63,12 +69,55 @@ def extract_tool_info(
     return tool, tool_thoughts, tool_docstring, ""
 
 
+def replace_box_threshold(code: str, functions: List[str], box_threshold: float) -> str:
+    class ReplaceBoxThresholdTransformer(cst.CSTTransformer):
+        def leave_Call(
+            self, original_node: cst.Call, updated_node: cst.Call
+        ) -> cst.Call:
+            if (
+                isinstance(updated_node.func, cst.Name)
+                and updated_node.func.value in functions
+            ) or (
+                isinstance(updated_node.func, cst.Attribute)
+                and updated_node.func.attr.value in functions
+            ):
+                new_args = []
+                found = False
+                for arg in updated_node.args:
+                    if arg.keyword and arg.keyword.value == "box_threshold":
+                        new_arg = arg.with_changes(value=cst.Float(str(box_threshold)))
+                        new_args.append(new_arg)
+                        found = True
+                    else:
+                        new_args.append(arg)
+
+                if not found:
+                    new_args.append(
+                        cst.Arg(
+                            keyword=cst.Name("box_threshold"),
+                            value=cst.Float(str(box_threshold)),
+                            equal=cst.AssignEqual(
+                                whitespace_before=cst.SimpleWhitespace(""),
+                                whitespace_after=cst.SimpleWhitespace(""),
+                            ),
+                        )
+                    )
+                return updated_node.with_changes(args=new_args)
+            return updated_node
+
+    tree = cst.parse_module(code)
+    transformer = ReplaceBoxThresholdTransformer()
+    new_tree = tree.visit(transformer)
+    return new_tree.code
+
+
 def run_tool_testing(
     task: str,
     image_paths: List[str],
     lmm: LMM,
     exclude_tools: Optional[List[str]],
     code_interpreter: CodeInterpreter,
+    process_code: Callable[[str], str] = lambda x: x,
 ) -> tuple[str, str, Execution]:
     """Helper function to generate and run tool testing code."""
     query = lmm.generate(CATEGORIZE_TOOL_REQUEST.format(task=task))
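Assuming vision-agent 0.2.206 is installed, a hedged usage sketch of the new replace_box_threshold helper (the tool name below is illustrative): it overrides an existing box_threshold keyword and appends one to matching calls that lack it.

```python
from vision_agent.tools.planner_tools import replace_box_threshold

code = (
    'dets = countgd_object_detection("person", image)\n'
    'more = countgd_object_detection("person", image, box_threshold=0.5)\n'
)
patched = replace_box_threshold(code, ["countgd_object_detection"], 0.05)
print(patched)
# Expected (roughly):
# dets = countgd_object_detection("person", image, box_threshold=0.05)
# more = countgd_object_detection("person", image, box_threshold=0.05)
```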
@@ -80,7 +129,7 @@ def run_tool_testing(
         f"I need models from the {category.strip()} category of tools. {task}"
     )
 
-    tool_docs =
+    tool_docs = get_tool_recommender().top_k(category, k=10, thresh=0.2)
     if exclude_tools is not None and len(exclude_tools) > 0:
         cleaned_tool_docs = []
         for tool_doc in tool_docs:
@@ -101,6 +150,7 @@ def run_tool_testing(
     code = extract_tag(response, "code") # type: ignore
     if code is None:
         raise ValueError(f"Could not extract code from response: {response}")
+    code = process_code(code)
     tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
     tool_output_str = tool_output.text(include_results=False).strip()
 
@@ -119,6 +169,7 @@ def run_tool_testing(
             media=str(image_paths),
         )
         code = extract_code(lmm.generate(prompt, media=image_paths)) # type: ignore
+        code = process_code(code)
         tool_output = code_interpreter.exec_isolation(
             DefaultImports.prepend_imports(code)
         )
@@ -200,7 +251,9 @@ def get_tool_for_task(
            context=f"<code>\n{code}\n</code>\n<tool_output>\n{tool_output_str}\n</tool_output>",
            previous_attempts=error_message,
        )
-        tool_choice_context_dict = extract_json(
+        tool_choice_context_dict = extract_json(
+            lmm.generate(prompt, media=image_paths)  # type: ignore
+        )
         tool, tool_thoughts, tool_docstring, error_message = extract_tool_info(
             tool_choice_context_dict
         )
@@ -221,36 +274,7 @@ def get_tool_documentation(tool_name: str) -> str:
 def get_tool_for_task_human_reviewer(
     task: str, images: List[np.ndarray], exclude_tools: Optional[List[str]] = None
 ) -> None:
-    # NOTE: this
-    """Given a task and one or more images this function will find a tool to accomplish
-    the jobs. It prints the tool documentation and thoughts on why it chose the tool.
-
-    It can produce tools for the following types of tasks:
-        - Object detection and counting
-        - Classification
-        - Segmentation
-        - OCR
-        - VQA
-        - Depth and pose estimation
-        - Video object tracking
-
-    Wait until the documentation is printed to use the function so you know what the
-    input and output signatures are.
-
-    Parameters:
-        task: str: The task to accomplish.
-        images: List[np.ndarray]: The images to use for the task.
-        exclude_tools: Optional[List[str]]: A list of tool names to exclude from the
-            recommendations. This is helpful if you are calling get_tool_for_task twice
-            and do not want the same tool recommended.
-
-    Returns:
-        The tool to use for the task is printed to stdout
-
-    Examples
-    --------
-        >>> get_tool_for_task("Give me an OCR model that can find 'hot chocolate' in the image", [image])
-    """
+    # NOTE: this will have the same documentation as get_tool_for_task
     lmm = AnthropicLMM()
 
     with (
@@ -263,8 +287,19 @@ def get_tool_for_task_human_reviewer(
             Image.fromarray(image).save(image_path)
             image_paths.append(image_path)
 
+        tools = [
+            t.__name__
+            for t in T.TOOLS
+            if inspect.signature(t).parameters.get("box_threshold")  # type: ignore
+        ]
+
         _, _, tool_output = run_tool_testing(
-            task,
+            task,
+            image_paths,
+            lmm,
+            exclude_tools,
+            code_interpreter,
+            process_code=lambda x: replace_box_threshold(x, tools, 0.05),
         )
 
         # need to re-display results for the outer notebook to see them
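The human-reviewer path now collects only the tools whose signature exposes a box_threshold parameter and pins that threshold to 0.05 in the generated test code via process_code. A self-contained sketch of the signature-filtering idiom, using made-up stand-ins for T.TOOLS:

```python
import inspect

# Stand-ins for entries of T.TOOLS (illustrative, not real vision_agent tools).
def detect(prompt: str, image, box_threshold: float = 0.3): ...
def classify(prompt: str, image): ...

TOOLS = [detect, classify]
tools = [
    t.__name__
    for t in TOOLS
    if inspect.signature(t).parameters.get("box_threshold")
]
print(tools)  # ['detect'] -- only callables that accept box_threshold
```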
{vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/RECORD

@@ -14,7 +14,7 @@ vision_agent/agent/vision_agent_coder_v2.py,sha256=nXbMsCLpKxTEi075ZE932227tW-lE
 vision_agent/agent/vision_agent_planner.py,sha256=KWMA7XemcSmc_jn-MwdWz9wnKDtj-sYQ9tINi70_OoU,18583
 vision_agent/agent/vision_agent_planner_prompts.py,sha256=Y3jz9HRf8fz9NLUseN7cTgZqewP0RazxR7vw1sPhcn0,6691
 vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=Tzon3h5iZdHJglesk8GVS-2myNf5-fhf7HUbkpZWHQk,33143
-vision_agent/agent/vision_agent_planner_v2.py,sha256=
+vision_agent/agent/vision_agent_planner_v2.py,sha256=pAtGWkY-9fFgxgO2ioebvMvASwbJ-8bAvzRNp8Z0Odc,20437
 vision_agent/agent/vision_agent_prompts.py,sha256=NtGdCfzzilCRtscKALC9FK55d1h4CBpMnbhLzg0PYlc,13772
 vision_agent/agent/vision_agent_prompts_v2.py,sha256=-vCWat-ARlCOOOeIDIFhg-kcwRRwjTXYEwsvvqPeaCs,1972
 vision_agent/agent/vision_agent_v2.py,sha256=6gGVV3FlL4NLzHRpjMqMz-fEP6f_JhwwOjUKczZ3TPA,10231
@@ -28,7 +28,7 @@ vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=xuNt5e4syQH28Vr6EdjLmO9ni9i00yav9yqcPMUx1oo,2878
 vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
-vision_agent/tools/planner_tools.py,sha256=
+vision_agent/tools/planner_tools.py,sha256=zlzyCv7tzSOs9W-MjsptaOeM-i4eoA6HxXQWuMc1KkY,13548
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=AT7rMcpKwZgIErfgfSvHS0gmtvd8KMHJoHnu5aMlgO0,10259
 vision_agent/tools/tools.py,sha256=vavzmDuIBHI-g13RMDnr9NALfWpiIvJWkXhD0pnhCuk,87576
@@ -40,7 +40,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
 vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
+vision_agent-0.2.206.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.206.dist-info/METADATA,sha256=3QLRuQR4YwcTTU1y6phpkl7hLXtCIKqxYlYjF1_oNzM,19026
+vision_agent-0.2.206.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.206.dist-info/RECORD,,
{vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/LICENSE: file without changes
{vision_agent-0.2.204.dist-info → vision_agent-0.2.206.dist-info}/WHEEL: file without changes