PyPI - vision-agent - Versions diffs - 0.2.162__tar.gz → 0.2.163__tar.gz - Mend

vision-agent 0.2.162.tar.gz → 0.2.163.tar.gz

Files changed (35) hide show

{vision_agent-0.2.162 → vision_agent-0.2.163}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.162
+Version: 0.2.163
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -27,6 +27,7 @@ Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
 Requires-Dist: pydantic (==2.7.4)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
 Requires-Dist: pytube (==15.0.0)
+Requires-Dist: redbaron (>=0.9.2,<0.10.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: scipy (>=1.13.0,<1.14.0)

{vision_agent-0.2.162 → vision_agent-0.2.163}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.162"
+version = "0.2.163"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -43,6 +43,7 @@ pytube = "15.0.0"
 anthropic = "^0.31.0"
 pydantic = "2.7.4"
 av = "^11.0.0"
+redbaron = "^0.9.2"
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/agent/agent_utils.py RENAMED Viewed

@@ -13,6 +13,7 @@ import vision_agent.tools as T
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _CONSOLE = Console()
+_MAX_TABULATE_COL_WIDTH = 80
 def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
@@ -91,6 +92,27 @@ def extract_code(code: str) -> str:
     return code
+def extract_tag(
+    content: str,
+    tag: str,
+) -> Optional[str]:
+    inner_content = None
+    remaning = content
+    all_inner_content = []
+    while f"<{tag}>" in remaning:
+        inner_content_i = remaning[remaning.find(f"<{tag}>") + len(f"<{tag}>") :]
+        if f"</{tag}>" not in inner_content_i:
+            break
+        inner_content_i = inner_content_i[: inner_content_i.find(f"</{tag}>")]
+        remaning = remaning[remaning.find(f"</{tag}>") + len(f"</{tag}>") :]
+        all_inner_content.append(inner_content_i)
+    if len(all_inner_content) > 0:
+        inner_content = "\n".join(all_inner_content)
+    return inner_content
 def remove_installs_from_code(code: str) -> str:
     pattern = r"\n!pip install.*?(\n|\Z)\n"
     code = re.sub(pattern, "", code, flags=re.DOTALL)

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -103,7 +103,7 @@ def execute_code_action(
 def parse_execution(
     response: str,
     test_multi_plan: bool = True,
-    customed_tool_names: Optional[List[str]] = None,
+    custom_tool_names: Optional[List[str]] = None,
 ) -> Optional[str]:
     code = None
     remaining = response
@@ -122,7 +122,7 @@ def parse_execution(
         code = "\n".join(all_code)
     if code is not None:
-        code = use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)
+        code = use_extra_vision_agent_args(code, test_multi_plan, custom_tool_names)
     return code
@@ -278,7 +278,7 @@ class VisionAgent(Agent):
         chat: List[Message],
         artifacts: Optional[Artifacts] = None,
         test_multi_plan: bool = True,
-        customized_tool_names: Optional[List[str]] = None,
+        custom_tool_names: Optional[List[str]] = None,
     ) -> Tuple[List[Message], Artifacts]:
         """Chat with VisionAgent, it will use code to execute actions to accomplish
         its tasks.
@@ -292,7 +292,7 @@ class VisionAgent(Agent):
             test_multi_plan (bool): If True, it will test tools for multiple plans and
                 pick the best one based off of the tool results. If False, it will go
                 with the first plan.
-            customized_tool_names (List[str]): A list of customized tools for agent to
+            custom_tool_names (List[str]): A list of customized tools for agent to
                 pick and use. If not provided, default to full tool set from
                 vision_agent.tools.
@@ -411,7 +411,7 @@ class VisionAgent(Agent):
                 finished = response["let_user_respond"]
                 code_action = parse_execution(
-                    response["response"], test_multi_plan, customized_tool_names
+                    response["response"], test_multi_plan, custom_tool_names
                 )
                 if last_response == response:

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/agent/vision_agent_coder.py RENAMED Viewed

@@ -5,14 +5,16 @@ import sys
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
+from redbaron import RedBaron  # type: ignore
 from tabulate import tabulate
 import vision_agent.tools as T
 from vision_agent.agent.agent import Agent
 from vision_agent.agent.agent_utils import (
+    _MAX_TABULATE_COL_WIDTH,
     DefaultImports,
     extract_code,
-    extract_json,
+    extract_tag,
     format_memory,
     print_code,
     remove_installs_from_code,
@@ -45,7 +47,44 @@ from vision_agent.utils.execute import CodeInterpreter
 logging.basicConfig(stream=sys.stdout)
 WORKSPACE = Path(os.getenv("WORKSPACE", ""))
 _LOGGER = logging.getLogger(__name__)
-_MAX_TABULATE_COL_WIDTH = 80
+def strip_function_calls(code: str, exclusions: Optional[List[str]] = None) -> str:
+    """This will strip out all code that calls functions except for functions included
+    in exclusions.
+    """
+    if exclusions is None:
+        exclusions = []
+    red = RedBaron(code)
+    nodes_to_remove = []
+    for node in red:
+        if node.type == "def":
+            continue
+        elif node.type == "import" or node.type == "from_import":
+            continue
+        elif node.type == "call":
+            if node.value and node.value[0].value in exclusions:
+                continue
+            nodes_to_remove.append(node)
+        elif node.type == "atomtrailers":
+            if node[0].value in exclusions:
+                continue
+            nodes_to_remove.append(node)
+        elif node.type == "assignment":
+            if node.value.type == "call" or node.value.type == "atomtrailers":
+                func_name = node.value[0].value
+                if func_name in exclusions:
+                    continue
+                nodes_to_remove.append(node)
+        elif node.type == "endl":
+            continue
+        else:
+            nodes_to_remove.append(node)
+    for node in nodes_to_remove:
+        node.parent.remove(node)
+    cleaned_code = red.dumps().strip()
+    return cleaned_code if isinstance(cleaned_code, str) else code
 def write_code(
@@ -130,6 +169,7 @@ def write_and_test_code(
         plan_thoughts,
         format_memory(working_memory),
     )
+    code = strip_function_calls(code)
     test = write_test(
         tester, chat, tool_utils, code, format_memory(working_memory), media
     )
@@ -220,7 +260,9 @@ def debug_code(
         }
     )
-    fixed_code_and_test = {"code": "", "test": "", "reflections": ""}
+    fixed_code = None
+    fixed_test = None
+    thoughts = ""
     success = False
     count = 0
     while not success and count < 3:
@@ -243,21 +285,16 @@ def debug_code(
                 stream=False,
             )
             fixed_code_and_test_str = cast(str, fixed_code_and_test_str)
-            fixed_code_and_test = extract_json(fixed_code_and_test_str)
-            code = extract_code(fixed_code_and_test_str)
-            if (
-                "which_code" in fixed_code_and_test
-                and fixed_code_and_test["which_code"] == "test"
-            ):
-                fixed_code_and_test["code"] = ""
-                fixed_code_and_test["test"] = code
-            else:  # for everything else always assume it's updating code
-                fixed_code_and_test["code"] = code
-                fixed_code_and_test["test"] = ""
-            if "which_code" in fixed_code_and_test:
-                del fixed_code_and_test["which_code"]
-            success = True
+            thoughts_tag = extract_tag(fixed_code_and_test_str, "thoughts")
+            thoughts = thoughts_tag if thoughts_tag is not None else ""
+            fixed_code = extract_tag(fixed_code_and_test_str, "code")
+            fixed_test = extract_tag(fixed_code_and_test_str, "test")
+            if fixed_code is None and fixed_test is None:
+                success = False
+            else:
+                success = True
         except Exception as e:
             _LOGGER.exception(f"Error while extracting JSON: {e}")
@@ -266,15 +303,15 @@ def debug_code(
     old_code = code
     old_test = test
-    if fixed_code_and_test["code"].strip() != "":
-        code = fixed_code_and_test["code"]
-    if fixed_code_and_test["test"].strip() != "":
-        test = fixed_code_and_test["test"]
+    if fixed_code is not None and fixed_code.strip() != "":
+        code = fixed_code
+    if fixed_test is not None and fixed_test.strip() != "":
+        test = fixed_test
     new_working_memory.append(
         {
             "code": f"{code}\n{test}",
-            "feedback": fixed_code_and_test["reflections"],
+            "feedback": thoughts,
             "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
         }
     )
@@ -310,7 +347,7 @@ def debug_code(
     if verbosity == 2:
         print_code("Code and test after attempted fix:", code, test)
         _LOGGER.info(
-            f"Reflection: {fixed_code_and_test['reflections']}\nCode execution result after attempted fix: {result.text(include_logs=True)}"
+            f"Reflection: {thoughts}\nCode execution result after attempted fix: {result.text(include_logs=True)}"
         )
     return code, test, result
@@ -514,7 +551,6 @@ class VisionAgentCoder(Agent):
             code = remove_installs_from_code(cast(str, results["code"]))
             test = remove_installs_from_code(cast(str, results["test"]))
             working_memory.extend(results["working_memory"])
             execution_result = cast(Execution, results["test_result"])
             return {

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/agent/vision_agent_coder_prompts.py RENAMED Viewed

@@ -238,35 +238,29 @@ This is the documentation for the functions you have access to. You may call any
 {docstring}
 **Instructions**:
-Please re-complete the code to fix the error message. Here is the previous version:
-```python
+Please re-complete the code to fix the error message. Here is the current version of the CODE:
+<code>
 {code}
-```
+</code>
-When we run this test code:
-```python
+When we run the TEST code:
+<test>
 {tests}
-```
+</test>
 It raises this error:
-```
+<error>
 {result}
-```
+</error>
 This is previous feedback provided on the code:
 {feedback}
-Please fix the bug by correcting the error. Return the following JSON object followed by the fixed code in the below format:
-```json
-{{
-    "reflections": str # any thoughts you have about the bug and how you fixed it
-    "which_code": str # the code that was fixed, can only be 'code' or 'test'
-}}
-```
+Please fix the bug by correcting the error. Return thoughts you have about the bug and how you fixed in <thoughts> tags followed by the fixed CODE in <code> tags and the fixed TEST in <test> tags. For example:
-```python
-# Your fixed code here
-```
+<thoughts>Your thoughts here...</thoughts>
+<code># your fixed code here</code>
+<test># your fixed test here</test>
 """

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/agent/vision_agent_planner.py RENAMED Viewed

@@ -5,10 +5,12 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 from pydantic import BaseModel
+from tabulate import tabulate
 import vision_agent.tools as T
 from vision_agent.agent import Agent
 from vision_agent.agent.agent_utils import (
+    _MAX_TABULATE_COL_WIDTH,
     DefaultImports,
     extract_code,
     extract_json,
@@ -90,6 +92,18 @@ def retrieve_tools(
     return tool_lists_unique
+def _check_plan_format(plan: Dict[str, Any]) -> bool:
+    if not isinstance(plan, dict):
+        return False
+    for k in plan:
+        if "thoughts" not in plan[k] or "instructions" not in plan[k]:
+            return False
+        if not isinstance(plan[k]["instructions"], list):
+            return False
+    return True
 def write_plans(
     chat: List[Message], tool_desc: str, working_memory: str, model: LMM
 ) -> Dict[str, Any]:
@@ -105,7 +119,16 @@ def write_plans(
         feedback=working_memory,
     )
     chat[-1]["content"] = prompt
-    return extract_json(model(chat, stream=False))  # type: ignore
+    plans = extract_json(model(chat, stream=False))  # type: ignore
+    count = 0
+    while not _check_plan_format(plans) and count < 3:
+        _LOGGER.info("Invalid plan format. Retrying.")
+        plans = extract_json(model(chat, stream=False))  # type: ignore
+        count += 1
+        if count == 3:
+            raise ValueError("Failed to generate valid plans after 3 attempts.")
+    return plans
 def write_and_exec_plan_tests(
@@ -307,7 +330,6 @@ def pick_plan(
             "payload": plans[plan_thoughts["best_plan"]],
         }
     )
-    # return plan_thoughts, "```python\n" + code + "\n```\n" + tool_output_str
     return plan_thoughts, code, tool_output
@@ -404,6 +426,14 @@ class VisionAgentPlanner(Agent):
                 format_memory(working_memory),
                 self.planner,
             )
+            if self.verbosity >= 1:
+                for plan in plans:
+                    plan_fixed = [
+                        {"instructions": e} for e in plans[plan]["instructions"]
+                    ]
+                    _LOGGER.info(
+                        f"\n{tabulate(tabular_data=plan_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
+                    )
             tool_docs = retrieve_tools(
                 plans,

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/tools/meta_tools.py RENAMED Viewed

@@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Union
 import numpy as np
 from IPython.display import display
+from redbaron import RedBaron  # type: ignore
 import vision_agent as va
 from vision_agent.agent.agent_utils import extract_json
@@ -24,8 +25,6 @@ from vision_agent.utils.execute import Execution, MimeType
 from vision_agent.utils.image_utils import convert_to_b64, numpy_to_bytes
 from vision_agent.utils.video import frames_to_bytes
-# These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 CURRENT_FILE = None
 CURRENT_LINE = 0
 DEFAULT_WINDOW_SIZE = 100
@@ -154,6 +153,9 @@ class Artifacts:
         return name in self.artifacts
+# These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 def format_lines(lines: List[str], start_idx: int) -> str:
     output = ""
     for i, line in enumerate(lines):
@@ -491,7 +493,7 @@ def edit_vision_code(
     name: str,
     chat_history: List[str],
     media: List[str],
-    customized_tool_names: Optional[List[str]] = None,
+    custom_tool_names: Optional[List[str]] = None,
 ) -> str:
     """Edits python code to solve a vision based task.
@@ -499,7 +501,7 @@ def edit_vision_code(
         artifacts (Artifacts): The artifacts object to save the code to.
         name (str): The file path to the code.
         chat_history (List[str]): The chat history to used to generate the code.
-        customized_tool_names (Optional[List[str]]): Do not change this parameter.
+        custom_tool_names (Optional[List[str]]): Do not change this parameter.
     Returns:
         str: The edited code.
@@ -542,7 +544,7 @@ def edit_vision_code(
     response = agent.generate_code(
         fixed_chat_history,
         test_multi_plan=False,
-        custom_tool_names=customized_tool_names,
+        custom_tool_names=custom_tool_names,
     )
     redisplay_results(response["test_result"])
     code = response["code"]
@@ -705,7 +707,7 @@ def get_diff_with_prompts(name: str, before: str, after: str) -> str:
 def use_extra_vision_agent_args(
     code: str,
     test_multi_plan: bool = True,
-    customized_tool_names: Optional[List[str]] = None,
+    custom_tool_names: Optional[List[str]] = None,
 ) -> str:
     """This is for forcing arguments passed by the user to VisionAgent into the
     VisionAgentCoder call.
@@ -713,36 +715,25 @@ def use_extra_vision_agent_args(
     Parameters:
         code (str): The code to edit.
         test_multi_plan (bool): Do not change this parameter.
-        customized_tool_names (Optional[List[str]]): Do not change this parameter.
+        custom_tool_names (Optional[List[str]]): Do not change this parameter.
     Returns:
         str: The edited code.
     """
-    generate_pattern = r"generate_vision_code\(\s*([^\)]+)\s*\)"
-    def generate_replacer(match: re.Match) -> str:
-        arg = match.group(1)
-        out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
-        if customized_tool_names is not None:
-            out_str += f", custom_tool_names={customized_tool_names})"
-        else:
-            out_str += ")"
-        return out_str
-    edit_pattern = r"edit_vision_code\(\s*([^\)]+)\s*\)"
-    def edit_replacer(match: re.Match) -> str:
-        arg = match.group(1)
-        out_str = f"edit_vision_code({arg}"
-        if customized_tool_names is not None:
-            out_str += f", custom_tool_names={customized_tool_names})"
-        else:
-            out_str += ")"
-        return out_str
-    new_code = re.sub(generate_pattern, generate_replacer, code)
-    new_code = re.sub(edit_pattern, edit_replacer, new_code)
-    return new_code
+    red = RedBaron(code)
+    for node in red:
+        # seems to always be atomtrailers not call type
+        if node.type == "atomtrailers":
+            if (
+                node.name.value == "generate_vision_code"
+                or node.name.value == "edit_vision_code"
+            ):
+                node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
+                if custom_tool_names is not None:
+                    node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
+    cleaned_code = red.dumps().strip()
+    return cleaned_code if isinstance(cleaned_code, str) else code
 def use_object_detection_fine_tuning(

{vision_agent-0.2.162 → vision_agent-0.2.163}/vision_agent/tools/tools.py RENAMED Viewed

@@ -1923,7 +1923,7 @@ def overlay_bounding_boxes(
         bboxes = bbox_int[i]
         bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
-        if len(bboxes) > 20:
+        if len(bboxes) > 40:
             pil_image = _plot_counting(pil_image, bboxes, color)
         else:
             width, height = pil_image.size
@@ -2117,7 +2117,7 @@ def _plot_counting(
     colors: Dict[str, Tuple[int, int, int]],
 ) -> Image.Image:
     width, height = image.size
-    fontsize = max(10, int(min(width, height) / 80))
+    fontsize = max(12, int(min(width, height) / 40))
     draw = ImageDraw.Draw(image)
     font = ImageFont.truetype(
         str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),