PyPI - vision-agent - Versions diffs - 0.2.193__py3-none-any.whl → 0.2.196__py3-none-any.whl - Mend

vision-agent 0.2.193__py3-none-any.whl → 0.2.196__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

vision_agent/.sim_tools/df.csv +640 -0
vision_agent/.sim_tools/embs.npy +0 -0
vision_agent/agent/__init__.py +2 -0
vision_agent/agent/agent_utils.py +211 -3
vision_agent/agent/vision_agent_coder.py +5 -113
vision_agent/agent/vision_agent_coder_prompts_v2.py +119 -0
vision_agent/agent/vision_agent_coder_v2.py +341 -0
vision_agent/agent/vision_agent_planner.py +2 -2
vision_agent/agent/vision_agent_planner_prompts.py +1 -1
vision_agent/agent/vision_agent_planner_prompts_v2.py +748 -0
vision_agent/agent/vision_agent_planner_v2.py +432 -0
vision_agent/lmm/lmm.py +4 -0
vision_agent/tools/__init__.py +2 -1
vision_agent/tools/planner_tools.py +246 -0
vision_agent/tools/tool_utils.py +65 -1
vision_agent/tools/tools.py +76 -22
vision_agent/utils/image_utils.py +12 -6
vision_agent/utils/sim.py +65 -14
{vision_agent-0.2.193.dist-info → vision_agent-0.2.196.dist-info}/METADATA +2 -1
vision_agent-0.2.196.dist-info/RECORD +42 -0
vision_agent-0.2.193.dist-info/RECORD +0 -35
{vision_agent-0.2.193.dist-info → vision_agent-0.2.196.dist-info}/LICENSE +0 -0
{vision_agent-0.2.193.dist-info → vision_agent-0.2.196.dist-info}/WHEEL +0 -0

vision_agent/agent/agent_utils.py CHANGED Viewed

@@ -1,14 +1,22 @@
+import copy
 import json
 import logging
 import re
 import sys
-from typing import Any, Dict, List, Optional
+import tempfile
+from typing import Any, Dict, List, Optional, Tuple, cast
+import libcst as cst
+from pydantic import BaseModel
 from rich.console import Console
 from rich.style import Style
 from rich.syntax import Syntax
+from rich.table import Table
 import vision_agent.tools as T
+from vision_agent.lmm.types import Message
+from vision_agent.utils.execute import CodeInterpreter, Execution
+from vision_agent.utils.image_utils import b64_to_pil, convert_to_b64
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
@@ -16,6 +24,19 @@ _CONSOLE = Console()
 _MAX_TABULATE_COL_WIDTH = 80
+class PlanContext(BaseModel):
+    plan: str
+    instructions: List[str]
+    code: str
+class CodeContext(BaseModel):
+    code: str
+    test: str
+    success: bool
+    test_result: Execution
 def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
     json_pattern = r"\{.*\}"
     match = re.search(json_pattern, json_str, re.DOTALL)
@@ -121,7 +142,7 @@ def remove_installs_from_code(code: str) -> str:
     return code
-def format_memory(memory: List[Dict[str, str]]) -> str:
+def format_feedback(memory: List[Dict[str, str]]) -> str:
     output_str = ""
     for i, m in enumerate(memory):
         output_str += f"### Feedback {i}:\n"
@@ -134,6 +155,16 @@ def format_memory(memory: List[Dict[str, str]]) -> str:
     return output_str
+def format_plan_v2(plan: PlanContext) -> str:
+    plan_str = plan.plan + "\n"
+    plan_str += "Instructions:\n"
+    for v in plan.instructions:
+        plan_str += f"    - {v}\n"
+    plan_str += "Code:\n"
+    plan_str += plan.code
+    return plan_str
 def format_plans(plans: Dict[str, Any]) -> str:
     plan_str = ""
     for k, v in plans.items():
@@ -172,12 +203,189 @@ def print_code(title: str, code: str, test: Optional[str] = None) -> None:
     _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
     _CONSOLE.print(
         Syntax(
-            DefaultImports.prepend_imports(code),
+            code,
             "python",
             theme="gruvbox-dark",
             line_numbers=True,
+            word_wrap=True,
         )
     )
     if test:
         _CONSOLE.print("=" * 30 + " Test " + "=" * 30)
         _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))
+def print_table(title: str, columns: List[str], rows: List[List[str]]) -> None:
+    table = Table(title=title, show_header=True, header_style="bold magenta")
+    for col in columns:
+        table.add_column(col, style="cyan", no_wrap=True)
+    for i, row in enumerate(rows):
+        table.add_row(*row)
+        if i < len(rows) - 1:
+            table.add_row(*["-" * len(col) for col in row])
+    _CONSOLE.print(table)
+def add_media_to_chat(
+    chat: List[Message], code_interpreter: CodeInterpreter
+) -> Tuple[List[Message], List[Message], List[str]]:
+    orig_chat = copy.deepcopy(chat)
+    int_chat = copy.deepcopy(chat)
+    media_list = []
+    for chat_i in int_chat:
+        if "media" in chat_i:
+            media_list_i = []
+            for media in chat_i["media"]:
+                if isinstance(media, str) and media.startswith("data:image/"):
+                    media_pil = b64_to_pil(media)
+                    with tempfile.NamedTemporaryFile(
+                        mode="wb", suffix=".png", delete=False
+                    ) as temp_file:
+                        media_pil.save(temp_file, format="PNG")
+                        media = str(temp_file.name)
+                media = str(code_interpreter.upload_file(media))  # type: ignore
+                media_list_i.append(media)
+                # don't duplicate appending media name
+                if not str(chat_i["content"]).endswith(f" Media name {media}"):
+                    chat_i["content"] += f" Media name {media}"  # type: ignore
+            chat_i["media"] = media_list_i
+            media_list.extend(media_list_i)
+    int_chat = cast(
+        List[Message],
+        [
+            (
+                {
+                    "role": c["role"],
+                    "content": c["content"],
+                    "media": c["media"],
+                }
+                if "media" in c
+                else {"role": c["role"], "content": c["content"]}
+            )
+            for c in int_chat
+        ],
+    )
+    return int_chat, orig_chat, media_list
+def capture_media_from_exec(execution: Execution) -> List[str]:
+    images = []
+    for result in execution.results:
+        for format in result.formats():
+            if format in ["png", "jpeg"]:
+                # converts the image to png and then to base64
+                images.append(
+                    "data:image/png;base64,"
+                    + convert_to_b64(b64_to_pil(result[format]))
+                )
+    return images
+def strip_function_calls(  # noqa: C901
+    code: str, exclusions: Optional[List[str]] = None
+) -> str:
+    """This will strip out all code that calls functions except for functions included
+    in exclusions.
+    """
+    if exclusions is None:
+        exclusions = []
+    def check_and_remove_node(node: cst.CSTNode, exclusions: List[str]) -> cst.CSTNode:
+        if hasattr(node, "value") and isinstance(node.value, cst.Call):
+            if (
+                isinstance(node.value.func, cst.Name)
+                and node.value.func.value in exclusions
+            ):
+                return node
+            return cst.RemoveFromParent()  # type: ignore
+        return node
+    class StripFunctionCallsTransformer(cst.CSTTransformer):
+        def __init__(self, exclusions: List[str]):
+            # Store exclusions to skip removing certain function calls
+            self.exclusions = exclusions
+            self.in_function_or_class = False
+        def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
+            self.in_function_or_class = True
+            return True
+        def leave_FunctionDef(
+            self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
+        ) -> cst.BaseStatement:
+            self.in_function_or_class = False
+            return updated_node
+        def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
+            self.in_function_or_class = True
+            return True
+        def leave_ClassDef(
+            self, node: cst.ClassDef, updated_node: cst.ClassDef
+        ) -> cst.BaseStatement:
+            self.in_function_or_class = False
+            return updated_node
+        def leave_Expr(
+            self, original_node: cst.Expr, updated_node: cst.Expr
+        ) -> cst.Expr:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.Expr, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_Assign(
+            self, original_node: cst.Assign, updated_node: cst.Assign
+        ) -> cst.Assign:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.Assign, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_If(self, original_node: cst.If, updated_node: cst.If) -> cst.If:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.If, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_For(self, original_node: cst.For, updated_node: cst.For) -> cst.For:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.For, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_While(
+            self, original_node: cst.While, updated_node: cst.While
+        ) -> cst.While:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.While, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_With(
+            self, original_node: cst.With, updated_node: cst.With
+        ) -> cst.With:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.With, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_Try(self, original_node: cst.Try, updated_node: cst.Try) -> cst.Try:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.Try, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+    tree = cst.parse_module(code)
+    transformer = StripFunctionCallsTransformer(exclusions)
+    modified_tree = tree.visit(transformer)
+    return modified_tree.code

vision_agent/agent/vision_agent_coder.py CHANGED Viewed

@@ -5,7 +5,6 @@ import sys
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
-import libcst as cst
 from tabulate import tabulate
 import vision_agent.tools as T
@@ -15,9 +14,10 @@ from vision_agent.agent.agent_utils import (
     DefaultImports,
     extract_code,
     extract_tag,
-    format_memory,
+    format_feedback,
     print_code,
     remove_installs_from_code,
+    strip_function_calls,
 )
 from vision_agent.agent.vision_agent_coder_prompts import (
     CODE,
@@ -49,114 +49,6 @@ WORKSPACE = Path(os.getenv("WORKSPACE", ""))
 _LOGGER = logging.getLogger(__name__)
-def strip_function_calls(  # noqa: C901
-    code: str, exclusions: Optional[List[str]] = None
-) -> str:
-    """This will strip out all code that calls functions except for functions included
-    in exclusions.
-    """
-    if exclusions is None:
-        exclusions = []
-    def check_and_remove_node(node: cst.CSTNode, exclusions: List[str]) -> cst.CSTNode:
-        if hasattr(node, "value") and isinstance(node.value, cst.Call):
-            if (
-                isinstance(node.value.func, cst.Name)
-                and node.value.func.value in exclusions
-            ):
-                return node
-            return cst.RemoveFromParent()  # type: ignore
-        return node
-    class StripFunctionCallsTransformer(cst.CSTTransformer):
-        def __init__(self, exclusions: List[str]):
-            # Store exclusions to skip removing certain function calls
-            self.exclusions = exclusions
-            self.in_function_or_class = False
-        def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
-            self.in_function_or_class = True
-            return True
-        def leave_FunctionDef(
-            self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
-        ) -> cst.BaseStatement:
-            self.in_function_or_class = False
-            return updated_node
-        def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
-            self.in_function_or_class = True
-            return True
-        def leave_ClassDef(
-            self, node: cst.ClassDef, updated_node: cst.ClassDef
-        ) -> cst.BaseStatement:
-            self.in_function_or_class = False
-            return updated_node
-        def leave_Expr(
-            self, original_node: cst.Expr, updated_node: cst.Expr
-        ) -> cst.Expr:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.Expr, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-        def leave_Assign(
-            self, original_node: cst.Assign, updated_node: cst.Assign
-        ) -> cst.Assign:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.Assign, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-        def leave_If(self, original_node: cst.If, updated_node: cst.If) -> cst.If:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.If, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-        def leave_For(self, original_node: cst.For, updated_node: cst.For) -> cst.For:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.For, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-        def leave_While(
-            self, original_node: cst.While, updated_node: cst.While
-        ) -> cst.While:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.While, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-        def leave_With(
-            self, original_node: cst.With, updated_node: cst.With
-        ) -> cst.With:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.With, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-        def leave_Try(self, original_node: cst.Try, updated_node: cst.Try) -> cst.Try:
-            if not self.in_function_or_class:
-                return cast(
-                    cst.Try, check_and_remove_node(updated_node, self.exclusions)
-                )
-            return updated_node
-    tree = cst.parse_module(code)
-    transformer = StripFunctionCallsTransformer(exclusions)
-    modified_tree = tree.visit(transformer)
-    return modified_tree.code
 def write_code(
     coder: LMM,
     chat: List[Message],
@@ -237,11 +129,11 @@ def write_and_test_code(
         tool_info,
         tool_output,
         plan_thoughts,
-        format_memory(working_memory),
+        format_feedback(working_memory),
     )
     code = strip_function_calls(code)
     test = write_test(
-        tester, chat, tool_utils, code, format_memory(working_memory), media
+        tester, chat, tool_utils, code, format_feedback(working_memory), media
     )
     log_progress(
@@ -350,7 +242,7 @@ def debug_code(
                     result="\n".join(
                         result.text(include_results=False).splitlines()[-50:]
                     ),
-                    feedback=format_memory(working_memory + new_working_memory),
+                    feedback=format_feedback(working_memory + new_working_memory),
                 ),
                 stream=False,
             )

vision_agent/agent/vision_agent_coder_prompts_v2.py ADDED Viewed

@@ -0,0 +1,119 @@
+FEEDBACK = """
+## This contains code and feedback from previous runs and is used for providing context so you do not make the same mistake again.
+{feedback}
+"""
+CODE = """
+**Role**: You are an expoert software programmer.
+**Task**: You are given a plan by a planning agent that solves a vision problem posed by the user. You are also given code snippets that the planning agent used to solve the task. Your job is to organize the code so that it can be easily called by the user to solve the task.
+**Documentation**:
+This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`.
+{docstring}
+**User Instructions**:
+{question}
+**Plan**:
+--- START PLAN ---
+{plan}
+--- END PLAN ---
+**Instructions**:
+1. Reread the plan and all code and understand the task.
+2. Organize the code snippets into a single function that can be called by the user.
+3. DO NOT alter the code logic and ensure you utilize all the code provided as is without changing it.
+4. DO NOT create dummy input or functions, the code must be usable if the user provides new media.
+5. DO NOT hardcode the output, the function must work for any media provided by the user.
+6. Ensure the function is well-documented and follows the best practices and returns the expected output from the user.
+7. Output your code using <code> tags:
+<code>
+# your code here
+</code>
+"""
+TEST = """
+**Role**: As a tester, your task is to create a simple test case for the provided code. This test case should verify the fundamental functionality under normal conditions.
+**Documentation**:
+This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`. You do not need to test these functions, only the code provided by the user.
+{docstring}
+**User Instructions**:
+{question}
+**Input Code Snippet**:
+<code>
+### Please decide how would you want to generate test cases. Based on incomplete code or completed version.
+{code}
+</code>
+**Instructions**:
+1. Verify the fundamental functionality under normal conditions.
+2. Ensure each test case is well-documented with comments explaining the scenario it covers.
+3. Your test case MUST run only on the given images which are {media}
+4. Your test case MUST run only with the given values which is available in the question - {question}
+5. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
+6. DO NOT mock any functions, you must test their functionality as is.
+7. DO NOT assert the output value, run the code and assert only the output format or data structure.
+8. DO NOT use try except block to handle the error, let the error be raised if the code is incorrect.
+9. DO NOT import the testing function as it will available in the testing environment.
+10. Print the output of the function that is being tested.
+11. Use the output of the function that is being tested as the return value of the testing function.
+12. Run the testing function in the end and don't assign a variable to its output.
+13. Output your test code using <code> tags:
+<code>
+# your test code here
+</code>
+"""
+FIX_BUG = """
+**Role**: As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so you can run !pip install to install missing packages.
+**Task**: A previous agent has written some code and some testing code according to a plan given to it. It has introduced a bug into it's code while trying to implement the plan. You are given the plan, code, test code and error. Your job is to fix the error in the code or test code.
+**Documentation**:
+This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools import *`.
+{docstring}
+**Plan**:
+--- START PLAN ---
+{plan}
+--- END PLAN ---
+**Instructions**:
+Please re-complete the code to fix the error message. Here is the current version of the CODE:
+<code>
+{code}
+</code>
+When we run the TEST code:
+<test>
+{tests}
+</test>
+It raises this error, if the error is empty it means the code and tests were not run:
+<error>
+{result}
+</error>
+This is from your previous attempt to fix the bug, if it is empty no previous attempt has been made:
+{debug}
+Please fix the bug by correcting the error. ONLY change the code logic if it is necessary to fix the bug. Do not change the code logic for any other reason. Output your fixed code using <code> tags and fixed test using <test> tags:
+<thoughts>Your thoughts here...</thoughts>
+<code># your fixed code here</code>
+<test># your fixed test here</test>
+"""

vision-agent 0.2.193__py3-none-any.whl → 0.2.196__py3-none-any.whl

vision-agent 0.2.193__py3-none-any.whl → 0.2.196__py3-none-any.whl