PyPI - vision-agent - Versions diffs - 0.2.160__tar.gz → 0.2.162__tar.gz - Mend

vision-agent 0.2.160__tar.gz → 0.2.162__tar.gz

Files changed (35) hide show

{vision_agent-0.2.160 → vision_agent-0.2.162}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.160
+Version: 0.2.162
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -142,7 +142,7 @@ continuing, for example it may want to execute code and look at the output befor
 letting the user respond.
 ### Chatting and Artifacts
-If you run `chat_with_code` you will also notice an `Artifact` object. `Artifact`'s
+If you run `chat_with_artifacts` you will also notice an `Artifact` object. `Artifact`'s
 are a way to sync files between local and remote environments. The agent will read and
 write to the artifact object, which is just a pickle object, when it wants to save or
 load files.
@@ -159,7 +159,7 @@ with open("image.png", "rb") as f:
     artifacts["image.png"] = f.read()
 agent = va.agent.VisionAgent()
-response, artifacts = agent.chat_with_code(
+response, artifacts = agent.chat_with_artifacts(
     [
         {
             "role": "user",
@@ -339,11 +339,11 @@ mode by passing in the verbose argument:
 ```
 ### Detailed Usage
-You can also have it return more information by calling `chat_with_workflow`. The format
+You can also have it return more information by calling `generate_code`. The format
 of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 ```python
->>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
+>>> results = agent.generate_code([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
 >>> print(results)
 {
     "code": "from vision_agent.tools import ..."
@@ -372,7 +372,7 @@ conv = [
         "media": ["workers.png"],
     }
 ]
-result = agent.chat_with_workflow(conv)
+result = agent.generate_code(conv)
 code = result["code"]
 conv.append({"role": "assistant", "content": code})
 conv.append(
@@ -381,7 +381,7 @@ conv.append(
         "content": "Can you also return the number of workers wearing safety gear?",
     }
 )
-result = agent.chat_with_workflow(conv)
+result = agent.generate_code(conv)
 ```

{vision_agent-0.2.160 → vision_agent-0.2.162}/README.md RENAMED Viewed

@@ -101,7 +101,7 @@ continuing, for example it may want to execute code and look at the output befor
 letting the user respond.
 ### Chatting and Artifacts
-If you run `chat_with_code` you will also notice an `Artifact` object. `Artifact`'s
+If you run `chat_with_artifacts` you will also notice an `Artifact` object. `Artifact`'s
 are a way to sync files between local and remote environments. The agent will read and
 write to the artifact object, which is just a pickle object, when it wants to save or
 load files.
@@ -118,7 +118,7 @@ with open("image.png", "rb") as f:
     artifacts["image.png"] = f.read()
 agent = va.agent.VisionAgent()
-response, artifacts = agent.chat_with_code(
+response, artifacts = agent.chat_with_artifacts(
     [
         {
             "role": "user",
@@ -298,11 +298,11 @@ mode by passing in the verbose argument:
 ```
 ### Detailed Usage
-You can also have it return more information by calling `chat_with_workflow`. The format
+You can also have it return more information by calling `generate_code`. The format
 of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 ```python
->>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
+>>> results = agent.generate_code([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
 >>> print(results)
 {
     "code": "from vision_agent.tools import ..."
@@ -331,7 +331,7 @@ conv = [
         "media": ["workers.png"],
     }
 ]
-result = agent.chat_with_workflow(conv)
+result = agent.generate_code(conv)
 code = result["code"]
 conv.append({"role": "assistant", "content": code})
 conv.append(
@@ -340,7 +340,7 @@ conv.append(
         "content": "Can you also return the number of workers wearing safety gear?",
     }
 )
-result = agent.chat_with_workflow(conv)
+result = agent.generate_code(conv)
 ```

{vision_agent-0.2.160 → vision_agent-0.2.162}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.160"
+version = "0.2.162"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.160 → vision_agent-0.2.162}/vision_agent/agent/__init__.py RENAMED Viewed

@@ -7,3 +7,11 @@ from .vision_agent_coder import (
     OpenAIVisionAgentCoder,
     VisionAgentCoder,
 )
+from .vision_agent_planner import (
+    AnthropicVisionAgentPlanner,
+    AzureVisionAgentPlanner,
+    OllamaVisionAgentPlanner,
+    OpenAIVisionAgentPlanner,
+    PlanContext,
+    VisionAgentPlanner,
+)

{vision_agent-0.2.160 → vision_agent-0.2.162}/vision_agent/agent/agent_utils.py RENAMED Viewed

@@ -2,10 +2,17 @@ import json
 import logging
 import re
 import sys
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
+from rich.console import Console
+from rich.style import Style
+from rich.syntax import Syntax
+import vision_agent.tools as T
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
+_CONSOLE = Console()
 def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
@@ -41,11 +48,16 @@ def _strip_markdown_code(inp_str: str) -> str:
 def extract_json(json_str: str) -> Dict[str, Any]:
     json_str_mod = json_str.replace("\n", " ").strip()
-    json_str_mod = json_str_mod.replace("'", '"')
     json_str_mod = json_str_mod.replace(": True", ": true").replace(
         ": False", ": false"
     )
+    # sometimes the json is in single quotes
+    try:
+        return json.loads(json_str_mod.replace("'", '"'))  # type: ignore
+    except json.JSONDecodeError:
+        pass
     try:
         return json.loads(json_str_mod)  # type: ignore
     except json.JSONDecodeError:
@@ -83,3 +95,65 @@ def remove_installs_from_code(code: str) -> str:
     pattern = r"\n!pip install.*?(\n|\Z)\n"
     code = re.sub(pattern, "", code, flags=re.DOTALL)
     return code
+def format_memory(memory: List[Dict[str, str]]) -> str:
+    output_str = ""
+    for i, m in enumerate(memory):
+        output_str += f"### Feedback {i}:\n"
+        output_str += f"Code {i}:\n```python\n{m['code']}```\n\n"
+        output_str += f"Feedback {i}: {m['feedback']}\n\n"
+        if "edits" in m:
+            output_str += f"Edits {i}:\n{m['edits']}\n"
+        output_str += "\n"
+    return output_str
+def format_plans(plans: Dict[str, Any]) -> str:
+    plan_str = ""
+    for k, v in plans.items():
+        plan_str += "\n" + f"{k}: {v['thoughts']}\n"
+        plan_str += "    -" + "\n    -".join([e for e in v["instructions"]])
+    return plan_str
+class DefaultImports:
+    """Container for default imports used in the code execution."""
+    common_imports = [
+        "import os",
+        "import numpy as np",
+        "from vision_agent.tools import *",
+        "from typing import *",
+        "from pillow_heif import register_heif_opener",
+        "register_heif_opener()",
+    ]
+    @staticmethod
+    def to_code_string() -> str:
+        return "\n".join(DefaultImports.common_imports + T.__new_tools__)
+    @staticmethod
+    def prepend_imports(code: str) -> str:
+        """Run this method to prepend the default imports to the code.
+        NOTE: be sure to run this method after the custom tools have been registered.
+        """
+        return DefaultImports.to_code_string() + "\n\n" + code
+def print_code(title: str, code: str, test: Optional[str] = None) -> None:
+    _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
+    _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
+    _CONSOLE.print(
+        Syntax(
+            DefaultImports.prepend_imports(code),
+            "python",
+            theme="gruvbox-dark",
+            line_numbers=True,
+        )
+    )
+    if test:
+        _CONSOLE.print("=" * 30 + " Test " + "=" * 30)
+        _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))

{vision_agent-0.2.160 → vision_agent-0.2.162}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -14,8 +14,8 @@ from vision_agent.agent.vision_agent_prompts import (
     VA_CODE,
 )
 from vision_agent.lmm import LMM, AnthropicLMM, Message, OpenAILMM
-from vision_agent.tools import META_TOOL_DOCSTRING
 from vision_agent.tools.meta_tools import (
+    META_TOOL_DOCSTRING,
     Artifacts,
     check_and_load_image,
     use_extra_vision_agent_args,
@@ -195,8 +195,8 @@ class VisionAgent(Agent):
         agent: Optional[LMM] = None,
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
-        code_sandbox_runtime: Optional[str] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
+        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent.
@@ -206,13 +206,18 @@ class VisionAgent(Agent):
             verbosity (int): The verbosity level of the agent.
             local_artifacts_path (Optional[Union[str, Path]]): The path to the local
                 artifacts file.
-            code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
+            callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
+                function to send intermediate update messages.
+            code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
+                it can be one of: None, "local" or "e2b". If None, it will read from
+                the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
+                object is provided it will use that.
         """
         self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
         self.max_iterations = 12
         self.verbosity = verbosity
-        self.code_sandbox_runtime = code_sandbox_runtime
+        self.code_interpreter = code_interpreter
         self.callback_message = callback_message
         if self.verbosity >= 1:
             _LOGGER.setLevel(logging.INFO)
@@ -230,7 +235,7 @@ class VisionAgent(Agent):
         input: Union[str, List[Message]],
         media: Optional[Union[str, Path]] = None,
         artifacts: Optional[Artifacts] = None,
-    ) -> List[Message]:
+    ) -> str:
         """Chat with VisionAgent and get the conversation response.
         Parameters:
@@ -247,10 +252,28 @@ class VisionAgent(Agent):
             input = [{"role": "user", "content": input}]
             if media is not None:
                 input[0]["media"] = [media]
-        results, _ = self.chat_with_code(input, artifacts)
-        return results
+        results, _ = self.chat_with_artifacts(input, artifacts)
+        return results[-1]["content"]  # type: ignore
-    def chat_with_code(
+    def chat(
+        self,
+        chat: List[Message],
+    ) -> List[Message]:
+        """Chat with VisionAgent, it will use code to execute actions to accomplish
+        its tasks.
+        Parameters:
+            chat (List[Message]): A conversation in the format of:
+                [{"role": "user", "content": "describe your task here..."}]
+                or if it contains media files, it should be in the format of:
+                [{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
+        Returns:
+            List[Message]: The conversation response.
+        """
+        return self.chat_with_artifacts(chat)[0]
+    def chat_with_artifacts(
         self,
         chat: List[Message],
         artifacts: Optional[Artifacts] = None,
@@ -284,9 +307,16 @@ class VisionAgent(Agent):
             # this is setting remote artifacts path
             artifacts = Artifacts(WORKSPACE / "artifacts.pkl")
-        with CodeInterpreterFactory.new_instance(
-            code_sandbox_runtime=self.code_sandbox_runtime,
-        ) as code_interpreter:
+        # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
+        code_interpreter = (
+            self.code_interpreter
+            if self.code_interpreter is not None
+            and not isinstance(self.code_interpreter, str)
+            else CodeInterpreterFactory.new_instance(
+                code_sandbox_runtime=self.code_interpreter,
+            )
+        )
+        with code_interpreter:
             orig_chat = copy.deepcopy(chat)
             int_chat = copy.deepcopy(chat)
             last_user_message = chat[-1]
@@ -472,8 +502,8 @@ class OpenAIVisionAgent(VisionAgent):
         agent: Optional[LMM] = None,
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
-        code_sandbox_runtime: Optional[str] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
+        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent using OpenAI LMMs.
@@ -483,7 +513,12 @@ class OpenAIVisionAgent(VisionAgent):
             verbosity (int): The verbosity level of the agent.
             local_artifacts_path (Optional[Union[str, Path]]): The path to the local
                 artifacts file.
-            code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
+            callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
+                function to send intermediate update messages.
+            code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
+                it can be one of: None, "local" or "e2b". If None, it will read from
+                the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
+                object is provided it will use that.
         """
         agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
@@ -491,8 +526,8 @@ class OpenAIVisionAgent(VisionAgent):
             agent,
             verbosity,
             local_artifacts_path,
-            code_sandbox_runtime,
             callback_message,
+            code_interpreter,
         )
@@ -502,8 +537,8 @@ class AnthropicVisionAgent(VisionAgent):
         agent: Optional[LMM] = None,
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
-        code_sandbox_runtime: Optional[str] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
+        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent using Anthropic LMMs.
@@ -513,7 +548,12 @@ class AnthropicVisionAgent(VisionAgent):
             verbosity (int): The verbosity level of the agent.
             local_artifacts_path (Optional[Union[str, Path]]): The path to the local
                 artifacts file.
-            code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
+            callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
+                function to send intermediate update messages.
+            code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
+                it can be one of: None, "local" or "e2b". If None, it will read from
+                the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
+                object is provided it will use that.
         """
         agent = AnthropicLMM(temperature=0.0) if agent is None else agent
@@ -521,6 +561,6 @@ class AnthropicVisionAgent(VisionAgent):
             agent,
             verbosity,
             local_artifacts_path,
-            code_sandbox_runtime,
             callback_message,
+            code_interpreter,
         )

vision-agent 0.2.160__tar.gz → 0.2.162__tar.gz

vision-agent 0.2.160__tar.gz → 0.2.162__tar.gz