vision-agent 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/agent_coder.py +19 -5
- vision_agent/agent/vision_agent_v2.py +9 -2
- vision_agent/agent/vision_agent_v2_prompt.py +4 -3
- vision_agent/llm/llm.py +7 -4
- vision_agent/tools/tool_utils.py +1 -1
- vision_agent/tools/tools.py +1 -1
- vision_agent/tools/tools_v2.py +199 -10
- vision_agent/utils/image_utils.py +7 -2
- vision_agent/utils/sim.py +6 -1
- vision_agent/utils/type_defs.py +1 -1
- {vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/METADATA +4 -2
- {vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/RECORD +14 -14
- {vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/WHEEL +0 -0
vision_agent/agent/agent_coder.py CHANGED
@@ -5,6 +5,9 @@ import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Union
 
+from rich.console import Console
+from rich.syntax import Syntax
+
 from vision_agent.agent import Agent
 from vision_agent.agent.agent_coder_prompts import (
     DEBUG,
@@ -40,6 +43,7 @@ from vision_agent.tools.tools_v2 import *
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _EXECUTE = Execute()
+_CONSOLE = Console()
 
 
 def write_tests(question: str, code: str, model: LLM) -> str:
@@ -103,7 +107,7 @@ def run_visual_tests(
 
 
 def fix_bugs(code: str, tests: str, result: str, feedback: str, model: LLM) -> str:
-    prompt = FIX_BUG.format(
+    prompt = FIX_BUG.format(code=code, tests=tests, result=result, feedback=feedback)
     completion = model(prompt)
     return preprocess_data(completion)
 
@@ -139,7 +143,8 @@ class AgentCoder(Agent):
             else visual_tester_agent
         )
         self.max_turns = 3
-
+        self.verbose = verbose
+        if self.verbose:
             _LOGGER.setLevel(logging.INFO)
 
     def __call__(
@@ -164,9 +169,15 @@ class AgentCoder(Agent):
         feedback = ""
         for _ in range(self.max_turns):
             code = write_program(question, feedback, self.coder_agent)
-
+            if self.verbose:
+                _CONSOLE.print(
+                    Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
+                )
             debug = write_debug(question, code, feedback, self.tester_agent)
-
+            if self.verbose:
+                _CONSOLE.print(
+                    Syntax(debug, "python", theme="gruvbox-dark", line_numbers=True)
+                )
             results = execute_tests(code, debug)
             _LOGGER.info(
                 f"execution results: passed: {results['passed']}\n{results['result']}"
@@ -176,7 +187,10 @@ class AgentCoder(Agent):
                 code = fix_bugs(
                     code, debug, results["result"].strip(), feedback, self.coder_agent  # type: ignore
                 )
-
+                if self.verbose:
+                    _CONSOLE.print(
+                        Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
+                    )
             else:
                 # TODO: Sometimes it prints nothing, so we need to handle that case
                 # TODO: The visual agent reflection does not work very well, needs more testing
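The new verbose mode pretty-prints each generated program with rich's syntax highlighting. A minimal standalone sketch of the same call pattern, where the code string is a made-up stand-in for the agent's generated program:

from rich.console import Console
from rich.syntax import Syntax

console = Console()
generated_code = "def add(a: int, b: int) -> int:\n    return a + b"  # hypothetical output
# The same call the agent now makes when verbose=True
console.print(Syntax(generated_code, "python", theme="gruvbox-dark", line_numbers=True))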
vision_agent/agent/vision_agent_v2.py CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
 
 import pandas as pd
+from langsmith import traceable
 from rich.console import Console
 from rich.syntax import Syntax
 from tabulate import tabulate
@@ -66,6 +67,7 @@ def extract_json(json_str: str) -> Dict[str, Any]:
     return json_dict  # type: ignore
 
 
+@traceable(name="planning")
 def write_plan(
     chat: List[Dict[str, str]],
     plan: Optional[List[Dict[str, Any]]],
@@ -214,6 +216,7 @@ def write_and_exec_code(
     return success, code, result, working_memory
 
 
+@traceable(name="plan execution")
 def run_plan(
     user_req: str,
     plan: List[Dict[str, Any]],
@@ -235,7 +238,7 @@ def run_plan(
             f"""
{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
         )
-        tools = tool_recommender.top_k(task["instruction"])
+        tools = tool_recommender.top_k(task["instruction"], thresh=0.3)
         tool_info = "\n".join([e["doc"] for e in tools])
 
         if verbosity == 2:
@@ -285,6 +288,7 @@ class VisionAgentV2(Agent):
     solve vision tasks. It is inspired by MetaGPT's Data Interpreter
     https://arxiv.org/abs/2402.18679. Vision Agent has several key features to help it
     generate code:
+
     - A planner to generate a plan of tasks to solve a user requirement. The planner
     can output code tasks or test tasks, where test tasks are used to verify the code.
     - Automatic debugging, if a task fails, the agent will attempt to debug the code
@@ -333,6 +337,7 @@ class VisionAgentV2(Agent):
         results = self.chat_with_workflow(input, image, plan)
         return results["code"]  # type: ignore
 
+    @traceable
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
@@ -377,7 +382,9 @@ class VisionAgentV2(Agent):
                 self.long_term_memory,
                 self.verbosity,
             )
-            success = all(
+            success = all(
+                task["success"] if "success" in task else False for task in plan
+            )
             working_memory.update(working_memory_i)
 
         if not success:
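The planner and plan executor are now instrumented with LangSmith's `@traceable` decorator, so each run is recorded as a named span. A minimal sketch of the pattern, assuming LangSmith credentials are configured in the environment; the function body here is a hypothetical stand-in, not the real implementation:

from langsmith import traceable

@traceable(name="planning")
def write_plan(user_request: str) -> list:
    # hypothetical body; the real write_plan prompts an LLM for a task plan
    return [{"instruction": user_request, "type": "code"}]

plan = write_plan("count the cans in the image")  # traced when LangSmith is configured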
vision_agent/agent/vision_agent_v2_prompt.py CHANGED
@@ -34,7 +34,7 @@ PLAN = """
 
 # Task:
 Based on the context and the tools you have available, write a plan of subtasks to achieve the user request that adhere to the following requirements:
-- For each subtask, you should provide
+- For each subtask, you should provide instructions on what to do. Write detailed subtasks, ensure they are large enough to be meaningful, encompassing multiple lines of code.
 - You do not need to have the agent rewrite any tool functionality you already have, you should instead instruct it to utilize one or more of those tools in each subtask.
 - You can have agents either write coding tasks, to code some functionality or testing tasks to test previous functionality.
 - If a current plan exists, examine each item in the plan to determine if it was successful. If there was an item that failed, i.e. 'success': False, then you should rewrite that item and all subsequent items to ensure that the rewritten plan is successful.
@@ -73,9 +73,10 @@ CODE = """
 {code}
 
 # Constraints
-- Write a function that accomplishes the '
-- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info
+- Write a function that accomplishes the 'Current Subtask'. You are supplied code from a previous task under 'Previous Code', do not delete or change previous code unless it contains a bug or it is necessary to complete the 'Current Subtask'.
+- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
 - You may recieve previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running in to the same issues when writing your code.
+- Use the `save_json` function from `vision_agent.tools.tools_v2` to save your output as a json file.
 - Write clean, readable, and well-documented code.
 
 # Output
vision_agent/llm/llm.py CHANGED
@@ -3,6 +3,7 @@ import os
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Mapping, Optional, Union, cast
 
+from langsmith.wrappers import wrap_openai
 from openai import AzureOpenAI, OpenAI
 
 from vision_agent.tools import (
@@ -41,9 +42,9 @@ class OpenAILLM(LLM):
         **kwargs: Any
     ):
         if not api_key:
-            self.client = OpenAI()
+            self.client = wrap_openai(OpenAI())
         else:
-            self.client = OpenAI(api_key=api_key)
+            self.client = wrap_openai(OpenAI(api_key=api_key))
 
         self.model_name = model_name
         self.system_prompt = system_prompt
@@ -165,8 +166,10 @@ class AzureOpenAILLM(OpenAILLM):
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
 
-        self.client =
-
+        self.client = wrap_openai(
+            AzureOpenAI(
+                api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+            )
         )
         self.model_name = model_name
         self.kwargs = kwargs
vision_agent/tools/tool_utils.py CHANGED
@@ -8,7 +8,7 @@ from vision_agent.utils.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
 _LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.
+_LND_API_URL = "https://api.staging.landing.ai/v1/agent"
 
 
 def _send_inference_request(
vision_agent/tools/tools.py CHANGED
vision_agent/tools/tools_v2.py CHANGED
@@ -1,5 +1,6 @@
 import inspect
 import io
+import json
 import logging
 import tempfile
 from importlib import resources
@@ -14,7 +15,14 @@ from scipy.spatial import distance  # type: ignore
 
 from vision_agent.tools.tool_utils import _send_inference_request
 from vision_agent.utils import extract_frames_from_video
-from vision_agent.utils.image_utils import
+from vision_agent.utils.image_utils import (
+    convert_to_b64,
+    normalize_bbox,
+    rle_decode,
+    b64_to_pil,
+    get_image_size,
+    denormalize_bbox,
+)
 
 COLORS = [
     (158, 218, 229),
@@ -48,7 +56,7 @@ def grounding_dino(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.20,
-    iou_threshold: float = 0.
+    iou_threshold: float = 0.20,
 ) -> List[Dict[str, Any]]:
     """'grounding_dino' is a tool that can detect and count objects given a text prompt
     such as category names or referring expressions. It returns a list and count of
@@ -60,12 +68,13 @@ def grounding_dino(
         box_threshold (float, optional): The threshold for the box detection. Defaults
             to 0.20.
         iou_threshold (float, optional): The threshold for the Intersection over Union
-            (IoU). Defaults to 0.
+            (IoU). Defaults to 0.20.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
             bounding box of the detected objects with normalized coordinates
-            (
+            (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left and
+            xmax and ymax are the coordinates of the bottom-right of the bounding box.
 
     Example
     -------
@@ -76,7 +85,7 @@ def grounding_dino(
         ]
     """
     image_size = image.shape[:2]
-    image_b64 = convert_to_b64(
+    image_b64 = convert_to_b64(image)
     request_data = {
         "prompt": prompt,
         "image": image_b64,
@@ -100,7 +109,7 @@ def grounding_sam(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.20,
-    iou_threshold: float = 0.
+    iou_threshold: float = 0.20,
 ) -> List[Dict[str, Any]]:
     """'grounding_sam' is a tool that can detect and segment objects given a text
     prompt such as category names or referring expressions. It returns a list of
@@ -112,12 +121,15 @@ def grounding_sam(
         box_threshold (float, optional): The threshold for the box detection. Defaults
             to 0.20.
         iou_threshold (float, optional): The threshold for the Intersection over Union
-            (IoU). Defaults to 0.
+            (IoU). Defaults to 0.20.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label,
             bounding box, and mask of the detected objects with normalized coordinates
-            (
+            (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left and
+            xmax and ymax are the coordinates of the bottom-right of the bounding box.
+            The mask is binary 2D numpy array where 1 indicates the object and 0 indicates
+            the background.
 
     Example
     -------
@@ -136,7 +148,7 @@ def grounding_sam(
         ]
     """
     image_size = image.shape[:2]
-    image_b64 = convert_to_b64(
+    image_b64 = convert_to_b64(image)
    request_data = {
         "prompt": prompt,
         "image": image_b64,
@@ -234,6 +246,152 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     return output
 
 
+def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
+    """'zero_shot_counting' is a tool that counts the dominant foreground object given an image and no other information about the content.
+    It returns only the count of the objects in the image.
+
+    Parameters:
+        image (np.ndarray): The image that contains lot of instances of a single object
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the key 'count' and the count as a value. E.g. {count: 12}.
+
+    Example
+    -------
+    >>> zero_shot_counting(image)
+    {'count': 45},
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "image": image_b64,
+        "tool": "zero_shot_counting",
+    }
+    resp_data = _send_inference_request(data, "tools")
+    resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
+    return resp_data
+
+
+def visual_prompt_counting(
+    image: np.ndarray, visual_prompt: Dict[str, List[float]]
+) -> Dict[str, Any]:
+    """'visual_prompt_counting' is a tool that counts the dominant foreground object given an image and a visual prompt which is a bounding box describing the object.
+    It returns only the count of the objects in the image.
+
+    Parameters:
+        image (np.ndarray): The image that contains lot of instances of a single object
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the key 'count' and the count as a value. E.g. {count: 12}.
+
+    Example
+    -------
+    >>> visual_prompt_counting(image, {"bbox": [0.1, 0.1, 0.4, 0.42]})
+    {'count': 45},
+
+    """
+
+    image_size = get_image_size(image)
+    bbox = visual_prompt["bbox"]
+    bbox_str = ", ".join(map(str, denormalize_bbox(bbox, image_size)))
+    image_b64 = convert_to_b64(image)
+
+    data = {
+        "image": image_b64,
+        "prompt": bbox_str,
+        "tool": "few_shot_counting",
+    }
+    resp_data = _send_inference_request(data, "tools")
+    resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
+    return resp_data
+
+
+def image_question_answering(image: np.ndarray, prompt: str) -> str:
+    """'image_question_answering_' is a tool that can answer questions about the visual contents of an image given a question and an image.
+    It returns an answer to the question
+
+    Parameters:
+        image (np.ndarray): The reference image used for the question
+        prompt (str): The question about the image
+
+    Returns:
+        str: A string which is the answer to the given prompt. E.g. {'text': 'This image contains a cat sitting on a table with a bowl of milk.'}.
+
+    Example
+    -------
+    >>> image_question_answering(image, 'What is the cat doing ?')
+    'drinking milk'
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "image": image_b64,
+        "prompt": prompt,
+        "tool": "image_question_answering",
+    }
+
+    answer = _send_inference_request(data, "tools")
+    return answer["text"][0]  # type: ignore
+
+
+def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
+    """'clip' is a tool that can classify an image given a list of input classes or tags.
+    It returns the same list of the input classes along with their probability scores based on image content.
+
+    Parameters:
+        image (np.ndarray): The image to classify or tag
+        classes (List[str]): The list of classes or tags that is associated with the image
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the labels and scores. One dictionary contains a list of given labels and other a list of scores.
+
+    Example
+    -------
+    >>> clip(image, ['dog', 'cat', 'bird'])
+    {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]},
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "prompt": ",".join(classes),
+        "image": image_b64,
+        "tool": "closed_set_image_classification",
+    }
+    resp_data = _send_inference_request(data, "tools")
+    resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
+    return resp_data
+
+
+def image_caption(image: np.ndarray) -> str:
+    """'image_caption' is a tool that can caption an image based on its contents.
+    It returns a text describing the image.
+
+    Parameters:
+        image (np.ndarray): The image to caption
+
+    Returns:
+        str: A string which is the caption for the given image.
+
+    Example
+    -------
+    >>> image_caption(image)
+    'This image contains a cat sitting on a table with a bowl of milk.'
+
+    """
+
+    image_b64 = convert_to_b64(image)
+    data = {
+        "image": image_b64,
+        "tool": "image_captioning",
+    }
+
+    answer = _send_inference_request(data, "tools")
+    return answer["text"][0]  # type: ignore
+
+
 def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
     """'closest_mask_distance' calculates the closest distance between two masks.
 
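All five new tools follow the same shape as the existing ones: a numpy image in, a JSON-friendly dict or string out, with the heavy lifting delegated through `_send_inference_request` to the hosted tools endpoint. A hedged sketch of how they compose; the image path is made up, and the calls require network access plus a LandingAI API key:

import numpy as np
from vision_agent.tools.tools_v2 import (
    clip,
    image_caption,
    load_image,
    zero_shot_counting,
)

image: np.ndarray = load_image("examples/shelf.jpg")   # hypothetical path
print(image_caption(image))                   # e.g. 'shelves stocked with cans'
print(clip(image, ["can", "bottle", "box"]))  # {'labels': [...], 'scores': [...]}
result = zero_shot_counting(image)            # {'count': ..., 'heat_map': ndarray}
print(result["count"])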
@@ -285,6 +443,31 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
 # Utility and visualization functions
 
 
+def save_json(data: Any, file_path: str) -> None:
+    """'save_json' is a utility function that saves data as a JSON file. It is helpful
+    for saving data that contains NumPy arrays which are not JSON serializable.
+
+    Parameters:
+        data (Any): The data to save.
+        file_path (str): The path to save the JSON file.
+
+    Example
+    -------
+    >>> save_json(data, "path/to/file.json")
+    """
+
+    class NumpyEncoder(json.JSONEncoder):
+        def default(self, obj: Any):  # type: ignore
+            if isinstance(obj, np.ndarray):
+                return obj.tolist()
+            elif isinstance(obj, np.bool_):
+                return bool(obj)
+            return json.JSONEncoder.default(self, obj)
+
+    with open(file_path, "w") as f:
+        json.dump(data, f, cls=NumpyEncoder)
+
+
 def load_image(image_path: str) -> np.ndarray:
     """'load_image' is a utility function that loads an image from the given path.
 
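`save_json` exists because tool outputs often contain NumPy arrays and NumPy booleans, which the stock `json` module refuses to serialize; the nested `NumpyEncoder` converts them on the way out. A small usage sketch, with made-up data and output path:

import numpy as np
from vision_agent.tools.tools_v2 import save_json

detections = {
    "bbox": np.array([0.1, 0.2, 0.4, 0.5]),  # ndarray is written as a list
    "above_threshold": np.bool_(True),       # np.bool_ is written as a plain bool
}
save_json(detections, "detections.json")  # hypothetical output path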
@@ -478,8 +661,14 @@ TOOLS = [
     grounding_sam,
     extract_frames,
     ocr,
+    clip,
+    zero_shot_counting,
+    visual_prompt_counting,
+    image_question_answering,
+    image_caption,
     closest_mask_distance,
     closest_box_distance,
+    save_json,
     load_image,
     save_image,
     overlay_bounding_boxes,
@@ -489,5 +678,5 @@ TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
 TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
 TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
 UTILITIES_DOCSTRING = get_tool_documentation(
-    [load_image, save_image, overlay_bounding_boxes]
+    [save_json, load_image, save_image, overlay_bounding_boxes]
 )
vision_agent/utils/image_utils.py CHANGED
@@ -104,15 +104,20 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
     """
     if data is None:
         raise ValueError(f"Invalid input image: {data}. Input image can't be None.")
+
     if isinstance(data, (str, Path)):
         data = Image.open(data)
+    elif isinstance(data, np.ndarray):
+        data = Image.fromarray(data)
+
     if isinstance(data, Image.Image):
         buffer = BytesIO()
         data.convert("RGB").save(buffer, format="PNG")
         return base64.b64encode(buffer.getvalue()).decode("utf-8")
     else:
-
-
+        raise ValueError(
+            f"Invalid input image: {data}. Input image must be a PIL Image or a numpy array."
+        )
 
 
 def denormalize_bbox(
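`convert_to_b64` now accepts a raw numpy array directly, routing it through `PIL.Image.fromarray` before the PNG/base64 encoding, and fails fast with a `ValueError` for unsupported types instead of silently falling through. A quick sketch of both paths, using a made-up placeholder image:

import numpy as np
from vision_agent.utils.image_utils import convert_to_b64

frame = np.zeros((64, 64, 3), dtype=np.uint8)  # hypothetical blank image
b64 = convert_to_b64(frame)  # works without converting to a PIL Image first

# convert_to_b64(42)  # would raise ValueError: not a path, PIL Image, or ndarray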
vision_agent/utils/sim.py CHANGED
@@ -56,12 +56,15 @@ class Sim:
         df = df.drop("embs", axis=1)
         df.to_csv(sim_file / "df.csv", index=False)
 
-    def top_k(
+    def top_k(
+        self, query: str, k: int = 5, thresh: Optional[float] = None
+    ) -> Sequence[Dict]:
         """Returns the top k most similar items to the query.
 
         Parameters:
             query: str: The query to compare to.
             k: int: The number of items to return.
+            thresh: Optional[float]: The minimum similarity threshold.
 
         Returns:
             Sequence[Dict]: The top k most similar items.
@@ -70,6 +73,8 @@ class Sim:
         embedding = get_embedding(self.client, query, model=self.model)
         self.df["sim"] = self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
         res = self.df.sort_values("sim", ascending=False).head(k)
+        if thresh is not None:
+            res = res[res.sim > thresh]
         return res[[c for c in res.columns if c != "embs"]].to_dict(orient="records")
 
 
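`top_k` can now drop weak matches: after ranking by cosine similarity it filters out rows at or below `thresh`, which is how `run_plan` above trims irrelevant tool suggestions with `thresh=0.3`. A sketch, assuming a `Sim` index was already built over tool documentation; the constructor argument and query are made up:

from vision_agent.utils.sim import Sim

sim = Sim(tools_df)  # hypothetical: a DataFrame with a precomputed 'embs' column
# at most 5 results, keeping only those with similarity strictly above 0.3
tools = sim.top_k("detect and count objects in an image", k=5, thresh=0.3)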
vision_agent/utils/type_defs.py CHANGED
@@ -12,7 +12,7 @@ class LandingaiAPIKey(BaseSettings):
     """
 
     api_key: str = Field(
-        default="
+        default="land_sk_IJrojHarPXRjqDj1Fng76mX7yCbzVm1s5rZYxaNXu5v0cNLn0w",
         alias="LANDINGAI_API_KEY",
         description="The API key of LandingAI.",
     )
{vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/METADATA CHANGED
@@ -1,14 +1,16 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.
+Version: 0.2.25
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
-Requires-Python: >=3.9
+Requires-Python: >=3.9,<4.0
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
 Requires-Dist: nbclient (>=0.10.0,<0.11.0)
 Requires-Dist: nbformat (>=5.10.4,<6.0.0)
{vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/RECORD CHANGED
@@ -1,7 +1,7 @@
 vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
 vision_agent/agent/__init__.py,sha256=Zv8lc91mPy0iDySId38_vc4mo56JQ9mCMvUWdAKQjh0,206
 vision_agent/agent/agent.py,sha256=X7kON-g9ePUKumCDaYfQNBX_MEFE-ax5PnRp7-Cc5Wo,529
-vision_agent/agent/agent_coder.py,sha256=
+vision_agent/agent/agent_coder.py,sha256=4iB732bX4wDnPAuyYBk6HWlf4aFq2l9EcL695qfDIXw,7004
 vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
 vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMVg,11511
 vision_agent/agent/easytool_prompts.py,sha256=Bikw-PPLkm78dwywTlnv32Y1Tw6JMeC-R7oCnXWLcTk,4656
@@ -9,26 +9,26 @@ vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6w
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
 vision_agent/agent/vision_agent.py,sha256=pnx7gtTPazR7Dck5_kfZC3S3QWKu4e28YVigzOicOX0,27130
 vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
-vision_agent/agent/vision_agent_v2.py,sha256=
-vision_agent/agent/vision_agent_v2_prompt.py,sha256=
+vision_agent/agent/vision_agent_v2.py,sha256=3qjvaj-yyrXmoY_cecUsiuY4Rn6MmJanFZeoXFJRK2c,13229
+vision_agent/agent/vision_agent_v2_prompt.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
-vision_agent/llm/llm.py,sha256=
+vision_agent/llm/llm.py,sha256=_Klwngc35JdRuzezWe1P5BMBRkfRQSGJqNOtS44rM9s,5891
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
 vision_agent/lmm/lmm.py,sha256=gK90vMxh0OcGSuIZQikBkDXm4pfkdFk1R2y7rtWDl84,10539
 vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJxM,452
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tool_utils.py,sha256=
-vision_agent/tools/tools.py,sha256=
-vision_agent/tools/tools_v2.py,sha256=
+vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
+vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
+vision_agent/tools/tools_v2.py,sha256=Tdam-cWBI4ipXWwGyxim-SK07zP97_hcdUtYd1a4CnI,21404
 vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
 vision_agent/utils/execute.py,sha256=RC_jKrm2kOWwzNe9xKuA2xJcbsNcD0Hb95_o3_Le0_E,3820
-vision_agent/utils/image_utils.py,sha256=
-vision_agent/utils/sim.py,sha256=
-vision_agent/utils/type_defs.py,sha256=
+vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
+vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
+vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
 vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
+vision_agent-0.2.25.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.25.dist-info/METADATA,sha256=5bycdwOp0pnRpUBQo_JM1c1Abq2fmWJcVYE_7YgtoUY,9212
+vision_agent-0.2.25.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.25.dist-info/RECORD,,

{vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/LICENSE: file without changes
{vision_agent-0.2.23.dist-info → vision_agent-0.2.25.dist-info}/WHEEL: file without changes