vision-agent 0.0.53__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,10 +37,10 @@ _LOGGER = logging.getLogger(__name__)
 
 def parse_json(s: str) -> Any:
     s = (
-        s.replace(": true", ": True")
-        .replace(": false", ": False")
-        .replace(":true", ": True")
-        .replace(":false", ": False")
+        s.replace(": True", ": true")
+        .replace(": False", ": false")
+        .replace(":True", ": true")
+        .replace(":False", ": false")
         .replace("```", "")
         .strip()
     )
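The replacements now run in the opposite direction: Python-style True/False coming back from the LLM is normalized to JSON-style true/false before parsing. A minimal sketch of the intended behavior, assuming the cleaned string is ultimately handed to json.loads (the rest of parse_json is outside this hunk); the helper name here is hypothetical:

    import json
    from typing import Any

    def parse_json_sketch(s: str) -> Any:
        # normalize Python-style booleans to JSON and strip code fences,
        # mirroring the replacements in the hunk above
        s = (
            s.replace(": True", ": true")
            .replace(": False", ": false")
            .replace(":True", ": true")
            .replace(":False", ": false")
            .replace("```", "")
            .strip()
        )
        return json.loads(s)

    assert parse_json_sketch('{"Finish": True}') == {"Finish": True}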
@@ -62,6 +62,19 @@ def format_tools(tools: Dict[int, Any]) -> str:
     return tool_str
 
 
+def format_tool_usage(tools: Dict[int, Any], tool_result: List[Dict]) -> str:
+    usage = []
+    name_to_usage = {v["name"]: v["usage"] for v in tools.values()}
+    for tool_res in tool_result:
+        if "tool_name" in tool_res:
+            usage.append((tool_res["tool_name"], name_to_usage[tool_res["tool_name"]]))
+
+    usage_str = ""
+    for tool_name, tool_usage in usage:
+        usage_str += f"{tool_name} - {tool_usage}\n"
+    return usage_str
+
+
 def topological_sort(tasks: List[Dict]) -> List[Dict]:
     in_degree = {task["id"]: 0 for task in tasks}
     for task in tasks:
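For context, format_tool_usage pairs each tool named in the agent's results with its usage entry from the tool registry and renders one line per tool. A small, self-contained sketch with toy inputs (the shapes of tools and tool_result here are illustrative, simplified from the package's TOOLS registry):

    from typing import Any, Dict, List

    def format_tool_usage(tools: Dict[int, Any], tool_result: List[Dict]) -> str:
        usage = []
        name_to_usage = {v["name"]: v["usage"] for v in tools.values()}
        for tool_res in tool_result:
            if "tool_name" in tool_res:
                usage.append((tool_res["tool_name"], name_to_usage[tool_res["tool_name"]]))

        usage_str = ""
        for tool_name, tool_usage in usage:
            usage_str += f"{tool_name} - {tool_usage}\n"
        return usage_str

    tools = {0: {"name": "grounding_dino_", "usage": {"required_parameters": ["prompt", "image"]}}}
    results = [{"tool_name": "grounding_dino_", "parameters": {"prompt": "cat", "image": "cat.jpg"}}]
    print(format_tool_usage(tools, results))
    # grounding_dino_ - {'required_parameters': ['prompt', 'image']}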
@@ -255,7 +268,8 @@ def self_reflect(
 ) -> str:
     prompt = VISION_AGENT_REFLECTION.format(
         question=question,
-        tools=format_tools(tools),
+        tools=format_tools({k: v["description"] for k, v in tools.items()}),
+        tool_usage=format_tool_usage(tools, tool_result),
         tool_results=str(tool_result),
         final_answer=final_answer,
     )
@@ -268,41 +282,28 @@ def self_reflect(
     return reflect_model(prompt)
 
 
-def parse_reflect(reflect: str) -> bool:
-    # GPT-4V has a hard time following directions, so make the criteria less strict
-    return (
+def parse_reflect(reflect: str) -> Any:
+    reflect = reflect.strip()
+    try:
+        return parse_json(reflect)
+    except Exception:
+        _LOGGER.error(f"Failed parse json reflection: {reflect}")
+    # LMMs have a hard time following directions, so make the criteria less strict
+    finish = (
         "finish" in reflect.lower() and len(reflect) < 100
     ) or "finish" in reflect.lower()[-10:]
-
-
-def visualize_result(all_tool_results: List[Dict]) -> List[str]:
-    image_to_data: Dict[str, Dict] = {}
-    for tool_result in all_tool_results:
-        if tool_result["tool_name"] not in ["grounding_sam_", "grounding_dino_"]:
-            continue
-
-        parameters = tool_result["parameters"]
-        # parameters can either be a dictionary or list, parameters can also be malformed
-        # becaus the LLM builds them
-        if isinstance(parameters, dict):
-            if "image" not in parameters:
-                continue
-            parameters = [parameters]
-        elif isinstance(tool_result["parameters"], list):
-            if len(tool_result["parameters"]) < 1 or (
-                "image" not in tool_result["parameters"][0]
-            ):
-                continue
-
-        for param, call_result in zip(parameters, tool_result["call_results"]):
-            # calls can fail, so we need to check if the call was successful
-            if not isinstance(call_result, dict):
-                continue
-            if "bboxes" not in call_result:
-                continue
-
-            # if the call was successful, then we can add the image data
-            image = param["image"]
+    return {"Finish": finish, "Reflection": reflect}
+
+
+def _handle_extract_frames(
+    image_to_data: Dict[str, Dict], tool_result: Dict
+) -> Dict[str, Dict]:
+    image_to_data = image_to_data.copy()
+    # handle extract_frames_ case, useful if it extracts frames but doesn't do
+    # any following processing
+    for video_file_output in tool_result["call_results"]:
+        for frame, _ in video_file_output:
+            image = frame
             if image not in image_to_data:
                 image_to_data[image] = {
                     "bboxes": [],
@@ -310,17 +311,72 @@ def visualize_result(all_tool_results: List[Dict]) -> List[str]:
                     "labels": [],
                     "scores": [],
                 }
+    return image_to_data
+
+
+def _handle_viz_tools(
+    image_to_data: Dict[str, Dict], tool_result: Dict
+) -> Dict[str, Dict]:
+    image_to_data = image_to_data.copy()
+
+    # handle grounding_sam_ and grounding_dino_
+    parameters = tool_result["parameters"]
+    # parameters can either be a dictionary or list, parameters can also be malformed
+    # becaus the LLM builds them
+    if isinstance(parameters, dict):
+        if "image" not in parameters:
+            return image_to_data
+        parameters = [parameters]
+    elif isinstance(tool_result["parameters"], list):
+        if len(tool_result["parameters"]) < 1 or (
+            "image" not in tool_result["parameters"][0]
+        ):
+            return image_to_data
+
+    for param, call_result in zip(parameters, tool_result["call_results"]):
+        # calls can fail, so we need to check if the call was successful
+        if not isinstance(call_result, dict) or "bboxes" not in call_result:
+            return image_to_data
+
+        # if the call was successful, then we can add the image data
+        image = param["image"]
+        if image not in image_to_data:
+            image_to_data[image] = {
+                "bboxes": [],
+                "masks": [],
+                "labels": [],
+                "scores": [],
+            }
+
+        image_to_data[image]["bboxes"].extend(call_result["bboxes"])
+        image_to_data[image]["labels"].extend(call_result["labels"])
+        image_to_data[image]["scores"].extend(call_result["scores"])
+        if "masks" in call_result:
+            image_to_data[image]["masks"].extend(call_result["masks"])
+
+    return image_to_data
+
 
-            image_to_data[image]["bboxes"].extend(call_result["bboxes"])
-            image_to_data[image]["labels"].extend(call_result["labels"])
-            image_to_data[image]["scores"].extend(call_result["scores"])
-            if "masks" in call_result:
-                image_to_data[image]["masks"].extend(call_result["masks"])
+def visualize_result(all_tool_results: List[Dict]) -> List[str]:
+    image_to_data: Dict[str, Dict] = {}
+    for tool_result in all_tool_results:
+        # only handle bbox/mask tools or frame extraction
+        if tool_result["tool_name"] not in [
+            "grounding_sam_",
+            "grounding_dino_",
+            "extract_frames_",
+        ]:
+            continue
+
+        if tool_result["tool_name"] == "extract_frames_":
+            image_to_data = _handle_extract_frames(image_to_data, tool_result)
+        else:
+            image_to_data = _handle_viz_tools(image_to_data, tool_result)
 
     visualized_images = []
-    for image in image_to_data:
-        image_path = Path(image)
-        image_data = image_to_data[image]
+    for image_str in image_to_data:
+        image_path = Path(image_str)
+        image_data = image_to_data[image_str]
         image = overlay_masks(image_path, image_data)
         image = overlay_bboxes(image, image_data)
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
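Both new helpers follow the same accumulator pattern: copy the incoming image_to_data dict, add or extend entries keyed by image path (or frame), and return the copy, so visualize_result can fold tool results without mutating shared state. A toy sketch of that fold with a hypothetical no-op handler standing in for _handle_extract_frames / _handle_viz_tools:

    from typing import Dict, List

    def _register_image(image_to_data: Dict[str, Dict], tool_result: Dict) -> Dict[str, Dict]:
        # hypothetical handler: just registers the image with empty annotations
        image_to_data = image_to_data.copy()
        image = tool_result["parameters"]["image"]
        image_to_data.setdefault(image, {"bboxes": [], "masks": [], "labels": [], "scores": []})
        return image_to_data

    results: List[Dict] = [
        {"tool_name": "grounding_dino_", "parameters": {"image": "a.png"}},
        {"tool_name": "grounding_sam_", "parameters": {"image": "b.png"}},
    ]
    image_to_data: Dict[str, Dict] = {}
    for tool_result in results:
        image_to_data = _register_image(image_to_data, tool_result)
    print(sorted(image_to_data))  # ['a.png', 'b.png']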
@@ -374,7 +430,9 @@ class VisionAgent(Agent):
             OpenAILLM(temperature=0.1) if answer_model is None else answer_model
         )
         self.reflect_model = (
-            OpenAILMM(temperature=0.1) if reflect_model is None else reflect_model
+            OpenAILMM(json_mode=True, temperature=0.1)
+            if reflect_model is None
+            else reflect_model
         )
         self.max_retries = max_retries
         self.tools = TOOLS
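With this default, constructing a VisionAgent without an explicit reflect_model yields a reflection LMM that requests JSON output, which is what the new parse_reflect expects. A hedged usage sketch (import paths are assumed from the package layout in the RECORD below, and an OpenAI API key is assumed to be configured):

    from vision_agent.agent import VisionAgent
    from vision_agent.lmm import OpenAILMM

    # default construction: the reflection model runs in JSON mode
    agent = VisionAgent()

    # equivalent explicit construction, per the hunk above
    agent = VisionAgent(reflect_model=OpenAILMM(json_mode=True, temperature=0.1))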
@@ -470,13 +528,14 @@ class VisionAgent(Agent):
                 visualized_output[0] if len(visualized_output) > 0 else image,
             )
             self.log_progress(f"Reflection: {reflection}")
-            if parse_reflect(reflection):
+            parsed_reflection = parse_reflect(reflection)
+            if parsed_reflection["Finish"]:
                 break
             else:
-                reflections += "\n" + reflection
-        # '<END>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
+                reflections += "\n" + parsed_reflection["Reflection"]
+        # '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
         self.log_progress(
-            f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</<ANSWER>"
+            f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
         )
 
         if visualize_output:
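The loop above now relies on parse_reflect returning a dict. Since the reflection model runs in JSON mode, the happy path is a JSON object with Finish and Reflection keys; anything unparsable falls back to the keyword heuristic. A standalone sketch of both paths (json.loads stands in for the package's parse_json helper, and the function name is hypothetical):

    import json
    from typing import Any

    def parse_reflect_sketch(reflect: str) -> Any:
        reflect = reflect.strip()
        try:
            return json.loads(reflect)  # the real code calls parse_json here
        except Exception:
            pass
        # fallback: lenient keyword check, as in the hunk above
        finish = (
            "finish" in reflect.lower() and len(reflect) < 100
        ) or "finish" in reflect.lower()[-10:]
        return {"Finish": finish, "Reflection": reflect}

    print(parse_reflect_sketch('{"Finish": true, "Reflection": "The answer was correct."}'))
    print(parse_reflect_sketch("The answer looks right, so I will Finish."))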
@@ -1,4 +1,14 @@
-VISION_AGENT_REFLECTION = """You are an advanced reasoning agent that can improve based on self-refection. You will be given a previous reasoning trial in which you were given the user's question, the available tools that the agent has, the decomposed tasks and tools that the agent used to answer the question and the final answer the agent provided. You may also receive an image with the visualized bounding boxes or masks with their associated labels and scores from the tools used. You must determine if the agent's answer was correct or incorrect. If the agent's answer was correct, respond with Finish. If the agent's answer was incorrect, you must diagnose a possible reason for failure or phrasing discrepancy and devise a new, concise, concrete plan that aims to mitigate the same failure with the tools available. Do not make vague steps like re-evaluate the threshold, instead make concrete steps like use a threshold of 0.5 or whatever threshold you think would fix this issue. If the task cannot be completed with the existing tools, respond with Finish. Use complete sentences.
+VISION_AGENT_REFLECTION = """You are an advanced reasoning agent that can improve based on self-refection. You will be given a previous reasoning trial in which you were given the user's question, the available tools that the agent has, the decomposed tasks and tools that the agent used to answer the question and the final answer the agent provided. You may also receive an image with the visualized bounding boxes or masks with their associated labels and scores from the tools used.
+
+Please note that:
+1. You must ONLY output parsible JSON format. If the agents output was correct set "Finish" to true, else set "Finish" to false. An example output looks like:
+{{"Finish": true, "Reflection": "The agent's answer was correct."}}
+2. You must utilize the image with the visualized bounding boxes or masks and determine if the tools were used correctly or, using your own judgement, utilized incorrectly.
+3. If the agent's answer was incorrect, you must diagnose a possible reason for failure or phrasing discrepancy and devise a new, concise, concrete plan that aims to mitigate the same failure with the tools available. An example output looks like:
+{{"Finish": false, "Reflection": "I can see from teh visualized bounding boxes that the agent's answer was incorrect because the grounding_dino_ tool produced false positive predictions. The agent should use the following tools with the following parameters:
+Step 1: Use 'grounding_dino_' with a 'prompt' of 'baby. bed' and a 'box_threshold' of 0.7 to reduce the false positives.
+Step 2: Use 'box_iou_' with the baby bounding box and the bed bounding box to determine if the baby is on the bed or not."}}
+4. If the task cannot be completed with the existing tools or by adjusting the parameters, set "Finish" to true.
 
 User's question: {question}
 
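One detail worth noting: the doubled braces in the examples ({{...}}) are there because the template is later filled with str.format in self_reflect, so they render as literal JSON braces while single-brace placeholders such as {question} are substituted. A minimal illustration with a made-up template:

    TEMPLATE = 'Respond with JSON like {{"Finish": true}}. Question: {question}'
    print(TEMPLATE.format(question="Is the baby on the bed?"))
    # Respond with JSON like {"Finish": true}. Question: Is the baby on the bed?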
@@ -8,6 +18,9 @@ Tools available:
 Tasks and tools used:
 {tool_results}
 
+Tool's used API documentation:
+{tool_usage}
+
 Final answer:
 {final_answer}
 
vision_agent/llm/llm.py CHANGED
@@ -33,7 +33,7 @@ class OpenAILLM(LLM):
 
     def __init__(
         self,
-        model_name: str = "gpt-4-turbo-preview",
+        model_name: str = "gpt-4-turbo",
         api_key: Optional[str] = None,
         json_mode: bool = False,
         **kwargs: Any
vision_agent/lmm/lmm.py CHANGED
@@ -99,9 +99,10 @@ class OpenAILMM(LMM):
 
     def __init__(
        self,
-        model_name: str = "gpt-4-vision-preview",
+        model_name: str = "gpt-4-turbo",
        api_key: Optional[str] = None,
        max_tokens: int = 1024,
+        json_mode: bool = False,
        **kwargs: Any,
    ):
        if not api_key:
@@ -111,7 +112,10 @@
 
         self.client = OpenAI(api_key=api_key)
         self.model_name = model_name
-        self.max_tokens = max_tokens
+        if "max_tokens" not in kwargs:
+            kwargs["max_tokens"] = max_tokens
+        if json_mode:
+            kwargs["response_format"] = {"type": "json_object"}
         self.kwargs = kwargs
 
     def __call__(
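The constructor now folds max_tokens and the JSON switch into the kwargs that are splatted into every chat.completions.create call, so callers can still override max_tokens, and JSON mode maps onto OpenAI's response_format={"type": "json_object"}. A standalone sketch of the same kwargs handling (the helper name is hypothetical):

    from typing import Any, Dict

    def build_completion_kwargs(
        json_mode: bool = False, max_tokens: int = 1024, **kwargs: Any
    ) -> Dict[str, Any]:
        # only apply the default if the caller did not pass max_tokens explicitly
        if "max_tokens" not in kwargs:
            kwargs["max_tokens"] = max_tokens
        if json_mode:
            kwargs["response_format"] = {"type": "json_object"}
        return kwargs

    print(build_completion_kwargs(json_mode=True, temperature=0.1))
    # {'temperature': 0.1, 'max_tokens': 1024, 'response_format': {'type': 'json_object'}}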
@@ -153,7 +157,7 @@
         )
 
         response = self.client.chat.completions.create(
-            model=self.model_name, messages=fixed_chat, max_tokens=self.max_tokens, **self.kwargs  # type: ignore
+            model=self.model_name, messages=fixed_chat, **self.kwargs  # type: ignore
         )
 
         return cast(str, response.choices[0].message.content)
@@ -181,7 +185,7 @@
         )
 
         response = self.client.chat.completions.create(
-            model=self.model_name, messages=message, max_tokens=self.max_tokens, **self.kwargs  # type: ignore
+            model=self.model_name, messages=message, **self.kwargs  # type: ignore
         )
         return cast(str, response.choices[0].message.content)
 
@@ -12,8 +12,11 @@ from PIL.Image import Image as ImageType
 
 from vision_agent.image_utils import convert_to_b64, get_image_size
 from vision_agent.tools.video import extract_frames_from_video
+from vision_agent.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
+_LND_API_KEY = LandingaiAPIKey().api_key
+_LND_API_URL = "https://api.dev.landing.ai/v1/agent"
 
 
 def normalize_bbox(
@@ -80,8 +83,6 @@ class CLIP(Tool):
         [{"labels": ["red line", "yellow dot"], "scores": [0.98, 0.02]}]
     """
 
-    _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
-
     name = "clip_"
     description = "'clip_' is a tool that can classify any image given a set of input names or tags. It returns a list of the input names along with their probability scores."
     usage = {
@@ -125,23 +126,9 @@
             "image": image_b64,
             "tool": "closed_set_image_classification",
         }
-        res = requests.post(
-            self._ENDPOINT,
-            headers={"Content-Type": "application/json"},
-            json=data,
-        )
-        resp_json: Dict[str, Any] = res.json()
-        if (
-            "statusCode" in resp_json and resp_json["statusCode"] != 200
-        ) or "statusCode" not in resp_json:
-            _LOGGER.error(f"Request failed: {resp_json}")
-            raise ValueError(f"Request failed: {resp_json}")
-
-        resp_json["data"]["scores"] = [
-            round(prob, 4) for prob in resp_json["data"]["scores"]
-        ]
-
-        return resp_json["data"]  # type: ignore
+        resp_data = _send_inference_request(data, "tools")
+        resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
+        return resp_data
 
 
 class ImageCaption(Tool):
@@ -156,8 +143,6 @@ class ImageCaption(Tool):
         {'text': ['a box of orange and white socks']}
     """
 
-    _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
-
     name = "image_caption_"
     description = "'image_caption_' is a tool that can caption an image based on its contents or tags. It returns a text describing the image"
     usage = {
@@ -197,19 +182,7 @@
             "image": image_b64,
             "tool": "image_captioning",
         }
-        res = requests.post(
-            self._ENDPOINT,
-            headers={"Content-Type": "application/json"},
-            json=data,
-        )
-        resp_json: Dict[str, Any] = res.json()
-        if (
-            "statusCode" in resp_json and resp_json["statusCode"] != 200
-        ) or "statusCode" not in resp_json:
-            _LOGGER.error(f"Request failed: {resp_json}")
-            raise ValueError(f"Request failed: {resp_json}")
-
-        return resp_json["data"]  # type: ignore
+        return _send_inference_request(data, "tools")
 
 
 class GroundingDINO(Tool):
@@ -226,8 +199,6 @@ class GroundingDINO(Tool):
         'scores': [0.98, 0.02]}]
     """
 
-    _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
-
     name = "grounding_dino_"
     description = "'grounding_dino_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and associated probability scores."
     usage = {
@@ -290,24 +261,13 @@ class GroundingDINO(Tool):
             "tool": "visual_grounding",
             "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
         }
-        res = requests.post(
-            self._ENDPOINT,
-            headers={"Content-Type": "application/json"},
-            json=request_data,
-        )
-        resp_json: Dict[str, Any] = res.json()
-        if (
-            "statusCode" in resp_json and resp_json["statusCode"] != 200
-        ) or "statusCode" not in resp_json:
-            _LOGGER.error(f"Request failed: {resp_json}")
-            raise ValueError(f"Request failed: {resp_json}")
-        data: Dict[str, Any] = resp_json["data"]
+        data: Dict[str, Any] = _send_inference_request(request_data, "tools")
         if "bboxes" in data:
             data["bboxes"] = [normalize_bbox(box, image_size) for box in data["bboxes"]]
         if "scores" in data:
             data["scores"] = [round(score, 2) for score in data["scores"]]
         if "labels" in data:
-            data["labels"] = [label for label in data["labels"]]
+            data["labels"] = list(data["labels"])
         data["size"] = (image_size[1], image_size[0])
         return data
 
@@ -335,8 +295,6 @@ class GroundingSAM(Tool):
         [1, 1, 1, ..., 1, 1, 1]], dtype=uint8)]}]
     """
 
-    _ENDPOINT = "https://soi4ewr6fjqqdf5vuss6rrilee0kumxq.lambda-url.us-east-2.on.aws"
-
     name = "grounding_sam_"
     description = "'grounding_sam_' is a tool that can detect arbitrary objects with inputs such as category names or referring expressions. It returns a list of bounding boxes, label names and masks file names and associated probability scores."
     usage = {
@@ -399,18 +357,7 @@ class GroundingSAM(Tool):
             "tool": "visual_grounding_segment",
             "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
         }
-        res = requests.post(
-            self._ENDPOINT,
-            headers={"Content-Type": "application/json"},
-            json=request_data,
-        )
-        resp_json: Dict[str, Any] = res.json()
-        if (
-            "statusCode" in resp_json and resp_json["statusCode"] != 200
-        ) or "statusCode" not in resp_json:
-            _LOGGER.error(f"Request failed: {resp_json}")
-            raise ValueError(f"Request failed: {resp_json}")
-        data: Dict[str, Any] = resp_json["data"]
+        data: Dict[str, Any] = _send_inference_request(request_data, "tools")
         ret_pred: Dict[str, List] = {"labels": [], "bboxes": [], "masks": []}
         if "bboxes" in data:
             ret_pred["bboxes"] = [
@@ -714,3 +661,20 @@ TOOLS = {
     )
     if (hasattr(c, "name") and hasattr(c, "description") and hasattr(c, "usage"))
 }
+
+
+def _send_inference_request(
+    payload: Dict[str, Any], endpoint_name: str
+) -> Dict[str, Any]:
+    res = requests.post(
+        f"{_LND_API_URL}/model/{endpoint_name}",
+        headers={
+            "Content-Type": "application/json",
+            "apikey": _LND_API_KEY,
+        },
+        json=payload,
+    )
+    if res.status_code != 200:
+        _LOGGER.error(f"Request failed: {res.text}")
+        raise ValueError(f"Request failed: {res.text}")
+    return res.json()["data"]  # type: ignore
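All four tools above now funnel their HTTP calls through this helper, which posts to the LandingAI agent endpoint with an apikey header and unwraps the "data" field of the JSON body. A hedged standalone equivalent (URL and payload handling follow the hunks above; key handling is simplified to a plain environment variable for illustration):

    import os
    from typing import Any, Dict

    import requests

    _LND_API_URL = "https://api.dev.landing.ai/v1/agent"

    def send_inference_request(payload: Dict[str, Any], endpoint_name: str) -> Dict[str, Any]:
        # POST the tool payload and surface any non-200 response as an error
        res = requests.post(
            f"{_LND_API_URL}/model/{endpoint_name}",
            headers={
                "Content-Type": "application/json",
                "apikey": os.environ["LANDINGAI_API_KEY"],
            },
            json=payload,
        )
        if res.status_code != 200:
            raise ValueError(f"Request failed: {res.text}")
        return res.json()["data"]

    # e.g. CLIP calls send_inference_request(data, "tools") with the data dict shown in its hunk above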
@@ -0,0 +1,48 @@
+from pydantic import Field, field_validator
+from pydantic_settings import BaseSettings
+
+
+class LandingaiAPIKey(BaseSettings):
+    """The API key of a user in a particular organization in LandingLens.
+    It supports loading from environment variables or .env files.
+    The supported name of the environment variables are (case-insensitive):
+    - LANDINGAI_API_KEY
+
+    Environment variables will always take priority over values loaded from a dotenv file.
+    """
+
+    api_key: str = Field(
+        default="land_sk_hw34v3tyEc35OAhP8F7hnGnrDv2C8hD2ycMyq0aMkVS1H40D22",
+        alias="LANDINGAI_API_KEY",
+        description="The API key of LandingAI.",
+    )
+
+    @field_validator("api_key")
+    @classmethod
+    def is_api_key_valid(cls, key: str) -> str:
+        """Check if the API key is a v2 key."""
+        if not key:
+            raise InvalidApiKeyError(f"LandingAI API key is required, but it's {key}")
+        if not key.startswith("land_sk_"):
+            raise InvalidApiKeyError(
+                f"LandingAI API key (v2) must start with 'land_sk_' prefix, but it's {key}. See https://support.landing.ai/docs/api-key for more information."
+            )
+        return key
+
+    class Config:
+        env_file = ".env"
+        env_prefix = "landingai_"
+        case_sensitive = False
+        extra = "ignore"
+
+
+class InvalidApiKeyError(Exception):
+    """Exception raised when the an invalid API key is provided. This error could be raised from any SDK code, not limited to a HTTP client."""
+
+    def __init__(self, message: str):
+        self.message = f"""{message}
+For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"""
+        super().__init__(self.message)
+
+    def __str__(self) -> str:
+        return self.message
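The new type_defs module is what feeds the apikey header above. Because LandingaiAPIKey is a pydantic-settings model, the key can come from the LANDINGAI_API_KEY environment variable, a .env file, or the baked-in default, and the validator rejects anything without the land_sk_ prefix. A hedged usage sketch (the key value is made up for illustration):

    import os

    from vision_agent.type_defs import LandingaiAPIKey

    # environment variables take priority over .env values and the bundled default
    os.environ["LANDINGAI_API_KEY"] = "land_sk_example_key_for_illustration"
    print(LandingaiAPIKey().api_key)  # land_sk_example_key_for_illustration

    # keys without the v2 prefix are rejected by the field validator
    os.environ["LANDINGAI_API_KEY"] = "not-a-v2-key"
    try:
        LandingaiAPIKey()
    except Exception as exc:  # raised as InvalidApiKeyError (possibly wrapped by pydantic)
        print(type(exc).__name__)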
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.0.53
+Version: 0.1.2
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -16,6 +16,7 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: opencv-python-headless (>=4.0.0,<5.0.0)
 Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: sentence-transformers (>=2.0.0,<3.0.0)
 Requires-Dist: tabulate (>=0.9.0,<0.10.0)
@@ -5,8 +5,8 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
 vision_agent/agent/easytool_prompts.py,sha256=dYzWa_RaiaFSQ-CowoQOcFmjZtBTTljRyA809bLgrvU,4519
 vision_agent/agent/reflexion.py,sha256=wzpptfALNZIh9Q5jgkK3imGL5LWjTW_n_Ypsvxdh07Q,10101
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=UV7_mqejfF4B-AqqmETqWvfiPvRcjfq-0nlNfeo_RxM,19765
-vision_agent/agent/vision_agent_prompts.py,sha256=dPg0mLVK_fGJpYK2xXGhm-zuXX1KVZW_zFXyYsspUz8,6567
+vision_agent/agent/vision_agent.py,sha256=_xh3v7DaeH3r5JLeXtCvDbQgogGRvpmqH3dAW7ChA1E,21759
+vision_agent/agent/vision_agent_prompts.py,sha256=JC43AB0ZnL8jQW9LYe-5mTeEJmH0w-SuH9YmGQxf1eM,7311
 vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
 vision_agent/data/data.py,sha256=pgtSGZdAnbQ8oGsuapLtFTMPajnCGDGekEXTnFuBwsY,5122
 vision_agent/emb/__init__.py,sha256=YmCkGrJBtXb6X6Z3lnKiFoQYKXMgHMJp8JJyMLVvqcI,75
@@ -15,14 +15,15 @@ vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/image_utils.py,sha256=hFdPoRmeVU5jErFr5xaagMQ6Wy7Xbw8H8HXuLGdJIAM,4786
 vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
-vision_agent/llm/llm.py,sha256=tgL6ZtuwZKuxSNiCxJCuP2ETjNMrosdgxXkZJb0_00E,5024
+vision_agent/llm/llm.py,sha256=Jty_RHdqVmIM0Mm31JNk50c882Tx7hHtkmh0WyXeJd8,5016
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
-vision_agent/lmm/lmm.py,sha256=LxwxCArp7DfnPbjf_Gl55xBxPwo2Qx8eDp1gCnGYSO0,9535
+vision_agent/lmm/lmm.py,sha256=qDdy_9Q9wRjJ9ZUfqB8zfjhVIgITgjF7K4hYaTAoPCI,9637
 vision_agent/tools/__init__.py,sha256=OEqEysxm5wnnOD73NKNCUggALB72GEmVg9FNsEkSBtA,253
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tools.py,sha256=WPqLHw8D0tkaP2LFYo6cBithP4q0vb6Bve4Nv577Prk,27045
+vision_agent/tools/tools.py,sha256=Qsqe8X6VjB0EMWhyKJ5EMPyLIc_d5Vtlw4ugV2FB_Ks,25589
 vision_agent/tools/video.py,sha256=40rscP8YvKN3lhZ4PDcOK4XbdFX2duCRpHY_krmBYKU,7476
-vision_agent-0.0.53.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.0.53.dist-info/METADATA,sha256=ybezBW-LYFhlCovdbKNq6iC93mb0wZNOQ29HD30OPz4,6184
-vision_agent-0.0.53.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.0.53.dist-info/RECORD,,
+vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
+vision_agent-0.1.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.1.2.dist-info/METADATA,sha256=6AP0Z9G4l15uCcfBGhUfHV1AnP4lwXQuey7uH-QuvlU,6233
+vision_agent-0.1.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.1.2.dist-info/RECORD,,