vision-agent 0.2.110__tar.gz → 0.2.112__tar.gz
- {vision_agent-0.2.110 → vision_agent-0.2.112}/PKG-INFO +1 -1
- {vision_agent-0.2.110 → vision_agent-0.2.112}/pyproject.toml +3 -1
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/agent_utils.py +3 -8
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent.py +1 -1
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent_coder.py +28 -20
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent_coder_prompts.py +9 -7
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent_prompts.py +11 -10
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/clients/http.py +15 -3
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/clients/landing_public_api.py +14 -2
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/__init__.py +11 -5
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/meta_tools.py +1 -46
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/tool_utils.py +25 -10
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/tools.py +463 -99
- vision_agent-0.2.112/vision_agent/tools/tools_types.py +84 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/exceptions.py +13 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/execute.py +0 -1
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/image_utils.py +52 -0
- vision_agent-0.2.110/vision_agent/tools/meta_tools_types.py +0 -30
- {vision_agent-0.2.110 → vision_agent-0.2.112}/LICENSE +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/README.md +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.110 → vision_agent-0.2.112}/pyproject.toml
RENAMED
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.110"
+version = "0.2.112"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -78,6 +78,8 @@ line_length = 88
 profile = "black"
 
 [tool.mypy]
+plugins = "pydantic.mypy"
+
 exclude = "tests"
 show_error_context = true
 pretty = true
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/agent_utils.py
RENAMED
@@ -4,14 +4,13 @@ import sys
 from typing import Any, Dict
 
 logging.basicConfig(stream=sys.stdout)
-_LOGGER = logging.getLogger(__name__)
 
 
 def extract_json(json_str: str) -> Dict[str, Any]:
     try:
+        json_str = json_str.replace("\n", " ")
         json_dict = json.loads(json_str)
     except json.JSONDecodeError:
-        input_json_str = json_str
         if "```json" in json_str:
             json_str = json_str[json_str.find("```json") + len("```json") :]
             json_str = json_str[: json_str.find("```")]
@@ -19,12 +18,8 @@ def extract_json(json_str: str) -> Dict[str, Any]:
             json_str = json_str[json_str.find("```") + len("```") :]
             # get the last ``` not one from an intermediate string
             json_str = json_str[: json_str.find("}```")]
-        try:
-            json_dict = json.loads(json_str)
-        except json.JSONDecodeError as e:
-            error_msg = f"Could not extract JSON from the given str: {json_str}.\nFunction input:\n{input_json_str}"
-            _LOGGER.exception(error_msg)
-            raise ValueError(error_msg) from e
+
+        json_dict = json.loads(json_str)
     return json_dict  # type: ignore
 
 
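The `extract_json` rework is worth a closer look: newlines are collapsed before the first parse attempt, the intermediate error wrapping is gone, and a failure in the fenced-block fallback now propagates as a plain `JSONDecodeError`. Below is a minimal standalone sketch of the resulting behavior (a reimplementation for illustration, not an import from the package):

```python
import json
from typing import Any, Dict


def extract_json(json_str: str) -> Dict[str, Any]:
    try:
        # First attempt: parse as-is, with newlines collapsed to spaces.
        json_str = json_str.replace("\n", " ")
        return json.loads(json_str)
    except json.JSONDecodeError:
        # Fallback: pull the payload out of a markdown code fence.
        if "```json" in json_str:
            json_str = json_str[json_str.find("```json") + len("```json") :]
            json_str = json_str[: json_str.find("```")]
        elif "```" in json_str:
            json_str = json_str[json_str.find("```") + len("```") :]
            json_str = json_str[: json_str.find("}```")]
        # A second failure now raises JSONDecodeError to the caller,
        # which is what the new retry loop in pick_plan relies on.
        return json.loads(json_str)


print(extract_json('```json\n{"best_plan": "plan1"}\n```'))  # {'best_plan': 'plan1'}
```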
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent.py
RENAMED
@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
     ]
 
     @staticmethod
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent_coder.py
RENAMED
@@ -4,6 +4,7 @@ import logging
 import os
 import sys
 import tempfile
+from json import JSONDecodeError
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast
 
@@ -86,8 +87,8 @@ def format_memory(memory: List[Dict[str, str]]) -> str:
 def format_plans(plans: Dict[str, Any]) -> str:
     plan_str = ""
     for k, v in plans.items():
-        plan_str += f"{k}:\n"
-        plan_str += "-" + "\n-".join([e["instructions"] for e in v])
+        plan_str += "\n" + f"{k}: {v['thoughts']}\n"
+        plan_str += " -" + "\n -".join([e for e in v["instructions"]])
 
     return plan_str
 
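Plans now carry a top-level `thoughts` string plus a flat list of instruction strings, instead of a list of `{"instructions": ...}` dicts. Below is a hypothetical plans payload under the new schema, fed through a copy of the updated formatter (the plan contents are invented for illustration):

```python
from typing import Any, Dict

# Hypothetical planner output under the new schema.
plans: Dict[str, Any] = {
    "plan1": {
        "thoughts": "Detect dogs with an open-vocabulary detector.",
        "instructions": [
            "Load the image with load_image",
            "Run owl_v2 on the image with the prompt 'dog'",
        ],
    },
}


def format_plans(plans: Dict[str, Any]) -> str:
    # Same logic as the updated helper: emit the thoughts, then one dash per instruction.
    plan_str = ""
    for k, v in plans.items():
        plan_str += "\n" + f"{k}: {v['thoughts']}\n"
        plan_str += " -" + "\n -".join([e for e in v["instructions"]])
    return plan_str


print(format_plans(plans))
```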
@@ -228,13 +229,11 @@ def pick_plan(
                 "status": "completed" if tool_output.success else "failed",
             }
         )
-        tool_output_str = ""
-        if len(tool_output.logs.stdout) > 0:
-            tool_output_str = tool_output.logs.stdout[0]
+        tool_output_str = tool_output.text().strip()
 
         if verbosity == 2:
             _print_code("Code and test after attempted fix:", code)
-            _LOGGER.info(f"Code execution result after attempted fix: {tool_output_str}")
+            _LOGGER.info(f"Code execution result after attempt {count}")
 
         count += 1
 
@@ -251,7 +250,21 @@ def pick_plan(
         tool_output=tool_output_str[:20_000],
     )
     chat[-1]["content"] = prompt
-    best_plan = extract_json(model(chat, stream=False))  # type: ignore
+
+    count = 0
+    best_plan = None
+    while best_plan is None and count < max_retries:
+        try:
+            best_plan = extract_json(model(chat, stream=False))  # type: ignore
+        except JSONDecodeError as e:
+            _LOGGER.exception(
+                f"Error while extracting JSON during picking best plan {str(e)}"
+            )
+            pass
+        count += 1
+
+    if best_plan is None:
+        best_plan = {"best_plan": list(plans.keys())[0]}
 
     if verbosity >= 1:
         _LOGGER.info(f"Best plan:\n{best_plan}")
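The plan-selection step now retries JSON extraction up to `max_retries` times and falls back to the first plan when every attempt fails. Below is a rough sketch of the same retry-with-fallback pattern, with a stubbed model call standing in for the LMM (the stub and helper names are illustrative, not the library API):

```python
import json
from json import JSONDecodeError
from typing import Any, Callable, Dict


def pick_best_plan(
    call_model: Callable[[], str], plans: Dict[str, Any], max_retries: int = 3
) -> Dict[str, Any]:
    best_plan = None
    count = 0
    while best_plan is None and count < max_retries:
        try:
            # call_model returns the raw LMM text; json.loads stands in for extract_json.
            best_plan = json.loads(call_model())
        except JSONDecodeError:
            pass  # swallow the bad response and try again
        count += 1
    if best_plan is None:
        # Fall back to the first plan rather than crashing the whole workflow.
        best_plan = {"best_plan": list(plans.keys())[0]}
    return best_plan


plans = {"plan1": {}, "plan2": {}}
print(pick_best_plan(lambda: "not json", plans))            # {'best_plan': 'plan1'}
print(pick_best_plan(lambda: '{"best_plan": "plan2"}', plans))
```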
@@ -525,7 +538,7 @@ def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
 
 
 def retrieve_tools(
-    plans: Dict[str, List[Dict[str, str]]],
+    plans: Dict[str, Dict[str, Any]],
     tool_recommender: Sim,
     log_progress: Callable[[Dict[str, Any]], None],
     verbosity: int = 0,
@@ -542,8 +555,8 @@ def retrieve_tools(
     tool_lists: Dict[str, List[Dict[str, str]]] = {}
     for k, plan in plans.items():
         tool_lists[k] = []
-        for task in plan:
-            tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
+        for task in plan["instructions"]:
+            tools = tool_recommender.top_k(task, k=2, thresh=0.3)
             tool_info.extend([e["doc"] for e in tools])
             tool_desc.extend([e["desc"] for e in tools])
             tool_lists[k].extend(
@@ -737,14 +750,7 @@ class VisionAgentCoder(Agent):
         if self.verbosity >= 1:
             for p in plans:
                 # tabulate will fail if the keys are not the same for all elements
-                p_fixed = [
-                    {
-                        "instructions": (
-                            e["instructions"] if "instructions" in e else ""
-                        )
-                    }
-                    for e in plans[p]
-                ]
+                p_fixed = [{"instructions": e} for e in plans[p]["instructions"]]
                 _LOGGER.info(
                     f"\n{tabulate(tabular_data=p_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                 )
@@ -793,13 +799,15 @@ class VisionAgentCoder(Agent):
             )
 
             if self.verbosity >= 1:
+                plan_i_fixed = [{"instructions": e} for e in plan_i["instructions"]]
                 _LOGGER.info(
-                    f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
+                    f"Picked best plan:\n{tabulate(tabular_data=plan_i_fixed, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                 )
 
             results = write_and_test_code(
                 chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
-                plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
+                plan=f"\n{plan_i['thoughts']}\n-"
+                + "\n-".join([e for e in plan_i["instructions"]]),
                 tool_info=tool_info,
                 tool_output=tool_output_str,
                 tool_utils=T.UTILITIES_DOCSTRING,
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
@@ -30,18 +30,19 @@ PLAN = """
 
 **Instructions**:
 1. Based on the context and tools you have available, create a plan of subtasks to achieve the user request.
-2. Output three different plans each utilize a different strategy or
+2. Output three different plans each utilize a different strategy or set of tools.
 
 Output a list of jsons in the following format
 
 ```json
 {{
     "plan1":
-        [
-            {{
-                "instructions": str # what you should do in this task associated with a tool
-            }}
-        ],
+        {{
+            "thoughts": str # your thought process for choosing this plan
+            "instructions": [
+                str # what you should do in this task associated with a tool
+            ]
+        }},
     "plan2": ...,
     "plan3": ...
 }}
@@ -127,7 +128,8 @@ PICK_PLAN = """
 
 **Instructions**:
 1. Given the plans, image, and tool outputs, decide which plan is the best to achieve the user request.
-2. Output a JSON object with the following format:
+2. Try solving the problem yourself given the image and pick the plan that matches your solution the best.
+3. Output a JSON object with the following format:
 {{
     "thoughts": str # your thought process for choosing the best plan
     "best_plan": str # the best plan you have chosen
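For reference, a reply that satisfies the updated PICK_PLAN format is a two-key JSON object, often wrapped in a markdown fence by the model. The snippet below builds an invented example of such a reply and parses it the same way `extract_json` does (the reply text is made up for illustration):

```python
import json

# Invented example of a model reply that follows the new PICK_PLAN format,
# wrapped in a markdown code fence the way LMMs often return JSON.
fence = "`" * 3
response = (
    f"{fence}json\n"
    '{"thoughts": "Plan 2 matches my own solution: check helmets first, '
    'then match them to workers.", "best_plan": "plan2"}\n'
    f"{fence}"
)

# Strip the fence the same way extract_json does, then parse.
body = response[response.find(f"{fence}json") + len(f"{fence}json") :]
body = body[: body.find(fence)]
print(json.loads(body)["best_plan"])  # plan2
```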
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/agent/vision_agent_prompts.py
RENAMED
@@ -15,7 +15,7 @@ This is the documentation for the different actions you can take:
 **Examples**:
 Here is an example of how you can interact with a user and Actions to complete a task:
 --- START EXAMPLES ---
-[Current directory: /workspace]
+[Current directory: /example/workspace]
 {examples}
 --- END EXAMPLES ---
 
@@ -27,16 +27,17 @@ Here is an example of how you can interact with a user and Actions to complete a
 Here is the current conversation so far:
 --- START CONVERSATION ---
 [Current directory: {dir}]
+
 {conversation}
 """
 
 EXAMPLES_CODE1 = """
 USER: Can you detect the dogs in this image? Media name dog.jpg
 
-AGENT: {"thoughts": "I will use the generate_vision_code to detect the dogs in the image.", "response": "<execute_python>generate_vision_code('/workspace/dog_detector.py', 'Can you write code to detect dogs in this image?', media=['/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I will use the generate_vision_code to detect the dogs in the image.", "response": "<execute_python>generate_vision_code('/example/workspace/dog_detector.py', 'Can you write code to detect dogs in this image?', media=['/example/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
-[File /workspace/dog_detector.py]
+[File /example/workspace/dog_detector.py]
 0|from vision_agent.tools import load_image, owl_v2
 1|def detect_dogs(image_path: str):
 2|    image = load_image(image_path)
@@ -44,7 +45,7 @@ OBSERVATION:
 4|    return dogs
 [End of file]
 
-AGENT: {"thoughts": "I have generated the code to detect the dogs in the image, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I have generated the code to detect the dogs in the image, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/example/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 ----- stdout -----
@@ -55,10 +56,10 @@ AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask
 
 USER: The the image only has one dog, can you fix this?
 
-AGENT: {"thoughts": "I will use edit_vision_code to fix the code to detect only one dog. The lower score is 0.23 so I will set the threshold above that.", "response": "<execute_python>edit_vision_code('/workspace/dog_detector.py', ['Can you write code to detect dogs in this image?', 'Can you increase the threshold to 0.24?'], media=['/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I will use edit_vision_code to fix the code to detect only one dog. The lower score is 0.23 so I will set the threshold above that.", "response": "<execute_python>edit_vision_code('/example/workspace/dog_detector.py', ['Can you write code to detect dogs in this image?', 'Can you increase the threshold to 0.24?'], media=['/example/workspace/dog.jpg'])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
-[File /workspace/dog_detector.py]
+[File /example/workspace/dog_detector.py]
 0|from vision_agent.tools import load_image, owl_v2
 1|def detect_dogs(image_path: str):
 2|    image = load_image(image_path)
@@ -66,7 +67,7 @@ OBSERVATION:
 4|    return dogs
 [End of file]
 
-AGENT: {"thoughts": "I have edited the code to detect only one dog, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I have edited the code to detect only one dog, I must now run the code to get the output.", "response": "<execute_python>from dog_detector import detect_dogs\n print(detect_dogs('/example/workspace/dog.jpg'))</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 ----- stdout -----
@@ -82,10 +83,10 @@ AGENT: {"thoughts": "The user has asked to count workers with helmets but has no
 
 USER: Yes you can use workers.png
 
-AGENT: {"thoughts": "I will use the generate_vision_code to count the workers with helmets in the image.", "response": "<execute_python>generate_vision_code('/workspace/code.py', 'Can you write code to count workers with helmets in this image?', media=['/workspace/workers.png'])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I will use the generate_vision_code to count the workers with helmets in the image.", "response": "<execute_python>generate_vision_code('/example/workspace/code.py', 'Can you write code to count workers with helmets in this image?', media=['/example/workspace/workers.png'])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
-[File /workspace/code.py]
+[File /example/workspace/code.py]
 0|from vision_agent.tools import load_image, owl_v2, closest_box_distance
 1|def count_workers_with_helmets(image_path: str):
 2|    image = load_image(image_path)
@@ -104,7 +105,7 @@ OBSERVATION:
 15|    return count
 [End of file]
 
-AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code to get the output.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/workspace/workers.png'))</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code to get the output.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/example/workspace/workers.png'))</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 ----- stdout -----
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/clients/http.py
RENAMED
@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
 
 from requests import Session
 from requests.adapters import HTTPAdapter
-from requests.exceptions import ConnectionError, RequestException, Timeout
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -38,9 +37,22 @@ class BaseHTTP:
             response.raise_for_status()
             result: Dict[str, Any] = response.json()
             _LOGGER.info(json.dumps(result))
-        except (ConnectionError, Timeout, RequestException) as err:
-            _LOGGER.warning(f"Error: {err}.")
         except json.JSONDecodeError:
             resp_text = response.text
             _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
+        return result
+
+    def get(self, url: str) -> Dict[str, Any]:
+        formatted_url = f"{self._base_endpoint}/{url}"
+        _LOGGER.info(f"Sending data to {formatted_url}")
+        try:
+            response = self._session.get(url=formatted_url, timeout=self._TIMEOUT)
+            response.raise_for_status()
+            result: Dict[str, Any] = response.json()
+            _LOGGER.info(json.dumps(result))
+        except json.JSONDecodeError:
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
         return result
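The new `get` method mirrors `post`: it joins the path onto the client's base endpoint, raises for HTTP errors, and re-raises JSON decoding failures after logging a warning instead of swallowing them. A hedged sketch of the same pattern with plain `requests` (the example path is taken from the fine-tuning status endpoint added in this release; the real client also configures retries and a shared session):

```python
import json
import logging

import requests

_LOGGER = logging.getLogger(__name__)


def get_json(base_endpoint: str, url: str, timeout: int = 30) -> dict:
    # Same shape as BaseHTTP.get: join the path, fail on HTTP errors,
    # and surface JSON decoding problems instead of hiding them.
    formatted_url = f"{base_endpoint}/{url}"
    response = requests.get(formatted_url, timeout=timeout)
    response.raise_for_status()
    try:
        result = response.json()
        _LOGGER.info(json.dumps(result))
    except json.JSONDecodeError:
        _LOGGER.warning(f"Response seems incorrect: '{response.text}'.")
        raise
    return result


# e.g. get_json("https://api.landing.ai", "v1/agent/jobs/fine-tuning/<job-id>/status")
```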
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/clients/landing_public_api.py
RENAMED
@@ -1,10 +1,13 @@
 import os
-from uuid import UUID
 from typing import List
+from uuid import UUID
+
+from requests.exceptions import HTTPError
 
 from vision_agent.clients.http import BaseHTTP
 from vision_agent.utils.type_defs import LandingaiAPIKey
-from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+from vision_agent.utils.exceptions import FineTuneModelNotFound
+from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
 
 
 class LandingPublicAPI(BaseHTTP):
@@ -24,3 +27,12 @@ class LandingPublicAPI(BaseHTTP):
         }
         response = self.post(url, payload=data)
         return UUID(response["jobId"])
+
+    def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
+        url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
+        try:
+            get_job = self.get(url)
+        except HTTPError as err:
+            if err.response.status_code == 404:
+                raise FineTuneModelNotFound()
+        return JobStatus(get_job["status"])
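Putting the new client pieces together, a caller can poll the fine-tuning endpoint until the job reaches a terminal state. The loop below is only a usage sketch: it assumes a valid LandingAI API key is configured, the job id placeholder is not real, and the polling interval and iteration cap are arbitrary choices rather than documented guidance.

```python
import time
from uuid import UUID

from vision_agent.clients.landing_public_api import LandingPublicAPI
from vision_agent.utils.exceptions import FineTuneModelNotFound

# Assumed: `job_id` comes from a previous launch_fine_tuning_job() call.
job_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder, not a real job

api = LandingPublicAPI()
try:
    for _ in range(10):
        status = api.check_fine_tuning_job(job_id)
        print(f"fine-tuning job {job_id} is {status}")
        time.sleep(30)  # poll every 30 seconds; the interval is arbitrary
except FineTuneModelNotFound:
    print("no fine-tuning job with that id (the API returned 404)")
```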
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/__init__.py
RENAMED
@@ -1,6 +1,8 @@
 from typing import Callable, List, Optional
 
-from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
+from .meta_tools import (
+    META_TOOL_DOCSTRING,
+)
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,
@@ -17,16 +19,20 @@ from .tools import (
     detr_segmentation,
     dpt_hybrid_midas,
     extract_frames,
-    florencev2_image_caption,
-    florencev2_object_detection,
-    florencev2_ocr,
-    florencev2_roberta_vqa,
+    florence2_image_caption,
+    florence2_object_detection,
+    florence2_ocr,
+    florence2_roberta_vqa,
+    florence2_sam2_image,
+    florence2_sam2_video,
     generate_pose_image,
     generate_soft_edge_image,
     get_tool_documentation,
     git_vqa_v2,
     grounding_dino,
     grounding_sam,
+    ixc25_image_vqa,
+    ixc25_video_vqa,
     load_image,
     loca_visual_prompt_counting,
     loca_zero_shot_counting,
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/meta_tools.py
RENAMED
@@ -1,6 +1,5 @@
 import os
 import subprocess
-from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
@@ -8,9 +7,6 @@ import vision_agent as va
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
-from vision_agent.utils.image_utils import convert_to_b64
-from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
 
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
@@ -384,51 +380,11 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
 
 def get_tool_descriptions() -> str:
     """Returns a description of all the tools that `generate_vision_code` has access to.
-    Helpful for
+    Helpful for answering questions about what types of vision tasks you can do with
     `generate_vision_code`."""
     return TOOL_DESCRIPTIONS
 
 
-def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
-    """'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
-    to detect objects in an image based on a given dataset. It returns the fine
-    tuning job id.
-
-    Parameters:
-        bboxes (List[BboxInput]): A list of BboxInput containing the
-            image path, labels and bounding boxes.
-        task (PromptTask): The florencev2 fine-tuning task. The options are
-            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
-
-    Returns:
-        UUID: The fine tuning job id, this id will used to retrieve the fine
-            tuned model.
-
-    Example
-    -------
-        >>> fine_tuning_job_id = florencev2_fine_tuning(
-            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
-             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
-            "OBJECT_DETECTION"
-        )
-    """
-    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
-    task_input = PromptTask[task]
-    fine_tuning_request = [
-        BboxInputBase64(
-            image=convert_to_b64(bbox_input.image_path),
-            filename=bbox_input.image_path.split("/")[-1],
-            labels=bbox_input.labels,
-            bboxes=bbox_input.bboxes,
-        )
-        for bbox_input in bboxes_input
-    ]
-    landing_api = LandingPublicAPI()
-    return landing_api.launch_fine_tuning_job(
-        "florencev2", task_input, fine_tuning_request
-    )
-
-
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -442,6 +398,5 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         search_dir,
         search_file,
         find_file,
-        florencev2_fine_tuning,
     ]
 )
{vision_agent-0.2.110 → vision_agent-0.2.112}/vision_agent/tools/tool_utils.py
RENAMED
@@ -1,7 +1,7 @@
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict, List, MutableMapping, Optional
+from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple
 
 import pandas as pd
 from IPython.display import display
@@ -15,9 +15,10 @@ from vision_agent.utils.execute import Error, MimeType
 from vision_agent.utils.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
-_LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.landing.ai/v1/agent/model"
-_LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
+_LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
+_LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
+_LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
+_LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
 
 
 class ToolCallTrace(BaseModel):
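With the base URL now read from the environment, the tool endpoints can be pointed at a different host (for example a staging deployment) without code changes. A small sketch of the override; the host name and key below are placeholders, and the variables must be set before `vision_agent.tools.tool_utils` is first imported because the URLs are computed once at module import time:

```python
import os

# Set before importing vision_agent.tools.tool_utils, since the module builds
# its URLs at import time.
os.environ["LANDINGAI_URL"] = "https://staging.example.com"  # illustrative host
os.environ["LANDINGAI_API_KEY"] = "land_sk_..."              # placeholder key

from vision_agent.tools.tool_utils import _LND_API_URL, _LND_API_URL_v2

print(_LND_API_URL)     # https://staging.example.com/v1/agent/model
print(_LND_API_URL_v2)  # https://staging.example.com/v1/tools
```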
@@ -28,8 +29,14 @@ class ToolCallTrace(BaseModel):
 
 
 def send_inference_request(
-    payload: Dict[str, Any], endpoint_name: str
+    payload: Dict[str, Any],
+    endpoint_name: str,
+    files: Optional[List[Tuple[Any, ...]]] = None,
+    v2: bool = False,
+    metadata_payload: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
+    # TODO: runtime_tag and function_name should be metadata_payload and now included
+    # in the service payload
     try:
         if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
             payload["runtime_tag"] = runtime_tag
@@ -44,7 +51,7 @@ def send_inference_request(
         response={},
         error=None,
     )
-    headers = {"Content-Type": "application/json", "apikey": _LND_API_KEY}
+    headers = {"apikey": _LND_API_KEY}
    if "TOOL_ENDPOINT_AUTH" in os.environ:
        headers["Authorization"] = os.environ["TOOL_ENDPOINT_AUTH"]
        headers.pop("apikey")
@@ -54,7 +61,11 @@ def send_inference_request(
        num_retry=3,
        headers=headers,
    )
-    res = session.post(url, json=payload)
+
+    if files is not None:
+        res = session.post(url, data=payload, files=files)
+    else:
+        res = session.post(url, json=payload)
    if res.status_code != 200:
        tool_call_trace.error = Error(
            name="RemoteToolCallFailed",
@@ -62,9 +73,13 @@ def send_inference_request(
            traceback_raw=[],
        )
        _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
-        raise RemoteToolCallFailed(
-            payload["function_name"], res.status_code, res.text
-        )
+        # TODO: function_name should be in metadata_payload
+        function_name = "unknown"
+        if "function_name" in payload:
+            function_name = payload["function_name"]
+        elif metadata_payload is not None and "function_name" in metadata_payload:
+            function_name = metadata_payload["function_name"]
+        raise RemoteToolCallFailed(function_name, res.status_code, res.text)
 
    resp = res.json()
    tool_call_trace.response = resp
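`send_inference_request` now accepts an optional `files` list and posts multipart form data when it is given, which is how the new video tools can ship frames or clips to the `v1/tools` endpoints. Below is a hedged sketch of a call under the new signature; the endpoint name and payload keys are assumptions rather than a documented tool contract, and the file tuple follows the standard `requests` files convention:

```python
from vision_agent.tools.tool_utils import send_inference_request

# Illustrative only: "example-endpoint" and the payload fields are made up;
# the files entry uses requests' (field, (filename, bytes)) form.
with open("video.mp4", "rb") as f:
    video_bytes = f.read()

payload = {
    "prompt": "What is happening in this video?",
    "function_name": "example_video_tool",  # surfaced in RemoteToolCallFailed on errors
}
files = [("video", ("video.mp4", video_bytes))]

response = send_inference_request(payload, "example-endpoint", files=files, v2=True)
print(response)
```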