PyPI - vision-agent - Versions diffs - 0.2.240__py3-none-any.whl → 0.2.242__py3-none-any.whl - Mend

vision-agent 0.2.240__py3-none-any.whl → 0.2.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

vision_agent/agent/agent.py +3 -2
vision_agent/agent/vision_agent_coder_v2.py +6 -1
vision_agent/agent/vision_agent_planner_v2.py +27 -10
vision_agent/agent/vision_agent_prompts_v2.py +15 -3
vision_agent/agent/vision_agent_v2.py +25 -6
vision_agent/models/__init__.py +7 -1
vision_agent/models/agent_types.py +16 -1
vision_agent/tools/__init__.py +0 -2
vision_agent/tools/meta_tools.py +1 -124
vision_agent/tools/tools.py +15 -104
vision_agent/utils/agent.py +5 -4
vision_agent/utils/exceptions.py +0 -7
vision_agent/utils/video_tracking.py +8 -3
{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/METADATA +1 -1
{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/RECORD +17 -18
vision_agent/clients/landing_public_api.py +0 -38
{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/LICENSE +0 -0
{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/WHEEL +0 -0

vision_agent/agent/agent.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Union
 from vision_agent.models import (
     AgentMessage,
     CodeContext,
+    ErrorContext,
     InteractionContext,
     Message,
     PlanContext,
@@ -36,7 +37,7 @@ class AgentCoder(Agent):
         chat: List[AgentMessage],
         max_steps: Optional[int] = None,
         code_interpreter: Optional[CodeInterpreter] = None,
-    ) -> Union[CodeContext, InteractionContext]:
+    ) -> Union[CodeContext, InteractionContext, ErrorContext]:
         pass
     @abstractmethod
@@ -56,5 +57,5 @@ class AgentPlanner(Agent):
         chat: List[AgentMessage],
         max_steps: Optional[int] = None,
         code_interpreter: Optional[CodeInterpreter] = None,
-    ) -> Union[PlanContext, InteractionContext]:
+    ) -> Union[PlanContext, InteractionContext, ErrorContext]:
         pass

vision_agent/agent/vision_agent_coder_v2.py CHANGED Viewed

@@ -13,6 +13,7 @@ from vision_agent.lmm import LMM
 from vision_agent.models import (
     AgentMessage,
     CodeContext,
+    ErrorContext,
     InteractionContext,
     Message,
     PlanContext,
@@ -365,6 +366,8 @@ class VisionAgentCoderV2(AgentCoder):
         code_or_interaction = self.generate_code(input_msg)
         if isinstance(code_or_interaction, InteractionContext):
             return code_or_interaction.chat[-1].content
+        elif isinstance(code_or_interaction, ErrorContext):
+            return code_or_interaction.error
         return code_or_interaction.code
     def generate_code(
@@ -372,7 +375,7 @@ class VisionAgentCoderV2(AgentCoder):
         chat: List[AgentMessage],
         max_steps: Optional[int] = None,
         code_interpreter: Optional[CodeInterpreter] = None,
-    ) -> Union[CodeContext, InteractionContext]:
+    ) -> Union[CodeContext, InteractionContext, ErrorContext]:
         """Generate vision code from a conversation.
         Parameters:
@@ -404,6 +407,8 @@ class VisionAgentCoderV2(AgentCoder):
             # the planner needs an interaction, so return before generating code
             if isinstance(plan_context, InteractionContext):
                 return plan_context
+            elif isinstance(plan_context, ErrorContext):
+                return plan_context
             code_context = self.generate_code_from_plan(
                 orig_chat,

vision_agent/agent/vision_agent_planner_v2.py CHANGED Viewed

@@ -24,7 +24,13 @@ from vision_agent.agent.vision_agent_planner_prompts_v2 import (
 )
 from vision_agent.configs import Config
 from vision_agent.lmm import LMM
-from vision_agent.models import AgentMessage, InteractionContext, Message, PlanContext
+from vision_agent.models import (
+    AgentMessage,
+    ErrorContext,
+    InteractionContext,
+    Message,
+    PlanContext,
+)
 from vision_agent.tools.planner_tools import check_function_call
 from vision_agent.utils.agent import (
     add_media_to_chat,
@@ -322,7 +328,7 @@ def create_finalize_plan(
     model: LMM,
     chat: List[AgentMessage],
     verbose: bool = False,
-) -> Tuple[List[AgentMessage], PlanContext]:
+) -> Tuple[List[AgentMessage], Union[PlanContext, ErrorContext]]:
     # if we're in the middle of an interaction, don't finalize the plan
     if chat[-1].role == "interaction":
         return [], PlanContext(plan="", instructions=[], code="")
@@ -337,11 +343,19 @@ def create_finalize_plan(
     return_chat = [AgentMessage(role="planner", content=plan_str, media=None)]
     plan_json = extract_tag(plan_str, "json")
-    plan = (
-        extract_json(plan_json)
-        if plan_json is not None
-        else {"plan": plan_str, "instructions": [], "code": ""}
-    )
+    # sometimes the planner model will refuse to answer a question because of some
+    # safety concern, we then won't be able to parse the response so we have to send
+    # it back to the user/conversation agent
+    try:
+        plan = (
+            extract_json(plan_json)
+            if plan_json is not None
+            else {"plan": plan_str, "instructions": [], "code": ""}
+        )
+    except json.JSONDecodeError:
+        return return_chat, ErrorContext(error=plan_str)
     code_snippets = extract_tag(plan_str, "code")
     plan["code"] = code_snippets if code_snippets is not None else ""
     if verbose:
@@ -473,14 +487,17 @@ class VisionAgentPlannerV2(AgentPlanner):
         plan_or_interaction = self.generate_plan(input_msg)
         if isinstance(plan_or_interaction, InteractionContext):
             return plan_or_interaction.chat[-1].content
-        return plan_or_interaction.plan
+        elif isinstance(plan_or_interaction, PlanContext):
+            return plan_or_interaction.plan
+        else:
+            return plan_or_interaction.error
     def generate_plan(
         self,
         chat: List[AgentMessage],
         max_steps: Optional[int] = None,
         code_interpreter: Optional[CodeInterpreter] = None,
-    ) -> Union[PlanContext, InteractionContext]:
+    ) -> Union[PlanContext, InteractionContext, ErrorContext]:
         """Generate a plan to solve a vision task.
         Parameters:
@@ -571,7 +588,7 @@ class VisionAgentPlannerV2(AgentPlanner):
                 for chat_elt in updated_chat:
                     self.update_callback(chat_elt.model_dump())
-            context: Union[PlanContext, InteractionContext]
+            context: Union[PlanContext, InteractionContext, ErrorContext]
             if interaction:
                 context = InteractionContext(chat=int_chat)
             else:

vision_agent/agent/vision_agent_prompts_v2.py CHANGED Viewed

@@ -16,17 +16,29 @@ AGENT: <response>Yes, I can help you with that. I will write the code to detect
 OBSERVATION:
 <final_code>
 from vision_agent.tools import load_image, owl_v2_image
-def detect_dogs(image_path: str):
+def detect_dogs(image_path: str) -> int:
     image = load_image(image_path)
     dogs = owl_v2_image(image)
-    return dogs
+    return len(dogs)
 </final_code>
 <final_test>
 def test_detect_dogs():
     dogs = detect_dogs("images/dogs.jpg")
-    assert len(dogs) > 0
+    assert isinstance(dogs, int)
+    print(f"Number of dogs detected: {{dogs}}")
+    return dogs
 </final_test>
+OBSERVATION: ----- stdout -----
+Number of dogs detected: 8
+----- stderr -----
+----- Intermediate output-----
+None
+----- Final output -----
+8
 AGENT: <response>Here is the code to detect dogs in the image.</response>
 --- END EXAMPLE1 ---

vision_agent/agent/vision_agent_v2.py CHANGED Viewed

@@ -11,6 +11,7 @@ from vision_agent.lmm import LMM
 from vision_agent.models import (
     AgentMessage,
     CodeContext,
+    ErrorContext,
     InteractionContext,
     Message,
     PlanContext,
@@ -27,7 +28,9 @@ CONFIG = Config()
 def extract_conversation(
-    chat: List[AgentMessage], include_conv: bool = False
+    chat: List[AgentMessage],
+    include_conv: bool = False,
+    include_errors: bool = False,
 ) -> Tuple[List[AgentMessage], Optional[str]]:
     chat = copy.deepcopy(chat)
@@ -43,13 +46,18 @@ def extract_conversation(
         elif chat_i.role == "coder":
             if "<final_code>" in chat_i.content:
                 extracted_chat.append(chat_i)
+        elif chat_i.role == "final_observation":
+            extracted_chat.append(chat_i)
         elif include_conv and chat_i.role == "conversation":
             extracted_chat.append(chat_i)
+        elif include_errors and chat_i.role == "error_observation":
+            extracted_chat.append(chat_i)
-    # only keep the last <final_code> and <final_test>
+    # only keep the last <final_code>, <final_test>
     final_code = None
     extracted_chat_strip_code: List[AgentMessage] = []
-    for chat_i in reversed(extracted_chat):
+    for chat_i in reversed((extracted_chat)):
+        # don't check role here because user could send updated <final_code>
         if "<final_code>" in chat_i.content and final_code is None:
             extracted_chat_strip_code = [chat_i] + extracted_chat_strip_code
             final_code = extract_tag(chat_i.content, "final_code")
@@ -66,7 +74,12 @@ def extract_conversation(
 def run_conversation(agent: LMM, chat: List[AgentMessage]) -> str:
-    extracted_chat, _ = extract_conversation(chat, include_conv=True)
+    # Include conversation and error messages. The error messages can come from one of
+    # the agents refusing to write a correctly formatted message; we want to inform the
+    # conversation agent of this.
+    extracted_chat, _ = extract_conversation(
+        chat, include_conv=True, include_errors=True
+    )
     conv = format_conversation(extracted_chat)
     prompt = CONVERSATION.format(
@@ -101,7 +114,9 @@ def maybe_run_action(
         if isinstance(context, CodeContext):
             return [
                 AgentMessage(role="coder", content=format_code_context(context)),
-                AgentMessage(role="observation", content=context.test_result.text()),
+                AgentMessage(
+                    role="final_observation", content=context.test_result.text()
+                ),
             ]
         elif isinstance(context, InteractionContext):
             return [
@@ -110,6 +125,10 @@ def maybe_run_action(
                     content=json.dumps([elt.model_dump() for elt in context.chat]),
                 )
             ]
+        elif isinstance(context, ErrorContext):
+            return [
+                AgentMessage(role="error_observation", content=context.error),
+            ]
     elif action == "edit_code":
         # We don't want to pass code in plan_context.code so the coder will generate
         # new code from plan_context.plan
@@ -129,7 +148,7 @@ def maybe_run_action(
         )
         return [
             AgentMessage(role="coder", content=format_code_context(context)),
-            AgentMessage(role="observation", content=context.test_result.text()),
+            AgentMessage(role="final_observation", content=context.test_result.text()),
         ]
     elif action == "view_image":
         pass

vision_agent/models/__init__.py CHANGED Viewed

@@ -1,4 +1,10 @@
-from .agent_types import AgentMessage, CodeContext, InteractionContext, PlanContext
+from .agent_types import (
+    AgentMessage,
+    CodeContext,
+    ErrorContext,
+    InteractionContext,
+    PlanContext,
+)
 from .lmm_types import Message, TextOrImage
 from .tools_types import (
     BboxInput,

vision_agent/models/agent_types.py CHANGED Viewed

@@ -29,11 +29,15 @@ class AgentMessage(BaseModel):
         Literal["user"],
         Literal["assistant"],  # planner, coder and conversation are of type assistant
         Literal["observation"],
+        Literal["final_observation"],  # the observation from the final code output
+        Literal["error_observation"],  # the observation from the error message
         Literal["interaction"],
         Literal["interaction_response"],
         Literal["conversation"],
         Literal["planner"],
-        Literal["planner_update"],
+        Literal[
+            "planner_update"
+        ],  # an intermediate update from the planner to show partial information
         Literal["coder"],
     ]
     content: str
@@ -75,3 +79,14 @@ class InteractionContext(BaseModel):
     """
     chat: List[AgentMessage]
+class ErrorContext(BaseModel):
+    """ErrorContext is a data model that represents an error message. These errors can
+    happen in the planning phase when a model does not output correctly formatted
+    messages (often because it considers some response to be a safety issue).
+    error: The error message.
+    """
+    error: str

vision_agent/tools/__init__.py CHANGED Viewed

@@ -7,9 +7,7 @@ from .meta_tools import (
     generate_vision_code,
     get_tool_descriptions,
     list_artifacts,
-    object_detection_fine_tuning,
     open_code_artifact,
-    use_object_detection_fine_tuning,
     view_media_artifact,
 )
 from .planner_tools import judge_od_results

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -11,11 +11,9 @@ import libcst as cst
 from IPython.display import display
 import vision_agent as va
-from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.models import BboxInput, BboxInputBase64, Message, PromptTask
+from vision_agent.models import Message
 from vision_agent.tools.tools import get_tools_descriptions as _get_tool_descriptions
 from vision_agent.utils.execute import Execution, MimeType
-from vision_agent.utils.image_utils import convert_to_b64
 from vision_agent.utils.tools_doc import get_tool_documentation
 CURRENT_FILE = None
@@ -573,48 +571,6 @@ def get_tool_descriptions() -> str:
     return _get_tool_descriptions()
-def object_detection_fine_tuning(bboxes: List[Dict[str, Any]]) -> str:
-    """DO NOT use this function unless the user has supplied you with bboxes.
-    'object_detection_fine_tuning' is a tool that fine-tunes object detection models to
-    be able to detect objects in an image based on a given dataset. It returns the fine
-    tuning job id.
-    Parameters:
-        bboxes (List[BboxInput]): A list of BboxInput containing the image path, labels
-            and bounding boxes. The coordinates are unnormalized.
-    Returns:
-        str: The fine tuning job id, this id will used to retrieve the fine tuned
-            model.
-    Example
-    -------
-        >>> fine_tuning_job_id = object_detection_fine_tuning(
-            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
-             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
-             "phrase_grounding"
-        )
-    """
-    task = "phrase_grounding"
-    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
-    task_type = PromptTask[task.upper()]
-    fine_tuning_request = [
-        BboxInputBase64(
-            image=convert_to_b64(bbox_input.image_path),
-            filename=Path(bbox_input.image_path).name,
-            labels=bbox_input.labels,
-            bboxes=bbox_input.bboxes,
-        )
-        for bbox_input in bboxes_input
-    ]
-    landing_api = LandingPublicAPI()
-    fine_tune_id = str(
-        landing_api.launch_fine_tuning_job("florencev2", task_type, fine_tuning_request)
-    )
-    print(f"[Fine tuning id: {fine_tune_id}]")
-    return fine_tune_id
 def get_diff(before: str, after: str) -> str:
     return "".join(
         difflib.unified_diff(
@@ -721,83 +677,6 @@ def use_extra_vision_agent_args(
     return modified_tree.code
-def use_object_detection_fine_tuning(
-    artifacts: Artifacts, name: str, fine_tune_id: str
-) -> str:
-    """Replaces calls to 'owl_v2_image', 'florence2_phrase_detection' and
-    'florence2_sam2_image' with the fine tuning id. This ensures that the code utilizes
-    the fined tuned florence2 model. Returns the diff between the original code and the
-    new code.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to edit the code from.
-        name (str): The name of the artifact to edit.
-        fine_tune_id (str): The fine tuning job id.
-    Examples
-    --------
-        >>> diff = use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")
-    """
-    if name not in artifacts:
-        output_str = f"[Artifact {name} does not exist]"
-        print(output_str)
-        return output_str
-    code = artifacts[name]
-    patterns_with_fine_tune_id = [
-        (
-            r'florence2_phrase_grounding\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'florence2_phrase_grounding("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-        (
-            r'florence2_phrase_grounding_video\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'florence2_phrase_grounding_video("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-        (
-            r'owl_v2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'owl_v2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-        (
-            r'florence2_sam2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'florence2_sam2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-    ]
-    new_code = code
-    for (
-        pattern_with_fine_tune_id,
-        replacer_with_fine_tune_id,
-    ) in patterns_with_fine_tune_id:
-        if re.search(pattern_with_fine_tune_id, new_code):
-            new_code = re.sub(
-                pattern_with_fine_tune_id, replacer_with_fine_tune_id, new_code
-            )
-    if new_code == code:
-        output_str = (
-            f"[No function calls to replace with fine tuning id in artifact {name}]"
-        )
-        print(output_str)
-        return output_str
-    artifacts[name] = new_code
-    diff = get_diff_with_prompts(name, code, new_code)
-    print(diff)
-    display(
-        {
-            MimeType.APPLICATION_ARTIFACT: json.dumps(
-                {"name": name, "content": new_code, "action": "edit"}
-            )
-        },
-        raw=True,
-    )
-    return diff
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -807,8 +686,6 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         generate_vision_code,
         edit_vision_code,
         view_media_artifact,
-        object_detection_fine_tuning,
-        use_object_detection_fine_tuning,
         list_artifacts,
     ]
 )

vision_agent/tools/tools.py CHANGED Viewed

@@ -9,7 +9,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from importlib import resources
 from pathlib import Path
 from typing import IO, Any, Callable, Dict, List, Optional, Tuple, Union, cast
-from uuid import UUID
 import cv2
 import numpy as np
@@ -20,10 +19,7 @@ from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
 from pytube import YouTube  # type: ignore
-from vision_agent.clients.landing_public_api import LandingPublicAPI
 from vision_agent.lmm.lmm import LMM, AnthropicLMM, OpenAILMM
-from vision_agent.models import JobStatus
-from vision_agent.utils.exceptions import FineTuneModelIsNotReady
 from vision_agent.utils.execute import FileSerializer, MimeType
 from vision_agent.utils.image_utils import (
     b64_to_pil,
@@ -239,7 +235,7 @@ def od_sam2_video_tracking(
     frames: List[np.ndarray],
     box_threshold: float = 0.30,
     chunk_length: Optional[int] = 50,
-    fine_tune_id: Optional[str] = None,
+    deployment_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     chunk_length = 50 if chunk_length is None else chunk_length
     segment_size = chunk_length
@@ -262,7 +258,7 @@ def od_sam2_video_tracking(
         prompt: str,
         segment_index: int,
         frame_number: int,
-        fine_tune_id: str,
+        deployment_id: str,
         segment_frames: list,
     ) -> tuple:
         """
@@ -273,7 +269,7 @@ def od_sam2_video_tracking(
             prompt: The prompt for the object detection model.
             segment_index: The index of the current segment.
             frame_number: The number of the current frame.
-            fine_tune_id: Optional fine-tune ID for the model.
+            deployment_id: Optional model deployment ID.
             segment_frames: List of frames for the current segment.
         Returns:
@@ -293,7 +289,6 @@ def od_sam2_video_tracking(
                 prompt=prompt,
                 image=segment_frames[frame_number],
                 box_threshold=box_threshold,
-                fine_tune_id=fine_tune_id,
             )
             function_name = "owlv2_object_detection"
@@ -301,7 +296,6 @@ def od_sam2_video_tracking(
             segment_results = florence2_object_detection(
                 prompt=prompt,
                 image=segment_frames[frame_number],
-                fine_tune_id=fine_tune_id,
             )
             function_name = "florence2_object_detection"
@@ -309,13 +303,12 @@ def od_sam2_video_tracking(
             segment_results = agentic_object_detection(
                 prompt=prompt,
                 image=segment_frames[frame_number],
-                fine_tune_id=fine_tune_id,
             )
             function_name = "agentic_object_detection"
         elif od_model == ODModels.CUSTOM:
             segment_results = custom_object_detection(
-                deployment_id=fine_tune_id,
+                deployment_id=deployment_id,
                 image=segment_frames[frame_number],
                 box_threshold=box_threshold,
             )
@@ -337,7 +330,7 @@ def od_sam2_video_tracking(
                 segment_frames=segment,
                 od_model=od_model,
                 prompt=prompt,
-                fine_tune_id=fine_tune_id,
+                deployment_id=deployment_id,
                 chunk_length=chunk_length,
                 image_size=image_size,
                 segment_index=segment_index,
@@ -376,7 +369,6 @@ def _owlv2_object_detection(
     box_threshold: float,
     image_size: Tuple[int, ...],
     image_bytes: Optional[bytes] = None,
-    fine_tune_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     if image_bytes is None:
         image_bytes = numpy_to_bytes(image)
@@ -389,21 +381,6 @@ def _owlv2_object_detection(
     }
     metadata = {"function_name": "owlv2_object_detection"}
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-        # we can only execute fine-tuned models with florence2
-        payload = {
-            "prompts": payload["prompts"],
-            "jobId": fine_tune_id,
-            "model": "florence2",
-        }
     detections = send_task_inference_request(
         payload,
         "text-to-object-detection",
@@ -440,7 +417,6 @@ def owlv2_object_detection(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.10,
-    fine_tune_id: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """'owlv2_object_detection' is a tool that can detect and count multiple objects
     given a text prompt such as category names or referring expressions on images. The
@@ -452,8 +428,6 @@ def owlv2_object_detection(
         image (np.ndarray): The image to ground the prompt to.
         box_threshold (float, optional): The threshold for the box detection. Defaults
             to 0.10.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -475,9 +449,7 @@ def owlv2_object_detection(
     if image_size[0] < 1 or image_size[1] < 1:
         return []
-    ret = _owlv2_object_detection(
-        prompt, image, box_threshold, image_size, fine_tune_id=fine_tune_id
-    )
+    ret = _owlv2_object_detection(prompt, image, box_threshold, image_size)
     _display_tool_trace(
         owlv2_object_detection.__name__,
@@ -556,7 +528,6 @@ def owlv2_sam2_video_tracking(
     frames: List[np.ndarray],
     box_threshold: float = 0.10,
     chunk_length: Optional[int] = 25,
-    fine_tune_id: Optional[str] = None,
 ) -> List[List[Dict[str, Any]]]:
     """'owlv2_sam2_video_tracking' is a tool that can track and segment multiple
     objects in a video given a text prompt such as category names or referring
@@ -571,8 +542,6 @@ def owlv2_sam2_video_tracking(
             to 0.10.
         chunk_length (Optional[int]): The number of frames to re-run owlv2 to find
             new objects.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[List[Dict[str, Any]]]: A list of list of dictionaries containing the
@@ -609,7 +578,6 @@ def owlv2_sam2_video_tracking(
         frames=frames,
         box_threshold=box_threshold,
         chunk_length=chunk_length,
-        fine_tune_id=fine_tune_id,
     )
     _display_tool_trace(
         owlv2_sam2_video_tracking.__name__,
@@ -624,7 +592,8 @@ def owlv2_sam2_video_tracking(
 def florence2_object_detection(
-    prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
+    prompt: str,
+    image: np.ndarray,
 ) -> List[Dict[str, Any]]:
     """'florence2_object_detection' is a tool that can detect multiple objects given a
     text prompt which can be object names or caption. You can optionally separate the
@@ -635,8 +604,6 @@ def florence2_object_detection(
         prompt (str): The prompt to ground to the image. Use exclusive categories that
             do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to used to detect objects
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -653,6 +620,7 @@ def florence2_object_detection(
             {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
         ]
     """
     image_size = image.shape[:2]
     if image_size[0] < 1 or image_size[1] < 1:
         return []
@@ -665,16 +633,6 @@ def florence2_object_detection(
     }
     metadata = {"function_name": "florence2_object_detection"}
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-        payload["jobId"] = fine_tune_id
     detections = send_task_inference_request(
         payload,
         "text-to-object-detection",
@@ -703,7 +661,8 @@ def florence2_object_detection(
 def florence2_sam2_instance_segmentation(
-    prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
+    prompt: str,
+    image: np.ndarray,
 ) -> List[Dict[str, Any]]:
     """'florence2_sam2_instance_segmentation' is a tool that can segment multiple
     objects given a text prompt such as category names or referring expressions. The
@@ -715,8 +674,6 @@ def florence2_sam2_instance_segmentation(
         prompt (str): The prompt to ground to the image. Use exclusive categories that
             do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to ground the prompt to.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label,
@@ -742,6 +699,7 @@ def florence2_sam2_instance_segmentation(
             },
         ]
     """
     if image.shape[0] < 1 or image.shape[1] < 1:
         return []
@@ -753,16 +711,6 @@ def florence2_sam2_instance_segmentation(
     }
     metadata = {"function_name": "florence2_sam2_instance_segmentation"}
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-        payload["jobId"] = fine_tune_id
     detections = send_task_inference_request(
         payload,
         "text-to-instance-segmentation",
@@ -792,7 +740,6 @@ def florence2_sam2_video_tracking(
     prompt: str,
     frames: List[np.ndarray],
     chunk_length: Optional[int] = 25,
-    fine_tune_id: Optional[str] = None,
 ) -> List[List[Dict[str, Any]]]:
     """'florence2_sam2_video_tracking' is a tool that can track and segment multiple
     objects in a video given a text prompt such as category names or referring
@@ -806,8 +753,6 @@ def florence2_sam2_video_tracking(
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run florence2 to find
             new objects.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[List[Dict[str, Any]]]: A list of list of dictionaries containing the
@@ -837,6 +782,7 @@ def florence2_sam2_video_tracking(
             ...
         ]
     """
     if len(frames) == 0 or not isinstance(frames, List):
         raise ValueError("Must provide a list of numpy arrays for frames")
@@ -851,16 +797,6 @@ def florence2_sam2_video_tracking(
     if chunk_length is not None:
         payload["chunk_length_frames"] = chunk_length  # type: ignore
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-        payload["jobId"] = fine_tune_id
     detections = send_task_inference_request(
         payload,
         "text-to-instance-segmentation",
@@ -1397,7 +1333,7 @@ def custom_od_sam2_video_tracking(
         prompt="",
         frames=frames,
         chunk_length=chunk_length,
-        fine_tune_id=deployment_id,
+        deployment_id=deployment_id,
     )
     _display_tool_trace(
         custom_od_sam2_video_tracking.__name__,
@@ -1416,7 +1352,6 @@ def _agentic_object_detection(
     image: np.ndarray,
     image_size: Tuple[int, ...],
     image_bytes: Optional[bytes] = None,
-    fine_tune_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     if image_bytes is None:
         image_bytes = numpy_to_bytes(image)
@@ -1428,21 +1363,6 @@ def _agentic_object_detection(
     }
     metadata = {"function_name": "agentic_object_detection"}
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-        # we can only execute fine-tuned models with florence2
-        payload = {
-            "prompts": payload["prompts"],
-            "jobId": fine_tune_id,
-            "model": "florence2",
-        }
     detections = send_task_inference_request(
         payload,
         "text-to-object-detection",
@@ -1478,7 +1398,6 @@ def _agentic_object_detection(
 def agentic_object_detection(
     prompt: str,
     image: np.ndarray,
-    fine_tune_id: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """'agentic_object_detection' is a tool that can detect multiple objects given a
     text prompt such as object names or referring expressions on images. It's
@@ -1490,8 +1409,6 @@ def agentic_object_detection(
         prompt (str): The prompt to ground to the image, only supports a single prompt
             with no commas or periods.
         image (np.ndarray): The image to ground the prompt to.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -1513,9 +1430,7 @@ def agentic_object_detection(
     if image_size[0] < 1 or image_size[1] < 1:
         return []
-    ret = _agentic_object_detection(
-        prompt, image, image_size, fine_tune_id=fine_tune_id
-    )
+    ret = _agentic_object_detection(prompt, image, image_size)
     _display_tool_trace(
         agentic_object_detection.__name__,
@@ -1586,7 +1501,6 @@ def agentic_sam2_video_tracking(
     prompt: str,
     frames: List[np.ndarray],
     chunk_length: Optional[int] = 25,
-    fine_tune_id: Optional[str] = None,
 ) -> List[List[Dict[str, Any]]]:
     """'agentic_sam2_video_tracking' is a tool that can track and segment multiple
     objects in a video given a text prompt such as object names or referring
@@ -1601,8 +1515,6 @@ def agentic_sam2_video_tracking(
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run agentic object detection to
             to find new objects.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
     Returns:
         List[List[Dict[str, Any]]]: A list of list of dictionaries containing the
@@ -1638,7 +1550,6 @@ def agentic_sam2_video_tracking(
         prompt=prompt,
         frames=frames,
         chunk_length=chunk_length,
-        fine_tune_id=fine_tune_id,
     )
     _display_tool_trace(
         agentic_sam2_video_tracking.__name__,

vision_agent/utils/agent.py CHANGED Viewed

@@ -159,11 +159,12 @@ def format_conversation(chat: List[AgentMessage]) -> str:
     chat = copy.deepcopy(chat)
     prompt = ""
     for chat_i in chat:
-        if chat_i.role == "user" or chat_i.role == "coder":
-            if "<final_code>" in chat_i.content:
-                prompt += f"OBSERVATION: {chat_i.content}\n\n"
-            elif chat_i.role == "user":
+        # we want to print user messages, final code, final code observations or errors
+        if chat_i.role in ["user", "coder", "final_observation", "error_observation"]:
+            if chat_i.role == "user":
                 prompt += f"USER: {chat_i.content}\n\n"
+            else:
+                prompt += f"OBSERVATION: {chat_i.content}\n\n"
         elif chat_i.role == "conversation":
             prompt += f"AGENT: {chat_i.content}\n\n"
     return prompt

vision_agent/utils/exceptions.py CHANGED Viewed

@@ -51,13 +51,6 @@ class RemoteSandboxClosedError(RemoteSandboxError):
     is_retryable = True
-class FineTuneModelIsNotReady(Exception):
-    """Exception raised when the fine-tune model is not ready.
-    If this is raised, it's recommended to wait 5 seconds before trying to use
-    the model again.
-    """
 class FineTuneModelNotFound(Exception):
     """Exception raised when the fine-tune model is not found.
     If this is raised, it's recommended to try another model id.

vision_agent/utils/video_tracking.py CHANGED Viewed

@@ -54,7 +54,7 @@ def process_segment(
     segment_frames: List[np.ndarray],
     od_model: ODModels,
     prompt: str,
-    fine_tune_id: Optional[str],
+    deployment_id: Optional[str],
     chunk_length: Optional[int],
     image_size: Tuple[int, ...],
     segment_index: int,
@@ -67,7 +67,7 @@ def process_segment(
         segment_frames (List[np.ndarray]): Frames in the segment.
         od_model (ODModels): Object detection model to use.
         prompt (str): Prompt for the model.
-        fine_tune_id (Optional[str]): Fine-tune model ID.
+        deployment_id (Optional[str]): The model deployment ID.
         chunk_length (Optional[int]): Chunk length for processing.
         image_size (Tuple[int, int]): Size of the images.
         segment_index (int): Index of the segment.
@@ -90,7 +90,12 @@ def process_segment(
     for idx in range(0, len(segment_frames), step):
         frame_number = idx
         segment_results[idx], function_name = object_detection_tool(
-            od_model, prompt, segment_index, frame_number, fine_tune_id, segment_frames
+            deployment_id=deployment_id,
+            frame_number=frame_number,
+            od_model=od_model,
+            prompt=prompt,
+            segment_frames=segment_frames,
+            segment_index=segment_index,
         )
     transformed_detections = transform_detections(

{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.240
+Version: 0.2.242
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/RECORD RENAMED Viewed

@@ -3,22 +3,21 @@ vision_agent/.sim_tools/embs.npy,sha256=pi7h3NHlrKncIGNR-oPn_XoTe2PzBb9-aFMi7qK0
 vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/README.md,sha256=Q4w7FWw38qaWosQYAZ7NqWx8Q5XzuWrlv7nLhjUd1-8,5527
 vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs4,617
-vision_agent/agent/agent.py,sha256=RoS7kMfXYILv0zuPpcxqQIlaHGa3K-qw_5EwgsEJTPQ,1530
+vision_agent/agent/agent.py,sha256=o1Zuhl6h2R7uVwvUur0Aj38kak8U08plfeFWPst_ErM,1576
 vision_agent/agent/vision_agent.py,sha256=4LqvwPTSsiuJEDwBbMx9Dg9ALJwNR6x1c63TZvOMm8A,23486
 vision_agent/agent/vision_agent_coder.py,sha256=Ry6AiyAj3hsSeYPu_5guMcTzf2E4SoebPzpHyJtSPbQ,27360
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=D4RJxTWoxpl-WtYRvHNxaLSdWVHsdYb0jJIQ2ZCGU0A,12277
 vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=53b_DhQtffX5wxLuCbNQ83AJhB0P_3wEnuKr-v5bx-o,4866
-vision_agent/agent/vision_agent_coder_v2.py,sha256=bWAUyk7-lYKwLIjkL_wUTeYv06zIIroJE1yIDRaGUHw,17059
+vision_agent/agent/vision_agent_coder_v2.py,sha256=I4gWrneFIqhX6W-MxiaNyPKGk5tRKgC8xryV-YdeSZU,17289
 vision_agent/agent/vision_agent_planner.py,sha256=rp_atRMDg35WFXNKOTkjUpGPrpSCsiMhcfZtqK-DIV4,18668
 vision_agent/agent/vision_agent_planner_prompts.py,sha256=rYRdJthc-sQN57VgCBKrF09Sd73BSxcBdjNe6C4WNZ8,6837
 vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=TiiF5BGnFVraFlQnDaeRU67927LvszvpcMUOgVgo0ps,35843
-vision_agent/agent/vision_agent_planner_v2.py,sha256=IqEP5ded5P4ESkLEur81gUvJtTmTdlKAx9uQyLyIwPc,21212
+vision_agent/agent/vision_agent_planner_v2.py,sha256=GOhaTsVCh02X09IKkC4k9z79lsmU4VgRW7WJLKjdG1k,21755
 vision_agent/agent/vision_agent_prompts.py,sha256=KaJwYPUP7_GvQsCPPs6Fdawmi3AQWmWajBUuzj7gTG4,13812
-vision_agent/agent/vision_agent_prompts_v2.py,sha256=Wyxa15NOe75PefAfw3_RRwvgjg8YVqCrU7WvvWoYJpk,2733
-vision_agent/agent/vision_agent_v2.py,sha256=O070_QdgsqNzex5eRtye8QmJgCtHcf_B7zeteWpw3LM,10895
+vision_agent/agent/vision_agent_prompts_v2.py,sha256=jTfu_heNTBaHj1UNI0XIyyFDgDOjPTPP83vrS-g3A1U,2961
+vision_agent/agent/vision_agent_v2.py,sha256=QPAyDjnRRHUCD4Pw4TQYffWkucbn4WkEjYn8dBIWll4,11682
 vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
-vision_agent/clients/landing_public_api.py,sha256=Vz9lldtNbaJRWzT7T8-uQrC-dMnt47LIsDrxHgoVdEw,1492
 vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
 vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
 vision_agent/configs/anthropic_openai_config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
@@ -28,28 +27,28 @@ vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=4qX2lmGnKWHeKftXueEi9xj_ieK2nQh_ipHf72nKGFk,84
 vision_agent/lmm/lmm.py,sha256=XYp1frrqQ-6q-0y2IWwM8-EIH5UrFZ21SAhkcM32J9w,19355
-vision_agent/models/__init__.py,sha256=qAdygB-0EsmxMHNzYTPNM6tAF8Fym95gm9bsHJafdgE,287
-vision_agent/models/agent_types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
+vision_agent/models/__init__.py,sha256=eIP0pD5dYog8zUA7uuTmUxCF6SIutbLRLRE0cmuCJgQ,326
+vision_agent/models/agent_types.py,sha256=vBZ9-ns5lHDdFMO7ulCGGeZ6OwRo3gK4O3vN0814IWc,3064
 vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
 vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
 vision_agent/sim/sim.py,sha256=VSU_1rYd4ifvF45xKWBEYugxdeeEQVpj0QL6rjx49i4,9801
-vision_agent/tools/__init__.py,sha256=T-MPNBVbvWtfo71hobaZsdYzQ52oyymolk_OAb2Pq_g,2463
-vision_agent/tools/meta_tools.py,sha256=-heMwGkx0hX_9zUp1dgBqsJpVnl6Y6tErMsjFy0dwLM,28652
+vision_agent/tools/__init__.py,sha256=bYrOPuqrpwFA3TeY_pxRXVv61oJsxVWVgv1psJlBEcc,2391
+vision_agent/tools/meta_tools.py,sha256=DNRXHX9nZ1GBeqeLiq87sBshoe0aiZeYasETbG-9neI,24053
 vision_agent/tools/planner_tools.py,sha256=orBTdJQz2NKoLuX9WE6XixaYuG305xz0UBYvZOiuquQ,19474
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tools.py,sha256=8J-SYpyUeqMDajF7kp2aiTeBBQrJEWGVdEsQLPAc-OM,111511
+vision_agent/tools/tools.py,sha256=uhvgPeAzhOV2vfBa216vq-JVItqgzIRKs1JMBezj2Es,107631
 vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
-vision_agent/utils/agent.py,sha256=QGKcbzpAjcVj0958bXYLv07-d2i1GU7-bXVG7bTGRMA,14619
-vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
+vision_agent/utils/agent.py,sha256=8z4Ei0q397lVWUga8v9nQKuenGAsh2wfkAKQOB8CwpI,14701
+vision_agent/utils/exceptions.py,sha256=zis8smCbdEylBVZBTVfEUfAh7Rb7cWV3MSPambu6FsQ,1837
 vision_agent/utils/execute.py,sha256=vOEP5Ys7S2lc0_7pOJbgk7OaWi85hrCNu9_8Bo3zk6I,29356
 vision_agent/utils/image_utils.py,sha256=bJM2mEvB6E__M9pxi74yQYzAiZ7mu3KE2ptyVrp5vzQ,12533
 vision_agent/utils/tools.py,sha256=USZL0MKsiJgqA8RFiYRTcj_Kn2FVYKLHK4wIk0gP1Ow,7694
 vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8Wis,2523
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=rjsQ1sKKisaQ6AVjJz0zd_G4g-ovRweS_rs4JEhenoI,5340
-vision_agent/utils/video_tracking.py,sha256=GM9qfeawqhmZVWoKrzw5-NETd4gEo7ImMfWtBnhC3bw,12086
-vision_agent-0.2.240.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.240.dist-info/METADATA,sha256=l9FlzNIT3ncQNxkIlTTUsB1aaL-7u2b1OtvYcRv0AIE,5712
-vision_agent-0.2.240.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.240.dist-info/RECORD,,
+vision_agent/utils/video_tracking.py,sha256=eMIiWOG24bgXbqOy1DTtepO2gPo1ClW6Y0tdbEF_14k,12227
+vision_agent-0.2.242.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.242.dist-info/METADATA,sha256=Lvr9OdngkgZJd-ifod6Wp8FuX0BnAmR6fZIelqAmjz8,5712
+vision_agent-0.2.242.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.242.dist-info/RECORD,,

vision_agent/clients/landing_public_api.py DELETED Viewed

@@ -1,38 +0,0 @@
-import os
-from typing import List
-from uuid import UUID
-from requests.exceptions import HTTPError
-from vision_agent.clients.http import BaseHTTP
-from vision_agent.models import BboxInputBase64, JobStatus, PromptTask
-from vision_agent.utils.exceptions import FineTuneModelNotFound
-from vision_agent.utils.type_defs import LandingaiAPIKey
-class LandingPublicAPI(BaseHTTP):
-    def __init__(self) -> None:
-        landing_url = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
-        landing_api_key = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
-        headers = {"Content-Type": "application/json", "apikey": landing_api_key}
-        super().__init__(base_endpoint=landing_url, headers=headers)
-    def launch_fine_tuning_job(
-        self, model_name: str, task: PromptTask, bboxes: List[BboxInputBase64]
-    ) -> UUID:
-        url = "v1/agent/jobs/fine-tuning"
-        data = {
-            "model": {"name": model_name, "task": task.value},
-            "bboxes": [bbox.model_dump(by_alias=True) for bbox in bboxes],
-        }
-        response = self.post(url, payload=data)
-        return UUID(response["jobId"])
-    def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
-        url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
-        try:
-            get_job = self.get(url)
-        except HTTPError as err:
-            if err.response.status_code == 404:
-                raise FineTuneModelNotFound()
-        return JobStatus(get_job["status"])

{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.240.dist-info → vision_agent-0.2.242.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.240__py3-none-any.whl → 0.2.242__py3-none-any.whl

vision-agent 0.2.240__py3-none-any.whl → 0.2.242__py3-none-any.whl