PyPI - vision-agent - Versions diffs - 0.2.144__tar.gz → 0.2.146__tar.gz - Mend

vision-agent 0.2.144.tar.gz → 0.2.146.tar.gz

Files changed (33) hide show

{vision_agent-0.2.144 → vision_agent-0.2.146}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.144
+Version: 0.2.146
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.144 → vision_agent-0.2.146}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.144"
+version = "0.2.146"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.144 → vision_agent-0.2.146}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -30,12 +30,6 @@ WORKSPACE.mkdir(parents=True, exist_ok=True)
 if str(WORKSPACE) != "":
     os.environ["PYTHONPATH"] = f"{WORKSPACE}:{os.getenv('PYTHONPATH', '')}"
-STUCK_IN_LOOP_ERROR_MESSAGE = {
-    "name": "Error when running conversation agent",
-    "value": "Agent is stuck in conversation loop, exited",
-    "traceback_raw": [],
-}
 class BoilerplateCode:
     pre_code = [
@@ -298,13 +292,6 @@ class VisionAgent(Agent):
                 # sometimes it gets stuck in a loop, so we force it to exit
                 if last_response == response:
                     response["let_user_respond"] = True
-                    self.streaming_message(
-                        {
-                            "role": "assistant",
-                            "content": "{}",
-                            "error": STUCK_IN_LOOP_ERROR_MESSAGE,
-                        }
-                    )
                 finished = response["let_user_respond"]
@@ -317,7 +304,11 @@ class VisionAgent(Agent):
                         {
                             "role": "assistant",
                             "content": "{}",
-                            "error": STUCK_IN_LOOP_ERROR_MESSAGE,
+                            "error": {
+                                "name": "Error when running conversation agent",
+                                "value": "Agent is stuck in conversation loop, exited",
+                                "traceback_raw": [],
+                            },
                             "finished": finished and code_action is None,
                         }
                     )

{vision_agent-0.2.144 → vision_agent-0.2.146}/vision_agent/tools/tools.py RENAMED Viewed

@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
             fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
         )
         data = data_obj.model_dump(by_alias=True)
-        detections = send_inference_request(data, "tools", v2=False)
+        detections = send_inference_request(
+            data,
+            "tools",
+            v2=False,
+            metadata_payload={"function_name": "florence2_phrase_grounding"},
+        )
     else:
         data = {
             "image": image_b64,
@@ -1754,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
 def overlay_bounding_boxes(
-    image: np.ndarray, bboxes: List[Dict[str, Any]]
-) -> np.ndarray:
+    medias: Union[np.ndarray, List[np.ndarray]],
+    bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
+) -> Union[np.ndarray, List[np.ndarray]]:
     """'overlay_bounding_boxes' is a utility function that displays bounding boxes on
     an image.
     Parameters:
-        image (np.ndarray): The image to display the bounding boxes on.
-        bboxes (List[Dict[str, Any]]): A list of dictionaries containing the bounding
+        medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display the
+            bounding boxes on.
+        bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
+            dictionaries or a list of list of dictionaries containing the bounding
             boxes.
     Returns:
@@ -1773,41 +1781,54 @@ def overlay_bounding_boxes(
             image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
         )
     """
-    pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
-    if len(set([box["label"] for box in bboxes])) > len(COLORS):
+    medias_int: List[np.ndarray] = (
+        [medias] if isinstance(medias, np.ndarray) else medias
+    )
+    bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
+    bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
+    labels = set([bb["label"] for b in bbox_int for bb in b])
+    if len(labels) > len(COLORS):
         _LOGGER.warning(
             "Number of unique labels exceeds the number of available colors. Some labels may have the same color."
         )
-    color = {
-        label: COLORS[i % len(COLORS)]
-        for i, label in enumerate(set([box["label"] for box in bboxes]))
-    }
-    bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
+    color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
-    width, height = pil_image.size
-    fontsize = max(12, int(min(width, height) / 40))
-    draw = ImageDraw.Draw(pil_image)
-    font = ImageFont.truetype(
-        str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
-        fontsize,
-    )
+    frame_out = []
+    for i, frame in enumerate(medias_int):
+        pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
-    for elt in bboxes:
-        label = elt["label"]
-        box = elt["bbox"]
-        scores = elt["score"]
+        bboxes = bbox_int[i]
+        bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
-        # denormalize the box if it is normalized
-        box = denormalize_bbox(box, (height, width))
+        width, height = pil_image.size
+        fontsize = max(12, int(min(width, height) / 40))
+        draw = ImageDraw.Draw(pil_image)
+        font = ImageFont.truetype(
+            str(
+                resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
+            ),
+            fontsize,
+        )
-        draw.rectangle(box, outline=color[label], width=4)
-        text = f"{label}: {scores:.2f}"
-        text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
-        draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
-        draw.text((box[0], box[1]), text, fill="black", font=font)
-    return np.array(pil_image)
+        for elt in bboxes:
+            label = elt["label"]
+            box = elt["bbox"]
+            scores = elt["score"]
+            # denormalize the box if it is normalized
+            box = denormalize_bbox(box, (height, width))
+            draw.rectangle(box, outline=color[label], width=4)
+            text = f"{label}: {scores:.2f}"
+            text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
+            draw.rectangle(
+                (box[0], box[1], text_box[2], text_box[3]), fill=color[label]
+            )
+            draw.text((box[0], box[1]), text, fill="black", font=font)
+        frame_out.append(np.array(pil_image))
+    return frame_out[0] if len(frame_out) == 1 else frame_out
 def _get_text_coords_from_mask(
@@ -1847,7 +1868,8 @@ def overlay_segmentation_masks(
         medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
             the masks on.
         masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
-            dictionaries containing the masks, labels and scores.
+            dictionaries or a list of list of dictionaries containing the masks, labels
+            and scores.
         draw_label (bool, optional): If True, the labels will be displayed on the image.
         secondary_label_key (str, optional): The key to use for the secondary
             tracking label which is needed in videos to display tracking information.