PyPI - vision-agent - Versions diffs - 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl - Mend

vision-agent 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -30,12 +30,6 @@ WORKSPACE.mkdir(parents=True, exist_ok=True)
 if str(WORKSPACE) != "":
     os.environ["PYTHONPATH"] = f"{WORKSPACE}:{os.getenv('PYTHONPATH', '')}"
-STUCK_IN_LOOP_ERROR_MESSAGE = {
-    "name": "Error when running conversation agent",
-    "value": "Agent is stuck in conversation loop, exited",
-    "traceback_raw": [],
-}
 class BoilerplateCode:
     pre_code = [
@@ -298,13 +292,6 @@ class VisionAgent(Agent):
                 # sometimes it gets stuck in a loop, so we force it to exit
                 if last_response == response:
                     response["let_user_respond"] = True
-                    self.streaming_message(
-                        {
-                            "role": "assistant",
-                            "content": "{}",
-                            "error": STUCK_IN_LOOP_ERROR_MESSAGE,
-                        }
-                    )
                 finished = response["let_user_respond"]
@@ -317,7 +304,11 @@ class VisionAgent(Agent):
                         {
                             "role": "assistant",
                             "content": "{}",
-                            "error": STUCK_IN_LOOP_ERROR_MESSAGE,
+                            "error": {
+                                "name": "Error when running conversation agent",
+                                "value": "Agent is stuck in conversation loop, exited",
+                                "traceback_raw": [],
+                            },
                             "finished": finished and code_action is None,
                         }
                     )

vision_agent/tools/tools.py CHANGED Viewed

@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
             fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
         )
         data = data_obj.model_dump(by_alias=True)
-        detections = send_inference_request(data, "tools", v2=False)
+        detections = send_inference_request(
+            data,
+            "tools",
+            v2=False,
+            metadata_payload={"function_name": "florence2_phrase_grounding"},
+        )
     else:
         data = {
             "image": image_b64,
@@ -1754,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
 def overlay_bounding_boxes(
-    image: np.ndarray, bboxes: List[Dict[str, Any]]
-) -> np.ndarray:
+    medias: Union[np.ndarray, List[np.ndarray]],
+    bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
+) -> Union[np.ndarray, List[np.ndarray]]:
     """'overlay_bounding_boxes' is a utility function that displays bounding boxes on
     an image.
     Parameters:
-        image (np.ndarray): The image to display the bounding boxes on.
-        bboxes (List[Dict[str, Any]]): A list of dictionaries containing the bounding
+        medias (Union[np.ndarray, List[np.ndarra]]): The image or frames to display the
+            bounding boxes on.
+        bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
+            dictionaries or a list of list of dictionaries containing the bounding
             boxes.
     Returns:
@@ -1773,41 +1781,54 @@ def overlay_bounding_boxes(
             image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
         )
     """
-    pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
-    if len(set([box["label"] for box in bboxes])) > len(COLORS):
+    medias_int: List[np.ndarray] = (
+        [medias] if isinstance(medias, np.ndarray) else medias
+    )
+    bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
+    bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
+    labels = set([bb["label"] for b in bbox_int for bb in b])
+    if len(labels) > len(COLORS):
         _LOGGER.warning(
             "Number of unique labels exceeds the number of available colors. Some labels may have the same color."
         )
-    color = {
-        label: COLORS[i % len(COLORS)]
-        for i, label in enumerate(set([box["label"] for box in bboxes]))
-    }
-    bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
+    color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
-    width, height = pil_image.size
-    fontsize = max(12, int(min(width, height) / 40))
-    draw = ImageDraw.Draw(pil_image)
-    font = ImageFont.truetype(
-        str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
-        fontsize,
-    )
+    frame_out = []
+    for i, frame in enumerate(medias_int):
+        pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
-    for elt in bboxes:
-        label = elt["label"]
-        box = elt["bbox"]
-        scores = elt["score"]
+        bboxes = bbox_int[i]
+        bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
-        # denormalize the box if it is normalized
-        box = denormalize_bbox(box, (height, width))
+        width, height = pil_image.size
+        fontsize = max(12, int(min(width, height) / 40))
+        draw = ImageDraw.Draw(pil_image)
+        font = ImageFont.truetype(
+            str(
+                resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
+            ),
+            fontsize,
+        )
-        draw.rectangle(box, outline=color[label], width=4)
-        text = f"{label}: {scores:.2f}"
-        text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
-        draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
-        draw.text((box[0], box[1]), text, fill="black", font=font)
-    return np.array(pil_image)
+        for elt in bboxes:
+            label = elt["label"]
+            box = elt["bbox"]
+            scores = elt["score"]
+            # denormalize the box if it is normalized
+            box = denormalize_bbox(box, (height, width))
+            draw.rectangle(box, outline=color[label], width=4)
+            text = f"{label}: {scores:.2f}"
+            text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
+            draw.rectangle(
+                (box[0], box[1], text_box[2], text_box[3]), fill=color[label]
+            )
+            draw.text((box[0], box[1]), text, fill="black", font=font)
+        frame_out.append(np.array(pil_image))
+    return frame_out[0] if len(frame_out) == 1 else frame_out
 def _get_text_coords_from_mask(
@@ -1847,7 +1868,8 @@ def overlay_segmentation_masks(
         medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
             the masks on.
         masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
-            dictionaries containing the masks, labels and scores.
+            dictionaries or a list of list of dictionaries containing the masks, labels
+            and scores.
         draw_label (bool, optional): If True, the labels will be displayed on the image.
         secondary_label_key (str, optional): The key to use for the secondary
             tracking label which is needed in videos to display tracking information.

{vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.144
+Version: 0.2.146
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=PEUHqvnHmFL4np_TeFmKMwr5s_dWfdfJz6TF_ogd1dU,2353
-vision_agent/agent/vision_agent.py,sha256=WW0vtu8EFp7sFmU8z5_GDEduMOh9e0y4R3ZDiFDYJmM,17812
+vision_agent/agent/vision_agent.py,sha256=Fp2uSbroRzGrxEwbb9srGdl0h31awkzDFm2tTfn28GI,17587
 vision_agent/agent/vision_agent_coder.py,sha256=4bbebV1sKE10vsxcZR-R8P54X2HjLeU9lDt7ylIZAT4,38429
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=YWK4C--YRS1Kuab11Gn-AXBzar1j_GNnTnxi_nnaPRY,14901
 vision_agent/agent/vision_agent_prompts.py,sha256=e_ASPeRFU1yZsQhCkK_bIBG-eyIWyWXmN64lFk-r7e0,10897
@@ -18,7 +18,7 @@ vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM8
 vision_agent/tools/meta_tools.py,sha256=iHvMeBktWcVi-0DOrSMak1gsZrM_VKJlAq1mAFbBemE,23477
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
-vision_agent/tools/tools.py,sha256=dD_8AmAQb0oKVZHg2w2kSKlvWrG9yaKRbaHTz_kHgjA,73648
+vision_agent/tools/tools.py,sha256=c7SjtZD7YfxhEAGYYe-ExVCBA4NDXmRwerBIbd-XEH8,74557
 vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
 vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
-vision_agent-0.2.144.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.144.dist-info/METADATA,sha256=HcZyYla50SBGHFDstUNElj7524PT64XT5a6_VQV_y6E,13758
-vision_agent-0.2.144.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.144.dist-info/RECORD,,
+vision_agent-0.2.146.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.146.dist-info/METADATA,sha256=0xtIEPRJgc8ifV8z9C6OoLyi9w7wqQV7mpUmrWsLSVM,13758
+vision_agent-0.2.146.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.146.dist-info/RECORD,,

{vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl

vision-agent 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl