vision-agent 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,12 +30,6 @@ WORKSPACE.mkdir(parents=True, exist_ok=True)
30
30
  if str(WORKSPACE) != "":
31
31
  os.environ["PYTHONPATH"] = f"{WORKSPACE}:{os.getenv('PYTHONPATH', '')}"
32
32
 
33
- STUCK_IN_LOOP_ERROR_MESSAGE = {
34
- "name": "Error when running conversation agent",
35
- "value": "Agent is stuck in conversation loop, exited",
36
- "traceback_raw": [],
37
- }
38
-
39
33
 
40
34
  class BoilerplateCode:
41
35
  pre_code = [
@@ -298,13 +292,6 @@ class VisionAgent(Agent):
298
292
  # sometimes it gets stuck in a loop, so we force it to exit
299
293
  if last_response == response:
300
294
  response["let_user_respond"] = True
301
- self.streaming_message(
302
- {
303
- "role": "assistant",
304
- "content": "{}",
305
- "error": STUCK_IN_LOOP_ERROR_MESSAGE,
306
- }
307
- )
308
295
 
309
296
  finished = response["let_user_respond"]
310
297
 
@@ -317,7 +304,11 @@ class VisionAgent(Agent):
317
304
  {
318
305
  "role": "assistant",
319
306
  "content": "{}",
320
- "error": STUCK_IN_LOOP_ERROR_MESSAGE,
307
+ "error": {
308
+ "name": "Error when running conversation agent",
309
+ "value": "Agent is stuck in conversation loop, exited",
310
+ "traceback_raw": [],
311
+ },
321
312
  "finished": finished and code_action is None,
322
313
  }
323
314
  )
@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
1181
1181
  fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
1182
1182
  )
1183
1183
  data = data_obj.model_dump(by_alias=True)
1184
- detections = send_inference_request(data, "tools", v2=False)
1184
+ detections = send_inference_request(
1185
+ data,
1186
+ "tools",
1187
+ v2=False,
1188
+ metadata_payload={"function_name": "florence2_phrase_grounding"},
1189
+ )
1185
1190
  else:
1186
1191
  data = {
1187
1192
  "image": image_b64,
@@ -1754,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
1754
1759
 
1755
1760
 
1756
1761
  def overlay_bounding_boxes(
1757
- image: np.ndarray, bboxes: List[Dict[str, Any]]
1758
- ) -> np.ndarray:
1762
+ medias: Union[np.ndarray, List[np.ndarray]],
1763
+ bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
1764
+ ) -> Union[np.ndarray, List[np.ndarray]]:
1759
1765
  """'overlay_bounding_boxes' is a utility function that displays bounding boxes on
1760
1766
  an image.
1761
1767
 
1762
1768
  Parameters:
1763
- image (np.ndarray): The image to display the bounding boxes on.
1764
- bboxes (List[Dict[str, Any]]): A list of dictionaries containing the bounding
1769
+ medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display the
1770
+ bounding boxes on.
1771
+ bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
1772
+ dictionaries or a list of list of dictionaries containing the bounding
1765
1773
  boxes.
1766
1774
 
1767
1775
  Returns:
@@ -1773,41 +1781,54 @@ def overlay_bounding_boxes(
1773
1781
  image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
1774
1782
  )
1775
1783
  """
1776
- pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
1777
1784
 
1778
- if len(set([box["label"] for box in bboxes])) > len(COLORS):
1785
+ medias_int: List[np.ndarray] = (
1786
+ [medias] if isinstance(medias, np.ndarray) else medias
1787
+ )
1788
+ bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
1789
+ bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
1790
+ labels = set([bb["label"] for b in bbox_int for bb in b])
1791
+
1792
+ if len(labels) > len(COLORS):
1779
1793
  _LOGGER.warning(
1780
1794
  "Number of unique labels exceeds the number of available colors. Some labels may have the same color."
1781
1795
  )
1782
1796
 
1783
- color = {
1784
- label: COLORS[i % len(COLORS)]
1785
- for i, label in enumerate(set([box["label"] for box in bboxes]))
1786
- }
1787
- bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
1797
+ color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
1788
1798
 
1789
- width, height = pil_image.size
1790
- fontsize = max(12, int(min(width, height) / 40))
1791
- draw = ImageDraw.Draw(pil_image)
1792
- font = ImageFont.truetype(
1793
- str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
1794
- fontsize,
1795
- )
1799
+ frame_out = []
1800
+ for i, frame in enumerate(medias_int):
1801
+ pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
1796
1802
 
1797
- for elt in bboxes:
1798
- label = elt["label"]
1799
- box = elt["bbox"]
1800
- scores = elt["score"]
1803
+ bboxes = bbox_int[i]
1804
+ bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
1801
1805
 
1802
- # denormalize the box if it is normalized
1803
- box = denormalize_bbox(box, (height, width))
1806
+ width, height = pil_image.size
1807
+ fontsize = max(12, int(min(width, height) / 40))
1808
+ draw = ImageDraw.Draw(pil_image)
1809
+ font = ImageFont.truetype(
1810
+ str(
1811
+ resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
1812
+ ),
1813
+ fontsize,
1814
+ )
1804
1815
 
1805
- draw.rectangle(box, outline=color[label], width=4)
1806
- text = f"{label}: {scores:.2f}"
1807
- text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
1808
- draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
1809
- draw.text((box[0], box[1]), text, fill="black", font=font)
1810
- return np.array(pil_image)
1816
+ for elt in bboxes:
1817
+ label = elt["label"]
1818
+ box = elt["bbox"]
1819
+ scores = elt["score"]
1820
+
1821
+ # denormalize the box if it is normalized
1822
+ box = denormalize_bbox(box, (height, width))
1823
+ draw.rectangle(box, outline=color[label], width=4)
1824
+ text = f"{label}: {scores:.2f}"
1825
+ text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
1826
+ draw.rectangle(
1827
+ (box[0], box[1], text_box[2], text_box[3]), fill=color[label]
1828
+ )
1829
+ draw.text((box[0], box[1]), text, fill="black", font=font)
1830
+ frame_out.append(np.array(pil_image))
1831
+ return frame_out[0] if len(frame_out) == 1 else frame_out
1811
1832
 
1812
1833
 
1813
1834
  def _get_text_coords_from_mask(
@@ -1847,7 +1868,8 @@ def overlay_segmentation_masks(
1847
1868
  medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
1848
1869
  the masks on.
1849
1870
  masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
1850
- dictionaries containing the masks, labels and scores.
1871
+ dictionaries or a list of list of dictionaries containing the masks, labels
1872
+ and scores.
1851
1873
  draw_label (bool, optional): If True, the labels will be displayed on the image.
1852
1874
  secondary_label_key (str, optional): The key to use for the secondary
1853
1875
  tracking label which is needed in videos to display tracking information.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.144
3
+ Version: 0.2.146
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=PEUHqvnHmFL4np_TeFmKMwr5s_dWfdfJz6TF_ogd1dU,2353
5
- vision_agent/agent/vision_agent.py,sha256=WW0vtu8EFp7sFmU8z5_GDEduMOh9e0y4R3ZDiFDYJmM,17812
5
+ vision_agent/agent/vision_agent.py,sha256=Fp2uSbroRzGrxEwbb9srGdl0h31awkzDFm2tTfn28GI,17587
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=4bbebV1sKE10vsxcZR-R8P54X2HjLeU9lDt7ylIZAT4,38429
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=YWK4C--YRS1Kuab11Gn-AXBzar1j_GNnTnxi_nnaPRY,14901
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=e_ASPeRFU1yZsQhCkK_bIBG-eyIWyWXmN64lFk-r7e0,10897
@@ -18,7 +18,7 @@ vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM8
18
18
  vision_agent/tools/meta_tools.py,sha256=iHvMeBktWcVi-0DOrSMak1gsZrM_VKJlAq1mAFbBemE,23477
19
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
20
  vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
21
- vision_agent/tools/tools.py,sha256=dD_8AmAQb0oKVZHg2w2kSKlvWrG9yaKRbaHTz_kHgjA,73648
21
+ vision_agent/tools/tools.py,sha256=c7SjtZD7YfxhEAGYYe-ExVCBA4NDXmRwerBIbd-XEH8,74557
22
22
  vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
23
23
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
24
24
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
30
- vision_agent-0.2.144.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.144.dist-info/METADATA,sha256=HcZyYla50SBGHFDstUNElj7524PT64XT5a6_VQV_y6E,13758
32
- vision_agent-0.2.144.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.144.dist-info/RECORD,,
30
+ vision_agent-0.2.146.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.146.dist-info/METADATA,sha256=0xtIEPRJgc8ifV8z9C6OoLyi9w7wqQV7mpUmrWsLSVM,13758
32
+ vision_agent-0.2.146.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.146.dist-info/RECORD,,