vision-agent 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -30,12 +30,6 @@ WORKSPACE.mkdir(parents=True, exist_ok=True)
30
30
  if str(WORKSPACE) != "":
31
31
  os.environ["PYTHONPATH"] = f"{WORKSPACE}:{os.getenv('PYTHONPATH', '')}"
32
32
 
33
- STUCK_IN_LOOP_ERROR_MESSAGE = {
34
- "name": "Error when running conversation agent",
35
- "value": "Agent is stuck in conversation loop, exited",
36
- "traceback_raw": [],
37
- }
38
-
39
33
 
40
34
  class BoilerplateCode:
41
35
  pre_code = [
@@ -298,13 +292,6 @@ class VisionAgent(Agent):
298
292
  # sometimes it gets stuck in a loop, so we force it to exit
299
293
  if last_response == response:
300
294
  response["let_user_respond"] = True
301
- self.streaming_message(
302
- {
303
- "role": "assistant",
304
- "content": "{}",
305
- "error": STUCK_IN_LOOP_ERROR_MESSAGE,
306
- }
307
- )
308
295
 
309
296
  finished = response["let_user_respond"]
310
297
 
@@ -317,7 +304,11 @@ class VisionAgent(Agent):
317
304
  {
318
305
  "role": "assistant",
319
306
  "content": "{}",
320
- "error": STUCK_IN_LOOP_ERROR_MESSAGE,
307
+ "error": {
308
+ "name": "Error when running conversation agent",
309
+ "value": "Agent is stuck in conversation loop, exited",
310
+ "traceback_raw": [],
311
+ },
321
312
  "finished": finished and code_action is None,
322
313
  }
323
314
  )
@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
1181
1181
  fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
1182
1182
  )
1183
1183
  data = data_obj.model_dump(by_alias=True)
1184
- detections = send_inference_request(data, "tools", v2=False)
1184
+ detections = send_inference_request(
1185
+ data,
1186
+ "tools",
1187
+ v2=False,
1188
+ metadata_payload={"function_name": "florence2_phrase_grounding"},
1189
+ )
1185
1190
  else:
1186
1191
  data = {
1187
1192
  "image": image_b64,
@@ -1754,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
1754
1759
 
1755
1760
 
1756
1761
  def overlay_bounding_boxes(
1757
- image: np.ndarray, bboxes: List[Dict[str, Any]]
1758
- ) -> np.ndarray:
1762
+ medias: Union[np.ndarray, List[np.ndarray]],
1763
+ bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
1764
+ ) -> Union[np.ndarray, List[np.ndarray]]:
1759
1765
  """'overlay_bounding_boxes' is a utility function that displays bounding boxes on
1760
1766
  an image.
1761
1767
 
1762
1768
  Parameters:
1763
- image (np.ndarray): The image to display the bounding boxes on.
1764
- bboxes (List[Dict[str, Any]]): A list of dictionaries containing the bounding
1769
+ medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display the
1770
+ bounding boxes on.
1771
+ bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
1772
+ dictionaries or a list of list of dictionaries containing the bounding
1765
1773
  boxes.
1766
1774
 
1767
1775
  Returns:
@@ -1773,41 +1781,54 @@ def overlay_bounding_boxes(
1773
1781
  image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
1774
1782
  )
1775
1783
  """
1776
- pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
1777
1784
 
1778
- if len(set([box["label"] for box in bboxes])) > len(COLORS):
1785
+ medias_int: List[np.ndarray] = (
1786
+ [medias] if isinstance(medias, np.ndarray) else medias
1787
+ )
1788
+ bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
1789
+ bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
1790
+ labels = set([bb["label"] for b in bbox_int for bb in b])
1791
+
1792
+ if len(labels) > len(COLORS):
1779
1793
  _LOGGER.warning(
1780
1794
  "Number of unique labels exceeds the number of available colors. Some labels may have the same color."
1781
1795
  )
1782
1796
 
1783
- color = {
1784
- label: COLORS[i % len(COLORS)]
1785
- for i, label in enumerate(set([box["label"] for box in bboxes]))
1786
- }
1787
- bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
1797
+ color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
1788
1798
 
1789
- width, height = pil_image.size
1790
- fontsize = max(12, int(min(width, height) / 40))
1791
- draw = ImageDraw.Draw(pil_image)
1792
- font = ImageFont.truetype(
1793
- str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
1794
- fontsize,
1795
- )
1799
+ frame_out = []
1800
+ for i, frame in enumerate(medias_int):
1801
+ pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
1796
1802
 
1797
- for elt in bboxes:
1798
- label = elt["label"]
1799
- box = elt["bbox"]
1800
- scores = elt["score"]
1803
+ bboxes = bbox_int[i]
1804
+ bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
1801
1805
 
1802
- # denormalize the box if it is normalized
1803
- box = denormalize_bbox(box, (height, width))
1806
+ width, height = pil_image.size
1807
+ fontsize = max(12, int(min(width, height) / 40))
1808
+ draw = ImageDraw.Draw(pil_image)
1809
+ font = ImageFont.truetype(
1810
+ str(
1811
+ resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
1812
+ ),
1813
+ fontsize,
1814
+ )
1804
1815
 
1805
- draw.rectangle(box, outline=color[label], width=4)
1806
- text = f"{label}: {scores:.2f}"
1807
- text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
1808
- draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
1809
- draw.text((box[0], box[1]), text, fill="black", font=font)
1810
- return np.array(pil_image)
1816
+ for elt in bboxes:
1817
+ label = elt["label"]
1818
+ box = elt["bbox"]
1819
+ scores = elt["score"]
1820
+
1821
+ # denormalize the box if it is normalized
1822
+ box = denormalize_bbox(box, (height, width))
1823
+ draw.rectangle(box, outline=color[label], width=4)
1824
+ text = f"{label}: {scores:.2f}"
1825
+ text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
1826
+ draw.rectangle(
1827
+ (box[0], box[1], text_box[2], text_box[3]), fill=color[label]
1828
+ )
1829
+ draw.text((box[0], box[1]), text, fill="black", font=font)
1830
+ frame_out.append(np.array(pil_image))
1831
+ return frame_out[0] if len(frame_out) == 1 else frame_out
1811
1832
 
1812
1833
 
1813
1834
  def _get_text_coords_from_mask(
@@ -1847,7 +1868,8 @@ def overlay_segmentation_masks(
1847
1868
  medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
1848
1869
  the masks on.
1849
1870
  masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
1850
- dictionaries containing the masks, labels and scores.
1871
+ dictionaries or a list of list of dictionaries containing the masks, labels
1872
+ and scores.
1851
1873
  draw_label (bool, optional): If True, the labels will be displayed on the image.
1852
1874
  secondary_label_key (str, optional): The key to use for the secondary
1853
1875
  tracking label which is needed in videos to display tracking information.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.144
3
+ Version: 0.2.146
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=PEUHqvnHmFL4np_TeFmKMwr5s_dWfdfJz6TF_ogd1dU,2353
5
- vision_agent/agent/vision_agent.py,sha256=WW0vtu8EFp7sFmU8z5_GDEduMOh9e0y4R3ZDiFDYJmM,17812
5
+ vision_agent/agent/vision_agent.py,sha256=Fp2uSbroRzGrxEwbb9srGdl0h31awkzDFm2tTfn28GI,17587
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=4bbebV1sKE10vsxcZR-R8P54X2HjLeU9lDt7ylIZAT4,38429
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=YWK4C--YRS1Kuab11Gn-AXBzar1j_GNnTnxi_nnaPRY,14901
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=e_ASPeRFU1yZsQhCkK_bIBG-eyIWyWXmN64lFk-r7e0,10897
@@ -18,7 +18,7 @@ vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM8
18
18
  vision_agent/tools/meta_tools.py,sha256=iHvMeBktWcVi-0DOrSMak1gsZrM_VKJlAq1mAFbBemE,23477
19
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
20
  vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
21
- vision_agent/tools/tools.py,sha256=dD_8AmAQb0oKVZHg2w2kSKlvWrG9yaKRbaHTz_kHgjA,73648
21
+ vision_agent/tools/tools.py,sha256=c7SjtZD7YfxhEAGYYe-ExVCBA4NDXmRwerBIbd-XEH8,74557
22
22
  vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
23
23
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
24
24
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
30
- vision_agent-0.2.144.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.144.dist-info/METADATA,sha256=HcZyYla50SBGHFDstUNElj7524PT64XT5a6_VQV_y6E,13758
32
- vision_agent-0.2.144.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.144.dist-info/RECORD,,
30
+ vision_agent-0.2.146.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.146.dist-info/METADATA,sha256=0xtIEPRJgc8ifV8z9C6OoLyi9w7wqQV7mpUmrWsLSVM,13758
32
+ vision_agent-0.2.146.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.146.dist-info/RECORD,,