vision-agent 0.2.104__py3-none-any.whl → 0.2.106__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -148,7 +148,7 @@ def owl_v2(
148
148
 
149
149
  Example
150
150
  -------
151
- >>> owl_v2("car. dinosaur", image)
151
+ >>> owl_v2("car, dinosaur", image)
152
152
  [
153
153
  {'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]},
154
154
  {'score': 0.98, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5]},
@@ -157,7 +157,7 @@ def owl_v2(
157
157
  image_size = image.shape[:2]
158
158
  image_b64 = convert_to_b64(image)
159
159
  request_data = {
160
- "prompts": prompt.split("."),
160
+ "prompts": prompt.split(","),
161
161
  "image": image_b64,
162
162
  "confidence": box_threshold,
163
163
  "function_name": "owl_v2",
@@ -607,12 +607,13 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
607
607
  return answer[task] # type: ignore
608
608
 
609
609
 
610
- def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str, Any]]:
610
+ def florencev2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
611
611
  """'florencev2_object_detection' is a tool that can detect objects given a text
612
612
  prompt such as a phrase or class names separated by commas. It returns a list of
613
613
  detected objects as labels and their location as bounding boxes with score of 1.0.
614
614
 
615
615
  Parameters:
616
+ prompt (str): The prompt to ground to the image.
616
617
  image (np.ndarray): The image used to detect objects
617
618
 
618
619
  Returns:
@@ -624,7 +625,7 @@ def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str
624
625
 
625
626
  Example
626
627
  -------
627
- >>> florencev2_object_detection(image, 'person looking at a coyote')
628
+ >>> florencev2_object_detection('person looking at a coyote', image)
628
629
  [
629
630
  {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
630
631
  {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.104
3
+ Version: 0.2.106
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -19,7 +19,7 @@ vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbn
19
19
  vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
20
20
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
21
21
  vision_agent/tools/tool_utils.py,sha256=ZhZ9oEcOvRSuWPy-gV0rx3pvaaXzBW-ZC3YQanXrq1g,4733
22
- vision_agent/tools/tools.py,sha256=fgPE0VHfBiQPJKkslBm_hugTOyRT-Hnw7eztvC-l4_o,44661
22
+ vision_agent/tools/tools.py,sha256=4RooFWZvXPLgA9EA8tmMpFtgfbZIzZhxc0u3507_msA,44718
23
23
  vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
24
  vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
25
25
  vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU
27
27
  vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.104.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.104.dist-info/METADATA,sha256=aSP8goyL8RZS_6SZSzrJZCsIzySrN_domJ2vvvbedQg,10729
32
- vision_agent-0.2.104.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.104.dist-info/RECORD,,
30
+ vision_agent-0.2.106.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.106.dist-info/METADATA,sha256=sXXjGtL175BZlm2TGwyukzMMVJHBQrBdYqltK2X5mwM,10729
32
+ vision_agent-0.2.106.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.106.dist-info/RECORD,,