vision-agent 0.2.104__py3-none-any.whl → 0.2.106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +5 -4
- {vision_agent-0.2.104.dist-info → vision_agent-0.2.106.dist-info}/METADATA +1 -1
- {vision_agent-0.2.104.dist-info → vision_agent-0.2.106.dist-info}/RECORD +5 -5
- {vision_agent-0.2.104.dist-info → vision_agent-0.2.106.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.104.dist-info → vision_agent-0.2.106.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -148,7 +148,7 @@ def owl_v2(
|
|
148
148
|
|
149
149
|
Example
|
150
150
|
-------
|
151
|
-
>>> owl_v2("car
|
151
|
+
>>> owl_v2("car, dinosaur", image)
|
152
152
|
[
|
153
153
|
{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
154
154
|
{'score': 0.98, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5},
|
@@ -157,7 +157,7 @@ def owl_v2(
|
|
157
157
|
image_size = image.shape[:2]
|
158
158
|
image_b64 = convert_to_b64(image)
|
159
159
|
request_data = {
|
160
|
-
"prompts": prompt.split("
|
160
|
+
"prompts": prompt.split(","),
|
161
161
|
"image": image_b64,
|
162
162
|
"confidence": box_threshold,
|
163
163
|
"function_name": "owl_v2",
|
@@ -607,12 +607,13 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
|
|
607
607
|
return answer[task] # type: ignore
|
608
608
|
|
609
609
|
|
610
|
-
def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str, Any]]:
|
610
|
+
def florencev2_object_detection(prompt: str, image: np.ndarray) -> List[Dict[str, Any]]:
|
611
611
|
"""'florencev2_object_detection' is a tool that can detect objects given a text
|
612
612
|
prompt such as a phrase or class names separated by commas. It returns a list of
|
613
613
|
detected objects as labels and their location as bounding boxes with score of 1.0.
|
614
614
|
|
615
615
|
Parameters:
|
616
|
+
prompt (str): The prompt to ground to the image.
|
616
617
|
image (np.ndarray): The image to used to detect objects
|
617
618
|
|
618
619
|
Returns:
|
@@ -624,7 +625,7 @@ def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str
|
|
624
625
|
|
625
626
|
Example
|
626
627
|
-------
|
627
|
-
>>> florencev2_object_detection(
|
628
|
+
>>> florencev2_object_detection('person looking at a coyote', image)
|
628
629
|
[
|
629
630
|
{'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
|
630
631
|
{'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
|
@@ -19,7 +19,7 @@ vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbn
|
|
19
19
|
vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
|
20
20
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
21
21
|
vision_agent/tools/tool_utils.py,sha256=ZhZ9oEcOvRSuWPy-gV0rx3pvaaXzBW-ZC3YQanXrq1g,4733
|
22
|
-
vision_agent/tools/tools.py,sha256=
|
22
|
+
vision_agent/tools/tools.py,sha256=4RooFWZvXPLgA9EA8tmMpFtgfbZIzZhxc0u3507_msA,44718
|
23
23
|
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
24
24
|
vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
|
25
25
|
vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU
|
|
27
27
|
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.106.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.106.dist-info/METADATA,sha256=sXXjGtL175BZlm2TGwyukzMMVJHBQrBdYqltK2X5mwM,10729
|
32
|
+
vision_agent-0.2.106.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.106.dist-info/RECORD,,
|
File without changes
|
File without changes
|