vision-agent 0.2.37__py3-none-any.whl → 0.2.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +4 -3
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.39.dist-info}/METADATA +2 -2
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.39.dist-info}/RECORD +5 -5
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.39.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.39.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -198,7 +198,7 @@ def extract_frames(
|
|
198
198
|
|
199
199
|
def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
200
200
|
"""'ocr' extracts text from an image. It returns a list of detected text, bounding
|
201
|
-
boxes, and confidence scores.
|
201
|
+
boxes, and confidence scores. The results are sorted from top-left to bottom right
|
202
202
|
|
203
203
|
Parameters:
|
204
204
|
image (np.ndarray): The image to extract text from.
|
@@ -211,7 +211,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
211
211
|
-------
|
212
212
|
>>> ocr(image)
|
213
213
|
[
|
214
|
-
{'label': '
|
214
|
+
{'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
|
215
215
|
]
|
216
216
|
"""
|
217
217
|
|
@@ -245,7 +245,8 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
245
245
|
box = normalize_bbox(box, image_size)
|
246
246
|
output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
|
247
247
|
|
248
|
-
|
248
|
+
ocr_results = sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
|
249
|
+
return ocr_results
|
249
250
|
|
250
251
|
|
251
252
|
def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.37
|
3
|
+
Version: 0.2.39
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -160,6 +160,7 @@ You can also add custom tools to the agent:
|
|
160
160
|
|
161
161
|
```python
|
162
162
|
import vision_agent as va
|
163
|
+
import numpy as np
|
163
164
|
|
164
165
|
@va.tools.register_tool(imports=["import numpy as np"])
|
165
166
|
def custom_tool(image_path: str) -> str:
|
@@ -176,7 +177,6 @@ def custom_tool(image_path: str) -> str:
|
|
176
177
|
>>> custom_tool("image.jpg")
|
177
178
|
"""
|
178
179
|
|
179
|
-
import numpy as np
|
180
180
|
return np.zeros((10, 10))
|
181
181
|
```
|
182
182
|
|
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYv
|
|
23
23
|
vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
24
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
25
25
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
26
|
-
vision_agent/tools/tools.py,sha256=
|
26
|
+
vision_agent/tools/tools.py,sha256=DoVmuGF7WoU6BREaCocb0SwN2q8Szs25KfT5bUgy6cU,23660
|
27
27
|
vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
|
28
28
|
vision_agent/utils/execute.py,sha256=OOix_tDoX1hUacGhszZcMl2D9IWSTwXBox-6DhEKGFA,19079
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
33
|
-
vision_agent-0.2.37.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.37.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
-
vision_agent-0.2.37.dist-info/RECORD,,
|
33
|
+
vision_agent-0.2.39.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.39.dist-info/METADATA,sha256=hgiRU-muujo5x9faImu6Mnd0Lzl97OIzyY6BLHoVAHc,6826
|
35
|
+
vision_agent-0.2.39.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.39.dist-info/RECORD,,
|
File without changes
|
File without changes
|