vision_agent-0.2.37-py3-none-any.whl → vision_agent-0.2.38-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +4 -3
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/METADATA +1 -1
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/RECORD +5 -5
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -198,7 +198,7 @@ def extract_frames(
|
|
198
198
|
|
199
199
|
def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
200
200
|
"""'ocr' extracts text from an image. It returns a list of detected text, bounding
|
201
|
-
boxes, and confidence scores.
|
201
|
+
boxes, and confidence scores. The results are sorted from top-left to bottom-right.
|
202
202
|
|
203
203
|
Parameters:
|
204
204
|
image (np.ndarray): The image to extract text from.
|
@@ -211,7 +211,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
211
211
|
-------
|
212
212
|
>>> ocr(image)
|
213
213
|
[
|
214
|
-
{'label': '
|
214
|
+
{'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
|
215
215
|
]
|
216
216
|
"""
|
217
217
|
|
@@ -245,7 +245,8 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
245
245
|
box = normalize_bbox(box, image_size)
|
246
246
|
output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
|
247
247
|
|
248
|
-
|
248
|
+
ocr_results = sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
|
249
|
+
return ocr_results
|
249
250
|
|
250
251
|
|
251
252
|
def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYv
|
|
23
23
|
vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
24
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
25
25
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
26
|
-
vision_agent/tools/tools.py,sha256=
|
26
|
+
vision_agent/tools/tools.py,sha256=DoVmuGF7WoU6BREaCocb0SwN2q8Szs25KfT5bUgy6cU,23660
|
27
27
|
vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
|
28
28
|
vision_agent/utils/execute.py,sha256=OOix_tDoX1hUacGhszZcMl2D9IWSTwXBox-6DhEKGFA,19079
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
36
|
-
vision_agent-0.2.
|
33
|
+
vision_agent-0.2.38.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.38.dist-info/METADATA,sha256=UvtQ7c201lDjofSavAo8sst25lC1mFML3Z8mWp1a52A,6830
|
35
|
+
vision_agent-0.2.38.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.38.dist-info/RECORD,,
|
File without changes
|
File without changes
|