PyPI - vision-agent - Versions diffs - 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl - Mend

vision-agent 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

vision_agent/tools/tools.py CHANGED Viewed

@@ -198,7 +198,7 @@ def extract_frames(
 def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     """'ocr' extracts text from an image. It returns a list of detected text, bounding
-    boxes, and confidence scores.
+    boxes, and confidence scores. The results are sorted from top-left to bottom right
     Parameters:
         image (np.ndarray): The image to extract text from.
@@ -211,7 +211,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     -------
     >>> ocr(image)
     [
-        {'label': 'some text', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
+        {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
     ]
     """
@@ -245,7 +245,8 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
         box = normalize_bbox(box, image_size)
         output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
-    return output
+    ocr_results = sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
+    return ocr_results
 def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:

{vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.37
+Version: 0.2.38
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/RECORD RENAMED Viewed

@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYv
 vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
-vision_agent/tools/tools.py,sha256=RVBuaP8KZrBVppEPaqP0Tey3Am6O5zoNIaZpBSW125c,23523
+vision_agent/tools/tools.py,sha256=DoVmuGF7WoU6BREaCocb0SwN2q8Szs25KfT5bUgy6cU,23660
 vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
 vision_agent/utils/execute.py,sha256=OOix_tDoX1hUacGhszZcMl2D9IWSTwXBox-6DhEKGFA,19079
 vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
 vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
 vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
 vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
-vision_agent-0.2.37.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.37.dist-info/METADATA,sha256=uwSwFslC71mMZC-eo34JlIcTdrGeV7IzW-JBlLmvy1Y,6830
-vision_agent-0.2.37.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.37.dist-info/RECORD,,
+vision_agent-0.2.38.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.38.dist-info/METADATA,sha256=UvtQ7c201lDjofSavAo8sst25lC1mFML3Z8mWp1a52A,6830
+vision_agent-0.2.38.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.38.dist-info/RECORD,,

{vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.37.dist-info → vision_agent-0.2.38.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl

vision-agent 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl