vision-agent 0.2.37__py3-none-any.whl → 0.2.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -198,7 +198,7 @@ def extract_frames(
198
198
 
199
199
  def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
200
200
  """'ocr' extracts text from an image. It returns a list of detected text, bounding
201
- boxes, and confidence scores.
201
+ boxes, and confidence scores. The results are sorted from top-left to bottom-right.
202
202
 
203
203
  Parameters:
204
204
  image (np.ndarray): The image to extract text from.
@@ -211,7 +211,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
211
211
  -------
212
212
  >>> ocr(image)
213
213
  [
214
- {'label': 'some text', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
214
+ {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
215
215
  ]
216
216
  """
217
217
 
@@ -245,7 +245,8 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
245
245
  box = normalize_bbox(box, image_size)
246
246
  output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
247
247
 
248
- return output
248
+ ocr_results = sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
249
+ return ocr_results
249
250
 
250
251
 
251
252
  def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.37
3
+ Version: 0.2.39
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -160,6 +160,7 @@ You can also add custom tools to the agent:
160
160
 
161
161
  ```python
162
162
  import vision_agent as va
163
+ import numpy as np
163
164
 
164
165
  @va.tools.register_tool(imports=["import numpy as np"])
165
166
  def custom_tool(image_path: str) -> str:
@@ -176,7 +177,6 @@ def custom_tool(image_path: str) -> str:
176
177
  >>> custom_tool("image.jpg")
177
178
  """
178
179
 
179
- import numpy as np
180
180
  return np.zeros((10, 10))
181
181
  ```
182
182
 
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYv
23
23
  vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
24
24
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
25
25
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
26
- vision_agent/tools/tools.py,sha256=RVBuaP8KZrBVppEPaqP0Tey3Am6O5zoNIaZpBSW125c,23523
26
+ vision_agent/tools/tools.py,sha256=DoVmuGF7WoU6BREaCocb0SwN2q8Szs25KfT5bUgy6cU,23660
27
27
  vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
28
28
  vision_agent/utils/execute.py,sha256=OOix_tDoX1hUacGhszZcMl2D9IWSTwXBox-6DhEKGFA,19079
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
33
- vision_agent-0.2.37.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.37.dist-info/METADATA,sha256=uwSwFslC71mMZC-eo34JlIcTdrGeV7IzW-JBlLmvy1Y,6830
35
- vision_agent-0.2.37.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.37.dist-info/RECORD,,
33
+ vision_agent-0.2.39.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.39.dist-info/METADATA,sha256=hgiRU-muujo5x9faImu6Mnd0Lzl97OIzyY6BLHoVAHc,6826
35
+ vision_agent-0.2.39.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.39.dist-info/RECORD,,