vision-agent 0.2.37__tar.gz → 0.2.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {vision_agent-0.2.37 → vision_agent-0.2.38}/PKG-INFO +1 -1
  2. {vision_agent-0.2.37 → vision_agent-0.2.38}/pyproject.toml +1 -1
  3. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/tools/tools.py +4 -3
  4. {vision_agent-0.2.37 → vision_agent-0.2.38}/LICENSE +0 -0
  5. {vision_agent-0.2.37 → vision_agent-0.2.38}/README.md +0 -0
  6. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/__init__.py +0 -0
  7. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/__init__.py +0 -0
  8. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/agent.py +0 -0
  9. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/agent_coder.py +0 -0
  10. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/agent_coder_prompts.py +0 -0
  11. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/data_interpreter.py +0 -0
  12. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/data_interpreter_prompts.py +0 -0
  13. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/easytool.py +0 -0
  14. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/easytool_prompts.py +0 -0
  15. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/easytool_v2.py +0 -0
  16. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/easytool_v2_prompts.py +0 -0
  17. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/reflexion.py +0 -0
  18. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/reflexion_prompts.py +0 -0
  19. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/vision_agent.py +0 -0
  20. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/agent/vision_agent_prompts.py +0 -0
  21. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/fonts/__init__.py +0 -0
  22. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  23. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/llm/__init__.py +0 -0
  24. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/llm/llm.py +0 -0
  25. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/lmm/__init__.py +0 -0
  26. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/lmm/lmm.py +0 -0
  27. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/tools/__init__.py +0 -0
  28. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/tools/easytool_tools.py +0 -0
  29. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/tools/prompts.py +0 -0
  30. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/tools/tool_utils.py +0 -0
  31. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/utils/__init__.py +0 -0
  32. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/utils/image_utils.py +0 -0
  34. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/utils/sim.py +0 -0
  35. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/utils/type_defs.py +0 -0
  36. {vision_agent-0.2.37 → vision_agent-0.2.38}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.37
3
+ Version: 0.2.38
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.37"
7
+ version = "0.2.38"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -198,7 +198,7 @@ def extract_frames(
198
198
 
199
199
  def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
200
200
  """'ocr' extracts text from an image. It returns a list of detected text, bounding
201
- boxes, and confidence scores.
201
+ boxes, and confidence scores. The results are sorted from top-left to bottom right
202
202
 
203
203
  Parameters:
204
204
  image (np.ndarray): The image to extract text from.
@@ -211,7 +211,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
211
211
  -------
212
212
  >>> ocr(image)
213
213
  [
214
- {'label': 'some text', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
214
+ {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
215
215
  ]
216
216
  """
217
217
 
@@ -245,7 +245,8 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
245
245
  box = normalize_bbox(box, image_size)
246
246
  output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
247
247
 
248
- return output
248
+ ocr_results = sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
249
+ return ocr_results
249
250
 
250
251
 
251
252
  def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
File without changes
File without changes