vision-agent 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {vision_agent-0.2.4 → vision_agent-0.2.5}/PKG-INFO +1 -1
  2. {vision_agent-0.2.4 → vision_agent-0.2.5}/pyproject.toml +1 -1
  3. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/vision_agent.py +15 -1
  4. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/lmm/lmm.py +1 -4
  5. {vision_agent-0.2.4 → vision_agent-0.2.5}/LICENSE +0 -0
  6. {vision_agent-0.2.4 → vision_agent-0.2.5}/README.md +0 -0
  7. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/__init__.py +0 -0
  8. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/__init__.py +0 -0
  9. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/agent.py +0 -0
  10. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/easytool.py +0 -0
  11. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/easytool_prompts.py +0 -0
  12. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/reflexion.py +0 -0
  13. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/reflexion_prompts.py +0 -0
  14. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/agent/vision_agent_prompts.py +0 -0
  15. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/fonts/__init__.py +0 -0
  16. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  17. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/image_utils.py +0 -0
  18. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/llm/__init__.py +0 -0
  19. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/llm/llm.py +0 -0
  20. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/lmm/__init__.py +0 -0
  21. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/tools/__init__.py +3 -3
  22. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/tools/prompts.py +0 -0
  23. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/tools/tools.py +1 -1
  24. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/tools/video.py +0 -0
  25. {vision_agent-0.2.4 → vision_agent-0.2.5}/vision_agent/type_defs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.4"
7
+ version = "0.2.5"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -366,6 +366,20 @@ def _handle_viz_tools(
366
366
  return image_to_data
367
367
 
368
368
 
369
def sample_n_evenly_spaced(lst: Sequence, n: int) -> Sequence:
    """Return at most ``n`` items of ``lst`` taken at evenly spaced positions.

    When two or more items are sampled from a longer sequence, the first and
    last items of ``lst`` are always part of the result.

    Args:
        lst: Indexable, sized sequence to sample from.
        n: Maximum number of items to return.

    Returns:
        A new list holding the sampled items in their original order. The
        list is empty when ``n <= 0`` or ``lst`` is empty, holds only the
        first item when ``n == 1``, and holds every item when ``n`` is at
        least ``len(lst)``.
    """
    size = len(lst)
    if n <= 0 or size == 0:
        return []
    if n == 1:
        return [lst[0]]
    if n >= size:
        # Copy rather than hand back ``lst`` itself, so every branch returns
        # a fresh list and a caller mutating the result cannot silently
        # change the sequence it passed in.
        return list(lst)

    # Distance between consecutive sampled positions. Index 0 maps to the
    # first item and index ``n - 1`` maps to the last one.
    spacing = (size - 1) / (n - 1)
    # ``round`` resolves exact halves to the nearest even integer, so e.g.
    # position 4.5 selects index 4.
    return [lst[round(spacing * i)] for i in range(n)]
381
+
382
+
369
383
  def visualize_result(all_tool_results: List[Dict]) -> Sequence[Union[str, Path]]:
370
384
  image_to_data: Dict[str, Dict] = {}
371
385
  for tool_result in all_tool_results:
@@ -584,7 +598,7 @@ class VisionAgent(Agent):
584
598
  visualized_output = visualize_result(all_tool_results)
585
599
  all_tool_results.append({"visualized_output": visualized_output})
586
600
  if len(visualized_output) > 0:
587
- reflection_images = visualized_output
601
+ reflection_images = sample_n_evenly_spaced(visualized_output, 3)
588
602
  elif image is not None:
589
603
  reflection_images = [image]
590
604
  else:
@@ -9,10 +9,7 @@ from typing import Any, Callable, Dict, List, Optional, Union, cast
9
9
  import requests
10
10
  from openai import AzureOpenAI, OpenAI
11
11
 
12
- from vision_agent.tools import (
13
- CHOOSE_PARAMS,
14
- SYSTEM_PROMPT,
15
- )
12
+ from vision_agent.tools import CHOOSE_PARAMS, SYSTEM_PROMPT
16
13
 
17
14
  _LOGGER = logging.getLogger(__name__)
18
15
 
File without changes
File without changes
@@ -12,12 +12,12 @@ from .tools import ( # Counter,
12
12
  GroundingDINO,
13
13
  GroundingSAM,
14
14
  ImageCaption,
15
- ZeroShotCounting,
16
- VisualPromptCounting,
17
- VisualQuestionAnswering,
18
15
  ImageQuestionAnswering,
19
16
  SegArea,
20
17
  SegIoU,
21
18
  Tool,
19
+ VisualPromptCounting,
20
+ VisualQuestionAnswering,
21
+ ZeroShotCounting,
22
22
  register_tool,
23
23
  )
@@ -17,9 +17,9 @@ from vision_agent.image_utils import (
17
17
  normalize_bbox,
18
18
  rle_decode,
19
19
  )
20
+ from vision_agent.lmm import OpenAILMM
20
21
  from vision_agent.tools.video import extract_frames_from_video
21
22
  from vision_agent.type_defs import LandingaiAPIKey
22
- from vision_agent.lmm import OpenAILMM
23
23
 
24
24
  _LOGGER = logging.getLogger(__name__)
25
25
  _LND_API_KEY = LandingaiAPIKey().api_key