vision-agent 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +15 -1
- vision_agent/lmm/lmm.py +1 -4
- vision_agent/tools/__init__.py +3 -3
- vision_agent/tools/tools.py +1 -1
- {vision_agent-0.2.4.dist-info → vision_agent-0.2.5.dist-info}/METADATA +1 -1
- {vision_agent-0.2.4.dist-info → vision_agent-0.2.5.dist-info}/RECORD +8 -8
- {vision_agent-0.2.4.dist-info → vision_agent-0.2.5.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.4.dist-info → vision_agent-0.2.5.dist-info}/WHEEL +0 -0
@@ -366,6 +366,20 @@ def _handle_viz_tools(
|
|
366
366
|
return image_to_data
|
367
367
|
|
368
368
|
|
369
|
+
def sample_n_evenly_spaced(lst: Sequence, n: int) -> Sequence:
|
370
|
+
if n <= 0:
|
371
|
+
return []
|
372
|
+
elif len(lst) == 0:
|
373
|
+
return []
|
374
|
+
elif n == 1:
|
375
|
+
return [lst[0]]
|
376
|
+
elif n >= len(lst):
|
377
|
+
return lst
|
378
|
+
|
379
|
+
spacing = (len(lst) - 1) / (n - 1)
|
380
|
+
return [lst[round(spacing * i)] for i in range(n)]
|
381
|
+
|
382
|
+
|
369
383
|
def visualize_result(all_tool_results: List[Dict]) -> Sequence[Union[str, Path]]:
|
370
384
|
image_to_data: Dict[str, Dict] = {}
|
371
385
|
for tool_result in all_tool_results:
|
@@ -584,7 +598,7 @@ class VisionAgent(Agent):
|
|
584
598
|
visualized_output = visualize_result(all_tool_results)
|
585
599
|
all_tool_results.append({"visualized_output": visualized_output})
|
586
600
|
if len(visualized_output) > 0:
|
587
|
-
reflection_images = visualized_output
|
601
|
+
reflection_images = sample_n_evenly_spaced(visualized_output, 3)
|
588
602
|
elif image is not None:
|
589
603
|
reflection_images = [image]
|
590
604
|
else:
|
vision_agent/lmm/lmm.py
CHANGED
@@ -9,10 +9,7 @@ from typing import Any, Callable, Dict, List, Optional, Union, cast
|
|
9
9
|
import requests
|
10
10
|
from openai import AzureOpenAI, OpenAI
|
11
11
|
|
12
|
-
from vision_agent.tools import (
|
13
|
-
CHOOSE_PARAMS,
|
14
|
-
SYSTEM_PROMPT,
|
15
|
-
)
|
12
|
+
from vision_agent.tools import CHOOSE_PARAMS, SYSTEM_PROMPT
|
16
13
|
|
17
14
|
_LOGGER = logging.getLogger(__name__)
|
18
15
|
|
vision_agent/tools/__init__.py
CHANGED
@@ -12,12 +12,12 @@ from .tools import ( # Counter,
|
|
12
12
|
GroundingDINO,
|
13
13
|
GroundingSAM,
|
14
14
|
ImageCaption,
|
15
|
-
ZeroShotCounting,
|
16
|
-
VisualPromptCounting,
|
17
|
-
VisualQuestionAnswering,
|
18
15
|
ImageQuestionAnswering,
|
19
16
|
SegArea,
|
20
17
|
SegIoU,
|
21
18
|
Tool,
|
19
|
+
VisualPromptCounting,
|
20
|
+
VisualQuestionAnswering,
|
21
|
+
ZeroShotCounting,
|
22
22
|
register_tool,
|
23
23
|
)
|
vision_agent/tools/tools.py
CHANGED
@@ -17,9 +17,9 @@ from vision_agent.image_utils import (
|
|
17
17
|
normalize_bbox,
|
18
18
|
rle_decode,
|
19
19
|
)
|
20
|
+
from vision_agent.lmm import OpenAILMM
|
20
21
|
from vision_agent.tools.video import extract_frames_from_video
|
21
22
|
from vision_agent.type_defs import LandingaiAPIKey
|
22
|
-
from vision_agent.lmm import OpenAILMM
|
23
23
|
|
24
24
|
_LOGGER = logging.getLogger(__name__)
|
25
25
|
_LND_API_KEY = LandingaiAPIKey().api_key
|
@@ -5,7 +5,7 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
|
|
5
5
|
vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
|
6
6
|
vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
|
7
7
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
8
|
-
vision_agent/agent/vision_agent.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent.py,sha256=SFdw6OBqWj0cr-YthFMM_x-Urg86CggazYQG4wy0n-U,25195
|
9
9
|
vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
|
10
10
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
@@ -13,13 +13,13 @@ vision_agent/image_utils.py,sha256=YvP5KE9NrWdgJKuHW2NR1glzfObkxtcXBknpmj3Gsbs,7
|
|
13
13
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
14
14
|
vision_agent/llm/llm.py,sha256=1BkrSVBWEClyqLc0Rmyw4heLhi_ZVm6JO7-i1wd1ziw,5383
|
15
15
|
vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
|
16
|
-
vision_agent/lmm/lmm.py,sha256=
|
17
|
-
vision_agent/tools/__init__.py,sha256=
|
16
|
+
vision_agent/lmm/lmm.py,sha256=gK90vMxh0OcGSuIZQikBkDXm4pfkdFk1R2y7rtWDl84,10539
|
17
|
+
vision_agent/tools/__init__.py,sha256=HfUr0JQUwk0Kyieen93df9lMbbdpVf9Q6CcVFmKv_q4,413
|
18
18
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
19
|
-
vision_agent/tools/tools.py,sha256=
|
19
|
+
vision_agent/tools/tools.py,sha256=GvRDLeMVS9C7z56hlSpThGoV0r_x5pKSFw-g4JW_qnw,42779
|
20
20
|
vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
21
21
|
vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
|
22
|
-
vision_agent-0.2.
|
23
|
-
vision_agent-0.2.
|
24
|
-
vision_agent-0.2.
|
25
|
-
vision_agent-0.2.
|
22
|
+
vision_agent-0.2.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
23
|
+
vision_agent-0.2.5.dist-info/METADATA,sha256=zSTYpM893hERFpO2j7-YdRmRPKeGI6-qU_wkq5MitFY,7697
|
24
|
+
vision_agent-0.2.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
25
|
+
vision_agent-0.2.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|