vision-agent 0.2.190__py3-none-any.whl → 0.2.191__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/vision_agent_coder.py +0 -3
- vision_agent/agent/vision_agent_planner.py +2 -10
- vision_agent/tools/__init__.py +5 -5
- vision_agent/tools/tools.py +7 -10
- {vision_agent-0.2.190.dist-info → vision_agent-0.2.191.dist-info}/METADATA +7 -11
- {vision_agent-0.2.190.dist-info → vision_agent-0.2.191.dist-info}/RECORD +8 -8
- {vision_agent-0.2.190.dist-info → vision_agent-0.2.191.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.190.dist-info → vision_agent-0.2.191.dist-info}/WHEEL +0 -0
@@ -527,9 +527,6 @@ class VisionAgentCoder(Agent):
|
|
527
527
|
[{"role": "user", "content": "describe your task here..."}].
|
528
528
|
plan_context (PlanContext): The context of the plan, including the plans,
|
529
529
|
best_plan, plan_thoughts, tool_doc, and tool_output.
|
530
|
-
test_multi_plan (bool): Whether to test multiple plans or just the best plan.
|
531
|
-
custom_tool_names (Optional[List[str]]): A list of custom tool names to use
|
532
|
-
for the planner.
|
533
530
|
|
534
531
|
Returns:
|
535
532
|
Dict[str, Any]: A dictionary containing the code output by the
|
@@ -519,11 +519,7 @@ class OpenAIVisionAgentPlanner(VisionAgentPlanner):
|
|
519
519
|
code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
|
520
520
|
) -> None:
|
521
521
|
super().__init__(
|
522
|
-
planner=(
|
523
|
-
OpenAILMM(temperature=0.0, json_mode=True)
|
524
|
-
if planner is None
|
525
|
-
else planner
|
526
|
-
),
|
522
|
+
planner=(OpenAILMM(temperature=0.0) if planner is None else planner),
|
527
523
|
tool_recommender=tool_recommender,
|
528
524
|
verbosity=verbosity,
|
529
525
|
report_progress_callback=report_progress_callback,
|
@@ -567,11 +563,7 @@ class AzureVisionAgentPlanner(VisionAgentPlanner):
|
|
567
563
|
code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
|
568
564
|
) -> None:
|
569
565
|
super().__init__(
|
570
|
-
planner=(
|
571
|
-
AzureOpenAILMM(temperature=0.0, json_mode=True)
|
572
|
-
if planner is None
|
573
|
-
else planner
|
574
|
-
),
|
566
|
+
planner=(AzureOpenAILMM(temperature=0.0) if planner is None else planner),
|
575
567
|
tool_recommender=(
|
576
568
|
AzureSim(T.TOOLS_DF, sim_key="desc")
|
577
569
|
if tool_recommender is None
|
vision_agent/tools/__init__.py
CHANGED
@@ -40,6 +40,7 @@ from .tools import (
|
|
40
40
|
florence2_roberta_vqa,
|
41
41
|
florence2_sam2_image,
|
42
42
|
florence2_sam2_video_tracking,
|
43
|
+
flux_image_inpainting,
|
43
44
|
generate_pose_image,
|
44
45
|
generate_soft_edge_image,
|
45
46
|
get_tool_documentation,
|
@@ -59,17 +60,16 @@ from .tools import (
|
|
59
60
|
overlay_segmentation_masks,
|
60
61
|
owl_v2_image,
|
61
62
|
owl_v2_video,
|
63
|
+
qwen2_vl_images_vqa,
|
64
|
+
qwen2_vl_video_vqa,
|
62
65
|
save_image,
|
63
66
|
save_json,
|
64
67
|
save_video,
|
68
|
+
siglip_classification,
|
65
69
|
template_match,
|
70
|
+
video_temporal_localization,
|
66
71
|
vit_image_classification,
|
67
72
|
vit_nsfw_classification,
|
68
|
-
qwen2_vl_images_vqa,
|
69
|
-
qwen2_vl_video_vqa,
|
70
|
-
video_temporal_localization,
|
71
|
-
flux_image_inpainting,
|
72
|
-
siglip_classification,
|
73
73
|
)
|
74
74
|
|
75
75
|
__new_tools__ = [
|
vision_agent/tools/tools.py
CHANGED
@@ -27,10 +27,7 @@ from vision_agent.tools.tool_utils import (
|
|
27
27
|
send_inference_request,
|
28
28
|
send_task_inference_request,
|
29
29
|
)
|
30
|
-
from vision_agent.tools.tools_types import (
|
31
|
-
JobStatus,
|
32
|
-
ODResponseData,
|
33
|
-
)
|
30
|
+
from vision_agent.tools.tools_types import JobStatus, ODResponseData
|
34
31
|
from vision_agent.utils.exceptions import FineTuneModelIsNotReady
|
35
32
|
from vision_agent.utils.execute import FileSerializer, MimeType
|
36
33
|
from vision_agent.utils.image_utils import (
|
@@ -641,8 +638,8 @@ def loca_visual_prompt_counting(
|
|
641
638
|
|
642
639
|
Parameters:
|
643
640
|
image (np.ndarray): The image that contains a lot of instances of a single object
|
644
|
-
|
645
|
-
|
641
|
+
visual_prompt (Dict[str, List[float]]): Bounding box of the object in
|
642
|
+
format [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
|
646
643
|
|
647
644
|
Returns:
|
648
645
|
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
|
@@ -750,10 +747,10 @@ def countgd_example_based_counting(
|
|
750
747
|
|
751
748
|
Parameters:
|
752
749
|
visual_prompts (List[List[float]]): Bounding boxes of the object in format
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
750
|
+
[xmin, ymin, xmax, ymax]. Up to 3 bounding boxes can be provided. image
|
751
|
+
(np.ndarray): The image that contains multiple instances of the object.
|
752
|
+
box_threshold (float, optional): The threshold for detection. Defaults to
|
753
|
+
0.23.
|
757
754
|
|
758
755
|
Returns:
|
759
756
|
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.190
|
3
|
+
Version: 0.2.191
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -54,11 +54,7 @@ Description-Content-Type: text/markdown
|
|
54
54
|
</div>
|
55
55
|
|
56
56
|
VisionAgent is a library that helps you utilize agent frameworks to generate code to
|
57
|
-
solve your vision task. Many current vision problems can easily take hours or days to
|
58
|
-
solve, you need to find the right model, figure out how to use it and program it to
|
59
|
-
accomplish the task you want. VisionAgent aims to provide an in-seconds experience by
|
60
|
-
allowing users to describe their problem in text and have the agent framework generate
|
61
|
-
code to solve the task for them. Check out our discord for updates and roadmaps!
|
57
|
+
solve your vision task. Check out our discord for updates and roadmaps!
|
62
58
|
|
63
59
|
## Table of Contents
|
64
60
|
- [🚀Quick Start](#quick-start)
|
@@ -82,19 +78,19 @@ To get started with the python library, you can install it using pip:
|
|
82
78
|
pip install vision-agent
|
83
79
|
```
|
84
80
|
|
85
|
-
Ensure you have an Anthropic key and an OpenAI API key and set in your environment
|
81
|
+
Ensure you have both an Anthropic key and an OpenAI API key set in your environment
|
86
82
|
variables (if you are using Azure OpenAI please see the Azure setup section):
|
87
83
|
|
88
84
|
```bash
|
89
|
-
export ANTHROPIC_API_KEY="your-api-key"
|
90
|
-
export OPENAI_API_KEY="your-api-key"
|
85
|
+
export ANTHROPIC_API_KEY="your-api-key" # needed for VisionAgent and VisionAgentCoder
|
86
|
+
export OPENAI_API_KEY="your-api-key" # needed for ToolRecommender
|
91
87
|
```
|
92
88
|
|
93
89
|
### Basic Usage
|
94
90
|
To get started you can just import the `VisionAgent` and start chatting with it:
|
95
91
|
```python
|
96
92
|
>>> from vision_agent.agent import VisionAgent
|
97
|
-
>>> agent = VisionAgent()
|
93
|
+
>>> agent = VisionAgent(verbosity=2)
|
98
94
|
>>> resp = agent("Hello")
|
99
95
|
>>> print(resp)
|
100
96
|
[{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "{'thoughts': 'The user has greeted me. I will respond with a greeting and ask how I can assist them.', 'response': 'Hello! How can I assist you today?', 'let_user_respond': True}"}]
|
@@ -103,7 +99,7 @@ To get started you can just import the `VisionAgent` and start chatting with it:
|
|
103
99
|
```
|
104
100
|
|
105
101
|
The chat messages are similar to `OpenAI`'s format with `role` and `content` keys but
|
106
|
-
in addition to those you can add `
|
102
|
+
in addition to those you can add `media` which is a list of media files that can either
|
107
103
|
be images or video files.
|
108
104
|
|
109
105
|
## Documentation
|
@@ -3,9 +3,9 @@ vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xl
|
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=WYJF11PfKXlRMPnogGz3s7c2TlWoxoGzuLiIptVYE1s,5524
|
5
5
|
vision_agent/agent/vision_agent.py,sha256=rr1P9iTbr7OsjgMYWCeIxQYI4cLwPWia3NIMJNi-9Yo,26110
|
6
|
-
vision_agent/agent/vision_agent_coder.py,sha256=
|
6
|
+
vision_agent/agent/vision_agent_coder.py,sha256=7Ko1c41dvdDbSP_Yw2yz_SlE3sO6hhlpf_oCjW0we2w,31749
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
|
8
|
-
vision_agent/agent/vision_agent_planner.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent_planner.py,sha256=Hy4vKiae7zIIKVPgLetGArbsjGRNVOXlxY9xhFgW-A0,18871
|
9
9
|
vision_agent/agent/vision_agent_planner_prompts.py,sha256=JDARUzko2HZdxkBtcy6wuP9DCCmbqhK_gnVgrjr6l1k,6691
|
10
10
|
vision_agent/agent/vision_agent_prompts.py,sha256=4329ll0kqCznRALIMl-rlKWGjN92p3bcRrz8R-cO744,13748
|
11
11
|
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
16
16
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
|
-
vision_agent/tools/__init__.py,sha256=
|
19
|
+
vision_agent/tools/__init__.py,sha256=UrpGFB1ACOZZCAyj8vNw0IHhKm9wGp0qHOtci2cqAMU,2825
|
20
20
|
vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
|
-
vision_agent/tools/tools.py,sha256=
|
23
|
+
vision_agent/tools/tools.py,sha256=72Ml8kxtaqIqrh4hiZQ81f5Mrl-7z-W1a6bCjIMBvoA,84952
|
24
24
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
25
25
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
26
26
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
|
32
|
-
vision_agent-0.2.190.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.190.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
-
vision_agent-0.2.190.dist-info/RECORD,,
|
32
|
+
vision_agent-0.2.191.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.191.dist-info/METADATA,sha256=eZGSUWuHBTmyStliR_oxFyoWMeLW0_0qP2ULx8y_-1E,18067
|
34
|
+
vision_agent-0.2.191.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.191.dist-info/RECORD,,
|
File without changes
|
File without changes
|