vision-agent 0.2.190__tar.gz → 0.2.192__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.190 → vision_agent-0.2.192}/PKG-INFO +7 -11
- {vision_agent-0.2.190 → vision_agent-0.2.192}/README.md +6 -10
- {vision_agent-0.2.190 → vision_agent-0.2.192}/pyproject.toml +1 -1
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_coder.py +0 -3
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_planner.py +2 -10
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/tools.py +13 -10
- {vision_agent-0.2.190 → vision_agent-0.2.192}/LICENSE +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/__init__.py +5 -5
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.190
|
3
|
+
Version: 0.2.192
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -54,11 +54,7 @@ Description-Content-Type: text/markdown
|
|
54
54
|
</div>
|
55
55
|
|
56
56
|
VisionAgent is a library that helps you utilize agent frameworks to generate code to
|
57
|
-
solve your vision task. Many current vision problems can easily take hours or days to
|
58
|
-
solve, you need to find the right model, figure out how to use it and program it to
|
59
|
-
accomplish the task you want. VisionAgent aims to provide an in-seconds experience by
|
60
|
-
allowing users to describe their problem in text and have the agent framework generate
|
61
|
-
code to solve the task for them. Check out our discord for updates and roadmaps!
|
57
|
+
solve your vision task. Check out our discord for updates and roadmaps!
|
62
58
|
|
63
59
|
## Table of Contents
|
64
60
|
- [🚀Quick Start](#quick-start)
|
@@ -82,19 +78,19 @@ To get started with the python library, you can install it using pip:
|
|
82
78
|
pip install vision-agent
|
83
79
|
```
|
84
80
|
|
85
|
-
Ensure you have an Anthropic key and an OpenAI API key and set in your environment
|
81
|
+
Ensure you have both an Anthropic key and an OpenAI API key and set in your environment
|
86
82
|
variables (if you are using Azure OpenAI please see the Azure setup section):
|
87
83
|
|
88
84
|
```bash
|
89
|
-
export ANTHROPIC_API_KEY="your-api-key"
|
90
|
-
export OPENAI_API_KEY="your-api-key"
|
85
|
+
export ANTHROPIC_API_KEY="your-api-key" # needed for VisionAgent and VisionAgentCoder
|
86
|
+
export OPENAI_API_KEY="your-api-key" # needed for ToolRecommender
|
91
87
|
```
|
92
88
|
|
93
89
|
### Basic Usage
|
94
90
|
To get started you can just import the `VisionAgent` and start chatting with it:
|
95
91
|
```python
|
96
92
|
>>> from vision_agent.agent import VisionAgent
|
97
|
-
>>> agent = VisionAgent()
|
93
|
+
>>> agent = VisionAgent(verbosity=2)
|
98
94
|
>>> resp = agent("Hello")
|
99
95
|
>>> print(resp)
|
100
96
|
[{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "{'thoughts': 'The user has greeted me. I will respond with a greeting and ask how I can assist them.', 'response': 'Hello! How can I assist you today?', 'let_user_respond': True}"}]
|
@@ -103,7 +99,7 @@ To get started you can just import the `VisionAgent` and start chatting with it:
|
|
103
99
|
```
|
104
100
|
|
105
101
|
The chat messages are similar to `OpenAI`'s format with `role` and `content` keys but
|
106
|
-
in addition to those you can add `
|
102
|
+
in addition to those you can add `media` which is a list of media files that can either
|
107
103
|
be images or video files.
|
108
104
|
|
109
105
|
## Documentation
|
@@ -12,11 +12,7 @@
|
|
12
12
|
</div>
|
13
13
|
|
14
14
|
VisionAgent is a library that helps you utilize agent frameworks to generate code to
|
15
|
-
solve your vision task. Many current vision problems can easily take hours or days to
|
16
|
-
solve, you need to find the right model, figure out how to use it and program it to
|
17
|
-
accomplish the task you want. VisionAgent aims to provide an in-seconds experience by
|
18
|
-
allowing users to describe their problem in text and have the agent framework generate
|
19
|
-
code to solve the task for them. Check out our discord for updates and roadmaps!
|
15
|
+
solve your vision task. Check out our discord for updates and roadmaps!
|
20
16
|
|
21
17
|
## Table of Contents
|
22
18
|
- [🚀Quick Start](#quick-start)
|
@@ -40,19 +36,19 @@ To get started with the python library, you can install it using pip:
|
|
40
36
|
pip install vision-agent
|
41
37
|
```
|
42
38
|
|
43
|
-
Ensure you have an Anthropic key and an OpenAI API key and set in your environment
|
39
|
+
Ensure you have both an Anthropic key and an OpenAI API key and set in your environment
|
44
40
|
variables (if you are using Azure OpenAI please see the Azure setup section):
|
45
41
|
|
46
42
|
```bash
|
47
|
-
export ANTHROPIC_API_KEY="your-api-key"
|
48
|
-
export OPENAI_API_KEY="your-api-key"
|
43
|
+
export ANTHROPIC_API_KEY="your-api-key" # needed for VisionAgent and VisionAgentCoder
|
44
|
+
export OPENAI_API_KEY="your-api-key" # needed for ToolRecommender
|
49
45
|
```
|
50
46
|
|
51
47
|
### Basic Usage
|
52
48
|
To get started you can just import the `VisionAgent` and start chatting with it:
|
53
49
|
```python
|
54
50
|
>>> from vision_agent.agent import VisionAgent
|
55
|
-
>>> agent = VisionAgent()
|
51
|
+
>>> agent = VisionAgent(verbosity=2)
|
56
52
|
>>> resp = agent("Hello")
|
57
53
|
>>> print(resp)
|
58
54
|
[{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "{'thoughts': 'The user has greeted me. I will respond with a greeting and ask how I can assist them.', 'response': 'Hello! How can I assist you today?', 'let_user_respond': True}"}]
|
@@ -61,7 +57,7 @@ To get started you can just import the `VisionAgent` and start chatting with it:
|
|
61
57
|
```
|
62
58
|
|
63
59
|
The chat messages are similar to `OpenAI`'s format with `role` and `content` keys but
|
64
|
-
in addition to those you can add `
|
60
|
+
in addition to those you can add `media` which is a list of media files that can either
|
65
61
|
be images or video files.
|
66
62
|
|
67
63
|
## Documentation
|
@@ -527,9 +527,6 @@ class VisionAgentCoder(Agent):
|
|
527
527
|
[{"role": "user", "content": "describe your task here..."}].
|
528
528
|
plan_context (PlanContext): The context of the plan, including the plans,
|
529
529
|
best_plan, plan_thoughts, tool_doc, and tool_output.
|
530
|
-
test_multi_plan (bool): Whether to test multiple plans or just the best plan.
|
531
|
-
custom_tool_names (Optional[List[str]]): A list of custom tool names to use
|
532
|
-
for the planner.
|
533
530
|
|
534
531
|
Returns:
|
535
532
|
Dict[str, Any]: A dictionary containing the code output by the
|
@@ -519,11 +519,7 @@ class OpenAIVisionAgentPlanner(VisionAgentPlanner):
|
|
519
519
|
code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
|
520
520
|
) -> None:
|
521
521
|
super().__init__(
|
522
|
-
planner=(
|
523
|
-
OpenAILMM(temperature=0.0, json_mode=True)
|
524
|
-
if planner is None
|
525
|
-
else planner
|
526
|
-
),
|
522
|
+
planner=(OpenAILMM(temperature=0.0) if planner is None else planner),
|
527
523
|
tool_recommender=tool_recommender,
|
528
524
|
verbosity=verbosity,
|
529
525
|
report_progress_callback=report_progress_callback,
|
@@ -567,11 +563,7 @@ class AzureVisionAgentPlanner(VisionAgentPlanner):
|
|
567
563
|
code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
|
568
564
|
) -> None:
|
569
565
|
super().__init__(
|
570
|
-
planner=(
|
571
|
-
AzureOpenAILMM(temperature=0.0, json_mode=True)
|
572
|
-
if planner is None
|
573
|
-
else planner
|
574
|
-
),
|
566
|
+
planner=(AzureOpenAILMM(temperature=0.0) if planner is None else planner),
|
575
567
|
tool_recommender=(
|
576
568
|
AzureSim(T.TOOLS_DF, sim_key="desc")
|
577
569
|
if tool_recommender is None
|
@@ -27,10 +27,7 @@ from vision_agent.tools.tool_utils import (
|
|
27
27
|
send_inference_request,
|
28
28
|
send_task_inference_request,
|
29
29
|
)
|
30
|
-
from vision_agent.tools.tools_types import (
|
31
|
-
JobStatus,
|
32
|
-
ODResponseData,
|
33
|
-
)
|
30
|
+
from vision_agent.tools.tools_types import JobStatus, ODResponseData
|
34
31
|
from vision_agent.utils.exceptions import FineTuneModelIsNotReady
|
35
32
|
from vision_agent.utils.execute import FileSerializer, MimeType
|
36
33
|
from vision_agent.utils.image_utils import (
|
@@ -641,8 +638,8 @@ def loca_visual_prompt_counting(
|
|
641
638
|
|
642
639
|
Parameters:
|
643
640
|
image (np.ndarray): The image that contains lot of instances of a single object
|
644
|
-
|
645
|
-
|
641
|
+
visual_prompt (Dict[str, List[float]]): Bounding box of the object in
|
642
|
+
format [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
|
646
643
|
|
647
644
|
Returns:
|
648
645
|
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
|
@@ -750,10 +747,10 @@ def countgd_example_based_counting(
|
|
750
747
|
|
751
748
|
Parameters:
|
752
749
|
visual_prompts (List[List[float]]): Bounding boxes of the object in format
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
750
|
+
[xmin, ymin, xmax, ymax]. Upto 3 bounding boxes can be provided. image
|
751
|
+
(np.ndarray): The image that contains multiple instances of the object.
|
752
|
+
box_threshold (float, optional): The threshold for detection. Defaults to
|
753
|
+
0.23.
|
757
754
|
|
758
755
|
Returns:
|
759
756
|
List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
|
@@ -1809,6 +1806,12 @@ def flux_image_inpainting(
|
|
1809
1806
|
):
|
1810
1807
|
raise ValueError("The image or mask does not have enough size for inpainting")
|
1811
1808
|
|
1809
|
+
if image.shape[0] % 8 != 0 or image.shape[1] % 8 != 0:
|
1810
|
+
new_height = (image.shape[0] // 8) * 8
|
1811
|
+
new_width = (image.shape[1] // 8) * 8
|
1812
|
+
image = cv2.resize(image, (new_width, new_height))
|
1813
|
+
mask = cv2.resize(mask, (new_width, new_height))
|
1814
|
+
|
1812
1815
|
if np.array_equal(mask, mask.astype(bool).astype(int)):
|
1813
1816
|
mask = np.where(mask > 0, 255, 0).astype(np.uint8)
|
1814
1817
|
else:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.190 → vision_agent-0.2.192}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -40,6 +40,7 @@ from .tools import (
|
|
40
40
|
florence2_roberta_vqa,
|
41
41
|
florence2_sam2_image,
|
42
42
|
florence2_sam2_video_tracking,
|
43
|
+
flux_image_inpainting,
|
43
44
|
generate_pose_image,
|
44
45
|
generate_soft_edge_image,
|
45
46
|
get_tool_documentation,
|
@@ -59,17 +60,16 @@ from .tools import (
|
|
59
60
|
overlay_segmentation_masks,
|
60
61
|
owl_v2_image,
|
61
62
|
owl_v2_video,
|
63
|
+
qwen2_vl_images_vqa,
|
64
|
+
qwen2_vl_video_vqa,
|
62
65
|
save_image,
|
63
66
|
save_json,
|
64
67
|
save_video,
|
68
|
+
siglip_classification,
|
65
69
|
template_match,
|
70
|
+
video_temporal_localization,
|
66
71
|
vit_image_classification,
|
67
72
|
vit_nsfw_classification,
|
68
|
-
qwen2_vl_images_vqa,
|
69
|
-
qwen2_vl_video_vqa,
|
70
|
-
video_temporal_localization,
|
71
|
-
flux_image_inpainting,
|
72
|
-
siglip_classification,
|
73
73
|
)
|
74
74
|
|
75
75
|
__new_tools__ = [
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|