vision-agent 0.2.47__tar.gz → 0.2.78__tar.gz
This diff shows the changes between two publicly released versions of the package, exactly as they appear in their public registry, and is provided for informational purposes only.
- {vision_agent-0.2.47 → vision_agent-0.2.78}/PKG-INFO +85 -26
- {vision_agent-0.2.47 → vision_agent-0.2.78}/README.md +80 -23
- {vision_agent-0.2.47 → vision_agent-0.2.78}/pyproject.toml +5 -3
- vision_agent-0.2.78/vision_agent/__init__.py +2 -0
- vision_agent-0.2.78/vision_agent/agent/__init__.py +2 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/agent.py +3 -1
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/vision_agent.py +345 -177
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/vision_agent_prompts.py +4 -2
- vision_agent-0.2.78/vision_agent/lmm/__init__.py +1 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/lmm/lmm.py +150 -116
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/tools/__init__.py +19 -6
- vision_agent-0.2.78/vision_agent/tools/tool_utils.py +67 -0
- vision_agent-0.2.78/vision_agent/tools/tools.py +1309 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/__init__.py +1 -1
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/execute.py +41 -25
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/sim.py +45 -3
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/video.py +3 -5
- vision_agent-0.2.47/vision_agent/__init__.py +0 -3
- vision_agent-0.2.47/vision_agent/agent/__init__.py +0 -2
- vision_agent-0.2.47/vision_agent/agent/agent_coder.py +0 -216
- vision_agent-0.2.47/vision_agent/agent/agent_coder_prompts.py +0 -135
- vision_agent-0.2.47/vision_agent/agent/data_interpreter.py +0 -475
- vision_agent-0.2.47/vision_agent/agent/data_interpreter_prompts.py +0 -186
- vision_agent-0.2.47/vision_agent/agent/easytool.py +0 -346
- vision_agent-0.2.47/vision_agent/agent/easytool_prompts.py +0 -89
- vision_agent-0.2.47/vision_agent/agent/easytool_v2.py +0 -778
- vision_agent-0.2.47/vision_agent/agent/easytool_v2_prompts.py +0 -152
- vision_agent-0.2.47/vision_agent/agent/reflexion.py +0 -299
- vision_agent-0.2.47/vision_agent/agent/reflexion_prompts.py +0 -100
- vision_agent-0.2.47/vision_agent/llm/__init__.py +0 -1
- vision_agent-0.2.47/vision_agent/llm/llm.py +0 -176
- vision_agent-0.2.47/vision_agent/lmm/__init__.py +0 -1
- vision_agent-0.2.47/vision_agent/tools/easytool_tools.py +0 -1242
- vision_agent-0.2.47/vision_agent/tools/tool_utils.py +0 -30
- vision_agent-0.2.47/vision_agent/tools/tools.py +0 -826
- {vision_agent-0.2.47 → vision_agent-0.2.78}/LICENSE +0 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/type_defs.py +0 -0
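
The file listing alone tells most of the story: the `llm` module and the EasyTool, AgentCoder, DataInterpreter, and Reflexion agents are gone, `tools.py` and `tool_utils.py` are rewritten, and `Message` now lives in `vision_agent.lmm`. A minimal sketch of the resulting entry points, pieced together from the README diff below (the exact names are assumptions inferred from that diff, not a verified API):

```python
# Sketch of the 0.2.78 surface implied by the listing and the README diff below.
# va.agent.VisionAgent and va.lmm.OpenAILMM appear verbatim in the diff; anything
# else here is illustrative only.
import vision_agent as va

agent = va.agent.VisionAgent(verbosity=2)   # single agent replacing the removed variants
lmm = va.lmm.OpenAILMM()                    # vision_agent.llm.OpenAILLM no longer exists
```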
{vision_agent-0.2.47 → vision_agent-0.2.78}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.47
+Version: 0.2.78
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: e2b (>=0.17.
-Requires-Dist: e2b-code-interpreter (
+Requires-Dist: e2b (>=0.17.1,<0.18.0)
+Requires-Dist: e2b-code-interpreter (==0.0.11a1)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
@@ -21,7 +21,9 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
 Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
+Requires-Dist: pytube (==15.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -38,7 +40,6 @@ Description-Content-Type: text/markdown
 <img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">
 
 # 🔍🤖 Vision Agent
-
 [](https://discord.gg/wPdN8RCYew)
 
 [](https://badge.fury.io/py/vision-agent)
@@ -52,9 +53,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
 allowing users to describe their problem in text and have the agent framework generate
 code to solve the task for them. Check out our discord for updates and roadmaps!
 
+
+## Web Application
+
+Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
+
 ## Documentation
 
-
+[Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
 
 
 ## Getting Started
@@ -72,7 +78,11 @@ using Azure OpenAI please see the Azure setup section):
 export OPENAI_API_KEY="your-api-key"
 ```
 
+### Important Note on API Usage
+Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)
+
 ### Vision Agent
+#### Basic Usage
 You can interact with the agent as you would with any LLM or LMM model:
 
 ```python
@@ -88,28 +98,28 @@ from vision_agent.tools import load_image, grounding_sam
 def calculate_filled_percentage(image_path: str) -> float:
     # Step 1: Load the image
     image = load_image(image_path)
-
+
     # Step 2: Segment the jar
     jar_segments = grounding_sam(prompt="jar", image=image)
-
+
     # Step 3: Segment the coffee beans
     coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
-
+
     # Step 4: Calculate the area of the segmented jar
     jar_area = 0
     for segment in jar_segments:
         jar_area += segment['mask'].sum()
-
+
     # Step 5: Calculate the area of the segmented coffee beans
     coffee_beans_area = 0
     for segment in coffee_beans_segments:
         coffee_beans_area += segment['mask'].sum()
-
+
     # Step 6: Compute the percentage of the jar area that is filled with coffee beans
     if jar_area == 0:
         return 0.0  # To avoid division by zero
     filled_percentage = (coffee_beans_area / jar_area) * 100
-
+
     # Step 7: Return the computed percentage
     return filled_percentage
 ```
@@ -121,10 +131,12 @@ mode by passing in the verbose argument:
 >>> agent = VisionAgent(verbose=2)
 ```
 
-You can also have it return more information by calling `chat_with_workflow`:
+#### Detailed Usage
+You can also have it return more information by calling `chat_with_workflow`. The format
+of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 
 ```python
->>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?"
+>>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
 >>> print(results)
 {
     "code": "from vision_agent.tools import ..."
@@ -135,19 +147,45 @@ You can also have it return more information by calling `chat_with_workflow`:
 }
 ```
 
-With this you can examine more detailed information such as the
+With this you can examine more detailed information such as the testing code, testing
 results, plan or working memory it used to complete the task.
 
+#### Multi-turn conversations
+You can have multi-turn conversations with vision-agent as well, giving it feedback on
+the code and having it update. You just need to add the code as a response from the
+assistant:
+
+```python
+agent = va.agent.VisionAgent(verbosity=2)
+conv = [
+    {
+        "role": "user",
+        "content": "Are these workers wearing safety gear? Output only a True or False value.",
+        "media": ["workers.png"],
+    }
+]
+result = agent.chat_with_workflow(conv)
+code = result["code"]
+conv.append({"role": "assistant", "content": code})
+conv.append(
+    {
+        "role": "user",
+        "content": "Can you also return the number of workers wearing safety gear?",
+    }
+)
+result = agent.chat_with_workflow(conv)
+```
+
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
-while others are hosted for you. You can also ask an
+while others are hosted for you. You can also ask an LMM directly to build a tool for
 you. For example:
 
 ```python
 >>> import vision_agent as va
->>>
->>> detector =
->>> detector("jar.jpg")
+>>> lmm = va.lmm.OpenAILMM()
+>>> detector = lmm.generate_detector("Can you build a jar detector for me?")
+>>> detector(va.tools.load_image("jar.jpg"))
 [{"labels": ["jar",],
 "scores": [0.99],
 "bboxes": [
@@ -185,23 +223,44 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
-
-
->>> planner=va.llm.AzureOpenAILLM(),
->>> coder=va.lmm.AzureOpenAILMM(),
->>> tester=va.lmm.AzureOpenAILMM(),
->>> debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.AzureVisionAgent()
 ```
 
+******************************************************************************************************************************
+
+### Q&A
+
+#### How to get started with OpenAI API credits
+
+1. Visit the[OpenAI API platform](https://beta.openai.com/signup/) to sign up for an API key.
+2. Follow the instructions to purchase and manage your API credits.
+3. Ensure your API key is correctly configured in your project settings.
+
+Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
+
+For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
 
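
The Azure section added above boils down to four environment variables plus a one-line `AzureVisionAgent` constructor. A hedged sketch of wiring that up from Python, using the variable names exactly as they appear in the diff (endpoint and deployment names are placeholders for your own Azure resource):

```python
import os

import vision_agent as va

# Variable names are taken verbatim from the diff above; values are placeholders.
os.environ["AZURE_OPENAI_API_KEY"] = "your-api-key"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-resource.openai.azure.com/"
os.environ["AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME"] = "your_gpt4o_model_deployment_name"
os.environ["AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME"] = "your_embedding_model_deployment_name"

agent = va.agent.AzureVisionAgent()
```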
{vision_agent-0.2.47 → vision_agent-0.2.78}/README.md

@@ -2,7 +2,6 @@
 <img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">
 
 # 🔍🤖 Vision Agent
-
 [](https://discord.gg/wPdN8RCYew)
 
 [](https://badge.fury.io/py/vision-agent)
@@ -16,9 +15,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
 allowing users to describe their problem in text and have the agent framework generate
 code to solve the task for them. Check out our discord for updates and roadmaps!
 
+
+## Web Application
+
+Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
+
 ## Documentation
 
-
+[Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
 
 
 ## Getting Started
@@ -36,7 +40,11 @@ using Azure OpenAI please see the Azure setup section):
 export OPENAI_API_KEY="your-api-key"
 ```
 
+### Important Note on API Usage
+Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)
+
 ### Vision Agent
+#### Basic Usage
 You can interact with the agent as you would with any LLM or LMM model:
 
 ```python
@@ -52,28 +60,28 @@ from vision_agent.tools import load_image, grounding_sam
 def calculate_filled_percentage(image_path: str) -> float:
     # Step 1: Load the image
     image = load_image(image_path)
-
+
     # Step 2: Segment the jar
     jar_segments = grounding_sam(prompt="jar", image=image)
-
+
     # Step 3: Segment the coffee beans
     coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
-
+
     # Step 4: Calculate the area of the segmented jar
     jar_area = 0
     for segment in jar_segments:
         jar_area += segment['mask'].sum()
-
+
     # Step 5: Calculate the area of the segmented coffee beans
     coffee_beans_area = 0
     for segment in coffee_beans_segments:
         coffee_beans_area += segment['mask'].sum()
-
+
     # Step 6: Compute the percentage of the jar area that is filled with coffee beans
     if jar_area == 0:
         return 0.0  # To avoid division by zero
     filled_percentage = (coffee_beans_area / jar_area) * 100
-
+
     # Step 7: Return the computed percentage
     return filled_percentage
 ```
@@ -85,10 +93,12 @@ mode by passing in the verbose argument:
 >>> agent = VisionAgent(verbose=2)
 ```
 
-You can also have it return more information by calling `chat_with_workflow`:
+#### Detailed Usage
+You can also have it return more information by calling `chat_with_workflow`. The format
+of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 
 ```python
->>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?"
+>>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
 >>> print(results)
 {
     "code": "from vision_agent.tools import ..."
@@ -99,19 +109,45 @@ You can also have it return more information by calling `chat_with_workflow`:
 }
 ```
 
-With this you can examine more detailed information such as the
+With this you can examine more detailed information such as the testing code, testing
 results, plan or working memory it used to complete the task.
 
+#### Multi-turn conversations
+You can have multi-turn conversations with vision-agent as well, giving it feedback on
+the code and having it update. You just need to add the code as a response from the
+assistant:
+
+```python
+agent = va.agent.VisionAgent(verbosity=2)
+conv = [
+    {
+        "role": "user",
+        "content": "Are these workers wearing safety gear? Output only a True or False value.",
+        "media": ["workers.png"],
+    }
+]
+result = agent.chat_with_workflow(conv)
+code = result["code"]
+conv.append({"role": "assistant", "content": code})
+conv.append(
+    {
+        "role": "user",
+        "content": "Can you also return the number of workers wearing safety gear?",
+    }
+)
+result = agent.chat_with_workflow(conv)
+```
+
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
-while others are hosted for you. You can also ask an
+while others are hosted for you. You can also ask an LMM directly to build a tool for
 you. For example:
 
 ```python
 >>> import vision_agent as va
->>>
->>> detector =
->>> detector("jar.jpg")
+>>> lmm = va.lmm.OpenAILMM()
+>>> detector = lmm.generate_detector("Can you build a jar detector for me?")
+>>> detector(va.tools.load_image("jar.jpg"))
 [{"labels": ["jar",],
 "scores": [0.99],
 "bboxes": [
@@ -149,22 +185,43 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
-
-
->>> planner=va.llm.AzureOpenAILLM(),
->>> coder=va.lmm.AzureOpenAILMM(),
->>> tester=va.lmm.AzureOpenAILMM(),
->>> debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.AzureVisionAgent()
 ```
 
+******************************************************************************************************************************
+
+### Q&A
+
+#### How to get started with OpenAI API credits
+
+1. Visit the[OpenAI API platform](https://beta.openai.com/signup/) to sign up for an API key.
+2. Follow the instructions to purchase and manage your API credits.
+3. Ensure your API key is correctly configured in your project settings.
+
+Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
+
+For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
{vision_agent-0.2.47 → vision_agent-0.2.78}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.47"
+version = "0.2.78"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -34,9 +34,11 @@ nbformat = "^5.10.4"
 rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
-e2b = "^0.17.
-e2b-code-interpreter = "
+e2b = "^0.17.1"
+e2b-code-interpreter = "0.0.11a1"
 tenacity = "^8.3.0"
+pillow-heif = "^0.16.0"
+pytube = "15.0.0"
 
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"
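
Two genuinely new runtime dependencies appear here: `pillow-heif` and `pytube`. The diff does not say why they were added; they most plausibly back HEIC image loading and YouTube ingestion in the rewritten tools module, but that is an inference. The snippet below only demonstrates the two libraries themselves, independent of vision-agent, with placeholder file and URL values:

```python
from PIL import Image
from pillow_heif import register_heif_opener
from pytube import YouTube

# pillow-heif registers a HEIF/HEIC decoder so Pillow's normal open() works.
register_heif_opener()
img = Image.open("photo.heic")  # hypothetical local HEIC file

# pytube downloads a progressive MP4 stream for a given video URL (placeholder URL).
stream = YouTube("https://www.youtube.com/watch?v=dQw4w9WgXcQ").streams.filter(
    progressive=True, file_extension="mp4"
).first()
stream.download(filename="clip.mp4")
```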
{vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/agent.py

@@ -2,12 +2,14 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
+from vision_agent.lmm import Message
+
 
 class Agent(ABC):
     @abstractmethod
     def __call__(
         self,
-        input: Union[
+        input: Union[str, List[Message]],
         media: Optional[Union[str, Path]] = None,
     ) -> str:
         pass
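
The only change to the `Agent` base class is the input type: `__call__` now accepts either a string or a `List[Message]` imported from `vision_agent.lmm`. Judging from the chat examples in the README diff, a `Message` is a role/content dictionary with an optional `media` list of file paths; the alias below is an assumption reconstructed from those examples, not the actual definition shipped in 0.2.78:

```python
from typing import Dict, List, Sequence, Union

# Assumed shape of vision_agent.lmm.Message, inferred from the README examples above.
Message = Dict[str, Union[str, Sequence[str]]]

conversation: List[Message] = [
    {
        "role": "user",
        "content": "Are these workers wearing safety gear? Output only a True or False value.",
        "media": ["workers.png"],
    },
    # On later turns, the agent's generated code is fed back as an assistant message.
    {"role": "assistant", "content": "from vision_agent.tools import ..."},
]
```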