vision-agent 0.2.47__tar.gz → 0.2.78__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {vision_agent-0.2.47 → vision_agent-0.2.78}/PKG-INFO +85 -26
  2. {vision_agent-0.2.47 → vision_agent-0.2.78}/README.md +80 -23
  3. {vision_agent-0.2.47 → vision_agent-0.2.78}/pyproject.toml +5 -3
  4. vision_agent-0.2.78/vision_agent/__init__.py +2 -0
  5. vision_agent-0.2.78/vision_agent/agent/__init__.py +2 -0
  6. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/agent.py +3 -1
  7. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/vision_agent.py +345 -177
  8. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/agent/vision_agent_prompts.py +4 -2
  9. vision_agent-0.2.78/vision_agent/lmm/__init__.py +1 -0
  10. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/lmm/lmm.py +150 -116
  11. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/tools/__init__.py +19 -6
  12. vision_agent-0.2.78/vision_agent/tools/tool_utils.py +67 -0
  13. vision_agent-0.2.78/vision_agent/tools/tools.py +1309 -0
  14. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/__init__.py +1 -1
  15. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/execute.py +41 -25
  16. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/sim.py +45 -3
  17. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/video.py +3 -5
  18. vision_agent-0.2.47/vision_agent/__init__.py +0 -3
  19. vision_agent-0.2.47/vision_agent/agent/__init__.py +0 -2
  20. vision_agent-0.2.47/vision_agent/agent/agent_coder.py +0 -216
  21. vision_agent-0.2.47/vision_agent/agent/agent_coder_prompts.py +0 -135
  22. vision_agent-0.2.47/vision_agent/agent/data_interpreter.py +0 -475
  23. vision_agent-0.2.47/vision_agent/agent/data_interpreter_prompts.py +0 -186
  24. vision_agent-0.2.47/vision_agent/agent/easytool.py +0 -346
  25. vision_agent-0.2.47/vision_agent/agent/easytool_prompts.py +0 -89
  26. vision_agent-0.2.47/vision_agent/agent/easytool_v2.py +0 -778
  27. vision_agent-0.2.47/vision_agent/agent/easytool_v2_prompts.py +0 -152
  28. vision_agent-0.2.47/vision_agent/agent/reflexion.py +0 -299
  29. vision_agent-0.2.47/vision_agent/agent/reflexion_prompts.py +0 -100
  30. vision_agent-0.2.47/vision_agent/llm/__init__.py +0 -1
  31. vision_agent-0.2.47/vision_agent/llm/llm.py +0 -176
  32. vision_agent-0.2.47/vision_agent/lmm/__init__.py +0 -1
  33. vision_agent-0.2.47/vision_agent/tools/easytool_tools.py +0 -1242
  34. vision_agent-0.2.47/vision_agent/tools/tool_utils.py +0 -30
  35. vision_agent-0.2.47/vision_agent/tools/tools.py +0 -826
  36. {vision_agent-0.2.47 → vision_agent-0.2.78}/LICENSE +0 -0
  37. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/fonts/__init__.py +0 -0
  38. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  39. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/tools/prompts.py +0 -0
  40. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/image_utils.py +0 -0
  41. {vision_agent-0.2.47 → vision_agent-0.2.78}/vision_agent/utils/type_defs.py +0 -0
--- vision_agent-0.2.47/PKG-INFO
+++ vision_agent-0.2.78/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.47
+Version: 0.2.78
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: e2b (>=0.17.0,<0.18.0)
-Requires-Dist: e2b-code-interpreter (>=0.0.7,<0.0.8)
+Requires-Dist: e2b (>=0.17.1,<0.18.0)
+Requires-Dist: e2b-code-interpreter (==0.0.11a1)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
@@ -21,7 +21,9 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
 Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
+Requires-Dist: pytube (==15.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -38,7 +40,6 @@ Description-Content-Type: text/markdown
 <img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">
 
 # 🔍🤖 Vision Agent
-
 [![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
 ![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
 [![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
@@ -52,9 +53,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
 allowing users to describe their problem in text and have the agent framework generate
 code to solve the task for them. Check out our discord for updates and roadmaps!
 
+
+## Web Application
+
+Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
+
 ## Documentation
 
-- [Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
+[Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
 
 
 ## Getting Started
@@ -72,7 +78,11 @@ using Azure OpenAI please see the Azure setup section):
 export OPENAI_API_KEY="your-api-key"
 ```
 
+### Important Note on API Usage
+Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)
+
 ### Vision Agent
+#### Basic Usage
 You can interact with the agent as you would with any LLM or LMM model:
 
 ```python
@@ -88,28 +98,28 @@ from vision_agent.tools import load_image, grounding_sam
 def calculate_filled_percentage(image_path: str) -> float:
     # Step 1: Load the image
     image = load_image(image_path)
-
+
     # Step 2: Segment the jar
     jar_segments = grounding_sam(prompt="jar", image=image)
-
+
     # Step 3: Segment the coffee beans
     coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
-
+
     # Step 4: Calculate the area of the segmented jar
     jar_area = 0
     for segment in jar_segments:
         jar_area += segment['mask'].sum()
-
+
     # Step 5: Calculate the area of the segmented coffee beans
     coffee_beans_area = 0
     for segment in coffee_beans_segments:
         coffee_beans_area += segment['mask'].sum()
-
+
     # Step 6: Compute the percentage of the jar area that is filled with coffee beans
     if jar_area == 0:
         return 0.0  # To avoid division by zero
     filled_percentage = (coffee_beans_area / jar_area) * 100
-
+
     # Step 7: Return the computed percentage
     return filled_percentage
 ```
@@ -121,10 +131,12 @@ mode by passing in the verbose argument:
 >>> agent = VisionAgent(verbose=2)
 ```
 
-You can also have it return more information by calling `chat_with_workflow`:
+#### Detailed Usage
+You can also have it return more information by calling `chat_with_workflow`. The format
+of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 
 ```python
->>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?"}], media="jar.jpg")
+>>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
 >>> print(results)
 {
     "code": "from vision_agent.tools import ..."
@@ -135,19 +147,45 @@ You can also have it return more information by calling `chat_with_workflow`:
 }
 ```
 
-With this you can examine more detailed information such as the etesting code, testing
+With this you can examine more detailed information such as the testing code, testing
 results, plan or working memory it used to complete the task.
 
+#### Multi-turn conversations
+You can have multi-turn conversations with vision-agent as well, giving it feedback on
+the code and having it update. You just need to add the code as a response from the
+assistant:
+
+```python
+agent = va.agent.VisionAgent(verbosity=2)
+conv = [
+    {
+        "role": "user",
+        "content": "Are these workers wearing safety gear? Output only a True or False value.",
+        "media": ["workers.png"],
+    }
+]
+result = agent.chat_with_workflow(conv)
+code = result["code"]
+conv.append({"role": "assistant", "content": code})
+conv.append(
+    {
+        "role": "user",
+        "content": "Can you also return the number of workers wearing safety gear?",
+    }
+)
+result = agent.chat_with_workflow(conv)
+```
+
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
-while others are hosted for you. You can also ask an LLM directly to build a tool for
+while others are hosted for you. You can also ask an LMM directly to build a tool for
 you. For example:
 
 ```python
 >>> import vision_agent as va
->>> llm = va.llm.OpenAILLM()
->>> detector = llm.generate_detector("Can you build a jar detector for me?")
->>> detector("jar.jpg")
+>>> lmm = va.lmm.OpenAILMM()
+>>> detector = lmm.generate_detector("Can you build a jar detector for me?")
+>>> detector(va.tools.load_image("jar.jpg"))
 [{"labels": ["jar",],
   "scores": [0.99],
   "bboxes": [
@@ -185,23 +223,44 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you can set the environment variable:
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
->>> import vision_agent as va
->>> agent = va.agent.VisionAgent(
->>>     planner=va.llm.AzureOpenAILLM(),
->>>     coder=va.lmm.AzureOpenAILMM(),
->>>     tester=va.lmm.AzureOpenAILMM(),
->>>     debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.AzureVisionAgent()
 ```
 
+******************************************************************************************************************************
+
+### Q&A
+
+#### How to get started with OpenAI API credits
+
+1. Visit the[OpenAI API platform](https://beta.openai.com/signup/) to sign up for an API key.
+2. Follow the instructions to purchase and manage your API credits.
+3. Ensure your API key is correctly configured in your project settings.
+
+Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
+
+For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
 
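The Azure hunk replaces the per-role construction (`planner`/`coder`/`tester`/`debugger`, each an Azure LLM or LMM) with a single `AzureVisionAgent` class that reads its configuration from the environment. A small sketch that checks the four variables named above before constructing the agent; the check itself is illustrative scaffolding, not part of the package:

```python
import os

import vision_agent as va

# Variable names as listed in the Azure Setup section above.
required = (
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME",
    "AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME",
)
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError(f"Missing Azure OpenAI settings: {missing}")

agent = va.agent.AzureVisionAgent()  # replaces the 0.2.47 multi-model constructor
```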
--- vision_agent-0.2.47/README.md
+++ vision_agent-0.2.78/README.md
@@ -2,7 +2,6 @@
 <img alt="vision_agent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo.jpg?raw=true">
 
 # 🔍🤖 Vision Agent
-
 [![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
 ![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
 [![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
@@ -16,9 +15,14 @@ accomplish the task you want. Vision Agent aims to provide an in-seconds experie
 allowing users to describe their problem in text and have the agent framework generate
 code to solve the task for them. Check out our discord for updates and roadmaps!
 
+
+## Web Application
+
+Try Vision Agent live on [va.landing.ai](https://va.landing.ai/)
+
 ## Documentation
 
-- [Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
+[Vision Agent Library Docs](https://landing-ai.github.io/vision-agent/)
 
 
 ## Getting Started
@@ -36,7 +40,11 @@ using Azure OpenAI please see the Azure setup section):
 export OPENAI_API_KEY="your-api-key"
 ```
 
+### Important Note on API Usage
+Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)
+
 ### Vision Agent
+#### Basic Usage
 You can interact with the agent as you would with any LLM or LMM model:
 
 ```python
@@ -52,28 +60,28 @@ from vision_agent.tools import load_image, grounding_sam
 def calculate_filled_percentage(image_path: str) -> float:
     # Step 1: Load the image
     image = load_image(image_path)
-
+
     # Step 2: Segment the jar
     jar_segments = grounding_sam(prompt="jar", image=image)
-
+
     # Step 3: Segment the coffee beans
     coffee_beans_segments = grounding_sam(prompt="coffee beans", image=image)
-
+
     # Step 4: Calculate the area of the segmented jar
     jar_area = 0
     for segment in jar_segments:
         jar_area += segment['mask'].sum()
-
+
     # Step 5: Calculate the area of the segmented coffee beans
     coffee_beans_area = 0
     for segment in coffee_beans_segments:
         coffee_beans_area += segment['mask'].sum()
-
+
     # Step 6: Compute the percentage of the jar area that is filled with coffee beans
     if jar_area == 0:
         return 0.0  # To avoid division by zero
     filled_percentage = (coffee_beans_area / jar_area) * 100
-
+
     # Step 7: Return the computed percentage
     return filled_percentage
 ```
@@ -85,10 +93,12 @@ mode by passing in the verbose argument:
 >>> agent = VisionAgent(verbose=2)
 ```
 
-You can also have it return more information by calling `chat_with_workflow`:
+#### Detailed Usage
+You can also have it return more information by calling `chat_with_workflow`. The format
+of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
 
 ```python
->>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?"}], media="jar.jpg")
+>>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
 >>> print(results)
 {
     "code": "from vision_agent.tools import ..."
@@ -99,19 +109,45 @@ You can also have it return more information by calling `chat_with_workflow`:
 }
 ```
 
-With this you can examine more detailed information such as the etesting code, testing
+With this you can examine more detailed information such as the testing code, testing
 results, plan or working memory it used to complete the task.
 
+#### Multi-turn conversations
+You can have multi-turn conversations with vision-agent as well, giving it feedback on
+the code and having it update. You just need to add the code as a response from the
+assistant:
+
+```python
+agent = va.agent.VisionAgent(verbosity=2)
+conv = [
+    {
+        "role": "user",
+        "content": "Are these workers wearing safety gear? Output only a True or False value.",
+        "media": ["workers.png"],
+    }
+]
+result = agent.chat_with_workflow(conv)
+code = result["code"]
+conv.append({"role": "assistant", "content": code})
+conv.append(
+    {
+        "role": "user",
+        "content": "Can you also return the number of workers wearing safety gear?",
+    }
+)
+result = agent.chat_with_workflow(conv)
+```
+
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
-while others are hosted for you. You can also ask an LLM directly to build a tool for
+while others are hosted for you. You can also ask an LMM directly to build a tool for
 you. For example:
 
 ```python
 >>> import vision_agent as va
->>> llm = va.llm.OpenAILLM()
->>> detector = llm.generate_detector("Can you build a jar detector for me?")
->>> detector("jar.jpg")
+>>> lmm = va.lmm.OpenAILMM()
+>>> detector = lmm.generate_detector("Can you build a jar detector for me?")
+>>> detector(va.tools.load_image("jar.jpg"))
 [{"labels": ["jar",],
   "scores": [0.99],
   "bboxes": [
@@ -149,22 +185,43 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you can set the environment variable:
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
->>> import vision_agent as va
->>> agent = va.agent.VisionAgent(
->>>     planner=va.llm.AzureOpenAILLM(),
->>>     coder=va.lmm.AzureOpenAILMM(),
->>>     tester=va.lmm.AzureOpenAILMM(),
->>>     debugger=va.lmm.AzureOpenAILMM(),
->>> )
+import vision_agent as va
+agent = va.agent.AzureVisionAgent()
 ```
 
+******************************************************************************************************************************
+
+### Q&A
+
+#### How to get started with OpenAI API credits
+
+1. Visit the[OpenAI API platform](https://beta.openai.com/signup/) to sign up for an API key.
+2. Follow the instructions to purchase and manage your API credits.
+3. Ensure your API key is correctly configured in your project settings.
+
+Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
+
+For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
--- vision_agent-0.2.47/pyproject.toml
+++ vision_agent-0.2.78/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.47"
+version = "0.2.78"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -34,9 +34,11 @@ nbformat = "^5.10.4"
 rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
-e2b = "^0.17.0"
-e2b-code-interpreter = "^0.0.7"
+e2b = "^0.17.1"
+e2b-code-interpreter = "0.0.11a1"
 tenacity = "^8.3.0"
+pillow-heif = "^0.16.0"
+pytube = "15.0.0"
 
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"
--- /dev/null
+++ vision_agent-0.2.78/vision_agent/__init__.py
@@ -0,0 +1,2 @@
+from .agent import Agent
+from .lmm import LMM, OpenAILMM
--- /dev/null
+++ vision_agent-0.2.78/vision_agent/agent/__init__.py
@@ -0,0 +1,2 @@
+from .agent import Agent
+from .vision_agent import AzureVisionAgent, VisionAgent
--- vision_agent-0.2.47/vision_agent/agent/agent.py
+++ vision_agent-0.2.78/vision_agent/agent/agent.py
@@ -2,12 +2,14 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
+from vision_agent.lmm import Message
+
 
 class Agent(ABC):
     @abstractmethod
     def __call__(
         self,
-        input: Union[List[Dict[str, str]], str],
+        input: Union[str, List[Message]],
         media: Optional[Union[str, Path]] = None,
     ) -> str:
         pass
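With this hunk, `Agent.__call__` accepts either a bare string or a list of `Message` dictionaries imported from `vision_agent.lmm`. A sketch of both call forms against the shipped `VisionAgent`; the `role`/`content`/`media` keys are assumed from the README examples above rather than from a `Message` definition visible in this diff:

```python
from vision_agent.agent import VisionAgent
from vision_agent.lmm import Message

agent = VisionAgent()

# Form 1: a bare string, with optional media via the keyword argument that
# survives in the abstract signature.
answer = agent("How many jars are visible?", media="jar.jpg")

# Form 2: an explicit chat history of Message dicts.
chat: list[Message] = [
    {"role": "user", "content": "How many jars are visible?", "media": ["jar.jpg"]}
]
answer = agent(chat)
print(answer)
```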