vision-agent 0.2.59__tar.gz → 0.2.61__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- {vision_agent-0.2.59 → vision_agent-0.2.61}/PKG-INFO +17 -4
- {vision_agent-0.2.59 → vision_agent-0.2.61}/README.md +14 -1
- {vision_agent-0.2.59 → vision_agent-0.2.61}/pyproject.toml +3 -3
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent.py +95 -63
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/lmm/lmm.py +8 -2
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/sim.py +7 -1
- {vision_agent-0.2.59 → vision_agent-0.2.61}/LICENSE +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/video.py +0 -0
**{vision_agent-0.2.59 → vision_agent-0.2.61}/PKG-INFO**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.59
+Version: 0.2.61
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: e2b (>=0.17.
-Requires-Dist: e2b-code-interpreter (>=0.0.
+Requires-Dist: e2b (>=0.17.1,<0.18.0)
+Requires-Dist: e2b-code-interpreter (>=0.0.9,<0.0.10)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
@@ -218,13 +218,26 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
````
**{vision_agent-0.2.59 → vision_agent-0.2.61}/README.md**

````diff
@@ -182,13 +182,26 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
````
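The two deployment-name variables added above are consumed by the Azure classes patched later in this diff: `AzureOpenAILMM` in vision_agent/lmm/lmm.py reads the chat deployment, and `AzureSim` in vision_agent/utils/sim.py reads the embedding deployment. As a quick sanity check for the new configuration, something like the following sketch works; it is not taken from the README (whose own Python example is cut off in this view):

```python
import os

# The four variables the updated Azure setup relies on; the two
# *_DEPLOYMENT_NAME entries are new in 0.2.61.
REQUIRED = (
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME",
    "AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME",
)

for var in REQUIRED:
    print(f"{var}: {'set' if os.getenv(var) else 'MISSING'}")
```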
**{vision_agent-0.2.59 → vision_agent-0.2.61}/pyproject.toml**

```diff
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.59"
+version = "0.2.61"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -34,8 +34,8 @@ nbformat = "^5.10.4"
 rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
-e2b = "^0.17.
-e2b-code-interpreter = "^0.0.
+e2b = "^0.17.1"
+e2b-code-interpreter = "^0.0.9"
 tenacity = "^8.3.0"
 
 [tool.poetry.group.dev.dependencies]
```
**{vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent.py**

```diff
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
 
 from PIL import Image
+from langsmith import traceable
 from rich.console import Console
 from rich.style import Style
 from rich.syntax import Syntax
@@ -130,6 +131,7 @@ def extract_image(
     return new_media
 
 
+@traceable
 def write_plan(
     chat: List[Message],
     tool_desc: str,
@@ -147,6 +149,7 @@ def write_plan(
     return extract_json(model.chat(chat))["plan"]  # type: ignore
 
 
+@traceable
 def write_code(
     coder: LMM,
     chat: List[Message],
@@ -167,6 +170,7 @@ def write_code(
     return extract_code(coder(chat))
 
 
+@traceable
 def write_test(
     tester: LMM,
     chat: List[Message],
@@ -191,6 +195,7 @@ def write_test(
     return extract_code(tester(chat))
 
 
+@traceable
 def reflect(
     chat: List[Message],
     plan: str,
```
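These hunks wrap the plan/code/test/reflect helpers in LangSmith's `@traceable` decorator, so each call is recorded as a run when tracing is configured. A minimal sketch of what the decorator does, assuming a standard LangSmith setup (the environment variables below are LangSmith's convention, not something this diff sets):

```python
import os
from langsmith import traceable

# Assumed LangSmith configuration; without these the decorator is
# effectively a pass-through and the function runs normally.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your-langsmith-api-key"

@traceable
def write_plan_stub(chat: list) -> dict:
    # Inputs and outputs of each call are logged as a LangSmith run.
    return {"plan": ["load image", "detect objects", "count them"]}

print(write_plan_stub([{"role": "user", "content": "count the cars"}]))
```

Since `langsmith` was already pinned in pyproject.toml (`^0.1.58`, unchanged above), this adds observability without any new dependency.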
**{vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent.py (continued)**

```diff
@@ -266,70 +271,19 @@ def write_and_test_code(
     count = 0
     new_working_memory: List[Dict[str, str]] = []
     while not result.success and count < max_retries:
-        log_progress(
-            {
-                "type": "code",
-                "status": "started",
-            }
-        )
-        fixed_code_and_test = extract_json(
-            debugger(
-                FIX_BUG.format(
-                    code=code,
-                    tests=test,
-                    result="\n".join(result.text().splitlines()[-50:]),
-                    feedback=format_memory(working_memory + new_working_memory),
-                )
-            )
-        )
-        old_code = code
-        old_test = test
-
-        if fixed_code_and_test["code"].strip() != "":
-            code = extract_code(fixed_code_and_test["code"])
-        if fixed_code_and_test["test"].strip() != "":
-            test = extract_code(fixed_code_and_test["test"])
-
-        new_working_memory.append(
-            {
-                "code": f"{code}\n{test}",
-                "feedback": fixed_code_and_test["reflections"],
-                "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
-            }
-        )
-        log_progress(
-            {
-                "type": "code",
-                "status": "running",
-                "payload": {
-                    "code": DefaultImports.prepend_imports(code),
-                    "test": test,
-                },
-            }
-        )
-
-        result = code_interpreter.exec_isolation(
-            f"{DefaultImports.to_code_string()}\n{code}\n{test}"
-        )
-        log_progress(
-            {
-                "type": "code",
-                "status": "completed" if result.success else "failed",
-                "payload": {
-                    "code": DefaultImports.prepend_imports(code),
-                    "test": test,
-                    "result": result.to_json(),
-                },
-            }
-        )
         if verbosity == 2:
-            _LOGGER.info(
-
-
-
-
-
-
+            _LOGGER.info(f"Start debugging attempt {count + 1}")
+        code, test, result = debug_code(
+            working_memory,
+            debugger,
+            code_interpreter,
+            code,
+            test,
+            result,
+            new_working_memory,
+            log_progress,
+            verbosity,
+        )
         count += 1
 
     if verbosity >= 1:
```
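This hunk is the core refactor of the release: roughly sixty inlined lines of fix-and-retry logic become a single `debug_code` call per attempt, and the loop only carries the `(code, test, result)` triple between attempts. A self-contained sketch of that retry shape, with a stub standing in for the real `debug_code` (all names below are illustrative, not the package's):

```python
from dataclasses import dataclass

@dataclass
class FakeExecution:
    """Stand-in for vision_agent's Execution result object."""
    success: bool

def fake_debug_code(
    code: str, test: str, result: FakeExecution
) -> tuple[str, str, FakeExecution]:
    # Pretend one round of LLM debugging fixes the code.
    return code.replace("1/0", "1"), test, FakeExecution(success=True)

code, test = "print(1/0)", "assert True"
result = FakeExecution(success=False)
count, max_retries = 0, 3
while not result.success and count < max_retries:
    code, test, result = fake_debug_code(code, test, result)
    count += 1
print(count, result.success)  # -> 1 True
```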
**{vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent.py (continued)**

```diff
@@ -344,6 +298,83 @@ def write_and_test_code(
     }
 
 
+@traceable
+def debug_code(
+    working_memory: List[Dict[str, str]],
+    debugger: LMM,
+    code_interpreter: CodeInterpreter,
+    code: str,
+    test: str,
+    result: Execution,
+    new_working_memory: List[Dict[str, str]],
+    log_progress: Callable[[Dict[str, Any]], None],
+    verbosity: int = 0,
+) -> tuple[str, str, Execution]:
+    log_progress(
+        {
+            "type": "code",
+            "status": "started",
+        }
+    )
+    fixed_code_and_test = extract_json(
+        debugger(
+            FIX_BUG.format(
+                code=code,
+                tests=test,
+                result="\n".join(result.text().splitlines()[-50:]),
+                feedback=format_memory(working_memory + new_working_memory),
+            )
+        )
+    )
+    old_code = code
+    old_test = test
+
+    if fixed_code_and_test["code"].strip() != "":
+        code = extract_code(fixed_code_and_test["code"])
+    if fixed_code_and_test["test"].strip() != "":
+        test = extract_code(fixed_code_and_test["test"])
+
+    new_working_memory.append(
+        {
+            "code": f"{code}\n{test}",
+            "feedback": fixed_code_and_test["reflections"],
+            "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
+        }
+    )
+    log_progress(
+        {
+            "type": "code",
+            "status": "running",
+            "payload": {
+                "code": DefaultImports.prepend_imports(code),
+                "test": test,
+            },
+        }
+    )
+
+    result = code_interpreter.exec_isolation(
+        f"{DefaultImports.to_code_string()}\n{code}\n{test}"
+    )
+    log_progress(
+        {
+            "type": "code",
+            "status": "completed" if result.success else "failed",
+            "payload": {
+                "code": DefaultImports.prepend_imports(code),
+                "test": test,
+                "result": result.to_json(),
+            },
+        }
+    )
+    if verbosity == 2:
+        _print_code("Code and test after attempted fix:", code, test)
+        _LOGGER.info(
+            f"Reflection: {fixed_code_and_test['reflections']}\nCode execution result after attempted fix: {result.text(include_logs=True)}"
+        )
+
+    return code, test, result
+
+
 def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
     _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
     _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
```
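The extracted `debug_code` reports progress through the injected `log_progress` callback as plain dicts; the event shapes are visible in the hunk above. A hypothetical consumer, not part of the package, might look like:

```python
import json
from typing import Any, Dict

def log_progress(event: Dict[str, Any]) -> None:
    # Shapes observed in the debug_code hunk above:
    #   {"type": "code", "status": "started"}
    #   {"type": "code", "status": "running", "payload": {...}}
    #   {"type": "code", "status": "completed" | "failed", "payload": {...}}
    print(f"[{event.get('type', '?')}] {event.get('status', '?')}")
    if event.get("status") in ("completed", "failed") and "payload" in event:
        print(json.dumps(event["payload"], indent=2, default=str))
```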
**{vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent.py (continued)**

```diff
@@ -481,6 +512,7 @@ class VisionAgent(Agent):
         results.pop("working_memory")
         return results  # type: ignore
 
+    @traceable
     def chat_with_workflow(
         self,
         chat: List[Message],
```
**{vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/lmm/lmm.py**

```diff
@@ -233,7 +233,7 @@ class OpenAILMM(LMM):
 class AzureOpenAILMM(OpenAILMM):
     def __init__(
         self,
-        model_name: str =
+        model_name: Optional[str] = None,
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
@@ -245,14 +245,20 @@ class AzureOpenAILMM(OpenAILMM):
             api_key = os.getenv("AZURE_OPENAI_API_KEY")
         if not azure_endpoint:
             azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        if not model_name:
+            model_name = os.getenv("AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME")
 
         if not api_key:
             raise ValueError("OpenAI API key is required.")
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
+        if not model_name:
+            raise ValueError("Azure OpenAI chat model deployment name is required.")
 
         self.client = AzureOpenAI(
-            api_key=api_key,
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=azure_endpoint,
         )
         self.model_name = model_name
 
```
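Two fixes land in the lmm.py hunks: `model_name` becomes optional with a fallback to the new `AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME` variable, and the `AzureOpenAI` client is now constructed with `api_version` and `azure_endpoint` instead of the API key alone. A hedged usage sketch (the import path is assumed from this diff's file list; deployment names are placeholders):

```python
import os
from vision_agent.lmm.lmm import AzureOpenAILMM  # path assumed from the file list above

os.environ["AZURE_OPENAI_API_KEY"] = "your-api-key"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-resource.openai.azure.com"
os.environ["AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME"] = "your_gpt4o_model_deployment_name"

lmm_from_env = AzureOpenAILMM()                       # deployment resolved from the env var
lmm_explicit = AzureOpenAILMM(model_name="gpt4o-eu")  # an explicit argument still wins
```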
**{vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/sim.py**

```diff
@@ -87,17 +87,23 @@ class AzureSim(Sim):
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
-        model: str =
+        model: Optional[str] = None,
     ) -> None:
         if not api_key:
             api_key = os.getenv("AZURE_OPENAI_API_KEY")
         if not azure_endpoint:
             azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        if not model:
+            model = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME")
 
         if not api_key:
             raise ValueError("Azure OpenAI API key is required.")
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
+        if not model:
+            raise ValueError(
+                "Azure OpenAI embedding model deployment name is required."
+            )
 
         self.df = df
         self.client = AzureOpenAI(
```
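`AzureSim` gets the mirror-image change for embeddings: `model` is optional, falls back to `AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME`, and fails fast with a clear error when neither is provided. A sketch under the same assumptions; the expected shape of `df` is inferred only from `self.df = df` in the context lines, and constructing the object may trigger embedding calls through init behavior this diff does not show:

```python
import os
import pandas as pd
from vision_agent.utils.sim import AzureSim  # path assumed from the file list above

os.environ["AZURE_OPENAI_API_KEY"] = "your-api-key"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-resource.openai.azure.com"
os.environ["AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME"] = "your_embedding_model_deployment_name"

# `model` omitted: resolved from the env var. Omitting the env var too now
# raises "Azure OpenAI embedding model deployment name is required."
sim = AzureSim(df=pd.DataFrame({"desc": ["detect objects", "classify image"]}))
```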