vision-agent 0.2.59__tar.gz → 0.2.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {vision_agent-0.2.59 → vision_agent-0.2.61}/PKG-INFO +17 -4
  2. {vision_agent-0.2.59 → vision_agent-0.2.61}/README.md +14 -1
  3. {vision_agent-0.2.59 → vision_agent-0.2.61}/pyproject.toml +3 -3
  4. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent.py +95 -63
  5. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/lmm/lmm.py +8 -2
  6. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/sim.py +7 -1
  7. {vision_agent-0.2.59 → vision_agent-0.2.61}/LICENSE +0 -0
  8. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/agent/vision_agent_prompts.py +0 -0
  12. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/fonts/__init__.py +0 -0
  13. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  14. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/lmm/__init__.py +0 -0
  15. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/__init__.py +0 -0
  16. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/prompts.py +0 -0
  17. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/tool_utils.py +0 -0
  18. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/tools/tools.py +0 -0
  19. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/__init__.py +0 -0
  20. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/execute.py +0 -0
  21. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/image_utils.py +0 -0
  22. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/type_defs.py +0 -0
  23. {vision_agent-0.2.59 → vision_agent-0.2.61}/vision_agent/utils/video.py +0 -0
--- vision_agent-0.2.59/PKG-INFO
+++ vision_agent-0.2.61/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.59
+Version: 0.2.61
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: e2b (>=0.17.0,<0.18.0)
-Requires-Dist: e2b-code-interpreter (>=0.0.7,<0.0.8)
+Requires-Dist: e2b (>=0.17.1,<0.18.0)
+Requires-Dist: e2b-code-interpreter (>=0.0.9,<0.0.10)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
@@ -218,13 +218,26 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you can set the environment variable:
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
--- vision_agent-0.2.59/README.md
+++ vision_agent-0.2.61/README.md
@@ -182,13 +182,26 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you can set the environment variable:
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
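The two new deployment-name variables feed directly into the constructors changed in `lmm.py` and `sim.py` below. A minimal sketch of the fallback behavior, assuming `AzureOpenAILMM` is re-exported from `vision_agent.lmm` (the import path is an assumption; only the class name and environment variable names come from this diff):

```python
import os

# Assumed import path; the class itself lives in vision_agent/lmm/lmm.py.
from vision_agent.lmm import AzureOpenAILMM

os.environ["AZURE_OPENAI_API_KEY"] = "your-api-key"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-resource.openai.azure.com"
# New in 0.2.61: the chat deployment name can come from the environment
# instead of a hardcoded "gpt-4o" default.
os.environ["AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME"] = "your_gpt4o_model_deployment_name"

# model_name=None -> falls back to AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME,
# and raises ValueError if neither is provided.
lmm = AzureOpenAILMM()
```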
--- vision_agent-0.2.59/pyproject.toml
+++ vision_agent-0.2.61/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.59"
+version = "0.2.61"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -34,8 +34,8 @@ nbformat = "^5.10.4"
 rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
-e2b = "^0.17.0"
-e2b-code-interpreter = "^0.0.7"
+e2b = "^0.17.1"
+e2b-code-interpreter = "^0.0.9"
 tenacity = "^8.3.0"
 
 [tool.poetry.group.dev.dependencies]
--- vision_agent-0.2.59/vision_agent/agent/vision_agent.py
+++ vision_agent-0.2.61/vision_agent/agent/vision_agent.py
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
 
 from PIL import Image
+from langsmith import traceable
 from rich.console import Console
 from rich.style import Style
 from rich.syntax import Syntax
@@ -130,6 +131,7 @@ def extract_image(
     return new_media
 
 
+@traceable
 def write_plan(
     chat: List[Message],
     tool_desc: str,
@@ -147,6 +149,7 @@ def write_plan(
     return extract_json(model.chat(chat))["plan"]  # type: ignore
 
 
+@traceable
 def write_code(
     coder: LMM,
     chat: List[Message],
@@ -167,6 +170,7 @@ def write_code(
     return extract_code(coder(chat))
 
 
+@traceable
 def write_test(
     tester: LMM,
     chat: List[Message],
@@ -191,6 +195,7 @@ def write_test(
     return extract_code(tester(chat))
 
 
+@traceable
 def reflect(
     chat: List[Message],
     plan: str,
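The decorator added throughout this file comes from LangSmith, already a declared dependency (`langsmith >=0.1.58`). A self-contained sketch of what `@traceable` provides, separate from vision-agent's own code; the stub function and the exact environment variables used to enable tracing are assumptions about typical LangSmith setup:

```python
import os

from langsmith import traceable

# Tracing is typically switched on via environment variables (assumed setup);
# without them the decorator is effectively a pass-through.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your-langsmith-api-key"

@traceable  # records inputs, outputs, and latency of each call as a LangSmith run
def write_plan_stub(user_request: str) -> str:
    # Stand-in for vision_agent's write_plan, which calls an LMM.
    return f"plan for: {user_request}"

print(write_plan_stub("Count the cars in street.jpg"))
```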
@@ -266,70 +271,19 @@ def write_and_test_code(
     count = 0
     new_working_memory: List[Dict[str, str]] = []
     while not result.success and count < max_retries:
-        log_progress(
-            {
-                "type": "code",
-                "status": "started",
-            }
-        )
-        fixed_code_and_test = extract_json(
-            debugger(
-                FIX_BUG.format(
-                    code=code,
-                    tests=test,
-                    result="\n".join(result.text().splitlines()[-50:]),
-                    feedback=format_memory(working_memory + new_working_memory),
-                )
-            )
-        )
-        old_code = code
-        old_test = test
-
-        if fixed_code_and_test["code"].strip() != "":
-            code = extract_code(fixed_code_and_test["code"])
-        if fixed_code_and_test["test"].strip() != "":
-            test = extract_code(fixed_code_and_test["test"])
-
-        new_working_memory.append(
-            {
-                "code": f"{code}\n{test}",
-                "feedback": fixed_code_and_test["reflections"],
-                "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
-            }
-        )
-        log_progress(
-            {
-                "type": "code",
-                "status": "running",
-                "payload": {
-                    "code": DefaultImports.prepend_imports(code),
-                    "test": test,
-                },
-            }
-        )
-
-        result = code_interpreter.exec_isolation(
-            f"{DefaultImports.to_code_string()}\n{code}\n{test}"
-        )
-        log_progress(
-            {
-                "type": "code",
-                "status": "completed" if result.success else "failed",
-                "payload": {
-                    "code": DefaultImports.prepend_imports(code),
-                    "test": test,
-                    "result": result.to_json(),
-                },
-            }
-        )
         if verbosity == 2:
-            _LOGGER.info(
-                f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
-            )
-            _print_code("Code and test after attempted fix:", code, test)
-            _LOGGER.info(
-                f"Code execution result after attempted fix: {result.text(include_logs=True)}"
-            )
+            _LOGGER.info(f"Start debugging attempt {count + 1}")
+        code, test, result = debug_code(
+            working_memory,
+            debugger,
+            code_interpreter,
+            code,
+            test,
+            result,
+            new_working_memory,
+            log_progress,
+            verbosity,
+        )
         count += 1
 
     if verbosity >= 1:
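The net effect of this hunk: the ~60-line debugging body moves out of the retry loop into a dedicated `debug_code` helper (defined in the next hunk), with `(code, test, result)` threaded through each round. A runnable toy version of that loop shape, simplified and not taken from vision-agent:

```python
from typing import Tuple

def fix_round(code: str, attempt: int) -> Tuple[str, bool]:
    # Pretend "debugger": fixes the bug on the second attempt.
    fixed = code.replace("bug", "ok") if attempt >= 1 else code
    return fixed, "bug" not in fixed

code, success, count, max_retries = "print('bug')", False, 0, 3
while not success and count < max_retries:
    # One fix-and-retest round per iteration, outputs threaded back in,
    # mirroring how write_and_test_code now delegates to debug_code.
    code, success = fix_round(code, count)
    count += 1
print(code, success)  # -> print('ok') True
```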
@@ -344,6 +298,83 @@ def write_and_test_code(
     }
 
 
+@traceable
+def debug_code(
+    working_memory: List[Dict[str, str]],
+    debugger: LMM,
+    code_interpreter: CodeInterpreter,
+    code: str,
+    test: str,
+    result: Execution,
+    new_working_memory: List[Dict[str, str]],
+    log_progress: Callable[[Dict[str, Any]], None],
+    verbosity: int = 0,
+) -> tuple[str, str, Execution]:
+    log_progress(
+        {
+            "type": "code",
+            "status": "started",
+        }
+    )
+    fixed_code_and_test = extract_json(
+        debugger(
+            FIX_BUG.format(
+                code=code,
+                tests=test,
+                result="\n".join(result.text().splitlines()[-50:]),
+                feedback=format_memory(working_memory + new_working_memory),
+            )
+        )
+    )
+    old_code = code
+    old_test = test
+
+    if fixed_code_and_test["code"].strip() != "":
+        code = extract_code(fixed_code_and_test["code"])
+    if fixed_code_and_test["test"].strip() != "":
+        test = extract_code(fixed_code_and_test["test"])
+
+    new_working_memory.append(
+        {
+            "code": f"{code}\n{test}",
+            "feedback": fixed_code_and_test["reflections"],
+            "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
+        }
+    )
+    log_progress(
+        {
+            "type": "code",
+            "status": "running",
+            "payload": {
+                "code": DefaultImports.prepend_imports(code),
+                "test": test,
+            },
+        }
+    )
+
+    result = code_interpreter.exec_isolation(
+        f"{DefaultImports.to_code_string()}\n{code}\n{test}"
+    )
+    log_progress(
+        {
+            "type": "code",
+            "status": "completed" if result.success else "failed",
+            "payload": {
+                "code": DefaultImports.prepend_imports(code),
+                "test": test,
+                "result": result.to_json(),
+            },
+        }
+    )
+    if verbosity == 2:
+        _print_code("Code and test after attempted fix:", code, test)
+        _LOGGER.info(
+            f"Reflection: {fixed_code_and_test['reflections']}\nCode execution result after attempted fix: {result.text(include_logs=True)}"
+        )
+
+    return code, test, result
+
+
 def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
     _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
     _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
@@ -481,6 +512,7 @@ class VisionAgent(Agent):
         results.pop("working_memory")
         return results  # type: ignore
 
+    @traceable
     def chat_with_workflow(
         self,
         chat: List[Message],
--- vision_agent-0.2.59/vision_agent/lmm/lmm.py
+++ vision_agent-0.2.61/vision_agent/lmm/lmm.py
@@ -233,7 +233,7 @@ class OpenAILMM(LMM):
 class AzureOpenAILMM(OpenAILMM):
     def __init__(
         self,
-        model_name: str = "gpt-4o",
+        model_name: Optional[str] = None,
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
@@ -245,14 +245,20 @@ class AzureOpenAILMM(OpenAILMM):
             api_key = os.getenv("AZURE_OPENAI_API_KEY")
         if not azure_endpoint:
             azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        if not model_name:
+            model_name = os.getenv("AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME")
 
         if not api_key:
             raise ValueError("OpenAI API key is required.")
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
+        if not model_name:
+            raise ValueError("Azure OpenAI chat model deployment name is required.")
 
         self.client = AzureOpenAI(
-            api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=azure_endpoint,
         )
         self.model_name = model_name
 
--- vision_agent-0.2.59/vision_agent/utils/sim.py
+++ vision_agent-0.2.61/vision_agent/utils/sim.py
@@ -87,17 +87,23 @@ class AzureSim(Sim):
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
-        model: str = "text-embedding-3-small",
+        model: Optional[str] = None,
     ) -> None:
         if not api_key:
             api_key = os.getenv("AZURE_OPENAI_API_KEY")
         if not azure_endpoint:
             azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        if not model:
+            model = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME")
 
         if not api_key:
             raise ValueError("Azure OpenAI API key is required.")
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
+        if not model:
+            raise ValueError(
+                "Azure OpenAI embedding model deployment name is required."
+            )
 
         self.df = df
         self.client = AzureOpenAI(
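For completeness, a hypothetical usage sketch of the updated `AzureSim`. The DataFrame layout, the `sim_key` argument, and the `top_k` call mirror how the base `Sim` class is commonly used in this package, but none of them appear in this diff, so treat them as assumptions:

```python
import pandas as pd

from vision_agent.utils.sim import AzureSim  # assumed import path

# Assumed: AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and the new
# AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME are already exported
# with valid values; embeddings are computed against the live service.
df = pd.DataFrame({"doc": ["detect objects", "caption image", "read text"]})

sim = AzureSim(df, sim_key="doc")  # model=None -> embedding deployment from env
print(sim.top_k("find all the cars", k=1))
```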