vision-agent 0.2.133__tar.gz → 0.2.135__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. {vision_agent-0.2.133 → vision_agent-0.2.135}/PKG-INFO +1 -1
  2. {vision_agent-0.2.133 → vision_agent-0.2.135}/pyproject.toml +1 -1
  3. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent.py +15 -3
  4. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/meta_tools.py +19 -14
  5. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/execute.py +21 -1
  6. {vision_agent-0.2.133 → vision_agent-0.2.135}/LICENSE +0 -0
  7. {vision_agent-0.2.133 → vision_agent-0.2.135}/README.md +0 -0
  8. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/__init__.py +0 -0
  9. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/__init__.py +0 -0
  10. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/agent.py +0 -0
  11. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/agent_utils.py +0 -0
  12. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_coder.py +0 -0
  13. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  14. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_prompts.py +0 -0
  15. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/clients/__init__.py +0 -0
  16. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/clients/http.py +0 -0
  17. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/clients/landing_public_api.py +0 -0
  18. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/fonts/__init__.py +0 -0
  19. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  20. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/lmm/__init__.py +0 -0
  21. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/lmm/lmm.py +0 -0
  22. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/lmm/types.py +0 -0
  23. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/__init__.py +0 -0
  24. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/prompts.py +0 -0
  25. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/tool_utils.py +0 -0
  26. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/tools.py +0 -0
  27. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/tools_types.py +0 -0
  28. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/__init__.py +0 -0
  29. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/exceptions.py +0 -0
  30. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/image_utils.py +0 -0
  31. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/sim.py +0 -0
  32. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/type_defs.py +0 -0
  33. {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.133
3
+ Version: 0.2.135
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.133"
7
+ version = "0.2.135"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -15,6 +15,7 @@ from vision_agent.agent.vision_agent_prompts import (
15
15
  from vision_agent.lmm import LMM, Message, OpenAILMM
16
16
  from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
17
17
  from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
18
+ from vision_agent.tools.tools import extract_frames, save_video
18
19
  from vision_agent.utils import CodeInterpreterFactory
19
20
  from vision_agent.utils.execute import CodeInterpreter, Execution
20
21
 
@@ -224,9 +225,20 @@ class VisionAgent(Agent):
224
225
  for media in chat_i["media"]:
225
226
  if type(media) is str and media.startswith(("http", "https")):
226
227
  # TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
227
- file_path = Path(media).name
228
- ndarray = load_image(media)
229
- save_image(ndarray, file_path)
228
+ file_path = str(
229
+ Path(self.local_artifacts_path).parent
230
+ / Path(media).name
231
+ )
232
+ if file_path.lower().endswith(
233
+ ".mp4"
234
+ ) or file_path.lower().endswith(".mov"):
235
+ video_frames = extract_frames(media)
236
+ save_video(
237
+ [frame for frame, _ in video_frames], file_path
238
+ )
239
+ else:
240
+ ndarray = load_image(media)
241
+ save_image(ndarray, file_path)
230
242
  media = file_path
231
243
  else:
232
244
  media = cast(str, media)
@@ -53,25 +53,27 @@ def redisplay_results(execution: Execution) -> None:
53
53
  """
54
54
  for result in execution.results:
55
55
  if result.text is not None:
56
- display({MimeType.TEXT_PLAIN: result.text})
56
+ display({MimeType.TEXT_PLAIN: result.text}, raw=True)
57
57
  if result.html is not None:
58
- display({MimeType.TEXT_HTML: result.html})
58
+ display({MimeType.TEXT_HTML: result.html}, raw=True)
59
59
  if result.markdown is not None:
60
- display({MimeType.TEXT_MARKDOWN: result.markdown})
60
+ display({MimeType.TEXT_MARKDOWN: result.markdown}, raw=True)
61
61
  if result.svg is not None:
62
- display({MimeType.IMAGE_SVG: result.svg})
62
+ display({MimeType.IMAGE_SVG: result.svg}, raw=True)
63
63
  if result.png is not None:
64
- display({MimeType.IMAGE_PNG: result.png})
64
+ display({MimeType.IMAGE_PNG: result.png}, raw=True)
65
65
  if result.jpeg is not None:
66
- display({MimeType.IMAGE_JPEG: result.jpeg})
66
+ display({MimeType.IMAGE_JPEG: result.jpeg}, raw=True)
67
67
  if result.mp4 is not None:
68
- display({MimeType.VIDEO_MP4_B64: result.mp4})
68
+ display({MimeType.VIDEO_MP4_B64: result.mp4}, raw=True)
69
69
  if result.latex is not None:
70
- display({MimeType.TEXT_LATEX: result.latex})
70
+ display({MimeType.TEXT_LATEX: result.latex}, raw=True)
71
71
  if result.json is not None:
72
- display({MimeType.APPLICATION_JSON: result.json})
72
+ display({MimeType.APPLICATION_JSON: result.json}, raw=True)
73
+ if result.artifact_name is not None:
74
+ display({MimeType.TEXT_ARTIFACT_NAME: result.artifact_name}, raw=True)
73
75
  if result.extra is not None:
74
- display(result.extra)
76
+ display(result.extra, raw=True)
75
77
 
76
78
 
77
79
  class Artifacts:
@@ -208,7 +210,7 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str:
208
210
  return_str = f"[Artifact {name} created]"
209
211
  print(return_str)
210
212
 
211
- display({MimeType.APPLICATION_JSON: {"last_artifact": name}})
213
+ display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
212
214
  return return_str
213
215
 
214
216
 
@@ -292,7 +294,7 @@ def edit_code_artifact(
292
294
 
293
295
  artifacts[name] = "".join(edited_lines)
294
296
 
295
- display({MimeType.APPLICATION_JSON: {"last_artifact": name}})
297
+ display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
296
298
  return open_code_artifact(artifacts, name, cur_line)
297
299
 
298
300
 
@@ -348,7 +350,7 @@ def generate_vision_code(
348
350
  code_lines = code.splitlines(keepends=True)
349
351
  total_lines = len(code_lines)
350
352
 
351
- display({MimeType.APPLICATION_JSON: {"last_artifact": name}})
353
+ display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
352
354
  return view_lines(code_lines, 0, total_lines, name, total_lines)
353
355
 
354
356
 
@@ -413,7 +415,7 @@ def edit_vision_code(
413
415
  code_lines = code.splitlines(keepends=True)
414
416
  total_lines = len(code_lines)
415
417
 
416
- display({MimeType.APPLICATION_JSON: {"last_artifact": name}})
418
+ display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
417
419
  return view_lines(code_lines, 0, total_lines, name, total_lines)
418
420
 
419
421
 
@@ -427,6 +429,7 @@ def write_media_artifact(artifacts: Artifacts, local_path: str) -> str:
427
429
  with open(local_path, "rb") as f:
428
430
  media = f.read()
429
431
  artifacts[Path(local_path).name] = media
432
+ display({MimeType.TEXT_ARTIFACT_NAME: Path(local_path).name}, raw=True)
430
433
  return f"[Media {Path(local_path).name} saved]"
431
434
 
432
435
 
@@ -592,6 +595,8 @@ def use_florence2_fine_tuning(
592
595
 
593
596
  diff = get_diff_with_prompts(name, code, new_code)
594
597
  print(diff)
598
+
599
+ display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
595
600
  return diff
596
601
 
597
602
 
@@ -56,6 +56,7 @@ class MimeType(str, Enum):
56
56
  TEXT_LATEX = "text/latex"
57
57
  APPLICATION_JSON = "application/json"
58
58
  APPLICATION_JAVASCRIPT = "application/javascript"
59
+ TEXT_ARTIFACT_NAME = "text/artifact/name"
59
60
 
60
61
 
61
62
  class FileSerializer:
@@ -103,6 +104,7 @@ class Result:
103
104
  latex: Optional[str] = None
104
105
  json: Optional[Dict[str, Any]] = None
105
106
  javascript: Optional[str] = None
107
+ artifact_name: Optional[str] = None
106
108
  extra: Optional[Dict[str, Any]] = None
107
109
  "Extra data that can be included. Not part of the standard types."
108
110
 
@@ -127,6 +129,7 @@ class Result:
127
129
  self.latex = data.pop(MimeType.TEXT_LATEX, None)
128
130
  self.json = data.pop(MimeType.APPLICATION_JSON, None)
129
131
  self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
132
+ self.artifact_name = data.pop(MimeType.TEXT_ARTIFACT_NAME, None)
130
133
  self.extra = data
131
134
  # Only keeping the PNG representation if both PNG and JPEG are present
132
135
  if self.png and self.jpeg:
@@ -204,6 +207,8 @@ class Result:
204
207
  formats.append("javascript")
205
208
  if self.mp4:
206
209
  formats.append("mp4")
210
+ if self.artifact_name:
211
+ formats.append("artifact_name")
207
212
  if self.extra:
208
213
  formats.extend(iter(self.extra))
209
214
  return formats
@@ -691,8 +696,9 @@ class CodeInterpreterFactory:
691
696
  if not code_sandbox_runtime:
692
697
  code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
693
698
  if code_sandbox_runtime == "e2b":
699
+ envs = _get_e2b_env()
694
700
  instance: CodeInterpreter = E2BCodeInterpreter(
695
- timeout=_SESSION_TIMEOUT, remote_path=remote_path
701
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path, envs=envs
696
702
  )
697
703
  elif code_sandbox_runtime == "local":
698
704
  instance = LocalCodeInterpreter(
@@ -705,6 +711,20 @@ class CodeInterpreterFactory:
705
711
  return instance
706
712
 
707
713
 
714
+ def _get_e2b_env() -> Union[Dict[str, str], None]:
715
+ openai_api_key = os.getenv("OPENAI_API_KEY", "")
716
+ anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "")
717
+ if openai_api_key or anthropic_api_key:
718
+ envs = {}
719
+ if openai_api_key:
720
+ envs["OPENAI_API_KEY"] = openai_api_key
721
+ if anthropic_api_key:
722
+ envs["ANTHROPIC_API_KEY"] = anthropic_api_key
723
+ else:
724
+ envs = None
725
+ return envs
726
+
727
+
708
728
  def _parse_local_code_interpreter_outputs(outputs: List[Dict[str, Any]]) -> Execution:
709
729
  """Parse notebook cell outputs to Execution object. Output types:
710
730
  https://nbformat.readthedocs.io/en/latest/format_description.html#code-cell-outputs
File without changes
File without changes