vision-agent 0.2.200__py3-none-any.whl → 0.2.201__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,14 +36,10 @@ class BoilerplateCode:
36
36
  pre_code = [
37
37
  "from typing import *",
38
38
  "from vision_agent.utils.execute import CodeInterpreter",
39
- "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts, capture_files_into_artifacts",
40
- "artifacts = Artifacts('{remote_path}', '{remote_path}')",
41
- "artifacts.load('{remote_path}')",
42
- ]
43
- post_code = [
44
- "capture_files_into_artifacts(artifacts)",
45
- "artifacts.save()",
39
+ "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts",
40
+ "artifacts = Artifacts('{cwd}')",
46
41
  ]
42
+ post_code: List[str] = []
47
43
 
48
44
  @staticmethod
49
45
  def add_boilerplate(code: str, **format: Any) -> str:
@@ -149,9 +145,7 @@ def execute_code_action(
149
145
  code_interpreter: CodeInterpreter,
150
146
  ) -> Tuple[Execution, str]:
151
147
  result = code_interpreter.exec_isolation(
152
- BoilerplateCode.add_boilerplate(
153
- code, remote_path=str(artifacts.remote_save_path)
154
- )
148
+ BoilerplateCode.add_boilerplate(code, cwd=str(artifacts.cwd))
155
149
  )
156
150
 
157
151
  obs = str(result.logs)
@@ -212,19 +206,6 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
212
206
  return response
213
207
 
214
208
 
215
- def setup_artifacts() -> Artifacts:
216
- # this is setting remote artifacts path
217
- sandbox = os.environ.get("CODE_SANDBOX_RUNTIME", None)
218
- if sandbox is None or sandbox == "local":
219
- remote = WORKSPACE / "artifacts.pkl"
220
- elif sandbox == "e2b":
221
- remote = Path("/home/user/artifacts.pkl")
222
- else:
223
- raise ValueError(f"Unknown code sandbox runtime {sandbox}")
224
- artifacts = Artifacts(remote, Path(os.getcwd()) / "artifacts.pkl")
225
- return artifacts
226
-
227
-
228
209
  def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
229
210
  thoughts = new_format["thinking"] if new_format["thinking"] is not None else ""
230
211
  response = new_format["response"] if new_format["response"] is not None else ""
@@ -297,9 +278,10 @@ class VisionAgent(Agent):
297
278
  def __init__(
298
279
  self,
299
280
  agent: Optional[LMM] = None,
281
+ cwd: Optional[Union[Path, str]] = None,
300
282
  verbosity: int = 0,
301
283
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
302
- code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
284
+ code_sandbox_runtime: Optional[str] = None,
303
285
  ) -> None:
304
286
  """Initialize the VisionAgent.
305
287
 
@@ -317,9 +299,10 @@ class VisionAgent(Agent):
317
299
 
318
300
  self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
319
301
  self.max_iterations = 12
302
+ self.cwd = Path(cwd) if cwd is not None else Path.cwd()
320
303
  self.verbosity = verbosity
321
- self.code_interpreter = code_interpreter
322
304
  self.callback_message = callback_message
305
+ self.code_sandbox_runtime = code_sandbox_runtime
323
306
  if self.verbosity >= 1:
324
307
  _LOGGER.setLevel(logging.INFO)
325
308
 
@@ -397,40 +380,21 @@ class VisionAgent(Agent):
397
380
  raise ValueError("chat cannot be empty")
398
381
 
399
382
  if not artifacts:
400
- artifacts = setup_artifacts()
401
-
402
- # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
403
- code_interpreter = (
404
- self.code_interpreter
405
- if self.code_interpreter is not None
406
- and not isinstance(self.code_interpreter, str)
407
- else CodeInterpreterFactory.new_instance(
408
- code_sandbox_runtime=self.code_interpreter,
409
- remote_path=artifacts.remote_save_path.parent,
410
- )
411
- )
383
+ artifacts = Artifacts(self.cwd)
412
384
 
413
- if code_interpreter.remote_path != artifacts.remote_save_path.parent:
414
- raise ValueError(
415
- f"Code interpreter remote path {code_interpreter.remote_path} does not match artifacts remote path {artifacts.remote_save_path.parent}"
416
- )
417
-
418
- with code_interpreter:
385
+ with CodeInterpreterFactory.new_instance(
386
+ code_sandbox_runtime=self.code_sandbox_runtime,
387
+ remote_path=self.cwd,
388
+ ) as code_interpreter:
419
389
  orig_chat = copy.deepcopy(chat)
420
390
  int_chat = copy.deepcopy(chat)
421
391
  last_user_message = chat[-1]
422
- media_list = []
423
392
  for chat_i in int_chat:
424
393
  if "media" in chat_i:
425
394
  for media in chat_i["media"]:
426
395
  media = cast(str, media)
427
- artifacts.artifacts[Path(media).name] = open(media, "rb").read()
428
-
429
- media_remote_path = (
430
- Path(artifacts.remote_save_path.parent) / Path(media).name
431
- )
396
+ media_remote_path = Path(artifacts.cwd) / Path(media).name
432
397
  chat_i["content"] += f" Media name {media_remote_path}" # type: ignore
433
- media_list.append(media_remote_path)
434
398
 
435
399
  int_chat = cast(
436
400
  List[Message],
@@ -452,15 +416,10 @@ class VisionAgent(Agent):
452
416
  iterations = 0
453
417
  last_response = None
454
418
 
455
- # Save the current state of artifacts, will include any images the user
456
- # passed in.
457
- artifacts.save()
458
-
459
419
  # Upload artifacts to remote location and show where they are going
460
420
  # to be loaded to. The actual loading happens in BoilerplateCode as
461
421
  # part of the pre_code.
462
- code_interpreter.upload_file(artifacts.local_save_path)
463
- artifacts_loaded = artifacts.show(artifacts.remote_save_path.parent)
422
+ artifacts_loaded = artifacts.show()
464
423
  int_chat.append({"role": "observation", "content": artifacts_loaded})
465
424
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
466
425
  self.streaming_message({"role": "observation", "content": artifacts_loaded})
@@ -487,10 +446,6 @@ class VisionAgent(Agent):
487
446
  )
488
447
 
489
448
  while not finished and iterations < self.max_iterations:
490
- # ensure we upload the artifacts before each turn, so any local
491
- # modifications we made to it will be reflected in the remote
492
- code_interpreter.upload_file(artifacts.local_save_path)
493
-
494
449
  response = run_conversation(self.agent, int_chat)
495
450
  if self.verbosity >= 1:
496
451
  _LOGGER.info(response)
@@ -555,11 +510,8 @@ class VisionAgent(Agent):
555
510
  obs_chat_elt: Message = {"role": "observation", "content": obs}
556
511
  media_obs = check_and_load_image(code_action)
557
512
  if media_obs and result.success:
558
- # media paths will be under the local_save_path when we download
559
- # them after each turn
560
513
  obs_chat_elt["media"] = [
561
- artifacts.local_save_path.parent / media_ob
562
- for media_ob in media_obs
514
+ artifacts.cwd / media_ob for media_ob in media_obs
563
515
  ]
564
516
 
565
517
  if self.verbosity >= 1:
@@ -581,15 +533,6 @@ class VisionAgent(Agent):
581
533
  iterations += 1
582
534
  last_response = response
583
535
 
584
- # after each turn, download the artifacts locally
585
- code_interpreter.download_file(
586
- str(artifacts.remote_save_path.name),
587
- str(artifacts.local_save_path),
588
- )
589
- artifacts.load(
590
- artifacts.local_save_path, artifacts.local_save_path.parent
591
- )
592
-
593
536
  return orig_chat, artifacts
594
537
 
595
538
  def streaming_message(self, message: Dict[str, Any]) -> None:
@@ -604,9 +547,9 @@ class OpenAIVisionAgent(VisionAgent):
604
547
  def __init__(
605
548
  self,
606
549
  agent: Optional[LMM] = None,
550
+ cwd: Optional[Union[Path, str]] = None,
607
551
  verbosity: int = 0,
608
552
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
609
- code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
610
553
  ) -> None:
611
554
  """Initialize the VisionAgent using OpenAI LMMs.
612
555
 
@@ -625,9 +568,9 @@ class OpenAIVisionAgent(VisionAgent):
625
568
  agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
626
569
  super().__init__(
627
570
  agent,
571
+ cwd,
628
572
  verbosity,
629
573
  callback_message,
630
- code_interpreter,
631
574
  )
632
575
 
633
576
 
@@ -635,9 +578,9 @@ class AnthropicVisionAgent(VisionAgent):
635
578
  def __init__(
636
579
  self,
637
580
  agent: Optional[LMM] = None,
581
+ cwd: Optional[Union[Path, str]] = None,
638
582
  verbosity: int = 0,
639
583
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
640
- code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
641
584
  ) -> None:
642
585
  """Initialize the VisionAgent using Anthropic LMMs.
643
586
 
@@ -656,7 +599,7 @@ class AnthropicVisionAgent(VisionAgent):
656
599
  agent = AnthropicLMM(temperature=0.0) if agent is None else agent
657
600
  super().__init__(
658
601
  agent,
602
+ cwd,
659
603
  verbosity,
660
604
  callback_message,
661
- code_interpreter,
662
605
  )
@@ -450,12 +450,6 @@ class VisionAgentCoder(Agent):
450
450
  for chat_i in chat:
451
451
  if "media" in chat_i:
452
452
  for media in chat_i["media"]:
453
- media = (
454
- media
455
- if type(media) is str
456
- and media.startswith(("http", "https"))
457
- else code_interpreter.upload_file(cast(str, media))
458
- )
459
453
  chat_i["content"] += f" Media name {media}" # type: ignore
460
454
  media_list.append(str(media))
461
455
 
@@ -391,12 +391,6 @@ class VisionAgentPlanner(Agent):
391
391
  for chat_i in chat:
392
392
  if "media" in chat_i:
393
393
  for media in chat_i["media"]:
394
- media = (
395
- media
396
- if type(media) is str
397
- and media.startswith(("http", "https"))
398
- else code_interpreter.upload_file(cast(str, media))
399
- )
400
394
  chat_i["content"] += f" Media name {media}" # type: ignore
401
395
  media_list.append(str(media))
402
396
 
@@ -1,7 +1,6 @@
1
1
  import difflib
2
2
  import json
3
3
  import os
4
- import pickle as pkl
5
4
  import re
6
5
  import subprocess
7
6
  import tempfile
@@ -73,95 +72,41 @@ class Artifacts:
73
72
  need to be in sync with the remote environment the VisionAgent is running in.
74
73
  """
75
74
 
76
- def __init__(
77
- self, remote_save_path: Union[str, Path], local_save_path: Union[str, Path]
78
- ) -> None:
75
+ def __init__(self, cwd: Union[str, Path]) -> None:
79
76
  """Initializes the Artifacts object with its remote and local save paths.
80
77
 
81
78
  Parameters:
82
- remote_save_path (Union[str, Path]): The path to save the artifacts in the
83
- remote environment. For example "/home/user/artifacts.pkl".
84
- local_save_path (Union[str, Path]): The path to save the artifacts in the
85
- local environment. For example "/Users/my_user/workspace/artifacts.pkl".
79
+ cwd (Union[str, Path]): The path to save all the chat related files. For example "/home/user/chat_abc/".
86
80
  """
87
- self.remote_save_path = Path(remote_save_path)
88
- self.local_save_path = Path(local_save_path)
89
- self.artifacts: Dict[str, Any] = {}
81
+ self.cwd = Path(cwd)
90
82
 
91
- self.code_sandbox_runtime = None
92
-
93
- def load(
94
- self,
95
- artifacts_path: Union[str, Path],
96
- load_to_dir: Optional[Union[str, Path]] = None,
97
- ) -> None:
98
- """Loads are artifacts into the load_to_dir directory. If load_to_dir is None,
99
- it will load into remote_save_path directory. If an artifact value is None it
100
- will skip loading it.
101
-
102
- Parameters:
103
- artifacts_path (Union[str, Path]): The file path to load the artifacts from.
104
- If you are in the remote environment this would be remote_save_path, if
105
- you are in the local environment this would be local_save_path.
106
- load_to_dir (Optional[Union[str, Path]): The directory to load the artifacts
107
- into. If None, it will load into remote_save_path directory.
108
- """
109
- with open(artifacts_path, "rb") as f:
110
- self.artifacts = pkl.load(f)
111
-
112
- load_to_dir = (
113
- self.remote_save_path.parent if load_to_dir is None else Path(load_to_dir)
114
- )
115
-
116
- for k, v in self.artifacts.items():
117
- if v is not None:
118
- mode = "w" if isinstance(v, str) else "wb"
119
- with open(load_to_dir / k, mode) as f:
120
- f.write(v)
121
-
122
- def show(self, uploaded_file_dir: Optional[Union[str, Path]] = None) -> str:
123
- """Prints out the artifacts and the directory they have been loaded to. If you
124
- pass in upload_file_dir, it will show the artifacts have been loaded to the
125
- upload_file_dir directory. If you don't pass in upload_file_dir, it will show
126
- the artifacts have been loaded to the remote_save_path directory.
127
-
128
- Parameters:
129
- uploaded_file_dir (Optional[Union[str, Path]): The directory the artifacts
130
- have been loaded to.
131
- """
132
- loaded_path = (
133
- Path(uploaded_file_dir)
134
- if uploaded_file_dir is not None
135
- else self.remote_save_path.parent
136
- )
83
+ def show(self) -> str:
84
+ """Prints out all the files in the current working directory"""
137
85
  output_str = "[Artifacts loaded]\n"
138
- for k in self.artifacts.keys():
139
- output_str += (
140
- f"Artifact name: {k}, loaded to path: {str(loaded_path / k)}\n"
141
- )
86
+ for k in self:
87
+ output_str += f"Artifact name: {k}, loaded to path: {str(self.cwd / k)}\n"
142
88
  output_str += "[End of artifacts]\n"
143
89
  print(output_str)
144
90
  return output_str
145
91
 
146
- def save(self, local_path: Optional[Union[str, Path]] = None) -> None:
147
- """Saves the artifacts to the local_save_path directory. If local_path is None,
148
- it will save to the local_save_path directory.
149
- """
150
- save_path = Path(local_path) if local_path is not None else self.local_save_path
151
- with open(save_path, "wb") as f:
152
- pkl.dump(self.artifacts, f)
153
-
154
92
  def __iter__(self) -> Any:
155
- return iter(self.artifacts)
93
+ return iter(os.listdir(self.cwd))
156
94
 
157
95
  def __getitem__(self, name: str) -> Any:
158
- return self.artifacts[name]
96
+ file_path = self.cwd / name
97
+ if file_path.exists():
98
+ with open(file_path, "r") as file:
99
+ return file.read()
100
+ else:
101
+ raise KeyError(f"File '{name}' not found in artifacts")
159
102
 
160
103
  def __setitem__(self, name: str, value: Any) -> None:
161
- self.artifacts[name] = value
104
+ file_path = self.cwd / name
105
+ with open(file_path, "w") as file:
106
+ file.write(value)
162
107
 
163
108
  def __contains__(self, name: str) -> bool:
164
- return name in self.artifacts
109
+ return name in os.listdir(self.cwd)
165
110
 
166
111
 
167
112
  def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
@@ -175,27 +120,6 @@ def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
175
120
  ), file_name_p.suffix in [".png", ".jpeg", ".jpg", ".mp4"]
176
121
 
177
122
 
178
- def capture_files_into_artifacts(artifacts: Artifacts) -> None:
179
- """This function is used to capture all files in the current directory into an
180
- artifact object. This is useful if you want to capture all files in the current
181
- directory and use them in a different environment where you don't have access to
182
- the file system.
183
-
184
- Parameters:
185
- artifact (Artifacts): The artifact object to save the files to.
186
- """
187
- for file in Path(".").glob("**/*"):
188
- usable_file, is_media = filter_file(file)
189
- mode = "rb" if is_media else "r"
190
- if usable_file:
191
- file_name = file.name
192
- if file_name.startswith(str(Path(artifacts.remote_save_path).parents)):
193
- idx = len(Path(artifacts.remote_save_path).parents)
194
- file_name = file_name[idx:]
195
- with open(file, mode) as f:
196
- artifacts[file_name] = f.read()
197
-
198
-
199
123
  # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
200
124
 
201
125
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.200
3
+ Version: 0.2.201
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -5,12 +5,12 @@ vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs
5
5
  vision_agent/agent/agent.py,sha256=sf8JcA3LNy_4GaS_gQb2Q-PXkl4dBuGh-7raI9KAtZo,1470
6
6
  vision_agent/agent/agent_utils.py,sha256=NmrqjhSb6fpnrB8XGWtaywZjr9n89otusOZpcbWLf9k,13534
7
7
  vision_agent/agent/types.py,sha256=aAd_ez1-NQh04k27cmywyOV2uA_vWWYE-Ok7zq_JoAk,1532
8
- vision_agent/agent/vision_agent.py,sha256=rr1P9iTbr7OsjgMYWCeIxQYI4cLwPWia3NIMJNi-9Yo,26110
9
- vision_agent/agent/vision_agent_coder.py,sha256=waCmw_NTgsy9G-UqlRZFhsFJJVuWVrjxVnShe4Xp_lI,27743
8
+ vision_agent/agent/vision_agent.py,sha256=I75bEU-os9Lf9OSICKfvQ_H_ftg-zOwgTwWnu41oIdo,23555
9
+ vision_agent/agent/vision_agent_coder.py,sha256=ANwUuCO4JpTYJs4s6ynSRFcdjZFUVuSoSfcqp8ZQDDQ,27451
10
10
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
11
11
  vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=9v5HwbNidSzYUEFl6ZMniWWOmyLITM_moWLtKVaTen8,4845
12
12
  vision_agent/agent/vision_agent_coder_v2.py,sha256=SVIJC0N5TBgq9z-F99UebLimRuQuAe_HHvTFupBzVfo,14715
13
- vision_agent/agent/vision_agent_planner.py,sha256=F_5opnc0XmQmNH40rs2T7DFrai4CC6aDYe02Z8e93AM,18875
13
+ vision_agent/agent/vision_agent_planner.py,sha256=KWMA7XemcSmc_jn-MwdWz9wnKDtj-sYQ9tINi70_OoU,18583
14
14
  vision_agent/agent/vision_agent_planner_prompts.py,sha256=Y3jz9HRf8fz9NLUseN7cTgZqewP0RazxR7vw1sPhcn0,6691
15
15
  vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=Tzon3h5iZdHJglesk8GVS-2myNf5-fhf7HUbkpZWHQk,33143
16
16
  vision_agent/agent/vision_agent_planner_v2.py,sha256=mxQxD_B8sKYharh8e7W0uc1tN11YCztyLowc83seScc,17023
@@ -26,7 +26,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
26
26
  vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
27
27
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
28
28
  vision_agent/tools/__init__.py,sha256=xuNt5e4syQH28Vr6EdjLmO9ni9i00yav9yqcPMUx1oo,2878
29
- vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
29
+ vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
30
30
  vision_agent/tools/planner_tools.py,sha256=FROahw_6Taqvytv6pOjCHUEypOfjsi_f8Vo1c5vz6Mw,8823
31
31
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
32
32
  vision_agent/tools/tool_utils.py,sha256=GDGOmBCo4UfYz-DJ-olREJHPsqs5mzHu0YXiAnpNE8E,10179
@@ -39,7 +39,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
39
39
  vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
40
40
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
41
41
  vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
42
- vision_agent-0.2.200.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
43
- vision_agent-0.2.200.dist-info/METADATA,sha256=goRTW73tD79-UlJiy4cL0twnVYm9iSjU9f5HsC4A1ZI,19026
44
- vision_agent-0.2.200.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
45
- vision_agent-0.2.200.dist-info/RECORD,,
42
+ vision_agent-0.2.201.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
43
+ vision_agent-0.2.201.dist-info/METADATA,sha256=Vbdn9gqa9uz0RTRV9SMvNgPQbqLGmgQJKUtuEe1buI0,19026
44
+ vision_agent-0.2.201.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
45
+ vision_agent-0.2.201.dist-info/RECORD,,