vision-agent 0.2.200__py3-none-any.whl → 0.2.201__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -36,14 +36,10 @@ class BoilerplateCode:
36
36
  pre_code = [
37
37
  "from typing import *",
38
38
  "from vision_agent.utils.execute import CodeInterpreter",
39
- "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts, capture_files_into_artifacts",
40
- "artifacts = Artifacts('{remote_path}', '{remote_path}')",
41
- "artifacts.load('{remote_path}')",
42
- ]
43
- post_code = [
44
- "capture_files_into_artifacts(artifacts)",
45
- "artifacts.save()",
39
+ "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts",
40
+ "artifacts = Artifacts('{cwd}')",
46
41
  ]
42
+ post_code: List[str] = []
47
43
 
48
44
  @staticmethod
49
45
  def add_boilerplate(code: str, **format: Any) -> str:
@@ -149,9 +145,7 @@ def execute_code_action(
149
145
  code_interpreter: CodeInterpreter,
150
146
  ) -> Tuple[Execution, str]:
151
147
  result = code_interpreter.exec_isolation(
152
- BoilerplateCode.add_boilerplate(
153
- code, remote_path=str(artifacts.remote_save_path)
154
- )
148
+ BoilerplateCode.add_boilerplate(code, cwd=str(artifacts.cwd))
155
149
  )
156
150
 
157
151
  obs = str(result.logs)
@@ -212,19 +206,6 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
212
206
  return response
213
207
 
214
208
 
215
- def setup_artifacts() -> Artifacts:
216
- # this is setting remote artifacts path
217
- sandbox = os.environ.get("CODE_SANDBOX_RUNTIME", None)
218
- if sandbox is None or sandbox == "local":
219
- remote = WORKSPACE / "artifacts.pkl"
220
- elif sandbox == "e2b":
221
- remote = Path("/home/user/artifacts.pkl")
222
- else:
223
- raise ValueError(f"Unknown code sandbox runtime {sandbox}")
224
- artifacts = Artifacts(remote, Path(os.getcwd()) / "artifacts.pkl")
225
- return artifacts
226
-
227
-
228
209
  def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
229
210
  thoughts = new_format["thinking"] if new_format["thinking"] is not None else ""
230
211
  response = new_format["response"] if new_format["response"] is not None else ""
@@ -297,9 +278,10 @@ class VisionAgent(Agent):
297
278
  def __init__(
298
279
  self,
299
280
  agent: Optional[LMM] = None,
281
+ cwd: Optional[Union[Path, str]] = None,
300
282
  verbosity: int = 0,
301
283
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
302
- code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
284
+ code_sandbox_runtime: Optional[str] = None,
303
285
  ) -> None:
304
286
  """Initialize the VisionAgent.
305
287
 
@@ -317,9 +299,10 @@ class VisionAgent(Agent):
317
299
 
318
300
  self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
319
301
  self.max_iterations = 12
302
+ self.cwd = Path(cwd) if cwd is not None else Path.cwd()
320
303
  self.verbosity = verbosity
321
- self.code_interpreter = code_interpreter
322
304
  self.callback_message = callback_message
305
+ self.code_sandbox_runtime = code_sandbox_runtime
323
306
  if self.verbosity >= 1:
324
307
  _LOGGER.setLevel(logging.INFO)
325
308
 
@@ -397,40 +380,21 @@ class VisionAgent(Agent):
397
380
  raise ValueError("chat cannot be empty")
398
381
 
399
382
  if not artifacts:
400
- artifacts = setup_artifacts()
401
-
402
- # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
403
- code_interpreter = (
404
- self.code_interpreter
405
- if self.code_interpreter is not None
406
- and not isinstance(self.code_interpreter, str)
407
- else CodeInterpreterFactory.new_instance(
408
- code_sandbox_runtime=self.code_interpreter,
409
- remote_path=artifacts.remote_save_path.parent,
410
- )
411
- )
383
+ artifacts = Artifacts(self.cwd)
412
384
 
413
- if code_interpreter.remote_path != artifacts.remote_save_path.parent:
414
- raise ValueError(
415
- f"Code interpreter remote path {code_interpreter.remote_path} does not match artifacts remote path {artifacts.remote_save_path.parent}"
416
- )
417
-
418
- with code_interpreter:
385
+ with CodeInterpreterFactory.new_instance(
386
+ code_sandbox_runtime=self.code_sandbox_runtime,
387
+ remote_path=self.cwd,
388
+ ) as code_interpreter:
419
389
  orig_chat = copy.deepcopy(chat)
420
390
  int_chat = copy.deepcopy(chat)
421
391
  last_user_message = chat[-1]
422
- media_list = []
423
392
  for chat_i in int_chat:
424
393
  if "media" in chat_i:
425
394
  for media in chat_i["media"]:
426
395
  media = cast(str, media)
427
- artifacts.artifacts[Path(media).name] = open(media, "rb").read()
428
-
429
- media_remote_path = (
430
- Path(artifacts.remote_save_path.parent) / Path(media).name
431
- )
396
+ media_remote_path = Path(artifacts.cwd) / Path(media).name
432
397
  chat_i["content"] += f" Media name {media_remote_path}" # type: ignore
433
- media_list.append(media_remote_path)
434
398
 
435
399
  int_chat = cast(
436
400
  List[Message],
@@ -452,15 +416,10 @@ class VisionAgent(Agent):
452
416
  iterations = 0
453
417
  last_response = None
454
418
 
455
- # Save the current state of artifacts, will include any images the user
456
- # passed in.
457
- artifacts.save()
458
-
459
419
  # Upload artifacts to remote location and show where they are going
460
420
  # to be loaded to. The actual loading happens in BoilerplateCode as
461
421
  # part of the pre_code.
462
- code_interpreter.upload_file(artifacts.local_save_path)
463
- artifacts_loaded = artifacts.show(artifacts.remote_save_path.parent)
422
+ artifacts_loaded = artifacts.show()
464
423
  int_chat.append({"role": "observation", "content": artifacts_loaded})
465
424
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
466
425
  self.streaming_message({"role": "observation", "content": artifacts_loaded})
@@ -487,10 +446,6 @@ class VisionAgent(Agent):
487
446
  )
488
447
 
489
448
  while not finished and iterations < self.max_iterations:
490
- # ensure we upload the artifacts before each turn, so any local
491
- # modifications we made to it will be reflected in the remote
492
- code_interpreter.upload_file(artifacts.local_save_path)
493
-
494
449
  response = run_conversation(self.agent, int_chat)
495
450
  if self.verbosity >= 1:
496
451
  _LOGGER.info(response)
@@ -555,11 +510,8 @@ class VisionAgent(Agent):
555
510
  obs_chat_elt: Message = {"role": "observation", "content": obs}
556
511
  media_obs = check_and_load_image(code_action)
557
512
  if media_obs and result.success:
558
- # media paths will be under the local_save_path when we download
559
- # them after each turn
560
513
  obs_chat_elt["media"] = [
561
- artifacts.local_save_path.parent / media_ob
562
- for media_ob in media_obs
514
+ artifacts.cwd / media_ob for media_ob in media_obs
563
515
  ]
564
516
 
565
517
  if self.verbosity >= 1:
@@ -581,15 +533,6 @@ class VisionAgent(Agent):
581
533
  iterations += 1
582
534
  last_response = response
583
535
 
584
- # after each turn, download the artifacts locally
585
- code_interpreter.download_file(
586
- str(artifacts.remote_save_path.name),
587
- str(artifacts.local_save_path),
588
- )
589
- artifacts.load(
590
- artifacts.local_save_path, artifacts.local_save_path.parent
591
- )
592
-
593
536
  return orig_chat, artifacts
594
537
 
595
538
  def streaming_message(self, message: Dict[str, Any]) -> None:
@@ -604,9 +547,9 @@ class OpenAIVisionAgent(VisionAgent):
604
547
  def __init__(
605
548
  self,
606
549
  agent: Optional[LMM] = None,
550
+ cwd: Optional[Union[Path, str]] = None,
607
551
  verbosity: int = 0,
608
552
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
609
- code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
610
553
  ) -> None:
611
554
  """Initialize the VisionAgent using OpenAI LMMs.
612
555
 
@@ -625,9 +568,9 @@ class OpenAIVisionAgent(VisionAgent):
625
568
  agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
626
569
  super().__init__(
627
570
  agent,
571
+ cwd,
628
572
  verbosity,
629
573
  callback_message,
630
- code_interpreter,
631
574
  )
632
575
 
633
576
 
@@ -635,9 +578,9 @@ class AnthropicVisionAgent(VisionAgent):
635
578
  def __init__(
636
579
  self,
637
580
  agent: Optional[LMM] = None,
581
+ cwd: Optional[Union[Path, str]] = None,
638
582
  verbosity: int = 0,
639
583
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
640
- code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
641
584
  ) -> None:
642
585
  """Initialize the VisionAgent using Anthropic LMMs.
643
586
 
@@ -656,7 +599,7 @@ class AnthropicVisionAgent(VisionAgent):
656
599
  agent = AnthropicLMM(temperature=0.0) if agent is None else agent
657
600
  super().__init__(
658
601
  agent,
602
+ cwd,
659
603
  verbosity,
660
604
  callback_message,
661
- code_interpreter,
662
605
  )
@@ -450,12 +450,6 @@ class VisionAgentCoder(Agent):
450
450
  for chat_i in chat:
451
451
  if "media" in chat_i:
452
452
  for media in chat_i["media"]:
453
- media = (
454
- media
455
- if type(media) is str
456
- and media.startswith(("http", "https"))
457
- else code_interpreter.upload_file(cast(str, media))
458
- )
459
453
  chat_i["content"] += f" Media name {media}" # type: ignore
460
454
  media_list.append(str(media))
461
455
 
@@ -391,12 +391,6 @@ class VisionAgentPlanner(Agent):
391
391
  for chat_i in chat:
392
392
  if "media" in chat_i:
393
393
  for media in chat_i["media"]:
394
- media = (
395
- media
396
- if type(media) is str
397
- and media.startswith(("http", "https"))
398
- else code_interpreter.upload_file(cast(str, media))
399
- )
400
394
  chat_i["content"] += f" Media name {media}" # type: ignore
401
395
  media_list.append(str(media))
402
396
 
@@ -1,7 +1,6 @@
1
1
  import difflib
2
2
  import json
3
3
  import os
4
- import pickle as pkl
5
4
  import re
6
5
  import subprocess
7
6
  import tempfile
@@ -73,95 +72,41 @@ class Artifacts:
73
72
  need to be in sync with the remote environment the VisionAgent is running in.
74
73
  """
75
74
 
76
- def __init__(
77
- self, remote_save_path: Union[str, Path], local_save_path: Union[str, Path]
78
- ) -> None:
75
+ def __init__(self, cwd: Union[str, Path]) -> None:
79
76
  """Initializes the Artifacts object with its remote and local save paths.
80
77
 
81
78
  Parameters:
82
- remote_save_path (Union[str, Path]): The path to save the artifacts in the
83
- remote environment. For example "/home/user/artifacts.pkl".
84
- local_save_path (Union[str, Path]): The path to save the artifacts in the
85
- local environment. For example "/Users/my_user/workspace/artifacts.pkl".
79
+ cwd (Union[str, Path]): The path to save all the chat-related files. For example "/home/user/chat_abc/".
86
80
  """
87
- self.remote_save_path = Path(remote_save_path)
88
- self.local_save_path = Path(local_save_path)
89
- self.artifacts: Dict[str, Any] = {}
81
+ self.cwd = Path(cwd)
90
82
 
91
- self.code_sandbox_runtime = None
92
-
93
- def load(
94
- self,
95
- artifacts_path: Union[str, Path],
96
- load_to_dir: Optional[Union[str, Path]] = None,
97
- ) -> None:
98
- """Loads are artifacts into the load_to_dir directory. If load_to_dir is None,
99
- it will load into remote_save_path directory. If an artifact value is None it
100
- will skip loading it.
101
-
102
- Parameters:
103
- artifacts_path (Union[str, Path]): The file path to load the artifacts from.
104
- If you are in the remote environment this would be remote_save_path, if
105
- you are in the local environment this would be local_save_path.
106
- load_to_dir (Optional[Union[str, Path]): The directory to load the artifacts
107
- into. If None, it will load into remote_save_path directory.
108
- """
109
- with open(artifacts_path, "rb") as f:
110
- self.artifacts = pkl.load(f)
111
-
112
- load_to_dir = (
113
- self.remote_save_path.parent if load_to_dir is None else Path(load_to_dir)
114
- )
115
-
116
- for k, v in self.artifacts.items():
117
- if v is not None:
118
- mode = "w" if isinstance(v, str) else "wb"
119
- with open(load_to_dir / k, mode) as f:
120
- f.write(v)
121
-
122
- def show(self, uploaded_file_dir: Optional[Union[str, Path]] = None) -> str:
123
- """Prints out the artifacts and the directory they have been loaded to. If you
124
- pass in upload_file_dir, it will show the artifacts have been loaded to the
125
- upload_file_dir directory. If you don't pass in upload_file_dir, it will show
126
- the artifacts have been loaded to the remote_save_path directory.
127
-
128
- Parameters:
129
- uploaded_file_dir (Optional[Union[str, Path]): The directory the artifacts
130
- have been loaded to.
131
- """
132
- loaded_path = (
133
- Path(uploaded_file_dir)
134
- if uploaded_file_dir is not None
135
- else self.remote_save_path.parent
136
- )
83
+ def show(self) -> str:
84
+ """Prints out all the files in the current working directory"""
137
85
  output_str = "[Artifacts loaded]\n"
138
- for k in self.artifacts.keys():
139
- output_str += (
140
- f"Artifact name: {k}, loaded to path: {str(loaded_path / k)}\n"
141
- )
86
+ for k in self:
87
+ output_str += f"Artifact name: {k}, loaded to path: {str(self.cwd / k)}\n"
142
88
  output_str += "[End of artifacts]\n"
143
89
  print(output_str)
144
90
  return output_str
145
91
 
146
- def save(self, local_path: Optional[Union[str, Path]] = None) -> None:
147
- """Saves the artifacts to the local_save_path directory. If local_path is None,
148
- it will save to the local_save_path directory.
149
- """
150
- save_path = Path(local_path) if local_path is not None else self.local_save_path
151
- with open(save_path, "wb") as f:
152
- pkl.dump(self.artifacts, f)
153
-
154
92
  def __iter__(self) -> Any:
155
- return iter(self.artifacts)
93
+ return iter(os.listdir(self.cwd))
156
94
 
157
95
  def __getitem__(self, name: str) -> Any:
158
- return self.artifacts[name]
96
+ file_path = self.cwd / name
97
+ if file_path.exists():
98
+ with open(file_path, "r") as file:
99
+ return file.read()
100
+ else:
101
+ raise KeyError(f"File '{name}' not found in artifacts")
159
102
 
160
103
  def __setitem__(self, name: str, value: Any) -> None:
161
- self.artifacts[name] = value
104
+ file_path = self.cwd / name
105
+ with open(file_path, "w") as file:
106
+ file.write(value)
162
107
 
163
108
  def __contains__(self, name: str) -> bool:
164
- return name in self.artifacts
109
+ return name in os.listdir(self.cwd)
165
110
 
166
111
 
167
112
  def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
@@ -175,27 +120,6 @@ def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
175
120
  ), file_name_p.suffix in [".png", ".jpeg", ".jpg", ".mp4"]
176
121
 
177
122
 
178
- def capture_files_into_artifacts(artifacts: Artifacts) -> None:
179
- """This function is used to capture all files in the current directory into an
180
- artifact object. This is useful if you want to capture all files in the current
181
- directory and use them in a different environment where you don't have access to
182
- the file system.
183
-
184
- Parameters:
185
- artifact (Artifacts): The artifact object to save the files to.
186
- """
187
- for file in Path(".").glob("**/*"):
188
- usable_file, is_media = filter_file(file)
189
- mode = "rb" if is_media else "r"
190
- if usable_file:
191
- file_name = file.name
192
- if file_name.startswith(str(Path(artifacts.remote_save_path).parents)):
193
- idx = len(Path(artifacts.remote_save_path).parents)
194
- file_name = file_name[idx:]
195
- with open(file, mode) as f:
196
- artifacts[file_name] = f.read()
197
-
198
-
199
123
  # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
200
124
 
201
125
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.200
3
+ Version: 0.2.201
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -5,12 +5,12 @@ vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs
5
5
  vision_agent/agent/agent.py,sha256=sf8JcA3LNy_4GaS_gQb2Q-PXkl4dBuGh-7raI9KAtZo,1470
6
6
  vision_agent/agent/agent_utils.py,sha256=NmrqjhSb6fpnrB8XGWtaywZjr9n89otusOZpcbWLf9k,13534
7
7
  vision_agent/agent/types.py,sha256=aAd_ez1-NQh04k27cmywyOV2uA_vWWYE-Ok7zq_JoAk,1532
8
- vision_agent/agent/vision_agent.py,sha256=rr1P9iTbr7OsjgMYWCeIxQYI4cLwPWia3NIMJNi-9Yo,26110
9
- vision_agent/agent/vision_agent_coder.py,sha256=waCmw_NTgsy9G-UqlRZFhsFJJVuWVrjxVnShe4Xp_lI,27743
8
+ vision_agent/agent/vision_agent.py,sha256=I75bEU-os9Lf9OSICKfvQ_H_ftg-zOwgTwWnu41oIdo,23555
9
+ vision_agent/agent/vision_agent_coder.py,sha256=ANwUuCO4JpTYJs4s6ynSRFcdjZFUVuSoSfcqp8ZQDDQ,27451
10
10
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
11
11
  vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=9v5HwbNidSzYUEFl6ZMniWWOmyLITM_moWLtKVaTen8,4845
12
12
  vision_agent/agent/vision_agent_coder_v2.py,sha256=SVIJC0N5TBgq9z-F99UebLimRuQuAe_HHvTFupBzVfo,14715
13
- vision_agent/agent/vision_agent_planner.py,sha256=F_5opnc0XmQmNH40rs2T7DFrai4CC6aDYe02Z8e93AM,18875
13
+ vision_agent/agent/vision_agent_planner.py,sha256=KWMA7XemcSmc_jn-MwdWz9wnKDtj-sYQ9tINi70_OoU,18583
14
14
  vision_agent/agent/vision_agent_planner_prompts.py,sha256=Y3jz9HRf8fz9NLUseN7cTgZqewP0RazxR7vw1sPhcn0,6691
15
15
  vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=Tzon3h5iZdHJglesk8GVS-2myNf5-fhf7HUbkpZWHQk,33143
16
16
  vision_agent/agent/vision_agent_planner_v2.py,sha256=mxQxD_B8sKYharh8e7W0uc1tN11YCztyLowc83seScc,17023
@@ -26,7 +26,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
26
26
  vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
27
27
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
28
28
  vision_agent/tools/__init__.py,sha256=xuNt5e4syQH28Vr6EdjLmO9ni9i00yav9yqcPMUx1oo,2878
29
- vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
29
+ vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
30
30
  vision_agent/tools/planner_tools.py,sha256=FROahw_6Taqvytv6pOjCHUEypOfjsi_f8Vo1c5vz6Mw,8823
31
31
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
32
32
  vision_agent/tools/tool_utils.py,sha256=GDGOmBCo4UfYz-DJ-olREJHPsqs5mzHu0YXiAnpNE8E,10179
@@ -39,7 +39,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
39
39
  vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
40
40
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
41
41
  vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
42
- vision_agent-0.2.200.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
43
- vision_agent-0.2.200.dist-info/METADATA,sha256=goRTW73tD79-UlJiy4cL0twnVYm9iSjU9f5HsC4A1ZI,19026
44
- vision_agent-0.2.200.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
45
- vision_agent-0.2.200.dist-info/RECORD,,
42
+ vision_agent-0.2.201.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
43
+ vision_agent-0.2.201.dist-info/METADATA,sha256=Vbdn9gqa9uz0RTRV9SMvNgPQbqLGmgQJKUtuEe1buI0,19026
44
+ vision_agent-0.2.201.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
45
+ vision_agent-0.2.201.dist-info/RECORD,,