vision-agent 0.2.118__py3-none-any.whl → 0.2.120__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,9 @@
1
- import os
2
1
  import io
3
2
  import json
4
3
  import logging
4
+ import os
5
5
  import tempfile
6
+ import urllib.request
6
7
  from importlib import resources
7
8
  from pathlib import Path
8
9
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -15,7 +16,6 @@ from moviepy.editor import ImageSequenceClip
15
16
  from PIL import Image, ImageDraw, ImageFont
16
17
  from pillow_heif import register_heif_opener # type: ignore
17
18
  from pytube import YouTube # type: ignore
18
- import urllib.request
19
19
 
20
20
  from vision_agent.clients.landing_public_api import LandingPublicAPI
21
21
  from vision_agent.tools.tool_utils import (
@@ -1332,7 +1332,7 @@ def save_video(
1332
1332
  video.write_videofile(f.name, codec="libx264")
1333
1333
  f.close()
1334
1334
  _save_video_to_result(f.name)
1335
- return f.name
1335
+ return f.name
1336
1336
 
1337
1337
 
1338
1338
  def _save_video_to_result(video_uri: str) -> None:
@@ -1,8 +1,8 @@
1
- from uuid import UUID
2
1
  from enum import Enum
3
- from typing import List, Tuple, Optional
2
+ from typing import List, Optional, Tuple
3
+ from uuid import UUID
4
4
 
5
- from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
5
+ from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
6
6
 
7
7
 
8
8
  class BboxInput(BaseModel):
@@ -5,7 +5,6 @@ import os
5
5
  import platform
6
6
  import re
7
7
  import sys
8
- import tempfile
9
8
  import traceback
10
9
  import warnings
11
10
  from enum import Enum
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
40
39
  load_dotenv()
41
40
  _LOGGER = logging.getLogger(__name__)
42
41
  _SESSION_TIMEOUT = 600 # 10 minutes
42
+ WORKSPACE = Path(os.getenv("WORKSPACE", ""))
43
43
 
44
44
 
45
45
  class MimeType(str, Enum):
@@ -384,8 +384,15 @@ class Execution(BaseModel):
384
384
  class CodeInterpreter(abc.ABC):
385
385
  """Code interpreter interface."""
386
386
 
387
- def __init__(self, timeout: int, *args: Any, **kwargs: Any) -> None:
387
+ def __init__(
388
+ self,
389
+ timeout: int,
390
+ remote_path: Optional[Union[str, Path]] = None,
391
+ *args: Any,
392
+ **kwargs: Any,
393
+ ) -> None:
388
394
  self.timeout = timeout
395
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
389
396
 
390
397
  def __enter__(self) -> Self:
391
398
  return self
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
406
413
  self.restart_kernel()
407
414
  return self.exec_cell(code)
408
415
 
409
- def upload_file(self, file: Union[str, Path]) -> str:
416
+ def upload_file(self, file: Union[str, Path]) -> Path:
410
417
  # Default behavior is a no-op (for local code interpreter)
411
- return str(file)
418
+ return Path(file)
412
419
 
413
- def download_file(self, file_path: str) -> Path:
420
+ def download_file(
421
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
422
+ ) -> Path:
414
423
  # Default behavior is a no-op (for local code interpreter)
415
- return Path(file_path)
424
+ return Path(local_file_path)
416
425
 
417
426
 
418
427
  class E2BCodeInterpreter(CodeInterpreter):
419
- def __init__(self, *args: Any, **kwargs: Any) -> None:
428
+ def __init__(
429
+ self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
430
+ ) -> None:
420
431
  super().__init__(*args, **kwargs)
421
432
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
422
433
  try:
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
443
454
  _LOGGER.info(
444
455
  f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
445
456
  )
457
+ self.remote_path = Path(
458
+ remote_path if remote_path is not None else "/home/user"
459
+ )
446
460
 
447
461
  def close(self, *args: Any, **kwargs: Any) -> None:
448
462
  try:
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
516
530
  before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
517
531
  after=tenacity.after_log(_LOGGER, logging.INFO),
518
532
  )
519
- def upload_file(self, file: Union[str, Path]) -> str:
533
+ def upload_file(self, file: Union[str, Path]) -> Path:
520
534
  file_name = Path(file).name
521
- remote_path = f"/home/user/{file_name}"
522
535
  with open(file, "rb") as f:
523
- self.interpreter.files.write(path=remote_path, data=f)
524
- _LOGGER.info(f"File ({file}) is uploaded to: {remote_path}")
525
- return remote_path
526
-
527
- def download_file(self, file_path: str) -> Path:
528
- with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
529
- file.write(self.interpreter.files.read(path=file_path, format="bytes"))
530
- _LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
531
- return Path(file.name)
536
+ self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
537
+ _LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
538
+ return self.remote_path / file_name
539
+
540
+ def download_file(
541
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
542
+ ) -> Path:
543
+ with open(local_file_path, "w+b") as f:
544
+ f.write(
545
+ self.interpreter.files.read(path=str(remote_file_path), format="bytes")
546
+ )
547
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
548
+ return Path(local_file_path)
532
549
 
533
550
  @staticmethod
534
551
  def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""
540
557
 
541
558
 
542
559
  class LocalCodeInterpreter(CodeInterpreter):
543
- def __init__(self, timeout: int = _SESSION_TIMEOUT) -> None:
560
+ def __init__(
561
+ self,
562
+ timeout: int = _SESSION_TIMEOUT,
563
+ remote_path: Optional[Union[str, Path]] = None,
564
+ ) -> None:
544
565
  super().__init__(timeout=timeout)
545
566
  self.nb = nbformat.v4.new_notebook()
546
567
  self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
554
575
  )
555
576
  sleep(1)
556
577
  self._new_kernel()
578
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
557
579
 
558
580
  def _new_kernel(self) -> None:
559
581
  if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
607
629
  traceback_raw = traceback.format_exc().splitlines()
608
630
  return Execution.from_exception(e, traceback_raw)
609
631
 
632
def upload_file(self, file_path: Union[str, Path]) -> Path:
    """Copy a local file into this interpreter's working directory.

    Parameters:
        file_path: Path of the file to copy.

    Returns:
        The path of the copy, i.e. ``self.remote_path / <file name>``.
    """
    # Only the file name is kept, so the copy always lands directly inside
    # remote_path regardless of any directories present in file_path.
    destination = self.remote_path / Path(file_path).name
    with open(file_path, "rb") as src:
        contents = src.read()
    with open(destination, "wb") as dst:
        dst.write(contents)
    _LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")

    # Return the location that was actually written. Joining remote_path with
    # the unmodified file_path yields the source path itself for absolute
    # inputs, and a never-written nested path for relative ones.
    return destination
640
+
641
def download_file(
    self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
) -> Path:
    """Copy a file out of the interpreter's working directory.

    Parameters:
        remote_file_path: Location of the source file; it is joined onto
            ``self.remote_path`` before being read.
        local_file_path: Where the copy is written.

    Returns:
        ``local_file_path`` as a ``Path``.
    """
    source = self.remote_path / remote_file_path
    target = Path(local_file_path)
    # The source is read in full before the target is opened for writing.
    target.write_bytes(source.read_bytes())
    _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
    return target
650
+
610
651
 
611
652
  class CodeInterpreterFactory:
612
653
  """Factory class for creating code interpreters.
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
630
671
  return instance
631
672
 
632
673
  @staticmethod
633
- def new_instance(code_sandbox_runtime: Optional[str] = None) -> CodeInterpreter:
674
+ def new_instance(
675
+ code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
676
+ ) -> CodeInterpreter:
634
677
  if not code_sandbox_runtime:
635
678
  code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
636
679
  if code_sandbox_runtime == "e2b":
637
- instance: CodeInterpreter = E2BCodeInterpreter(timeout=_SESSION_TIMEOUT)
680
+ instance: CodeInterpreter = E2BCodeInterpreter(
681
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
682
+ )
638
683
  elif code_sandbox_runtime == "local":
639
- instance = LocalCodeInterpreter(timeout=_SESSION_TIMEOUT)
684
+ instance = LocalCodeInterpreter(
685
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
686
+ )
640
687
  else:
641
688
  raise ValueError(
642
689
  f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
70
70
  r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
71
71
 
72
72
  Parameters:
73
- mask: The mask in run-length encoded as an array.
73
+ rle: The run-length encoded mask.
74
74
  """
75
75
  size = rle["size"]
76
76
  counts = rle["counts"]
@@ -100,7 +100,7 @@ def frames_to_bytes(
100
100
  """
101
101
  with tempfile.NamedTemporaryFile(delete=True) as temp_file:
102
102
  clip = ImageSequenceClip(frames, fps=fps)
103
- clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
103
+ clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
104
104
  with open(temp_file.name + f".{file_ext}", "rb") as f:
105
105
  buffer_bytes = f.read()
106
106
  return buffer_bytes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.118
3
+ Version: 0.2.120
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
81
81
  ```
82
82
 
83
83
  ### Vision Agent
84
- There are two agents that you can use. Vision Agent is a conversational agent that has
84
+ There are two agents that you can use. `VisionAgent` is a conversational agent that has
85
85
  access to tools that allow it to write and navigate Python code and file systems. It can
86
- converse with the user in natural language. VisionAgentCoder is an agent that can write
87
- code for vision tasks, such as counting people in an image. However, it cannot converse
88
- and can only respond with code. VisionAgent can call VisionAgentCoder to write vision
89
- code.
86
+ converse with the user in natural language. `VisionAgentCoder` is an agent specifically
87
+ for writing code for vision tasks, such as counting people in an image. However, it
88
+ cannot chat with you and can only respond with code. `VisionAgent` can call
89
+ `VisionAgentCoder` to write vision code.
90
90
 
91
91
  #### Basic Usage
92
- To run the streamlit app locally to chat with Vision Agent, you can run the following
92
+ To run the streamlit app locally to chat with `VisionAgent`, you can run the following
93
93
  command:
94
94
 
95
95
  ```bash
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
186
186
  assistant:
187
187
 
188
188
  ```python
189
- agent = va.agent.VisionAgent(verbosity=2)
189
+ agent = va.agent.VisionAgentCoder(verbosity=2)
190
190
  conv = [
191
191
  {
192
192
  "role": "user",
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
252
252
  `Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
253
253
  [here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
254
254
 
255
+ Can't find the tool you need and want to add it to `VisionAgent`? Check out our
256
+ [vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
257
+ we add the source code for all the tools used in `VisionAgent`.
258
+
255
259
  ## Additional Backends
256
260
  ### Ollama
257
261
  We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
@@ -1,33 +1,33 @@
1
1
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
3
- vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
3
+ vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
5
- vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
- vision_agent/agent/vision_agent_coder.py,sha256=tE-15ttnDxUsEdB0XJP4AVNyOU89KS8ZvXZDPcNKA-8,34380
5
+ vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
6
+ vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
8
- vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
8
+ vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
9
9
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
- vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
11
+ vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
12
12
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
14
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
- vision_agent/lmm/lmm.py,sha256=xkAxunToISzo5rCcjekqQBvm5SRW-98htieLuztKNbk,20802
16
- vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=lUUc2HV13eSxg5KPZop1D-mB4ecmiQ5fYlBTQLNSbYg,2190
18
- vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
15
+ vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
16
+ vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
17
+ vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
18
+ vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
19
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
20
  vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
21
- vision_agent/tools/tools.py,sha256=gAW6G9k1vzy8jwRACNnw2Vihsajm_oSlVJqd6E4JSRA,59957
22
- vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
21
+ vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
22
+ vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
23
23
  vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
24
24
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
- vision_agent/utils/execute.py,sha256=1_pyu16WRlFD81W8Uy_Sv7_jD_qkrkxHdUNeFstBzaA,25082
26
- vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
25
+ vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
26
+ vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.118.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.118.dist-info/METADATA,sha256=4ilO7j9MOLCtaNekUUVlhMNdDKMk02ecx7ipnXT9RC8,11997
32
- vision_agent-0.2.118.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.118.dist-info/RECORD,,
30
+ vision_agent-0.2.120.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.120.dist-info/METADATA,sha256=-FuNdlrzt5cTK6Ou_HTTROGVvsIwP3trsB5Edt2St3o,12255
32
+ vision_agent-0.2.120.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.120.dist-info/RECORD,,