vision-agent 0.2.118__py3-none-any.whl → 0.2.119__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
- import os
2
1
  import io
3
2
  import json
4
3
  import logging
4
+ import os
5
5
  import tempfile
6
+ import urllib.request
6
7
  from importlib import resources
7
8
  from pathlib import Path
8
9
  from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -15,7 +16,6 @@ from moviepy.editor import ImageSequenceClip
15
16
  from PIL import Image, ImageDraw, ImageFont
16
17
  from pillow_heif import register_heif_opener # type: ignore
17
18
  from pytube import YouTube # type: ignore
18
- import urllib.request
19
19
 
20
20
  from vision_agent.clients.landing_public_api import LandingPublicAPI
21
21
  from vision_agent.tools.tool_utils import (
@@ -1332,7 +1332,7 @@ def save_video(
1332
1332
  video.write_videofile(f.name, codec="libx264")
1333
1333
  f.close()
1334
1334
  _save_video_to_result(f.name)
1335
- return f.name
1335
+ return f.name
1336
1336
 
1337
1337
 
1338
1338
  def _save_video_to_result(video_uri: str) -> None:
@@ -1,8 +1,8 @@
1
- from uuid import UUID
2
1
  from enum import Enum
3
- from typing import List, Tuple, Optional
2
+ from typing import List, Optional, Tuple
3
+ from uuid import UUID
4
4
 
5
- from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
5
+ from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
6
6
 
7
7
 
8
8
  class BboxInput(BaseModel):
@@ -5,7 +5,6 @@ import os
5
5
  import platform
6
6
  import re
7
7
  import sys
8
- import tempfile
9
8
  import traceback
10
9
  import warnings
11
10
  from enum import Enum
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
40
39
  load_dotenv()
41
40
  _LOGGER = logging.getLogger(__name__)
42
41
  _SESSION_TIMEOUT = 600 # 10 minutes
42
+ WORKSPACE = Path(os.getenv("WORKSPACE", ""))
43
43
 
44
44
 
45
45
  class MimeType(str, Enum):
@@ -384,8 +384,15 @@ class Execution(BaseModel):
384
384
  class CodeInterpreter(abc.ABC):
385
385
  """Code interpreter interface."""
386
386
 
387
- def __init__(self, timeout: int, *args: Any, **kwargs: Any) -> None:
387
+ def __init__(
388
+ self,
389
+ timeout: int,
390
+ remote_path: Optional[Union[str, Path]] = None,
391
+ *args: Any,
392
+ **kwargs: Any,
393
+ ) -> None:
388
394
  self.timeout = timeout
395
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
389
396
 
390
397
  def __enter__(self) -> Self:
391
398
  return self
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
406
413
  self.restart_kernel()
407
414
  return self.exec_cell(code)
408
415
 
409
- def upload_file(self, file: Union[str, Path]) -> str:
416
+ def upload_file(self, file: Union[str, Path]) -> Path:
410
417
  # Default behavior is a no-op (for local code interpreter)
411
- return str(file)
418
+ return Path(file)
412
419
 
413
- def download_file(self, file_path: str) -> Path:
420
+ def download_file(
421
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
422
+ ) -> Path:
414
423
  # Default behavior is a no-op (for local code interpreter)
415
- return Path(file_path)
424
+ return Path(local_file_path)
416
425
 
417
426
 
418
427
  class E2BCodeInterpreter(CodeInterpreter):
419
- def __init__(self, *args: Any, **kwargs: Any) -> None:
428
+ def __init__(
429
+ self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
430
+ ) -> None:
420
431
  super().__init__(*args, **kwargs)
421
432
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
422
433
  try:
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
443
454
  _LOGGER.info(
444
455
  f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
445
456
  )
457
+ self.remote_path = Path(
458
+ remote_path if remote_path is not None else "/home/user"
459
+ )
446
460
 
447
461
  def close(self, *args: Any, **kwargs: Any) -> None:
448
462
  try:
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
516
530
  before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
517
531
  after=tenacity.after_log(_LOGGER, logging.INFO),
518
532
  )
519
- def upload_file(self, file: Union[str, Path]) -> str:
533
+ def upload_file(self, file: Union[str, Path]) -> Path:
520
534
  file_name = Path(file).name
521
- remote_path = f"/home/user/{file_name}"
522
535
  with open(file, "rb") as f:
523
- self.interpreter.files.write(path=remote_path, data=f)
524
- _LOGGER.info(f"File ({file}) is uploaded to: {remote_path}")
525
- return remote_path
526
-
527
- def download_file(self, file_path: str) -> Path:
528
- with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
529
- file.write(self.interpreter.files.read(path=file_path, format="bytes"))
530
- _LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
531
- return Path(file.name)
536
+ self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
537
+ _LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
538
+ return self.remote_path / file_name
539
+
540
+ def download_file(
541
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
542
+ ) -> Path:
543
+ with open(local_file_path, "w+b") as f:
544
+ f.write(
545
+ self.interpreter.files.read(path=str(remote_file_path), format="bytes")
546
+ )
547
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
548
+ return Path(local_file_path)
532
549
 
533
550
  @staticmethod
534
551
  def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""
540
557
 
541
558
 
542
559
  class LocalCodeInterpreter(CodeInterpreter):
543
- def __init__(self, timeout: int = _SESSION_TIMEOUT) -> None:
560
+ def __init__(
561
+ self,
562
+ timeout: int = _SESSION_TIMEOUT,
563
+ remote_path: Optional[Union[str, Path]] = None,
564
+ ) -> None:
544
565
  super().__init__(timeout=timeout)
545
566
  self.nb = nbformat.v4.new_notebook()
546
567
  self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
554
575
  )
555
576
  sleep(1)
556
577
  self._new_kernel()
578
+ self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
557
579
 
558
580
  def _new_kernel(self) -> None:
559
581
  if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
607
629
  traceback_raw = traceback.format_exc().splitlines()
608
630
  return Execution.from_exception(e, traceback_raw)
609
631
 
632
+ def upload_file(self, file_path: Union[str, Path]) -> Path:
633
+ with open(file_path, "rb") as f:
634
+ contents = f.read()
635
+ with open(self.remote_path / Path(file_path).name, "wb") as f:
636
+ f.write(contents)
637
+ _LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")
638
+
639
+ return Path(self.remote_path / file_path)
640
+
641
+ def download_file(
642
+ self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
643
+ ) -> Path:
644
+ with open(self.remote_path / remote_file_path, "rb") as f:
645
+ contents = f.read()
646
+ with open(local_file_path, "wb") as f:
647
+ f.write(contents)
648
+ _LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
649
+ return Path(local_file_path)
650
+
610
651
 
611
652
  class CodeInterpreterFactory:
612
653
  """Factory class for creating code interpreters.
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
630
671
  return instance
631
672
 
632
673
  @staticmethod
633
- def new_instance(code_sandbox_runtime: Optional[str] = None) -> CodeInterpreter:
674
+ def new_instance(
675
+ code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
676
+ ) -> CodeInterpreter:
634
677
  if not code_sandbox_runtime:
635
678
  code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
636
679
  if code_sandbox_runtime == "e2b":
637
- instance: CodeInterpreter = E2BCodeInterpreter(timeout=_SESSION_TIMEOUT)
680
+ instance: CodeInterpreter = E2BCodeInterpreter(
681
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
682
+ )
638
683
  elif code_sandbox_runtime == "local":
639
- instance = LocalCodeInterpreter(timeout=_SESSION_TIMEOUT)
684
+ instance = LocalCodeInterpreter(
685
+ timeout=_SESSION_TIMEOUT, remote_path=remote_path
686
+ )
640
687
  else:
641
688
  raise ValueError(
642
689
  f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
70
70
  r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
71
71
 
72
72
  Parameters:
73
- mask: The mask in run-length encoded as an array.
73
+ rle: The run-length encoded mask.
74
74
  """
75
75
  size = rle["size"]
76
76
  counts = rle["counts"]
@@ -100,7 +100,7 @@ def frames_to_bytes(
100
100
  """
101
101
  with tempfile.NamedTemporaryFile(delete=True) as temp_file:
102
102
  clip = ImageSequenceClip(frames, fps=fps)
103
- clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
103
+ clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
104
104
  with open(temp_file.name + f".{file_ext}", "rb") as f:
105
105
  buffer_bytes = f.read()
106
106
  return buffer_bytes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.118
3
+ Version: 0.2.119
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
81
81
  ```
82
82
 
83
83
  ### Vision Agent
84
- There are two agents that you can use. Vision Agent is a conversational agent that has
84
+ There are two agents that you can use. `VisionAgent` is a conversational agent that has
85
85
  access to tools that allow it to write and navigate python code and file systems. It can
86
- converse with the user in natural language. VisionAgentCoder is an agent that can write
87
- code for vision tasks, such as counting people in an image. However, it cannot converse
88
- and can only respond with code. VisionAgent can call VisionAgentCoder to write vision
89
- code.
86
+ converse with the user in natural language. `VisionAgentCoder` is an agent specifically
87
+ for writing code for vision tasks, such as counting people in an image. However, it
88
+ cannot chat with you and can only respond with code. `VisionAgent` can call
89
+ `VisionAgentCoder` to write vision code.
90
90
 
91
91
  #### Basic Usage
92
- To run the streamlit app locally to chat with Vision Agent, you can run the following
92
+ To run the streamlit app locally to chat with `VisionAgent`, you can run the following
93
93
  command:
94
94
 
95
95
  ```bash
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
186
186
  assistant:
187
187
 
188
188
  ```python
189
- agent = va.agent.VisionAgent(verbosity=2)
189
+ agent = va.agent.VisionAgentCoder(verbosity=2)
190
190
  conv = [
191
191
  {
192
192
  "role": "user",
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
252
252
  `Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
253
253
  [here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
254
254
 
255
+ Can't find the tool you need and want to add it to `VisionAgent`? Check out our
256
+ [vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
257
+ we add the source code for all the tools used in `VisionAgent`.
258
+
255
259
  ## Additional Backends
256
260
  ### Ollama
257
261
  We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
@@ -1,33 +1,33 @@
1
1
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
3
- vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
3
+ vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
5
- vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
- vision_agent/agent/vision_agent_coder.py,sha256=tE-15ttnDxUsEdB0XJP4AVNyOU89KS8ZvXZDPcNKA-8,34380
5
+ vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
6
+ vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
8
- vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
8
+ vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
9
9
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
- vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
11
+ vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
12
12
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
14
  vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
- vision_agent/lmm/lmm.py,sha256=xkAxunToISzo5rCcjekqQBvm5SRW-98htieLuztKNbk,20802
16
- vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=lUUc2HV13eSxg5KPZop1D-mB4ecmiQ5fYlBTQLNSbYg,2190
18
- vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
15
+ vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
16
+ vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
17
+ vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
18
+ vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
19
19
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
20
  vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
21
- vision_agent/tools/tools.py,sha256=gAW6G9k1vzy8jwRACNnw2Vihsajm_oSlVJqd6E4JSRA,59957
22
- vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
21
+ vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
22
+ vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
23
23
  vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
24
24
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
- vision_agent/utils/execute.py,sha256=1_pyu16WRlFD81W8Uy_Sv7_jD_qkrkxHdUNeFstBzaA,25082
26
- vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
25
+ vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
26
+ vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.118.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.118.dist-info/METADATA,sha256=4ilO7j9MOLCtaNekUUVlhMNdDKMk02ecx7ipnXT9RC8,11997
32
- vision_agent-0.2.118.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.118.dist-info/RECORD,,
30
+ vision_agent-0.2.119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.119.dist-info/METADATA,sha256=ag8Cf800dZJtJqJtwEcf4gqf7Qjf-K1JMoeisDI7RWQ,12255
32
+ vision_agent-0.2.119.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.119.dist-info/RECORD,,