vision-agent 0.2.118__py3-none-any.whl → 0.2.119__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/agent.py +1 -1
- vision_agent/agent/vision_agent.py +107 -49
- vision_agent/agent/vision_agent_coder.py +2 -2
- vision_agent/agent/vision_agent_prompts.py +43 -22
- vision_agent/clients/landing_public_api.py +2 -2
- vision_agent/lmm/lmm.py +4 -2
- vision_agent/lmm/types.py +3 -1
- vision_agent/tools/__init__.py +2 -2
- vision_agent/tools/meta_tools.py +281 -273
- vision_agent/tools/tools.py +3 -3
- vision_agent/tools/tools_types.py +3 -3
- vision_agent/utils/execute.py +69 -22
- vision_agent/utils/image_utils.py +2 -2
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/METADATA +12 -8
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/RECORD +17 -17
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
import os
|
2
1
|
import io
|
3
2
|
import json
|
4
3
|
import logging
|
4
|
+
import os
|
5
5
|
import tempfile
|
6
|
+
import urllib.request
|
6
7
|
from importlib import resources
|
7
8
|
from pathlib import Path
|
8
9
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -15,7 +16,6 @@ from moviepy.editor import ImageSequenceClip
|
|
15
16
|
from PIL import Image, ImageDraw, ImageFont
|
16
17
|
from pillow_heif import register_heif_opener # type: ignore
|
17
18
|
from pytube import YouTube # type: ignore
|
18
|
-
import urllib.request
|
19
19
|
|
20
20
|
from vision_agent.clients.landing_public_api import LandingPublicAPI
|
21
21
|
from vision_agent.tools.tool_utils import (
|
@@ -1332,7 +1332,7 @@ def save_video(
|
|
1332
1332
|
video.write_videofile(f.name, codec="libx264")
|
1333
1333
|
f.close()
|
1334
1334
|
_save_video_to_result(f.name)
|
1335
|
-
|
1335
|
+
return f.name
|
1336
1336
|
|
1337
1337
|
|
1338
1338
|
def _save_video_to_result(video_uri: str) -> None:
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from uuid import UUID
|
2
1
|
from enum import Enum
|
3
|
-
from typing import List,
|
2
|
+
from typing import List, Optional, Tuple
|
3
|
+
from uuid import UUID
|
4
4
|
|
5
|
-
from pydantic import BaseModel, ConfigDict, Field,
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
|
6
6
|
|
7
7
|
|
8
8
|
class BboxInput(BaseModel):
|
vision_agent/utils/execute.py
CHANGED
@@ -5,7 +5,6 @@ import os
|
|
5
5
|
import platform
|
6
6
|
import re
|
7
7
|
import sys
|
8
|
-
import tempfile
|
9
8
|
import traceback
|
10
9
|
import warnings
|
11
10
|
from enum import Enum
|
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
|
|
40
39
|
load_dotenv()
|
41
40
|
_LOGGER = logging.getLogger(__name__)
|
42
41
|
_SESSION_TIMEOUT = 600 # 10 minutes
|
42
|
+
WORKSPACE = Path(os.getenv("WORKSPACE", ""))
|
43
43
|
|
44
44
|
|
45
45
|
class MimeType(str, Enum):
|
@@ -384,8 +384,15 @@ class Execution(BaseModel):
|
|
384
384
|
class CodeInterpreter(abc.ABC):
|
385
385
|
"""Code interpreter interface."""
|
386
386
|
|
387
|
-
def __init__(
|
387
|
+
def __init__(
|
388
|
+
self,
|
389
|
+
timeout: int,
|
390
|
+
remote_path: Optional[Union[str, Path]] = None,
|
391
|
+
*args: Any,
|
392
|
+
**kwargs: Any,
|
393
|
+
) -> None:
|
388
394
|
self.timeout = timeout
|
395
|
+
self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
|
389
396
|
|
390
397
|
def __enter__(self) -> Self:
|
391
398
|
return self
|
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
|
|
406
413
|
self.restart_kernel()
|
407
414
|
return self.exec_cell(code)
|
408
415
|
|
409
|
-
def upload_file(self, file: Union[str, Path]) ->
|
416
|
+
def upload_file(self, file: Union[str, Path]) -> Path:
|
410
417
|
# Default behavior is a no-op (for local code interpreter)
|
411
|
-
return
|
418
|
+
return Path(file)
|
412
419
|
|
413
|
-
def download_file(
|
420
|
+
def download_file(
|
421
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
422
|
+
) -> Path:
|
414
423
|
# Default behavior is a no-op (for local code interpreter)
|
415
|
-
return Path(
|
424
|
+
return Path(local_file_path)
|
416
425
|
|
417
426
|
|
418
427
|
class E2BCodeInterpreter(CodeInterpreter):
|
419
|
-
def __init__(
|
428
|
+
def __init__(
|
429
|
+
self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
|
430
|
+
) -> None:
|
420
431
|
super().__init__(*args, **kwargs)
|
421
432
|
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
|
422
433
|
try:
|
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
|
|
443
454
|
_LOGGER.info(
|
444
455
|
f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
|
445
456
|
)
|
457
|
+
self.remote_path = Path(
|
458
|
+
remote_path if remote_path is not None else "/home/user"
|
459
|
+
)
|
446
460
|
|
447
461
|
def close(self, *args: Any, **kwargs: Any) -> None:
|
448
462
|
try:
|
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
|
|
516
530
|
before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
|
517
531
|
after=tenacity.after_log(_LOGGER, logging.INFO),
|
518
532
|
)
|
519
|
-
def upload_file(self, file: Union[str, Path]) ->
|
533
|
+
def upload_file(self, file: Union[str, Path]) -> Path:
|
520
534
|
file_name = Path(file).name
|
521
|
-
remote_path = f"/home/user/{file_name}"
|
522
535
|
with open(file, "rb") as f:
|
523
|
-
self.interpreter.files.write(path=remote_path, data=f)
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
def download_file(
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
536
|
+
self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
|
537
|
+
_LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
|
538
|
+
return self.remote_path / file_name
|
539
|
+
|
540
|
+
def download_file(
|
541
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
542
|
+
) -> Path:
|
543
|
+
with open(local_file_path, "w+b") as f:
|
544
|
+
f.write(
|
545
|
+
self.interpreter.files.read(path=str(remote_file_path), format="bytes")
|
546
|
+
)
|
547
|
+
_LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
|
548
|
+
return Path(local_file_path)
|
532
549
|
|
533
550
|
@staticmethod
|
534
551
|
def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
|
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""
|
|
540
557
|
|
541
558
|
|
542
559
|
class LocalCodeInterpreter(CodeInterpreter):
|
543
|
-
def __init__(
|
560
|
+
def __init__(
|
561
|
+
self,
|
562
|
+
timeout: int = _SESSION_TIMEOUT,
|
563
|
+
remote_path: Optional[Union[str, Path]] = None,
|
564
|
+
) -> None:
|
544
565
|
super().__init__(timeout=timeout)
|
545
566
|
self.nb = nbformat.v4.new_notebook()
|
546
567
|
self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
|
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
|
|
554
575
|
)
|
555
576
|
sleep(1)
|
556
577
|
self._new_kernel()
|
578
|
+
self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
|
557
579
|
|
558
580
|
def _new_kernel(self) -> None:
|
559
581
|
if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
|
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
|
|
607
629
|
traceback_raw = traceback.format_exc().splitlines()
|
608
630
|
return Execution.from_exception(e, traceback_raw)
|
609
631
|
|
632
|
+
def upload_file(self, file_path: Union[str, Path]) -> Path:
|
633
|
+
with open(file_path, "rb") as f:
|
634
|
+
contents = f.read()
|
635
|
+
with open(self.remote_path / Path(file_path).name, "wb") as f:
|
636
|
+
f.write(contents)
|
637
|
+
_LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")
|
638
|
+
|
639
|
+
return Path(self.remote_path / file_path)
|
640
|
+
|
641
|
+
def download_file(
|
642
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
643
|
+
) -> Path:
|
644
|
+
with open(self.remote_path / remote_file_path, "rb") as f:
|
645
|
+
contents = f.read()
|
646
|
+
with open(local_file_path, "wb") as f:
|
647
|
+
f.write(contents)
|
648
|
+
_LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
|
649
|
+
return Path(local_file_path)
|
650
|
+
|
610
651
|
|
611
652
|
class CodeInterpreterFactory:
|
612
653
|
"""Factory class for creating code interpreters.
|
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
|
|
630
671
|
return instance
|
631
672
|
|
632
673
|
@staticmethod
|
633
|
-
def new_instance(
|
674
|
+
def new_instance(
|
675
|
+
code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
|
676
|
+
) -> CodeInterpreter:
|
634
677
|
if not code_sandbox_runtime:
|
635
678
|
code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
|
636
679
|
if code_sandbox_runtime == "e2b":
|
637
|
-
instance: CodeInterpreter = E2BCodeInterpreter(
|
680
|
+
instance: CodeInterpreter = E2BCodeInterpreter(
|
681
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
682
|
+
)
|
638
683
|
elif code_sandbox_runtime == "local":
|
639
|
-
instance = LocalCodeInterpreter(
|
684
|
+
instance = LocalCodeInterpreter(
|
685
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
686
|
+
)
|
640
687
|
else:
|
641
688
|
raise ValueError(
|
642
689
|
f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
|
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
|
|
70
70
|
r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
|
71
71
|
|
72
72
|
Parameters:
|
73
|
-
|
73
|
+
rle: The run-length encoded mask.
|
74
74
|
"""
|
75
75
|
size = rle["size"]
|
76
76
|
counts = rle["counts"]
|
@@ -100,7 +100,7 @@ def frames_to_bytes(
|
|
100
100
|
"""
|
101
101
|
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
|
102
102
|
clip = ImageSequenceClip(frames, fps=fps)
|
103
|
-
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
|
103
|
+
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
|
104
104
|
with open(temp_file.name + f".{file_ext}", "rb") as f:
|
105
105
|
buffer_bytes = f.read()
|
106
106
|
return buffer_bytes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.
|
3
|
+
Version: 0.2.119
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
|
|
81
81
|
```
|
82
82
|
|
83
83
|
### Vision Agent
|
84
|
-
There are two agents that you can use.
|
84
|
+
There are two agents that you can use. `VisionAgent` is a conversational agent that has
|
85
85
|
access to tools that allow it to write and navigate python code and file systems. It can
|
86
|
-
converse with the user in natural language. VisionAgentCoder is an agent
|
87
|
-
code for vision tasks, such as counting people in an image. However, it
|
88
|
-
and can only respond with code. VisionAgent can call
|
89
|
-
code.
|
86
|
+
converse with the user in natural language. `VisionAgentCoder` is an agent specifically
|
87
|
+
for writing code for vision tasks, such as counting people in an image. However, it
|
88
|
+
cannot chat with you and can only respond with code. `VisionAgent` can call
|
89
|
+
`VisionAgentCoder` to write vision code.
|
90
90
|
|
91
91
|
#### Basic Usage
|
92
|
-
To run the streamlit app locally to chat with
|
92
|
+
To run the streamlit app locally to chat with `VisionAgent`, you can run the following
|
93
93
|
command:
|
94
94
|
|
95
95
|
```bash
|
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
|
|
186
186
|
assistant:
|
187
187
|
|
188
188
|
```python
|
189
|
-
agent = va.agent.
|
189
|
+
agent = va.agent.VisionAgentCoder(verbosity=2)
|
190
190
|
conv = [
|
191
191
|
{
|
192
192
|
"role": "user",
|
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
|
|
252
252
|
`Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
|
253
253
|
[here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
|
254
254
|
|
255
|
+
Can't find the tool you need and want to add it to `VisionAgent`? Check out our
|
256
|
+
[vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
|
257
|
+
we add the source code for all the tools used in `VisionAgent`.
|
258
|
+
|
255
259
|
## Additional Backends
|
256
260
|
### Ollama
|
257
261
|
We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
|
@@ -1,33 +1,33 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
2
|
vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
|
3
|
-
vision_agent/agent/agent.py,sha256=
|
3
|
+
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
6
|
-
vision_agent/agent/vision_agent_coder.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
|
6
|
+
vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
|
8
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
|
9
9
|
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
|
-
vision_agent/clients/landing_public_api.py,sha256=
|
11
|
+
vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
|
12
12
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
14
|
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
-
vision_agent/lmm/lmm.py,sha256=
|
16
|
-
vision_agent/lmm/types.py,sha256=
|
17
|
-
vision_agent/tools/__init__.py,sha256=
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
15
|
+
vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
|
16
|
+
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
|
+
vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
22
|
-
vision_agent/tools/tools_types.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
|
22
|
+
vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
|
23
23
|
vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
-
vision_agent/utils/execute.py,sha256=
|
26
|
-
vision_agent/utils/image_utils.py,sha256=
|
25
|
+
vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
|
26
|
+
vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.119.dist-info/METADATA,sha256=ag8Cf800dZJtJqJtwEcf4gqf7Qjf-K1JMoeisDI7RWQ,12255
|
32
|
+
vision_agent-0.2.119.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.119.dist-info/RECORD,,
|
File without changes
|
File without changes
|