vision-agent 0.2.118__py3-none-any.whl → 0.2.119__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/agent.py +1 -1
- vision_agent/agent/vision_agent.py +107 -49
- vision_agent/agent/vision_agent_coder.py +2 -2
- vision_agent/agent/vision_agent_prompts.py +43 -22
- vision_agent/clients/landing_public_api.py +2 -2
- vision_agent/lmm/lmm.py +4 -2
- vision_agent/lmm/types.py +3 -1
- vision_agent/tools/__init__.py +2 -2
- vision_agent/tools/meta_tools.py +281 -273
- vision_agent/tools/tools.py +3 -3
- vision_agent/tools/tools_types.py +3 -3
- vision_agent/utils/execute.py +69 -22
- vision_agent/utils/image_utils.py +2 -2
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/METADATA +12 -8
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/RECORD +17 -17
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.118.dist-info → vision_agent-0.2.119.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
import os
|
2
1
|
import io
|
3
2
|
import json
|
4
3
|
import logging
|
4
|
+
import os
|
5
5
|
import tempfile
|
6
|
+
import urllib.request
|
6
7
|
from importlib import resources
|
7
8
|
from pathlib import Path
|
8
9
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -15,7 +16,6 @@ from moviepy.editor import ImageSequenceClip
|
|
15
16
|
from PIL import Image, ImageDraw, ImageFont
|
16
17
|
from pillow_heif import register_heif_opener # type: ignore
|
17
18
|
from pytube import YouTube # type: ignore
|
18
|
-
import urllib.request
|
19
19
|
|
20
20
|
from vision_agent.clients.landing_public_api import LandingPublicAPI
|
21
21
|
from vision_agent.tools.tool_utils import (
|
@@ -1332,7 +1332,7 @@ def save_video(
|
|
1332
1332
|
video.write_videofile(f.name, codec="libx264")
|
1333
1333
|
f.close()
|
1334
1334
|
_save_video_to_result(f.name)
|
1335
|
-
|
1335
|
+
return f.name
|
1336
1336
|
|
1337
1337
|
|
1338
1338
|
def _save_video_to_result(video_uri: str) -> None:
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from uuid import UUID
|
2
1
|
from enum import Enum
|
3
|
-
from typing import List,
|
2
|
+
from typing import List, Optional, Tuple
|
3
|
+
from uuid import UUID
|
4
4
|
|
5
|
-
from pydantic import BaseModel, ConfigDict, Field,
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
|
6
6
|
|
7
7
|
|
8
8
|
class BboxInput(BaseModel):
|
vision_agent/utils/execute.py
CHANGED
@@ -5,7 +5,6 @@ import os
|
|
5
5
|
import platform
|
6
6
|
import re
|
7
7
|
import sys
|
8
|
-
import tempfile
|
9
8
|
import traceback
|
10
9
|
import warnings
|
11
10
|
from enum import Enum
|
@@ -40,6 +39,7 @@ from vision_agent.utils.exceptions import (
|
|
40
39
|
load_dotenv()
|
41
40
|
_LOGGER = logging.getLogger(__name__)
|
42
41
|
_SESSION_TIMEOUT = 600 # 10 minutes
|
42
|
+
WORKSPACE = Path(os.getenv("WORKSPACE", ""))
|
43
43
|
|
44
44
|
|
45
45
|
class MimeType(str, Enum):
|
@@ -384,8 +384,15 @@ class Execution(BaseModel):
|
|
384
384
|
class CodeInterpreter(abc.ABC):
|
385
385
|
"""Code interpreter interface."""
|
386
386
|
|
387
|
-
def __init__(
|
387
|
+
def __init__(
|
388
|
+
self,
|
389
|
+
timeout: int,
|
390
|
+
remote_path: Optional[Union[str, Path]] = None,
|
391
|
+
*args: Any,
|
392
|
+
**kwargs: Any,
|
393
|
+
) -> None:
|
388
394
|
self.timeout = timeout
|
395
|
+
self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
|
389
396
|
|
390
397
|
def __enter__(self) -> Self:
|
391
398
|
return self
|
@@ -406,17 +413,21 @@ class CodeInterpreter(abc.ABC):
|
|
406
413
|
self.restart_kernel()
|
407
414
|
return self.exec_cell(code)
|
408
415
|
|
409
|
-
def upload_file(self, file: Union[str, Path]) ->
|
416
|
+
def upload_file(self, file: Union[str, Path]) -> Path:
|
410
417
|
# Default behavior is a no-op (for local code interpreter)
|
411
|
-
return
|
418
|
+
return Path(file)
|
412
419
|
|
413
|
-
def download_file(
|
420
|
+
def download_file(
|
421
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
422
|
+
) -> Path:
|
414
423
|
# Default behavior is a no-op (for local code interpreter)
|
415
|
-
return Path(
|
424
|
+
return Path(local_file_path)
|
416
425
|
|
417
426
|
|
418
427
|
class E2BCodeInterpreter(CodeInterpreter):
|
419
|
-
def __init__(
|
428
|
+
def __init__(
|
429
|
+
self, remote_path: Optional[Union[str, Path]] = None, *args: Any, **kwargs: Any
|
430
|
+
) -> None:
|
420
431
|
super().__init__(*args, **kwargs)
|
421
432
|
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
|
422
433
|
try:
|
@@ -443,6 +454,9 @@ print(f"Vision Agent version: {va_version}")"""
|
|
443
454
|
_LOGGER.info(
|
444
455
|
f"E2BCodeInterpreter (sandbox id: {self.interpreter.sandbox_id}) initialized:\n{sys_versions}"
|
445
456
|
)
|
457
|
+
self.remote_path = Path(
|
458
|
+
remote_path if remote_path is not None else "/home/user"
|
459
|
+
)
|
446
460
|
|
447
461
|
def close(self, *args: Any, **kwargs: Any) -> None:
|
448
462
|
try:
|
@@ -516,19 +530,22 @@ print(f"Vision Agent version: {va_version}")"""
|
|
516
530
|
before_sleep=tenacity.before_sleep_log(_LOGGER, logging.INFO),
|
517
531
|
after=tenacity.after_log(_LOGGER, logging.INFO),
|
518
532
|
)
|
519
|
-
def upload_file(self, file: Union[str, Path]) ->
|
533
|
+
def upload_file(self, file: Union[str, Path]) -> Path:
|
520
534
|
file_name = Path(file).name
|
521
|
-
remote_path = f"/home/user/{file_name}"
|
522
535
|
with open(file, "rb") as f:
|
523
|
-
self.interpreter.files.write(path=remote_path, data=f)
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
def download_file(
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
536
|
+
self.interpreter.files.write(path=str(self.remote_path / file_name), data=f)
|
537
|
+
_LOGGER.info(f"File ({file}) is uploaded to: {str(self.remote_path)}")
|
538
|
+
return self.remote_path / file_name
|
539
|
+
|
540
|
+
def download_file(
|
541
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
542
|
+
) -> Path:
|
543
|
+
with open(local_file_path, "w+b") as f:
|
544
|
+
f.write(
|
545
|
+
self.interpreter.files.read(path=str(remote_file_path), format="bytes")
|
546
|
+
)
|
547
|
+
_LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
|
548
|
+
return Path(local_file_path)
|
532
549
|
|
533
550
|
@staticmethod
|
534
551
|
def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
|
@@ -540,7 +557,11 @@ print(f"Vision Agent version: {va_version}")"""
|
|
540
557
|
|
541
558
|
|
542
559
|
class LocalCodeInterpreter(CodeInterpreter):
|
543
|
-
def __init__(
|
560
|
+
def __init__(
|
561
|
+
self,
|
562
|
+
timeout: int = _SESSION_TIMEOUT,
|
563
|
+
remote_path: Optional[Union[str, Path]] = None,
|
564
|
+
) -> None:
|
544
565
|
super().__init__(timeout=timeout)
|
545
566
|
self.nb = nbformat.v4.new_notebook()
|
546
567
|
self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
|
@@ -554,6 +575,7 @@ Timeout: {self.timeout}"""
|
|
554
575
|
)
|
555
576
|
sleep(1)
|
556
577
|
self._new_kernel()
|
578
|
+
self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
|
557
579
|
|
558
580
|
def _new_kernel(self) -> None:
|
559
581
|
if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)(): # type: ignore
|
@@ -607,6 +629,25 @@ Timeout: {self.timeout}"""
|
|
607
629
|
traceback_raw = traceback.format_exc().splitlines()
|
608
630
|
return Execution.from_exception(e, traceback_raw)
|
609
631
|
|
632
|
+
def upload_file(self, file_path: Union[str, Path]) -> Path:
|
633
|
+
with open(file_path, "rb") as f:
|
634
|
+
contents = f.read()
|
635
|
+
with open(self.remote_path / Path(file_path).name, "wb") as f:
|
636
|
+
f.write(contents)
|
637
|
+
_LOGGER.info(f"File ({file_path}) is uploaded to: {str(self.remote_path)}")
|
638
|
+
|
639
|
+
return Path(self.remote_path / file_path)
|
640
|
+
|
641
|
+
def download_file(
|
642
|
+
self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
|
643
|
+
) -> Path:
|
644
|
+
with open(self.remote_path / remote_file_path, "rb") as f:
|
645
|
+
contents = f.read()
|
646
|
+
with open(local_file_path, "wb") as f:
|
647
|
+
f.write(contents)
|
648
|
+
_LOGGER.info(f"File ({remote_file_path}) is downloaded to: {local_file_path}")
|
649
|
+
return Path(local_file_path)
|
650
|
+
|
610
651
|
|
611
652
|
class CodeInterpreterFactory:
|
612
653
|
"""Factory class for creating code interpreters.
|
@@ -630,13 +671,19 @@ class CodeInterpreterFactory:
|
|
630
671
|
return instance
|
631
672
|
|
632
673
|
@staticmethod
|
633
|
-
def new_instance(
|
674
|
+
def new_instance(
|
675
|
+
code_sandbox_runtime: Optional[str] = None, remote_path: Optional[str] = None
|
676
|
+
) -> CodeInterpreter:
|
634
677
|
if not code_sandbox_runtime:
|
635
678
|
code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
|
636
679
|
if code_sandbox_runtime == "e2b":
|
637
|
-
instance: CodeInterpreter = E2BCodeInterpreter(
|
680
|
+
instance: CodeInterpreter = E2BCodeInterpreter(
|
681
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
682
|
+
)
|
638
683
|
elif code_sandbox_runtime == "local":
|
639
|
-
instance = LocalCodeInterpreter(
|
684
|
+
instance = LocalCodeInterpreter(
|
685
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
686
|
+
)
|
640
687
|
else:
|
641
688
|
raise ValueError(
|
642
689
|
f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
|
@@ -70,7 +70,7 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
|
|
70
70
|
r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
|
71
71
|
|
72
72
|
Parameters:
|
73
|
-
|
73
|
+
rle: The run-length encoded mask.
|
74
74
|
"""
|
75
75
|
size = rle["size"]
|
76
76
|
counts = rle["counts"]
|
@@ -100,7 +100,7 @@ def frames_to_bytes(
|
|
100
100
|
"""
|
101
101
|
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
|
102
102
|
clip = ImageSequenceClip(frames, fps=fps)
|
103
|
-
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
|
103
|
+
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
|
104
104
|
with open(temp_file.name + f".{file_ext}", "rb") as f:
|
105
105
|
buffer_bytes = f.read()
|
106
106
|
return buffer_bytes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.118
|
3
|
+
Version: 0.2.119
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -81,15 +81,15 @@ export OPENAI_API_KEY="your-api-key"
|
|
81
81
|
```
|
82
82
|
|
83
83
|
### Vision Agent
|
84
|
-
There are two agents that you can use.
|
84
|
+
There are two agents that you can use. `VisionAgent` is a conversational agent that has
|
85
85
|
access to tools that allow it to write and navigate Python code and file systems. It can
|
86
|
-
converse with the user in natural language. VisionAgentCoder is an agent
|
87
|
-
code for vision tasks, such as counting people in an image. However, it
|
88
|
-
and can only respond with code. VisionAgent can call
|
89
|
-
code.
|
86
|
+
converse with the user in natural language. `VisionAgentCoder` is an agent specifically
|
87
|
+
for writing code for vision tasks, such as counting people in an image. However, it
|
88
|
+
cannot chat with you and can only respond with code. `VisionAgent` can call
|
89
|
+
`VisionAgentCoder` to write vision code.
|
90
90
|
|
91
91
|
#### Basic Usage
|
92
|
-
To run the streamlit app locally to chat with
|
92
|
+
To run the streamlit app locally to chat with `VisionAgent`, you can run the following
|
93
93
|
command:
|
94
94
|
|
95
95
|
```bash
|
@@ -186,7 +186,7 @@ the code and having it update. You just need to add the code as a response from
|
|
186
186
|
assistant:
|
187
187
|
|
188
188
|
```python
|
189
|
-
agent = va.agent.
|
189
|
+
agent = va.agent.VisionAgentCoder(verbosity=2)
|
190
190
|
conv = [
|
191
191
|
{
|
192
192
|
"role": "user",
|
@@ -252,6 +252,10 @@ function. Make sure the documentation is in the same format above with descripti
|
|
252
252
|
`Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
|
253
253
|
[here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
|
254
254
|
|
255
|
+
Can't find the tool you need and want to add it to `VisionAgent`? Check out our
|
256
|
+
[vision-agent-tools](https://github.com/landing-ai/vision-agent-tools) repository where
|
257
|
+
we add the source code for all the tools used in `VisionAgent`.
|
258
|
+
|
255
259
|
## Additional Backends
|
256
260
|
### Ollama
|
257
261
|
We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
|
@@ -1,33 +1,33 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
2
|
vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
|
3
|
-
vision_agent/agent/agent.py,sha256=
|
3
|
+
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
6
|
-
vision_agent/agent/vision_agent_coder.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=IEyXT_JPCuWmBHdEnM1Wrsj7hmCe5pKLf0gnZFJTddI,11046
|
6
|
+
vision_agent/agent/vision_agent_coder.py,sha256=DOTmDdGPxcI06Jp6yx4ekRMP0vhiVaK9B9Dl8UyJHeo,34396
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
|
8
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=0GliXFtBf32aPu2ClU63FI5ii5CTxWYsvrsmnnDp-gs,7134
|
9
9
|
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
|
-
vision_agent/clients/landing_public_api.py,sha256=
|
11
|
+
vision_agent/clients/landing_public_api.py,sha256=rGtACkr8o5egDuMHQ5MBO4NuvsgPTp9Ew3rbq4R-vs0,1507
|
12
12
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
14
|
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
-
vision_agent/lmm/lmm.py,sha256=
|
16
|
-
vision_agent/lmm/types.py,sha256=
|
17
|
-
vision_agent/tools/__init__.py,sha256=
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
15
|
+
vision_agent/lmm/lmm.py,sha256=AYrZNdhghG293wd3aKZ1jK1lUm2NLWwALktbM4wNais,20862
|
16
|
+
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
|
+
vision_agent/tools/__init__.py,sha256=i7JOLxRaLdcY7-vCNOGAeOFMBfiAUIwWhnT32FO97VE,2201
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=Vu9WnKicGhafx9dPzDbQjQdcIzRCYYFPF68o79hDP-8,14616
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=qMsb9d8QtpXGgF9rpPO2dA390BewKdYO68oWKDu-TGg,6504
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
22
|
-
vision_agent/tools/tools_types.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=kbbMToAaHxl42dDEvyz9Mvtpqts0l0hGoC5YQQyozr8,59953
|
22
|
+
vision_agent/tools/tools_types.py,sha256=iLWSirheC87fKQolIhx_O4Jk8Lv7DRiLuE8PJqLGiVQ,2216
|
23
23
|
vision_agent/utils/__init__.py,sha256=pWk0ktvR4aUEhuEIzSLM9kSgW4WDVqptdvOTeGLkJ6M,230
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
-
vision_agent/utils/execute.py,sha256=
|
26
|
-
vision_agent/utils/image_utils.py,sha256=
|
25
|
+
vision_agent/utils/execute.py,sha256=Ap8Yx80spQq5f2QtKGx1MK03BR45mJKhlp1kfh-rIao,26751
|
26
|
+
vision_agent/utils/image_utils.py,sha256=eNghu_2L8624jEXy8ZZS9OX46Mv0DT9bcvLForujwTs,9848
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.119.dist-info/METADATA,sha256=ag8Cf800dZJtJqJtwEcf4gqf7Qjf-K1JMoeisDI7RWQ,12255
|
32
|
+
vision_agent-0.2.119.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.119.dist-info/RECORD,,
|
File without changes
|
File without changes
|