vision-agent 0.2.110__py3-none-any.whl → 0.2.112__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/agent_utils.py +3 -8
- vision_agent/agent/vision_agent.py +1 -1
- vision_agent/agent/vision_agent_coder.py +28 -20
- vision_agent/agent/vision_agent_coder_prompts.py +9 -7
- vision_agent/agent/vision_agent_prompts.py +11 -10
- vision_agent/clients/http.py +15 -3
- vision_agent/clients/landing_public_api.py +14 -2
- vision_agent/tools/__init__.py +11 -5
- vision_agent/tools/meta_tools.py +1 -46
- vision_agent/tools/tool_utils.py +25 -10
- vision_agent/tools/tools.py +463 -99
- vision_agent/tools/tools_types.py +84 -0
- vision_agent/utils/exceptions.py +13 -0
- vision_agent/utils/execute.py +0 -1
- vision_agent/utils/image_utils.py +52 -0
- {vision_agent-0.2.110.dist-info → vision_agent-0.2.112.dist-info}/METADATA +1 -1
- vision_agent-0.2.112.dist-info/RECORD +33 -0
- vision_agent/tools/meta_tools_types.py +0 -30
- vision_agent-0.2.110.dist-info/RECORD +0 -33
- {vision_agent-0.2.110.dist-info → vision_agent-0.2.112.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.110.dist-info → vision_agent-0.2.112.dist-info}/WHEEL +0 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
from uuid import UUID
|
2
|
+
from enum import Enum
|
3
|
+
from typing import List, Tuple, Optional
|
4
|
+
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
|
6
|
+
|
7
|
+
|
8
|
+
class BboxInput(BaseModel):
    """Bounding-box annotations for an image referenced by file path.

    NOTE(review): ``labels`` and ``bboxes`` look like parallel lists (one
    label per box) -- confirm the pairing and the coordinate order of each
    box against the callers.
    """

    # Presumably the location of the annotated image on disk.
    image_path: str
    # Label strings for the annotated boxes.
    labels: List[str]
    # One tuple of four integers per box.
    bboxes: List[Tuple[int, int, int, int]]
|
12
|
+
|
13
|
+
|
14
|
+
class BboxInputBase64(BaseModel):
    """Bounding-box annotations for an image carried inline as a string.

    NOTE(review): judging by the class name, ``image`` is expected to hold
    base64-encoded image data -- confirm against the callers. ``labels`` and
    ``bboxes`` look like parallel lists (one label per box).
    """

    # The image payload as a string (presumably base64 -- see note above).
    image: str
    # File name associated with the image payload.
    filename: str
    # Label strings for the annotated boxes.
    labels: List[str]
    # One tuple of four integers per box.
    bboxes: List[Tuple[int, int, int, int]]
|
19
|
+
|
20
|
+
|
21
|
+
class PromptTask(str, Enum):
    """Valid task prompt options for the Florencev2 model.

    Every member's value is the literal prompt token. Because the enum also
    derives from ``str``, members compare equal to those tokens.
    """

    # The empty strings below are attribute docstrings kept from the original
    # definition; they have no runtime effect.
    CAPTION = "<CAPTION>"
    """"""
    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
    """"""
    OBJECT_DETECTION = "<OD>"
    """"""
|
32
|
+
|
33
|
+
|
34
|
+
class FineTuning(BaseModel):
    """Reference to a fine-tuning job, identified by a UUID.

    ``populate_by_name`` is enabled, so the field may be supplied either under
    its Python name (``job_id``) or under its ``jobId`` alias.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Identifier of the fine-tuning job; aliased to ``jobId``.
    job_id: UUID = Field(alias="jobId")

    @field_serializer("job_id")
    def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
        """Serialize ``job_id`` as its string form instead of a UUID object."""
        return str(job_id)
|
42
|
+
|
43
|
+
|
44
|
+
class Florencev2FtRequest(BaseModel):
    """Request payload for a Florencev2 call, optionally tied to a fine-tuning job.

    ``populate_by_name`` is enabled, so ``fine_tuning`` may be supplied either
    under its Python name or under its ``fineTuning`` alias.
    """

    model_config = ConfigDict(populate_by_name=True)

    # The image as a string. NOTE(review): presumably base64 -- confirm.
    image: str
    # Which Florencev2 task prompt to run.
    task: PromptTask
    # Name of the tool issuing the request.
    tool: str
    # Optional free-text prompt; defaults to the empty string.
    prompt: Optional[str] = ""
    # Optional fine-tuning job reference; omitted (None) by default.
    fine_tuning: Optional[FineTuning] = Field(default=None, alias="fineTuning")
|
52
|
+
|
53
|
+
|
54
|
+
class JobStatus(str, Enum):
    """The status of a fine-tuning job.

    CREATED:
        The job has been created and is waiting to be scheduled to run.
    STARTING:
        The job has started running, but has not yet entered the training
        phase.
    TRAINING:
        The job is training a model.
    EVALUATING:
        The job is evaluating the model and computing metrics.
    PUBLISHING:
        The job is exporting the artifact(s) to an external directory (s3 or
        local).
    SUCCEEDED:
        The job has finished, including training, evaluation and publishing
        the artifact(s).
    FAILED:
        The job has failed internally; this can be due to resource issues or
        to the code itself.
    STOPPED:
        The job has been stopped by the user, locally or in the cloud.
    """

    # Each member's value is identical to its name.
    CREATED = "CREATED"
    STARTING = "STARTING"
    TRAINING = "TRAINING"
    EVALUATING = "EVALUATING"
    PUBLISHING = "PUBLISHING"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    STOPPED = "STOPPED"
|
vision_agent/utils/exceptions.py
CHANGED
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
|
|
49
49
|
"""
|
50
50
|
|
51
51
|
is_retryable = True
|
52
|
+
|
53
|
+
|
54
|
+
class FineTuneModelIsNotReady(Exception):
    """Raised when a fine-tuned model is not ready to be used yet.

    When this is raised, the recommendation is to wait 5 seconds before
    trying to use the model again.
    """
|
59
|
+
|
60
|
+
|
61
|
+
class FineTuneModelNotFound(Exception):
    """Raised when the requested fine-tuned model cannot be found.

    When this is raised, the recommendation is to try another model id.
    """
|
vision_agent/utils/execute.py
CHANGED
@@ -416,7 +416,6 @@ class CodeInterpreter(abc.ABC):
|
|
416
416
|
|
417
417
|
|
418
418
|
class E2BCodeInterpreter(CodeInterpreter):
|
419
|
-
|
420
419
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
421
420
|
super().__init__(*args, **kwargs)
|
422
421
|
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
|
@@ -1,12 +1,15 @@
|
|
1
1
|
"""Utility functions for image processing."""
|
2
2
|
|
3
3
|
import base64
|
4
|
+
import io
|
5
|
+
import tempfile
|
4
6
|
from importlib import resources
|
5
7
|
from io import BytesIO
|
6
8
|
from pathlib import Path
|
7
9
|
from typing import Dict, List, Tuple, Union
|
8
10
|
|
9
11
|
import numpy as np
|
12
|
+
from moviepy.editor import ImageSequenceClip
|
10
13
|
from PIL import Image, ImageDraw, ImageFont
|
11
14
|
from PIL.Image import Image as ImageType
|
12
15
|
|
@@ -63,6 +66,46 @@ def rle_decode(mask_rle: str, shape: Tuple[int, int]) -> np.ndarray:
|
|
63
66
|
return img.reshape(shape)
|
64
67
|
|
65
68
|
|
69
|
+
def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
    r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.

    Parameters:
        rle: The run-length encoding as a dictionary with two entries:
            "size", the two dimensions of the output mask, and "counts", the
            run lengths. Runs alternate between background and mask, starting
            with background, and are laid out in column-major order.
    """
    dims = rle["size"]
    run_lengths = rle["counts"]

    flat = np.zeros(dims[0] * dims[1], dtype=np.uint8)

    start = 0
    for index, run in enumerate(run_lengths):
        stop = start + run
        # Odd-indexed runs are mask pixels; even-indexed runs stay zero.
        if index % 2 == 1:
            flat[start:stop] = 1
        start = stop

    # The runs are column-major, so reshape in Fortran order.
    return flat.reshape(dims, order="F")
|
89
|
+
|
90
|
+
|
91
|
+
def frames_to_bytes(
    frames: List[np.ndarray], fps: float = 10, file_ext: str = "mp4"
) -> bytes:
    r"""Convert a list of frames to a video file encoded into a byte string.

    The video is encoded into a file inside a temporary directory, read back
    into memory, and the directory (including the video file) is removed
    before returning.

    Parameters:
        frames: the list of frames
        fps: the frames per second of the video
        file_ext: the file extension of the video file

    Returns:
        The raw bytes of the encoded video file.
    """
    # Encode inside a TemporaryDirectory so the output file is cleaned up.
    # Writing to "<NamedTemporaryFile name>.<ext>" would create a sibling file
    # that the temp-file context manager never deletes.
    with tempfile.TemporaryDirectory() as temp_dir:
        video_path = Path(temp_dir) / f"video.{file_ext}"
        clip = ImageSequenceClip(frames, fps=fps)
        clip.write_videofile(str(video_path), fps=fps)
        return video_path.read_bytes()
|
107
|
+
|
108
|
+
|
66
109
|
def b64_to_pil(b64_str: str) -> ImageType:
|
67
110
|
r"""Convert a base64 string to a PIL Image.
|
68
111
|
|
@@ -78,6 +121,15 @@ def b64_to_pil(b64_str: str) -> ImageType:
|
|
78
121
|
return Image.open(BytesIO(base64.b64decode(b64_str)))
|
79
122
|
|
80
123
|
|
124
|
+
def numpy_to_bytes(image: np.ndarray) -> bytes:
    r"""Encode a numpy image array as PNG bytes.

    Parameters:
        image: the image array; it is converted to a PIL image in RGB mode
            before being encoded

    Returns:
        The PNG-encoded image as a byte string.
    """
    rgb_image = Image.fromarray(image).convert("RGB")
    with io.BytesIO() as png_buffer:
        rgb_image.save(png_buffer, format="PNG")
        # Copy the bytes out before the buffer is closed on exit.
        return png_buffer.getvalue()
|
131
|
+
|
132
|
+
|
81
133
|
def get_image_size(data: Union[str, Path, np.ndarray, ImageType]) -> Tuple[int, ...]:
|
82
134
|
r"""Get the size of an image.
|
83
135
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
|
+
vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
|
3
|
+
vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
|
4
|
+
vision_agent/agent/agent_utils.py,sha256=ArHrmHIEkWxkxkUHm0WH7pOnWqqoOvNdTrgIpl-DAow,1124
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
|
6
|
+
vision_agent/agent/vision_agent_coder.py,sha256=HaIOxPQajP2CJT7TbffSkz0MDbYxEs6_P9Ykz71nkUc,31209
|
7
|
+
vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
|
8
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
|
9
|
+
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
|
+
vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
|
12
|
+
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
|
+
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
+
vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
|
16
|
+
vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
|
17
|
+
vision_agent/tools/__init__.py,sha256=lyD7X-CXS4215K8mPXrzEoFVDHeQaNYn6KSn_uVoxlY,2108
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
|
19
|
+
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
|
+
vision_agent/tools/tool_utils.py,sha256=1_ZnBubUctJYTv3GsSzRJ6cbQ0Y42yolBvehs6dZYao,5762
|
21
|
+
vision_agent/tools/tools.py,sha256=F3bf7uL84p4Cfe2b8ek-KSeWgABkUccvsdwBi3CgTCM,58561
|
22
|
+
vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
|
23
|
+
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
24
|
+
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
+
vision_agent/utils/execute.py,sha256=3NklVR1PZqIDuF_nhq2HhYMy6ZqOsTSUL0DFTpo--4M,25092
|
26
|
+
vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
|
27
|
+
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
28
|
+
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
|
+
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
+
vision_agent-0.2.112.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.112.dist-info/METADATA,sha256=XBqbIsjZ128JvvmHvwKFkCP6-8MCpmZRsjr7osRdqGU,10732
|
32
|
+
vision_agent-0.2.112.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.112.dist-info/RECORD,,
|
@@ -1,30 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import List, Tuple
|
3
|
-
|
4
|
-
from pydantic import BaseModel
|
5
|
-
|
6
|
-
|
7
|
-
class BboxInput(BaseModel):
|
8
|
-
image_path: str
|
9
|
-
labels: List[str]
|
10
|
-
bboxes: List[Tuple[int, int, int, int]]
|
11
|
-
|
12
|
-
|
13
|
-
class BboxInputBase64(BaseModel):
|
14
|
-
image: str
|
15
|
-
filename: str
|
16
|
-
labels: List[str]
|
17
|
-
bboxes: List[Tuple[int, int, int, int]]
|
18
|
-
|
19
|
-
|
20
|
-
class PromptTask(str, Enum):
|
21
|
-
"""
|
22
|
-
Valid task prompts options for the Florencev2 model.
|
23
|
-
"""
|
24
|
-
|
25
|
-
CAPTION = "<CAPTION>"
|
26
|
-
""""""
|
27
|
-
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
28
|
-
""""""
|
29
|
-
OBJECT_DETECTION = "<OD>"
|
30
|
-
""""""
|
@@ -1,33 +0,0 @@
|
|
1
|
-
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
|
-
vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
|
3
|
-
vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
|
4
|
-
vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
|
6
|
-
vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
|
7
|
-
vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
|
8
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
|
9
|
-
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
|
11
|
-
vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
|
12
|
-
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
|
-
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
-
vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
|
16
|
-
vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
|
17
|
-
vision_agent/tools/__init__.py,sha256=NDEEOZrwpeNYhUA32bSKXrZ62uEsErb8Vn-70_0Oz1o,2033
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
|
19
|
-
vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
|
20
|
-
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
21
|
-
vision_agent/tools/tool_utils.py,sha256=Y7I4OBW5GwXkHQwlAXqp29WB0OOPQXAMYCAHj_Vh8eQ,5036
|
22
|
-
vision_agent/tools/tools.py,sha256=IU7jTEJ8NH5zVmFwznOLEmjOBQ7IzBJpanzpqtjoJrY,44876
|
23
|
-
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
24
|
-
vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
|
25
|
-
vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
|
26
|
-
vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
|
27
|
-
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
28
|
-
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
|
-
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.110.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
-
vision_agent-0.2.110.dist-info/METADATA,sha256=Qcxe0Nt5ObGSUmIhqWg8B0FWjw13YW2jKyz21sbzCtI,10732
|
32
|
-
vision_agent-0.2.110.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
-
vision_agent-0.2.110.dist-info/RECORD,,
|
File without changes
|
File without changes
|