vision-agent 0.2.110__py3-none-any.whl → 0.2.112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/agent_utils.py +3 -8
- vision_agent/agent/vision_agent.py +1 -1
- vision_agent/agent/vision_agent_coder.py +28 -20
- vision_agent/agent/vision_agent_coder_prompts.py +9 -7
- vision_agent/agent/vision_agent_prompts.py +11 -10
- vision_agent/clients/http.py +15 -3
- vision_agent/clients/landing_public_api.py +14 -2
- vision_agent/tools/__init__.py +11 -5
- vision_agent/tools/meta_tools.py +1 -46
- vision_agent/tools/tool_utils.py +25 -10
- vision_agent/tools/tools.py +463 -99
- vision_agent/tools/tools_types.py +84 -0
- vision_agent/utils/exceptions.py +13 -0
- vision_agent/utils/execute.py +0 -1
- vision_agent/utils/image_utils.py +52 -0
- {vision_agent-0.2.110.dist-info → vision_agent-0.2.112.dist-info}/METADATA +1 -1
- vision_agent-0.2.112.dist-info/RECORD +33 -0
- vision_agent/tools/meta_tools_types.py +0 -30
- vision_agent-0.2.110.dist-info/RECORD +0 -33
- {vision_agent-0.2.110.dist-info → vision_agent-0.2.112.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.110.dist-info → vision_agent-0.2.112.dist-info}/WHEEL +0 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
from uuid import UUID
|
2
|
+
from enum import Enum
|
3
|
+
from typing import List, Tuple, Optional
|
4
|
+
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
|
6
|
+
|
7
|
+
|
8
|
+
class BboxInput(BaseModel):
|
9
|
+
image_path: str
|
10
|
+
labels: List[str]
|
11
|
+
bboxes: List[Tuple[int, int, int, int]]
|
12
|
+
|
13
|
+
|
14
|
+
class BboxInputBase64(BaseModel):
|
15
|
+
image: str
|
16
|
+
filename: str
|
17
|
+
labels: List[str]
|
18
|
+
bboxes: List[Tuple[int, int, int, int]]
|
19
|
+
|
20
|
+
|
21
|
+
class PromptTask(str, Enum):
|
22
|
+
"""
|
23
|
+
Valid task prompts options for the Florencev2 model.
|
24
|
+
"""
|
25
|
+
|
26
|
+
CAPTION = "<CAPTION>"
|
27
|
+
""""""
|
28
|
+
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
29
|
+
""""""
|
30
|
+
OBJECT_DETECTION = "<OD>"
|
31
|
+
""""""
|
32
|
+
|
33
|
+
|
34
|
+
class FineTuning(BaseModel):
|
35
|
+
model_config = ConfigDict(populate_by_name=True)
|
36
|
+
|
37
|
+
job_id: UUID = Field(alias="jobId")
|
38
|
+
|
39
|
+
@field_serializer("job_id")
|
40
|
+
def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
|
41
|
+
return str(job_id)
|
42
|
+
|
43
|
+
|
44
|
+
class Florencev2FtRequest(BaseModel):
|
45
|
+
model_config = ConfigDict(populate_by_name=True)
|
46
|
+
|
47
|
+
image: str
|
48
|
+
task: PromptTask
|
49
|
+
tool: str
|
50
|
+
prompt: Optional[str] = ""
|
51
|
+
fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
|
52
|
+
|
53
|
+
|
54
|
+
class JobStatus(str, Enum):
|
55
|
+
"""The status of a fine-tuning job.
|
56
|
+
|
57
|
+
CREATED:
|
58
|
+
The job has been created and is waiting to be scheduled to run.
|
59
|
+
STARTING:
|
60
|
+
The job has started running, but has not yet entered the training phase.
|
61
|
+
TRAINING:
|
62
|
+
The job is training a model.
|
63
|
+
EVALUATING:
|
64
|
+
The job is evaluating the model and computing metrics.
|
65
|
+
PUBLISHING:
|
66
|
+
The job is exporting the artifact(s) to an external directory (s3 or local).
|
67
|
+
SUCCEEDED:
|
68
|
+
The job has finished, including training, evaluation and publishing the
|
69
|
+
artifact(s).
|
70
|
+
FAILED:
|
71
|
+
The job has failed for some internal reason; this can be due to resource
|
72
|
+
issues or the code itself.
|
73
|
+
STOPPED:
|
74
|
+
The job has been stopped by the user locally or in the cloud.
|
75
|
+
"""
|
76
|
+
|
77
|
+
CREATED = "CREATED"
|
78
|
+
STARTING = "STARTING"
|
79
|
+
TRAINING = "TRAINING"
|
80
|
+
EVALUATING = "EVALUATING"
|
81
|
+
PUBLISHING = "PUBLISHING"
|
82
|
+
SUCCEEDED = "SUCCEEDED"
|
83
|
+
FAILED = "FAILED"
|
84
|
+
STOPPED = "STOPPED"
|
vision_agent/utils/exceptions.py
CHANGED
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
|
|
49
49
|
"""
|
50
50
|
|
51
51
|
is_retryable = True
|
52
|
+
|
53
|
+
|
54
|
+
class FineTuneModelIsNotReady(Exception):
|
55
|
+
"""Exception raised when the fine-tune model is not ready.
|
56
|
+
If this is raised, it's recommended to wait 5 seconds before trying to use
|
57
|
+
the model again.
|
58
|
+
"""
|
59
|
+
|
60
|
+
|
61
|
+
class FineTuneModelNotFound(Exception):
|
62
|
+
"""Exception raised when the fine-tune model is not found.
|
63
|
+
If this is raised, it's recommended to try another model id.
|
64
|
+
"""
|
vision_agent/utils/execute.py
CHANGED
@@ -416,7 +416,6 @@ class CodeInterpreter(abc.ABC):
|
|
416
416
|
|
417
417
|
|
418
418
|
class E2BCodeInterpreter(CodeInterpreter):
|
419
|
-
|
420
419
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
421
420
|
super().__init__(*args, **kwargs)
|
422
421
|
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
|
@@ -1,12 +1,15 @@
|
|
1
1
|
"""Utility functions for image processing."""
|
2
2
|
|
3
3
|
import base64
|
4
|
+
import io
|
5
|
+
import tempfile
|
4
6
|
from importlib import resources
|
5
7
|
from io import BytesIO
|
6
8
|
from pathlib import Path
|
7
9
|
from typing import Dict, List, Tuple, Union
|
8
10
|
|
9
11
|
import numpy as np
|
12
|
+
from moviepy.editor import ImageSequenceClip
|
10
13
|
from PIL import Image, ImageDraw, ImageFont
|
11
14
|
from PIL.Image import Image as ImageType
|
12
15
|
|
@@ -63,6 +66,46 @@ def rle_decode(mask_rle: str, shape: Tuple[int, int]) -> np.ndarray:
|
|
63
66
|
return img.reshape(shape)
|
64
67
|
|
65
68
|
|
69
|
+
def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
|
70
|
+
r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
|
71
|
+
|
72
|
+
Parameters:
|
73
|
+
rle: The run-length encoded mask, a dict with "size" and "counts" entries.
|
74
|
+
"""
|
75
|
+
size = rle["size"]
|
76
|
+
counts = rle["counts"]
|
77
|
+
|
78
|
+
total_elements = size[0] * size[1]
|
79
|
+
flattened_mask = np.zeros(total_elements, dtype=np.uint8)
|
80
|
+
|
81
|
+
current_pos = 0
|
82
|
+
for i, count in enumerate(counts):
|
83
|
+
if i % 2 == 1:
|
84
|
+
flattened_mask[current_pos : current_pos + count] = 1
|
85
|
+
current_pos += count
|
86
|
+
|
87
|
+
binary_mask = flattened_mask.reshape(size, order="F")
|
88
|
+
return binary_mask
|
89
|
+
|
90
|
+
|
91
|
+
def frames_to_bytes(
|
92
|
+
frames: List[np.ndarray], fps: float = 10, file_ext: str = "mp4"
|
93
|
+
) -> bytes:
|
94
|
+
r"""Convert a list of frames to a video file encoded into a byte string.
|
95
|
+
|
96
|
+
Parameters:
|
97
|
+
frames: the list of frames
|
98
|
+
fps: the frames per second of the video
|
99
|
+
file_ext: the file extension of the video file
|
100
|
+
"""
|
101
|
+
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
|
102
|
+
clip = ImageSequenceClip(frames, fps=fps)
|
103
|
+
clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
|
104
|
+
with open(temp_file.name + f".{file_ext}", "rb") as f:
|
105
|
+
buffer_bytes = f.read()
|
106
|
+
return buffer_bytes
|
107
|
+
|
108
|
+
|
66
109
|
def b64_to_pil(b64_str: str) -> ImageType:
|
67
110
|
r"""Convert a base64 string to a PIL Image.
|
68
111
|
|
@@ -78,6 +121,15 @@ def b64_to_pil(b64_str: str) -> ImageType:
|
|
78
121
|
return Image.open(BytesIO(base64.b64decode(b64_str)))
|
79
122
|
|
80
123
|
|
124
|
+
def numpy_to_bytes(image: np.ndarray) -> bytes:
|
125
|
+
pil_image = Image.fromarray(image).convert("RGB")
|
126
|
+
image_buffer = io.BytesIO()
|
127
|
+
pil_image.save(image_buffer, format="PNG")
|
128
|
+
buffer_bytes = image_buffer.getvalue()
|
129
|
+
image_buffer.close()
|
130
|
+
return buffer_bytes
|
131
|
+
|
132
|
+
|
81
133
|
def get_image_size(data: Union[str, Path, np.ndarray, ImageType]) -> Tuple[int, ...]:
|
82
134
|
r"""Get the size of an image.
|
83
135
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
|
+
vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
|
3
|
+
vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
|
4
|
+
vision_agent/agent/agent_utils.py,sha256=ArHrmHIEkWxkxkUHm0WH7pOnWqqoOvNdTrgIpl-DAow,1124
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
|
6
|
+
vision_agent/agent/vision_agent_coder.py,sha256=HaIOxPQajP2CJT7TbffSkz0MDbYxEs6_P9Ykz71nkUc,31209
|
7
|
+
vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
|
8
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
|
9
|
+
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
|
+
vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
|
12
|
+
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
|
+
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
+
vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
|
16
|
+
vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
|
17
|
+
vision_agent/tools/__init__.py,sha256=lyD7X-CXS4215K8mPXrzEoFVDHeQaNYn6KSn_uVoxlY,2108
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
|
19
|
+
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
|
+
vision_agent/tools/tool_utils.py,sha256=1_ZnBubUctJYTv3GsSzRJ6cbQ0Y42yolBvehs6dZYao,5762
|
21
|
+
vision_agent/tools/tools.py,sha256=F3bf7uL84p4Cfe2b8ek-KSeWgABkUccvsdwBi3CgTCM,58561
|
22
|
+
vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
|
23
|
+
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
24
|
+
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
+
vision_agent/utils/execute.py,sha256=3NklVR1PZqIDuF_nhq2HhYMy6ZqOsTSUL0DFTpo--4M,25092
|
26
|
+
vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
|
27
|
+
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
28
|
+
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
|
+
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
+
vision_agent-0.2.112.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.112.dist-info/METADATA,sha256=XBqbIsjZ128JvvmHvwKFkCP6-8MCpmZRsjr7osRdqGU,10732
|
32
|
+
vision_agent-0.2.112.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.112.dist-info/RECORD,,
|
@@ -1,30 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import List, Tuple
|
3
|
-
|
4
|
-
from pydantic import BaseModel
|
5
|
-
|
6
|
-
|
7
|
-
class BboxInput(BaseModel):
|
8
|
-
image_path: str
|
9
|
-
labels: List[str]
|
10
|
-
bboxes: List[Tuple[int, int, int, int]]
|
11
|
-
|
12
|
-
|
13
|
-
class BboxInputBase64(BaseModel):
|
14
|
-
image: str
|
15
|
-
filename: str
|
16
|
-
labels: List[str]
|
17
|
-
bboxes: List[Tuple[int, int, int, int]]
|
18
|
-
|
19
|
-
|
20
|
-
class PromptTask(str, Enum):
|
21
|
-
"""
|
22
|
-
Valid task prompts options for the Florencev2 model.
|
23
|
-
"""
|
24
|
-
|
25
|
-
CAPTION = "<CAPTION>"
|
26
|
-
""""""
|
27
|
-
CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
|
28
|
-
""""""
|
29
|
-
OBJECT_DETECTION = "<OD>"
|
30
|
-
""""""
|
@@ -1,33 +0,0 @@
|
|
1
|
-
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
|
-
vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
|
3
|
-
vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
|
4
|
-
vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
|
6
|
-
vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
|
7
|
-
vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
|
8
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
|
9
|
-
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
|
11
|
-
vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
|
12
|
-
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
14
|
-
vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
|
15
|
-
vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
|
16
|
-
vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
|
17
|
-
vision_agent/tools/__init__.py,sha256=NDEEOZrwpeNYhUA32bSKXrZ62uEsErb8Vn-70_0Oz1o,2033
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
|
19
|
-
vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
|
20
|
-
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
21
|
-
vision_agent/tools/tool_utils.py,sha256=Y7I4OBW5GwXkHQwlAXqp29WB0OOPQXAMYCAHj_Vh8eQ,5036
|
22
|
-
vision_agent/tools/tools.py,sha256=IU7jTEJ8NH5zVmFwznOLEmjOBQ7IzBJpanzpqtjoJrY,44876
|
23
|
-
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
24
|
-
vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
|
25
|
-
vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
|
26
|
-
vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
|
27
|
-
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
28
|
-
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
|
-
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
30
|
-
vision_agent-0.2.110.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
-
vision_agent-0.2.110.dist-info/METADATA,sha256=Qcxe0Nt5ObGSUmIhqWg8B0FWjw13YW2jKyz21sbzCtI,10732
|
32
|
-
vision_agent-0.2.110.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
-
vision_agent-0.2.110.dist-info/RECORD,,
|
File without changes
|
File without changes
|