vision-agent 0.2.110__py3-none-any.whl → 0.2.112__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,84 @@
1
+ from uuid import UUID
2
+ from enum import Enum
3
+ from typing import List, Tuple, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
6
+
7
+
8
+ class BboxInput(BaseModel):
9
+ image_path: str
10
+ labels: List[str]
11
+ bboxes: List[Tuple[int, int, int, int]]
12
+
13
+
14
+ class BboxInputBase64(BaseModel):
15
+ image: str
16
+ filename: str
17
+ labels: List[str]
18
+ bboxes: List[Tuple[int, int, int, int]]
19
+
20
+
21
+ class PromptTask(str, Enum):
22
+ """
23
+ Valid task prompt options for the Florencev2 model.
24
+ """
25
+
26
+ CAPTION = "<CAPTION>"
27
+ """"""
28
+ CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
29
+ """"""
30
+ OBJECT_DETECTION = "<OD>"
31
+ """"""
32
+
33
+
34
+ class FineTuning(BaseModel):
35
+ model_config = ConfigDict(populate_by_name=True)
36
+
37
+ job_id: UUID = Field(alias="jobId")
38
+
39
+ @field_serializer("job_id")
40
+ def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
41
+ return str(job_id)
42
+
43
+
44
+ class Florencev2FtRequest(BaseModel):
45
+ model_config = ConfigDict(populate_by_name=True)
46
+
47
+ image: str
48
+ task: PromptTask
49
+ tool: str
50
+ prompt: Optional[str] = ""
51
+ fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
52
+
53
+
54
+ class JobStatus(str, Enum):
55
+ """The status of a fine-tuning job.
56
+
57
+ CREATED:
58
+ The job has been created and is waiting to be scheduled to run.
59
+ STARTING:
60
+ The job has started running, but has not yet entered the training phase.
61
+ TRAINING:
62
+ The job is training a model.
63
+ EVALUATING:
64
+ The job is evaluating the model and computing metrics.
65
+ PUBLISHING:
66
+ The job is exporting the artifact(s) to an external directory (s3 or local).
67
+ SUCCEEDED:
68
+ The job has finished, including training, evaluation and publishing the
69
+ artifact(s).
70
+ FAILED:
71
+ The job has failed for some reason internally, it can be due to resources
72
+ issues or the code itself.
73
+ STOPPED:
74
+ The job has been stopped by the user locally or in the cloud.
75
+ """
76
+
77
+ CREATED = "CREATED"
78
+ STARTING = "STARTING"
79
+ TRAINING = "TRAINING"
80
+ EVALUATING = "EVALUATING"
81
+ PUBLISHING = "PUBLISHING"
82
+ SUCCEEDED = "SUCCEEDED"
83
+ FAILED = "FAILED"
84
+ STOPPED = "STOPPED"
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
49
49
  """
50
50
 
51
51
  is_retryable = True
52
+
53
+
54
+ class FineTuneModelIsNotReady(Exception):
55
+ """Exception raised when the fine-tune model is not ready.
56
+ If this is raised, it's recommended to wait 5 seconds before trying to use
57
+ the model again.
58
+ """
59
+
60
+
61
+ class FineTuneModelNotFound(Exception):
62
+ """Exception raised when the fine-tune model is not found.
63
+ If this is raised, it's recommended to try another model id.
64
+ """
@@ -416,7 +416,6 @@ class CodeInterpreter(abc.ABC):
416
416
 
417
417
 
418
418
  class E2BCodeInterpreter(CodeInterpreter):
419
-
420
419
  def __init__(self, *args: Any, **kwargs: Any) -> None:
421
420
  super().__init__(*args, **kwargs)
422
421
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
@@ -1,12 +1,15 @@
1
1
  """Utility functions for image processing."""
2
2
 
3
3
  import base64
4
+ import io
5
+ import tempfile
4
6
  from importlib import resources
5
7
  from io import BytesIO
6
8
  from pathlib import Path
7
9
  from typing import Dict, List, Tuple, Union
8
10
 
9
11
  import numpy as np
12
+ from moviepy.editor import ImageSequenceClip
10
13
  from PIL import Image, ImageDraw, ImageFont
11
14
  from PIL.Image import Image as ImageType
12
15
 
@@ -63,6 +66,46 @@ def rle_decode(mask_rle: str, shape: Tuple[int, int]) -> np.ndarray:
63
66
  return img.reshape(shape)
64
67
 
65
68
 
69
+ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
70
+ r"""Decode a run-length encoded mask. Returns numpy array, 1 - mask, 0 - background.
71
+
72
+ Parameters:
73
+ rle: The run-length encoded mask, a dict with "size" and "counts" entries.
74
+ """
75
+ size = rle["size"]
76
+ counts = rle["counts"]
77
+
78
+ total_elements = size[0] * size[1]
79
+ flattened_mask = np.zeros(total_elements, dtype=np.uint8)
80
+
81
+ current_pos = 0
82
+ for i, count in enumerate(counts):
83
+ if i % 2 == 1:
84
+ flattened_mask[current_pos : current_pos + count] = 1
85
+ current_pos += count
86
+
87
+ binary_mask = flattened_mask.reshape(size, order="F")
88
+ return binary_mask
89
+
90
+
91
+ def frames_to_bytes(
92
+ frames: List[np.ndarray], fps: float = 10, file_ext: str = "mp4"
93
+ ) -> bytes:
94
+ r"""Convert a list of frames to a video file encoded into a byte string.
95
+
96
+ Parameters:
97
+ frames: the list of frames
98
+ fps: the frames per second of the video
99
+ file_ext: the file extension of the video file
100
+ """
101
+ with tempfile.NamedTemporaryFile(delete=True) as temp_file:
102
+ clip = ImageSequenceClip(frames, fps=fps)
103
+ clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps)
104
+ with open(temp_file.name + f".{file_ext}", "rb") as f:
105
+ buffer_bytes = f.read()
106
+ return buffer_bytes
107
+
108
+
66
109
  def b64_to_pil(b64_str: str) -> ImageType:
67
110
  r"""Convert a base64 string to a PIL Image.
68
111
 
@@ -78,6 +121,15 @@ def b64_to_pil(b64_str: str) -> ImageType:
78
121
  return Image.open(BytesIO(base64.b64decode(b64_str)))
79
122
 
80
123
 
124
+ def numpy_to_bytes(image: np.ndarray) -> bytes:
125
+ pil_image = Image.fromarray(image).convert("RGB")
126
+ image_buffer = io.BytesIO()
127
+ pil_image.save(image_buffer, format="PNG")
128
+ buffer_bytes = image_buffer.getvalue()
129
+ image_buffer.close()
130
+ return buffer_bytes
131
+
132
+
81
133
  def get_image_size(data: Union[str, Path, np.ndarray, ImageType]) -> Tuple[int, ...]:
82
134
  r"""Get the size of an image.
83
135
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.110
3
+ Version: 0.2.112
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -0,0 +1,33 @@
1
+ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
+ vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
+ vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
+ vision_agent/agent/agent_utils.py,sha256=ArHrmHIEkWxkxkUHm0WH7pOnWqqoOvNdTrgIpl-DAow,1124
5
+ vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
+ vision_agent/agent/vision_agent_coder.py,sha256=HaIOxPQajP2CJT7TbffSkz0MDbYxEs6_P9Ykz71nkUc,31209
7
+ vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
8
+ vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
9
+ vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
+ vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
12
+ vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
+ vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
+ vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
+ vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
+ vision_agent/tools/__init__.py,sha256=lyD7X-CXS4215K8mPXrzEoFVDHeQaNYn6KSn_uVoxlY,2108
18
+ vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
19
+ vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
+ vision_agent/tools/tool_utils.py,sha256=1_ZnBubUctJYTv3GsSzRJ6cbQ0Y42yolBvehs6dZYao,5762
21
+ vision_agent/tools/tools.py,sha256=F3bf7uL84p4Cfe2b8ek-KSeWgABkUccvsdwBi3CgTCM,58561
22
+ vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
23
+ vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
+ vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
+ vision_agent/utils/execute.py,sha256=3NklVR1PZqIDuF_nhq2HhYMy6ZqOsTSUL0DFTpo--4M,25092
26
+ vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
27
+ vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
+ vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
+ vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
+ vision_agent-0.2.112.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.112.dist-info/METADATA,sha256=XBqbIsjZ128JvvmHvwKFkCP6-8MCpmZRsjr7osRdqGU,10732
32
+ vision_agent-0.2.112.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.112.dist-info/RECORD,,
@@ -1,30 +0,0 @@
1
- from enum import Enum
2
- from typing import List, Tuple
3
-
4
- from pydantic import BaseModel
5
-
6
-
7
- class BboxInput(BaseModel):
8
- image_path: str
9
- labels: List[str]
10
- bboxes: List[Tuple[int, int, int, int]]
11
-
12
-
13
- class BboxInputBase64(BaseModel):
14
- image: str
15
- filename: str
16
- labels: List[str]
17
- bboxes: List[Tuple[int, int, int, int]]
18
-
19
-
20
- class PromptTask(str, Enum):
21
- """
22
- Valid task prompts options for the Florencev2 model.
23
- """
24
-
25
- CAPTION = "<CAPTION>"
26
- """"""
27
- CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
28
- """"""
29
- OBJECT_DETECTION = "<OD>"
30
- """"""
@@ -1,33 +0,0 @@
1
- vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
- vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
- vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
- vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
5
- vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
6
- vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
7
- vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
8
- vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
9
- vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
11
- vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
12
- vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
- vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
- vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
- vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=NDEEOZrwpeNYhUA32bSKXrZ62uEsErb8Vn-70_0Oz1o,2033
18
- vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
19
- vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
20
- vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
21
- vision_agent/tools/tool_utils.py,sha256=Y7I4OBW5GwXkHQwlAXqp29WB0OOPQXAMYCAHj_Vh8eQ,5036
22
- vision_agent/tools/tools.py,sha256=IU7jTEJ8NH5zVmFwznOLEmjOBQ7IzBJpanzpqtjoJrY,44876
23
- vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
- vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
25
- vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
26
- vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
27
- vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
- vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
- vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.110.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.110.dist-info/METADATA,sha256=Qcxe0Nt5ObGSUmIhqWg8B0FWjw13YW2jKyz21sbzCtI,10732
32
- vision_agent-0.2.110.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.110.dist-info/RECORD,,