vision-agent 0.2.110__py3-none-any.whl → 0.2.112__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,84 @@
1
+ from uuid import UUID
2
+ from enum import Enum
3
+ from typing import List, Tuple, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
6
+
7
+
8
# Bounding-box annotations for an image that is referenced by a path.
# NOTE: documented with comments rather than a class docstring, because
# pydantic copies a model's docstring into its JSON-schema description.
class BboxInput(BaseModel):
    # Path of the image the annotations refer to.
    image_path: str
    # Label strings; presumably one per entry in `bboxes` -- confirm with callers.
    labels: List[str]
    # Boxes as 4-integer tuples; the coordinate convention is not fixed here.
    bboxes: List[Tuple[int, int, int, int]]
12
+
13
+
14
# Bounding-box annotations for an image that is carried inline as a string.
# NOTE: documented with comments rather than a class docstring, because
# pydantic copies a model's docstring into its JSON-schema description.
class BboxInputBase64(BaseModel):
    # Image payload as a string; presumably base64 text, per the class name -- confirm.
    image: str
    # File name associated with the image payload.
    filename: str
    # Label strings; presumably one per entry in `bboxes` -- confirm with callers.
    labels: List[str]
    # Boxes as 4-integer tuples; the coordinate convention is not fixed here.
    bboxes: List[Tuple[int, int, int, int]]
19
+
20
+
21
class PromptTask(str, Enum):
    """Task prompt options that are valid for the Florencev2 model.

    Every member is also a ``str``; its value is the literal prompt token,
    angle brackets included.
    """

    # Token for the caption task.
    CAPTION = "<CAPTION>"
    # Token for the caption-to-phrase-grounding task.
    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
    # Token for the object-detection task (note the abbreviated value).
    OBJECT_DETECTION = "<OD>"
32
+
33
+
34
# Reference to a fine-tuning job, identified by the job's UUID.
class FineTuning(BaseModel):
    # Accept the field by its Python name (`job_id`) as well as by its alias.
    model_config = ConfigDict(populate_by_name=True)

    # Required field; exposed under the camelCase alias "jobId".
    job_id: UUID = Field(..., alias="jobId")

    @field_serializer("job_id")
    def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
        """Serialize the job UUID as its string form."""
        return str(job_id)
42
+
43
+
44
# Request body for a Florencev2 call that may reference a fine-tuning job.
class Florencev2FtRequest(BaseModel):
    # Accept fields by their Python names as well as by their aliases.
    model_config = ConfigDict(populate_by_name=True)

    # Image payload as a string; the encoding is not fixed here -- confirm with callers.
    image: str
    # Which Florencev2 task prompt to run.
    task: PromptTask
    # Tool name string.
    tool: str
    # Optional free-text prompt; defaults to the empty string.
    prompt: Optional[str] = ""
    # Optional fine-tuning job reference; exposed under the alias "fineTuning".
    fine_tuning: Optional[FineTuning] = Field(default=None, alias="fineTuning")
52
+
53
+
54
class JobStatus(str, Enum):
    """Lifecycle states of a fine-tuning job.

    Each member is also a ``str`` whose value equals its name.

    CREATED:
        The job exists and is waiting to be scheduled.
    STARTING:
        The job has started running but has not yet entered the training phase.
    TRAINING:
        The job is training a model.
    EVALUATING:
        The job is evaluating the model and computing metrics.
    PUBLISHING:
        The job is exporting its artifact(s) to an external directory
        (s3 or local).
    SUCCEEDED:
        The job has finished training, evaluation and publishing of the
        artifact(s).
    FAILED:
        The job failed internally, for example because of resource issues or
        a problem in the code itself.
    STOPPED:
        The job was stopped by the user, locally or in the cloud.
    """

    CREATED = "CREATED"
    STARTING = "STARTING"
    TRAINING = "TRAINING"
    EVALUATING = "EVALUATING"
    PUBLISHING = "PUBLISHING"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    STOPPED = "STOPPED"
@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
49
49
  """
50
50
 
51
51
  is_retryable = True
52
+
53
+
54
class FineTuneModelIsNotReady(Exception):
    """Signal that the fine-tuned model is not ready to be used yet.

    When this is raised, the recommendation is to wait 5 seconds and then try
    to use the model again.
    """
59
+
60
+
61
class FineTuneModelNotFound(Exception):
    """Signal that the requested fine-tuned model could not be found.

    When this is raised, the recommendation is to try a different model id.
    """
@@ -416,7 +416,6 @@ class CodeInterpreter(abc.ABC):
416
416
 
417
417
 
418
418
  class E2BCodeInterpreter(CodeInterpreter):
419
-
420
419
  def __init__(self, *args: Any, **kwargs: Any) -> None:
421
420
  super().__init__(*args, **kwargs)
422
421
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
@@ -1,12 +1,15 @@
1
1
  """Utility functions for image processing."""
2
2
 
3
3
  import base64
4
+ import io
5
+ import tempfile
4
6
  from importlib import resources
5
7
  from io import BytesIO
6
8
  from pathlib import Path
7
9
  from typing import Dict, List, Tuple, Union
8
10
 
9
11
  import numpy as np
12
+ from moviepy.editor import ImageSequenceClip
10
13
  from PIL import Image, ImageDraw, ImageFont
11
14
  from PIL.Image import Image as ImageType
12
15
 
@@ -63,6 +66,46 @@ def rle_decode(mask_rle: str, shape: Tuple[int, int]) -> np.ndarray:
63
66
  return img.reshape(shape)
64
67
 
65
68
 
69
def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
    r"""Expand an array-form run-length encoding into a binary mask.

    Parameters:
        rle: Mapping with a "size" entry (the two dimensions of the mask) and
            a "counts" entry (alternating run lengths: the first run is
            background, the second is mask, and so on).

    Returns:
        A uint8 array of shape `size`, with 1 for mask and 0 for background.
        The runs fill the array in column-major (Fortran) order.
    """
    dims = rle["size"]
    runs = rle["counts"]

    flat = np.zeros(dims[0] * dims[1], dtype=np.uint8)

    # Walk the runs pairwise: a background run followed by a mask run. An
    # unpaired trailing background run needs no work, the buffer is already 0.
    cursor = 0
    for gap, fill in zip(runs[0::2], runs[1::2]):
        begin = cursor + gap
        cursor = begin + fill
        flat[begin:cursor] = 1

    return flat.reshape(dims, order="F")
89
+
90
+
91
def frames_to_bytes(
    frames: List[np.ndarray], fps: float = 10, file_ext: str = "mp4"
) -> bytes:
    r"""Convert a list of frames to a video file encoded into a byte string.

    Parameters:
        frames: the list of frames
        fps: the frames per second of the video
        file_ext: the file extension of the video file (without the leading dot)

    Returns:
        The raw bytes of the encoded video file.
    """
    # Encode inside a private temporary directory. The previous implementation
    # wrote to `<NamedTemporaryFile name>.<ext>`, a sibling path the temp-file
    # object did not own, so every call left an encoded video behind on disk.
    # Removing the whole directory on exit cleans up whatever was written.
    with tempfile.TemporaryDirectory() as temp_dir:
        video_path = Path(temp_dir) / f"video.{file_ext}"
        clip = ImageSequenceClip(frames, fps=fps)
        # The target still ends in `.{file_ext}`, exactly as before.
        clip.write_videofile(str(video_path), fps=fps)
        return video_path.read_bytes()
107
+
108
+
66
109
  def b64_to_pil(b64_str: str) -> ImageType:
67
110
  r"""Convert a base64 string to a PIL Image.
68
111
 
@@ -78,6 +121,15 @@ def b64_to_pil(b64_str: str) -> ImageType:
78
121
  return Image.open(BytesIO(base64.b64decode(b64_str)))
79
122
 
80
123
 
124
def numpy_to_bytes(image: np.ndarray) -> bytes:
    r"""Encode a numpy image array as PNG bytes.

    Parameters:
        image: the image array; it is turned into a PIL image and converted
            to RGB mode before encoding

    Returns:
        The PNG-encoded image as a byte string.
    """
    # The context manager closes the in-memory buffer once the bytes are copied out.
    with io.BytesIO() as png_buffer:
        Image.fromarray(image).convert("RGB").save(png_buffer, format="PNG")
        return png_buffer.getvalue()
131
+
132
+
81
133
  def get_image_size(data: Union[str, Path, np.ndarray, ImageType]) -> Tuple[int, ...]:
82
134
  r"""Get the size of an image.
83
135
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.110
3
+ Version: 0.2.112
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -0,0 +1,33 @@
1
+ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
+ vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
+ vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
+ vision_agent/agent/agent_utils.py,sha256=ArHrmHIEkWxkxkUHm0WH7pOnWqqoOvNdTrgIpl-DAow,1124
5
+ vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
6
+ vision_agent/agent/vision_agent_coder.py,sha256=HaIOxPQajP2CJT7TbffSkz0MDbYxEs6_P9Ykz71nkUc,31209
7
+ vision_agent/agent/vision_agent_coder_prompts.py,sha256=xIya1txRZM8qoQHAWTEkEFCL8L3iZD7QD09t3ZtdxSE,11305
8
+ vision_agent/agent/vision_agent_prompts.py,sha256=ydUU_Wvw-jqdL_vObSUr-VCQvjSwA5Fd74TbbhUzyxk,6112
9
+ vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
11
+ vision_agent/clients/landing_public_api.py,sha256=6L15zh5lP5JHCpGnYpHMREgrrKiJin_OYdf2vT9HHZQ,1507
12
+ vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
+ vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
+ vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
+ vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
+ vision_agent/tools/__init__.py,sha256=lyD7X-CXS4215K8mPXrzEoFVDHeQaNYn6KSn_uVoxlY,2108
18
+ vision_agent/tools/meta_tools.py,sha256=q6h7hZarZrsWRloVE6PbTZwW8J2N1uUM9Ac-XxsT6hk,13365
19
+ vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
20
+ vision_agent/tools/tool_utils.py,sha256=1_ZnBubUctJYTv3GsSzRJ6cbQ0Y42yolBvehs6dZYao,5762
21
+ vision_agent/tools/tools.py,sha256=F3bf7uL84p4Cfe2b8ek-KSeWgABkUccvsdwBi3CgTCM,58561
22
+ vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
23
+ vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
+ vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
25
+ vision_agent/utils/execute.py,sha256=3NklVR1PZqIDuF_nhq2HhYMy6ZqOsTSUL0DFTpo--4M,25092
26
+ vision_agent/utils/image_utils.py,sha256=c1LrmaHD331za8DbA1myJpgUmWoDzePaOK6-dsdpZQo,9847
27
+ vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
+ vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
+ vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
+ vision_agent-0.2.112.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.112.dist-info/METADATA,sha256=XBqbIsjZ128JvvmHvwKFkCP6-8MCpmZRsjr7osRdqGU,10732
32
+ vision_agent-0.2.112.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.112.dist-info/RECORD,,
@@ -1,30 +0,0 @@
1
- from enum import Enum
2
- from typing import List, Tuple
3
-
4
- from pydantic import BaseModel
5
-
6
-
7
- class BboxInput(BaseModel):
8
- image_path: str
9
- labels: List[str]
10
- bboxes: List[Tuple[int, int, int, int]]
11
-
12
-
13
- class BboxInputBase64(BaseModel):
14
- image: str
15
- filename: str
16
- labels: List[str]
17
- bboxes: List[Tuple[int, int, int, int]]
18
-
19
-
20
- class PromptTask(str, Enum):
21
- """
22
- Valid task prompts options for the Florencev2 model.
23
- """
24
-
25
- CAPTION = "<CAPTION>"
26
- """"""
27
- CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
28
- """"""
29
- OBJECT_DETECTION = "<OD>"
30
- """"""
@@ -1,33 +0,0 @@
1
- vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
- vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
3
- vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
4
- vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
5
- vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
6
- vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
7
- vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
8
- vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
9
- vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
11
- vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
12
- vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
14
- vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
15
- vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
16
- vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
17
- vision_agent/tools/__init__.py,sha256=NDEEOZrwpeNYhUA32bSKXrZ62uEsErb8Vn-70_0Oz1o,2033
18
- vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
19
- vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
20
- vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
21
- vision_agent/tools/tool_utils.py,sha256=Y7I4OBW5GwXkHQwlAXqp29WB0OOPQXAMYCAHj_Vh8eQ,5036
22
- vision_agent/tools/tools.py,sha256=IU7jTEJ8NH5zVmFwznOLEmjOBQ7IzBJpanzpqtjoJrY,44876
23
- vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
24
- vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
25
- vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
26
- vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
27
- vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
28
- vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
- vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
30
- vision_agent-0.2.110.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.110.dist-info/METADATA,sha256=Qcxe0Nt5ObGSUmIhqWg8B0FWjw13YW2jKyz21sbzCtI,10732
32
- vision_agent-0.2.110.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.110.dist-info/RECORD,,