supervisely 6.73.343__py3-none-any.whl → 6.73.344__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,11 +8,15 @@ import asyncio
8
8
  import copy
9
9
  import io
10
10
  import json
11
+ import os
12
+ import pickle
11
13
  import re
14
+ import tempfile
12
15
  import urllib.parse
13
16
  from collections import defaultdict
14
17
  from concurrent.futures import ThreadPoolExecutor
15
18
  from contextlib import contextmanager
19
+ from dataclasses import dataclass
16
20
  from datetime import datetime
17
21
  from functools import partial
18
22
  from math import ceil
@@ -51,8 +55,10 @@ from supervisely._utils import (
51
55
  from supervisely.annotation.annotation import Annotation
52
56
  from supervisely.annotation.tag import Tag
53
57
  from supervisely.annotation.tag_meta import TagApplicableTo, TagMeta, TagValueType
58
+ from supervisely.api.dataset_api import DatasetInfo
54
59
  from supervisely.api.entity_annotation.figure_api import FigureApi
55
60
  from supervisely.api.entity_annotation.tag_api import TagApi
61
+ from supervisely.api.file_api import FileInfo
56
62
  from supervisely.api.module_api import (
57
63
  ApiField,
58
64
  RemoveableBulkModuleApi,
@@ -60,6 +66,8 @@ from supervisely.api.module_api import (
60
66
  )
61
67
  from supervisely.imaging import image as sly_image
62
68
  from supervisely.io.fs import (
69
+ OFFSETS_PKL_BATCH_SIZE,
70
+ OFFSETS_PKL_SUFFIX,
63
71
  clean_dir,
64
72
  ensure_base_path,
65
73
  get_file_ext,
@@ -72,6 +80,7 @@ from supervisely.io.fs import (
72
80
  )
73
81
  from supervisely.project.project_meta import ProjectMeta
74
82
  from supervisely.project.project_type import (
83
+ _BLOB_TAG_NAME,
75
84
  _MULTISPECTRAL_TAG_NAME,
76
85
  _MULTIVIEW_TAG_NAME,
77
86
  )
@@ -81,6 +90,181 @@ SUPPORTED_CONFLICT_RESOLUTIONS = ["skip", "rename", "replace"]
81
90
  API_DEFAULT_PER_PAGE = 500
82
91
 
83
92
 
93
+ @dataclass
94
+ class BlobImageInfo:
95
+ """
96
+ Object with image parameters that describes image in blob file.
97
+
98
+ :Example:
99
+
100
+ .. code-block:: python
101
+
102
+ BlobImageInfo(
103
+ name='IMG_3861.jpeg',
104
+ offset_start=0,
105
+ offset_end=148388,
106
+ )
107
+ """
108
+
109
+ name: str
110
+ offset_start: int
111
+ offset_end: int
112
+
113
+ @staticmethod
114
+ def from_image_info(image_info: ImageInfo) -> BlobImageInfo:
115
+ return BlobImageInfo(
116
+ name=image_info.name,
117
+ offset_start=image_info.offset_start,
118
+ offset_end=image_info.offset_end,
119
+ )
120
+
121
+ def add_team_file_id(self, team_file_id: int):
122
+ """
123
+ Add a file ID from Team Files to the BlobImageInfo object to extend the data imported from the offsets file.
124
+ This ID is used to link the offsets with a blob file that has already been uploaded to Supervisely storage.
125
+ """
126
+ setattr(self, "team_file_id", team_file_id)
127
+ return self
128
+
129
+ def to_dict(self, team_file_id: int = None) -> Dict:
130
+ """
131
+ Create a dictionary from the BlobImageInfo object that can be used in requests to the Supervisely API.
132
+ """
133
+ return {
134
+ ApiField.TITLE: self.name,
135
+ ApiField.TEAM_FILE_ID: team_file_id or getattr(self, "team_file_id", None),
136
+ ApiField.SOURCE_BLOB: {
137
+ ApiField.OFFSET_START: self.offset_start,
138
+ ApiField.OFFSET_END: self.offset_end,
139
+ },
140
+ }
141
+
142
+ @staticmethod
143
+ def from_dict(offset_dict: Dict, return_team_file_id: bool = False) -> BlobImageInfo:
144
+ """
145
+ Create a BlobImageInfo object from a dictionary returned by the Supervisely API.
146
+
147
+ :param offset_dict: Dictionary with image offsets.
148
+ :type offset_dict: Dict
149
+ :param return_team_file_id: If True, return team file ID.
150
+ Default is False to keep the object smaller for pickling.
151
+ :type return_team_file_id: bool
152
+ :return: BlobImageInfo object.
153
+ :rtype: BlobImageInfo
154
+ """
155
+ blob_info = BlobImageInfo(
156
+ name=offset_dict[ApiField.TITLE],
157
+ offset_start=offset_dict[ApiField.SOURCE_BLOB][ApiField.OFFSET_START],
158
+ offset_end=offset_dict[ApiField.SOURCE_BLOB][ApiField.OFFSET_END],
159
+ )
160
+ if return_team_file_id:
161
+ blob_info.add_team_file_id(offset_dict[ApiField.TEAM_FILE_ID])
162
+ return blob_info
163
+
164
+ @property
165
+ def offsets_dict(self) -> Dict:
166
+ return {
167
+ ApiField.OFFSET_START: self.offset_start,
168
+ ApiField.OFFSET_END: self.offset_end,
169
+ }
170
+
171
+ @staticmethod
172
+ def load_from_pickle_generator(
173
+ file_path: str, batch_size: int = OFFSETS_PKL_BATCH_SIZE
174
+ ) -> Generator[List["BlobImageInfo"], None, None]:
175
+ """
176
+ Load BlobImageInfo objects from a pickle file in batches of the specified size.
177
+ The file may contain one or more pickled lists (or single objects) of BlobImageInfo.
178
+
179
+ :param file_path: Path to the pickle file containing BlobImageInfo objects.
180
+ :type file_path: str
181
+ :param batch_size: Size of each batch. Default is 10000.
182
+ :type batch_size: int
183
+ :return: Generator yielding batches of BlobImageInfo objects.
184
+ :rtype: Generator[List[BlobImageInfo], None, None]
185
+ """
186
+ try:
187
+ current_batch = []
188
+
189
+ with open(file_path, "rb") as f:
190
+ while True:
191
+ try:
192
+ # Load one pickle object at a time
193
+ data = pickle.load(f)
194
+
195
+ if isinstance(data, list):
196
+ # More efficient way to process lists
197
+ remaining_items = data
198
+ while remaining_items:
199
+ # Calculate how many more items we need to fill the current batch
200
+ items_needed = batch_size - len(current_batch)
201
+
202
+ if items_needed > 0:
203
+ # Take only what we need from the remaining items
204
+ current_batch.extend(remaining_items[:items_needed])
205
+ remaining_items = remaining_items[items_needed:]
206
+ else:
207
+ # current_batch is already full or overflowing, don't add more items
208
+ # and proceed directly to yielding the batch
209
+ pass
210
+
211
+ # If we have a full batch, yield it
212
+ if len(current_batch) >= batch_size:
213
+ yield current_batch
214
+ current_batch = []
215
+ else:
216
+ # Handle single item
217
+ current_batch.append(data)
218
+
219
+ if len(current_batch) >= batch_size:
220
+ yield current_batch
221
+ current_batch = []
222
+
223
+ except EOFError:
224
+ # End of file reached
225
+ break
226
+ except Exception as e:
227
+ logger.error(f"Error reading pickle data: {str(e)}")
228
+ break
229
+
230
+ # Yield any remaining items in the final batch
231
+ if current_batch:
232
+ yield current_batch
233
+
234
+ except Exception as e:
235
+ logger.error(f"Failed to load BlobImageInfo objects from {file_path}: {str(e)}")
236
+ yield []
237
+
238
+ @staticmethod
239
+ def dump_to_pickle(
240
+ offsets: Union[Generator[List[BlobImageInfo], None, None], List[BlobImageInfo]], file_path: str
241
+ ):
242
+ """
243
+ Dump BlobImageInfo objects to a pickle file in batches.
244
+ To read the data back, use the `load_from_pickle_generator` method.
245
+
246
+ :param offsets: Generator yielding batches of BlobImageInfo objects or a list of BlobImageInfo objects.
247
+ :type offsets: Generator[List[BlobImageInfo]] or List[BlobImageInfo]
248
+ :param file_path: Path to the pickle file.
249
+ :type file_path: str
250
+ """
251
+
252
+ try:
253
+ if isinstance(offsets, Generator):
254
+ with open(file_path, "ab") as f:
255
+ for batch in offsets:
256
+ pickle.dump(batch, f)
257
+ elif isinstance(offsets, list):
258
+ with open(file_path, "ab") as f:
259
+ pickle.dump(offsets, f)
260
+ else:
261
+ raise NotImplementedError(
262
+ f"Invalid type of 'offsets' parameter for 'dump_offsets' method: {type(offsets)}"
263
+ )
264
+ except Exception as e:
265
+ logger.error(f"Failed to dump BlobImageInfo objects to {file_path}: {str(e)}")
266
+
267
+
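The class above is easiest to see in a small end-to-end sketch. Assuming this module is importable as `supervisely.api.image_api` and using illustrative file names, offsets could be dumped and read back like this:

    from supervisely.api.image_api import BlobImageInfo

    # Two illustrative entries; byte ranges must match the images packed into the blob TAR.
    offsets = [
        BlobImageInfo(name="IMG_3861.jpeg", offset_start=0, offset_end=148388),
        BlobImageInfo(name="IMG_3862.jpeg", offset_start=148388, offset_end=296776),
    ]

    # dump_to_pickle opens the file in append mode, so the target should not already exist.
    BlobImageInfo.dump_to_pickle(offsets, "/tmp/images_offsets.pkl")

    # Read the offsets back in batches (batch_size defaults to OFFSETS_PKL_BATCH_SIZE).
    for batch in BlobImageInfo.load_from_pickle_generator("/tmp/images_offsets.pkl"):
        for info in batch:
            print(info.name, info.offsets_dict)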
84
268
  class ImageInfo(NamedTuple):
85
269
  """
86
270
  Object with image parameters from Supervisely.
@@ -108,6 +292,10 @@ class ImageInfo(NamedTuple):
108
292
  full_storage_url='http://app.supervise.ly/h5un6l2bnaz1vj8a9qgms4-public/images/original/7/h/Vo/...jpg'),
109
293
  tags=[],
110
294
  created_by='admin'
295
+ related_data_id=None,
296
+ download_id=None,
297
+ offset_start=None,
298
+ offset_end=None,
111
299
  )
112
300
  """
113
301
 
@@ -174,9 +362,25 @@ class ImageInfo(NamedTuple):
174
362
  #: 'createdAt': '2021-03-05T14:15:39.923Z', 'updatedAt': '2021-03-05T14:15:39.923Z'}, {...}]".
175
363
  tags: List[Dict]
176
364
 
177
- #: :class:`str`: Id of a user who created the image.
365
+ #: :class:`str`: ID of a user who created the image.
178
366
  created_by: str
179
367
 
368
+ #: :class:`int`: ID of the blob file in Supervisely storage related to the image.
369
+ related_data_id: Optional[int] = None
370
+
371
+ #: :class:`str`: Unique ID of the image that links it to the corresponding blob file in Supervisely storage
372
+ #: used for downloading the source blob file.
373
+ download_id: Optional[str] = None
374
+
375
+ #: :class:`int`: Byte offset in the blob file that points to the start of the image data.
376
+ offset_start: Optional[int] = None
377
+
378
+ #: :class:`int`: Byte offset in the blob file that points to the end of the image data.
379
+ offset_end: Optional[int] = None
380
+
381
+ # DO NOT DELETE THIS COMMENT
382
+ #! New fields must be added with default values to keep backward compatibility.
383
+
180
384
  @property
181
385
  def preview_url(self):
182
386
  """
@@ -247,6 +451,10 @@ class ImageApi(RemoveableBulkModuleApi):
247
451
  ApiField.FULL_STORAGE_URL,
248
452
  ApiField.TAGS,
249
453
  ApiField.CREATED_BY_ID[0][0],
454
+ ApiField.RELATED_DATA_ID,
455
+ ApiField.DOWNLOAD_ID,
456
+ ApiField.OFFSET_START,
457
+ ApiField.OFFSET_END,
250
458
  ]
251
459
 
252
460
  @staticmethod
@@ -913,7 +1121,7 @@ class ImageApi(RemoveableBulkModuleApi):
913
1121
 
914
1122
  img_bytes = api.image.download_bytes(dataset_id, [770918])
915
1123
  print(img_bytes)
916
- # Output: [b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\...]
1124
+ # Output: [b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\ ...']
917
1125
  """
918
1126
  if len(ids) == 0:
919
1127
  return []
@@ -1841,7 +2049,7 @@ class ImageApi(RemoveableBulkModuleApi):
1841
2049
  metas: Optional[List[Dict]] = None,
1842
2050
  batch_size: Optional[int] = 50,
1843
2051
  force_metadata_for_links: bool = True,
1844
- infos: List[ImageInfo] = None,
2052
+ infos: List[ImageInfo] = None, # deprecated
1845
2053
  skip_validation: Optional[bool] = False,
1846
2054
  conflict_resolution: Optional[Literal["rename", "skip", "replace"]] = None,
1847
2055
  ) -> List[ImageInfo]:
@@ -1866,7 +2074,7 @@ class ImageApi(RemoveableBulkModuleApi):
1866
2074
  :type batch_size: int, optional
1867
2075
  :param force_metadata_for_links: Calculate metadata for links. If False, metadata will be empty.
1868
2076
  :type force_metadata_for_links: bool, optional
1869
- :param infos: List of ImageInfo objects. If None, will be requested from server.
2077
+ :param infos: DEPRECATED: This parameter is not used.
1870
2078
  :type infos: List[ImageInfo], optional
1871
2079
  :param skip_validation: Skips validation for images, can result in invalid images being uploaded.
1872
2080
  :type skip_validation: bool, optional
@@ -1913,59 +2121,290 @@ class ImageApi(RemoveableBulkModuleApi):
1913
2121
  if metas is None:
1914
2122
  metas = [{}] * len(names)
1915
2123
 
1916
- if infos is None:
1917
- infos = self.get_info_by_id_batch(
1918
- ids, force_metadata_for_links=force_metadata_for_links
1919
- )
2124
+ return self._upload_bulk_add(
2125
+ lambda item: (ApiField.IMAGE_ID, item),
2126
+ dataset_id,
2127
+ names,
2128
+ ids,
2129
+ progress_cb,
2130
+ metas=metas,
2131
+ batch_size=batch_size,
2132
+ force_metadata_for_links=force_metadata_for_links,
2133
+ skip_validation=skip_validation,
2134
+ conflict_resolution=conflict_resolution,
2135
+ )
1920
2136
 
1921
- # prev implementation
1922
- # hashes = [info.hash for info in infos]
1923
- # return self.upload_hashes(dataset_id, names, hashes, progress_cb, metas=metas)
1924
-
1925
- links, links_names, links_order, links_metas = [], [], [], []
1926
- hashes, hashes_names, hashes_order, hashes_metas = [], [], [], []
1927
- for idx, (name, info, meta) in enumerate(zip(names, infos, metas)):
1928
- if info.link is not None:
1929
- links.append(info.link)
1930
- links_names.append(name)
1931
- links_order.append(idx)
1932
- links_metas.append(meta)
1933
- else:
1934
- hashes.append(info.hash)
1935
- hashes_names.append(name)
1936
- hashes_order.append(idx)
1937
- hashes_metas.append(meta)
1938
-
1939
- result = [None] * len(names)
1940
- if len(links) > 0:
1941
- res_infos_links = self.upload_links(
1942
- dataset_id,
1943
- links_names,
1944
- links,
1945
- progress_cb,
1946
- metas=links_metas,
1947
- batch_size=batch_size,
1948
- force_metadata_for_links=force_metadata_for_links,
1949
- skip_validation=skip_validation,
1950
- conflict_resolution=conflict_resolution,
2137
+ def upload_by_offsets(
2138
+ self,
2139
+ dataset: Union[DatasetInfo, int],
2140
+ team_file_id: int,
2141
+ names: List[str] = None,
2142
+ offsets: List[dict] = None,
2143
+ progress_cb: Optional[Union[tqdm, Callable]] = None,
2144
+ metas: Optional[List[Dict]] = None,
2145
+ batch_size: Optional[int] = 50,
2146
+ skip_validation: Optional[bool] = False,
2147
+ conflict_resolution: Optional[Literal["rename", "skip", "replace"]] = None,
2148
+ validate_meta: Optional[bool] = False,
2149
+ use_strict_validation: Optional[bool] = False,
2150
+ use_caching_for_validation: Optional[bool] = False,
2151
+ ) -> List[ImageInfo]:
2152
+ """
2153
+ Upload images to a Dataset from a blob file in Team Files, using byte offsets and prepared names.
2154
+ To upload a large number of images, use :func:`api.image.upload_by_offsets_generator` instead.
2155
+
2156
+ If you include `metas` during the upload, you can add a custom sort parameter for images.
2157
+ To achieve this, use the context manager :func:`api.image.add_custom_sort` with the desired key name from the meta dictionary to be used for sorting.
2158
+
2159
+ :param dataset: Dataset ID or DatasetInfo object in Supervisely.
2160
+ :type dataset: Union[DatasetInfo,int]
2161
+ :param team_file_id: ID of the binary file in the team storage.
2162
+ :type team_file_id: int
2163
+ :param names: Image names with extension.
2164
+
2165
+ REQUIRED if there is no offsets file in the team storage at the same level as the TAR file.
2166
+ The offsets file must have the same name as the TAR file with the `_offsets.pkl` suffix and must be in pickle format.
2167
+ Example: `tar_name_offsets.pkl`
2168
+ :type names: List[str], optional
2169
+ :param offsets: List of dictionaries with file offsets that define the range of bytes representing the image in the binary.
2170
+ Example: `[{"offsetStart": 0, "offsetEnd": 100}, {"offsetStart": 101, "offsetEnd": 200}]`.
2171
+
2172
+ REQUIRED if there is no offsets file in the team storage at the same level as the TAR file.
2173
+ The offsets file must have the same name as the TAR file with the `_offsets.pkl` suffix and must be in pickle format.
2174
+ Example: `tar_name_offsets.pkl`
2175
+ :type offsets: List[dict], optional
2176
+ :param progress_cb: Function for tracking the progress of uploading.
2177
+ :type progress_cb: tqdm or callable, optional
2178
+ :param metas: Custom additional image infos that contain images technical and/or user-generated data as list of separate dicts.
2179
+ :type metas: List[dict], optional
2180
+ :param batch_size: Number of images to upload in one batch.
2181
+ :type batch_size: int, optional
2182
+ :param skip_validation: Skips validation for images, can result in invalid images being uploaded.
2183
+ :type skip_validation: bool, optional
2184
+ :param conflict_resolution: The strategy to resolve upload conflicts. 'Replace' option will replace the existing images in the dataset with the new images. The images that are being deleted are logged. 'Skip' option will ignore the upload of new images that would result in a conflict. An original image's ImageInfo list will be returned instead. 'Rename' option will rename the new images to prevent any conflict.
2185
+ :type conflict_resolution: Optional[Literal["rename", "skip", "replace"]]
2186
+ :param validate_meta: If True, validates provided meta with saved JSON schema.
2187
+ :type validate_meta: bool, optional
2188
+ :param use_strict_validation: If True, uses strict validation.
2189
+ :type use_strict_validation: bool, optional
2190
+ :param use_caching_for_validation: If True, uses caching for validation.
2191
+ :type use_caching_for_validation: bool, optional
2192
+ :return: List with information about Images. See :class:`info_sequence<info_sequence>`
2193
+ :rtype: :class:`List[ImageInfo]`
2194
+ :Usage example:
2195
+
2196
+ .. code-block:: python
2197
+
2198
+ import supervisely as sly
2199
+ from supervisely.api.module_api import ApiField
2200
+
2201
+
2202
+ server_address = 'https://app.supervisely.com'
2203
+ api_token = 'Your Supervisely API Token'
2204
+ api = sly.Api(server_address, api_token)
2205
+
2206
+ dataset_id = 452984
2207
+ names = ['lemon_1.jpg', 'lemon_1.jpg']
2208
+ offsets = [
2209
+ {ApiField.OFFSET_START: 0, ApiField.OFFSET_END: 100},
2210
+ {ApiField.OFFSET_START: 101, ApiField.OFFSET_END: 200}
2211
+ ]
2212
+ team_file_id = 123456
2213
+ new_imgs_info = api.image.upload_by_offsets(dataset_id, team_file_id, names, offsets)
2214
+
2215
+ # Output example:
2216
+ # ImageInfo(id=136281,
2217
+ # name='lemon_1.jpg',
2218
+ # link=None,
2219
+ # hash=None,
2220
+ # mime=None,
2221
+ # ext=None,
2222
+ # size=100,
2223
+ # width=None,
2224
+ # height=None,
2225
+ # labels_count=0,
2226
+ # dataset_id=452984,
2227
+ # created_at='2025-03-21T18:30:08.551Z',
2228
+ # updated_at='2025-03-21T18:30:08.551Z',
2229
+ # meta={},
2230
+ # path_original='/h5un6l2.../eyJ0eXBlIjoic291cmNlX2Jsb2I...',
2231
+ # full_storage_url='http://storage:port/h5un6l2...,
2232
+ # tags=[],
2233
+ # created_by_id=user),
2234
+ # ImageInfo(...)
2235
+ """
2236
+
2237
+ if isinstance(dataset, int):
2238
+ dataset = self._api.dataset.get_info_by_id(dataset)
2239
+
2240
+ items = []
2241
+ if len(names) != len(offsets):
2242
+ raise ValueError(
2243
+ f"The number of images in the offset file does not match the number of offsets: {len(names)} != {len(offsets)}"
1951
2244
  )
1952
- for info, pos in zip(res_infos_links, links_order):
1953
- result[pos] = info
2245
+ for offset in offsets:
2246
+ if not isinstance(offset, dict):
2247
+ raise ValueError("Offset should be a dictionary")
2248
+ if ApiField.OFFSET_START not in offset or ApiField.OFFSET_END not in offset:
2249
+ raise ValueError(
2250
+ f"Offset should contain '{ApiField.OFFSET_START}' and '{ApiField.OFFSET_END}' keys"
2251
+ )
1954
2252
 
1955
- if len(hashes) > 0:
1956
- res_infos_hashes = self.upload_hashes(
1957
- dataset_id,
1958
- hashes_names,
1959
- hashes,
1960
- progress_cb,
1961
- metas=hashes_metas,
2253
+ items.append({ApiField.TEAM_FILE_ID: team_file_id, ApiField.SOURCE_BLOB: offset})
2254
+
2255
+ custom_data = self._api.project.get_custom_data(dataset.project_id)
2256
+ custom_data[_BLOB_TAG_NAME] = True
2257
+ self._api.project.update_custom_data(dataset.project_id, custom_data)
2258
+
2259
+ return self._upload_bulk_add(
2260
+ func_item_to_kv=lambda image_data, item: {**image_data, **item},
2261
+ dataset_id=dataset.id,
2262
+ names=names,
2263
+ items=items,
2264
+ progress_cb=progress_cb,
2265
+ metas=metas,
2266
+ batch_size=batch_size,
2267
+ skip_validation=skip_validation,
2268
+ conflict_resolution=conflict_resolution,
2269
+ validate_meta=validate_meta,
2270
+ use_strict_validation=use_strict_validation,
2271
+ use_caching_for_validation=use_caching_for_validation,
2272
+ )
2273
+
2274
+ def upload_by_offsets_generator(
2275
+ self,
2276
+ dataset: Union[DatasetInfo, int],
2277
+ team_file_id: int,
2278
+ offsets_file_path: Optional[str] = None,
2279
+ progress_cb: Optional[Union[tqdm, Callable]] = None,
2280
+ metas: Optional[Dict] = None,
2281
+ batch_size: Optional[int] = 10000,
2282
+ skip_validation: Optional[bool] = False,
2283
+ conflict_resolution: Optional[Literal["rename", "skip", "replace"]] = None,
2284
+ validate_meta: Optional[bool] = False,
2285
+ use_strict_validation: Optional[bool] = False,
2286
+ use_caching_for_validation: Optional[bool] = False,
2287
+ ) -> Generator[ImageInfo, None, None]:
2288
+ """
2289
+ Upload images to a Dataset from a blob file in Team Files, using byte offsets.
2290
+ Generates information about uploaded images in batches of max size 10000.
2291
+ Image names will be taken from the offsets file.
2292
+
2293
+ This method is better suited for large datasets, as it does not require loading all image infos into memory at once.
2294
+
2295
+ If you include `metas` during the upload, you can add a custom sort parameter for images.
2296
+ To achieve this, use the context manager :func:`api.image.add_custom_sort` with the desired key name from the meta dictionary to be used for sorting.
2297
+
2298
+ :param dataset: Dataset ID or DatasetInfo object in Supervisely.
2299
+ :type dataset: Union[DatasetInfo,int]
2300
+ :param team_file_id: ID of the binary file in the team storage.
2301
+ :type team_file_id: int
2302
+ :param offsets_file_path: Local path to the file with blob image offsets.
2303
+ :type offsets_file_path: str, optional
2304
+ :param progress_cb: Function for tracking the progress of uploading.
2305
+ :type progress_cb: tqdm or callable, optional
2306
+ :param metas: Custom additional image infos as dict where:
2307
+ `keys` - image names,
2308
+ `values` - image technical and/or user-generated data dicts
2309
+ :type metas: Dict, optional
2310
+ :param batch_size: Number of images to upload in one batch.
2311
+ :type batch_size: int, optional
2312
+ :param skip_validation: Skips validation for images, can result in invalid images being uploaded.
2313
+ :type skip_validation: bool, optional
2314
+ :param conflict_resolution: The strategy to resolve upload conflicts. 'Replace' option will replace the existing images in the dataset with the new images. The images that are being deleted are logged. 'Skip' option will ignore the upload of new images that would result in a conflict. An original image's ImageInfo list will be returned instead. 'Rename' option will rename the new images to prevent any conflict.
2315
+ :type conflict_resolution: Optional[Literal["rename", "skip", "replace"]]
2316
+ :param validate_meta: If True, validates provided meta with saved JSON schema.
2317
+ :type validate_meta: bool, optional
2318
+ :param use_strict_validation: If True, uses strict validation.
2319
+ :type use_strict_validation: bool, optional
2320
+ :param use_caching_for_validation: If True, uses caching for validation.
2321
+ :type use_caching_for_validation: bool, optional
2322
+ :return: Generator with information about Images. See :class:`ImageInfo`
2323
+ :rtype: :class:`Generator[ImageInfo, None, None]`
2324
+ :Usage example:
2325
+
2326
+ .. code-block:: python
2327
+
2328
+ import supervisely as sly
2329
+ from supervisely.api.module_api import ApiField
2330
+
2331
+
2332
+ server_address = 'https://app.supervisely.com'
2333
+ api_token = 'Your Supervisely API Token'
2334
+ api = sly.Api(server_address, api_token)
2335
+
2336
+ dataset_id = 452984
2337
+ team_file_id = 123456
2338
+ img_infos = []
2339
+ new_imgs_info_generator = api.image.upload_by_offsets_generator(dataset_id, team_file_id)
2340
+ for img_infos_batch in new_imgs_info_generator:
2341
+ img_infos.extend(img_infos_batch)
2342
+ """
2343
+
2344
+ if isinstance(dataset, int):
2345
+ dataset = self._api.dataset.get_info_by_id(dataset)
2346
+
2347
+ if offsets_file_path is None:
2348
+ offsets_file_path = self.get_blob_offsets_file(team_file_id)
2349
+ blob_image_infos_generator = BlobImageInfo.load_from_pickle_generator(
2350
+ offsets_file_path, OFFSETS_PKL_BATCH_SIZE
2351
+ )
2352
+
2353
+ for batch in blob_image_infos_generator:
2354
+ names = [item.name for item in batch]
2355
+ metas_batch = [metas[name] for name in names] if metas is not None else [{}] * len(names)
2356
+ items = [
2357
+ {ApiField.TEAM_FILE_ID: team_file_id, ApiField.SOURCE_BLOB: item.offsets_dict}
2358
+ for item in batch
2359
+ ]
2360
+ yield self._upload_bulk_add(
2361
+ func_item_to_kv=lambda image_data, item: {**image_data, **item},
2362
+ dataset_id=dataset.id,
2363
+ names=names,
2364
+ items=items,
2365
+ progress_cb=progress_cb,
2366
+ metas=metas_batch,
1962
2367
  batch_size=batch_size,
1963
2368
  skip_validation=skip_validation,
1964
2369
  conflict_resolution=conflict_resolution,
2370
+ validate_meta=validate_meta,
2371
+ use_strict_validation=use_strict_validation,
2372
+ use_caching_for_validation=use_caching_for_validation,
1965
2373
  )
1966
- for info, pos in zip(res_infos_hashes, hashes_order):
1967
- result[pos] = info
1968
- return result
2374
+ custom_data = self._api.project.get_custom_data(dataset.project_id)
2375
+ custom_data[_BLOB_TAG_NAME] = True
2376
+ self._api.project.update_custom_data(dataset.project_id, custom_data)
2377
+
2378
+ def get_blob_offsets_file(
2379
+ self,
2380
+ team_file_id: int,
2381
+ progress_cb: Optional[Union[tqdm, Callable]] = None,
2382
+ ) -> str:
2383
+ """
2384
+ Get the file with blob image offsets from the team storage.
2385
+
2386
+ :param team_file_id: ID of the binary file in the team storage.
2387
+ :type team_file_id: int
2388
+ :param progress_cb: Function for tracking the progress of downloading.
2389
+ :type progress_cb: tqdm or callable, optional
2390
+ :return: Path to the file with blob image offsets in a temporary directory.
2391
+ :rtype: str
2392
+
2393
+ """
2394
+ file_info = self._api.file.get_info_by_id(team_file_id)
2395
+ if file_info is None:
2396
+ raise ValueError(f"Blob file ID: {team_file_id} with images not found")
2397
+ offset_file_name = Path(file_info.path).stem + OFFSETS_PKL_SUFFIX
2398
+ offset_file_path = os.path.join(Path(file_info.path).parent, offset_file_name)
2399
+ temp_dir = tempfile.mkdtemp()
2400
+ local_offset_file_path = os.path.join(temp_dir, offset_file_name)
2401
+ self._api.file.download(
2402
+ team_id=file_info.team_id,
2403
+ remote_path=offset_file_path,
2404
+ local_save_path=local_offset_file_path,
2405
+ progress_cb=progress_cb,
2406
+ )
2407
+ return local_offset_file_path
1969
2408
 
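A short hedged usage sketch for the helper above (the file ID is illustrative); it downloads the `<tar_name>_offsets.pkl` that sits next to the blob file:

    import supervisely as sly

    api = sly.Api.from_env()
    team_file_id = 123456  # TAR blob file in Team Files; "<name>_offsets.pkl" must sit next to it

    local_offsets_path = api.image.get_blob_offsets_file(team_file_id)
    print(local_offsets_path)  # path inside a temporary directory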
1970
2409
  def _upload_bulk_add(
1971
2410
  self,
@@ -2036,8 +2475,17 @@ class ImageApi(RemoveableBulkModuleApi):
2036
2475
  def _pack_for_request(names: List[str], items: List[Any], metas: List[Dict]) -> List[Any]:
2037
2476
  images = []
2038
2477
  for name, item, meta in zip(names, items, metas):
2039
- item_tuple = func_item_to_kv(item)
2040
- image_data = {ApiField.TITLE: name, item_tuple[0]: item_tuple[1]}
2478
+ image_data = {ApiField.TITLE: name}
2479
+ # Check if the item is a data format for upload by offset
2480
+ if (
2481
+ isinstance(item, dict)
2482
+ and ApiField.TEAM_FILE_ID in item
2483
+ and ApiField.SOURCE_BLOB in item
2484
+ ):
2485
+ image_data = func_item_to_kv(image_data, item)
2486
+ else:
2487
+ item_tuple = func_item_to_kv(item)
2488
+ image_data[item_tuple[0]] = item_tuple[1]
2041
2489
  if hasattr(self, "sort_by") and self.sort_by is not None:
2042
2490
  meta = self._add_custom_sort(meta, name)
2043
2491
  if len(meta) != 0 and type(meta) == dict:
@@ -4615,3 +5063,243 @@ class ImageApi(RemoveableBulkModuleApi):
4615
5063
  meta_copy = copy.deepcopy(meta)
4616
5064
  meta_copy[ApiField.CUSTOM_SORT] = custom_sort
4617
5065
  return meta_copy
5066
+
5067
+ def download_blob_file(
5068
+ self,
5069
+ project_id: int,
5070
+ download_id: str,
5071
+ path: Optional[str] = None,
5072
+ log_progress: bool = True,
5073
+ chunk_size: Optional[int] = None,
5074
+ ) -> Optional[bytes]:
5075
+ """
5076
+ Downloads blob file from Supervisely storage by download ID of any Image that belongs to this file.
5077
+
5078
+ :param project_id: Project ID in Supervisely.
5079
+ :type project_id: int
5080
+ :param download_id: Download ID of any Image that belongs to the blob file in Supervisely storage.
5081
+ :type download_id: str
5082
+ :param path: Path to save the blob file. If None, returns blob file content as bytes.
5083
+ :type path: str, optional
5084
+ :param log_progress: If True, shows progress bar.
5085
+ :type log_progress: bool, optional
5086
+ :param chunk_size: Size of chunk for streaming. Default is 8 MB.
5087
+ :type chunk_size: int, optional
5088
+ :return: Blob file content if path is None, otherwise None.
5089
+ :rtype: bytes or None
5090
+
5091
+ :Usage example:
5092
+
5093
+ .. code-block:: python
5094
+
5095
+
5096
+ api = sly.Api.from_env()
5097
+
5098
+
5099
+ image_id = 6789
5100
+ image_info = api.image.get_info_by_id(image_id)
5101
+ project_id = api.dataset.get_info_by_id(image_info.dataset_id).project_id
5102
+
5103
+ # Download and save to file
5104
+ api.image.download_blob_file(project_id, image_info.download_id, "/path/to/save/archive.tar")
5105
+
5106
+ # Get archive as bytes
5107
+ archive_bytes = api.image.download_blob_file(project_id, image_info.download_id)
5108
+ """
5109
+ if chunk_size is None:
5110
+ chunk_size = 8 * 1024 * 1024
5111
+
5112
+ response = self._api.post(
5113
+ "images.data.download",
5114
+ {ApiField.PROJECT_ID: project_id, ApiField.DOWNLOAD_ID: download_id},
5115
+ stream=True,
5116
+ )
5117
+
5118
+ if log_progress:
5119
+ total_size = int(response.headers.get("Content-Length", 0))
5120
+ progress_cb = tqdm(
5121
+ total=total_size,
5122
+ unit="B",
5123
+ unit_scale=True,
5124
+ desc="Downloading images blob file",
5125
+ leave=True,
5126
+ )
5127
+ if path is not None:
5128
+ ensure_base_path(path)
5129
+ with open(path, "wb") as fd:
5130
+ for chunk in response.iter_content(chunk_size=chunk_size):
5131
+ fd.write(chunk)
5132
+ if log_progress:
5133
+ progress_cb.update(len(chunk))
5134
+ return None
5135
+ else:
5136
+ content = response.content
5137
+ if log_progress:
5138
+ progress_cb.update(len(content))
5139
+ return content
5140
+
5141
+ def upload_blob_images(
5142
+ self,
5143
+ dataset: Union[DatasetInfo, int],
5144
+ blob_file: Union[FileInfo, str],
5145
+ metas: Optional[List[Dict[str, Any]]] = None,
5146
+ change_name_if_conflict: bool = True,
5147
+ progress_cb: Optional[Union[tqdm, Callable]] = None,
5148
+ return_image_infos_generator: bool = False,
5149
+ ) -> Union[Generator[ImageInfo, None, None], None]:
5150
+ """
5151
+ Uploads images from a blob file in Team Files to a dataset.
5152
+
5153
+ IMPORTANT: The file with image offsets must be in the same directory as the blob file.
5154
+ It must have the same name as the blob file with the `_offsets.pkl` suffix.
5155
+ It must be a pickle file with BlobImageInfo objects that define the byte ranges representing each image in the binary.
5156
+ To prepare the offsets file, use the `supervisely.fs.save_blob_offsets_pkl` function.
5157
+
5158
+ :param dataset: Dataset in Supervisely. Can be DatasetInfo object or dataset ID.
5159
+ It is recommended to use DatasetInfo object to avoid additional API requests.
5160
+ :type dataset: Union[DatasetInfo, int]
5161
+ :param blob_file: Blob file in Team Files. Can be FileInfo object or path to blob file.
5162
+ It is recommended to use FileInfo object to avoid additional API requests.
5163
+ :type blob_file: Union[FileInfo, str]
5164
+ :param metas: List of metas for images.
5165
+ :type metas: List[Dict[str, Any]], optional
5166
+ :param change_name_if_conflict: If True, adds a suffix to the image name when the Dataset already contains an image with an identical name. If False, skips images whose names already exist in the Dataset.
5167
+ :type change_name_if_conflict: bool, optional
5168
+ :param progress_cb: Function for tracking upload progress. Tracks the count of processed items.
5169
+ :type progress_cb: Optional[Union[tqdm, Callable]]
5170
+ :param return_image_infos_generator: If True, returns generator of ImageInfo objects. Otherwise, returns None.
5171
+ :type return_image_infos_generator: bool, optional
5172
+
5173
+ :return: Generator of ImageInfo objects if return_image_infos_generator is True, otherwise None.
5174
+ :rtype: Union[Generator[ImageInfo, None, None], None]
5175
+
5176
+
5177
+ """
5178
+ if isinstance(dataset, int):
5179
+ dataset_id = dataset
5180
+ dataset_info = self._api.dataset.get_info_by_id(dataset_id)
5181
+ else:
5182
+ dataset_id = dataset.id
5183
+ dataset_info = dataset
5184
+
5185
+ if isinstance(blob_file, str):
5186
+ team_file_info = self._api.file.get_info_by_path(dataset_info.team_id, blob_file)
5187
+ else:
5188
+ team_file_info = blob_file
5189
+
5190
+ image_infos_generator = self.upload_by_offsets_generator(
5191
+ dataset=dataset_info,
5192
+ team_file_id=team_file_info.id,
5193
+ progress_cb=progress_cb,
5194
+ metas=metas,
5195
+ conflict_resolution="rename" if change_name_if_conflict else "skip",
5196
+ )
5197
+ if return_image_infos_generator:
5198
+ return image_infos_generator
5199
+ else:
5200
+ for _ in image_infos_generator:
5201
+ pass
5202
+
5203
+ async def download_blob_file_async(
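The docstring for `upload_blob_images` has no usage example, so here is a hedged sketch of the intended flow (the remote path and dataset ID are illustrative, and the offsets file is assumed to already sit next to the TAR in Team Files):

    import supervisely as sly

    api = sly.Api.from_env()

    # DatasetInfo and FileInfo objects are preferred to avoid extra API requests.
    dataset = api.dataset.get_info_by_id(452984)
    blob_file = api.file.get_info_by_path(dataset.team_id, "/blobs/images.tar")

    api.image.upload_blob_images(dataset, blob_file, change_name_if_conflict=True)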
5204
+ self,
5205
+ project_id: int,
5206
+ download_id: str,
5207
+ path: str,
5208
+ semaphore: Optional[asyncio.Semaphore] = None,
5209
+ log_progress: bool = True,
5210
+ progress_cb: Optional[Union[tqdm, Callable]] = None,
5211
+ ):
5212
+ """
5213
+ Downloads blob file from Supervisely storage by download ID asynchronously.
5214
+
5215
+ :param project_id: Project ID in Supervisely.
5216
+ :type project_id: int
5217
+ :param download_id: Download ID of any Image that belongs to the blob file in Supervisely storage.
5218
+ :type download_id: str
5219
+ :param path: Path to save the blob file.
5220
+ :type path: str
5221
+ :param semaphore: Semaphore for limiting the number of simultaneous downloads.
5222
+ :type semaphore: asyncio.Semaphore, optional
5223
+ :param log_progress: If True, shows progress bar.
5224
+ :type log_progress: bool, optional
5225
+ :param progress_cb: Function for tracking download progress.
5226
+ :type progress_cb: tqdm or callable, optional
5227
+ """
5228
+ api_method_name = "images.data.download"
5229
+
5230
+ if semaphore is None:
5231
+ semaphore = self._api.get_default_semaphore()
5232
+
5233
+ async with semaphore:
5234
+ ensure_base_path(path)
5235
+
5236
+ if log_progress:
5237
+ response = self._api.get(
5238
+ api_method_name,
5239
+ {ApiField.PROJECT_ID: project_id, ApiField.DOWNLOAD_ID: download_id},
5240
+ stream=True,
5241
+ )
5242
+ total_size = int(response.headers.get("Content-Length", 0))
5243
+ response.close()
5244
+ name = os.path.basename(path)
5245
+ if progress_cb is None:
5246
+ progress_cb = tqdm(
5247
+ total=total_size,
5248
+ unit="B",
5249
+ unit_scale=True,
5250
+ desc=f"Downloading images blob file {name}",
5251
+ leave=True,
5252
+ )
5253
+
5254
+ async with aiofiles.open(path, "wb") as fd:
5255
+ async for chunk, _ in self._api.stream_async(
5256
+ method=api_method_name,
5257
+ method_type="POST",
5258
+ data={ApiField.PROJECT_ID: project_id, ApiField.DOWNLOAD_ID: download_id},
5259
+ chunk_size=8 * 1024 * 1024,
5260
+ ):
5261
+ if log_progress:
5262
+ progress_cb.update(len(chunk))
5263
+ await fd.write(chunk)
5264
+
5265
+ async def download_blob_files_async(
5266
+ self,
5267
+ project_id: int,
5268
+ download_ids: List[str],
5269
+ paths: List[str],
5270
+ semaphore: Optional[asyncio.Semaphore] = None,
5271
+ log_progress: bool = True,
5272
+ progress_cb: Optional[Union[tqdm, Callable]] = None,
5273
+ ):
5274
+ """
5275
+ Downloads multiple blob files from Supervisely storage by download IDs asynchronously.
5276
+
5277
+ :param project_id: Project ID in Supervisely.
5278
+ :type project_id: int
5279
+ :param download_ids: List of download IDs of any Image that belongs to the blob files in Supervisely storage.
5280
+ :type download_ids: List[str]
5281
+ :param paths: List of paths to save the blob files.
5282
+ :type paths: List[str]
5283
+ :param semaphore: Semaphore for limiting the number of simultaneous downloads.
5284
+ :type semaphore: asyncio.Semaphore, optional
5285
+ :param log_progress: If True, shows progress bar.
5286
+ :type log_progress: bool, optional
5287
+ :param progress_cb: Function for tracking download progress.
5288
+ :type progress_cb: tqdm or callable, optional
5289
+ """
5290
+
5291
+ if semaphore is None:
5292
+ semaphore = self._api.get_default_semaphore()
5293
+
5294
+ tasks = []
5295
+ for download_id, path in zip(download_ids, paths):
5296
+ task = self.download_blob_file_async(
5297
+ project_id=project_id,
5298
+ download_id=download_id,
5299
+ path=path,
5300
+ semaphore=semaphore,
5301
+ log_progress=log_progress,
5302
+ progress_cb=progress_cb,
5303
+ )
5304
+ tasks.append(task)
5305
+ await asyncio.gather(*tasks)
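Finally, a hedged sketch of driving the async download helpers with plain `asyncio` (the image ID and paths are illustrative):

    import asyncio
    import supervisely as sly

    api = sly.Api.from_env()

    image_info = api.image.get_info_by_id(6789)  # any image stored in a blob file
    project_id = api.dataset.get_info_by_id(image_info.dataset_id).project_id

    # Single blob file
    asyncio.run(
        api.image.download_blob_file_async(project_id, image_info.download_id, "/tmp/archive.tar")
    )

    # Several blob files concurrently
    download_ids = [image_info.download_id]      # one entry per blob file
    paths = ["/tmp/archive_copy.tar"]
    asyncio.run(api.image.download_blob_files_async(project_id, download_ids, paths))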