supervisely-6.73.342-py3-none-any.whl → supervisely-6.73.344-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/__init__.py +3 -2
- supervisely/_utils.py +33 -1
- supervisely/api/annotation_api.py +369 -2
- supervisely/api/api.py +14 -5
- supervisely/api/dataset_api.py +177 -1
- supervisely/api/entity_annotation/figure_api.py +84 -0
- supervisely/api/file_api.py +2 -2
- supervisely/api/image_api.py +740 -52
- supervisely/api/module_api.py +11 -0
- supervisely/api/project_api.py +6 -1
- supervisely/convert/converter.py +4 -0
- supervisely/convert/image/image_helper.py +2 -2
- supervisely/convert/image/sly/sly_image_converter.py +30 -1
- supervisely/io/fs.py +238 -4
- supervisely/project/download.py +5 -16
- supervisely/project/project.py +636 -75
- supervisely/project/project_type.py +2 -0
- supervisely/project/readme_template.md +19 -13
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/METADATA +1 -1
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/RECORD +24 -24
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/LICENSE +0 -0
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/WHEEL +0 -0
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/top_level.txt +0 -0
supervisely/api/image_api.py
CHANGED
```diff
@@ -8,11 +8,15 @@ import asyncio
 import copy
 import io
 import json
+import os
+import pickle
 import re
+import tempfile
 import urllib.parse
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
+from dataclasses import dataclass
 from datetime import datetime
 from functools import partial
 from math import ceil
```
```diff
@@ -51,8 +55,10 @@ from supervisely._utils import (
 from supervisely.annotation.annotation import Annotation
 from supervisely.annotation.tag import Tag
 from supervisely.annotation.tag_meta import TagApplicableTo, TagMeta, TagValueType
+from supervisely.api.dataset_api import DatasetInfo
 from supervisely.api.entity_annotation.figure_api import FigureApi
 from supervisely.api.entity_annotation.tag_api import TagApi
+from supervisely.api.file_api import FileInfo
 from supervisely.api.module_api import (
     ApiField,
     RemoveableBulkModuleApi,
```
```diff
@@ -60,6 +66,8 @@ from supervisely.api.module_api import (
 )
 from supervisely.imaging import image as sly_image
 from supervisely.io.fs import (
+    OFFSETS_PKL_BATCH_SIZE,
+    OFFSETS_PKL_SUFFIX,
     clean_dir,
     ensure_base_path,
     get_file_ext,
```
```diff
@@ -72,6 +80,7 @@ from supervisely.io.fs import (
 )
 from supervisely.project.project_meta import ProjectMeta
 from supervisely.project.project_type import (
+    _BLOB_TAG_NAME,
     _MULTISPECTRAL_TAG_NAME,
     _MULTIVIEW_TAG_NAME,
 )
```
```diff
@@ -81,6 +90,181 @@ SUPPORTED_CONFLICT_RESOLUTIONS = ["skip", "rename", "replace"]
 API_DEFAULT_PER_PAGE = 500
 
 
+@dataclass
+class BlobImageInfo:
+    """
+    Object with image parameters that describes image in blob file.
+
+    :Example:
+
+     .. code-block:: python
+
+        BlobImageInfo(
+            name='IMG_3861.jpeg',
+            offset_start=0,
+            offset_end=148388,
+        )
+    """
+
+    name: str
+    offset_start: int
+    offset_end: int
+
+    @staticmethod
+    def from_image_info(image_info: ImageInfo) -> BlobImageInfo:
+        return BlobImageInfo(
+            name=image_info.name,
+            offset_start=image_info.offset_start,
+            offset_end=image_info.offset_end,
+        )
+
+    def add_team_file_id(self, team_file_id: int):
+        """
+        Add file ID from Team Files to BlobImageInfo object to extend data imported from offsets file.
+        This data is used to link offsets with blob file that is already uploaded to Supervisely storage.
+        """
+        setattr(self, "team_file_id", team_file_id)
+        return self
+
+    def to_dict(self, team_file_id: int = None) -> Dict:
+        """
+        Create dictionary from BlobImageInfo object that can be used for request to Supervisely API.
+        """
+        return {
+            ApiField.TITLE: self.name,
+            ApiField.TEAM_FILE_ID: team_file_id or getattr(self, "team_file_id", None),
+            ApiField.SOURCE_BLOB: {
+                ApiField.OFFSET_START: self.offset_start,
+                ApiField.OFFSET_END: self.offset_end,
+            },
+        }
+
+    @staticmethod
+    def from_dict(offset_dict: Dict, return_team_file_id: bool = False) -> BlobImageInfo:
+        """
+        Create BlobImageInfo object from dictionary that is returned by Supervisely API.
+
+        :param offset_dict: Dictionary with image offsets.
+        :type offset_dict: Dict
+        :param return_team_file_id: If True, return team file ID.
+            Default is False to make size of the object smaller for pickling.
+        :type return_team_file_id: bool
+        :return: BlobImageInfo object.
+        :rtype: BlobImageInfo
+        """
+        blob_info = BlobImageInfo(
+            name=offset_dict[ApiField.TITLE],
+            offset_start=offset_dict[ApiField.SOURCE_BLOB][ApiField.OFFSET_START],
+            offset_end=offset_dict[ApiField.SOURCE_BLOB][ApiField.OFFSET_END],
+        )
+        if return_team_file_id:
+            blob_info.add_team_file_id(offset_dict[ApiField.TEAM_FILE_ID])
+        return blob_info
+
+    @property
+    def offsets_dict(self) -> Dict:
+        return {
+            ApiField.OFFSET_START: self.offset_start,
+            ApiField.OFFSET_END: self.offset_end,
+        }
+
+    @staticmethod
+    def load_from_pickle_generator(
+        file_path: str, batch_size: int = OFFSETS_PKL_BATCH_SIZE
+    ) -> Generator[List["BlobImageInfo"], None, None]:
+        """
+        Load BlobImageInfo objects from a pickle file in batches of specified size.
+        The file should contain a list of BlobImageInfo objects.
+
+        :param file_path: Path to the pickle file containing BlobImageInfo objects.
+        :type file_path: str
+        :param batch_size: Size of each batch. Default is 10000.
+        :type batch_size: int
+        :return: Generator yielding batches of BlobImageInfo objects.
+        :rtype: Generator[List[BlobImageInfo], None, None]
+        """
+        try:
+            current_batch = []
+
+            with open(file_path, "rb") as f:
+                while True:
+                    try:
+                        # Load one pickle object at a time
+                        data = pickle.load(f)
+
+                        if isinstance(data, list):
+                            # More efficient way to process lists
+                            remaining_items = data
+                            while remaining_items:
+                                # Calculate how many more items we need to fill the current batch
+                                items_needed = batch_size - len(current_batch)
+
+                                if items_needed > 0:
+                                    # Take only what we need from the remaining items
+                                    current_batch.extend(remaining_items[:items_needed])
+                                    remaining_items = remaining_items[items_needed:]
+                                else:
+                                    # current_batch is already full or overflowing, don't add more items
+                                    # and proceed directly to yielding the batch
+                                    pass
+
+                                # If we have a full batch, yield it
+                                if len(current_batch) >= batch_size:
+                                    yield current_batch
+                                    current_batch = []
+                        else:
+                            # Handle single item
+                            current_batch.append(data)
+
+                            if len(current_batch) >= batch_size:
+                                yield current_batch
+                                current_batch = []
+
+                    except EOFError:
+                        # End of file reached
+                        break
+                    except Exception as e:
+                        logger.error(f"Error reading pickle data: {str(e)}")
+                        break
+
+            # Yield any remaining items in the final batch
+            if current_batch:
+                yield current_batch
+
+        except Exception as e:
+            logger.error(f"Failed to load BlobImageInfo objects from {file_path}: {str(e)}")
+            yield []
+
+    @staticmethod
+    def dump_to_pickle(
+        offsets: Union[Generator[List[BlobImageInfo]], List[BlobImageInfo]], file_path: str
+    ):
+        """
+        Dump BlobImageInfo objects to a pickle file in batches.
+        To read the data back, use the `load_from_pickle_generator` method.
+
+        :param offsets: Generator yielding batches of BlobImageInfo objects or a list of BlobImageInfo objects.
+        :type offsets: Generator[List[BlobImageInfo]] or List[BlobImageInfo]
+        :param file_path: Path to the pickle file.
+        :type file_path: str
+        """
+
+        try:
+            if isinstance(offsets, Generator):
+                with open(file_path, "ab") as f:
+                    for batch in offsets:
+                        pickle.dump(batch, f)
+            elif isinstance(offsets, list):
+                with open(file_path, "ab") as f:
+                    pickle.dump(offsets, f)
+            else:
+                raise NotImplementedError(
+                    f"Invalid type of 'offsets' parameter for 'dump_offsets' method: {type(offsets)}"
+                )
+        except Exception as e:
+            logger.error(f"Failed to dump BlobImageInfo objects to {file_path}: {str(e)}")
+
+
 class ImageInfo(NamedTuple):
     """
     Object with image parameters from Supervisely.
```
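The offsets file these helpers read and write is a plain pickle stream of `BlobImageInfo` lists. A minimal round-trip sketch (file name and offsets are illustrative; note that `dump_to_pickle` opens the file in append mode, so it extends an existing offsets file rather than overwriting it):

```python
from supervisely.api.image_api import BlobImageInfo

# Describe where each image lives inside the blob archive (illustrative values).
offsets = [
    BlobImageInfo(name="IMG_0001.jpeg", offset_start=0, offset_end=148388),
    BlobImageInfo(name="IMG_0002.jpeg", offset_start=148388, offset_end=297019),
]

# Appends the whole list to the file as a single pickled batch.
BlobImageInfo.dump_to_pickle(offsets, "my_images_offsets.pkl")

# Streams the file back without loading everything into memory at once.
for batch in BlobImageInfo.load_from_pickle_generator("my_images_offsets.pkl", batch_size=10000):
    for info in batch:
        print(info.name, info.offsets_dict)
```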
```diff
@@ -108,6 +292,10 @@ class ImageInfo(NamedTuple):
         full_storage_url='http://app.supervise.ly/h5un6l2bnaz1vj8a9qgms4-public/images/original/7/h/Vo/...jpg'),
         tags=[],
         created_by='admin'
+        related_data_id=None,
+        download_id=None,
+        offset_start=None,
+        offset_end=None,
     )
     """
 
```
```diff
@@ -174,9 +362,25 @@
     #: 'createdAt': '2021-03-05T14:15:39.923Z', 'updatedAt': '2021-03-05T14:15:39.923Z'}, {...}]".
     tags: List[Dict]
 
-    #: :class:`str`:
+    #: :class:`str`: ID of a user who created the image.
     created_by: str
 
+    #: :class:`int`: ID of the blob file in Supervisely storage related to the image.
+    related_data_id: Optional[int] = None
+
+    #: :class:`str`: Unique ID of the image that links it to the corresponding blob file in Supervisely storage
+    #: uses for downloading source blob file.
+    download_id: Optional[str] = None
+
+    #: :class:`int`: Bytes offset of the blob file that points to the start of the image data.
+    offset_start: Optional[int] = None
+
+    #: :class:`int`: Bytes offset of the blob file that points to the end of the image data.
+    offset_end: Optional[int] = None
+
+    # DO NOT DELETE THIS COMMENT
+    #! New fields must be added with default values to keep backward compatibility.
+
     @property
     def preview_url(self):
         """
```
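Because the new `ImageInfo` fields default to `None` (the backward-compatibility comment above), code that consumes existing tuples is unaffected, and blob-backed images can be told apart by their offsets. A small hypothetical check:

```python
# image_id is assumed to exist in the current workspace.
info = api.image.get_info_by_id(image_id)

if info.offset_start is not None and info.offset_end is not None:
    size = info.offset_end - info.offset_start  # bytes the image occupies in the blob file
    print(f"{info.name}: blob-backed (blob file {info.related_data_id}, {size} bytes)")
else:
    print(f"{info.name}: regular image")
```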
```diff
@@ -247,6 +451,10 @@ class ImageApi(RemoveableBulkModuleApi):
             ApiField.FULL_STORAGE_URL,
             ApiField.TAGS,
             ApiField.CREATED_BY_ID[0][0],
+            ApiField.RELATED_DATA_ID,
+            ApiField.DOWNLOAD_ID,
+            ApiField.OFFSET_START,
+            ApiField.OFFSET_END,
         ]
 
     @staticmethod
```
```diff
@@ -913,7 +1121,7 @@ class ImageApi(RemoveableBulkModuleApi):
 
             img_bytes = api.image.download_bytes(dataset_id, [770918])
             print(img_bytes)
-            # Output: [b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01
+            # Output: [b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\ ...']
         """
         if len(ids) == 0:
             return []
```
```diff
@@ -1841,7 +2049,7 @@ class ImageApi(RemoveableBulkModuleApi):
         metas: Optional[List[Dict]] = None,
         batch_size: Optional[int] = 50,
         force_metadata_for_links: bool = True,
-        infos: List[ImageInfo] = None,
+        infos: List[ImageInfo] = None,  # deprecated
         skip_validation: Optional[bool] = False,
         conflict_resolution: Optional[Literal["rename", "skip", "replace"]] = None,
     ) -> List[ImageInfo]:
```
```diff
@@ -1866,7 +2074,7 @@
         :type batch_size: int, optional
         :param force_metadata_for_links: Calculate metadata for links. If False, metadata will be empty.
         :type force_metadata_for_links: bool, optional
-        :param infos:
+        :param infos: DEPRECATED: This parameter is not used.
         :type infos: List[ImageInfo], optional
         :param skip_validation: Skips validation for images, can result in invalid images being uploaded.
         :type skip_validation: bool, optional
```
```diff
@@ -1913,59 +2121,290 @@
         if metas is None:
             metas = [{}] * len(names)
 
- [4 lines removed — content not preserved in this diff view]
+        return self._upload_bulk_add(
+            lambda item: (ApiField.IMAGE_ID, item),
+            dataset_id,
+            names,
+            ids,
+            progress_cb,
+            metas=metas,
+            batch_size=batch_size,
+            force_metadata_for_links=force_metadata_for_links,
+            skip_validation=skip_validation,
+            conflict_resolution=conflict_resolution,
+        )
 
- [30 lines removed — content not preserved in this diff view]
+    def upload_by_offsets(
+        self,
+        dataset: Union[DatasetInfo, int],
+        team_file_id: int,
+        names: List[str] = None,
+        offsets: List[dict] = None,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+        metas: Optional[List[Dict]] = None,
+        batch_size: Optional[int] = 50,
+        skip_validation: Optional[bool] = False,
+        conflict_resolution: Optional[Literal["rename", "skip", "replace"]] = None,
+        validate_meta: Optional[bool] = False,
+        use_strict_validation: Optional[bool] = False,
+        use_caching_for_validation: Optional[bool] = False,
+    ) -> List[ImageInfo]:
+        """
+        Upload images from blob file in Team Files by offsets to Dataset with prepared names.
+        To upload large number of images, use :func:`api.image.upload_by_offsets_generator` instead.
+
+        If you include `metas` during the upload, you can add a custom sort parameter for images.
+        To achieve this, use the context manager :func:`api.image.add_custom_sort` with the desired key name from the meta dictionary to be used for sorting.
+
+        :param dataset: Dataset ID or DatasetInfo object in Supervisely.
+        :type dataset: Union[DatasetInfo, int]
+        :param team_file_id: ID of the binary file in the team storage.
+        :type team_file_id: int
+        :param names: Images names with extension.
+
+            REQUIRED if there is no file containing offsets in the team storage at the same level as the TAR file.
+            Offset file must be named as the TAR file with the `_offsets.pkl` suffix and must be represented in pickle format.
+            Example: `tar_name_offsets.pkl`
+        :type names: List[str], optional
+        :param offsets: List of dictionaries with file offsets that define the range of bytes representing the image in the binary.
+            Example: `[{"offsetStart": 0, "offsetEnd": 100}, {"offsetStart": 101, "offsetEnd": 200}]`.
+
+            REQUIRED if there is no file containing offsets in the team storage at the same level as the TAR file.
+            Offset file must be named as the TAR file with the `_offsets.pkl` suffix and must be represented in pickle format.
+            Example: `tar_name_offsets.pkl`
+        :type offsets: List[dict], optional
+        :param progress_cb: Function for tracking the progress of uploading.
+        :type progress_cb: tqdm or callable, optional
+        :param metas: Custom additional image infos that contain images technical and/or user-generated data as list of separate dicts.
+        :type metas: List[dict], optional
+        :param batch_size: Number of images to upload in one batch.
+        :type batch_size: int, optional
+        :param skip_validation: Skips validation for images, can result in invalid images being uploaded.
+        :type skip_validation: bool, optional
+        :param conflict_resolution: The strategy to resolve upload conflicts. 'Replace' option will replace the existing images in the dataset with the new images. The images that are being deleted are logged. 'Skip' option will ignore the upload of new images that would result in a conflict. An original image's ImageInfo list will be returned instead. 'Rename' option will rename the new images to prevent any conflict.
+        :type conflict_resolution: Optional[Literal["rename", "skip", "replace"]]
+        :param validate_meta: If True, validates provided meta with saved JSON schema.
+        :type validate_meta: bool, optional
+        :param use_strict_validation: If True, uses strict validation.
+        :type use_strict_validation: bool, optional
+        :param use_caching_for_validation: If True, uses caching for validation.
+        :type use_caching_for_validation: bool, optional
+        :return: List with information about Images. See :class:`info_sequence<info_sequence>`
+        :rtype: :class:`List[ImageInfo]`
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            from supervisely.api.module_api import ApiField
+
+            server_address = 'https://app.supervisely.com'
+            api_token = 'Your Supervisely API Token'
+            api = sly.Api(server_address, api_token)
+
+            dataset_id = 452984
+            names = ['lemon_1.jpg', 'lemon_1.jpg']
+            offsets = [
+                {ApiField.OFFSET_START: 0, ApiField.OFFSET_END: 100},
+                {ApiField.OFFSET_START: 101, ApiField.OFFSET_END: 200}
+            ]
+            team_file_id = 123456
+            new_imgs_info = api.image.upload_by_offsets(dataset_id, team_file_id, names, offsets, metas)
+
+            # Output example:
+            # ImageInfo(id=136281,
+            #           name='lemon_1.jpg',
+            #           link=None,
+            #           hash=None,
+            #           mime=None,
+            #           ext=None,
+            #           size=100,
+            #           width=None,
+            #           height=None,
+            #           labels_count=0,
+            #           dataset_id=452984,
+            #           created_at='2025-03-21T18:30:08.551Z',
+            #           updated_at='2025-03-21T18:30:08.551Z',
+            #           meta={},
+            #           path_original='/h5un6l2.../eyJ0eXBlIjoic291cmNlX2Jsb2I...',
+            #           full_storage_url='http://storage:port/h5un6l2...,
+            #           tags=[],
+            #           created_by_id=user),
+            # ImageInfo(...)
+        """
+
+        if isinstance(dataset, int):
+            dataset = self._api.dataset.get_info_by_id(dataset)
+
+        items = []
+        if len(names) != len(offsets):
+            raise ValueError(
+                f"The number of images in the offset file does not match the number of offsets: {len(names)} != {len(offsets)}"
             )
- [2 lines removed — content not preserved in this diff view]
+        for offset in offsets:
+            if not isinstance(offset, dict):
+                raise ValueError("Offset should be a dictionary")
+            if ApiField.OFFSET_START not in offset or ApiField.OFFSET_END not in offset:
+                raise ValueError(
+                    f"Offset should contain '{ApiField.OFFSET_START}' and '{ApiField.OFFSET_END}' keys"
+                )
 
- [7 lines removed — content not preserved in this diff view]
+            items.append({ApiField.TEAM_FILE_ID: team_file_id, ApiField.SOURCE_BLOB: offset})
+
+        custom_data = self._api.project.get_custom_data(dataset.project_id)
+        custom_data[_BLOB_TAG_NAME] = True
+        self._api.project.update_custom_data(dataset.project_id, custom_data)
+
+        return self._upload_bulk_add(
+            func_item_to_kv=lambda image_data, item: {**image_data, **item},
+            dataset_id=dataset.id,
+            names=names,
+            items=items,
+            progress_cb=progress_cb,
+            metas=metas,
+            batch_size=batch_size,
+            skip_validation=skip_validation,
+            conflict_resolution=conflict_resolution,
+            validate_meta=validate_meta,
+            use_strict_validation=use_strict_validation,
+            use_caching_for_validation=use_caching_for_validation,
+        )
+
+    def upload_by_offsets_generator(
+        self,
+        dataset: Union[DatasetInfo, int],
+        team_file_id: int,
+        offsets_file_path: Optional[str] = None,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+        metas: Optional[Dict] = None,
+        batch_size: Optional[int] = 10000,
+        skip_validation: Optional[bool] = False,
+        conflict_resolution: Optional[Literal["rename", "skip", "replace"]] = None,
+        validate_meta: Optional[bool] = False,
+        use_strict_validation: Optional[bool] = False,
+        use_caching_for_validation: Optional[bool] = False,
+    ) -> Generator[ImageInfo, None, None]:
+        """
+        Upload images from blob file in Team Files by offsets to Dataset.
+        Generates information about uploaded images in batches of max size 10000.
+        File names will be taken from the offset file.
+
+        This method is better suited for large datasets, as it does not require resulting all the images into memory at once.
+
+        If you include `metas` during the upload, you can add a custom sort parameter for images.
+        To achieve this, use the context manager :func:`api.image.add_custom_sort` with the desired key name from the meta dictionary to be used for sorting.
+
+        :param dataset: Dataset ID or DatasetInfo object in Supervisely.
+        :type dataset: Union[DatasetInfo, int]
+        :param team_file_id: ID of the binary file in the team storage.
+        :type team_file_id: int
+        :param offsets_file_path: Local path to the file with blob images offsets.
+        :type offsets_file_path: str, optional
+        :param progress_cb: Function for tracking the progress of uploading.
+        :type progress_cb: tqdm or callable, optional
+        :param metas: Custom additional image infos as dict where:
+            `keys` - image names,
+            `values` - image technical and/or user-generated data dicts
+        :type metas: Dict, optional
+        :param batch_size: Number of images to upload in one batch.
+        :type batch_size: int, optional
+        :param skip_validation: Skips validation for images, can result in invalid images being uploaded.
+        :type skip_validation: bool, optional
+        :param conflict_resolution: The strategy to resolve upload conflicts. 'Replace' option will replace the existing images in the dataset with the new images. The images that are being deleted are logged. 'Skip' option will ignore the upload of new images that would result in a conflict. An original image's ImageInfo list will be returned instead. 'Rename' option will rename the new images to prevent any conflict.
+        :type conflict_resolution: Optional[Literal["rename", "skip", "replace"]]
+        :param validate_meta: If True, validates provided meta with saved JSON schema.
+        :type validate_meta: bool, optional
+        :param use_strict_validation: If True, uses strict validation.
+        :type use_strict_validation: bool, optional
+        :param use_caching_for_validation: If True, uses caching for validation.
+        :type use_caching_for_validation: bool, optional
+        :return: Generator with information about Images. See :class:`ImageInfo`
+        :rtype: :class:`Generator[ImageInfo, None, None]`
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            from supervisely.api.module_api import ApiField
+
+            server_address = 'https://app.supervisely.com'
+            api_token = 'Your Supervisely API Token'
+            api = sly.Api(server_address, api_token)
+
+            dataset_id = 452984
+            team_file_id = 123456
+            img_infos = []
+            new_imgs_info_generator = api.image.upload_by_offsets_generator(dataset_id, team_file_id)
+            for img_infos_batch in new_imgs_info_generator:
+                img_infos.extend(img_infos_batch)
+        """
+
+        if isinstance(dataset, int):
+            dataset = self._api.dataset.get_info_by_id(dataset)
+
+        if offsets_file_path is None:
+            offsets_file_path = self.get_blob_offsets_file(team_file_id)
+        blob_image_infos_generator = BlobImageInfo.load_from_pickle_generator(
+            offsets_file_path, OFFSETS_PKL_BATCH_SIZE
+        )
+
+        for batch in blob_image_infos_generator:
+            names = [item.name for item in batch]
+            metas_batch = [metas[name] for name in names] if metas is not None else [{}] * len(names)
+            items = [
+                {ApiField.TEAM_FILE_ID: team_file_id, ApiField.SOURCE_BLOB: item.offsets_dict}
+                for item in batch
+            ]
+            yield self._upload_bulk_add(
+                func_item_to_kv=lambda image_data, item: {**image_data, **item},
+                dataset_id=dataset.id,
+                names=names,
+                items=items,
+                progress_cb=progress_cb,
+                metas=metas_batch,
                 batch_size=batch_size,
                 skip_validation=skip_validation,
                 conflict_resolution=conflict_resolution,
+                validate_meta=validate_meta,
+                use_strict_validation=use_strict_validation,
+                use_caching_for_validation=use_caching_for_validation,
             )
- [3 lines removed — content not preserved in this diff view]
+        custom_data = self._api.project.get_custom_data(dataset.project_id)
+        custom_data[_BLOB_TAG_NAME] = True
+        self._api.project.update_custom_data(dataset.project_id, custom_data)
+
+    def get_blob_offsets_file(
+        self,
+        team_file_id: int,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+    ) -> str:
+        """
+        Get file with blob images offsets from the team storage.
+
+        :param team_file_id: ID of the binary file in the team storage.
+        :type team_file_id: int
+        :param progress_cb: Function for tracking the progress of downloading.
+        :type progress_cb: tqdm or callable, optional
+        :return: Path to the file with blob images offsets in temporary directory.
+        :rtype: str
+
+        """
+        file_info = self._api.file.get_info_by_id(team_file_id)
+        if file_info is None:
+            raise ValueError(f"Blob file ID: {team_file_id} with images not found")
+        offset_file_name = Path(file_info.path).stem + OFFSETS_PKL_SUFFIX
+        offset_file_path = os.path.join(Path(file_info.path).parent, offset_file_name)
+        temp_dir = tempfile.mkdtemp()
+        local_offset_file_path = os.path.join(temp_dir, offset_file_name)
+        self._api.file.download(
+            team_id=file_info.team_id,
+            remote_path=offset_file_path,
+            local_save_path=local_offset_file_path,
+            progress_cb=progress_cb,
+        )
+        return local_offset_file_path
 
     def _upload_bulk_add(
         self,
```
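Note how `metas` differs between the two new methods: `upload_by_offsets` takes a list aligned with `names`, while the generator takes a dict keyed by image name, and every name from the offsets file must have an entry, since the batch lookup `metas[name]` has no per-name fallback. A minimal sketch of the generator path (IDs and meta contents are illustrative):

```python
import supervisely as sly

api = sly.Api.from_env()

dataset_id = 452984    # illustrative
team_file_id = 123456  # illustrative ID of the blob TAR in Team Files

# Keys are image names from the offsets file; every name must be present.
metas = {
    "IMG_0001.jpeg": {"captured-at": "2025-03-21"},
    "IMG_0002.jpeg": {"captured-at": "2025-03-22"},
}

uploaded = 0
for batch in api.image.upload_by_offsets_generator(dataset_id, team_file_id, metas=metas):
    uploaded += len(batch)
print(f"uploaded {uploaded} images")
```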
```diff
@@ -2036,8 +2475,17 @@
         def _pack_for_request(names: List[str], items: List[Any], metas: List[Dict]) -> List[Any]:
             images = []
             for name, item, meta in zip(names, items, metas):
- [2 lines removed — content not preserved in this diff view]
+                image_data = {ApiField.TITLE: name}
+                # Check if the item is a data format for upload by offset
+                if (
+                    isinstance(item, dict)
+                    and ApiField.TEAM_FILE_ID in item
+                    and ApiField.SOURCE_BLOB in item
+                ):
+                    image_data = func_item_to_kv(image_data, item)
+                else:
+                    item_tuple = func_item_to_kv(item)
+                    image_data[item_tuple[0]] = item_tuple[1]
                 if hasattr(self, "sort_by") and self.sort_by is not None:
                     meta = self._add_custom_sort(meta, name)
                 if len(meta) != 0 and type(meta) == dict:
```
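The reworked packer dispatches on the item's shape: a dict carrying both the team file ID and source-blob fields is merged into the image entry wholesale, while every other item still goes through the original `(field, value)` tuple path. Roughly, the two resulting payload entries look like this (`offsetStart`/`offsetEnd` appear in the docstrings above; the other wire names are assumptions based on the `ApiField` constants):

```python
# Offset-based item: merged into the image entry as-is (hypothetical wire names).
{"title": "IMG_0001.jpeg", "teamFileId": 123456,
 "sourceBlob": {"offsetStart": 0, "offsetEnd": 148388}}

# Classic item, e.g. upload by ID: func_item_to_kv returned a (field, value) tuple.
{"title": "IMG_0001.jpeg", "imageId": 770918}
```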
```diff
@@ -4615,3 +5063,243 @@
         meta_copy = copy.deepcopy(meta)
         meta_copy[ApiField.CUSTOM_SORT] = custom_sort
         return meta_copy
+
+    def download_blob_file(
+        self,
+        project_id: int,
+        download_id: str,
+        path: Optional[str] = None,
+        log_progress: bool = True,
+        chunk_size: Optional[int] = None,
+    ) -> Optional[bytes]:
+        """
+        Downloads blob file from Supervisely storage by download ID of any Image that belongs to this file.
+
+        :param project_id: Project ID in Supervisely.
+        :type project_id: int
+        :param download_id: Download ID of any Image that belongs to the blob file in Supervisely storage.
+        :type download_id: str
+        :param path: Path to save the blob file. If None, returns blob file content as bytes.
+        :type path: str, optional
+        :param progress_cb: Function for tracking download progress.
+        :type progress_cb: tqdm or callable, optional
+        :param chunk_size: Size of chunk for streaming. Default is 8 MB.
+        :type chunk_size: int, optional
+        :return: Blob file content if path is None, otherwise None.
+        :rtype: bytes or None
+
+        :Usage example:
+
+         .. code-block:: python
+
+            api = sly.Api.from_env()
+
+            image_id = 6789
+            image_info = api.image.get_info_by_id(image_id)
+            project_id = api.dataset.get_info_by_id(image_info.dataset_id).project_id
+
+            # Download and save to file
+            api.image.download_blob_file(project_id, image_info.download_id, "/path/to/save/archive.tar")
+
+            # Get archive as bytes
+            archive_bytes = api.image.download_blob_file(project_id, image_info.download_id)
+        """
+        if chunk_size is None:
+            chunk_size = 8 * 1024 * 1024
+
+        response = self._api.post(
+            "images.data.download",
+            {ApiField.PROJECT_ID: project_id, ApiField.DOWNLOAD_ID: download_id},
+            stream=True,
+        )
+
+        if log_progress:
+            total_size = int(response.headers.get("Content-Length", 0))
+            progress_cb = tqdm(
+                total=total_size,
+                unit="B",
+                unit_scale=True,
+                desc="Downloading images blob file",
+                leave=True,
+            )
+        if path is not None:
+            ensure_base_path(path)
+            with open(path, "wb") as fd:
+                for chunk in response.iter_content(chunk_size=chunk_size):
+                    fd.write(chunk)
+                    if log_progress:
+                        progress_cb.update(len(chunk))
+            return None
+        else:
+            content = response.content
+            if log_progress:
+                progress_cb.update(len(content))
+            return content
+
+    def upload_blob_images(
+        self,
+        dataset: Union[DatasetInfo, int],
+        blob_file: Union[FileInfo, str],
+        metas: Optional[List[Dict[str, Any]]] = None,
+        change_name_if_conflict: bool = True,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+        return_image_infos_generator: bool = False,
+    ) -> Union[Generator[ImageInfo, None], None]:
+        """
+        Uploads images from blob file in Team Files to dataset.
+
+        IMPORTANT: File with image offsets should be in the same directory as the blob file.
+        This file should be named as the blob file but with the suffix `_offsets.pkl`.
+        It must be a Pickle file with the BlobImageInfos that define the range of bytes representing the image in the binary.
+        To prepare the offsets file, use the `supervisely.fs.save_blob_offsets_pkl` function.
+
+        :param dataset: Dataset in Supervisely. Can be DatasetInfo object or dataset ID.
+            It is recommended to use DatasetInfo object to avoid additional API requests.
+        :type dataset: Union[DatasetInfo, int]
+        :param blob_file: Blob file in Team Files. Can be FileInfo object or path to blob file.
+            It is recommended to use FileInfo object to avoid additional API requests.
+        :type blob_file: Union[FileInfo, str]
+        :param metas: List of metas for images.
+        :type metas: Optional[List[Dict[str, Any]], optional
+        :param change_name_if_conflict: If True adds suffix to the end of Image name when Dataset already contains an Image with identical name, If False and images with the identical names already exist in Dataset skips them.
+        :type change_name_if_conflict: bool, optional
+        :param progress_cb: Function for tracking upload progress. Tracks the count of processed items.
+        :type progress_cb: Optional[Union[tqdm, Callable]]
+        :param return_image_infos_generator: If True, returns generator of ImageInfo objects. Otherwise, returns None.
+        :type return_image_infos_generator: bool, optional
+
+        :return: Generator of ImageInfo objects if return_image_infos_generator is True, otherwise None.
+        :rtype: Union[Generator[ImageInfo, None], None]
+
+        """
+        if isinstance(dataset, int):
+            dataset_id = dataset
+            dataset_info = self._api.dataset.get_info_by_id(dataset_id)
+        else:
+            dataset_id = dataset.id
+            dataset_info = dataset
+
+        if isinstance(blob_file, str):
+            team_file_info = self._api.file.get_info_by_path(dataset_info.team_id, blob_file)
+        else:
+            team_file_info = blob_file
+
+        image_infos_generator = self.upload_by_offsets_generator(
+            dataset=dataset_info,
+            team_file_id=team_file_info.id,
+            progress_cb=progress_cb,
+            metas=metas,
+            conflict_resolution="rename" if change_name_if_conflict else "skip",
+        )
+        if return_image_infos_generator:
+            return image_infos_generator
+        else:
+            for _ in image_infos_generator:
+                pass
+
+    async def download_blob_file_async(
+        self,
+        project_id: int,
+        download_id: str,
+        path: str,
+        semaphore: Optional[asyncio.Semaphore] = None,
+        log_progress: bool = True,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+    ):
+        """
+        Downloads blob file from Supervisely storage by download ID asynchronously.
+
+        :param project_id: Project ID in Supervisely.
+        :type project_id: int
+        :param download_id: Download ID of any Image that belongs to the blob file in Supervisely storage.
+        :type download_id: str
+        :param path: Path to save the blob file.
+        :type path: str
+        :param semaphore: Semaphore for limiting the number of simultaneous downloads.
+        :type semaphore: asyncio.Semaphore, optional
+        :param log_progress: If True, shows progress bar.
+        :type log_progress: bool, optional
+        :param progress_cb: Function for tracking download progress.
+        :type progress_cb: tqdm or callable, optional
+        """
+        api_method_name = "images.data.download"
+
+        if semaphore is None:
+            semaphore = self._api.get_default_semaphore()
+
+        async with semaphore:
+            ensure_base_path(path)
+
+            if log_progress:
+                response = self._api.get(
+                    api_method_name,
+                    {ApiField.PROJECT_ID: project_id, ApiField.DOWNLOAD_ID: download_id},
+                    stream=True,
+                )
+                total_size = int(response.headers.get("Content-Length", 0))
+                response.close()
+                name = os.path.basename(path)
+                if progress_cb is None:
+                    progress_cb = tqdm(
+                        total=total_size,
+                        unit="B",
+                        unit_scale=True,
+                        desc=f"Downloading images blob file {name}",
+                        leave=True,
+                    )
+
+            async with aiofiles.open(path, "wb") as fd:
+                async for chunk, _ in self._api.stream_async(
+                    method=api_method_name,
+                    method_type="POST",
+                    data={ApiField.PROJECT_ID: project_id, ApiField.DOWNLOAD_ID: download_id},
+                    chunk_size=8 * 1024 * 1024,
+                ):
+                    if log_progress:
+                        progress_cb.update(len(chunk))
+                    await fd.write(chunk)
+
+    async def download_blob_files_async(
+        self,
+        project_id: int,
+        download_ids: List[str],
+        paths: List[str],
+        semaphore: Optional[asyncio.Semaphore] = None,
+        log_progress: bool = True,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+    ):
+        """
+        Downloads multiple blob files from Supervisely storage by download IDs asynchronously.
+
+        :param project_id: Project ID in Supervisely.
+        :type project_id: int
+        :param download_ids: List of download IDs of any Image that belongs to the blob files in Supervisely storage.
+        :type download_ids: List[str]
+        :param paths: List of paths to save the blob files.
+        :type paths: List[str]
+        :param semaphore: Semaphore for limiting the number of simultaneous downloads.
+        :type semaphore: asyncio.Semaphore, optional
+        :param log_progress: If True, shows progress bar.
+        :type log_progress: bool, optional
+        :param progress_cb: Function for tracking download progress.
+        :type progress_cb: tqdm or callable, optional
+        """
+
+        if semaphore is None:
+            semaphore = self._api.get_default_semaphore()
+
+        tasks = []
+        for download_id, path in zip(download_ids, paths):
+            task = self.download_blob_file_async(
+                project_id=project_id,
+                download_id=download_id,
+                path=path,
+                semaphore=semaphore,
+                log_progress=log_progress,
+                progress_cb=progress_cb,
+            )
+            tasks.append(task)
+        await asyncio.gather(*tasks)
```
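A minimal sketch of driving the batch download helper from synchronous code (project ID, download IDs, and paths are illustrative; `download_id` values come from blob-backed `ImageInfo` objects):

```python
import asyncio

import supervisely as sly

api = sly.Api.from_env()

project_id = 12345                      # illustrative
download_ids = ["dl-abc", "dl-def"]     # ImageInfo.download_id values (illustrative)
paths = ["/tmp/blob_1.tar", "/tmp/blob_2.tar"]

# Cap concurrent transfers instead of relying on the API's default semaphore.
semaphore = asyncio.Semaphore(2)

asyncio.run(
    api.image.download_blob_files_async(
        project_id=project_id,
        download_ids=download_ids,
        paths=paths,
        semaphore=semaphore,
    )
)
```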