supervisely-6.73.342-py3-none-any.whl → supervisely-6.73.344-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -627,6 +627,17 @@ class ApiField:
     """"""
     HOTKEY = "hotkey"
     """"""
+    RELATED_DATA_ID = "relatedDataId"
+    """"""
+    DOWNLOAD_ID = "downloadId"
+    """"""
+    OFFSET_START = "offsetStart"
+    """"""
+    OFFSET_END = "offsetEnd"
+    """"""
+    SOURCE_BLOB = "sourceBlob"
+    """"""
+
 
 def _get_single_item(items):
     """_get_single_item"""
@@ -954,7 +954,12 @@ class ProjectApi(CloneableModuleApi, UpdateableModule, RemoveableModuleApi):
 
     def update_custom_data(self, id: int, data: Dict, silent: bool = False) -> Dict:
         """
-        Updates custom data of the Project by ID
+        Updates custom data of the Project by ID.
+
+        IMPORTANT: This method replaces the current custom data with the provided one.
+        If you want to extend the custom data or update specific key-value pairs,
+        use :func:`get_custom_data` first to retrieve the existing data,
+        then modify it accordingly before calling this method.
 
         :param id: Project ID in Supervisely.
         :type id: int
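
The rewritten docstring above describes a read-modify-write workflow but does not show it. A minimal sketch of that pattern, assuming an existing `api` instance and a hypothetical project ID (`get_custom_data` and `update_custom_data` are the ProjectApi methods the docstring refers to):

    import supervisely as sly

    api = sly.Api.from_env()
    project_id = 12345  # hypothetical project ID

    # Read the current custom data, merge in new keys, then write the full dict back,
    # since update_custom_data() replaces whatever is stored on the server.
    custom_data = api.project.get_custom_data(project_id) or {}
    custom_data["review_status"] = "approved"
    api.project.update_custom_data(project_id, custom_data)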
@@ -29,6 +29,7 @@ from supervisely.io.fs import (
     touch,
     unpack_archive,
 )
+from supervisely.project.project import Project
 from supervisely.project.project_settings import LabelingInterface
 from supervisely.project.project_type import ProjectType
 from supervisely.sly_logger import logger
@@ -232,6 +233,9 @@ class ImportManager:
         archives = []
         path = new_paths_to_scan.pop()
         for root, _, files in os.walk(path):
+            if Path(root).name == Project.blob_dir_name:
+                logger.info(f"Skip unpacking archive in blob dir: {root}")
+                continue
             for file in files:
                 file_path = os.path.join(root, file)
                 if is_archive(file_path=file_path):
@@ -48,8 +48,8 @@ def validate_mimetypes(name: str, path: str) -> list:
     mimetypes.add_type("image/webp", ".webp")  # to extend types_map
     mimetypes.add_type("image/jpeg", ".jfif")  # to extend types_map
 
-    mime = magic.Magic(mime=True)
-    mimetype = mime.from_file(path)
+    with open(path, "rb") as f:
+        mimetype = magic.from_buffer(f.read(), mime=True)
     file_ext = get_file_ext(path).lower()
     if file_ext in mimetypes.guess_all_extensions(mimetype):
         return name
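
The hunk above swaps `magic.Magic(mime=True).from_file(path)` for `magic.from_buffer(...)`, so libmagic sniffs bytes that Python has already read instead of opening the path itself. A standalone sketch of the same buffer-based check (the helper name and the 4 KB prefix are illustrative additions; the diff itself reads the whole file):

    import mimetypes

    import magic  # python-magic

    def extension_matches_content(path: str) -> bool:
        """Return True if the file extension is consistent with the detected MIME type."""
        with open(path, "rb") as f:
            # A few kilobytes are usually enough for libmagic to identify common image formats.
            detected = magic.from_buffer(f.read(4096), mime=True)
        allowed = mimetypes.guess_all_extensions(detected)
        return bool(allowed) and path.lower().endswith(tuple(allowed))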
@@ -20,6 +20,7 @@ from supervisely.convert.image.image_helper import validate_image_bounds
 from supervisely.io.fs import dirs_filter, file_exists, get_file_ext
 from supervisely.io.json import load_json_file
 from supervisely.project.project import find_project_dirs
+from supervisely.project.project import upload_project as upload_project_fs
 from supervisely.project.project_settings import LabelingInterface
 
 DATASET_ITEMS = "items"
@@ -32,10 +33,19 @@ class SLYImageConverter(ImageConverter):
         super().__init__(*args, **kwargs)
         self._project_structure = None
         self._supports_links = True
+        self._blob_project = False
 
     def __str__(self):
         return AvailableImageConverters.SLY
 
+    @property
+    def blob_project(self) -> bool:
+        return self._blob_project
+
+    @blob_project.setter
+    def blob_project(self, value: bool):
+        self._blob_project = value
+
     @property
     def ann_ext(self) -> str:
         return ".json"
@@ -174,6 +184,11 @@ class SLYImageConverter(ImageConverter):
         meta = None
         for project_dir in project_dirs:
             project_fs = Project(project_dir, mode=OpenMode.READ)
+            if len(project_fs.blob_files) > 0:
+                self.blob_project = True
+                logger.info("Found blob files in the project, skipping")
+                continue
+
             if meta is None:
                 meta = project_fs.meta
             else:
@@ -207,6 +222,8 @@ class SLYImageConverter(ImageConverter):
             if ds_cnt > 1:  # multiple datasets
                 self._project_structure = project
                 return True
+            elif self.blob_project:
+                return True
             else:
                 return False
         except Exception as e:
@@ -272,6 +289,15 @@ class SLYImageConverter(ImageConverter):
 
         if self._project_structure:
             self.upload_project(api, dataset_id, batch_size, log_progress)
+        elif self.blob_project:
+            dataset_info = api.dataset.get_info_by_id(dataset_id, raise_error=True)
+            upload_project_fs(
+                dir=self._input_data,
+                api=api,
+                workspace_id=dataset_info.workspace_id,
+                log_progress=log_progress,
+                project_id=dataset_info.project_id,
+            )
         else:
             super().upload_dataset(api, dataset_id, batch_size, log_progress)
 
@@ -289,6 +315,7 @@ class SLYImageConverter(ImageConverter):
         progress, progress_cb = None, None
 
         logger.info("Uploading project structure")
+
         def _upload_project(
             project_structure: Dict,
             project_id: int,
@@ -306,7 +333,9 @@ class SLYImageConverter(ImageConverter):
 
             items = value.get(DATASET_ITEMS, [])
             nested_datasets = value.get(NESTED_DATASETS, {})
-            logger.info(f"Dataset: {ds_name}, items: {len(items)}, nested datasets: {len(nested_datasets)}")
+            logger.info(
+                f"Dataset: {ds_name}, items: {len(items)}, nested datasets: {len(nested_datasets)}"
+            )
             if items:
                 super(SLYImageConverter, self).upload_dataset(
                     api, dataset_id, batch_size, entities=items, progress_cb=progress_cb
supervisely/io/fs.py CHANGED
@@ -10,7 +10,18 @@ import re
 import shutil
 import subprocess
 import tarfile
-from typing import Callable, Dict, Generator, List, Literal, Optional, Tuple, Union
+from pathlib import Path
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Union,
+)
 
 import aiofiles
 import requests
@@ -18,11 +29,17 @@ from requests.structures import CaseInsensitiveDict
 from tqdm import tqdm
 
 from supervisely._utils import get_bytes_hash, get_or_create_event_loop, get_string_hash
+
+if TYPE_CHECKING:
+    from supervisely.api.image_api import BlobImageInfo
+
 from supervisely.io.fs_cache import FileCache
 from supervisely.sly_logger import logger
 from supervisely.task.progress import Progress
 
 JUNK_FILES = [".DS_Store", "__MACOSX", "._.DS_Store", "Thumbs.db", "desktop.ini"]
+OFFSETS_PKL_SUFFIX = "_offsets.pkl"  # suffix for pickle file with image offsets
+OFFSETS_PKL_BATCH_SIZE = 10000  # 10k images per batch when loading from pickle
 
 
 def get_file_name(path: str) -> str:
@@ -1571,12 +1588,12 @@ async def list_files_recursively_async(
    :rtype: List[str]
 
    :Usage example:
-
+
    .. code-block:: python
-
+
        import supervisely as sly
        from supervisely._utils import run_coroutine
-
+
        dir_path = '/home/admin/work/projects/examples'
 
        coroutine = sly.fs.list_files_recursively_async(dir_path)
@@ -1616,3 +1633,220 @@ async def list_files_recursively_async(
 
     loop = get_or_create_event_loop()
     return await loop.run_in_executor(None, sync_file_list)
+
+
+def get_file_offsets_batch_generator(
+    archive_path: str,
+    team_file_id: Optional[int] = None,
+    filter_func: Optional[Callable] = None,
+    output_format: Literal["dicts", "objects"] = "dicts",
+    batch_size: int = OFFSETS_PKL_BATCH_SIZE,
+) -> Generator[Union[List[Dict], List["BlobImageInfo"]], None, None]:
+    """
+    Extracts offset information for files from TAR archives and returns a generator that yields the information in batches.
+
+    `team_file_id` may be None if it's not possible to obtain the ID at this moment.
+    You can set the `team_file_id` later when uploading the file to Supervisely.
+
+    :param archive_path: Local path to the archive
+    :type archive_path: str
+    :param team_file_id: ID of file in Team Files. Default is None.
+        `team_file_id` may be None if it's not possible to obtain the ID at this moment.
+        You can set the `team_file_id` later when uploading the file to Supervisely.
+    :type team_file_id: Optional[int]
+    :param filter_func: Function to filter files. The function should take a filename as input and return True if the file should be included.
+    :type filter_func: Callable, optional
+    :param output_format: Format of the output. Default is `dicts`.
+        `objects` - returns a list of BlobImageInfo objects.
+        `dicts` - returns a list of dictionaries.
+    :type output_format: Literal["dicts", "objects"]
+    :returns: Generator yielding batches of file information in the specified format.
+    :rtype: Generator[Union[List[Dict], List[BlobImageInfo]], None, None]
+
+    :raises ValueError: If the archive type is not supported or contains compressed files
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        archive_path = '/home/admin/work/projects/examples.tar'
+        file_infos = sly.fs.get_file_offsets_batch_generator(archive_path)
+        for batch in file_infos:
+            print(batch)
+
+        # Output:
+        # [
+        #     {
+        #         "title": "image1.jpg",
+        #         "teamFileId": None,
+        #         "sourceBlob": {
+        #             "offsetStart": 0,
+        #             "offsetEnd": 123456
+        #         }
+        #     },
+        #     {
+        #         "title": "image2.jpg",
+        #         "teamFileId": None,
+        #         "sourceBlob": {
+        #             "offsetStart": 123456,
+        #             "offsetEnd": 234567
+        #         }
+        #     }
+        # ]
+    """
+    from supervisely.api.image_api import BlobImageInfo
+
+    ext = Path(archive_path).suffix.lower()
+
+    if ext == ".tar":
+        if output_format == "dicts":
+            yield from _process_tar_generator(
+                tar_path=archive_path,
+                team_file_id=team_file_id,
+                filter_func=filter_func,
+                batch_size=batch_size,
+            )
+        else:
+            for batch in _process_tar_generator(
+                tar_path=archive_path,
+                team_file_id=team_file_id,
+                filter_func=filter_func,
+                batch_size=batch_size,
+            ):
+                blob_file_infos = [BlobImageInfo.from_dict(file_info) for file_info in batch]
+                yield blob_file_infos
+    else:
+        raise ValueError(f"Unsupported archive type: {ext}. Only .tar are supported")
+
+
+def _process_tar_generator(
+    tar_path: str,
+    team_file_id: Optional[int] = None,
+    filter_func: Optional[Callable] = None,
+    batch_size: int = OFFSETS_PKL_BATCH_SIZE,
+) -> Generator[List[Dict], None, None]:
+    """
+    Processes a TAR archive and yields batches of offset information for files.
+
+    :param tar_path: Path to the TAR archive
+    :type tar_path: str
+    :param team_file_id: ID of the team file, defaults to None
+    :type team_file_id: Optional[int], optional
+    :param filter_func: Function to filter files. The function should take a filename as input and return True if the file should be included.
+    :type filter_func: Optional[Callable], optional
+    :param batch_size: Number of files in each batch, defaults to 10000
+    :type batch_size: int, optional
+    :yield: Batches of dictionaries with file offset information
+    :rtype: Generator[List[Dict], None, None]
+    """
+    from supervisely.api.api import ApiField
+
+    with tarfile.open(tar_path, "r") as tar:
+        batch = []
+        processed_count = 0
+        members = tar.getmembers()
+        total_members_count = len(members)  # for logging
+
+        logger.debug(f"Processing TAR archive with {total_members_count} members")
+
+        for member in members:
+            skip = not member.isfile()
+
+            if filter_func and not filter_func(member.name):
+                logger.debug(f"File '{member.name}' is skipped by filter function")
+                skip = True
+
+            if not skip:
+                file_info = {
+                    ApiField.TITLE: os.path.basename(member.name),
+                    ApiField.TEAM_FILE_ID: team_file_id,
+                    ApiField.SOURCE_BLOB: {
+                        ApiField.OFFSET_START: member.offset_data,
+                        ApiField.OFFSET_END: member.offset_data + member.size,
+                    },
+                }
+                batch.append(file_info)
+
+                # Yield batch when it reaches the specified size
+                if len(batch) >= batch_size:
+                    processed_count += len(batch)
+                    logger.debug(
+                        f"Yielding batch of {len(batch)} files, processed {processed_count} files so far"
+                    )
+                    yield batch
+                    batch = []
+
+        # Yield any remaining files in the last batch
+        if batch:
+            processed_count += len(batch)
+            logger.debug(
+                f"Yielding final batch of {len(batch)} files, processed {processed_count} files total"
+            )
+            yield batch
+
+
+def save_blob_offsets_pkl(
+    blob_file_path: str,
+    output_dir: str,
+    team_file_id: Optional[int] = None,
+    filter_func: Optional[Callable] = None,
+    batch_size: int = OFFSETS_PKL_BATCH_SIZE,
+    replace: bool = False,
+) -> str:
+    """
+    Processes blob file locally and creates a pickle file with offset information.
+
+    :param blob_file_path: Path to the local blob file
+    :type blob_file_path: str
+    :param output_dir: Path to the output directory
+    :type output_dir: str
+    :param team_file_id: ID of file in Team Files. Default is None.
+        `team_file_id` may be None if it's not possible to obtain the ID at this moment.
+        You can set the `team_file_id` later when uploading the file to Supervisely.
+    :type team_file_id: Optional[int]
+    :param filter_func: Function to filter files. The function should take a filename as input and return True if the file should be included.
+    :type filter_func: Callable, optional
+    :param batch_size: Number of files to process in each batch, defaults to 10000
+    :type batch_size: int, optional
+    :param replace: If True, overwrite the existing file if it exists.
+        If False, skip processing if the file already exists and return its path.
+        Default is False.
+    :type replace: bool
+    :returns: Path to the output pickle file
+    :rtype: str
+
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        archive_path = '/path/to/examples.tar'
+        output_dir = '/path/to/output'
+        sly.fs.save_blob_offsets_pkl(archive_path, output_dir)
+    """
+    from supervisely.api.image_api import BlobImageInfo
+
+    archive_name = Path(blob_file_path).stem
+    output_path = os.path.join(output_dir, archive_name + OFFSETS_PKL_SUFFIX)
+
+    if file_exists(output_path):
+        logger.debug(f"Offsets file already exists: {output_path}")
+        if replace:
+            logger.debug(f"Replacing existing offsets file: {output_path}")
+            silent_remove(output_path)
+        else:
+            logger.debug(f"Skipping processing, using existing offsets file: {output_path}")
+            return output_path
+
+    offsets_batch_generator = get_file_offsets_batch_generator(
+        archive_path=blob_file_path,
+        team_file_id=team_file_id,
+        filter_func=filter_func,
+        output_format="objects",
+        batch_size=batch_size,
+    )
+
+    BlobImageInfo.dump_to_pickle(offsets_batch_generator, output_path)
+    return output_path
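
`filter_func` is only described in prose in the docstrings above. A hedged usage sketch that indexes just the image members of a tar blob and requests `BlobImageInfo` objects instead of dicts (the archive path is hypothetical; the function and parameter names come from the added code):

    import supervisely as sly

    def only_images(name: str) -> bool:
        # filter_func receives the member name inside the tar archive
        return name.lower().endswith((".jpg", ".jpeg", ".png"))

    archive_path = "/path/to/blob.tar"
    batches = sly.fs.get_file_offsets_batch_generator(
        archive_path,
        filter_func=only_images,
        output_format="objects",  # yield BlobImageInfo objects instead of dicts
        batch_size=1000,
    )
    for batch in batches:
        print(f"Indexed {len(batch)} images in this batch")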
@@ -1,12 +1,11 @@
 import asyncio
 import os
-import shutil
 from typing import Callable, List, Optional, Tuple, Union
 
 from tqdm import tqdm
 
 from supervisely import get_project_class
-from supervisely._utils import get_or_create_event_loop, rand_str
+from supervisely._utils import run_coroutine
 from supervisely.annotation.annotation import Annotation, ProjectMeta
 from supervisely.api.api import Api
 from supervisely.api.dataset_api import DatasetInfo
@@ -20,7 +19,7 @@ from supervisely.io.fs import (
     get_directory_size,
     remove_dir,
 )
-from supervisely.io.json import dump_json_file, load_json_file
+from supervisely.io.json import load_json_file
 from supervisely.project import Project
 from supervisely.project.project import Dataset, OpenMode, ProjectType
 from supervisely.sly_logger import logger
@@ -46,7 +45,7 @@ def download(
    :type project_id: int
    :param dest_dir: Destination path to local directory.
    :type dest_dir: str
-    :param dataset_ids: Specified list of Dataset IDs which will be downloaded. Datasets could be downloaded from different projects but with the same data type.
+    :param dataset_ids: Specified list of Dataset IDs which will be downloaded.
    :type dataset_ids: list(int), optional
    :param log_progress: Show downloading logs in the output.
    :type log_progress: bool
@@ -205,12 +204,7 @@ def download_async(
            progress_cb=progress_cb,
            **kwargs,
        )
-        loop = get_or_create_event_loop()
-        if loop.is_running():
-            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
-            future.result()
-        else:
-            loop.run_until_complete(download_coro)
+        run_coroutine(download_coro)
    else:
        raise NotImplementedError(f"Method download_async is not implemented for {project_class}")
 
@@ -254,12 +248,7 @@ def download_async_or_sync(
                progress_cb=progress_cb,
                **kwargs,
            )
-            loop = get_or_create_event_loop()
-            if loop.is_running():
-                future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
-                future.result()
-            else:
-                loop.run_until_complete(download_coro)
+            run_coroutine(download_coro)
        except Exception as e:
            if kwargs.get("resume_download", False) is False:
                remove_dir(dest_dir)
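
The two hunks above collapse the manual event-loop handling into a single `run_coroutine` call. The actual helper lives in `supervisely._utils` and is not shown in this diff; a minimal sketch with the same semantics as the deleted lines (an assumption, not the library's implementation):

    import asyncio

    from supervisely._utils import get_or_create_event_loop  # same helper the old code used

    def run_coroutine(coro):
        """Run a coroutine from synchronous code, reusing an already-running loop if there is one."""
        loop = get_or_create_event_loop()
        if loop.is_running():
            # Another thread owns the running loop: hand the coroutine over and block on the result.
            return asyncio.run_coroutine_threadsafe(coro, loop=loop).result()
        return loop.run_until_complete(coro)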