supervisely-6.73.342-py3-none-any.whl → supervisely-6.73.344-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -627,6 +627,17 @@ class ApiField:
     """"""
     HOTKEY = "hotkey"
     """"""
+    RELATED_DATA_ID = "relatedDataId"
+    """"""
+    DOWNLOAD_ID = "downloadId"
+    """"""
+    OFFSET_START = "offsetStart"
+    """"""
+    OFFSET_END = "offsetEnd"
+    """"""
+    SOURCE_BLOB = "sourceBlob"
+    """"""
+
 
 def _get_single_item(items):
     """_get_single_item"""
@@ -954,7 +954,12 @@ class ProjectApi(CloneableModuleApi, UpdateableModule, RemoveableModuleApi):
 
     def update_custom_data(self, id: int, data: Dict, silent: bool = False) -> Dict:
         """
-        Updates custom data of the Project by ID
+        Updates custom data of the Project by ID.
+
+        IMPORTANT: This method replaces the current custom data with the provided one.
+        If you want to extend the custom data or update specific key-value pairs,
+        use :func:`get_custom_data` first to retrieve the existing data,
+        then modify it accordingly before calling this method.
 
         :param id: Project ID in Supervisely.
         :type id: int
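
The rewritten docstring above describes a read-modify-write workflow but does not show it. A minimal sketch of that pattern, assuming an existing `api` instance and a hypothetical project ID (`get_custom_data` and `update_custom_data` are the ProjectApi methods the docstring refers to):

    import supervisely as sly

    api = sly.Api.from_env()
    project_id = 12345  # hypothetical project ID

    # Read the current custom data, merge in new keys, then write the full dict back,
    # since update_custom_data() replaces whatever is stored on the server.
    custom_data = api.project.get_custom_data(project_id) or {}
    custom_data["review_status"] = "approved"
    api.project.update_custom_data(project_id, custom_data)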
@@ -29,6 +29,7 @@ from supervisely.io.fs import (
     touch,
     unpack_archive,
 )
+from supervisely.project.project import Project
 from supervisely.project.project_settings import LabelingInterface
 from supervisely.project.project_type import ProjectType
 from supervisely.sly_logger import logger
@@ -232,6 +233,9 @@ class ImportManager:
         archives = []
         path = new_paths_to_scan.pop()
         for root, _, files in os.walk(path):
+            if Path(root).name == Project.blob_dir_name:
+                logger.info(f"Skip unpacking archive in blob dir: {root}")
+                continue
             for file in files:
                 file_path = os.path.join(root, file)
                 if is_archive(file_path=file_path):
@@ -48,8 +48,8 @@ def validate_mimetypes(name: str, path: str) -> list:
     mimetypes.add_type("image/webp", ".webp")  # to extend types_map
     mimetypes.add_type("image/jpeg", ".jfif")  # to extend types_map
 
-    mime = magic.Magic(mime=True)
-    mimetype = mime.from_file(path)
+    with open(path, "rb") as f:
+        mimetype = magic.from_buffer(f.read(), mime=True)
     file_ext = get_file_ext(path).lower()
     if file_ext in mimetypes.guess_all_extensions(mimetype):
         return name
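
The hunk above swaps `magic.Magic(mime=True).from_file(path)` for `magic.from_buffer(...)`, so libmagic sniffs bytes that Python has already read instead of opening the path itself. A standalone sketch of the same buffer-based check (the helper name and the 4 KB prefix are illustrative additions; the diff itself reads the whole file):

    import mimetypes

    import magic  # python-magic

    def extension_matches_content(path: str) -> bool:
        """Return True if the file extension is consistent with the detected MIME type."""
        with open(path, "rb") as f:
            # A few kilobytes are usually enough for libmagic to identify common image formats.
            detected = magic.from_buffer(f.read(4096), mime=True)
        allowed = mimetypes.guess_all_extensions(detected)
        return bool(allowed) and path.lower().endswith(tuple(allowed))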
@@ -20,6 +20,7 @@ from supervisely.convert.image.image_helper import validate_image_bounds
 from supervisely.io.fs import dirs_filter, file_exists, get_file_ext
 from supervisely.io.json import load_json_file
 from supervisely.project.project import find_project_dirs
+from supervisely.project.project import upload_project as upload_project_fs
 from supervisely.project.project_settings import LabelingInterface
 
 DATASET_ITEMS = "items"
@@ -32,10 +33,19 @@ class SLYImageConverter(ImageConverter):
         super().__init__(*args, **kwargs)
         self._project_structure = None
         self._supports_links = True
+        self._blob_project = False
 
     def __str__(self):
         return AvailableImageConverters.SLY
 
+    @property
+    def blob_project(self) -> bool:
+        return self._blob_project
+
+    @blob_project.setter
+    def blob_project(self, value: bool):
+        self._blob_project = value
+
     @property
     def ann_ext(self) -> str:
         return ".json"
@@ -174,6 +184,11 @@ class SLYImageConverter(ImageConverter):
         meta = None
         for project_dir in project_dirs:
             project_fs = Project(project_dir, mode=OpenMode.READ)
+            if len(project_fs.blob_files) > 0:
+                self.blob_project = True
+                logger.info("Found blob files in the project, skipping")
+                continue
+
             if meta is None:
                 meta = project_fs.meta
             else:
@@ -207,6 +222,8 @@ class SLYImageConverter(ImageConverter):
             if ds_cnt > 1:  # multiple datasets
                 self._project_structure = project
                 return True
+            elif self.blob_project:
+                return True
             else:
                 return False
         except Exception as e:
@@ -272,6 +289,15 @@ class SLYImageConverter(ImageConverter):
 
         if self._project_structure:
             self.upload_project(api, dataset_id, batch_size, log_progress)
+        elif self.blob_project:
+            dataset_info = api.dataset.get_info_by_id(dataset_id, raise_error=True)
+            upload_project_fs(
+                dir=self._input_data,
+                api=api,
+                workspace_id=dataset_info.workspace_id,
+                log_progress=log_progress,
+                project_id=dataset_info.project_id,
+            )
         else:
             super().upload_dataset(api, dataset_id, batch_size, log_progress)
 
@@ -289,6 +315,7 @@ class SLYImageConverter(ImageConverter):
         progress, progress_cb = None, None
 
         logger.info("Uploading project structure")
+
         def _upload_project(
             project_structure: Dict,
             project_id: int,
@@ -306,7 +333,9 @@ class SLYImageConverter(ImageConverter):
 
             items = value.get(DATASET_ITEMS, [])
             nested_datasets = value.get(NESTED_DATASETS, {})
-            logger.info(f"Dataset: {ds_name}, items: {len(items)}, nested datasets: {len(nested_datasets)}")
+            logger.info(
+                f"Dataset: {ds_name}, items: {len(items)}, nested datasets: {len(nested_datasets)}"
+            )
             if items:
                 super(SLYImageConverter, self).upload_dataset(
                     api, dataset_id, batch_size, entities=items, progress_cb=progress_cb
supervisely/io/fs.py CHANGED
@@ -10,7 +10,18 @@ import re
 import shutil
 import subprocess
 import tarfile
-from typing import Callable, Dict, Generator, List, Literal, Optional, Tuple, Union
+from pathlib import Path
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Union,
+)
 
 import aiofiles
 import requests
@@ -18,11 +29,17 @@ from requests.structures import CaseInsensitiveDict
 from tqdm import tqdm
 
 from supervisely._utils import get_bytes_hash, get_or_create_event_loop, get_string_hash
+
+if TYPE_CHECKING:
+    from supervisely.api.image_api import BlobImageInfo
+
 from supervisely.io.fs_cache import FileCache
 from supervisely.sly_logger import logger
 from supervisely.task.progress import Progress
 
 JUNK_FILES = [".DS_Store", "__MACOSX", "._.DS_Store", "Thumbs.db", "desktop.ini"]
+OFFSETS_PKL_SUFFIX = "_offsets.pkl"  # suffix for pickle file with image offsets
+OFFSETS_PKL_BATCH_SIZE = 10000  # 10k images per batch when loading from pickle
 
 
 def get_file_name(path: str) -> str:
@@ -1571,12 +1588,12 @@ async def list_files_recursively_async(
    :rtype: List[str]
 
    :Usage example:
-
+
    .. code-block:: python
-
+
        import supervisely as sly
        from supervisely._utils import run_coroutine
-
+
        dir_path = '/home/admin/work/projects/examples'
 
        coroutine = sly.fs.list_files_recursively_async(dir_path)
@@ -1616,3 +1633,220 @@ async def list_files_recursively_async(
 
     loop = get_or_create_event_loop()
     return await loop.run_in_executor(None, sync_file_list)
+
+
+def get_file_offsets_batch_generator(
+    archive_path: str,
+    team_file_id: Optional[int] = None,
+    filter_func: Optional[Callable] = None,
+    output_format: Literal["dicts", "objects"] = "dicts",
+    batch_size: int = OFFSETS_PKL_BATCH_SIZE,
+) -> Generator[Union[List[Dict], List["BlobImageInfo"]], None, None]:
+    """
+    Extracts offset information for files from TAR archives and returns a generator that yields the information in batches.
+
+    `team_file_id` may be None if it's not possible to obtain the ID at this moment.
+    You can set the `team_file_id` later when uploading the file to Supervisely.
+
+    :param archive_path: Local path to the archive
+    :type archive_path: str
+    :param team_file_id: ID of file in Team Files. Default is None.
+        `team_file_id` may be None if it's not possible to obtain the ID at this moment.
+        You can set the `team_file_id` later when uploading the file to Supervisely.
+    :type team_file_id: Optional[int]
+    :param filter_func: Function to filter files. The function should take a filename as input and return True if the file should be included.
+    :type filter_func: Callable, optional
+    :param output_format: Format of the output. Default is `dicts`.
+        `objects` - returns a list of BlobImageInfo objects.
+        `dicts` - returns a list of dictionaries.
+    :type output_format: Literal["dicts", "objects"]
+    :returns: Generator yielding batches of file information in the specified format.
+    :rtype: Generator[Union[List[Dict], List[BlobImageInfo]], None, None]
+
+    :raises ValueError: If the archive type is not supported or contains compressed files
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        archive_path = '/home/admin/work/projects/examples.tar'
+        file_infos = sly.fs.get_file_offsets_batch_generator(archive_path)
+        for batch in file_infos:
+            print(batch)
+
+        # Output:
+        # [
+        #     {
+        #         "title": "image1.jpg",
+        #         "teamFileId": None,
+        #         "sourceBlob": {
+        #             "offsetStart": 0,
+        #             "offsetEnd": 123456
+        #         }
+        #     },
+        #     {
+        #         "title": "image2.jpg",
+        #         "teamFileId": None,
+        #         "sourceBlob": {
+        #             "offsetStart": 123456,
+        #             "offsetEnd": 234567
+        #         }
+        #     }
+        # ]
+    """
+    from supervisely.api.image_api import BlobImageInfo
+
+    ext = Path(archive_path).suffix.lower()
+
+    if ext == ".tar":
+        if output_format == "dicts":
+            yield from _process_tar_generator(
+                tar_path=archive_path,
+                team_file_id=team_file_id,
+                filter_func=filter_func,
+                batch_size=batch_size,
+            )
+        else:
+            for batch in _process_tar_generator(
+                tar_path=archive_path,
+                team_file_id=team_file_id,
+                filter_func=filter_func,
+                batch_size=batch_size,
+            ):
+                blob_file_infos = [BlobImageInfo.from_dict(file_info) for file_info in batch]
+                yield blob_file_infos
+    else:
+        raise ValueError(f"Unsupported archive type: {ext}. Only .tar are supported")
+
+
+def _process_tar_generator(
+    tar_path: str,
+    team_file_id: Optional[int] = None,
+    filter_func: Optional[Callable] = None,
+    batch_size: int = OFFSETS_PKL_BATCH_SIZE,
+) -> Generator[List[Dict], None, None]:
+    """
+    Processes a TAR archive and yields batches of offset information for files.
+
+    :param tar_path: Path to the TAR archive
+    :type tar_path: str
+    :param team_file_id: ID of the team file, defaults to None
+    :type team_file_id: Optional[int], optional
+    :param filter_func: Function to filter files. The function should take a filename as input and return True if the file should be included.
+    :type filter_func: Optional[Callable], optional
+    :param batch_size: Number of files in each batch, defaults to 10000
+    :type batch_size: int, optional
+    :yield: Batches of dictionaries with file offset information
+    :rtype: Generator[List[Dict], None, None]
+    """
+    from supervisely.api.api import ApiField
+
+    with tarfile.open(tar_path, "r") as tar:
+        batch = []
+        processed_count = 0
+        members = tar.getmembers()
+        total_members_count = len(members)  # for logging
+
+        logger.debug(f"Processing TAR archive with {total_members_count} members")
+
+        for member in members:
+            skip = not member.isfile()
+
+            if filter_func and not filter_func(member.name):
+                logger.debug(f"File '{member.name}' is skipped by filter function")
+                skip = True
+
+            if not skip:
+                file_info = {
+                    ApiField.TITLE: os.path.basename(member.name),
+                    ApiField.TEAM_FILE_ID: team_file_id,
+                    ApiField.SOURCE_BLOB: {
+                        ApiField.OFFSET_START: member.offset_data,
+                        ApiField.OFFSET_END: member.offset_data + member.size,
+                    },
+                }
+                batch.append(file_info)
+
+                # Yield batch when it reaches the specified size
+                if len(batch) >= batch_size:
+                    processed_count += len(batch)
+                    logger.debug(
+                        f"Yielding batch of {len(batch)} files, processed {processed_count} files so far"
+                    )
+                    yield batch
+                    batch = []
+
+        # Yield any remaining files in the last batch
+        if batch:
+            processed_count += len(batch)
+            logger.debug(
+                f"Yielding final batch of {len(batch)} files, processed {processed_count} files total"
+            )
+            yield batch
+
+
+def save_blob_offsets_pkl(
+    blob_file_path: str,
+    output_dir: str,
+    team_file_id: Optional[int] = None,
+    filter_func: Optional[Callable] = None,
+    batch_size: int = OFFSETS_PKL_BATCH_SIZE,
+    replace: bool = False,
+) -> str:
+    """
+    Processes blob file locally and creates a pickle file with offset information.
+
+    :param blob_file_path: Path to the local blob file
+    :type blob_file_path: str
+    :param output_dir: Path to the output directory
+    :type output_dir: str
+    :param team_file_id: ID of file in Team Files. Default is None.
+        `team_file_id` may be None if it's not possible to obtain the ID at this moment.
+        You can set the `team_file_id` later when uploading the file to Supervisely.
+    :type team_file_id: Optional[int]
+    :param filter_func: Function to filter files. The function should take a filename as input and return True if the file should be included.
+    :type filter_func: Callable, optional
+    :param batch_size: Number of files to process in each batch, defaults to 10000
+    :type batch_size: int, optional
+    :param replace: If True, overwrite the existing file if it exists.
+        If False, skip processing if the file already exists and return its path.
+        Default is False.
+    :type replace: bool
+    :returns: Path to the output pickle file
+    :rtype: str
+
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        archive_path = '/path/to/examples.tar'
+        output_dir = '/path/to/output'
+        sly.fs.save_blob_offsets_pkl(archive_path, output_dir)
+    """
+    from supervisely.api.image_api import BlobImageInfo
+
+    archive_name = Path(blob_file_path).stem
+    output_path = os.path.join(output_dir, archive_name + OFFSETS_PKL_SUFFIX)
+
+    if file_exists(output_path):
+        logger.debug(f"Offsets file already exists: {output_path}")
+        if replace:
+            logger.debug(f"Replacing existing offsets file: {output_path}")
+            silent_remove(output_path)
+        else:
+            logger.debug(f"Skipping processing, using existing offsets file: {output_path}")
+            return output_path
+
+    offsets_batch_generator = get_file_offsets_batch_generator(
+        archive_path=blob_file_path,
+        team_file_id=team_file_id,
+        filter_func=filter_func,
+        output_format="objects",
+        batch_size=batch_size,
+    )
+
+    BlobImageInfo.dump_to_pickle(offsets_batch_generator, output_path)
+    return output_path
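
`filter_func` is only described in prose in the docstrings above. A hedged usage sketch that indexes just the image members of a tar blob and requests `BlobImageInfo` objects instead of dicts (the archive path is hypothetical; the function and parameter names come from the added code):

    import supervisely as sly

    def only_images(name: str) -> bool:
        # filter_func receives the member name inside the tar archive
        return name.lower().endswith((".jpg", ".jpeg", ".png"))

    archive_path = "/path/to/blob.tar"
    batches = sly.fs.get_file_offsets_batch_generator(
        archive_path,
        filter_func=only_images,
        output_format="objects",  # yield BlobImageInfo objects instead of dicts
        batch_size=1000,
    )
    for batch in batches:
        print(f"Indexed {len(batch)} images in this batch")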
@@ -1,12 +1,11 @@
 import asyncio
 import os
-import shutil
 from typing import Callable, List, Optional, Tuple, Union
 
 from tqdm import tqdm
 
 from supervisely import get_project_class
-from supervisely._utils import get_or_create_event_loop, rand_str
+from supervisely._utils import run_coroutine
 from supervisely.annotation.annotation import Annotation, ProjectMeta
 from supervisely.api.api import Api
 from supervisely.api.dataset_api import DatasetInfo
@@ -20,7 +19,7 @@ from supervisely.io.fs import (
     get_directory_size,
     remove_dir,
 )
-from supervisely.io.json import dump_json_file, load_json_file
+from supervisely.io.json import load_json_file
 from supervisely.project import Project
 from supervisely.project.project import Dataset, OpenMode, ProjectType
 from supervisely.sly_logger import logger
@@ -46,7 +45,7 @@ def download(
    :type project_id: int
    :param dest_dir: Destination path to local directory.
    :type dest_dir: str
-    :param dataset_ids: Specified list of Dataset IDs which will be downloaded. Datasets could be downloaded from different projects but with the same data type.
+    :param dataset_ids: Specified list of Dataset IDs which will be downloaded.
    :type dataset_ids: list(int), optional
    :param log_progress: Show downloading logs in the output.
    :type log_progress: bool
@@ -205,12 +204,7 @@ def download_async(
            progress_cb=progress_cb,
            **kwargs,
        )
-        loop = get_or_create_event_loop()
-        if loop.is_running():
-            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
-            future.result()
-        else:
-            loop.run_until_complete(download_coro)
+        run_coroutine(download_coro)
    else:
        raise NotImplementedError(f"Method download_async is not implemented for {project_class}")
 
@@ -254,12 +248,7 @@ def download_async_or_sync(
                progress_cb=progress_cb,
                **kwargs,
            )
-            loop = get_or_create_event_loop()
-            if loop.is_running():
-                future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
-                future.result()
-            else:
-                loop.run_until_complete(download_coro)
+            run_coroutine(download_coro)
        except Exception as e:
            if kwargs.get("resume_download", False) is False:
                remove_dir(dest_dir)
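
The two hunks above collapse the manual event-loop handling into a single `run_coroutine` call. The actual helper lives in `supervisely._utils` and is not shown in this diff; a minimal sketch with the same semantics as the deleted lines (an assumption, not the library's implementation):

    import asyncio

    from supervisely._utils import get_or_create_event_loop  # same helper the old code used

    def run_coroutine(coro):
        """Run a coroutine from synchronous code, reusing an already-running loop if there is one."""
        loop = get_or_create_event_loop()
        if loop.is_running():
            # Another thread owns the running loop: hand the coroutine over and block on the result.
            return asyncio.run_coroutine_threadsafe(coro, loop=loop).result()
        return loop.run_until_complete(coro)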