supervisely 6.73.226-py3-none-any.whl → 6.73.227-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of supervisely might be problematic.
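The diff below adds asyncio-based helpers to supervisely/project/project.py: Dataset gains add_item_raw_bytes_async, add_item_file_async, set_ann_async and related private coroutines, Project gains a download_async staticmethod backed by new module-level _download_project_async / _download_project_item_async functions, and download_project / _download_project accept a resume_download flag. As a minimal sketch assembled from the usage examples in the added docstrings (server address, token, project ID, and save path are placeholders), the new coroutine API can be driven like this:

    import asyncio
    import os

    import supervisely as sly

    # Placeholders: point these at your own instance and project.
    os.environ["SERVER_ADDRESS"] = "https://app.supervisely.com"
    os.environ["API_TOKEN"] = "Your Supervisely API Token"
    api = sly.Api.from_env()

    project_id = 8888
    save_directory = "/path/to/save/projects"

    # The docstrings added in this release drive coroutines through
    # sly.utils.get_or_create_event_loop(), which works both inside and
    # outside an already-running event loop.
    loop = sly.utils.get_or_create_event_loop()
    coroutine = sly.Project.download_async(api, project_id, save_directory)
    if loop.is_running():
        future = asyncio.run_coroutine_threadsafe(coroutine, loop)
        future.result()
    else:
        loop.run_until_complete(coroutine)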

@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import io
 import os
 import pickle
@@ -11,11 +12,18 @@ from collections import defaultdict, namedtuple
 from enum import Enum
 from typing import Callable, Dict, Generator, List, NamedTuple, Optional, Tuple, Union
 
+import aiofiles
 import numpy as np
 from tqdm import tqdm
 
 import supervisely as sly
-from supervisely._utils import abs_url, batched, is_development, snake_to_human
+from supervisely._utils import (
+    abs_url,
+    batched,
+    get_or_create_event_loop,
+    is_development,
+    snake_to_human,
+)
 from supervisely.annotation.annotation import ANN_EXT, Annotation, TagCollection
 from supervisely.annotation.obj_class import ObjClass
 from supervisely.annotation.obj_class_collection import ObjClassCollection
@@ -29,6 +37,7 @@ from supervisely.geometry.bitmap import Bitmap
 from supervisely.imaging import image as sly_image
 from supervisely.io.fs import (
     copy_file,
+    copy_file_async,
     dir_empty,
     dir_exists,
     ensure_base_path,
@@ -41,7 +50,7 @@ from supervisely.io.fs import (
     subdirs_tree,
 )
 from supervisely.io.fs_cache import FileCache
-from supervisely.io.json import dump_json_file, load_json_file
+from supervisely.io.json import dump_json_file, dump_json_file_async, load_json_file
 from supervisely.project.project_meta import ProjectMeta
 from supervisely.project.project_type import ProjectType
 from supervisely.sly_logger import logger
@@ -1123,6 +1132,20 @@ class Dataset(KeyObject):
             # item info named tuple (ImageInfo, VideoInfo, PointcloudInfo, ..)
             dump_json_file(item_info._asdict(), dst_info_path, indent=4)
 
+    async def _add_item_info_async(self, item_name, item_info=None):
+        if item_info is None:
+            return
+
+        dst_info_path = self.get_item_info_path(item_name)
+        ensure_base_path(dst_info_path)
+        if type(item_info) is dict:
+            dump_json_file(item_info, dst_info_path, indent=4)
+        elif type(item_info) is str and os.path.isfile(item_info):
+            shutil.copy(item_info, dst_info_path)
+        else:
+            # item info named tuple (ImageInfo, VideoInfo, PointcloudInfo, ..)
+            dump_json_file(item_info._asdict(), dst_info_path, indent=4)
+
     def _check_add_item_name(self, item_name):
         """
         Generate exception error if item name already exists in dataset or has unsupported extension
@@ -1153,6 +1176,76 @@ class Dataset(KeyObject):
             fout.write(item_raw_bytes)
         self._validate_added_item_or_die(dst_img_path)
 
+    async def _add_item_raw_bytes_async(self, item_name, item_raw_bytes):
+        """
+        Write given binary object to dataset items directory, Generate exception error if item_name already exists in
+        dataset or item name has unsupported extension. Make sure we actually received a valid image file, clean it up and fail if not so.
+        :param item_name: str
+        :param item_raw_bytes: binary object
+        """
+        if item_raw_bytes is None:
+            return
+
+        self._check_add_item_name(item_name)
+        item_name = item_name.strip("/")
+        dst_img_path = os.path.join(self.item_dir, item_name)
+        os.makedirs(os.path.dirname(dst_img_path), exist_ok=True)
+        async with aiofiles.open(dst_img_path, "wb") as fout:
+            await fout.write(item_raw_bytes)
+
+        self._validate_added_item_or_die(dst_img_path)
+
+    async def add_item_raw_bytes_async(
+        self,
+        item_name: str,
+        item_raw_bytes: bytes,
+        ann: Optional[Union[Annotation, str]] = None,
+        img_info: Optional[Union[ImageInfo, Dict, str]] = None,
+    ) -> None:
+        """
+        Adds given binary object as an image to dataset items directory, and adds given annotation to dataset ann directory.
+        If ann is None, creates empty annotation file.
+
+        :param item_name: Item name.
+        :type item_name: :class:`str`
+        :param item_raw_bytes: Binary object.
+        :type item_raw_bytes: :class:`bytes`
+        :param ann: Annotation object or path to annotation json file.
+        :type ann: :class:`Annotation<supervisely.annotation.annotation.Annotation>` or :class:`str`, optional
+        :param img_info: ImageInfo object or ImageInfo object converted to dict or path to item info json file for copying to dataset item info directory.
+        :type img_info: :class:`ImageInfo<supervisely.api.image_api.ImageInfo>` or :class:`dict` or :class:`str`, optional
+        :return: None
+        :rtype: NoneType
+        :raises: :class:`RuntimeError` if item_name already exists in dataset or item name has unsupported extension
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/home/admin/work/supervisely/projects/lemons_annotated/ds1"
+            ds = sly.Dataset(dataset_path, sly.OpenMode.READ)
+
+            img_path = "/home/admin/Pictures/Clouds.jpeg"
+            img_np = sly.image.read(img_path)
+            img_bytes = sly.image.write_bytes(img_np, "jpeg")
+            loop = sly.utils.get_or_create_event_loop()
+            coroutine = ds.add_item_raw_bytes_async("IMG_050.jpeg", img_bytes)
+            if loop.is_running():
+                future = asyncio.run_coroutine_threadsafe(coroutine, loop)
+                future.result()
+            else:
+                loop.run_until_complete(coroutine)
+
+            print(ds.item_exists("IMG_050.jpeg"))
+            # Output: True
+        """
+        if item_raw_bytes is None and ann is None and img_info is None:
+            raise RuntimeError("No item_raw_bytes or ann or img_info provided.")
+
+        await self._add_item_raw_bytes_async(item_name, item_raw_bytes)
+        await self._add_ann_by_type_async(item_name, ann)
+        self._add_item_info(item_name, img_info)
+
     def generate_item_path(self, item_name: str) -> str:
         """
         Generates full path to the given item.
@@ -1230,6 +1323,18 @@ class Dataset(KeyObject):
             os.remove(item_path)
             raise
 
+    async def _validate_added_item_or_die_async(self, item_path):
+        """
+        Make sure we actually received a valid image file, clean it up and fail if not so
+        :param item_path: str
+        """
+        # Make sure we actually received a valid image file, clean it up and fail if not so.
+        try:
+            sly_image.validate_format(item_path)
+        except (sly_image.UnsupportedImageFormat, sly_image.ImageReadException):
+            os.remove(item_path)
+            raise
+
     def set_ann(self, item_name: str, ann: Annotation) -> None:
         """
         Replaces given annotation for given item name to dataset annotations directory in json format.
@@ -1449,6 +1554,241 @@ class Dataset(KeyObject):
             res = abs_url(res)
         return res
 
+    async def set_ann_file_async(self, item_name: str, ann_path: str) -> None:
+        """
+        Replaces given annotation json file for given item name to dataset annotations directory in json format.
+
+        :param item_name: Item Name.
+        :type item_name: :class:`str`
+        :param ann_path: Path to the :class:`Annotation<supervisely.annotation.annotation.Annotation>` json file.
+        :type ann_path: :class:`str`
+        :return: None
+        :rtype: NoneType
+        :raises: :class:`RuntimeError` if ann_path is not str
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/home/admin/work/supervisely/projects/lemons_annotated/ds1"
+            ds = sly.Dataset(dataset_path, sly.OpenMode.READ)
+            new_ann = "/home/admin/work/supervisely/projects/kiwi_annotated/ds1/ann/IMG_1812.jpeg.json"
+
+            loop = sly.utils.get_or_create_event_loop()
+            coroutine = ds.set_ann_file_async("IMG_1812.jpeg", new_ann)
+            if loop.is_running():
+                future = asyncio.run_coroutine_threadsafe(coroutine, loop)
+                future.result()
+            else:
+                loop.run_until_complete(coroutine)
+        """
+        if type(ann_path) is not str:
+            raise TypeError("Annotation path should be a string, not a {}".format(type(ann_path)))
+        dst_ann_path = self.get_ann_path(item_name)
+        await copy_file_async(ann_path, dst_ann_path)
+
+    async def set_ann_dict_async(self, item_name: str, ann: Dict) -> None:
+        """
+        Replaces given annotation json for given item name to dataset annotations directory in json format.
+
+        :param item_name: Item name.
+        :type item_name: :class:`str`
+        :param ann: :class:`Annotation<supervisely.annotation.annotation.Annotation>` as a dict in json format.
+        :type ann: :class:`dict`
+        :return: None
+        :rtype: NoneType
+        :raises: :class:`RuntimeError` if ann_path is not str
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/home/admin/work/supervisely/projects/lemons_annotated/ds1"
+            ds = sly.Dataset(dataset_path, sly.OpenMode.READ)
+
+            new_ann_json = {
+                "description":"",
+                "size":{
+                    "height":500,
+                    "width":700
+                },
+                "tags":[],
+                "objects":[],
+                "customBigData":{}
+            }
+
+            loop = sly.utils.get_or_create_event_loop()
+            coroutine = ds.set_ann_dict_async("IMG_8888.jpeg", new_ann_json)
+            if loop.is_running():
+                future = asyncio.run_coroutine_threadsafe(coroutine, loop)
+                future.result()
+            else:
+                loop.run_until_complete(coroutine)
+        """
+        if type(ann) is not dict:
+            raise TypeError("Ann should be a dict, not a {}".format(type(ann)))
+        dst_ann_path = self.get_ann_path(item_name)
+        os.makedirs(os.path.dirname(dst_ann_path), exist_ok=True)
+        await dump_json_file_async(ann, dst_ann_path, indent=4)
+
+    async def set_ann_async(self, item_name: str, ann: Annotation) -> None:
+        """
+        Replaces given annotation for given item name to dataset annotations directory in json format.
+
+        :param item_name: Item name.
+        :type item_name: :class:`str`
+        :param ann: Annotation object.
+        :type ann: :class:`Annotation<supervisely.annotation.annotation.Annotation>`
+        :return: None
+        :rtype: NoneType
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/home/admin/work/supervisely/projects/lemons_annotated/ds1"
+            ds = sly.Dataset(dataset_path, sly.OpenMode.READ)
+
+            height, width = 500, 700
+            new_ann = sly.Annotation((height, width))
+            loop = sly.utils.get_or_create_event_loop()
+            coroutine = ds.set_ann_async("IMG_0748.jpeg", new_ann)
+            if loop.is_running():
+                future = asyncio.run_coroutine_threadsafe(coroutine, loop)
+                future.result()
+            else:
+                loop.run_until_complete(coroutine)
+        """
+        if type(ann) is not self.annotation_class:
+            raise TypeError(
+                f"Type of 'ann' should be {self.annotation_class.__name__}, not a {type(ann).__name__}"
+            )
+        dst_ann_path = self.get_ann_path(item_name)
+        await dump_json_file_async(ann.to_json(), dst_ann_path, indent=4)
+
+    async def _add_ann_by_type_async(self, item_name, ann):
+        """
+        Add given annotation to dataset annotations dir and to dictionary items: item file name -> annotation file name
+        :param item_name: str
+        :param ann: Annotation class object, str, dict, None (generate exception error if param type is another)
+        """
+        # This is a new-style annotation name, so if there was no image with this name yet, there should not have been
+        # an annotation either.
+        self._item_to_ann[item_name] = item_name + ANN_EXT
+        if ann is None:
+            await self.set_ann_async(item_name, self._get_empty_annotaion(item_name))
+        elif type(ann) is self.annotation_class:
+            await self.set_ann_async(item_name, ann)
+        elif type(ann) is str:
+            await self.set_ann_file_async(item_name, ann)
+        elif type(ann) is dict:
+            await self.set_ann_dict_async(item_name, ann)
+        else:
+            raise TypeError("Unsupported type {!r} for ann argument".format(type(ann)))
+
+    async def _add_item_file_async(
+        self, item_name, item_path, _validate_item=True, _use_hardlink=False
+    ):
+        """
+        Add given item file to dataset items directory. Generate exception error if item_name already exists in dataset
+        or item name has unsupported extension
+        :param item_name: str
+        :param item_path: str
+        :param _validate_item: bool
+        :param _use_hardlink: bool
+        """
+        if item_path is None:
+            return
+
+        self._check_add_item_name(item_name)
+        dst_item_path = os.path.join(self.item_dir, item_name)
+        if (
+            item_path != dst_item_path and item_path is not None
+        ):  # used only for agent + api during download project + None to optimize internal usage
+            hardlink_done = False
+            if _use_hardlink:
+                try:
+                    loop = get_or_create_event_loop()
+                    await loop.run_in_executor(None, os.link, item_path, dst_item_path)
+                    hardlink_done = True
+                except OSError:
+                    pass
+            if not hardlink_done:
+                await copy_file_async(item_path, dst_item_path)
+            if _validate_item:
+                await self._validate_added_item_or_die_async(item_path)
+
+    async def add_item_file_async(
+        self,
+        item_name: str,
+        item_path: str,
+        ann: Optional[Union[Annotation, str]] = None,
+        _validate_item: Optional[bool] = True,
+        _use_hardlink: Optional[bool] = False,
+        item_info: Optional[Union[ImageInfo, Dict, str]] = None,
+        img_info: Optional[Union[ImageInfo, Dict, str]] = None,
+    ) -> None:
+        """
+        Adds given item file to dataset items directory, and adds given annotation to dataset annotations directory.
+        If ann is None, creates empty annotation file.
+
+        :param item_name: Item name.
+        :type item_name: :class:`str`
+        :param item_path: Path to the item.
+        :type item_path: :class:`str`
+        :param ann: Annotation object or path to annotation json file.
+        :type ann: :class:`Annotation<supervisely.annotation.annotation.Annotation>` or :class:`str`, optional
+        :param _validate_item: Checks input files format.
+        :type _validate_item: :class:`bool`, optional
+        :param _use_hardlink: If True creates a hardlink pointing to src named dst, otherwise don't.
+        :type _use_hardlink: :class:`bool`, optional
+        :param item_info: ImageInfo object or ImageInfo object converted to dict or path to item info json file for copying to dataset item info directory.
+        :type item_info: :class:`ImageInfo<supervisely.api.image_api.ImageInfo>` or :class:`dict` or :class:`str`, optional
+        :param img_info: Deprecated version of item_info parameter. Can be removed in future versions.
+        :type img_info: :class:`ImageInfo<supervisely.api.image_api.ImageInfo>` or :class:`dict` or :class:`str`, optional
+        :return: None
+        :rtype: NoneType
+        :raises: :class:`RuntimeError` if item_name already exists in dataset or item name has unsupported extension.
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/home/admin/work/supervisely/projects/lemons_annotated/ds1"
+            ds = sly.Dataset(dataset_path, sly.OpenMode.READ)
+
+            ann = "/home/admin/work/supervisely/projects/lemons_annotated/ds1/ann/IMG_8888.jpeg.json"
+            loop = sly.utils.get_or_create_event_loop()
+            loop.run_until_complete(
+                ds.add_item_file_async("IMG_8888.jpeg", "/home/admin/work/supervisely/projects/lemons_annotated/ds1/img/IMG_8888.jpeg", ann=ann)
+            )
+            print(ds.item_exists("IMG_8888.jpeg"))
+            # Output: True
+        """
+        # item_path is None when image is cached
+        if item_path is None and ann is None and img_info is None:
+            raise RuntimeError("No item_path or ann or img_info provided.")
+
+        if item_info is not None and img_info is not None:
+            raise RuntimeError(
+                "At least one parameter of two (item_info and img_info) must be None."
+            )
+
+        if img_info is not None:
+            logger.warning(
+                "img_info parameter of add_item_file() method is deprecated and can be removed in future versions. Use item_info parameter instead."
+            )
+            item_info = img_info
+
+        await self._add_item_file_async(
+            item_name,
+            item_path,
+            _validate_item=_validate_item,
+            _use_hardlink=_use_hardlink,
+        )
+        await self._add_ann_by_type_async(item_name, ann)
+        await self._add_item_info_async(item_name, item_info)
+
 
 class Project:
     """
@@ -2500,6 +2840,7 @@ class Project:
         save_image_info: Optional[bool] = False,
         save_images: bool = True,
         save_image_meta: bool = False,
+        resume_download: bool = False,
     ) -> None:
         """
         Download project from Supervisely to the given directory.
@@ -2565,6 +2906,7 @@ class Project:
             save_image_info=save_image_info,
             save_images=save_images,
             save_image_meta=save_image_meta,
+            resume_download=resume_download,
         )
 
     @staticmethod
@@ -3064,6 +3406,91 @@ class Project:
                 progress_cb=progress_cb,
             )
 
+    @staticmethod
+    async def download_async(
+        api: Api,
+        project_id: int,
+        dest_dir: str,
+        dataset_ids: Optional[List[int]] = None,
+        log_progress: bool = True,
+        semaphore: asyncio.Semaphore = None,
+        progress_cb: Optional[Union[tqdm, Callable]] = None,
+        only_image_tags: Optional[bool] = False,
+        save_image_info: Optional[bool] = False,
+        save_images: bool = True,
+        save_image_meta: bool = False,
+        images_ids: Optional[List[int]] = None,
+        resume_download: Optional[bool] = False,
+    ) -> None:
+        """
+        Download project from Supervisely to the given directory in asynchronous mode.
+
+        :param api: Supervisely API address and token.
+        :type api: :class:`Api<supervisely.api.api.Api>`
+        :param project_id: Supervisely downloadable project ID.
+        :type project_id: :class:`int`
+        :param dest_dir: Destination directory.
+        :type dest_dir: :class:`str`
+        :param dataset_ids: Filter datasets by IDs.
+        :type dataset_ids: :class:`list` [ :class:`int` ], optional
+        :param log_progress: Show uploading progress bar.
+        :type log_progress: :class:`bool`
+        :param semaphore: Semaphore to limit the number of concurrent downloads of items.
+        :type semaphore: :class:`asyncio.Semaphore`, optional
+        :param progress_cb: Function for tracking download progress.
+        :type progress_cb: tqdm or callable, optional
+        :param only_image_tags: Download project with only images tags (without objects tags).
+        :type only_image_tags: :class:`bool`, optional
+        :param save_image_info: Download images infos or not.
+        :type save_image_info: :class:`bool`, optional
+        :param save_images: Download images or not.
+        :type save_images: :class:`bool`, optional
+        :param save_image_meta: Download images metadata in JSON format or not.
+        :type save_image_meta: :class:`bool`, optional
+        :param images_ids: Filter images by IDs.
+        :type images_ids: :class:`list` [ :class:`int` ], optional
+        :param resume_download: Resume download enables to download only missing files avoiding erase of existing files.
+        :type resume_download: :class:`bool`, optional
+        :return: None
+        :rtype: NoneType
+
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+
+            os.environ['SERVER_ADDRESS'] = 'https://app.supervisely.com'
+            os.environ['API_TOKEN'] = 'Your Supervisely API Token'
+            api = sly.Api.from_env()
+
+            project_id = 8888
+            save_directory = "/path/to/save/projects"
+
+            loop = sly.utils.get_or_create_event_loop()
+            coroutine = sly.Project.download_async(api, project_id, save_directory)
+            if loop.is_running():
+                future = asyncio.run_coroutine_threadsafe(coroutine, loop)
+                future.result()
+            else:
+                loop.run_until_complete(coroutine)
+        """
+        await _download_project_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            semaphore=semaphore,
+            only_image_tags=only_image_tags,
+            save_image_info=save_image_info,
+            save_images=save_images,
+            progress_cb=progress_cb,
+            save_image_meta=save_image_meta,
+            images_ids=images_ids,
+            resume_download=resume_download,
+        )
+
 
 def read_single_project(
     dir: str,
@@ -3158,10 +3585,16 @@ def _download_project(
     progress_cb: Optional[Callable] = None,
     save_image_meta: Optional[bool] = False,
     images_ids: Optional[List[int]] = None,
+    resume_download: Optional[bool] = False,
 ):
     dataset_ids = set(dataset_ids) if (dataset_ids is not None) else None
-    project_fs = Project(dest_dir, OpenMode.CREATE)
+    project_fs = None
     meta = ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
+    if os.path.exists(dest_dir) and resume_download:
+        dump_json_file(meta.to_json(), os.path.join(dest_dir, "meta.json"))
+        project_fs = Project(dest_dir, OpenMode.READ)
+    if project_fs is None:
+        project_fs = Project(dest_dir, OpenMode.CREATE)
     project_fs.set_meta(meta)
 
     if progress_cb is not None:
@@ -3171,19 +3604,20 @@ def _download_project(
     if only_image_tags is True:
         id_to_tagmeta = meta.tag_metas.get_id_mapping()
 
-    images_filter = None
-    if images_ids is not None:
-        images_filter = [{"field": "id", "operator": "in", "value": images_ids}]
-
+    existing_datasets = {dataset.path: dataset for dataset in project_fs.datasets}
     for parents, dataset in api.dataset.tree(project_id):
         dataset_path = Dataset._get_dataset_path(dataset.name, parents)
         dataset_id = dataset.id
         if dataset_ids is not None and dataset_id not in dataset_ids:
             continue
 
-        dataset_fs = project_fs.create_dataset(dataset.name, dataset_path)
+        if dataset_path in existing_datasets:
+            dataset_fs = existing_datasets[dataset_path]
+        else:
+            dataset_fs = project_fs.create_dataset(dataset.name, dataset_path)
 
-        images = api.image.get_list(dataset_id, filters=images_filter)
+        all_images = api.image.get_list(dataset_id, force_metadata_for_links=False)
+        images = [image for image in all_images if images_ids is None or image.id in images_ids]
         ds_total = len(images)
 
         ds_progress = progress_cb
@@ -3211,42 +3645,79 @@ def _download_project(
                 image_ids = [image_info.id for image_info in batch]
                 image_names = [image_info.name for image_info in batch]
 
+                existing_image_infos: Dict[str, ImageInfo] = {}
+                for image_name in image_names:
+                    try:
+                        image_info = dataset_fs.get_item_info(image_name)
+                    except:
+                        image_info = None
+                    existing_image_infos[image_name] = image_info
+
+                indexes_to_download = []
+                for i, image_info in enumerate(batch):
+                    existing_image_info = existing_image_infos[image_info.name]
+                    if (
+                        existing_image_info is None
+                        or existing_image_info.updated_at != image_info.updated_at
+                    ):
+                        indexes_to_download.append(i)
+
                 # download images in numpy format
-                if save_images:
-                    batch_imgs_bytes = api.image.download_bytes(dataset_id, image_ids)
-                else:
-                    batch_imgs_bytes = [None] * len(image_ids)
+                batch_imgs_bytes = [None] * len(image_ids)
+                if save_images and indexes_to_download:
+                    for index, img in zip(
+                        indexes_to_download,
+                        api.image.download_bytes(
+                            dataset_id,
+                            [image_ids[i] for i in indexes_to_download],
+                            progress_cb=ds_progress,
+                        ),
+                    ):
+                        batch_imgs_bytes[index] = img
 
-                if log_progress or progress_cb is not None:
-                    ds_progress(len(batch))
+                if ds_progress is not None:
+                    ds_progress(len(batch) - len(indexes_to_download))
 
                 # download annotations in json format
+                ann_jsons = [None] * len(image_ids)
                 if only_image_tags is False:
-                    ann_infos = api.annotation.download_batch(
-                        dataset_id, image_ids, progress_cb=anns_progress
-                    )
-                    ann_jsons = [ann_info.annotation for ann_info in ann_infos]
+                    if indexes_to_download:
+                        for index, ann_info in zip(
+                            indexes_to_download,
+                            api.annotation.download_batch(
+                                dataset_id,
+                                [image_ids[i] for i in indexes_to_download],
+                                progress_cb=anns_progress,
+                            ),
+                        ):
+                            ann_jsons[index] = ann_info.annotation
                 else:
-                    ann_jsons = []
-                    for image_info in batch:
-                        # pylint: disable=possibly-used-before-assignment
-                        tags = TagCollection.from_api_response(
-                            image_info.tags,
-                            meta.tag_metas,
-                            id_to_tagmeta,
-                        )
-                        tmp_ann = Annotation(
-                            img_size=(image_info.height, image_info.width), img_tags=tags
-                        )
-                        ann_jsons.append(tmp_ann.to_json())
+                    if indexes_to_download:
+                        for index in indexes_to_download:
+                            image_info = batch[index]
+                            tags = TagCollection.from_api_response(
+                                image_info.tags,
+                                meta.tag_metas,
+                                id_to_tagmeta,
+                            )
+                            tmp_ann = Annotation(
+                                img_size=(image_info.height, image_info.width), img_tags=tags
+                            )
+                            ann_jsons[index] = tmp_ann.to_json()
+                        if anns_progress is not None:
+                            anns_progress(len(indexes_to_download))
+                if anns_progress is not None:
+                    anns_progress(len(batch) - len(indexes_to_download))
 
                 for img_info, name, img_bytes, ann in zip(
                     batch, image_names, batch_imgs_bytes, ann_jsons
                 ):
+                    dataset_fs: Dataset
+
                     dataset_fs.add_item_raw_bytes(
                         item_name=name,
                         item_raw_bytes=img_bytes if save_images is True else None,
-                        ann=ann,
+                        ann=dataset_fs.get_ann(name) if ann is None else ann,
                         img_info=img_info if save_image_info is True else None,
                     )
 
@@ -3258,6 +3729,12 @@ def _download_project(
                     sly.json.dump_json_file(
                         image_info.meta, dataset_fs.get_item_meta_path(image_info.name)
                     )
+
+        # delete redundant items
+        items_names_set = set([img.name for img in all_images])
+        for item_name in dataset_fs.get_items_names():
+            if item_name not in items_names_set:
+                dataset_fs.delete_item(item_name)
     try:
         create_readme(dest_dir, project_id, api)
     except Exception as e:
@@ -3321,7 +3798,7 @@ def upload_project(
         img_paths = list(filter(lambda x: os.path.isfile(x), img_paths))
         ann_paths = list(filter(lambda x: os.path.isfile(x), ann_paths))
         metas = [{} for _ in names]
-
+ 
         if img_paths == []:
             # Dataset is empty
             continue
@@ -3423,6 +3900,7 @@ def download_project(
     save_images: bool = True,
     save_image_meta: bool = False,
     images_ids: Optional[List[int]] = None,
+    resume_download: Optional[bool] = False,
 ) -> None:
     """
     Download image project to the local directory.
@@ -3451,7 +3929,10 @@ def download_project(
     :type save_images, bool, optional
     :param save_image_meta: Include images metadata in JSON format in the download.
     :type save_imgge_meta: bool, optional
-
+    :param images_ids: Specified list of Image IDs which will be downloaded.
+    :type images_ids: list(int), optional
+    :param resume_download: Resume download enables to download only missing files avoiding erase of existing files.
+    :type resume_download: bool, optional
     :return: None.
     :rtype: NoneType
     :Usage example:
@@ -3502,6 +3983,7 @@ def download_project(
             progress_cb=progress_cb,
             save_image_meta=save_image_meta,
             images_ids=images_ids,
+            resume_download=resume_download,
         )
     else:
         _download_project_optimized(
@@ -3530,7 +4012,7 @@ def _download_project_optimized(
     save_image_info=False,
     save_images=True,
     log_progress=True,
-    images_ids:List[int]=None,
+    images_ids: List[int] = None,
 ):
     project_info = api.project.get_info_by_id(project_id)
     project_id = project_info.id
@@ -3889,4 +4371,163 @@ def _dataset_structure_md(
     return result_md
 
 
+async def _download_project_async(
+    api: sly.Api,
+    project_id: int,
+    dest_dir: str,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    semaphore: asyncio.Semaphore = None,
+    only_image_tags: Optional[bool] = False,
+    save_image_info: Optional[bool] = False,
+    save_images: Optional[bool] = True,
+    progress_cb: Optional[Callable] = None,
+    save_image_meta: Optional[bool] = False,
+    images_ids: Optional[List[int]] = None,
+    resume_download: Optional[bool] = False,
+):
+    if semaphore is None:
+        semaphore = api._get_default_semaphore()
+
+    dataset_ids = set(dataset_ids) if (dataset_ids is not None) else None
+    project_fs = None
+    meta = ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
+    if os.path.exists(dest_dir) and resume_download:
+        dump_json_file(meta.to_json(), os.path.join(dest_dir, "meta.json"))
+        project_fs = Project(dest_dir, OpenMode.READ)
+    if project_fs is None:
+        project_fs = Project(dest_dir, OpenMode.CREATE)
+    project_fs.set_meta(meta)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    id_to_tagmeta = None
+    if only_image_tags is True:
+        id_to_tagmeta = meta.tag_metas.get_id_mapping()
+
+    existing_datasets = {dataset.path: dataset for dataset in project_fs.datasets}
+    for parents, dataset in api.dataset.tree(project_id):
+        dataset_path = Dataset._get_dataset_path(dataset.name, parents)
+        dataset_id = dataset.id
+        if dataset_ids is not None and dataset_id not in dataset_ids:
+            continue
+
+        if dataset_path in existing_datasets:
+            dataset_fs = existing_datasets[dataset_path]
+        else:
+            dataset_fs = project_fs.create_dataset(dataset.name, dataset_path)
+
+        all_images = api.image.get_list(dataset_id, force_metadata_for_links=False)
+        images = [image for image in all_images if images_ids is None or image.id in images_ids]
+
+        ds_progress = progress_cb
+        if log_progress is True:
+            ds_progress = tqdm_sly(
+                desc="Downloading images from {!r}".format(dataset.name),
+                total=len(images),
+            )
+
+        with ApiContext(
+            api,
+            project_id=project_id,
+            dataset_id=dataset_id,
+            project_meta=meta,
+        ):
+            tasks = []
+            for image in images:
+                try:
+                    existing = dataset_fs.get_item_info(image.name)
+                except:
+                    existing = None
+                else:
+                    if existing.updated_at == image.updated_at:
+                        if ds_progress is not None:
+                            ds_progress(1)
+                        continue
+
+                task = _download_project_item_async(
+                    api=api,
+                    img_info=image,
+                    meta=meta,
+                    dataset_fs=dataset_fs,
+                    id_to_tagmeta=id_to_tagmeta,
+                    semaphore=semaphore,
+                    save_images=save_images,
+                    save_image_info=save_image_info,
+                    only_image_tags=only_image_tags,
+                    progress_cb=ds_progress,
+                )
+                tasks.append(task)
+            await asyncio.gather(*tasks)
+        if save_image_meta:
+            meta_dir = dataset_fs.meta_dir
+            for image_info in images:
+                if image_info.meta:
+                    sly.fs.mkdir(meta_dir)
+                    sly.json.dump_json_file(
+                        image_info.meta, dataset_fs.get_item_meta_path(image_info.name)
+                    )
+
+        # delete redundant items
+        items_names_set = set([img.name for img in all_images])
+        for item_name in dataset_fs.get_items_names():
+            if item_name not in items_names_set:
+                dataset_fs.delete_item(item_name)
+    try:
+        create_readme(dest_dir, project_id, api)
+    except Exception as e:
+        logger.info(f"There was an error while creating README: {e}")
+
+
+async def _download_project_item_async(
+    api: sly.Api,
+    img_info: sly.ImageInfo,
+    meta: ProjectMeta,
+    dataset_fs: Dataset,
+    id_to_tagmeta: Dict[int, sly.TagMeta],
+    semaphore: asyncio.Semaphore,
+    save_images: bool,
+    save_image_info: bool,
+    only_image_tags: bool,
+    progress_cb: Optional[Callable],
+) -> None:
+    """Download image and annotation from Supervisely API and save it to the local filesystem.
+    Uses parameters from the parent function _download_project_async.
+    """
+    if save_images:
+        img_bytes = await api.image.download_bytes_single_async(
+            img_info.id, semaphore=semaphore, check_hash=True
+        )
+    else:
+        img_bytes = None
+
+    if only_image_tags is False:
+        ann_info = await api.annotation.download_async(
+            img_info.id,
+            semaphore=semaphore,
+            force_metadata_for_links=False,
+        )
+        ann_json = ann_info.annotation
+    else:
+        tags = TagCollection.from_api_response(
+            img_info.tags,
+            meta.tag_metas,
+            id_to_tagmeta,
+        )
+        tmp_ann = Annotation(img_size=(img_info.height, img_info.width), img_tags=tags)
+        ann_json = tmp_ann.to_json()
+
+    if dataset_fs.item_exists(img_info.name):
+        dataset_fs.delete_item(img_info.name)
+    await dataset_fs.add_item_raw_bytes_async(
+        item_name=img_info.name,
+        item_raw_bytes=img_bytes if save_images is True else None,
+        ann=ann_json,
+        img_info=img_info if save_image_info is True else None,
+    )
+    if progress_cb is not None:
+        progress_cb(1)
+
+
 DatasetDict = Project.DatasetDict
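
The resume_download flag threaded through download_project and _download_project above reuses an existing destination directory: meta.json is rewritten, datasets already on disk are reopened instead of recreated, only items whose updated_at changed are re-downloaded, and local items that no longer exist on the server are deleted. A minimal sketch of a resumable download, assuming the usual leading (api, project_id, dest_dir) parameters of download_project (project ID and destination path are placeholders):

    import supervisely as sly
    from supervisely.project.project import download_project

    api = sly.Api.from_env()

    project_id = 8888                    # placeholder
    dest_dir = "/path/to/save/projects"  # placeholder

    # A first run that was interrupted can simply be repeated; with
    # resume_download=True the existing files are kept and only missing or
    # outdated items are fetched again.
    download_project(api, project_id, dest_dir, resume_download=True)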