supervisely 6.73.342__py3-none-any.whl → 6.73.344__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/__init__.py +3 -2
- supervisely/_utils.py +33 -1
- supervisely/api/annotation_api.py +369 -2
- supervisely/api/api.py +14 -5
- supervisely/api/dataset_api.py +177 -1
- supervisely/api/entity_annotation/figure_api.py +84 -0
- supervisely/api/file_api.py +2 -2
- supervisely/api/image_api.py +740 -52
- supervisely/api/module_api.py +11 -0
- supervisely/api/project_api.py +6 -1
- supervisely/convert/converter.py +4 -0
- supervisely/convert/image/image_helper.py +2 -2
- supervisely/convert/image/sly/sly_image_converter.py +30 -1
- supervisely/io/fs.py +238 -4
- supervisely/project/download.py +5 -16
- supervisely/project/project.py +636 -75
- supervisely/project/project_type.py +2 -0
- supervisely/project/readme_template.md +19 -13
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/METADATA +1 -1
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/RECORD +24 -24
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/LICENSE +0 -0
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/WHEEL +0 -0
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.342.dist-info → supervisely-6.73.344.dist-info}/top_level.txt +0 -0
supervisely/project/project.py
CHANGED
@@ -31,16 +31,22 @@ import supervisely as sly
 from supervisely._utils import (
     abs_url,
     batched,
-    generate_free_name,
     get_or_create_event_loop,
     is_development,
+    removesuffix,
     snake_to_human,
 )
 from supervisely.annotation.annotation import ANN_EXT, Annotation, TagCollection
 from supervisely.annotation.obj_class import ObjClass
 from supervisely.annotation.obj_class_collection import ObjClassCollection
-from supervisely.api.api import Api, ApiContext
-from supervisely.api.image_api import
+from supervisely.api.api import Api, ApiContext, ApiField
+from supervisely.api.image_api import (
+    OFFSETS_PKL_BATCH_SIZE,
+    OFFSETS_PKL_SUFFIX,
+    BlobImageInfo,
+    ImageInfo,
+)
+from supervisely.api.project_api import ProjectInfo
 from supervisely.collection.key_indexed_collection import (
     KeyIndexedCollection,
     KeyObject,
@@ -68,7 +74,9 @@ from supervisely.io.json import dump_json_file, dump_json_file_async, load_json_
 from supervisely.project.project_meta import ProjectMeta
 from supervisely.project.project_type import ProjectType
 from supervisely.sly_logger import logger
-from supervisely.task.progress import
+from supervisely.task.progress import tqdm_sly
+
+TF_BLOB_DIR = "blob-files"  # directory for project blob files in team files


 class CustomUnpickler(pickle.Unpickler):
@@ -224,6 +232,7 @@ class Dataset(KeyObject):
     seg_dir_name = "seg"
     meta_dir_name = "meta"
     datasets_dir_name = "datasets"
+    blob_dir_name = "blob"

     def __init__(
         self,
@@ -273,6 +282,7 @@ class Dataset(KeyObject):
         self._project_dir = project_dir
         self._name = full_ds_name
         self._short_name = short_ds_name
+        self._blob_offset_paths = []

         if self.dataset_id is not None:
             self._read_api()
@@ -537,6 +547,23 @@ class Dataset(KeyObject):
         """
         return os.path.join(self.directory, self.meta_dir_name)

+    @property
+    def blob_offsets(self):
+        """
+        List of paths to the dataset blob offset files.
+
+        :return: List of paths to the dataset blob offset files.
+        :rtype: :class:`List[str]`
+        """
+        return self._blob_offset_paths
+
+    @blob_offsets.setter
+    def blob_offsets(self, value: List[str]):
+        """
+        Set the list of paths to the dataset blob offset files.
+        """
+        self._blob_offset_paths = value
+
     @classmethod
     def _has_valid_ext(cls, path: str) -> bool:
         """
@@ -563,6 +590,23 @@ class Dataset(KeyObject):
         raw_ann_names = set(os.path.basename(path) for path in raw_ann_paths)
         img_names = [os.path.basename(path) for path in img_paths]

+        blob_offset_paths = list_files(
+            self.directory, filter_fn=lambda x: x.endswith(OFFSETS_PKL_SUFFIX)
+        )
+        has_blob_offsets = len(blob_offset_paths) > 0
+
+        # If we have blob offset files, add the image names from those
+        if has_blob_offsets:
+            self.blob_offsets = blob_offset_paths
+            for offset_file_path in self.blob_offsets:
+                try:
+                    blob_img_info_lists = BlobImageInfo.load_from_pickle_generator(offset_file_path)
+                    for blob_img_info_list in blob_img_info_lists:
+                        for blob_img_info in blob_img_info_list:
+                            img_names.append(blob_img_info.name)
+                except Exception as e:
+                    logger.warning(f"Failed to read blob offset file {offset_file_path}: {str(e)}")
+
         if len(img_names) == 0 and len(raw_ann_names) == 0:
             logger.info("Dataset {!r} is empty".format(self.name))
             # raise RuntimeError("Dataset {!r} is empty".format(self.name))
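The two `Dataset` hunks above introduce blob offsets: a dataset directory may now contain `*_offsets.pkl` files that map image names to byte ranges inside a project-level `.tar` blob, and `Dataset` exposes them through the new `blob_offsets` property. A minimal sketch of reading those offsets using only names that appear in this diff (the local project path is a placeholder, not from the diff):

```python
import supervisely as sly
from supervisely.api.image_api import BlobImageInfo

# Local project previously downloaded with blob files (placeholder path)
project = sly.Project("/data/my_project", sly.OpenMode.READ)
for dataset in project.datasets:
    for offsets_path in dataset.blob_offsets:  # paths to the dataset's *_offsets.pkl files
        # load_from_pickle_generator yields batches of BlobImageInfo records
        for batch in BlobImageInfo.load_from_pickle_generator(offsets_path):
            for info in batch:
                print(info.name, info.offset_start, info.offset_end)
```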
@@ -1308,7 +1352,7 @@ class Dataset(KeyObject):

             img_path = "/home/admin/Pictures/Clouds.jpeg"
             img_np = sly.image.read(img_path)
-            img_bytes = sly.image.write_bytes(img_np, "jpeg")
+            img_bytes = sly.image.write_bytes(img_np, "jpeg")
             coroutine = ds.add_item_raw_bytes_async("IMG_050.jpeg", img_bytes)
             run_coroutine(coroutine)

@@ -1691,7 +1735,7 @@ class Dataset(KeyObject):
                 "objects":[],
                 "customBigData":{}
             }
-
+
             coroutine = ds.set_ann_dict_async("IMG_8888.jpeg", new_ann_json)
             run_coroutine(coroutine)
             """
@@ -1723,7 +1767,7 @@ class Dataset(KeyObject):

             height, width = 500, 700
             new_ann = sly.Annotation((height, width))
-
+
             coroutine = ds.set_ann_async("IMG_0748.jpeg", new_ann)
             run_coroutine(coroutine)
             """
@@ -2036,6 +2080,7 @@ class Project:
     """

     dataset_class = Dataset
+    blob_dir_name = "blob"

     class DatasetDict(KeyIndexedCollection):
         """
@@ -2075,6 +2120,7 @@ class Project:

         parent_dir, name = Project._parse_path(directory)
         self._parent_dir = parent_dir
+        self._blob_dir = os.path.join(directory, self.blob_dir_name)
         self._api = api
         self.project_id = project_id

@@ -2086,7 +2132,7 @@ class Project:
         self._name = name
         self._datasets = Project.DatasetDict()  # ds_name -> dataset object
         self._meta = None
-
+        self._blob_files = []
         if project_id is not None:
             self._read_api()
         elif mode is OpenMode.READ:
@@ -2138,6 +2184,25 @@ class Project:
         """
         return self._parent_dir

+    @property
+    def blob_dir(self) -> str:
+        """
+        Directory for project blobs.
+        Blobs are .tar files with images. Used for fast data transfer.
+
+        :return: Path to project blob directory
+        :rtype: :class:`str`
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            print(project.blob_dir)
+            # Output: '/home/admin/work/supervisely/projects/lemons_annotated/blob'
+        """
+        return self._blob_dir
+
     @property
     def name(self) -> str:
         """
@@ -2259,6 +2324,61 @@ class Project:
         """
         return sum(len(ds) for ds in self._datasets)

+    @property
+    def blob_files(self) -> List[str]:
+        """
+        List of blob files.
+
+        :return: List of blob files
+        :rtype: :class:`list`
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            print(project.blob_files)
+            # Output: []
+        """
+        return self._blob_files
+
+    @blob_files.setter
+    def blob_files(self, blob_files: List[str]) -> None:
+        """
+        Sets blob files to the project.
+
+        :param blob_files: List of blob files.
+        :type
+        :return: None
+        :rtype: NoneType
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            project.blob_files = ["blob_file.tar"]
+        """
+        self._blob_files = blob_files
+
+    def add_blob_file(self, file_name: str) -> None:
+        """
+        Adds blob file to the project.
+
+        :param file_name: File name.
+        :type file_name: :class:`str`
+        :return: None
+        :rtype: NoneType
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            project.add_blob_file("blob_file.tar")
+        """
+        self._blob_files.append(file_name)
+
     def get_classes_stats(
         self,
         dataset_names: Optional[List[str]] = None,
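With these hunks, `Project` tracks a local `blob/` directory next to its datasets: `blob_dir` points at it, `blob_files` lists the `.tar` archives found there, and `add_blob_file` registers archives as they are downloaded. A short sketch of inspecting that state on an already-downloaded project, using only the properties shown above (the path is a placeholder):

```python
import os
import supervisely as sly

project = sly.Project("/data/my_project", sly.OpenMode.READ)  # placeholder path
print(project.blob_dir)    # e.g. /data/my_project/blob
print(project.blob_files)  # e.g. ['some_archive.tar'], or [] when no blobs were downloaded

for blob_file in project.blob_files:
    size = os.path.getsize(os.path.join(project.blob_dir, blob_file))
    print(f"{blob_file}: {size / 1024 / 1024:.1f} MiB")
```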
@@ -2296,6 +2416,10 @@ class Project:
     def _read(self):
         meta_json = load_json_file(self._get_project_meta_path())
         self._meta = ProjectMeta.from_json(meta_json)
+        if dir_exists(self.blob_dir):
+            self.blob_files = [Path(file).name for file in list_files(self.blob_dir)]
+        else:
+            self.blob_files = []

         ignore_dirs = self.dataset_class.ignorable_dirs()  # dir names that can not be datasets

@@ -2350,6 +2474,7 @@ class Project:
         else:
             mkdir(self.directory)
             self.set_meta(ProjectMeta())
+            self.blob_files = []

     def validate(self):
         # @TODO: remove?
@@ -3085,6 +3210,7 @@ class Project:
         save_images: bool = True,
         save_image_meta: bool = False,
         resume_download: bool = False,
+        **kwargs,
     ) -> None:
         """
         Download project from Supervisely to the given directory.
@@ -3113,6 +3239,9 @@ class Project:
         :type save_images: :class:`bool`, optional
         :param save_image_meta: Download images metadata in JSON format or not.
         :type save_image_meta: :class:`bool`, optional
+        :param download_blob_files: Default is False. It will download images in classic way.
+            If True, it will download blob files, if they are present in the project, to optimize download process.
+        :type download_blob_files: bool, optional
         :return: None
         :rtype: NoneType
         :Usage example:
@@ -3151,6 +3280,7 @@ class Project:
             save_images=save_images,
             save_image_meta=save_image_meta,
             resume_download=resume_download,
+            **kwargs,
         )

     @staticmethod
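`Project.download` (and, further below, the module-level `download_project` and the async variants) now forwards extra keyword arguments down to `_download_project`, which reads `download_blob_files` from them. A hedged usage sketch based only on the signatures shown in this diff (IDs and paths are placeholders):

```python
import supervisely as sly

api = sly.Api.from_env()
project_id = 123               # placeholder project id
dest_dir = "/data/my_project"  # placeholder local directory

# Classic per-image download (default, download_blob_files=False)
sly.Project.download(api, project_id, dest_dir)

# Blob-optimized download: also fetches the project's .tar blobs and writes
# per-dataset *_offsets.pkl files for images stored as offsets in Team Files
sly.Project.download(api, project_id, dest_dir, download_blob_files=True)
```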
@@ -3731,7 +3861,7 @@ class Project:

             project_id = 8888
             save_directory = "/path/to/save/projects"
-
+
             coroutine = sly.Project.download_async(api, project_id, save_directory)
             run_coroutine(coroutine)
             """
@@ -3755,6 +3885,7 @@ class Project:
             save_image_meta=save_image_meta,
             images_ids=images_ids,
             resume_download=resume_download,
+            **kwargs,
         )

     def to_coco(
@@ -4002,9 +4133,13 @@ def _download_project(
     save_image_meta: Optional[bool] = False,
     images_ids: Optional[List[int]] = None,
     resume_download: Optional[bool] = False,
+    **kwargs,
 ):
+    download_blob_files = kwargs.pop("download_blob_files", False)
+
     dataset_ids = set(dataset_ids) if (dataset_ids is not None) else None
     project_fs = None
+
     meta = ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
     if os.path.exists(dest_dir) and resume_download:
         dump_json_file(meta.to_json(), os.path.join(dest_dir, "meta.json"))
@@ -4029,6 +4164,7 @@ def _download_project(

     existing_datasets = {dataset.path: dataset for dataset in project_fs.datasets}
     for parents, dataset in api.dataset.tree(project_id):
+        blob_files_to_download = {}
         dataset_path = Dataset._get_dataset_path(dataset.name, parents)
         dataset_id = dataset.id
         if dataset_ids is not None and dataset_id not in dataset_ids:
@@ -4065,6 +4201,7 @@ def _download_project(
             project_meta=meta,
         ):
             for batch in batched(images, batch_size):
+                batch: List[ImageInfo]
                 image_ids = [image_info.id for image_info in batch]
                 image_names = [image_info.name for image_info in batch]

@@ -4085,18 +4222,97 @@ def _download_project(
                    ):
                        indexes_to_download.append(i)

-                #
+                # Collect images that was added to the project as offsets from archive in Team Files
+                indexes_with_offsets = []
+                for idx in indexes_to_download:
+                    image_info: ImageInfo = batch[idx]
+                    if image_info.related_data_id is not None:
+                        blob_files_to_download[image_info.related_data_id] = image_info.download_id
+                        indexes_with_offsets.append(idx)
+
+                # Download images in numpy format
                 batch_imgs_bytes = [None] * len(image_ids)
                 if save_images and indexes_to_download:
-
-
-
-
-
-
-
-
-
+
+                    # For a lot of small files that stored in blob file. Downloads blob files to optimize download process.
+                    if download_blob_files and len(indexes_with_offsets) > 0:
+                        bytes_indexes_to_download = indexes_to_download.copy()
+                        for blob_file_id, download_id in blob_files_to_download.items():
+                            if blob_file_id not in project_fs.blob_files:
+                                api.image.download_blob_file(
+                                    project_id=project_id,
+                                    download_id=download_id,
+                                    path=os.path.join(project_fs.blob_dir, f"{blob_file_id}.tar"),
+                                    log_progress=(
+                                        True if log_progress or progress_cb is not None else False
+                                    ),
+                                )
+                                project_fs.add_blob_file(blob_file_id)
+
+                            # Process blob image offsets
+                            offsets_file_name = f"{blob_file_id}{OFFSETS_PKL_SUFFIX}"
+                            offsets_file_path = os.path.join(
+                                dataset_fs.directory, offsets_file_name
+                            )
+
+                            # Initialize counter for total image offsets for this blob file
+                            total_offsets_count = 0
+                            current_batch = []
+
+                            # Get offsets from image infos
+                            for idx in indexes_with_offsets:
+                                image_info = batch[idx]
+                                if image_info.related_data_id == blob_file_id:
+                                    blob_image_info = BlobImageInfo(
+                                        name=image_info.name,
+                                        offset_start=image_info.offset_start,
+                                        offset_end=image_info.offset_end,
+                                    )
+                                    current_batch.append(blob_image_info)
+                                    bytes_indexes_to_download.remove(idx)
+
+                                    # When batch size is reached, dump to file
+                                    if len(current_batch) >= OFFSETS_PKL_BATCH_SIZE:
+                                        BlobImageInfo.dump_to_pickle(
+                                            current_batch, offsets_file_path
+                                        )
+                                        total_offsets_count += len(current_batch)
+                                        current_batch = []
+                            # Dump any remaining items in the last batch
+                            if len(current_batch) > 0:
+                                BlobImageInfo.dump_to_pickle(current_batch, offsets_file_path)
+                                total_offsets_count += len(current_batch)
+
+                            if total_offsets_count > 0:
+                                logger.debug(
+                                    f"Saved {total_offsets_count} image offsets for {blob_file_id} to {offsets_file_path} in {(total_offsets_count + OFFSETS_PKL_BATCH_SIZE - 1) // OFFSETS_PKL_BATCH_SIZE} batches"
+                                )
+                                ds_progress(total_offsets_count)
+
+                        image_ids_to_download = [
+                            image_ids[i] for i in bytes_indexes_to_download
+                        ]
+                        for index, img in zip(
+                            bytes_indexes_to_download,
+                            api.image.download_bytes(
+                                dataset_id,
+                                image_ids_to_download,
+                                progress_cb=ds_progress,
+                            ),
+                        ):
+                            batch_imgs_bytes[index] = img
+                    # If you want to download images in classic way
+                    else:
+                        image_ids_to_download = [image_ids[i] for i in indexes_to_download]
+                        for index, img in zip(
+                            indexes_to_download,
+                            api.image.download_bytes(
+                                dataset_id,
+                                image_ids_to_download,
+                                progress_cb=ds_progress,
+                            ),
+                        ):
+                            batch_imgs_bytes[index] = img

                 if ds_progress is not None:
                     ds_progress(len(batch) - len(indexes_to_download))
@@ -4160,7 +4376,11 @@ def _download_project(
             if item_name not in items_names_set:
                 dataset_fs.delete_item(item_name)
     try:
-
+        if download_blob_files:
+            project_info = api.project.get_info_by_id(project_id)
+            create_blob_readme(project_fs=project_fs, project_info=project_info)
+        else:
+            create_readme(dest_dir, project_id, api)
     except Exception as e:
         logger.info(f"There was an error while creating README: {e}")

@@ -4172,15 +4392,20 @@ def upload_project(
     project_name: Optional[str] = None,
     log_progress: bool = True,
     progress_cb: Optional[Union[tqdm, Callable]] = None,
+    project_id: Optional[int] = None,
 ) -> Tuple[int, str]:
     project_fs = read_single_project(dir)
-    if project_name is None:
-        project_name = project_fs.name

-    if
-        project_name
+    if not project_id:
+        if project_name is None:
+            project_name = project_fs.name
+
+        if api.project.exists(workspace_id, project_name):
+            project_name = api.project.get_free_name(workspace_id, project_name)

-
+        project = api.project.create(workspace_id, project_name, change_name_if_conflict=True)
+    else:
+        project = api.project.get_info_by_id(project_id)
     updated_meta = api.project.update_meta(project.id, project_fs.meta.to_json())

     if progress_cb is not None:
@@ -4189,6 +4414,29 @@ def upload_project(
     # image_id_dct, anns_paths_dct = {}, {}
     dataset_map = {}

+    total_blob_size = 0
+    upload_blob_progress = None
+    src_paths = []
+    dst_paths = []
+    for blob_file in project_fs.blob_files:
+        if log_progress:
+            total_blob_size += os.path.getsize(os.path.join(project_fs.blob_dir, blob_file))
+        src_paths.append(os.path.join(project_fs.blob_dir, blob_file))
+        dst_paths.append(os.path.join(f"/{TF_BLOB_DIR}", blob_file))
+    if log_progress and len(src_paths) > 0:
+        upload_blob_progress = tqdm_sly(
+            desc="Uploading blob files", total=total_blob_size, unit="B", unit_scale=True
+        )
+    if len(src_paths) > 0:
+        blob_file_infos = api.file.upload_bulk(
+            team_id=project.team_id,
+            src_paths=src_paths,
+            dst_paths=dst_paths,
+            progress_cb=upload_blob_progress,
+        )
+    else:
+        blob_file_infos = []
+
     for ds_fs in project_fs.datasets:
         if len(ds_fs.parents) > 0:
             parent = f"{os.path.sep}".join(ds_fs.parents)
@@ -4221,13 +4469,26 @@ def upload_project(
         else:
             img_infos.append(None)

-        img_paths = list(filter(lambda x: os.path.isfile(x), img_paths))
+        # img_paths = list(filter(lambda x: os.path.isfile(x), img_paths))
+        source_img_paths_len = len(img_paths)
+        valid_indices = []
+        valid_paths = []
+        offset_indices = []
+        for i, path in enumerate(img_paths):
+            if os.path.isfile(path):
+                valid_indices.append(i)
+                valid_paths.append(path)
+            else:
+                offset_indices.append(i)
+        img_paths = valid_paths
         ann_paths = list(filter(lambda x: os.path.isfile(x), ann_paths))
+        # Create a mapping from name to index position for quick lookups
+        offset_name_to_idx = {names[i]: i for i in offset_indices}
         metas = [{} for _ in names]

         img_infos_count = sum(1 for item in img_infos if item is not None)

-        if len(img_paths) == 0 and img_infos_count == 0:
+        if len(img_paths) == 0 and img_infos_count == 0 and len(offset_indices) == 0:
            # Dataset is empty
            continue

@@ -4258,56 +4519,57 @@ def upload_project(
            merged_metas.append(merged_meta)
         metas = merged_metas

-        if len(img_paths) != 0:
-
-
+        if len(img_paths) != 0 or len(offset_indices) != 0:
+
+            uploaded_img_infos = [None] * source_img_paths_len
+            uploaded_img_infos_paths = api.image.upload_paths(
+                dataset_id=dataset.id,
+                names=[name for i, name in enumerate(names) if i in valid_indices],
+                paths=img_paths,
+                progress_cb=ds_progress,
+                metas=[metas[i] for i in valid_indices],
             )
+            for i, img_info in zip(valid_indices, uploaded_img_infos_paths):
+                uploaded_img_infos[i] = img_info
+            for blob_offsets in ds_fs.blob_offsets:
+                blob_file = None
+                for blob_file_info in blob_file_infos:
+                    if Path(blob_file_info.name).stem == removesuffix(
+                        Path(blob_offsets).name, OFFSETS_PKL_SUFFIX
+                    ):
+                        blob_file = blob_file_info
+                        break
+
+                if blob_file is None:
+                    raise ValueError(
+                        f"Cannot find blob file for offsets: {blob_offsets}. "
+                        f"Check the Team File directory '{TF_BLOB_DIR}', corresponding blob file should be uploaded."
+                    )
+                uploaded_img_infos_offsets = api.image.upload_by_offsets_generator(
+                    dataset=dataset,
+                    team_file_id=blob_file.id,
+                    offsets_file_path=blob_offsets,
+                    progress_cb=ds_progress,
+                    metas={names[i]: metas[i] for i in offset_indices},
+                )
+                for img_info_batch in uploaded_img_infos_offsets:
+                    for img_info in img_info_batch:
+                        idx = offset_name_to_idx.get(img_info.name)
+                        if idx is not None:
+                            uploaded_img_infos[idx] = img_info
         elif img_infos_count != 0:
             if img_infos_count != len(names):
                 raise ValueError(
                     f"Cannot upload Project: image info files count ({img_infos_count}) doesn't match with images count ({len(names)}) that are going to be uploaded. "
                     "Check the directory structure, all annotation files should have corresponding image info files."
                 )
-
-
-
-
-
-
-
-                    links_names.append(name)
-                    links_order.append(idx)
-                    links_metas.append(meta)
-                else:
-                    hashes.append(info.hash)
-                    hashes_names.append(name)
-                    hashes_order.append(idx)
-                    hashes_metas.append(meta)
-
-            result = [None] * len(names)
-            if len(links) > 0:
-                res_infos_links = api.image.upload_links(
-                    dataset_id,
-                    links_names,
-                    links,
-                    ds_progress,
-                    metas=links_metas,
-                )
-                for info, pos in zip(res_infos_links, links_order):
-                    result[pos] = info
-
-            if len(hashes) > 0:
-                res_infos_hashes = api.image.upload_hashes(
-                    dataset_id,
-                    hashes_names,
-                    hashes,
-                    ds_progress,
-                    metas=hashes_metas,
-                )
-                for info, pos in zip(res_infos_hashes, hashes_order):
-                    result[pos] = info
-
-            uploaded_img_infos = result
+            uploaded_img_infos = api.image.upload_ids(
+                dataset_id=dataset.id,
+                names=names,
+                ids=[img_info.id for img_info in img_infos],
+                progress_cb=ds_progress,
+                metas=metas,
+            )
         else:
             raise ValueError(
                 "Cannot upload Project: img_paths is empty and img_infos_paths is empty"
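`upload_project` now accepts an optional `project_id` (upload into an existing project instead of creating a new one) and, when the local project has blob archives, first pushes the `.tar` files to the `blob-files` Team Files directory and then registers images from each dataset's `*_offsets.pkl` via `api.image.upload_by_offsets_generator`. A hedged sketch of the call; the leading parameters (`dir`, `api`, `workspace_id`) and the `(id, name)` unpacking of the `Tuple[int, str]` return are inferred from names used in the function body and its annotation, and all IDs/paths are placeholders:

```python
import supervisely as sly
from supervisely.project.project import upload_project

api = sly.Api.from_env()
workspace_id = 42  # placeholder workspace id

# Create a new project from a local directory; blob .tar files from the project's
# blob/ directory are uploaded to the "blob-files" Team Files folder first
new_id, new_name = upload_project(
    dir="/data/my_project",
    api=api,
    workspace_id=workspace_id,
    project_name="my_project",
)

# Or upload into an existing project via the new project_id parameter
upload_project(dir="/data/my_project", api=api, workspace_id=workspace_id, project_id=777)
```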
@@ -4343,6 +4605,7 @@ def download_project(
     save_image_meta: bool = False,
     images_ids: Optional[List[int]] = None,
     resume_download: Optional[bool] = False,
+    **kwargs,
 ) -> None:
     """
     Download image project to the local directory.
@@ -4353,7 +4616,7 @@ def download_project(
     :type project_id: int
     :param dest_dir: Destination path to local directory.
     :type dest_dir: str
-    :param dataset_ids: Specified list of Dataset IDs which will be downloaded.
+    :param dataset_ids: Specified list of Dataset IDs which will be downloaded.
     :type dataset_ids: list(int), optional
     :param log_progress: Show downloading logs in the output. By default, it is True.
     :type log_progress: bool, optional
@@ -4375,6 +4638,9 @@ def download_project(
     :type images_ids: list(int), optional
     :param resume_download: Resume download enables to download only missing files avoiding erase of existing files.
     :type resume_download: bool, optional
+    :param download_blob_files: Default is False. It will download images in classic way.
+        If True, it will download blob files, if they are present in the project, to optimize download process.
+    :type download_blob_files: bool, optional
     :return: None.
     :rtype: NoneType
     :Usage example:
@@ -4426,6 +4692,7 @@ def download_project(
             save_image_meta=save_image_meta,
             images_ids=images_ids,
             resume_download=resume_download,
+            **kwargs,
         )
     else:
         _download_project_optimized(
@@ -4440,6 +4707,7 @@ def download_project(
             save_images=save_images,
             log_progress=log_progress,
             images_ids=images_ids,
+            **kwargs,
         )


@@ -4455,6 +4723,7 @@ def _download_project_optimized(
     save_images=True,
     log_progress=True,
     images_ids: List[int] = None,
+    **kwargs,
 ):
     project_info = api.project.get_info_by_id(project_id)
     project_id = project_info.id
@@ -4736,6 +5005,209 @@ def create_readme(
     return readme_path


+def _dataset_blob_structure_md(
+    project_fs: Project,
+    project_info: sly.ProjectInfo,
+    entity_limit: Optional[int] = 2,
+) -> str:
+    """Creates a markdown string with the dataset structure of the project.
+    Supports only images and videos projects.
+
+    :project_fs: Project file system.
+    :type project_fs: :class:`Project<supervisely.project.project.Project>`
+    :param project_info: Project information.
+    :type project_info: :class:`ProjectInfo<supervisely.project.project_info.ProjectInfo>`
+    :param entity_limit: The maximum number of entities to display in the README.
+    :type entity_limit: int, optional
+    :return: Markdown string with the dataset structure of the project.
+    :rtype: str
+    """
+    supported_project_types = [sly.ProjectType.IMAGES.value]
+    if project_info.type not in supported_project_types:
+        return ""
+
+    entity_icons = {
+        "images": " 🏞️ ",
+        "blob_files": " 📦 ",
+        "pkl_files": " 📄 ",
+        "annotations": " 📝 ",
+    }
+    dataset_icon = " 📂 "
+    folder_icon = " 📁 "
+
+    result_md = f"🗂️ {project_info.name}<br>"
+
+    # Add project-level blob files
+    if os.path.exists(project_fs.blob_dir) and project_fs.blob_files:
+        result_md += "┣" + folder_icon + f"{Project.blob_dir_name}<br>"
+        blob_files = [entry.name for entry in os.scandir(project_fs.blob_dir) if entry.is_file()]
+
+        for idx, blob_file in enumerate(blob_files):
+            if idx == entity_limit and len(blob_files) > entity_limit:
+                result_md += "┃ ┗ ... " + str(len(blob_files) - entity_limit) + " more<br>"
+                break
+            symbol = "┗" if idx == len(blob_files) - 1 or idx == entity_limit - 1 else "┣"
+            result_md += "┃ " + symbol + entity_icons["blob_files"] + blob_file + "<br>"
+
+    # Build a dataset hierarchy tree
+    dataset_tree = {}
+    root_datasets = []
+
+    # First pass: create nodes for all datasets
+    for dataset in project_fs.datasets:
+        dataset_tree[dataset.directory] = {
+            "dataset": dataset,
+            "children": [],
+            "parent_dir": os.path.dirname(dataset.directory) if dataset.parents else None,
+        }
+
+    # Second pass: build parent-child relationships
+    for dir_path, node in dataset_tree.items():
+        parent_dir = node["parent_dir"]
+        if parent_dir in dataset_tree:
+            dataset_tree[parent_dir]["children"].append(dir_path)
+        else:
+            root_datasets.append(dir_path)
+
+    # Function to recursively render the dataset tree
+    def render_tree(dir_path, prefix=""):
+        nonlocal result_md
+        node = dataset_tree[dir_path]
+        dataset = node["dataset"]
+        children = node["children"]
+
+        # Create dataset display with proper path
+        dataset_path = Dataset._get_dataset_path(dataset.name, dataset.parents)
+        result_md += prefix + "┣" + dataset_icon + f"[{dataset.name}]({dataset_path})<br>"
+
+        # Set indentation for dataset content
+        content_prefix = prefix + "┃ "
+
+        # Add pkl files at the dataset level
+        offset_files = [
+            entry.name
+            for entry in os.scandir(dataset.directory)
+            if entry.is_file() and entry.name.endswith(".pkl")
+        ]
+
+        if offset_files:
+            for idx, pkl_file in enumerate(offset_files):
+                last_file = idx == len(offset_files) - 1
+                has_more_content = (
+                    os.path.exists(dataset.img_dir) or os.path.exists(dataset.ann_dir) or children
+                )
+                symbol = "┗" if last_file and not has_more_content else "┣"
+                result_md += content_prefix + symbol + entity_icons["pkl_files"] + pkl_file + "<br>"
+
+        # Add img directory
+        if os.path.exists(dataset.img_dir):
+            has_ann_dir = os.path.exists(dataset.ann_dir)
+            has_more_content = has_ann_dir or children
+            symbol = "┣" if has_more_content else "┗"
+            result_md += content_prefix + symbol + folder_icon + "img<br>"
+
+            # Add image files
+            entities = [entry.name for entry in os.scandir(dataset.img_dir) if entry.is_file()]
+            entities = sorted(entities)
+            selected_entities = entities[: min(len(entities), entity_limit)]
+
+            img_prefix = content_prefix + "┃ "
+            for idx, entity in enumerate(selected_entities):
+                last_img = idx == len(selected_entities) - 1
+                symbol = "┗" if last_img and len(entities) <= entity_limit else "┣"
+                result_md += img_prefix + symbol + entity_icons["images"] + entity + "<br>"
+
+            if len(entities) > entity_limit:
+                result_md += img_prefix + "┗ ... " + str(len(entities) - entity_limit) + " more<br>"
+
+        # Add ann directory
+        if os.path.exists(dataset.ann_dir):
+            has_more_content = bool(children)
+            symbol = "┣"
+            result_md += content_prefix + "┣" + folder_icon + "ann<br>"
+
+            anns = [entry.name for entry in os.scandir(dataset.ann_dir) if entry.is_file()]
+            anns = sorted(anns)
+
+            # Try to match annotations with displayed images
+            possible_anns = [f"{entity}.json" for entity in selected_entities]
+            matched_anns = [pa for pa in possible_anns if pa in anns]
+
+            # Add additional annotations if we haven't reached the limit
+            if len(matched_anns) < min(entity_limit, len(anns)):
+                for ann in anns:
+                    if ann not in matched_anns and len(matched_anns) < entity_limit:
+                        matched_anns.append(ann)
+
+            ann_prefix = content_prefix + "┃ "
+            for idx, ann in enumerate(matched_anns):
+                last_ann = idx == len(matched_anns) - 1
+                symbol = "┗" if last_ann and len(anns) <= entity_limit else "┣"
+                result_md += ann_prefix + symbol + entity_icons["annotations"] + ann + "<br>"
+
+            if len(anns) > entity_limit:
+                result_md += ann_prefix + "┗ ... " + str(len(anns) - entity_limit) + " more<br>"
+
+        if not has_more_content:
+            result_md += content_prefix + "...<br>"
+        # Recursively render child datasets
+        for idx, child_dir in enumerate(children):
+            render_tree(child_dir, content_prefix)
+
+    # Start rendering from root datasets
+    for root_dir in sorted(root_datasets):
+        render_tree(root_dir)
+
+    return result_md
+
+
+def create_blob_readme(
+    project_fs: Project,
+    project_info: ProjectInfo,
+) -> str:
+    """Creates a README.md file using the template, adds general information
+    about the project and creates a dataset structure section.
+
+    :param project_fs: Project file system.
+    :type project_fs: :class:`Project<supervisely.project.project.Project>`
+    :param project_info: Project information.
+    :type project_info: :class:`ProjectInfo<supervisely.project.project_info.ProjectInfo>`
+    :return: Path to the created README.md file.
+    :rtype: str
+
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        api = sly.Api.from_env()
+
+        project_id = 123
+        project_dir = "/path/to/project"
+
+        readme_path = sly.create_readme(project_dir, project_id, api)
+
+        print(f"README.md file was created at {readme_path}")
+    """
+    current_path = os.path.dirname(os.path.abspath(__file__))
+    template_path = os.path.join(current_path, "readme_template.md")
+    with open(template_path, "r") as file:
+        template = file.read()
+
+    readme_path = os.path.join(project_fs.directory, "README.md")
+
+    template = template.replace("{{general_info}}", _project_info_md(project_info))
+
+    template = template.replace(
+        "{{dataset_structure_info}}", _dataset_blob_structure_md(project_fs, project_info)
+    )
+
+    with open(readme_path, "w") as f:
+        f.write(template)
+    return readme_path
+
+
 def _project_info_md(project_info: sly.ProjectInfo) -> str:
     """Creates a markdown string with general information about the project
     using the fields of the ProjectInfo NamedTuple.
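`create_blob_readme` mirrors the existing `create_readme`, but builds the structure section from the already-downloaded local project instead of querying the API, and it also lists blob archives and offset `.pkl` files. A hedged sketch of generating the README for a locally downloaded blob project, using only the signature shown above (IDs and paths are placeholders):

```python
import supervisely as sly
from supervisely.project.project import create_blob_readme

api = sly.Api.from_env()
project_id = 123  # placeholder id

project_fs = sly.Project("/data/my_project", sly.OpenMode.READ)  # local copy
project_info = api.project.get_info_by_id(project_id)            # remote project info

readme_path = create_blob_readme(project_fs=project_fs, project_info=project_info)
print(readme_path)  # <project dir>/README.md
```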
@@ -4784,6 +5256,9 @@ def _dataset_structure_md(
     entity_icons = {
         "images": " 🏞️ ",
         "videos": " 🎥 ",
+        "blob_files": " 📦 ",
+        "pkl_files": " 📄 ",
+        "annotations": " 📝 ",
     }
     dataset_icon = " 📂 "
     list_function = list_functions[project_info.type]
@@ -4791,6 +5266,8 @@ def _dataset_structure_md(

     result_md = f"🗂️ {project_info.name}<br>"

+    # if project_info
+
     for parents, dataset_info in api.dataset.tree(project_info.id):
         # The dataset path is needed to create a clickable link in the README.
         dataset_path = Dataset._get_dataset_path(dataset_info.name, parents)
@@ -4841,6 +5318,8 @@ async def _download_project_async(
     switch_size = kwargs.get("switch_size", 1.28 * 1024 * 1024)
     # batch size for bulk download
     batch_size = kwargs.get("batch_size", 100)
+    # control whether to download blob files
+    download_blob_files = kwargs.get("download_blob_files", False)

     if semaphore is None:
         semaphore = api.get_default_semaphore()
@@ -4890,11 +5369,19 @@ async def _download_project_async(
         small_images = []
         large_images = []
         dataset_images = []
+        blob_files_to_download = {}
+        blob_images = []
+
         async for image_batch in all_images:
             for image in image_batch:
                 if images_ids is None or image.id in images_ids:
                     dataset_images.append(image)
-
+                    # Check for images with blob offsets
+
+                    if download_blob_files and image.related_data_id is not None:
+                        blob_files_to_download[image.related_data_id] = image.download_id
+                        blob_images.append(image)
+                    elif image.size < switch_size:
                         small_images.append(image)
                     else:
                         large_images.append(image)
@@ -4903,7 +5390,7 @@ async def _download_project_async(
         if log_progress is True:
             ds_progress = tqdm_sly(
                 desc="Downloading images from {!r}".format(dataset.name),
-                total=len(small_images) + len(large_images),
+                total=len(small_images) + len(large_images) + len(blob_images),
                 leave=False,
             )

@@ -4939,14 +5426,82 @@ async def _download_project_async(
             )
             return created_tasks

+        # Download blob files if required
+        if download_blob_files and len(blob_files_to_download) > 0:
+            blob_paths = []
+            download_ids = []
+            # Process each blob file
+            for blob_file_id, download_id in blob_files_to_download.items():
+                if blob_file_id not in project_fs.blob_files:
+                    # Download the blob file
+                    blob_paths.append(os.path.join(project_fs.blob_dir, f"{blob_file_id}.tar"))
+                    download_ids.append(download_id)
+            await api.image.download_blob_files_async(
+                project_id=project_id,
+                download_ids=download_ids,
+                paths=blob_paths,
+                semaphore=semaphore,
+                log_progress=(True if log_progress or progress_cb is not None else False),
+            )
+            for blob_file_id, download_id in blob_files_to_download.items():
+                project_fs.add_blob_file(blob_file_id)
+
+                # Process blob image offsets
+                offsets_file_name = f"{blob_file_id}{OFFSETS_PKL_SUFFIX}"
+                offsets_file_path = os.path.join(dataset_fs.directory, offsets_file_name)
+
+                total_offsets_count = 0  # for logging
+                current_batch = []
+                for img in blob_images:
+                    if img.related_data_id == blob_file_id:
+                        blob_image_info = BlobImageInfo(
+                            name=img.name,
+                            offset_start=img.offset_start,
+                            offset_end=img.offset_end,
+                        )
+                        current_batch.append(blob_image_info)
+                        if len(current_batch) >= OFFSETS_PKL_BATCH_SIZE:
+                            BlobImageInfo.dump_to_pickle(current_batch, offsets_file_path)
+                            total_offsets_count += len(current_batch)
+                            current_batch = []
+                if len(current_batch) > 0:
+                    BlobImageInfo.dump_to_pickle(current_batch, offsets_file_path)
+                    total_offsets_count += len(current_batch)
+                if total_offsets_count > 0:
+                    logger.debug(
+                        f"Saved {total_offsets_count} image offsets for {blob_file_id} to {offsets_file_path} in {(total_offsets_count + OFFSETS_PKL_BATCH_SIZE - 1) // OFFSETS_PKL_BATCH_SIZE} batches"
+                    )
+            offset_tasks = []
+            # Download annotations for images with offsets
+            for offsets_batch in batched(blob_images, batch_size=batch_size):
+                offset_task = _download_project_items_batch_async(
+                    api=api,
+                    dataset_id=dataset_id,
+                    img_infos=offsets_batch,
+                    meta=meta,
+                    dataset_fs=dataset_fs,
+                    id_to_tagmeta=id_to_tagmeta,
+                    semaphore=semaphore,
+                    save_images=False,
+                    save_image_info=save_image_info,
+                    only_image_tags=only_image_tags,
+                    progress_cb=ds_progress,
+                )
+                offset_tasks.append(offset_task)
+            created_tasks = await run_tasks_with_delay(offset_tasks, 0.05)
+            await asyncio.gather(*created_tasks)

         tasks = []
+        # Check which images need to be downloaded
         small_images = await check_items(small_images)
         large_images = await check_items(large_images)

+        # If only one small image, treat it as a large image for efficiency
         if len(small_images) == 1:
             large_images.append(small_images.pop())
-        for images_batch in batched(small_images, batch_size=batch_size):

+        # Create batch download tasks
+        for images_batch in batched(small_images, batch_size=batch_size):
             task = _download_project_items_batch_async(
                 api=api,
                 dataset_id=dataset_id,
@@ -4961,6 +5516,8 @@ async def _download_project_async(
                 progress_cb=ds_progress,
             )
             tasks.append(task)
+
+        # Create individual download tasks for large images
         for image in large_images:
             task = _download_project_item_async(
                 api=api,
@@ -4995,7 +5552,11 @@ async def _download_project_async(
             dataset_fs.delete_item(item_name)

     try:
-
+        if download_blob_files:
+            project_info = api.project.get_info_by_id(project_id)
+            create_blob_readme(project_fs=project_fs, project_info=project_info)
+        else:
+            create_readme(dest_dir, project_id, api)
     except Exception as e:
         logger.info(f"There was an error while creating README: {e}")

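The async path mirrors the synchronous one: `_download_project_async` reads `download_blob_files` from `**kwargs`, downloads the referenced `.tar` archives with `api.image.download_blob_files_async`, writes per-dataset `*_offsets.pkl` files, and then fetches only annotations for those images. A hedged usage sketch through the public coroutine; the SDK docstrings shown above use `run_coroutine`, but this sketch uses the event-loop helper imported in this file, and IDs/paths are placeholders:

```python
import supervisely as sly
from supervisely._utils import get_or_create_event_loop

api = sly.Api.from_env()
project_id = 8888                    # placeholder id
save_directory = "/data/my_project"  # placeholder path

# download_blob_files travels through **kwargs down to _download_project_async
coroutine = sly.Project.download_async(api, project_id, save_directory, download_blob_files=True)
loop = get_or_create_event_loop()
loop.run_until_complete(coroutine)
```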