supervisely-6.73.343-py3-none-any.whl → supervisely-6.73.345-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/__init__.py +3 -2
- supervisely/_utils.py +33 -1
- supervisely/api/annotation_api.py +369 -2
- supervisely/api/api.py +14 -5
- supervisely/api/dataset_api.py +177 -1
- supervisely/api/entity_annotation/figure_api.py +84 -0
- supervisely/api/file_api.py +2 -2
- supervisely/api/image_api.py +740 -52
- supervisely/api/module_api.py +11 -0
- supervisely/api/project_api.py +6 -1
- supervisely/convert/converter.py +4 -0
- supervisely/convert/image/sly/fast_sly_image_converter.py +11 -5
- supervisely/convert/image/sly/sly_image_converter.py +41 -1
- supervisely/io/fs.py +238 -4
- supervisely/project/data_version.py +7 -1
- supervisely/project/download.py +5 -16
- supervisely/project/project.py +721 -79
- supervisely/project/project_type.py +2 -0
- supervisely/project/readme_template.md +19 -13
- {supervisely-6.73.343.dist-info → supervisely-6.73.345.dist-info}/METADATA +1 -1
- {supervisely-6.73.343.dist-info → supervisely-6.73.345.dist-info}/RECORD +25 -25
- {supervisely-6.73.343.dist-info → supervisely-6.73.345.dist-info}/LICENSE +0 -0
- {supervisely-6.73.343.dist-info → supervisely-6.73.345.dist-info}/WHEEL +0 -0
- {supervisely-6.73.343.dist-info → supervisely-6.73.345.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.343.dist-info → supervisely-6.73.345.dist-info}/top_level.txt +0 -0
supervisely/project/project.py
CHANGED
```diff
@@ -31,16 +31,22 @@ import supervisely as sly
 from supervisely._utils import (
     abs_url,
     batched,
-    generate_free_name,
     get_or_create_event_loop,
     is_development,
+    removesuffix,
     snake_to_human,
 )
 from supervisely.annotation.annotation import ANN_EXT, Annotation, TagCollection
 from supervisely.annotation.obj_class import ObjClass
 from supervisely.annotation.obj_class_collection import ObjClassCollection
-from supervisely.api.api import Api, ApiContext
-from supervisely.api.image_api import
+from supervisely.api.api import Api, ApiContext, ApiField
+from supervisely.api.image_api import (
+    OFFSETS_PKL_BATCH_SIZE,
+    OFFSETS_PKL_SUFFIX,
+    BlobImageInfo,
+    ImageInfo,
+)
+from supervisely.api.project_api import ProjectInfo
 from supervisely.collection.key_indexed_collection import (
     KeyIndexedCollection,
     KeyObject,
@@ -68,7 +74,9 @@ from supervisely.io.json import dump_json_file, dump_json_file_async, load_json_
 from supervisely.project.project_meta import ProjectMeta
 from supervisely.project.project_type import ProjectType
 from supervisely.sly_logger import logger
-from supervisely.task.progress import
+from supervisely.task.progress import tqdm_sly
+
+TF_BLOB_DIR = "blob-files"  # directory for project blob files in team files
 
 
 class CustomUnpickler(pickle.Unpickler):
@@ -224,6 +232,7 @@ class Dataset(KeyObject):
     seg_dir_name = "seg"
     meta_dir_name = "meta"
     datasets_dir_name = "datasets"
+    blob_dir_name = "blob"
 
     def __init__(
         self,
@@ -273,6 +282,7 @@ class Dataset(KeyObject):
         self._project_dir = project_dir
         self._name = full_ds_name
         self._short_name = short_ds_name
+        self._blob_offset_paths = []
 
         if self.dataset_id is not None:
             self._read_api()
@@ -537,6 +547,23 @@ class Dataset(KeyObject):
         """
         return os.path.join(self.directory, self.meta_dir_name)
 
+    @property
+    def blob_offsets(self):
+        """
+        List of paths to the dataset blob offset files.
+
+        :return: List of paths to the dataset blob offset files.
+        :rtype: :class:`List[str]`
+        """
+        return self._blob_offset_paths
+
+    @blob_offsets.setter
+    def blob_offsets(self, value: List[str]):
+        """
+        Set the list of paths to the dataset blob offset files.
+        """
+        self._blob_offset_paths = value
+
     @classmethod
     def _has_valid_ext(cls, path: str) -> bool:
         """
```
```diff
@@ -552,16 +579,36 @@ class Dataset(KeyObject):
         Consistency checks. Every item must have an annotation, and the correspondence must be one to one.
         If not - it generate exception error.
         """
-        if not dir_exists(self.item_dir):
+        blob_offset_paths = list_files(
+            self.directory, filter_fn=lambda x: x.endswith(OFFSETS_PKL_SUFFIX)
+        )
+        has_blob_offsets = len(blob_offset_paths) > 0
+
+        if not dir_exists(self.item_dir) and not has_blob_offsets:
             raise FileNotFoundError("Item directory not found: {!r}".format(self.item_dir))
         if not dir_exists(self.ann_dir):
             raise FileNotFoundError("Annotation directory not found: {!r}".format(self.ann_dir))
 
         raw_ann_paths = list_files(self.ann_dir, [ANN_EXT])
-        img_paths = list_files(self.item_dir, filter_fn=self._has_valid_ext)
-
         raw_ann_names = set(os.path.basename(path) for path in raw_ann_paths)
-
+
+        if dir_exists(self.item_dir):
+            img_paths = list_files(self.item_dir, filter_fn=self._has_valid_ext)
+            img_names = [os.path.basename(path) for path in img_paths]
+        else:
+            img_names = []
+
+        # If we have blob offset files, add the image names from those
+        if has_blob_offsets:
+            self.blob_offsets = blob_offset_paths
+            for offset_file_path in self.blob_offsets:
+                try:
+                    blob_img_info_lists = BlobImageInfo.load_from_pickle_generator(offset_file_path)
+                    for blob_img_info_list in blob_img_info_lists:
+                        for blob_img_info in blob_img_info_list:
+                            img_names.append(blob_img_info.name)
+                except Exception as e:
+                    logger.warning(f"Failed to read blob offset file {offset_file_path}: {str(e)}")
 
         if len(img_names) == 0 and len(raw_ann_names) == 0:
             logger.info("Dataset {!r} is empty".format(self.name))
@@ -1308,7 +1355,7 @@ class Dataset(KeyObject):
 
             img_path = "/home/admin/Pictures/Clouds.jpeg"
             img_np = sly.image.read(img_path)
-            img_bytes = sly.image.write_bytes(img_np, "jpeg")
+            img_bytes = sly.image.write_bytes(img_np, "jpeg")
             coroutine = ds.add_item_raw_bytes_async("IMG_050.jpeg", img_bytes)
             run_coroutine(coroutine)
 
@@ -1691,7 +1738,7 @@ class Dataset(KeyObject):
                 "objects":[],
                 "customBigData":{}
             }
-
+
             coroutine = ds.set_ann_dict_async("IMG_8888.jpeg", new_ann_json)
             run_coroutine(coroutine)
         """
@@ -1723,7 +1770,7 @@ class Dataset(KeyObject):
 
             height, width = 500, 700
             new_ann = sly.Annotation((height, width))
-
+
            coroutine = ds.set_ann_async("IMG_0748.jpeg", new_ann)
             run_coroutine(coroutine)
         """
```
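The consistency check above now accepts datasets whose images live only as byte ranges inside project blob archives: it collects item names from the `*_offsets.pkl` files and exposes their paths through the new `Dataset.blob_offsets` property. Below is a minimal sketch of walking those files in a locally downloaded project, using only the calls shown in this diff (`BlobImageInfo.load_from_pickle_generator` yields batches of entries carrying `name`, `offset_start`, `offset_end`); the local path is a placeholder.

```python
# Sketch: list images that a locally downloaded project stores as blob offsets.
# Assumes the project was downloaded with blob files, so dataset directories may
# contain <blob_file_id>_offsets.pkl files next to their ann/ and img/ folders.
import supervisely as sly
from supervisely.api.image_api import BlobImageInfo

project = sly.Project("/path/to/local/project", sly.OpenMode.READ)

for dataset in project.datasets:
    for offsets_path in dataset.blob_offsets:  # paths to *_offsets.pkl files
        names = []
        # load_from_pickle_generator yields lists (batches) of BlobImageInfo entries
        for batch in BlobImageInfo.load_from_pickle_generator(offsets_path):
            names.extend(info.name for info in batch)
        print(f"{dataset.name}: {len(names)} offset-backed images in {offsets_path}")
```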
```diff
@@ -2017,6 +2064,84 @@ class Dataset(KeyObject):
             progress_cb=progress_cb,
         )
 
+    def get_blob_img_bytes(self, image_name: str) -> bytes:
+        """
+        Get image bytes from blob file.
+
+        :param image_name: Image name with extension.
+        :type image_name: :class:`str`
+        :return: Bytes of the image.
+        :rtype: :class:`bytes`
+
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/path/to/project/lemons_annotated/ds1"
+            dataset = sly.Dataset(dataset_path, sly.OpenMode.READ)
+            image_name = "IMG_0748.jpeg"
+
+            img_bytes = dataset.get_blob_img_bytes(image_name)
+        """
+
+        if self.project_dir is None:
+            raise RuntimeError("Project directory is not set. Cannot get blob image bytes.")
+
+        blob_image_info = None
+
+        for offset in self.blob_offsets:
+            for batch in BlobImageInfo.load_from_pickle_generator(offset):
+                for file in batch:
+                    if file.name == image_name:
+                        blob_image_info = file
+                        blob_file_name = removesuffix(Path(offset).name, OFFSETS_PKL_SUFFIX)
+                        break
+        if blob_image_info is None:
+            logger.debug(
+                f"Image '{image_name}' not found in blob offsets. "
+                f"Make sure that the image is stored in the blob file."
+            )
+            return None
+
+        blob_file_path = os.path.join(self.project_dir, self.blob_dir_name, blob_file_name + ".tar")
+        if file_exists(blob_file_path):
+            with open(blob_file_path, "rb") as f:
+                f.seek(blob_image_info.offset_start)
+                img_bytes = f.read(blob_image_info.offset_end - blob_image_info.offset_start)
+        else:
+            logger.debug(
+                f"Blob file '{blob_file_path}' not found. "
+                f"Make sure that the blob file exists in the specified directory."
+            )
+            img_bytes = None
+        return img_bytes
+
+    def get_blob_img_np(self, image_name: str) -> np.ndarray:
+        """
+        Get image as numpy array from blob file.
+
+        :param image_name: Image name with extension.
+        :type image_name: :class:`str`
+        :return: Numpy array of the image.
+        :rtype: :class:`numpy.ndarray`
+
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            dataset_path = "/path/to/project/lemons_annotated/ds1"
+            dataset = sly.Dataset(dataset_path, sly.OpenMode.READ)
+            image_name = "IMG_0748.jpeg"
+
+            img_np = dataset.get_blob_img_np(image_name)
+        """
+        img_bytes = self.get_blob_img_bytes(image_name)
+        if img_bytes is None:
+            return None
+        return sly_image.read_bytes(img_bytes)
+
 
 class Project:
     """
```
```diff
@@ -2036,6 +2161,7 @@ class Project:
     """
 
     dataset_class = Dataset
+    blob_dir_name = "blob"
 
     class DatasetDict(KeyIndexedCollection):
         """
@@ -2075,6 +2201,7 @@ class Project:
 
         parent_dir, name = Project._parse_path(directory)
         self._parent_dir = parent_dir
+        self._blob_dir = os.path.join(directory, self.blob_dir_name)
         self._api = api
         self.project_id = project_id
 
@@ -2086,7 +2213,7 @@ class Project:
         self._name = name
         self._datasets = Project.DatasetDict()  # ds_name -> dataset object
         self._meta = None
-
+        self._blob_files = []
         if project_id is not None:
             self._read_api()
         elif mode is OpenMode.READ:
@@ -2138,6 +2265,25 @@ class Project:
         """
         return self._parent_dir
 
+    @property
+    def blob_dir(self) -> str:
+        """
+        Directory for project blobs.
+        Blobs are .tar files with images. Used for fast data transfer.
+
+        :return: Path to project blob directory
+        :rtype: :class:`str`
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            print(project.blob_dir)
+            # Output: '/home/admin/work/supervisely/projects/lemons_annotated/blob'
+        """
+        return self._blob_dir
+
     @property
     def name(self) -> str:
         """
@@ -2259,6 +2405,61 @@ class Project:
         """
         return sum(len(ds) for ds in self._datasets)
 
+    @property
+    def blob_files(self) -> List[str]:
+        """
+        List of blob files.
+
+        :return: List of blob files
+        :rtype: :class:`list`
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            print(project.blob_files)
+            # Output: []
+        """
+        return self._blob_files
+
+    @blob_files.setter
+    def blob_files(self, blob_files: List[str]) -> None:
+        """
+        Sets blob files to the project.
+
+        :param blob_files: List of blob files.
+        :type
+        :return: None
+        :rtype: NoneType
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            project.blob_files = ["blob_file.tar"]
+        """
+        self._blob_files = blob_files
+
+    def add_blob_file(self, file_name: str) -> None:
+        """
+        Adds blob file to the project.
+
+        :param file_name: File name.
+        :type file_name: :class:`str`
+        :return: None
+        :rtype: NoneType
+        :Usage example:
+
+         .. code-block:: python
+
+            import supervisely as sly
+            project = sly.Project("/home/admin/work/supervisely/projects/lemons_annotated", sly.OpenMode.READ)
+            project.add_blob_file("blob_file.tar")
+        """
+        self._blob_files.append(file_name)
+
     def get_classes_stats(
         self,
         dataset_names: Optional[List[str]] = None,
```
```diff
@@ -2296,6 +2497,10 @@ class Project:
     def _read(self):
         meta_json = load_json_file(self._get_project_meta_path())
         self._meta = ProjectMeta.from_json(meta_json)
+        if dir_exists(self.blob_dir):
+            self.blob_files = [Path(file).name for file in list_files(self.blob_dir)]
+        else:
+            self.blob_files = []
 
         ignore_dirs = self.dataset_class.ignorable_dirs()  # dir names that can not be datasets
 
@@ -2350,6 +2555,7 @@ class Project:
         else:
             mkdir(self.directory)
         self.set_meta(ProjectMeta())
+        self.blob_files = []
 
     def validate(self):
         # @TODO: remove?
@@ -3085,6 +3291,7 @@ class Project:
         save_images: bool = True,
         save_image_meta: bool = False,
         resume_download: bool = False,
+        **kwargs,
     ) -> None:
         """
         Download project from Supervisely to the given directory.
@@ -3113,6 +3320,9 @@ class Project:
         :type save_images: :class:`bool`, optional
         :param save_image_meta: Download images metadata in JSON format or not.
         :type save_image_meta: :class:`bool`, optional
+        :param download_blob_files: Default is False. It will download images in classic way.
+            If True, it will download blob files, if they are present in the project, to optimize download process.
+        :type download_blob_files: bool, optional
         :return: None
         :rtype: NoneType
         :Usage example:
@@ -3151,6 +3361,7 @@ class Project:
             save_images=save_images,
             save_image_meta=save_image_meta,
             resume_download=resume_download,
+            **kwargs,
         )
 
     @staticmethod
@@ -3731,7 +3942,7 @@ class Project:
 
             project_id = 8888
            save_directory = "/path/to/save/projects"
-
+
            coroutine = sly.Project.download_async(api, project_id, save_directory)
             run_coroutine(coroutine)
         """
@@ -3755,6 +3966,7 @@ class Project:
             save_image_meta=save_image_meta,
             images_ids=images_ids,
             resume_download=resume_download,
+            **kwargs,
         )
 
     def to_coco(
@@ -4002,9 +4214,13 @@ def _download_project(
     save_image_meta: Optional[bool] = False,
     images_ids: Optional[List[int]] = None,
     resume_download: Optional[bool] = False,
+    **kwargs,
 ):
+    download_blob_files = kwargs.pop("download_blob_files", False)
+
     dataset_ids = set(dataset_ids) if (dataset_ids is not None) else None
     project_fs = None
+
     meta = ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
     if os.path.exists(dest_dir) and resume_download:
         dump_json_file(meta.to_json(), os.path.join(dest_dir, "meta.json"))
@@ -4029,6 +4245,7 @@ def _download_project(
 
     existing_datasets = {dataset.path: dataset for dataset in project_fs.datasets}
     for parents, dataset in api.dataset.tree(project_id):
+        blob_files_to_download = {}
         dataset_path = Dataset._get_dataset_path(dataset.name, parents)
         dataset_id = dataset.id
         if dataset_ids is not None and dataset_id not in dataset_ids:
@@ -4065,6 +4282,7 @@ def _download_project(
             project_meta=meta,
         ):
             for batch in batched(images, batch_size):
+                batch: List[ImageInfo]
                 image_ids = [image_info.id for image_info in batch]
                 image_names = [image_info.name for image_info in batch]
 
```
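The hunks above thread a new `download_blob_files` keyword from `Project.download` (via `**kwargs`) down into `_download_project`. A hedged usage sketch follows; the project id and destination path are placeholders. With the flag set, blob `.tar` archives are fetched once into `<dest_dir>/blob` and each dataset gets a `<blob_file_id>_offsets.pkl` file instead of one request per small image.

```python
import supervisely as sly

api = sly.Api.from_env()

project_id = 123                      # placeholder project id
dest_dir = "/path/to/save/project"    # placeholder destination directory

# download_blob_files=False (default) keeps the classic per-image download;
# True downloads the blob archives and writes per-dataset offset files instead.
sly.Project.download(api, project_id, dest_dir, download_blob_files=True)

project = sly.Project(dest_dir, sly.OpenMode.READ)
print(project.blob_dir)     # '<dest_dir>/blob'
print(project.blob_files)   # blob archives found under that directory, if any
```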
```diff
@@ -4085,18 +4303,97 @@ def _download_project(
                 ):
                     indexes_to_download.append(i)
 
-                #
+                # Collect images that was added to the project as offsets from archive in Team Files
+                indexes_with_offsets = []
+                for idx in indexes_to_download:
+                    image_info: ImageInfo = batch[idx]
+                    if image_info.related_data_id is not None:
+                        blob_files_to_download[image_info.related_data_id] = image_info.download_id
+                        indexes_with_offsets.append(idx)
+
+                # Download images in numpy format
                 batch_imgs_bytes = [None] * len(image_ids)
                 if save_images and indexes_to_download:
-
-
-
-
-
-
-
-
-
+
+                    # For a lot of small files that stored in blob file. Downloads blob files to optimize download process.
+                    if download_blob_files and len(indexes_with_offsets) > 0:
+                        bytes_indexes_to_download = indexes_to_download.copy()
+                        for blob_file_id, download_id in blob_files_to_download.items():
+                            if blob_file_id not in project_fs.blob_files:
+                                api.image.download_blob_file(
+                                    project_id=project_id,
+                                    download_id=download_id,
+                                    path=os.path.join(project_fs.blob_dir, f"{blob_file_id}.tar"),
+                                    log_progress=(
+                                        True if log_progress or progress_cb is not None else False
+                                    ),
+                                )
+                                project_fs.add_blob_file(blob_file_id)
+
+                            # Process blob image offsets
+                            offsets_file_name = f"{blob_file_id}{OFFSETS_PKL_SUFFIX}"
+                            offsets_file_path = os.path.join(
+                                dataset_fs.directory, offsets_file_name
+                            )
+
+                            # Initialize counter for total image offsets for this blob file
+                            total_offsets_count = 0
+                            current_batch = []
+
+                            # Get offsets from image infos
+                            for idx in indexes_with_offsets:
+                                image_info = batch[idx]
+                                if image_info.related_data_id == blob_file_id:
+                                    blob_image_info = BlobImageInfo(
+                                        name=image_info.name,
+                                        offset_start=image_info.offset_start,
+                                        offset_end=image_info.offset_end,
+                                    )
+                                    current_batch.append(blob_image_info)
+                                    bytes_indexes_to_download.remove(idx)
+
+                                    # When batch size is reached, dump to file
+                                    if len(current_batch) >= OFFSETS_PKL_BATCH_SIZE:
+                                        BlobImageInfo.dump_to_pickle(
+                                            current_batch, offsets_file_path
+                                        )
+                                        total_offsets_count += len(current_batch)
+                                        current_batch = []
+                            # Dump any remaining items in the last batch
+                            if len(current_batch) > 0:
+                                BlobImageInfo.dump_to_pickle(current_batch, offsets_file_path)
+                                total_offsets_count += len(current_batch)
+
+                            if total_offsets_count > 0:
+                                logger.debug(
+                                    f"Saved {total_offsets_count} image offsets for {blob_file_id} to {offsets_file_path} in {(total_offsets_count + OFFSETS_PKL_BATCH_SIZE - 1) // OFFSETS_PKL_BATCH_SIZE} batches"
+                                )
+                            ds_progress(total_offsets_count)
+
+                        image_ids_to_download = [
+                            image_ids[i] for i in bytes_indexes_to_download
+                        ]
+                        for index, img in zip(
+                            bytes_indexes_to_download,
+                            api.image.download_bytes(
+                                dataset_id,
+                                image_ids_to_download,
+                                progress_cb=ds_progress,
+                            ),
+                        ):
+                            batch_imgs_bytes[index] = img
+                    # If you want to download images in classic way
+                    else:
+                        image_ids_to_download = [image_ids[i] for i in indexes_to_download]
+                        for index, img in zip(
+                            indexes_to_download,
+                            api.image.download_bytes(
+                                dataset_id,
+                                image_ids_to_download,
+                                progress_cb=ds_progress,
+                            ),
+                        ):
+                            batch_imgs_bytes[index] = img
 
                 if ds_progress is not None:
                     ds_progress(len(batch) - len(indexes_to_download))
@@ -4160,7 +4457,11 @@ def _download_project(
             if item_name not in items_names_set:
                 dataset_fs.delete_item(item_name)
     try:
-        create_readme(dest_dir, project_id, api)
+        if download_blob_files:
+            project_info = api.project.get_info_by_id(project_id)
+            create_blob_readme(project_fs=project_fs, project_info=project_info)
+        else:
+            create_readme(dest_dir, project_id, api)
     except Exception as e:
         logger.info(f"There was an error while creating README: {e}")
 
```
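Above, `_download_project` turns `ImageInfo` entries that carry `related_data_id`, `offset_start` and `offset_end` into `BlobImageInfo` records and flushes them to `<blob_file_id>_offsets.pkl` whenever `OFFSETS_PKL_BATCH_SIZE` is reached. A small sketch of that write/read round trip in isolation; it assumes `dump_to_pickle` appends one batch per call, which is how the code above uses it, and the file path is a placeholder.

```python
from supervisely.api.image_api import (
    OFFSETS_PKL_BATCH_SIZE,
    OFFSETS_PKL_SUFFIX,
    BlobImageInfo,
)

# Placeholder entries: each one is an image name plus its byte range in the blob .tar.
entries = [
    BlobImageInfo(name=f"IMG_{i:04d}.jpeg", offset_start=i * 1000, offset_end=(i + 1) * 1000)
    for i in range(10)
]

offsets_path = f"/tmp/my_blob_file{OFFSETS_PKL_SUFFIX}"  # e.g. my_blob_file_offsets.pkl

# Flush in batches, mirroring the OFFSETS_PKL_BATCH_SIZE logic above.
for start in range(0, len(entries), OFFSETS_PKL_BATCH_SIZE):
    BlobImageInfo.dump_to_pickle(entries[start : start + OFFSETS_PKL_BATCH_SIZE], offsets_path)

# Read the batches back, exactly as the consistency check and get_blob_img_bytes do.
for batch in BlobImageInfo.load_from_pickle_generator(offsets_path):
    for info in batch:
        print(info.name, info.offset_start, info.offset_end)
```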
```diff
@@ -4172,15 +4473,20 @@ def upload_project(
     project_name: Optional[str] = None,
     log_progress: bool = True,
     progress_cb: Optional[Union[tqdm, Callable]] = None,
+    project_id: Optional[int] = None,
 ) -> Tuple[int, str]:
     project_fs = read_single_project(dir)
-    if project_name is None:
-        project_name = project_fs.name
 
-    if
-    project_name
+    if not project_id:
+        if project_name is None:
+            project_name = project_fs.name
 
-
+        if api.project.exists(workspace_id, project_name):
+            project_name = api.project.get_free_name(workspace_id, project_name)
+
+        project = api.project.create(workspace_id, project_name, change_name_if_conflict=True)
+    else:
+        project = api.project.get_info_by_id(project_id)
     updated_meta = api.project.update_meta(project.id, project_fs.meta.to_json())
 
     if progress_cb is not None:
@@ -4189,6 +4495,29 @@ def upload_project(
     # image_id_dct, anns_paths_dct = {}, {}
     dataset_map = {}
 
+    total_blob_size = 0
+    upload_blob_progress = None
+    src_paths = []
+    dst_paths = []
+    for blob_file in project_fs.blob_files:
+        if log_progress:
+            total_blob_size += os.path.getsize(os.path.join(project_fs.blob_dir, blob_file))
+        src_paths.append(os.path.join(project_fs.blob_dir, blob_file))
+        dst_paths.append(os.path.join(f"/{TF_BLOB_DIR}", blob_file))
+    if log_progress and len(src_paths) > 0:
+        upload_blob_progress = tqdm_sly(
+            desc="Uploading blob files", total=total_blob_size, unit="B", unit_scale=True
+        )
+    if len(src_paths) > 0:
+        blob_file_infos = api.file.upload_bulk(
+            team_id=project.team_id,
+            src_paths=src_paths,
+            dst_paths=dst_paths,
+            progress_cb=upload_blob_progress,
+        )
+    else:
+        blob_file_infos = []
+
     for ds_fs in project_fs.datasets:
         if len(ds_fs.parents) > 0:
             parent = f"{os.path.sep}".join(ds_fs.parents)
@@ -4221,13 +4550,26 @@ def upload_project(
             else:
                 img_infos.append(None)
 
-        img_paths = list(filter(lambda x: os.path.isfile(x), img_paths))
+        # img_paths = list(filter(lambda x: os.path.isfile(x), img_paths))
+        source_img_paths_len = len(img_paths)
+        valid_indices = []
+        valid_paths = []
+        offset_indices = []
+        for i, path in enumerate(img_paths):
+            if os.path.isfile(path):
+                valid_indices.append(i)
+                valid_paths.append(path)
+            else:
+                offset_indices.append(i)
+        img_paths = valid_paths
         ann_paths = list(filter(lambda x: os.path.isfile(x), ann_paths))
+        # Create a mapping from name to index position for quick lookups
+        offset_name_to_idx = {names[i]: i for i in offset_indices}
         metas = [{} for _ in names]
 
         img_infos_count = sum(1 for item in img_infos if item is not None)
 
-        if len(img_paths) == 0 and img_infos_count == 0:
+        if len(img_paths) == 0 and img_infos_count == 0 and len(offset_indices) == 0:
             # Dataset is empty
             continue
 
@@ -4258,56 +4600,57 @@ def upload_project(
             merged_metas.append(merged_meta)
         metas = merged_metas
 
-        if len(img_paths) != 0:
-
-
+        if len(img_paths) != 0 or len(offset_indices) != 0:
+
+            uploaded_img_infos = [None] * source_img_paths_len
+            uploaded_img_infos_paths = api.image.upload_paths(
+                dataset_id=dataset.id,
+                names=[name for i, name in enumerate(names) if i in valid_indices],
+                paths=img_paths,
+                progress_cb=ds_progress,
+                metas=[metas[i] for i in valid_indices],
             )
+            for i, img_info in zip(valid_indices, uploaded_img_infos_paths):
+                uploaded_img_infos[i] = img_info
+            for blob_offsets in ds_fs.blob_offsets:
+                blob_file = None
+                for blob_file_info in blob_file_infos:
+                    if Path(blob_file_info.name).stem == removesuffix(
+                        Path(blob_offsets).name, OFFSETS_PKL_SUFFIX
+                    ):
+                        blob_file = blob_file_info
+                        break
+
+                if blob_file is None:
+                    raise ValueError(
+                        f"Cannot find blob file for offsets: {blob_offsets}. "
+                        f"Check the Team File directory '{TF_BLOB_DIR}', corresponding blob file should be uploaded."
+                    )
+                uploaded_img_infos_offsets = api.image.upload_by_offsets_generator(
+                    dataset=dataset,
+                    team_file_id=blob_file.id,
+                    offsets_file_path=blob_offsets,
+                    progress_cb=ds_progress,
+                    metas={names[i]: metas[i] for i in offset_indices},
+                )
+                for img_info_batch in uploaded_img_infos_offsets:
+                    for img_info in img_info_batch:
+                        idx = offset_name_to_idx.get(img_info.name)
+                        if idx is not None:
+                            uploaded_img_infos[idx] = img_info
         elif img_infos_count != 0:
             if img_infos_count != len(names):
                 raise ValueError(
                     f"Cannot upload Project: image info files count ({img_infos_count}) doesn't match with images count ({len(names)}) that are going to be uploaded. "
                     "Check the directory structure, all annotation files should have corresponding image info files."
                 )
-
-
-
-
-
-
-
-                    links_names.append(name)
-                    links_order.append(idx)
-                    links_metas.append(meta)
-                else:
-                    hashes.append(info.hash)
-                    hashes_names.append(name)
-                    hashes_order.append(idx)
-                    hashes_metas.append(meta)
-
-            result = [None] * len(names)
-            if len(links) > 0:
-                res_infos_links = api.image.upload_links(
-                    dataset_id,
-                    links_names,
-                    links,
-                    ds_progress,
-                    metas=links_metas,
-                )
-                for info, pos in zip(res_infos_links, links_order):
-                    result[pos] = info
-
-            if len(hashes) > 0:
-                res_infos_hashes = api.image.upload_hashes(
-                    dataset_id,
-                    hashes_names,
-                    hashes,
-                    ds_progress,
-                    metas=hashes_metas,
-                )
-                for info, pos in zip(res_infos_hashes, hashes_order):
-                    result[pos] = info
-
-            uploaded_img_infos = result
+            uploaded_img_infos = api.image.upload_ids(
+                dataset_id=dataset.id,
+                names=names,
+                ids=[img_info.id for img_info in img_infos],
+                progress_cb=ds_progress,
+                metas=metas,
+            )
         else:
             raise ValueError(
                 "Cannot upload Project: img_paths is empty and img_infos_paths is empty"
```
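`upload_project` above gains a `project_id` parameter (upload into an existing project instead of creating a new one) and, when the local project has a `blob/` directory, pushes the `.tar` archives to Team Files under `blob-files` and registers offset-backed images with `api.image.upload_by_offsets_generator`. A hedged sketch of both call forms; the directory and IDs are placeholders, and the function is imported from the module this diff modifies.

```python
import supervisely as sly
from supervisely.project.project import upload_project

api = sly.Api.from_env()

local_dir = "/path/to/local/project"   # may contain blob/*.tar and *_offsets.pkl files
workspace_id = 42                       # placeholder workspace id

# Create a new project (name defaults to the local project name):
new_id, new_name = upload_project(local_dir, api, workspace_id, log_progress=True)

# Or upload the same local data into an existing project, new in this release:
same_id, same_name = upload_project(
    local_dir, api, workspace_id, project_id=new_id, log_progress=True
)
```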
```diff
@@ -4343,6 +4686,7 @@ def download_project(
     save_image_meta: bool = False,
     images_ids: Optional[List[int]] = None,
     resume_download: Optional[bool] = False,
+    **kwargs,
 ) -> None:
     """
     Download image project to the local directory.
@@ -4353,7 +4697,7 @@ def download_project(
     :type project_id: int
     :param dest_dir: Destination path to local directory.
     :type dest_dir: str
-    :param dataset_ids: Specified list of Dataset IDs which will be downloaded.
+    :param dataset_ids: Specified list of Dataset IDs which will be downloaded.
     :type dataset_ids: list(int), optional
     :param log_progress: Show downloading logs in the output. By default, it is True.
     :type log_progress: bool, optional
@@ -4375,6 +4719,9 @@ def download_project(
     :type images_ids: list(int), optional
     :param resume_download: Resume download enables to download only missing files avoiding erase of existing files.
     :type resume_download: bool, optional
+    :param download_blob_files: Default is False. It will download images in classic way.
+        If True, it will download blob files, if they are present in the project, to optimize download process.
+    :type download_blob_files: bool, optional
     :return: None.
     :rtype: NoneType
     :Usage example:
@@ -4426,6 +4773,7 @@ def download_project(
             save_image_meta=save_image_meta,
             images_ids=images_ids,
             resume_download=resume_download,
+            **kwargs,
         )
     else:
         _download_project_optimized(
@@ -4440,6 +4788,7 @@ def download_project(
             save_images=save_images,
             log_progress=log_progress,
             images_ids=images_ids,
+            **kwargs,
         )
 
 
@@ -4455,6 +4804,7 @@ def _download_project_optimized(
     save_images=True,
     log_progress=True,
     images_ids: List[int] = None,
+    **kwargs,
 ):
     project_info = api.project.get_info_by_id(project_id)
     project_id = project_info.id
```
```diff
@@ -4736,6 +5086,209 @@ def create_readme(
     return readme_path
 
 
+def _dataset_blob_structure_md(
+    project_fs: Project,
+    project_info: sly.ProjectInfo,
+    entity_limit: Optional[int] = 2,
+) -> str:
+    """Creates a markdown string with the dataset structure of the project.
+    Supports only images and videos projects.
+
+    :project_fs: Project file system.
+    :type project_fs: :class:`Project<supervisely.project.project.Project>`
+    :param project_info: Project information.
+    :type project_info: :class:`ProjectInfo<supervisely.project.project_info.ProjectInfo>`
+    :param entity_limit: The maximum number of entities to display in the README.
+    :type entity_limit: int, optional
+    :return: Markdown string with the dataset structure of the project.
+    :rtype: str
+    """
+    supported_project_types = [sly.ProjectType.IMAGES.value]
+    if project_info.type not in supported_project_types:
+        return ""
+
+    entity_icons = {
+        "images": " 🏞️ ",
+        "blob_files": " 📦 ",
+        "pkl_files": " 📄 ",
+        "annotations": " 📝 ",
+    }
+    dataset_icon = " 📂 "
+    folder_icon = " 📁 "
+
+    result_md = f"🗂️ {project_info.name}<br>"
+
+    # Add project-level blob files
+    if os.path.exists(project_fs.blob_dir) and project_fs.blob_files:
+        result_md += "┣" + folder_icon + f"{Project.blob_dir_name}<br>"
+        blob_files = [entry.name for entry in os.scandir(project_fs.blob_dir) if entry.is_file()]
+
+        for idx, blob_file in enumerate(blob_files):
+            if idx == entity_limit and len(blob_files) > entity_limit:
+                result_md += "┃ ┗ ... " + str(len(blob_files) - entity_limit) + " more<br>"
+                break
+            symbol = "┗" if idx == len(blob_files) - 1 or idx == entity_limit - 1 else "┣"
+            result_md += "┃ " + symbol + entity_icons["blob_files"] + blob_file + "<br>"
+
+    # Build a dataset hierarchy tree
+    dataset_tree = {}
+    root_datasets = []
+
+    # First pass: create nodes for all datasets
+    for dataset in project_fs.datasets:
+        dataset_tree[dataset.directory] = {
+            "dataset": dataset,
+            "children": [],
+            "parent_dir": os.path.dirname(dataset.directory) if dataset.parents else None,
+        }
+
+    # Second pass: build parent-child relationships
+    for dir_path, node in dataset_tree.items():
+        parent_dir = node["parent_dir"]
+        if parent_dir in dataset_tree:
+            dataset_tree[parent_dir]["children"].append(dir_path)
+        else:
+            root_datasets.append(dir_path)
+
+    # Function to recursively render the dataset tree
+    def render_tree(dir_path, prefix=""):
+        nonlocal result_md
+        node = dataset_tree[dir_path]
+        dataset = node["dataset"]
+        children = node["children"]
+
+        # Create dataset display with proper path
+        dataset_path = Dataset._get_dataset_path(dataset.name, dataset.parents)
+        result_md += prefix + "┣" + dataset_icon + f"[{dataset.name}]({dataset_path})<br>"
+
+        # Set indentation for dataset content
+        content_prefix = prefix + "┃ "
+
+        # Add pkl files at the dataset level
+        offset_files = [
+            entry.name
+            for entry in os.scandir(dataset.directory)
+            if entry.is_file() and entry.name.endswith(".pkl")
+        ]
+
+        if offset_files:
+            for idx, pkl_file in enumerate(offset_files):
+                last_file = idx == len(offset_files) - 1
+                has_more_content = (
+                    os.path.exists(dataset.img_dir) or os.path.exists(dataset.ann_dir) or children
+                )
+                symbol = "┗" if last_file and not has_more_content else "┣"
+                result_md += content_prefix + symbol + entity_icons["pkl_files"] + pkl_file + "<br>"
+
+        # Add img directory
+        if os.path.exists(dataset.img_dir):
+            has_ann_dir = os.path.exists(dataset.ann_dir)
+            has_more_content = has_ann_dir or children
+            symbol = "┣" if has_more_content else "┗"
+            result_md += content_prefix + symbol + folder_icon + "img<br>"
+
+            # Add image files
+            entities = [entry.name for entry in os.scandir(dataset.img_dir) if entry.is_file()]
+            entities = sorted(entities)
+            selected_entities = entities[: min(len(entities), entity_limit)]
+
+            img_prefix = content_prefix + "┃ "
+            for idx, entity in enumerate(selected_entities):
+                last_img = idx == len(selected_entities) - 1
+                symbol = "┗" if last_img and len(entities) <= entity_limit else "┣"
+                result_md += img_prefix + symbol + entity_icons["images"] + entity + "<br>"
+
+            if len(entities) > entity_limit:
+                result_md += img_prefix + "┗ ... " + str(len(entities) - entity_limit) + " more<br>"
+
+        # Add ann directory
+        if os.path.exists(dataset.ann_dir):
+            has_more_content = bool(children)
+            symbol = "┣"
+            result_md += content_prefix + "┣" + folder_icon + "ann<br>"
+
+            anns = [entry.name for entry in os.scandir(dataset.ann_dir) if entry.is_file()]
+            anns = sorted(anns)
+
+            # Try to match annotations with displayed images
+            possible_anns = [f"{entity}.json" for entity in selected_entities]
+            matched_anns = [pa for pa in possible_anns if pa in anns]
+
+            # Add additional annotations if we haven't reached the limit
+            if len(matched_anns) < min(entity_limit, len(anns)):
+                for ann in anns:
+                    if ann not in matched_anns and len(matched_anns) < entity_limit:
+                        matched_anns.append(ann)
+
+            ann_prefix = content_prefix + "┃ "
+            for idx, ann in enumerate(matched_anns):
+                last_ann = idx == len(matched_anns) - 1
+                symbol = "┗" if last_ann and len(anns) <= entity_limit else "┣"
+                result_md += ann_prefix + symbol + entity_icons["annotations"] + ann + "<br>"
+
+            if len(anns) > entity_limit:
+                result_md += ann_prefix + "┗ ... " + str(len(anns) - entity_limit) + " more<br>"
+
+        if not has_more_content:
+            result_md += content_prefix + "...<br>"
+        # Recursively render child datasets
+        for idx, child_dir in enumerate(children):
+            render_tree(child_dir, content_prefix)
+
+    # Start rendering from root datasets
+    for root_dir in sorted(root_datasets):
+        render_tree(root_dir)
+
+    return result_md
+
+
+def create_blob_readme(
+    project_fs: Project,
+    project_info: ProjectInfo,
+) -> str:
+    """Creates a README.md file using the template, adds general information
+    about the project and creates a dataset structure section.
+
+    :param project_fs: Project file system.
+    :type project_fs: :class:`Project<supervisely.project.project.Project>`
+    :param project_info: Project information.
+    :type project_info: :class:`ProjectInfo<supervisely.project.project_info.ProjectInfo>`
+    :return: Path to the created README.md file.
+    :rtype: str
+
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        api = sly.Api.from_env()
+
+        project_id = 123
+        project_dir = "/path/to/project"
+
+        readme_path = sly.create_readme(project_dir, project_id, api)
+
+        print(f"README.md file was created at {readme_path}")
+    """
+    current_path = os.path.dirname(os.path.abspath(__file__))
+    template_path = os.path.join(current_path, "readme_template.md")
+    with open(template_path, "r") as file:
+        template = file.read()
+
+    readme_path = os.path.join(project_fs.directory, "README.md")
+
+    template = template.replace("{{general_info}}", _project_info_md(project_info))
+
+    template = template.replace(
+        "{{dataset_structure_info}}", _dataset_blob_structure_md(project_fs, project_info)
+    )
+
+    with open(readme_path, "w") as f:
+        f.write(template)
+    return readme_path
+
+
 def _project_info_md(project_info: sly.ProjectInfo) -> str:
     """Creates a markdown string with general information about the project
     using the fields of the ProjectInfo NamedTuple.
```
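`create_blob_readme` above is what `_download_project` calls instead of `create_readme` when blob files were downloaded; it fills `readme_template.md` with `_project_info_md` and the blob-aware tree from `_dataset_blob_structure_md`. A sketch of invoking it directly on an already-downloaded project follows; the id and path are placeholders.

```python
import supervisely as sly
from supervisely.project.project import create_blob_readme

api = sly.Api.from_env()

project_id = 123                           # placeholder project id
local_dir = "/path/to/local/project"       # downloaded with download_blob_files=True

project_fs = sly.Project(local_dir, sly.OpenMode.READ)
project_info = api.project.get_info_by_id(project_id)

# Writes <local_dir>/README.md with general info plus the blob/offset-aware tree.
readme_path = create_blob_readme(project_fs=project_fs, project_info=project_info)
print(readme_path)
```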
```diff
@@ -4784,6 +5337,9 @@ def _dataset_structure_md(
     entity_icons = {
         "images": " 🏞️ ",
         "videos": " 🎥 ",
+        "blob_files": " 📦 ",
+        "pkl_files": " 📄 ",
+        "annotations": " 📝 ",
     }
     dataset_icon = " 📂 "
     list_function = list_functions[project_info.type]
@@ -4791,6 +5347,8 @@ def _dataset_structure_md(
 
     result_md = f"🗂️ {project_info.name}<br>"
 
+    # if project_info
+
     for parents, dataset_info in api.dataset.tree(project_info.id):
         # The dataset path is needed to create a clickable link in the README.
         dataset_path = Dataset._get_dataset_path(dataset_info.name, parents)
@@ -4841,6 +5399,8 @@ async def _download_project_async(
     switch_size = kwargs.get("switch_size", 1.28 * 1024 * 1024)
     # batch size for bulk download
    batch_size = kwargs.get("batch_size", 100)
+    # control whether to download blob files
+    download_blob_files = kwargs.get("download_blob_files", False)
 
     if semaphore is None:
         semaphore = api.get_default_semaphore()
@@ -4890,11 +5450,19 @@ async def _download_project_async(
         small_images = []
         large_images = []
         dataset_images = []
+        blob_files_to_download = {}
+        blob_images = []
+
         async for image_batch in all_images:
             for image in image_batch:
                 if images_ids is None or image.id in images_ids:
                     dataset_images.append(image)
-                    if image.size < switch_size:
+                    # Check for images with blob offsets
+
+                    if download_blob_files and image.related_data_id is not None:
+                        blob_files_to_download[image.related_data_id] = image.download_id
+                        blob_images.append(image)
+                    elif image.size < switch_size:
                         small_images.append(image)
                     else:
                         large_images.append(image)
@@ -4903,7 +5471,7 @@ async def _download_project_async(
         if log_progress is True:
             ds_progress = tqdm_sly(
                 desc="Downloading images from {!r}".format(dataset.name),
-                total=len(small_images) + len(large_images),
+                total=len(small_images) + len(large_images) + len(blob_images),
                 leave=False,
             )
 
@@ -4939,14 +5507,82 @@ async def _download_project_async(
             )
             return created_tasks
 
+        # Download blob files if required
+        if download_blob_files and len(blob_files_to_download) > 0:
+            blob_paths = []
+            download_ids = []
+            # Process each blob file
+            for blob_file_id, download_id in blob_files_to_download.items():
+                if blob_file_id not in project_fs.blob_files:
+                    # Download the blob file
+                    blob_paths.append(os.path.join(project_fs.blob_dir, f"{blob_file_id}.tar"))
+                    download_ids.append(download_id)
+            await api.image.download_blob_files_async(
+                project_id=project_id,
+                download_ids=download_ids,
+                paths=blob_paths,
+                semaphore=semaphore,
+                log_progress=(True if log_progress or progress_cb is not None else False),
+            )
+            for blob_file_id, download_id in blob_files_to_download.items():
+                project_fs.add_blob_file(blob_file_id)
+
+                # Process blob image offsets
+                offsets_file_name = f"{blob_file_id}{OFFSETS_PKL_SUFFIX}"
+                offsets_file_path = os.path.join(dataset_fs.directory, offsets_file_name)
+
+                total_offsets_count = 0  # for logging
+                current_batch = []
+                for img in blob_images:
+                    if img.related_data_id == blob_file_id:
+                        blob_image_info = BlobImageInfo(
+                            name=img.name,
+                            offset_start=img.offset_start,
+                            offset_end=img.offset_end,
+                        )
+                        current_batch.append(blob_image_info)
+                        if len(current_batch) >= OFFSETS_PKL_BATCH_SIZE:
+                            BlobImageInfo.dump_to_pickle(current_batch, offsets_file_path)
+                            total_offsets_count += len(current_batch)
+                            current_batch = []
+                if len(current_batch) > 0:
+                    BlobImageInfo.dump_to_pickle(current_batch, offsets_file_path)
+                    total_offsets_count += len(current_batch)
+                if total_offsets_count > 0:
+                    logger.debug(
+                        f"Saved {total_offsets_count} image offsets for {blob_file_id} to {offsets_file_path} in {(total_offsets_count + OFFSETS_PKL_BATCH_SIZE - 1) // OFFSETS_PKL_BATCH_SIZE} batches"
+                    )
+            offset_tasks = []
+            # Download annotations for images with offsets
+            for offsets_batch in batched(blob_images, batch_size=batch_size):
+                offset_task = _download_project_items_batch_async(
+                    api=api,
+                    dataset_id=dataset_id,
+                    img_infos=offsets_batch,
+                    meta=meta,
+                    dataset_fs=dataset_fs,
+                    id_to_tagmeta=id_to_tagmeta,
+                    semaphore=semaphore,
+                    save_images=False,
+                    save_image_info=save_image_info,
+                    only_image_tags=only_image_tags,
+                    progress_cb=ds_progress,
+                )
+                offset_tasks.append(offset_task)
+            created_tasks = await run_tasks_with_delay(offset_tasks, 0.05)
+            await asyncio.gather(*created_tasks)
+
         tasks = []
+        # Check which images need to be downloaded
         small_images = await check_items(small_images)
         large_images = await check_items(large_images)
 
+        # If only one small image, treat it as a large image for efficiency
         if len(small_images) == 1:
             large_images.append(small_images.pop())
-        for images_batch in batched(small_images, batch_size=batch_size):
 
+        # Create batch download tasks
+        for images_batch in batched(small_images, batch_size=batch_size):
             task = _download_project_items_batch_async(
                 api=api,
                 dataset_id=dataset_id,
@@ -4961,6 +5597,8 @@ async def _download_project_async(
                 progress_cb=ds_progress,
             )
             tasks.append(task)
+
+        # Create individual download tasks for large images
         for image in large_images:
             task = _download_project_item_async(
                 api=api,
@@ -4995,7 +5633,11 @@ async def _download_project_async(
                 dataset_fs.delete_item(item_name)
 
     try:
-        create_readme(dest_dir, project_id, api)
+        if download_blob_files:
+            project_info = api.project.get_info_by_id(project_id)
+            create_blob_readme(project_fs=project_fs, project_info=project_info)
+        else:
+            create_readme(dest_dir, project_id, api)
     except Exception as e:
         logger.info(f"There was an error while creating README: {e}")
 
```
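The async path mirrors the synchronous one: `_download_project_async` reads `download_blob_files` from its kwargs, fetches blob archives with `api.image.download_blob_files_async`, writes per-dataset offset files, and finishes with `create_blob_readme`. Below is a hedged sketch of driving it through `Project.download_async` (whose docstring appears earlier in this diff). It assumes the extra keyword is forwarded the same way `**kwargs` is threaded through the synchronous entry points; `get_or_create_event_loop` comes from the imports at the top of this file, and the id and path are placeholders.

```python
import supervisely as sly
from supervisely._utils import get_or_create_event_loop

api = sly.Api.from_env()

project_id = 8888                          # placeholder, as in the docstring example above
save_directory = "/path/to/save/projects"

# Assumption: download_async forwards extra kwargs down to _download_project_async,
# where download_blob_files is read via kwargs.get("download_blob_files", False).
coroutine = sly.Project.download_async(
    api, project_id, save_directory, download_blob_files=True
)

loop = get_or_create_event_loop()
loop.run_until_complete(coroutine)
```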