supervisely-6.73.226-py3-none-any.whl → supervisely-6.73.228-py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
This release has been flagged as potentially problematic.
- supervisely/__init__.py +2 -2
- supervisely/_utils.py +78 -1
- supervisely/api/annotation_api.py +184 -14
- supervisely/api/api.py +2 -2
- supervisely/api/dataset_api.py +90 -1
- supervisely/api/entity_annotation/figure_api.py +11 -2
- supervisely/api/file_api.py +144 -8
- supervisely/api/image_api.py +94 -13
- supervisely/api/pointcloud/pointcloud_api.py +4 -8
- supervisely/api/project_api.py +285 -1
- supervisely/api/video/video_annotation_api.py +45 -0
- supervisely/api/video/video_api.py +2 -4
- supervisely/api/volume/volume_api.py +2 -4
- supervisely/convert/base_converter.py +14 -10
- supervisely/io/fs.py +55 -8
- supervisely/io/json.py +32 -0
- supervisely/project/download.py +176 -64
- supervisely/project/project.py +676 -35
- supervisely/project/project_type.py +4 -1
- supervisely/project/video_project.py +293 -3
- {supervisely-6.73.226.dist-info → supervisely-6.73.228.dist-info}/METADATA +1 -1
- {supervisely-6.73.226.dist-info → supervisely-6.73.228.dist-info}/RECORD +26 -26
- {supervisely-6.73.226.dist-info → supervisely-6.73.228.dist-info}/LICENSE +0 -0
- {supervisely-6.73.226.dist-info → supervisely-6.73.228.dist-info}/WHEEL +0 -0
- {supervisely-6.73.226.dist-info → supervisely-6.73.228.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.226.dist-info → supervisely-6.73.228.dist-info}/top_level.txt +0 -0
supervisely/project/download.py
CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import os
 import shutil
 from typing import Callable, List, Optional, Tuple, Union
@@ -5,7 +6,7 @@ from typing import Callable, List, Optional, Tuple, Union
 from tqdm import tqdm
 
 from supervisely import get_project_class
-from supervisely._utils import rand_str
+from supervisely._utils import get_or_create_event_loop, rand_str
 from supervisely.annotation.annotation import Annotation, ProjectMeta
 from supervisely.api.api import Api
 from supervisely.api.dataset_api import DatasetInfo
@@ -19,7 +20,7 @@ from supervisely.io.fs import (
     get_directory_size,
     remove_dir,
 )
-from supervisely.io.json import load_json_file
+from supervisely.io.json import dump_json_file, load_json_file
 from supervisely.project import Project
 from supervisely.project.project import Dataset, OpenMode, ProjectType
 from supervisely.sly_logger import logger
@@ -177,29 +178,112 @@ def download(
     )
 
 
-def _get_cache_dir(project_id: int, dataset_name: str = None) -> str:
+def download_async(
+    api: Api,
+    project_id: int,
+    dest_dir: str,
+    semaphore: Optional[asyncio.Semaphore] = None,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    progress_cb: Optional[Union[tqdm, Callable]] = None,
+    **kwargs,
+) -> None:
+    project_info = api.project.get_info_by_id(project_id)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    project_class = get_project_class(project_info.type)
+    if hasattr(project_class, "download_async"):
+        download_coro = project_class.download_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            semaphore=semaphore,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+        loop = get_or_create_event_loop()
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
+    else:
+        raise NotImplementedError(f"Method download_async is not implemented for {project_class}")
+
+
+def download_async_or_sync(
+    api: Api,
+    project_id: int,
+    dest_dir: str,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    progress_cb: Optional[Union[tqdm, Callable]] = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
+    **kwargs,
+):
+    project_info = api.project.get_info_by_id(project_id)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    project_class = get_project_class(project_info.type)
+    if hasattr(project_class, "download_async"):
+        download_coro = project_class.download_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            semaphore=semaphore,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+        loop = get_or_create_event_loop()
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
+
+    else:
+        project_class.download(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+
+
+def _get_cache_dir(project_id: int, dataset_path: str = None) -> str:
     p = os.path.join(apps_cache_dir(), str(project_id))
-    if dataset_name is not None:
-        p = os.path.join(p, dataset_name)
+    if dataset_path is not None:
+        p = os.path.join(p, dataset_path)
     return p
 
 
-def is_cached(project_id, dataset_name: str = None) -> bool:
-    return dir_exists(_get_cache_dir(project_id, dataset_name))
+def is_cached(project_id, dataset_path: str = None) -> bool:
+    return dir_exists(_get_cache_dir(project_id, dataset_path))
 
 
-def _split_by_cache(project_id: int, dataset_names: List[str]) -> Tuple[List, List]:
+def _split_by_cache(project_id: int, dataset_paths: List[str]) -> Tuple[List, List]:
     if not is_cached(project_id):
-        return dataset_names, []
-    to_download = [ds_name for ds_name in dataset_names if not is_cached(project_id, ds_name)]
-    cached = [ds_name for ds_name in dataset_names if is_cached(project_id, ds_name)]
+        return dataset_paths, []
+    to_download = [ds_path for ds_path in dataset_paths if not is_cached(project_id, ds_path)]
+    cached = [ds_path for ds_path in dataset_paths if is_cached(project_id, ds_path)]
     return to_download, cached
 
 
-def get_cache_size(project_id: int, dataset_name: str = None) -> int:
-    if not is_cached(project_id, dataset_name):
+def get_cache_size(project_id: int, dataset_path: str = None) -> int:
+    if not is_cached(project_id, dataset_path):
         return 0
-    cache_dir = _get_cache_dir(project_id, dataset_name)
+    cache_dir = _get_cache_dir(project_id, dataset_path)
     return get_directory_size(cache_dir)
 
 
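The two entry points added above pick between the coroutine-based and the blocking download path at run time. A minimal usage sketch (server address, token, and IDs are placeholders; assumes a project type whose class implements download_async, e.g. images):

    import asyncio

    import supervisely as sly
    from supervisely.project.download import download_async_or_sync

    api = sly.Api(server_address="https://app.supervisely.com", token="<API_TOKEN>")
    download_async_or_sync(
        api,
        project_id=123,
        dest_dir="/tmp/my_project",
        semaphore=asyncio.Semaphore(10),  # caps concurrent item downloads
    )

The caller never awaits anything itself: get_or_create_event_loop supplies a loop, and when one is already running the coroutine is submitted with asyncio.run_coroutine_threadsafe and the call blocks on future.result().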
@@ -254,7 +338,7 @@ def _validate_dataset(
     project_meta_changed = _project_meta_changed(project_meta, project.meta)
     for dataset in project.datasets:
         dataset: Dataset
-        if dataset.name == dataset_info.name:
+        if dataset.name.endswith(dataset_info.name):  # TODO: fix it later
             diff = set(items_infos_dict.keys()).difference(set(dataset.get_items_names()))
             if diff:
                 logger.debug(
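The endswith comparison replaces the exact name match so that nested datasets, whose local names are relative paths, still match their leaf name; the TODO flags that a pure suffix match can also hit unrelated names. For instance (layout assumed):

    "root/datasets/child".endswith("child")  # True: nested dataset dir matches its leaf name
    "grandchild".endswith("child")           # also True: the false positive the TODO warns about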
@@ -305,10 +389,13 @@ def _validate(
     api: Api, project_info: ProjectInfo, project_meta: ProjectMeta, dataset_infos: List[DatasetInfo]
 ):
     project_id = project_info.id
-    to_download, cached = _split_by_cache(project_id, [info.name for info in dataset_infos])
+    to_download, cached = _split_by_cache(
+        project_id, [_get_dataset_path(api, dataset_infos, info.id) for info in dataset_infos]
+    )
     to_download, cached = set(to_download), set(cached)
     for dataset_info in dataset_infos:
-        if dataset_info.name in to_download:
+        ds_path = _get_dataset_path(api, dataset_infos, dataset_info.id)
+        if ds_path in to_download:
             continue
         if not _validate_dataset(
             api,
@@ -317,10 +404,10 @@ def _validate(
             project_meta,
             dataset_info,
         ):
-            to_download.add(dataset_info.name)
-            cached.remove(dataset_info.name)
+            to_download.add(ds_path)
+            cached.remove(ds_path)
             logger.info(
-                f"Dataset {dataset_info.name} of project {project_id} is not up to date and will be re-downloaded."
+                f"Dataset {ds_path} of project {project_id} is not up to date and will be re-downloaded."
             )
     return list(to_download), list(cached)
 
@@ -337,51 +424,40 @@ def _add_save_items_infos_to_kwargs(kwargs: dict, project_type: str):
     return kwargs
 
 
+def _add_resume_download_to_kwargs(kwargs: dict, project_type: str):
+    supported_force_projects = (str(ProjectType.IMAGES),)
+    if project_type in supported_force_projects:
+        kwargs["resume_download"] = True
+    return kwargs
+
+
 def _download_project_to_cache(
     api: Api,
     project_info: ProjectInfo,
     dataset_infos: List[DatasetInfo],
     log_progress: bool = True,
     progress_cb: Callable = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ):
     project_id = project_info.id
     project_type = project_info.type
     kwargs = _add_save_items_infos_to_kwargs(kwargs, project_type)
+    kwargs = _add_resume_download_to_kwargs(kwargs, project_type)
     cached_project_dir = _get_cache_dir(project_id)
     if len(dataset_infos) == 0:
         logger.debug("No datasets to download")
         return
-    if is_cached(project_id):
-        temp_pr_dir = ...
-        download(
-            api=api,
-            project_id=project_id,
-            dest_dir=temp_pr_dir,
-            dataset_ids=[info.id for info in dataset_infos],
-            log_progress=log_progress,
-            progress_cb=progress_cb,
-            **kwargs,
-        )
-        existing_project = Project(cached_project_dir, OpenMode.READ)
-        for dataset in existing_project.datasets:
-            dataset: Dataset
-            dataset.directory
-            if dataset.name in [info.name for info in dataset_infos]:
-                continue
-            copy_dir_recursively(dataset.directory, os.path.join(temp_pr_dir, dataset.name))
-        remove_dir(cached_project_dir)
-        shutil.move(temp_pr_dir, cached_project_dir)
-    else:
-        download(
-            api=api,
-            project_id=project_id,
-            dest_dir=cached_project_dir,
-            dataset_ids=[info.id for info in dataset_infos],
-            log_progress=log_progress,
-            progress_cb=progress_cb,
-            **kwargs,
-        )
+    download_async_or_sync(
+        api=api,
+        project_id=project_id,
+        dest_dir=cached_project_dir,
+        dataset_ids=[info.id for info in dataset_infos],
+        log_progress=log_progress,
+        progress_cb=progress_cb,
+        semaphore=semaphore,
+        **kwargs,
+    )
 
 
 def download_to_cache(
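Only images projects support resumable downloads at this point, so _add_resume_download_to_kwargs acts as a feature gate. A small behavioral sketch (inputs hypothetical):

    from supervisely.project.project import ProjectType

    assert _add_resume_download_to_kwargs({}, str(ProjectType.IMAGES)) == {"resume_download": True}
    assert _add_resume_download_to_kwargs({}, str(ProjectType.VIDEOS)) == {}  # other types: kwargs untouched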
@@ -391,6 +467,7 @@ def download_to_cache(
     dataset_ids: List[int] = None,
     log_progress: bool = True,
     progress_cb=None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ) -> Tuple[List, List]:
     """
@@ -410,6 +487,7 @@ def download_to_cache(
     :type log_progress: bool, optional
     :param progress_cb: Function for tracking download progress. Will be called with number of items downloaded.
     :type progress_cb: tqdm or callable, optional
+    :param semaphore: Semaphore for limiting the number of concurrent downloads if using async download.
 
     :return: Tuple where the first list contains names of downloaded datasets and the second list contains
         names of cached datasets
@@ -421,27 +499,53 @@ def download_to_cache(
         raise ValueError("dataset_infos and dataset_ids cannot be specified at the same time")
     if dataset_infos is None:
         if dataset_ids is None:
-            dataset_infos = api.dataset.get_list(project_id)
+            dataset_infos = api.dataset.get_list(project_id, recursive=True)
         else:
             dataset_infos = [api.dataset.get_info_by_id(dataset_id) for dataset_id in dataset_ids]
-    name_to_info = {info.name: info for info in dataset_infos}
+    path_to_info = {_get_dataset_path(api, dataset_infos, info.id): info for info in dataset_infos}
     to_download, cached = _validate(api, project_info, project_meta, dataset_infos)
     if progress_cb is not None:
-        cached_items_n = sum(name_to_info[ds_name].items_count for ds_name in cached)
+        cached_items_n = sum(path_to_info[ds_path].items_count for ds_path in cached)
         progress_cb(cached_items_n)
     _download_project_to_cache(
         api=api,
         project_info=project_info,
-        dataset_infos=[name_to_info[ds_name] for ds_name in to_download],
+        dataset_infos=[path_to_info[ds_path] for ds_path in to_download],
         log_progress=log_progress,
         progress_cb=progress_cb,
+        semaphore=semaphore,
         **kwargs,
     )
     return to_download, cached
 
 
+def _get_dataset_parents(api, dataset_infos, dataset_id):
+    dataset_infos_dict = {info.id: info for info in dataset_infos}
+    this_dataset_info = dataset_infos_dict.get(dataset_id, api.dataset.get_info_by_id(dataset_id))
+    if this_dataset_info.parent_id is None:
+        return []
+    parent = _get_dataset_parents(
+        api, list(dataset_infos_dict.values()), this_dataset_info.parent_id
+    )
+    this_parent_name = dataset_infos_dict.get(
+        this_dataset_info.parent_id, api.dataset.get_info_by_id(dataset_id)
+    ).name
+    return [*parent, this_parent_name]
+
+
+def _get_dataset_path(api: Api, dataset_infos: List[DatasetInfo], dataset_id: int) -> str:
+    parents = _get_dataset_parents(api, dataset_infos, dataset_id)
+    dataset_infos_dict = {info.id: info for info in dataset_infos}
+    this_dataset_info = dataset_infos_dict.get(dataset_id, api.dataset.get_info_by_id(dataset_id))
+    return Dataset._get_dataset_path(this_dataset_info.name, parents)
+
+
 def copy_from_cache(
-    project_id: int, dest_dir: str, dataset_names: List[str] = None, progress_cb: Callable = None
+    project_id: int,
+    dest_dir: str,
+    dataset_names: List[str] = None,
+    progress_cb: Callable = None,
+    dataset_paths: List[str] = None,
 ):
     """
     Copy project or dataset from cache to the specified directory.
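Since dataset listing is now recursive, cache entries are keyed by a dataset's relative path rather than its bare name, and _get_dataset_path builds that key by walking parent_id links. A sketch of the resolution (IDs and names hypothetical; the exact directory layout is whatever Dataset._get_dataset_path produces, e.g. "root/datasets/child"):

    # dataset 10 "root" is top-level; dataset 20 "child" has parent_id=10
    infos = api.dataset.get_list(project_id, recursive=True)
    _get_dataset_parents(api, infos, 20)  # -> ["root"]
    _get_dataset_path(api, infos, 20)     # -> relative path combining ["root"] and "child"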
@@ -451,31 +555,35 @@
     :type project_id: int
     :param dest_dir: Destination path to local directory.
     :type dest_dir: str
-    :param dataset_name: List of dataset names to copy. If not specified, the whole project will be copied.
+    :param dataset_name: List of dataset paths to copy. If not specified, the whole project will be copied.
     :type dataset_name: str, optional
     :param progress_cb: Function for tracking copying progress. Will be called with number of bytes copied.
     :type progress_cb: tqdm or callable, optional
+    :param dataset_paths: List of dataset paths to copy. If not specified, all datasets will be copied.
+    :type dataset_paths: list(str), optional
 
     :return: None.
     :rtype: NoneType
     """
     if not is_cached(project_id):
         raise RuntimeError(f"Project {project_id} is not cached")
-    if dataset_names is not None:
-        for dataset_name in dataset_names:
-            if not is_cached(project_id, dataset_name):
-                raise RuntimeError(f"Dataset {dataset_name} of project {project_id} is not cached")
+    if dataset_names is not None or dataset_paths is not None:
+        if dataset_names is not None:
+            dataset_paths = dataset_names
+        for dataset_path in dataset_paths:
+            if not is_cached(project_id, dataset_path):
+                raise RuntimeError(f"Dataset {dataset_path} of project {project_id} is not cached")
     cache_dir = _get_cache_dir(project_id)
-    if dataset_names is None:
+    if dataset_paths is None:
         copy_dir_recursively(cache_dir, dest_dir, progress_cb)
     else:
         # copy meta
         copy_file(os.path.join(cache_dir, "meta.json"), os.path.join(dest_dir, "meta.json"))
         # copy datasets
-        for dataset_name in dataset_names:
+        for dataset_path in dataset_paths:
             copy_dir_recursively(
-                os.path.join(cache_dir, dataset_name),
-                os.path.join(dest_dir, dataset_name),
+                os.path.join(cache_dir, dataset_path),
+                os.path.join(dest_dir, dataset_path),
                 progress_cb,
             )
 
@@ -487,6 +595,7 @@ def download_using_cache(
     dataset_ids: Optional[List[int]] = None,
     log_progress: bool = True,
     progress_cb: Optional[Union[tqdm, Callable]] = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ) -> None:
     """
@@ -505,6 +614,8 @@ def download_using_cache(
     :type log_progress: bool
     :param progress_cb: Function for tracking download progress. Will be called with number of items downloaded.
     :type progress_cb: tqdm or callable, optional
+    :param semaphore: Semaphore for limiting the number of concurrent downloads if using async download.
+    :type semaphore: asyncio.Semaphore, optional
 
     :return: None.
     :rtype: NoneType
@@ -515,6 +626,7 @@ def download_using_cache(
         dataset_ids=dataset_ids,
         log_progress=log_progress,
         progress_cb=progress_cb,
+        semaphore=semaphore,
         **kwargs,
     )
     copy_from_cache(project_id, dest_dir, [*downloaded, *cached])
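End to end, the semaphore now threads from download_using_cache through download_to_cache into download_async_or_sync. A sketch of the two-step cache flow (IDs and paths are placeholders):

    import asyncio

    downloaded, cached = download_to_cache(
        api, project_id=123, semaphore=asyncio.Semaphore(5)
    )
    # downloaded: dataset paths fetched this run; cached: paths reused from the cache
    copy_from_cache(123, "/tmp/my_project", dataset_paths=[*downloaded, *cached])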