supervisely 6.73.226__py3-none-any.whl → 6.73.227__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of supervisely might be problematic.

@@ -2466,8 +2466,7 @@ class VideoApi(RemoveableBulkModuleApi):
             save_path = os.path.join("/path/to/save/", video_info.name)

             semaphore = asyncio.Semaphore(100)
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(
                 api.video.download_path_async(video_info.id, save_path, semaphore)
             )
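
This docstring example (and the analogous ones below for VideoApi and VolumeApi) switches from creating a fresh event loop to reusing one via sly.utils.get_or_create_event_loop(). As a rough illustration of what such a helper typically does (an assumption added for clarity, not necessarily supervisely's exact implementation):

    import asyncio

    def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
        try:
            # Reuse the loop already running in this thread, if any.
            return asyncio.get_running_loop()
        except RuntimeError:
            pass
        try:
            # Fall back to the thread's current (not running) loop.
            return asyncio.get_event_loop()
        except RuntimeError:
            # No loop at all: create one and register it for this thread.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop
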
@@ -2555,8 +2554,7 @@ class VideoApi(RemoveableBulkModuleApi):

             ids = [770914, 770915]
             paths = ["/path/to/save/video1.mp4", "/path/to/save/video2.mp4"]
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(api.video.download_paths_async(ids, paths))
         """
         if len(ids) == 0:
@@ -1343,8 +1343,7 @@ class VolumeApi(RemoveableBulkModuleApi):
             save_path = os.path.join("/path/to/save/", volume_info.name)

             semaphore = asyncio.Semaphore(100)
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(
                 api.volume.download_path_async(volume_info.id, save_path, semaphore)
             )
@@ -1433,8 +1432,7 @@ class VolumeApi(RemoveableBulkModuleApi):

             ids = [770914, 770915]
             paths = ["/path/to/save/volume1.nrrd", "/path/to/save/volume2.nrrd"]
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(api.volume.download_paths_async(ids, paths))
         """
         if len(ids) == 0:
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple, Union

 from tqdm import tqdm

-from supervisely._utils import batched, is_production
+from supervisely._utils import batched, get_or_create_event_loop, is_production
 from supervisely.annotation.annotation import Annotation
 from supervisely.annotation.tag_meta import TagValueType
 from supervisely.api.api import Api
@@ -468,7 +468,7 @@ class BaseConverter:
             for remote_path in files.values()
         )

-        loop = asyncio.get_event_loop()
+        loop = get_or_create_event_loop()
         _, progress_cb = self.get_progress(
             len(files) if not is_archive_type else file_size,
             f"Downloading {files_type} from remote storage",
@@ -479,15 +479,19 @@ class BaseConverter:
             silent_remove(local_path)

         logger.info(f"Downloading {files_type} from remote storage...")
-        loop.run_until_complete(
-            self._api.storage.download_bulk_async(
-                team_id=self._team_id,
-                remote_paths=list(files.values()),
-                local_save_paths=list(files.keys()),
-                progress_cb=progress_cb,
-                progress_cb_type="number" if not is_archive_type else "size",
-            )
+        download_coro = self._api.storage.download_bulk_async(
+            team_id=self._team_id,
+            remote_paths=list(files.values()),
+            local_save_paths=list(files.keys()),
+            progress_cb=progress_cb,
+            progress_cb_type="number" if not is_archive_type else "size",
         )
+
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
         logger.info("Possible annotations downloaded successfully.")

         if is_archive_type:
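
The converter now builds the coroutine first and then dispatches it depending on whether the loop is already running: a running loop (for example one owned by another thread) receives the coroutine through asyncio.run_coroutine_threadsafe, otherwise run_until_complete drives it directly. The same dispatch recurs in several hunks below; a minimal standalone sketch of the pattern (illustrative names only, not part of supervisely's API):

    import asyncio

    def run_coroutine_sync(coro, loop: asyncio.AbstractEventLoop):
        if loop.is_running():
            # The loop is already running (e.g. in another thread), so
            # schedule the coroutine on it and block on the result.
            future = asyncio.run_coroutine_threadsafe(coro, loop=loop)
            return future.result()
        # Otherwise drive the loop ourselves until the coroutine finishes.
        return loop.run_until_complete(coro)
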
supervisely/io/fs.py CHANGED
@@ -1,7 +1,6 @@
 # coding: utf-8

 # docs
-import asyncio
 import errno
 import mimetypes
 import os
@@ -16,7 +15,7 @@ import requests
 from requests.structures import CaseInsensitiveDict
 from tqdm import tqdm

-from supervisely._utils import get_bytes_hash, get_string_hash
+from supervisely._utils import get_bytes_hash, get_or_create_event_loop, get_string_hash
 from supervisely.io.fs_cache import FileCache
 from supervisely.sly_logger import logger
 from supervisely.task.progress import Progress
@@ -1375,8 +1374,15 @@ async def copy_file_async(

     .. code-block:: python

-        from supervisely.io.fs import async_copy_file
-        await async_copy_file('/home/admin/work/projects/example/1.png', '/home/admin/work/tests/2.png')
+        import supervisely as sly
+
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.copy_file_async('/home/admin/work/projects/example/1.png', '/home/admin/work/tests/2.png')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
     """
     ensure_base_path(dst)
     async with aiofiles.open(dst, "wb") as out_f:
@@ -1404,8 +1410,15 @@ async def get_file_hash_async(path: str) -> str:

     .. code-block:: python

-        from supervisely.io.fs import get_file_hash_async
-        hash = await get_file_hash_async('/home/admin/work/projects/examples/1.jpeg') # rKLYA/p/P64dzidaQ/G7itxIz3ZCVnyUhEE9fSMGxU4=
+        import supervisely as sly
+
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.get_file_hash_async('/home/admin/work/projects/examples/1.jpeg')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            hash = future.result()
+        else:
+            hash = loop.run_until_complete(coro)
     """
     async with aiofiles.open(path, "rb") as file:
         file_bytes = await file.read()
@@ -1442,7 +1455,13 @@ async def unpack_archive_async(
         archive_path = '/home/admin/work/examples.tar'
         target_dir = '/home/admin/work/projects'

-        await sly.fs.unpack_archive(archive_path, target_dir)
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.unpack_archive_async(archive_path, target_dir)
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
     """
     if is_split:
         chunk = chunk_size_mb * 1024 * 1024
@@ -1467,9 +1486,37 @@ async def unpack_archive_async(
                     await output_file.write(data)
         archive_path = combined

-    loop = asyncio.get_running_loop()
+    loop = get_or_create_event_loop()
     await loop.run_in_executor(None, shutil.unpack_archive, archive_path, target_dir)
     if is_split:
         silent_remove(archive_path)
     if remove_junk:
         remove_junk_from_dir(target_dir)
+
+
+async def touch_async(path: str) -> None:
+    """
+    Sets access and modification times for a file asynchronously.
+
+    :param path: Target file path.
+    :type path: str
+    :returns: None
+    :rtype: :class:`NoneType`
+    :Usage example:
+
+    .. code-block:: python
+
+        import supervisely as sly
+
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.touch_async('/home/admin/work/projects/examples/1.jpeg')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
+    """
+    ensure_base_path(path)
+    async with aiofiles.open(path, "a"):
+        loop = get_or_create_event_loop()
+        await loop.run_in_executor(None, os.utime, path, None)
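
With asyncio removed from the module-level imports, fs.py now obtains loops through get_or_create_event_loop, and the new touch_async helper creates the file if needed and then updates its timestamps in an executor. A hypothetical snippet showing how these async helpers can be combined inside an existing coroutine (paths are placeholders):

    import asyncio
    import supervisely as sly

    async def prepare_files():
        # create two empty files concurrently
        await asyncio.gather(
            sly.fs.touch_async("/tmp/a.txt"),
            sly.fs.touch_async("/tmp/b.txt"),
        )
        # then copy one of them
        await sly.fs.copy_file_async("/tmp/a.txt", "/tmp/a_copy.txt")

    asyncio.run(prepare_files())
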
supervisely/io/json.py CHANGED
@@ -3,6 +3,7 @@ import json
 import os
 from typing import Dict, Optional

+import aiofiles
 import jsonschema


@@ -230,3 +231,34 @@ def validate_json(data: Dict, schema: Dict, raise_error: bool = False) -> bool:
         if raise_error:
             raise ValueError("JSON data is invalid. See error message for more details.") from err
         return False
+
+
+async def dump_json_file_async(data: Dict, filename: str, indent: Optional[int] = 4) -> None:
+    """
+    Write given data in json format in file with given name asynchronously.
+
+    :param data: Data in json format as a dict.
+    :type data: dict
+    :param filename: Target file path to write data.
+    :type filename: str
+    :param indent: Json array elements and object members will be pretty-printed with that indent level.
+    :type indent: int, optional
+    :returns: None
+    :rtype: :class:`NoneType`
+    :Usage example:
+
+    .. code-block:: python
+
+        import supervisely as sly
+
+        data = {1: 'example'}
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.json.dump_json_file_async(data, '/home/admin/work/projects/examples/1.json')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
+    """
+    async with aiofiles.open(filename, "w") as fout:
+        await fout.write(json.dumps(data, indent=indent))
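
dump_json_file_async mirrors the synchronous dump_json_file but writes through aiofiles. When no loop is running yet, a hypothetical one-off call can also go through asyncio.run (file path and data are placeholders):

    import asyncio
    import supervisely as sly

    meta = {"classes": ["car", "person"], "tags": []}
    asyncio.run(sly.json.dump_json_file_async(meta, "/tmp/meta.json", indent=2))
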
@@ -1,3 +1,4 @@
+import asyncio
 import os
 import shutil
 from typing import Callable, List, Optional, Tuple, Union
@@ -5,7 +6,7 @@ from typing import Callable, List, Optional, Tuple, Union
 from tqdm import tqdm

 from supervisely import get_project_class
-from supervisely._utils import rand_str
+from supervisely._utils import get_or_create_event_loop, rand_str
 from supervisely.annotation.annotation import Annotation, ProjectMeta
 from supervisely.api.api import Api
 from supervisely.api.dataset_api import DatasetInfo
@@ -19,7 +20,7 @@ from supervisely.io.fs import (
     get_directory_size,
     remove_dir,
 )
-from supervisely.io.json import load_json_file
+from supervisely.io.json import dump_json_file, load_json_file
 from supervisely.project import Project
 from supervisely.project.project import Dataset, OpenMode, ProjectType
 from supervisely.sly_logger import logger
@@ -177,29 +178,112 @@ def download(
     )


-def _get_cache_dir(project_id: int, dataset_name: str = None) -> str:
+def download_async(
+    api: Api,
+    project_id: int,
+    dest_dir: str,
+    semaphore: Optional[asyncio.Semaphore] = None,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    progress_cb: Optional[Union[tqdm, Callable]] = None,
+    **kwargs,
+) -> None:
+    project_info = api.project.get_info_by_id(project_id)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    project_class = get_project_class(project_info.type)
+    if hasattr(project_class, "download_async"):
+        download_coro = project_class.download_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            semaphore=semaphore,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+        loop = get_or_create_event_loop()
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
+    else:
+        raise NotImplementedError(f"Method download_async is not implemented for {project_class}")
+
+
+def download_async_or_sync(
+    api: Api,
+    project_id: int,
+    dest_dir: str,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    progress_cb: Optional[Union[tqdm, Callable]] = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
+    **kwargs,
+):
+    project_info = api.project.get_info_by_id(project_id)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    project_class = get_project_class(project_info.type)
+    if hasattr(project_class, "download_async"):
+        download_coro = project_class.download_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            semaphore=semaphore,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+        loop = get_or_create_event_loop()
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
+
+    else:
+        project_class.download(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+
+
+def _get_cache_dir(project_id: int, dataset_path: str = None) -> str:
     p = os.path.join(apps_cache_dir(), str(project_id))
-    if dataset_name is not None:
-        p = os.path.join(p, dataset_name)
+    if dataset_path is not None:
+        p = os.path.join(p, dataset_path)
     return p


-def is_cached(project_id, dataset_name: str = None) -> bool:
-    return dir_exists(_get_cache_dir(project_id, dataset_name))
+def is_cached(project_id, dataset_path: str = None) -> bool:
+    return dir_exists(_get_cache_dir(project_id, dataset_path))


-def _split_by_cache(project_id: int, dataset_names: List[str]) -> Tuple[List, List]:
+def _split_by_cache(project_id: int, dataset_paths: List[str]) -> Tuple[List, List]:
     if not is_cached(project_id):
-        return dataset_names, []
-    to_download = [ds_name for ds_name in dataset_names if not is_cached(project_id, ds_name)]
-    cached = [ds_name for ds_name in dataset_names if is_cached(project_id, ds_name)]
+        return dataset_paths, []
+    to_download = [ds_path for ds_path in dataset_paths if not is_cached(project_id, ds_path)]
+    cached = [ds_path for ds_path in dataset_paths if is_cached(project_id, ds_path)]
     return to_download, cached


-def get_cache_size(project_id: int, dataset_name: str = None) -> int:
-    if not is_cached(project_id, dataset_name):
+def get_cache_size(project_id: int, dataset_path: str = None) -> int:
+    if not is_cached(project_id, dataset_path):
         return 0
-    cache_dir = _get_cache_dir(project_id, dataset_name)
+    cache_dir = _get_cache_dir(project_id, dataset_path)
     return get_directory_size(cache_dir)

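
download_async runs a project class's download_async coroutine (raising NotImplementedError when the class has no async implementation), while download_async_or_sync falls back to the synchronous download. A hypothetical call site, assuming the helpers live in the same project download module as the existing download() helper (import path and IDs are placeholders):

    import asyncio
    import supervisely as sly
    from supervisely.project.download import download_async_or_sync  # assumed path

    api = sly.Api.from_env()
    download_async_or_sync(
        api=api,
        project_id=12345,                 # placeholder project id
        dest_dir="/tmp/project_12345",
        semaphore=asyncio.Semaphore(10),  # limit concurrent item downloads
        log_progress=True,
    )
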
@@ -254,7 +338,7 @@ def _validate_dataset(
     project_meta_changed = _project_meta_changed(project_meta, project.meta)
     for dataset in project.datasets:
         dataset: Dataset
-        if dataset.name == dataset_info.name:
+        if dataset.name.endswith(dataset_info.name):  # TODO: fix it later
             diff = set(items_infos_dict.keys()).difference(set(dataset.get_items_names()))
             if diff:
                 logger.debug(
@@ -305,10 +389,13 @@ def _validate(
     api: Api, project_info: ProjectInfo, project_meta: ProjectMeta, dataset_infos: List[DatasetInfo]
 ):
     project_id = project_info.id
-    to_download, cached = _split_by_cache(project_id, [info.name for info in dataset_infos])
+    to_download, cached = _split_by_cache(
+        project_id, [_get_dataset_path(api, dataset_infos, info.id) for info in dataset_infos]
+    )
     to_download, cached = set(to_download), set(cached)
     for dataset_info in dataset_infos:
-        if dataset_info.name in to_download:
+        ds_path = _get_dataset_path(api, dataset_infos, dataset_info.id)
+        if ds_path in to_download:
             continue
         if not _validate_dataset(
             api,
@@ -317,10 +404,10 @@ def _validate(
             project_meta,
             dataset_info,
         ):
-            to_download.add(dataset_info.name)
-            cached.remove(dataset_info.name)
+            to_download.add(ds_path)
+            cached.remove(ds_path)
             logger.info(
-                f"Dataset {dataset_info.name} of project {project_id} is not up to date and will be re-downloaded."
+                f"Dataset {ds_path} of project {project_id} is not up to date and will be re-downloaded."
             )
     return list(to_download), list(cached)

@@ -337,51 +424,40 @@ def _add_save_items_infos_to_kwargs(kwargs: dict, project_type: str):
     return kwargs


+def _add_resume_download_to_kwargs(kwargs: dict, project_type: str):
+    supported_force_projects = (str(ProjectType.IMAGES),)
+    if project_type in supported_force_projects:
+        kwargs["resume_download"] = True
+    return kwargs
+
+
 def _download_project_to_cache(
     api: Api,
     project_info: ProjectInfo,
     dataset_infos: List[DatasetInfo],
     log_progress: bool = True,
     progress_cb: Callable = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ):
     project_id = project_info.id
     project_type = project_info.type
     kwargs = _add_save_items_infos_to_kwargs(kwargs, project_type)
+    kwargs = _add_resume_download_to_kwargs(kwargs, project_type)
     cached_project_dir = _get_cache_dir(project_id)
     if len(dataset_infos) == 0:
         logger.debug("No datasets to download")
         return
-    elif is_cached(project_id):
-        temp_pr_dir = os.path.join(apps_cache_dir(), rand_str(10))
-        download(
-            api=api,
-            project_id=project_id,
-            dest_dir=temp_pr_dir,
-            dataset_ids=[info.id for info in dataset_infos],
-            log_progress=log_progress,
-            progress_cb=progress_cb,
-            **kwargs,
-        )
-        existing_project = Project(cached_project_dir, OpenMode.READ)
-        for dataset in existing_project.datasets:
-            dataset: Dataset
-            dataset.directory
-            if dataset.name in [info.name for info in dataset_infos]:
-                continue
-            copy_dir_recursively(dataset.directory, os.path.join(temp_pr_dir, dataset.name))
-        remove_dir(cached_project_dir)
-        shutil.move(temp_pr_dir, cached_project_dir)
-    else:
-        download(
-            api=api,
-            project_id=project_id,
-            dest_dir=cached_project_dir,
-            dataset_ids=[info.id for info in dataset_infos],
-            log_progress=log_progress,
-            progress_cb=progress_cb,
-            **kwargs,
-        )
+    download_async_or_sync(
+        api=api,
+        project_id=project_id,
+        dest_dir=cached_project_dir,
+        dataset_ids=[info.id for info in dataset_infos],
+        log_progress=log_progress,
+        progress_cb=progress_cb,
+        semaphore=semaphore,
+        **kwargs,
+    )


 def download_to_cache(
@@ -391,6 +467,7 @@ def download_to_cache(
     dataset_ids: List[int] = None,
     log_progress: bool = True,
     progress_cb=None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ) -> Tuple[List, List]:
     """
@@ -410,6 +487,7 @@ def download_to_cache(
     :type log_progress: bool, optional
     :param progress_cb: Function for tracking download progress. Will be called with number of items downloaded.
     :type progress_cb: tqdm or callable, optional
+    :param semaphore: Semaphore for limiting the number of concurrent downloads if using async download.

     :return: Tuple where the first list contains names of downloaded datasets and the second list contains
         names of cached datasets
@@ -421,27 +499,53 @@ def download_to_cache(
         raise ValueError("dataset_infos and dataset_ids cannot be specified at the same time")
     if dataset_infos is None:
         if dataset_ids is None:
-            dataset_infos = api.dataset.get_list(project_id)
+            dataset_infos = api.dataset.get_list(project_id, recursive=True)
         else:
             dataset_infos = [api.dataset.get_info_by_id(dataset_id) for dataset_id in dataset_ids]
-    name_to_info = {info.name: info for info in dataset_infos}
+    path_to_info = {_get_dataset_path(api, dataset_infos, info.id): info for info in dataset_infos}
     to_download, cached = _validate(api, project_info, project_meta, dataset_infos)
     if progress_cb is not None:
-        cached_items_n = sum(name_to_info[ds_name].items_count for ds_name in cached)
+        cached_items_n = sum(path_to_info[ds_path].items_count for ds_path in cached)
         progress_cb(cached_items_n)
     _download_project_to_cache(
         api=api,
         project_info=project_info,
-        dataset_infos=[name_to_info[name] for name in to_download],
+        dataset_infos=[path_to_info[ds_path] for ds_path in to_download],
         log_progress=log_progress,
         progress_cb=progress_cb,
+        semaphore=semaphore,
         **kwargs,
     )
     return to_download, cached


+def _get_dataset_parents(api, dataset_infos, dataset_id):
+    dataset_infos_dict = {info.id: info for info in dataset_infos}
+    this_dataset_info = dataset_infos_dict.get(dataset_id, api.dataset.get_info_by_id(dataset_id))
+    if this_dataset_info.parent_id is None:
+        return []
+    parent = _get_dataset_parents(
+        api, list(dataset_infos_dict.values()), this_dataset_info.parent_id
+    )
+    this_parent_name = dataset_infos_dict.get(
+        this_dataset_info.parent_id, api.dataset.get_info_by_id(dataset_id)
+    ).name
+    return [*parent, this_parent_name]
+
+
+def _get_dataset_path(api: Api, dataset_infos: List[DatasetInfo], dataset_id: int) -> str:
+    parents = _get_dataset_parents(api, dataset_infos, dataset_id)
+    dataset_infos_dict = {info.id: info for info in dataset_infos}
+    this_dataset_info = dataset_infos_dict.get(dataset_id, api.dataset.get_info_by_id(dataset_id))
+    return Dataset._get_dataset_path(this_dataset_info.name, parents)
+
+
 def copy_from_cache(
-    project_id: int, dest_dir: str, dataset_names: List[str] = None, progress_cb: Callable = None
+    project_id: int,
+    dest_dir: str,
+    dataset_names: List[str] = None,
+    progress_cb: Callable = None,
+    dataset_paths: List[str] = None,
 ):
     """
     Copy project or dataset from cache to the specified directory.
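
_get_dataset_parents walks parent_id links up to the root and _get_dataset_path joins the collected names through Dataset._get_dataset_path, so nested datasets are cached under their full path instead of a bare name. A self-contained sketch of the parent-chain recursion on plain dicts (no API calls, made-up data):

    # made-up dataset records keyed by id
    infos = {
        1: {"name": "root_ds", "parent_id": None},
        2: {"name": "child_ds", "parent_id": 1},
        3: {"name": "leaf_ds", "parent_id": 2},
    }

    def parents(ds_id):
        parent_id = infos[ds_id]["parent_id"]
        if parent_id is None:
            return []
        # recurse toward the root first, then append the immediate parent's name
        return [*parents(parent_id), infos[parent_id]["name"]]

    print(parents(3))  # ['root_ds', 'child_ds'] -> leaf_ds is cached under root_ds/child_ds/leaf_ds
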
@@ -451,31 +555,35 @@ def copy_from_cache(
     :type project_id: int
     :param dest_dir: Destination path to local directory.
     :type dest_dir: str
-    :param dataset_name: Name of the dataset to copy. If not specified, the whole project will be copied.
+    :param dataset_name: List of dataset paths to copy. If not specified, the whole project will be copied.
     :type dataset_name: str, optional
     :param progress_cb: Function for tracking copying progress. Will be called with number of bytes copied.
     :type progress_cb: tqdm or callable, optional
+    :param dataset_paths: List of dataset paths to copy. If not specified, all datasets will be copied.
+    :type dataset_paths: list(str), optional

     :return: None.
     :rtype: NoneType
     """
     if not is_cached(project_id):
         raise RuntimeError(f"Project {project_id} is not cached")
-    if dataset_names is not None:
-        for dataset_name in dataset_names:
-            if not is_cached(project_id, dataset_name):
-                raise RuntimeError(f"Dataset {dataset_name} of project {project_id} is not cached")
+    if dataset_names is not None or dataset_paths is not None:
+        if dataset_names is not None:
+            dataset_paths = dataset_names
+        for dataset_path in dataset_paths:
+            if not is_cached(project_id, dataset_path):
+                raise RuntimeError(f"Dataset {dataset_path} of project {project_id} is not cached")
     cache_dir = _get_cache_dir(project_id)
-    if dataset_names is None:
+    if dataset_paths is None:
         copy_dir_recursively(cache_dir, dest_dir, progress_cb)
     else:
         # copy meta
         copy_file(os.path.join(cache_dir, "meta.json"), os.path.join(dest_dir, "meta.json"))
         # copy datasets
-        for dataset_name in dataset_names:
+        for dataset_path in dataset_paths:
             copy_dir_recursively(
-                os.path.join(cache_dir, dataset_name),
-                os.path.join(dest_dir, dataset_name),
+                os.path.join(cache_dir, dataset_path),
+                os.path.join(dest_dir, dataset_path),
                 progress_cb,
             )

@@ -487,6 +595,7 @@ def download_using_cache(
     dataset_ids: Optional[List[int]] = None,
     log_progress: bool = True,
     progress_cb: Optional[Union[tqdm, Callable]] = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ) -> None:
     """
@@ -505,6 +614,8 @@ def download_using_cache(
     :type log_progress: bool
     :param progress_cb: Function for tracking download progress. Will be called with number of items downloaded.
     :type progress_cb: tqdm or callable, optional
+    :param semaphore: Semaphore for limiting the number of concurrent downloads if using async download.
+    :type semaphore: asyncio.Semaphore, optional

     :return: None.
     :rtype: NoneType
@@ -515,6 +626,7 @@ def download_using_cache(
         dataset_ids=dataset_ids,
         log_progress=log_progress,
         progress_cb=progress_cb,
+        semaphore=semaphore,
         **kwargs,
     )
     copy_from_cache(project_id, dest_dir, [*downloaded, *cached])
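
download_using_cache now forwards the new semaphore to download_to_cache and then copies both freshly downloaded and already-cached dataset paths into dest_dir. A hypothetical call (import path and IDs are placeholders):

    import asyncio
    import supervisely as sly
    from supervisely.project.download import download_using_cache  # assumed path

    api = sly.Api.from_env()
    download_using_cache(
        api,
        project_id=12345,                 # placeholder project id
        dest_dir="/tmp/project_12345",
        semaphore=asyncio.Semaphore(20),  # forwarded to download_to_cache
    )
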