supervisely 6.73.226__py3-none-any.whl → 6.73.227__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of supervisely has been flagged as potentially problematic.
- supervisely/__init__.py +1 -1
- supervisely/_utils.py +23 -0
- supervisely/api/annotation_api.py +184 -14
- supervisely/api/api.py +2 -2
- supervisely/api/entity_annotation/figure_api.py +11 -2
- supervisely/api/file_api.py +144 -8
- supervisely/api/image_api.py +5 -10
- supervisely/api/pointcloud/pointcloud_api.py +4 -8
- supervisely/api/video/video_annotation_api.py +45 -0
- supervisely/api/video/video_api.py +2 -4
- supervisely/api/volume/volume_api.py +2 -4
- supervisely/convert/base_converter.py +14 -10
- supervisely/io/fs.py +55 -8
- supervisely/io/json.py +32 -0
- supervisely/project/download.py +176 -64
- supervisely/project/project.py +676 -35
- supervisely/project/video_project.py +293 -3
- {supervisely-6.73.226.dist-info → supervisely-6.73.227.dist-info}/METADATA +1 -1
- {supervisely-6.73.226.dist-info → supervisely-6.73.227.dist-info}/RECORD +23 -23
- {supervisely-6.73.226.dist-info → supervisely-6.73.227.dist-info}/LICENSE +0 -0
- {supervisely-6.73.226.dist-info → supervisely-6.73.227.dist-info}/WHEEL +0 -0
- {supervisely-6.73.226.dist-info → supervisely-6.73.227.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.226.dist-info → supervisely-6.73.227.dist-info}/top_level.txt +0 -0
supervisely/api/video/video_api.py CHANGED

@@ -2466,8 +2466,7 @@ class VideoApi(RemoveableBulkModuleApi):
             save_path = os.path.join("/path/to/save/", video_info.name)

             semaphore = asyncio.Semaphore(100)
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(
                 api.video.download_path_async(video_info.id, save_path, semaphore)
             )
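Every docstring change in this release follows the same pattern: manual event-loop setup is replaced with a single call to sly.utils.get_or_create_event_loop(). The helper's implementation is not part of this diff; the following is only a sketch of what such a helper conventionally does, not the SDK's actual code:

    import asyncio

    def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
        # Prefer a loop that is already running (e.g. inside Jupyter or an app).
        try:
            return asyncio.get_running_loop()
        except RuntimeError:
            # No running loop in this thread: reuse the thread's loop,
            # or create and register a fresh one.
            try:
                return asyncio.get_event_loop()
            except RuntimeError:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                return loop

This keeps the documented snippets safe in environments that already own a loop, where an unconditional asyncio.set_event_loop() would replace it.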
@@ -2555,8 +2554,7 @@ class VideoApi(RemoveableBulkModuleApi):

             ids = [770914, 770915]
             paths = ["/path/to/save/video1.mp4", "/path/to/save/video2.mp4"]
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(api.video.download_paths_async(ids, paths))
         """
         if len(ids) == 0:
supervisely/api/volume/volume_api.py CHANGED

@@ -1343,8 +1343,7 @@ class VolumeApi(RemoveableBulkModuleApi):
             save_path = os.path.join("/path/to/save/", volume_info.name)

             semaphore = asyncio.Semaphore(100)
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(
                 api.volume.download_path_async(volume_info.id, save_path, semaphore)
             )
@@ -1433,8 +1432,7 @@ class VolumeApi(RemoveableBulkModuleApi):

             ids = [770914, 770915]
             paths = ["/path/to/save/volume1.nrrd", "/path/to/save/volume2.nrrd"]
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+            loop = sly.utils.get_or_create_event_loop()
             loop.run_until_complete(api.volume.download_paths_async(ids, paths))
         """
         if len(ids) == 0:
supervisely/convert/base_converter.py CHANGED

@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple, Union

 from tqdm import tqdm

-from supervisely._utils import batched, is_production
+from supervisely._utils import batched, get_or_create_event_loop, is_production
 from supervisely.annotation.annotation import Annotation
 from supervisely.annotation.tag_meta import TagValueType
 from supervisely.api.api import Api
@@ -468,7 +468,7 @@ class BaseConverter:
             for remote_path in files.values()
         )

-        loop = asyncio.get_event_loop()
+        loop = get_or_create_event_loop()
         _, progress_cb = self.get_progress(
             len(files) if not is_archive_type else file_size,
             f"Downloading {files_type} from remote storage",
@@ -479,15 +479,19 @@ class BaseConverter:
             silent_remove(local_path)

         logger.info(f"Downloading {files_type} from remote storage...")
-        loop.run_until_complete(
-            self._api.storage.download_bulk_async(
-                team_id=self._team_id,
-                remote_paths=list(files.values()),
-                local_save_paths=list(files.keys()),
-                progress_cb=progress_cb,
-                progress_cb_type="number" if not is_archive_type else "size",
-            )
+        download_coro = self._api.storage.download_bulk_async(
+            team_id=self._team_id,
+            remote_paths=list(files.values()),
+            local_save_paths=list(files.keys()),
+            progress_cb=progress_cb,
+            progress_cb_type="number" if not is_archive_type else "size",
         )
+
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
         logger.info("Possible annotations downloaded successfully.")

         if is_archive_type:
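The converter now builds the coroutine first and then dispatches on loop.is_running(). A self-contained sketch of that dispatch follows; do_work is a hypothetical stand-in for storage.download_bulk_async, and the worker thread exists only to make the is_running() branch reachable. Note that run_coroutine_threadsafe plus future.result() is only safe when the running loop belongs to another thread, since the caller blocks on the result:

    import asyncio
    import threading
    import time

    async def do_work() -> str:
        # Hypothetical stand-in for an SDK coroutine such as download_bulk_async.
        await asyncio.sleep(0.1)
        return "done"

    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()
    time.sleep(0.1)  # give the worker thread a moment to start the loop

    if loop.is_running():
        # Schedule onto the loop owned by the worker thread; block for the result.
        future = asyncio.run_coroutine_threadsafe(do_work(), loop=loop)
        print(future.result())
    else:
        # Nobody is driving the loop: run the coroutine to completion ourselves.
        print(loop.run_until_complete(do_work()))

    loop.call_soon_threadsafe(loop.stop)  # tidy shutdown of the worker loop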
supervisely/io/fs.py CHANGED
@@ -1,7 +1,6 @@
 # coding: utf-8

 # docs
-import asyncio
 import errno
 import mimetypes
 import os
@@ -16,7 +15,7 @@ import requests
 from requests.structures import CaseInsensitiveDict
 from tqdm import tqdm

-from supervisely._utils import get_bytes_hash, get_string_hash
+from supervisely._utils import get_bytes_hash, get_or_create_event_loop, get_string_hash
 from supervisely.io.fs_cache import FileCache
 from supervisely.sly_logger import logger
 from supervisely.task.progress import Progress
@@ -1375,8 +1374,15 @@ async def copy_file_async(

     .. code-block:: python

-
-
+        import supervisely as sly
+
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.copy_file_async('/home/admin/work/projects/example/1.png', '/home/admin/work/tests/2.png')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
     """
     ensure_base_path(dst)
     async with aiofiles.open(dst, "wb") as out_f:
@@ -1404,8 +1410,15 @@ async def get_file_hash_async(path: str) -> str:

     .. code-block:: python

-
-
+        import supervisely as sly
+
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.get_file_hash_async('/home/admin/work/projects/examples/1.jpeg')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            hash = future.result()
+        else:
+            hash = loop.run_until_complete(coro)
     """
     async with aiofiles.open(path, "rb") as file:
         file_bytes = await file.read()
@@ -1442,7 +1455,13 @@ async def unpack_archive_async(
        archive_path = '/home/admin/work/examples.tar'
        target_dir = '/home/admin/work/projects'

-
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.unpack_archive_async(archive_path, target_dir)
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
     """
     if is_split:
         chunk = chunk_size_mb * 1024 * 1024
@@ -1467,9 +1486,37 @@ async def unpack_archive_async(
             await output_file.write(data)
         archive_path = combined

-    loop = asyncio.get_event_loop()
+    loop = get_or_create_event_loop()
     await loop.run_in_executor(None, shutil.unpack_archive, archive_path, target_dir)
     if is_split:
         silent_remove(archive_path)
     if remove_junk:
         remove_junk_from_dir(target_dir)
+
+
+async def touch_async(path: str) -> None:
+    """
+    Sets access and modification times for a file asynchronously.
+
+    :param path: Target file path.
+    :type path: str
+    :returns: None
+    :rtype: :class:`NoneType`
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.fs.touch_async('/home/admin/work/projects/examples/1.jpeg')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
+    """
+    ensure_base_path(path)
+    async with aiofiles.open(path, "a"):
+        loop = get_or_create_event_loop()
+        await loop.run_in_executor(None, os.utime, path, None)
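Both unpack_archive_async and the new touch_async keep the event loop responsive by pushing blocking stdlib calls (shutil.unpack_archive, os.utime) onto the default thread-pool executor. A minimal standalone sketch of the same idea, with illustrative paths:

    import asyncio
    import shutil

    async def unpack_in_thread(archive_path: str, target_dir: str) -> None:
        # The extraction itself is blocking, so run it in a worker thread
        # while the loop keeps servicing other coroutines.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.unpack_archive, archive_path, target_dir)

    # asyncio.run(unpack_in_thread("/tmp/examples.tar", "/tmp/projects"))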
supervisely/io/json.py CHANGED
@@ -3,6 +3,7 @@ import json
 import os
 from typing import Dict, Optional

+import aiofiles
 import jsonschema

@@ -230,3 +231,34 @@ def validate_json(data: Dict, schema: Dict, raise_error: bool = False) -> bool:
         if raise_error:
             raise ValueError("JSON data is invalid. See error message for more details.") from err
         return False
+
+
+async def dump_json_file_async(data: Dict, filename: str, indent: Optional[int] = 4) -> None:
+    """
+    Write given data in json format in file with given name asynchronously.
+
+    :param data: Data in json format as a dict.
+    :type data: dict
+    :param filename: Target file path to write data.
+    :type filename: str
+    :param indent: Json array elements and object members will be pretty-printed with that indent level.
+    :type indent: int, optional
+    :returns: None
+    :rtype: :class:`NoneType`
+    :Usage example:
+
+     .. code-block:: python
+
+        import supervisely as sly
+
+        data = {1: 'example'}
+        loop = sly.utils.get_or_create_event_loop()
+        coro = sly.json.dump_json_file_async(data, '/home/admin/work/projects/examples/1.json')
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            future.result()
+        else:
+            loop.run_until_complete(coro)
+    """
+    async with aiofiles.open(filename, "w") as fout:
+        await fout.write(json.dumps(data, indent=indent))
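When no loop is running yet (a plain script rather than a notebook or app), the new coroutine can also simply be driven with asyncio.run; a minimal usage sketch with an illustrative path:

    import asyncio
    import supervisely as sly

    async def main() -> None:
        # File I/O goes through aiofiles, so sibling coroutines are not blocked.
        await sly.json.dump_json_file_async({"status": "ok"}, "/tmp/example.json")

    asyncio.run(main())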
supervisely/project/download.py CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import os
 import shutil
 from typing import Callable, List, Optional, Tuple, Union
@@ -5,7 +6,7 @@ from typing import Callable, List, Optional, Tuple, Union
 from tqdm import tqdm

 from supervisely import get_project_class
-from supervisely._utils import rand_str
+from supervisely._utils import get_or_create_event_loop, rand_str
 from supervisely.annotation.annotation import Annotation, ProjectMeta
 from supervisely.api.api import Api
 from supervisely.api.dataset_api import DatasetInfo
@@ -19,7 +20,7 @@ from supervisely.io.fs import (
     get_directory_size,
     remove_dir,
 )
-from supervisely.io.json import load_json_file
+from supervisely.io.json import dump_json_file, load_json_file
 from supervisely.project import Project
 from supervisely.project.project import Dataset, OpenMode, ProjectType
 from supervisely.sly_logger import logger
@@ -177,29 +178,112 @@ def download(
     )


-def _get_cache_dir(project_id: int, dataset_name: str = None) -> str:
+def download_async(
+    api: Api,
+    project_id: int,
+    dest_dir: str,
+    semaphore: Optional[asyncio.Semaphore] = None,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    progress_cb: Optional[Union[tqdm, Callable]] = None,
+    **kwargs,
+) -> None:
+    project_info = api.project.get_info_by_id(project_id)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    project_class = get_project_class(project_info.type)
+    if hasattr(project_class, "download_async"):
+        download_coro = project_class.download_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            semaphore=semaphore,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+        loop = get_or_create_event_loop()
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
+    else:
+        raise NotImplementedError(f"Method download_async is not implemented for {project_class}")
+
+
+def download_async_or_sync(
+    api: Api,
+    project_id: int,
+    dest_dir: str,
+    dataset_ids: Optional[List[int]] = None,
+    log_progress: bool = True,
+    progress_cb: Optional[Union[tqdm, Callable]] = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
+    **kwargs,
+):
+    project_info = api.project.get_info_by_id(project_id)
+
+    if progress_cb is not None:
+        log_progress = False
+
+    project_class = get_project_class(project_info.type)
+    if hasattr(project_class, "download_async"):
+        download_coro = project_class.download_async(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            semaphore=semaphore,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+        loop = get_or_create_event_loop()
+        if loop.is_running():
+            future = asyncio.run_coroutine_threadsafe(download_coro, loop=loop)
+            future.result()
+        else:
+            loop.run_until_complete(download_coro)
+
+    else:
+        project_class.download(
+            api=api,
+            project_id=project_id,
+            dest_dir=dest_dir,
+            dataset_ids=dataset_ids,
+            log_progress=log_progress,
+            progress_cb=progress_cb,
+            **kwargs,
+        )
+
+
+def _get_cache_dir(project_id: int, dataset_path: str = None) -> str:
     p = os.path.join(apps_cache_dir(), str(project_id))
-    if dataset_name is not None:
-        p = os.path.join(p, dataset_name)
+    if dataset_path is not None:
+        p = os.path.join(p, dataset_path)
     return p


-def is_cached(project_id, dataset_name: str = None) -> bool:
-    return dir_exists(_get_cache_dir(project_id, dataset_name))
+def is_cached(project_id, dataset_path: str = None) -> bool:
+    return dir_exists(_get_cache_dir(project_id, dataset_path))


-def _split_by_cache(project_id: int, dataset_names: List[str]) -> Tuple[List, List]:
+def _split_by_cache(project_id: int, dataset_paths: List[str]) -> Tuple[List, List]:
     if not is_cached(project_id):
-        return dataset_names, []
-    to_download = [name for name in dataset_names if not is_cached(project_id, name)]
-    cached = [name for name in dataset_names if is_cached(project_id, name)]
+        return dataset_paths, []
+    to_download = [ds_path for ds_path in dataset_paths if not is_cached(project_id, ds_path)]
+    cached = [ds_path for ds_path in dataset_paths if is_cached(project_id, ds_path)]
     return to_download, cached


-def get_cache_size(project_id: int, dataset_name: str = None) -> int:
-    if not is_cached(project_id, dataset_name):
+def get_cache_size(project_id: int, dataset_path: str = None) -> int:
+    if not is_cached(project_id, dataset_path):
         return 0
-    cache_dir = _get_cache_dir(project_id, dataset_name)
+    cache_dir = _get_cache_dir(project_id, dataset_path)
     return get_directory_size(cache_dir)

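download_async_or_sync is the new single entry point for project downloads: it prefers a project class's download_async coroutine and falls back to the synchronous download otherwise. A usage sketch (the id and path are illustrative):

    import asyncio
    from supervisely.api.api import Api
    from supervisely.project.download import download_async_or_sync

    api = Api.from_env()

    download_async_or_sync(
        api=api,
        project_id=12345,                 # illustrative project id
        dest_dir="/tmp/project_12345",    # illustrative destination
        semaphore=asyncio.Semaphore(10),  # caps concurrent requests on the async path
    )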
@@ -254,7 +338,7 @@ def _validate_dataset(
     project_meta_changed = _project_meta_changed(project_meta, project.meta)
     for dataset in project.datasets:
         dataset: Dataset
-        if dataset.name == dataset_info.name:
+        if dataset.name.endswith(dataset_info.name):  # TODO: fix it later
             diff = set(items_infos_dict.keys()).difference(set(dataset.get_items_names()))
             if diff:
                 logger.debug(
@@ -305,10 +389,13 @@ def _validate(
     api: Api, project_info: ProjectInfo, project_meta: ProjectMeta, dataset_infos: List[DatasetInfo]
 ):
     project_id = project_info.id
-    to_download, cached = _split_by_cache(project_id, [info.name for info in dataset_infos])
+    to_download, cached = _split_by_cache(
+        project_id, [_get_dataset_path(api, dataset_infos, info.id) for info in dataset_infos]
+    )
     to_download, cached = set(to_download), set(cached)
     for dataset_info in dataset_infos:
-        if dataset_info.name in to_download:
+        ds_path = _get_dataset_path(api, dataset_infos, dataset_info.id)
+        if ds_path in to_download:
             continue
         if not _validate_dataset(
             api,

@@ -317,10 +404,10 @@ def _validate(
             project_meta,
             dataset_info,
         ):
-            to_download.add(dataset_info.name)
-            cached.remove(dataset_info.name)
+            to_download.add(ds_path)
+            cached.remove(ds_path)
             logger.info(
-                f"Dataset {dataset_info.name} of project {project_id} is not up to date and will be re-downloaded."
+                f"Dataset {ds_path} of project {project_id} is not up to date and will be re-downloaded."
             )
     return list(to_download), list(cached)

@@ -337,51 +424,40 @@ def _add_save_items_infos_to_kwargs(kwargs: dict, project_type: str):
     return kwargs


+def _add_resume_download_to_kwargs(kwargs: dict, project_type: str):
+    supported_force_projects = (str(ProjectType.IMAGES),)
+    if project_type in supported_force_projects:
+        kwargs["resume_download"] = True
+    return kwargs
+
+
 def _download_project_to_cache(
     api: Api,
     project_info: ProjectInfo,
     dataset_infos: List[DatasetInfo],
     log_progress: bool = True,
     progress_cb: Callable = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ):
     project_id = project_info.id
     project_type = project_info.type
     kwargs = _add_save_items_infos_to_kwargs(kwargs, project_type)
+    kwargs = _add_resume_download_to_kwargs(kwargs, project_type)
     cached_project_dir = _get_cache_dir(project_id)
     if len(dataset_infos) == 0:
         logger.debug("No datasets to download")
         return
-    if dir_exists(cached_project_dir):
-        temp_pr_dir = cached_project_dir + "_" + rand_str(10)
-        download(
-            api=api,
-            project_id=project_id,
-            dest_dir=temp_pr_dir,
-            dataset_ids=[info.id for info in dataset_infos],
-            log_progress=log_progress,
-            progress_cb=progress_cb,
-            **kwargs,
-        )
-        existing_project = Project(cached_project_dir, OpenMode.READ)
-        for dataset in existing_project.datasets:
-            dataset: Dataset
-            dataset.directory
-            if dataset.name in [info.name for info in dataset_infos]:
-                continue
-            copy_dir_recursively(dataset.directory, os.path.join(temp_pr_dir, dataset.name))
-        remove_dir(cached_project_dir)
-        shutil.move(temp_pr_dir, cached_project_dir)
-    else:
-        download(
-            api=api,
-            project_id=project_id,
-            dest_dir=cached_project_dir,
-            dataset_ids=[info.id for info in dataset_infos],
-            log_progress=log_progress,
-            progress_cb=progress_cb,
-            **kwargs,
-        )
+    download_async_or_sync(
+        api=api,
+        project_id=project_id,
+        dest_dir=cached_project_dir,
+        dataset_ids=[info.id for info in dataset_infos],
+        log_progress=log_progress,
+        progress_cb=progress_cb,
+        semaphore=semaphore,
+        **kwargs,
+    )


 def download_to_cache(
@@ -391,6 +467,7 @@ def download_to_cache(
     dataset_ids: List[int] = None,
     log_progress: bool = True,
     progress_cb=None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ) -> Tuple[List, List]:
     """
@@ -410,6 +487,7 @@ def download_to_cache(
     :type log_progress: bool, optional
     :param progress_cb: Function for tracking download progress. Will be called with number of items downloaded.
     :type progress_cb: tqdm or callable, optional
+    :param semaphore: Semaphore for limiting the number of concurrent downloads if using async download.

     :return: Tuple where the first list contains names of downloaded datasets and the second list contains
         names of cached datasets
@@ -421,27 +499,53 @@ def download_to_cache(
         raise ValueError("dataset_infos and dataset_ids cannot be specified at the same time")
     if dataset_infos is None:
         if dataset_ids is None:
-            dataset_infos = api.dataset.get_list(project_id)
+            dataset_infos = api.dataset.get_list(project_id, recursive=True)
         else:
             dataset_infos = [api.dataset.get_info_by_id(dataset_id) for dataset_id in dataset_ids]
-    name_to_info = {info.name: info for info in dataset_infos}
+    path_to_info = {_get_dataset_path(api, dataset_infos, info.id): info for info in dataset_infos}
     to_download, cached = _validate(api, project_info, project_meta, dataset_infos)
     if progress_cb is not None:
-        cached_items_n = sum(name_to_info[name].items_count for name in cached)
+        cached_items_n = sum(path_to_info[ds_path].items_count for ds_path in cached)
         progress_cb(cached_items_n)
     _download_project_to_cache(
         api=api,
         project_info=project_info,
-        dataset_infos=[name_to_info[name] for name in to_download],
+        dataset_infos=[path_to_info[ds_path] for ds_path in to_download],
         log_progress=log_progress,
         progress_cb=progress_cb,
+        semaphore=semaphore,
         **kwargs,
     )
     return to_download, cached


+def _get_dataset_parents(api, dataset_infos, dataset_id):
+    dataset_infos_dict = {info.id: info for info in dataset_infos}
+    this_dataset_info = dataset_infos_dict.get(dataset_id, api.dataset.get_info_by_id(dataset_id))
+    if this_dataset_info.parent_id is None:
+        return []
+    parent = _get_dataset_parents(
+        api, list(dataset_infos_dict.values()), this_dataset_info.parent_id
+    )
+    this_parent_name = dataset_infos_dict.get(
+        this_dataset_info.parent_id, api.dataset.get_info_by_id(dataset_id)
+    ).name
+    return [*parent, this_parent_name]
+
+
+def _get_dataset_path(api: Api, dataset_infos: List[DatasetInfo], dataset_id: int) -> str:
+    parents = _get_dataset_parents(api, dataset_infos, dataset_id)
+    dataset_infos_dict = {info.id: info for info in dataset_infos}
+    this_dataset_info = dataset_infos_dict.get(dataset_id, api.dataset.get_info_by_id(dataset_id))
+    return Dataset._get_dataset_path(this_dataset_info.name, parents)
+
+
 def copy_from_cache(
-    project_id: int, dest_dir: str, dataset_names: List[str] = None, progress_cb: Callable = None
+    project_id: int,
+    dest_dir: str,
+    dataset_names: List[str] = None,
+    progress_cb: Callable = None,
+    dataset_paths: List[str] = None,
 ):
     """
     Copy project or dataset from cache to the specified directory.
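The _get_dataset_parents/_get_dataset_path pair exists because datasets can now be nested (hence recursive=True in api.dataset.get_list above), so cache entries are keyed by parent-qualified paths rather than bare names. The real path join is delegated to Dataset._get_dataset_path; the sketch below reimplements the idea standalone, with a simplified DatasetInfo and an assumed '/' separator:

    from dataclasses import dataclass
    from typing import Dict, Optional

    @dataclass
    class DatasetInfo:  # simplified stand-in for the SDK's DatasetInfo
        id: int
        name: str
        parent_id: Optional[int]

    def dataset_path(infos: Dict[int, DatasetInfo], dataset_id: int) -> str:
        # Walk parent_id links to the root, then join the names top-down.
        parts = []
        current: Optional[int] = dataset_id
        while current is not None:
            info = infos[current]
            parts.append(info.name)
            current = info.parent_id
        return "/".join(reversed(parts))

    infos = {1: DatasetInfo(1, "root_ds", None), 2: DatasetInfo(2, "child_ds", 1)}
    print(dataset_path(infos, 2))  # root_ds/child_ds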
@@ -451,31 +555,35 @@ def copy_from_cache(
     :type project_id: int
     :param dest_dir: Destination path to local directory.
     :type dest_dir: str
-    :param dataset_name: Name of the dataset to copy. If not specified, the whole project will be copied.
+    :param dataset_name: List of dataset paths to copy. If not specified, the whole project will be copied.
     :type dataset_name: str, optional
     :param progress_cb: Function for tracking copying progress. Will be called with number of bytes copied.
     :type progress_cb: tqdm or callable, optional
+    :param dataset_paths: List of dataset paths to copy. If not specified, all datasets will be copied.
+    :type dataset_paths: list(str), optional

     :return: None.
     :rtype: NoneType
     """
     if not is_cached(project_id):
         raise RuntimeError(f"Project {project_id} is not cached")
-    if dataset_names is not None:
-        for dataset_name in dataset_names:
-            if not is_cached(project_id, dataset_name):
-                raise RuntimeError(f"Dataset {dataset_name} of project {project_id} is not cached")
+    if dataset_names is not None or dataset_paths is not None:
+        if dataset_names is not None:
+            dataset_paths = dataset_names
+        for dataset_path in dataset_paths:
+            if not is_cached(project_id, dataset_path):
+                raise RuntimeError(f"Dataset {dataset_path} of project {project_id} is not cached")
     cache_dir = _get_cache_dir(project_id)
-    if dataset_names is None:
+    if dataset_paths is None:
         copy_dir_recursively(cache_dir, dest_dir, progress_cb)
     else:
         # copy meta
         copy_file(os.path.join(cache_dir, "meta.json"), os.path.join(dest_dir, "meta.json"))
         # copy datasets
-        for dataset_name in dataset_names:
+        for dataset_path in dataset_paths:
             copy_dir_recursively(
-                os.path.join(cache_dir, dataset_name),
-                os.path.join(dest_dir, dataset_name),
+                os.path.join(cache_dir, dataset_path),
+                os.path.join(dest_dir, dataset_path),
                 progress_cb,
             )
@@ -487,6 +595,7 @@ def download_using_cache(
     dataset_ids: Optional[List[int]] = None,
     log_progress: bool = True,
     progress_cb: Optional[Union[tqdm, Callable]] = None,
+    semaphore: Optional[asyncio.Semaphore] = None,
     **kwargs,
 ) -> None:
     """
@@ -505,6 +614,8 @@ def download_using_cache(
     :type log_progress: bool
    :param progress_cb: Function for tracking download progress. Will be called with number of items downloaded.
     :type progress_cb: tqdm or callable, optional
+    :param semaphore: Semaphore for limiting the number of concurrent downloads if using async download.
+    :type semaphore: asyncio.Semaphore, optional

     :return: None.
     :rtype: NoneType
@@ -515,6 +626,7 @@ def download_using_cache(
         dataset_ids=dataset_ids,
         log_progress=log_progress,
         progress_cb=progress_cb,
+        semaphore=semaphore,
         **kwargs,
     )
     copy_from_cache(project_id, dest_dir, [*downloaded, *cached])
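End to end, the cache workflow now threads the semaphore from download_using_cache through download_to_cache into download_async_or_sync. A closing caller sketch, with the parameters inferred from the calls above and illustrative values:

    import asyncio
    from supervisely.api.api import Api
    from supervisely.project.download import download_using_cache

    api = Api.from_env()

    # Stale or missing datasets are fetched into the app cache first,
    # then the project is assembled in dest_dir from cached copies.
    download_using_cache(
        api=api,
        project_id=12345,              # illustrative project id
        dest_dir="/tmp/project_copy",  # illustrative destination
        semaphore=asyncio.Semaphore(10),
    )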