supervisely 6.73.419__py3-none-any.whl → 6.73.420__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/supervisely/project/project.py
+++ b/supervisely/project/project.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import gc
 import io
 import json
 import os
@@ -26,6 +27,7 @@ from typing import (
 
 import aiofiles
 import numpy as np
+from PIL import Image as PILImage
 from tqdm import tqdm
 
 import supervisely as sly
@@ -3333,7 +3335,6 @@ class Project:
 
         return train_items, val_items
 
-
    @staticmethod
    def download(
        api: Api,
@@ -5612,6 +5613,7 @@ async def _download_project_async(
         blob_files_to_download = {}
         blob_images = []
 
+        sly.logger.info("Calculating images to download...", extra={"dataset": dataset.name})
         async for image_batch in all_images:
             for image in image_batch:
                 if images_ids is None or image.id in images_ids:
@@ -5655,16 +5657,55 @@ async def _download_project_async(
                 ds_progress(1)
             return to_download
 
-        async def run_tasks_with_delay(tasks, delay=0.1):
-            created_tasks = []
-            for task in tasks:
-                created_task = asyncio.create_task(task)
-                created_tasks.append(created_task)
-                await asyncio.sleep(delay)
+        async def run_tasks_with_semaphore_control(task_list: list, delay=0.05):
+            """
+            Execute tasks with semaphore control - create tasks only as semaphore permits become available.
+            task_list - list of coroutines or callables that create tasks.
+            """
+            random.shuffle(task_list)
+            running_tasks = set()
+            max_concurrent = getattr(semaphore, "_value", 10)
+
+            task_iter = iter(task_list)
+            completed_count = 0
+
+            while True:
+                # Add new tasks while we have capacity
+                while len(running_tasks) < max_concurrent:
+                    try:
+                        task_gen = next(task_iter)
+                        if callable(task_gen):
+                            task = asyncio.create_task(task_gen())
+                        else:
+                            task = asyncio.create_task(task_gen)
+                        running_tasks.add(task)
+                        await asyncio.sleep(delay)
+                    except StopIteration:
+                        break
+
+                if not running_tasks:
+                    break
+
+                # Wait for at least one task to complete
+                done, running_tasks = await asyncio.wait(
+                    running_tasks, return_when=asyncio.FIRST_COMPLETED
+                )
+
+                # Process completed tasks
+                for task in done:
+                    completed_count += 1
+                    try:
+                        await task
+                    except Exception as e:
+                        logger.error(f"Task error: {e}")
+
+                # Clear the done set - this should be enough for memory cleanup
+                done.clear()
+
             logger.debug(
-                f"{len(created_tasks)} tasks have been created for dataset ID: {dataset.id}, Name: {dataset.name}"
+                f"{completed_count} tasks have been completed for dataset ID: {dataset.id}, Name: {dataset.name}"
             )
-            return created_tasks
+            return completed_count
 
         # Download blob files if required
         if download_blob_files and len(blob_files_to_download) > 0:
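
The scheduler change above is the core of this release: instead of creating every asyncio task up front and gathering them, the new run_tasks_with_semaphore_control keeps at most max_concurrent tasks alive and tops the set up as tasks finish, which bounds peak memory. A minimal standalone sketch of the same pattern; the names bounded_run and fake_download are illustrative, not part of the SDK:

    import asyncio
    import random

    async def fake_download(i: int) -> int:
        # Stand-in for a download coroutine; sleeps instead of doing I/O.
        await asyncio.sleep(random.uniform(0.01, 0.05))
        return i

    async def bounded_run(coros, max_concurrent: int = 10) -> int:
        # Keep at most max_concurrent tasks alive; replenish as tasks finish.
        pending_iter = iter(coros)
        running = set()
        completed = 0
        while True:
            while len(running) < max_concurrent:
                try:
                    running.add(asyncio.create_task(next(pending_iter)))
                except StopIteration:
                    break
            if not running:
                break
            done, running = await asyncio.wait(running, return_when=asyncio.FIRST_COMPLETED)
            for task in done:
                completed += 1
                try:
                    await task  # surface per-task errors without stopping the rest
                except Exception as e:
                    print(f"Task error: {e}")
        return completed

    async def main():
        total = await bounded_run([fake_download(i) for i in range(100)], max_concurrent=8)
        print(f"{total} tasks completed")

    asyncio.run(main())

Note that the diff sizes its window by reading semaphore._value, a private attribute of asyncio.Semaphore; the sketch takes the limit as an explicit argument instead.
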
@@ -5728,19 +5769,24 @@ async def _download_project_async(
                     progress_cb=ds_progress,
                 )
                 offset_tasks.append(offset_task)
-            created_tasks = await run_tasks_with_delay(offset_tasks, 0.05)
-            await asyncio.gather(*created_tasks)
+            await run_tasks_with_semaphore_control(offset_tasks, 0.05)
 
         tasks = []
-        # Check which images need to be downloaded
-        small_images = await check_items(small_images)
-        large_images = await check_items(large_images)
+        if resume_download is True:
+            sly.logger.info("Checking existing images...", extra={"dataset": dataset.name})
+            # Check which images need to be downloaded
+            small_images = await check_items(small_images)
+            large_images = await check_items(large_images)
 
         # If only one small image, treat it as a large image for efficiency
         if len(small_images) == 1:
             large_images.append(small_images.pop())
 
         # Create batch download tasks
+        sly.logger.debug(
+            f"Downloading {len(small_images)} small images in batch number {len(small_images) // batch_size}...",
+            extra={"dataset": dataset.name},
+        )
         for images_batch in batched(small_images, batch_size=batch_size):
             task = _download_project_items_batch_async(
                 api=api,
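
The new resume_download flag gates the check_items pass, so the filesystem scan for already-downloaded items only runs when a resume is actually requested. A sketch of what such an existence check typically looks like, assuming an item counts as done when both its image file and annotation JSON are on disk; item_done, filter_pending, and the path layout are illustrative, not the SDK's:

    import os

    def item_done(img_dir: str, ann_dir: str, name: str) -> bool:
        # An item counts as downloaded if both its files are already on disk.
        img_ok = os.path.exists(os.path.join(img_dir, name))
        ann_ok = os.path.exists(os.path.join(ann_dir, name + ".json"))
        return img_ok and ann_ok

    def filter_pending(img_infos, img_dir, ann_dir, progress_cb=None):
        # Keep only items that still need downloading; tick progress for skipped ones.
        pending = []
        for info in img_infos:
            if item_done(img_dir, ann_dir, info.name):
                if progress_cb is not None:
                    progress_cb(1)  # already present, report it as done
            else:
                pending.append(info)
        return pending
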
@@ -5758,6 +5804,10 @@ async def _download_project_async(
             tasks.append(task)
 
         # Create individual download tasks for large images
+        sly.logger.debug(
+            f"Downloading {len(large_images)} large images one by one...",
+            extra={"dataset": dataset.name},
+        )
         for image in large_images:
             task = _download_project_item_async(
                 api=api,
@@ -5773,8 +5823,7 @@ async def _download_project_async(
             )
             tasks.append(task)
 
-        created_tasks = await run_tasks_with_delay(tasks)
-        await asyncio.gather(*created_tasks)
+        await run_tasks_with_semaphore_control(tasks)
 
         if save_image_meta:
             meta_dir = dataset_fs.meta_dir
@@ -5815,20 +5864,10 @@ async def _download_project_item_async(
 ) -> None:
     """Download image and annotation from Supervisely API and save it to the local filesystem.
     Uses parameters from the parent function _download_project_async.
+    Optimized version - uses streaming only for large images (>5MB) to avoid performance degradation.
     """
-    if save_images:
-        logger.debug(
-            f"Downloading 1 image in single mode with _download_project_item_async. ID: {img_info.id}, Name: {img_info.name}"
-        )
-        img_bytes = await api.image.download_bytes_single_async(
-            img_info.id, semaphore=semaphore, check_hash=True
-        )
-        if None in [img_info.height, img_info.width]:
-            width, height = sly.image.get_size_from_bytes(img_bytes)
-            img_info = img_info._replace(height=height, width=width)
-    else:
-        img_bytes = None
 
+    # Prepare annotation first (small data)
     if only_image_tags is False:
         ann_info = await api.annotation.download_async(
             img_info.id,
@@ -5853,13 +5892,84 @@ async def _download_project_item_async(
         tmp_ann = Annotation(img_size=(img_info.height, img_info.width), img_tags=tags)
         ann_json = tmp_ann.to_json()
 
-    dataset_fs.delete_item(img_info.name)
-    await dataset_fs.add_item_raw_bytes_async(
-        item_name=img_info.name,
-        item_raw_bytes=img_bytes if save_images is True else None,
-        ann=ann_json,
-        img_info=img_info if save_image_info is True else None,
-    )
+    # Handle image download - choose method based on estimated size
+    if save_images:
+        # Estimate size threshold: 5MB for streaming to avoid performance degradation
+        size_threshold_for_streaming = 5 * 1024 * 1024  # 5MB
+        estimated_size = getattr(img_info, "size", 0) or (
+            img_info.height * img_info.width * 3 if img_info.height and img_info.width else 0
+        )
+
+        if estimated_size > size_threshold_for_streaming:
+            # Use streaming for large images only
+            sly.logger.trace(
+                f"Downloading large image in streaming mode: {img_info.size / 1024 / 1024:.1f}MB"
+            )
+
+            # Clean up existing item first
+            dataset_fs.delete_item(img_info.name)
+
+            final_path = dataset_fs.generate_item_path(img_info.name)
+            temp_path = final_path + ".tmp"
+            await api.image.download_path_async(
+                img_info.id, temp_path, semaphore=semaphore, check_hash=True
+            )
+
+            # Get dimensions if needed
+            if None in [img_info.height, img_info.width]:
+                # Use PIL directly on the file - it will only read the minimal header needed
+                with PILImage.open(temp_path) as image:
+                    width, height = image.size
+                img_info = img_info._replace(height=height, width=width)
+
+            # Update annotation with correct dimensions if needed
+            if None in tmp_ann.img_size:
+                tmp_ann = tmp_ann.clone(img_size=(img_info.height, img_info.width))
+                ann_json = tmp_ann.to_json()
+
+            # os.rename is atomic and will overwrite the destination if it exists
+            os.rename(temp_path, final_path)
+
+            # For streaming, we save directly to filesystem, so use add_item_raw_bytes_async with None
+            await dataset_fs.add_item_raw_bytes_async(
+                item_name=img_info.name,
+                item_raw_bytes=None,  # Image already saved to disk
+                ann=ann_json,
+                img_info=img_info if save_image_info is True else None,
+            )
+        else:
+            sly.logger.trace(f"Downloading small image: {img_info.size / 1024 / 1024:.1f}MB")
+            # Use fast in-memory download for small images
+            img_bytes = await api.image.download_bytes_single_async(
+                img_info.id, semaphore=semaphore, check_hash=True
+            )
+
+            if None in [img_info.height, img_info.width]:
+                width, height = sly.image.get_size_from_bytes(img_bytes)
+                img_info = img_info._replace(height=height, width=width)
+
+            # Update annotation with correct dimensions if needed
+            if None in tmp_ann.img_size:
+                tmp_ann = tmp_ann.clone(img_size=(img_info.height, img_info.width))
+                ann_json = tmp_ann.to_json()
+
+            # Clean up existing item first, then save new one
+            dataset_fs.delete_item(img_info.name)
+            await dataset_fs.add_item_raw_bytes_async(
+                item_name=img_info.name,
+                item_raw_bytes=img_bytes,
+                ann=ann_json,
+                img_info=img_info if save_image_info is True else None,
+            )
+    else:
+        dataset_fs.delete_item(img_info.name)
+        await dataset_fs.add_item_raw_bytes_async(
+            item_name=img_info.name,
+            item_raw_bytes=None,
+            ann=ann_json,
+            img_info=img_info if save_image_info is True else None,
+        )
+
     if progress_cb is not None:
         progress_cb(1)
     logger.debug(f"Single project item has been downloaded. Semaphore state: {semaphore._value}")
@@ -5882,32 +5992,14 @@ async def _download_project_items_batch_async(
     Download images and annotations from Supervisely API and save them to the local filesystem.
     Uses parameters from the parent function _download_project_async.
     It is used for batch download of images and annotations with the bulk download API methods.
+
+    IMPORTANT: The total size of all images in a batch must not exceed 130MB, and the size of each image must not exceed 1.28MB.
     """
-    if save_images:
-        img_ids = [img_info.id for img_info in img_infos]
-        imgs_bytes = [None] * len(img_ids)
-        temp_dict = {}
-        logger.debug(
-            f"Downloading {len(img_ids)} images in bulk with _download_project_items_batch_async"
-        )
-        async for img_id, img_bytes in api.image.download_bytes_generator_async(
-            dataset_id,
-            img_ids,
-            semaphore=semaphore,
-            check_hash=True,
-        ):
-            temp_dict[img_id] = img_bytes
-        # to be sure that the order is correct
-        for idx, img_id in enumerate(img_ids):
-            imgs_bytes[idx] = temp_dict[img_id]
-        for img_info, img_bytes in zip(img_infos, imgs_bytes):
-            if None in [img_info.height, img_info.width]:
-                width, height = sly.image.get_size_from_bytes(img_bytes)
-                img_info = img_info._replace(height=height, width=width)
-    else:
-        img_ids = [img_info.id for img_info in img_infos]
-        imgs_bytes = [None] * len(img_infos)
+    img_ids = [img_info.id for img_info in img_infos]
+    img_ids_to_info = {img_info.id: img_info for img_info in img_infos}
 
+    sly.logger.trace(f"Downloading {len(img_infos)} images in batch mode.")
+    # Download annotations first
     if only_image_tags is False:
         ann_infos = await api.annotation.download_bulk_async(
             dataset_id,
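
The new docstring limit (130MB per batch) is enforced by how batches are formed upstream; the code shown here batches by item count. If batches had to honor a byte budget directly, a greedy packer along these lines would do it; all names are illustrative, and info.size is assumed to be the image size in bytes:

    def batched_by_size(img_infos, max_total=130 * 1024 * 1024, max_count=50):
        # Greedily pack images into batches under a total-byte and count budget.
        batch, total = [], 0
        for info in img_infos:
            size = info.size or 0
            if batch and (total + size > max_total or len(batch) >= max_count):
                yield batch
                batch, total = [], 0
            batch.append(info)
            total += size
        if batch:
            yield batch
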
@@ -5915,20 +6007,20 @@ async def _download_project_items_batch_async(
             semaphore=semaphore,
             force_metadata_for_links=not save_images,
         )
-        ann_jsons = []
+        id_to_annotation = {}
         for img_info, ann_info in zip(img_infos, ann_infos):
             try:
                 tmp_ann = Annotation.from_json(ann_info.annotation, meta)
                 if None in tmp_ann.img_size:
                     tmp_ann = tmp_ann.clone(img_size=(img_info.height, img_info.width))
-                ann_jsons.append(tmp_ann.to_json())
+                id_to_annotation[img_info.id] = tmp_ann.to_json()
             except Exception:
                 logger.error(
                     f"Error while deserializing annotation for image with ID: {img_info.id}"
                 )
                 raise
     else:
-        ann_jsons = []
+        id_to_annotation = {}
         for img_info in img_infos:
             tags = TagCollection.from_api_response(
                 img_info.tags,
@@ -5936,17 +6028,63 @@ async def _download_project_items_batch_async(
                 id_to_tagmeta,
             )
             tmp_ann = Annotation(img_size=(img_info.height, img_info.width), img_tags=tags)
-            ann_jsons.append(tmp_ann.to_json())
-        for img_info, ann_json, img_bytes in zip(img_infos, ann_jsons, imgs_bytes):
-            dataset_fs.delete_item(img_info.name)
-            await dataset_fs.add_item_raw_bytes_async(
-                item_name=img_info.name,
-                item_raw_bytes=img_bytes,
-                ann=dataset_fs.get_ann(img_info.name, meta) if ann_json is None else ann_json,
-                img_info=img_info if save_image_info is True else None,
-            )
-            if progress_cb is not None:
-                progress_cb(1)
+            id_to_annotation[img_info.id] = tmp_ann.to_json()
+
+    if save_images:
+        async for img_id, img_bytes in api.image.download_bytes_generator_async(
+            dataset_id=dataset_id, img_ids=img_ids, semaphore=semaphore, check_hash=True
+        ):
+            img_info = img_ids_to_info.get(img_id)
+            if img_info is None:
+                continue
+
+            if None in [img_info.height, img_info.width]:
+                width, height = sly.image.get_size_from_bytes(img_bytes)
+                img_info = img_info._replace(height=height, width=width)
+
+                # Update annotation if needed - use pop to get and remove at the same time
+                ann_json = id_to_annotation.pop(img_id, None)
+                if ann_json is not None:
+                    try:
+                        tmp_ann = Annotation.from_json(ann_json, meta)
+                        if None in tmp_ann.img_size:
+                            tmp_ann = tmp_ann.clone(img_size=(img_info.height, img_info.width))
+                        ann_json = tmp_ann.to_json()
+                    except Exception:
+                        pass
+            else:
+                ann_json = id_to_annotation.pop(img_id, None)
+
+            dataset_fs.delete_item(img_info.name)
+            await dataset_fs.add_item_raw_bytes_async(
+                item_name=img_info.name,
+                item_raw_bytes=img_bytes,
+                ann=ann_json,
+                img_info=img_info if save_image_info is True else None,
+            )
+
+            if progress_cb is not None:
+                progress_cb(1)
+    else:
+        for img_info in img_infos:
+            dataset_fs.delete_item(img_info.name)
+            ann_json = id_to_annotation.pop(img_info.id, None)
+            await dataset_fs.add_item_raw_bytes_async(
+                item_name=img_info.name,
+                item_raw_bytes=None,
+                ann=ann_json,
+                img_info=img_info if save_image_info is True else None,
+            )
+            if progress_cb is not None:
+                progress_cb(1)
+
+    # Clear dictionaries and force GC for large batches only
+    batch_size = len(img_infos)
+    id_to_annotation.clear()
+    img_ids_to_info.clear()
+
+    if batch_size > 50:  # Only for large batches
+        gc.collect()
 
     logger.debug(f"Batch of project items has been downloaded. Semaphore state: {semaphore._value}")
 
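The rewritten batch writer above no longer buffers a whole batch in a temporary dict before writing: each image is persisted as soon as its bytes arrive from the async generator, and dict.pop releases the matching annotation at the same time, so per-batch memory stays flat. The same consume-as-you-stream shape in isolation, with assumed names; byte_stream stands in for download_bytes_generator_async:

    import asyncio

    async def byte_stream(ids):
        # Stand-in for a bulk-download async generator yielding (id, payload).
        for i in ids:
            await asyncio.sleep(0)
            yield i, b"x" * 100

    async def write_as_received(ids, annotations):
        # Persist each item as soon as it arrives; pop shrinks the lookup dict.
        async for item_id, payload in byte_stream(ids):
            ann = annotations.pop(item_id, None)
            # ... write payload and ann to disk here ...
            del payload  # drop the reference promptly; nothing buffers the batch

    asyncio.run(write_as_received([1, 2, 3], {1: "a", 2: "b", 3: "c"}))
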
--- a/supervisely-6.73.419.dist-info/METADATA
+++ b/supervisely-6.73.420.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: supervisely
-Version: 6.73.419
+Version: 6.73.420
 Summary: Supervisely Python SDK.
 Home-page: https://github.com/supervisely/supervisely
 Author: Supervisely
--- a/supervisely-6.73.419.dist-info/RECORD
+++ b/supervisely-6.73.420.dist-info/RECORD
@@ -1029,7 +1029,7 @@ supervisely/project/data_version.py,sha256=P5Lui6i64pYeJWmAdGJDv8GRXxjfpSSZ8zT_M
 supervisely/project/download.py,sha256=yCFpRum_q8fbY_z2mcRAhYAcYFcDc215ldioO3Gzg3Q,28680
 supervisely/project/pointcloud_episode_project.py,sha256=ahwL79Ojf5gLdvAph5SLKXr66jmVWzgiOXLsk2P8eVQ,48433
 supervisely/project/pointcloud_project.py,sha256=NcrsMf673cITLoErJhcdQN85w3k9TBqolEbWR4qjPZQ,56872
-supervisely/project/project.py,sha256=YBDW0i83fpmE2UT28Tsqk4hMb9OVUsEd02pZsch4xgM,240428
+supervisely/project/project.py,sha256=Xl_L_dRJp0KZJMioabXWGO2Zt1X6dl2W9v4-Zt66zYs,246561
 supervisely/project/project_meta.py,sha256=UTQPstRmRJvbtCcQ1noCtzcw3Sd4llwRMHes-Sz-JQg,51429
 supervisely/project/project_settings.py,sha256=NLThzU_DCynOK6hkHhVdFyezwprn9UqlnrLDe_3qhkY,9347
 supervisely/project/project_type.py,sha256=7mQ7zg6r7Bm2oFn5aR8n_PeLqMmOaPZd6ph7Z8ZISTw,608
@@ -1103,9 +1103,9 @@ supervisely/worker_proto/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 supervisely/worker_proto/worker_api_pb2.py,sha256=VQfi5JRBHs2pFCK1snec3JECgGnua3Xjqw_-b3aFxuM,59142
 supervisely/worker_proto/worker_api_pb2_grpc.py,sha256=3BwQXOaP9qpdi0Dt9EKG--Lm8KGN0C5AgmUfRv77_Jk,28940
 supervisely_lib/__init__.py,sha256=7-3QnN8Zf0wj8NCr2oJmqoQWMKKPKTECvjH9pd2S5vY,159
-supervisely-6.73.419.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-supervisely-6.73.419.dist-info/METADATA,sha256=j4oQqDt11Jv3-V0vS0ILCLC4jh1o7KQvoIrAaTF8n3U,35338
-supervisely-6.73.419.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-supervisely-6.73.419.dist-info/entry_points.txt,sha256=U96-5Hxrp2ApRjnCoUiUhWMqijqh8zLR03sEhWtAcms,102
-supervisely-6.73.419.dist-info/top_level.txt,sha256=kcFVwb7SXtfqZifrZaSE3owHExX4gcNYe7Q2uoby084,28
-supervisely-6.73.419.dist-info/RECORD,,
+supervisely-6.73.420.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+supervisely-6.73.420.dist-info/METADATA,sha256=6wWuelPBg3V20xDmnkT_ZYPt4RxkNbbN94i0z9Q2y1M,35338
+supervisely-6.73.420.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+supervisely-6.73.420.dist-info/entry_points.txt,sha256=U96-5Hxrp2ApRjnCoUiUhWMqijqh8zLR03sEhWtAcms,102
+supervisely-6.73.420.dist-info/top_level.txt,sha256=kcFVwb7SXtfqZifrZaSE3owHExX4gcNYe7Q2uoby084,28
+supervisely-6.73.420.dist-info/RECORD,,