synapse-sdk 1.0.0b14__py3-none-any.whl → 1.0.0b16__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. (The original page linked to more details here.)

Binary file
@@ -89,7 +89,7 @@ class DataCollectionClientMixin(BaseClient):
89
89
  path = 'data_files/'
90
90
  if use_chunked_upload:
91
91
  chunked_upload = self.create_chunked_upload(file_path)
92
- data = {'chunked_upload': chunked_upload['id']}
92
+ data = {'chunked_upload': chunked_upload['id'], 'meta': {'filename': file_path.name}}
93
93
  return self._post(path, data=data)
94
94
  else:
95
95
  return self._post(path, files={'file': file_path})
@@ -164,7 +164,7 @@ class DataCollectionClientMixin(BaseClient):
164
164
 
165
165
  self.create_tasks(tasks_data)
166
166
 
167
- def upload_data_file(self, data: Dict, data_collection_id: int) -> Dict:
167
+ def upload_data_file(self, data: Dict, data_collection_id: int, use_chunked_upload: bool = False) -> Dict:
168
168
  """Upload files to synapse-backend.
169
169
 
170
170
  Args:
@@ -173,12 +173,14 @@ class DataCollectionClientMixin(BaseClient):
173
173
  - files: The files to upload. (key: file name, value: file pathlib object)
174
174
  - meta: The meta data to upload.
175
175
  data_collection_id: The data_collection id to upload the data to.
176
+ use_chunked_upload: Whether to use chunked upload for large files.(default False)
177
+ Automatically determined based on file size threshold in upload plugin (default 50MB).
176
178
 
177
179
  Returns:
178
180
  Dict: The result of the upload.
179
181
  """
180
182
  for name, path in data['files'].items():
181
- data_file = self.create_data_file(path)
183
+ data_file = self.create_data_file(path, use_chunked_upload)
182
184
  data['data_collection'] = data_collection_id
183
185
  data['files'][name] = {'checksum': data_file['checksum'], 'path': str(path)}
184
186
  return data
@@ -415,6 +415,8 @@ class UploadParams(BaseModel):
415
415
  project: int | None
416
416
  excel_metadata_path: str | None = None
417
417
  is_recursive: bool = False
418
+ max_file_size_mb: int = 50
419
+ creating_data_unit_batch_size: int = 100
418
420
 
419
421
  @field_validator('storage', mode='before')
420
422
  @classmethod
@@ -901,7 +903,9 @@ class UploadAction(Action):
901
903
  if not uploaded_files:
902
904
  self.run.log_message_with_code('NO_FILES_UPLOADED')
903
905
  raise ActionError('Upload is aborted due to no uploaded files.')
904
- generated_data_units = self._generate_data_units(uploaded_files)
906
+ generated_data_units = self._generate_data_units(
907
+ uploaded_files, self.params.get('creating_data_unit_batch_size')
908
+ )
905
909
  result['generated_data_units_count'] = len(generated_data_units)
906
910
 
907
911
  # Setup task with uploaded synapse-backend data units.
@@ -956,7 +960,9 @@ class UploadAction(Action):
956
960
 
957
961
  for organized_file in organized_files:
958
962
  try:
959
- uploaded_data_file = client.upload_data_file(organized_file, collection_id)
963
+ # Determine if chunked upload should be used based on file size
964
+ use_chunked_upload = self._requires_chunked_upload(organized_file)
965
+ uploaded_data_file = client.upload_data_file(organized_file, collection_id, use_chunked_upload)
960
966
  self.run.log_data_file(organized_file, UploadStatus.SUCCESS)
961
967
  success_count += 1
962
968
  upload_result.append(uploaded_data_file)
@@ -974,10 +980,10 @@ class UploadAction(Action):
974
980
 
975
981
  return upload_result
976
982
 
977
- def _generate_data_units(self, uploaded_files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
983
+ def _generate_data_units(self, uploaded_files: List[Dict[str, Any]], batch_size: int) -> List[Dict[str, Any]]:
978
984
  """Generate data units for the uploaded data.
979
985
 
980
- TODO: make batch size configurable.
986
+ TODO: make dynamic batch size depend on uploaded file sizes
981
987
 
982
988
  Returns:
983
989
  Dict: The result of the generate data units process.
@@ -993,7 +999,7 @@ class UploadAction(Action):
993
999
  success_count = 0
994
1000
  failed_count = 0
995
1001
 
996
- batches = get_batched_list(uploaded_files, 100)
1002
+ batches = get_batched_list(uploaded_files, batch_size)
997
1003
  batches_count = len(batches)
998
1004
 
999
1005
  # Initialize metrics
@@ -1137,6 +1143,32 @@ class UploadAction(Action):
1137
1143
 
1138
1144
  return organized_files
1139
1145
 
1146
+ def _get_file_size_mb(self, file_path: Path) -> float:
1147
+ """Get file size in MB.
1148
+
1149
+ Args:
1150
+ file_path (Path): Path to the file.
1151
+
1152
+ Returns:
1153
+ float: File size in MB.
1154
+ """
1155
+ return file_path.stat().st_size / (1024 * 1024)
1156
+
1157
+ def _requires_chunked_upload(self, organized_file: Dict[str, Any]) -> bool:
1158
+ """Determine if chunked upload is required based on file size threshold.
1159
+
1160
+ Args:
1161
+ organized_file (Dict[str, Any]): Organized file data with 'files' dict.
1162
+
1163
+ Returns:
1164
+ bool: True if any file exceeds the threshold, False otherwise.
1165
+ """
1166
+ max_file_size_mb = self.params.get('max_file_size_mb', 50)
1167
+ for file_path in organized_file.get('files', {}).values():
1168
+ if isinstance(file_path, Path) and self._get_file_size_mb(file_path) > max_file_size_mb:
1169
+ return True
1170
+ return False
1171
+
1140
1172
  def _cleanup_temp_directory(self, temp_path: Optional[Path] = None) -> None:
1141
1173
  """Clean up temporary directory.
1142
1174
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synapse-sdk
3
- Version: 1.0.0b14
3
+ Version: 1.0.0b16
4
4
  Summary: synapse sdk
5
5
  Author-email: datamaker <developer@datamaker.io>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  locale/en/LC_MESSAGES/messages.mo,sha256=fLIB0Lcriky4LLP8Qz1Ayq5Mr-spHNGrvEqzV2huweM,785
2
2
  locale/en/LC_MESSAGES/messages.po,sha256=YTtF-BPxoTfyw12m16zmcDb-wTv6DF8z2D5L_9VewPQ,1223
3
- locale/ko/LC_MESSAGES/messages.mo,sha256=n8EFy-d0AsNVolGtc2wktuyEmxh4HNdC9kKJmdi-_qQ,387
3
+ locale/ko/LC_MESSAGES/messages.mo,sha256=7HJEJA0wKlN14xQ5VF4FCNet54tjw6mfWYj3IaBokgw,678
4
4
  locale/ko/LC_MESSAGES/messages.po,sha256=TFii_RbURDH-Du_9ZQf3wNh-2briGk1IqY33-9GKrMU,1126
5
5
  synapse_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  synapse_sdk/i18n.py,sha256=VXMR-Zm_1hTAg9iPk3YZNNq-T1Bhx1J2fEtRT6kyYbg,766
@@ -34,7 +34,7 @@ synapse_sdk/clients/agent/service.py,sha256=s7KuPK_DB1nr2VHrigttV1WyFonaGHNrPvU8
34
34
  synapse_sdk/clients/backend/__init__.py,sha256=9FzjQn0ljRhtdaoG3n38Mdgte7GFwIh4OtEmoqVg2_E,2098
35
35
  synapse_sdk/clients/backend/annotation.py,sha256=t9KnSdCJ7NvDRkAOKn4lm5hgoawbLCwIHMeo45QiIlQ,1249
36
36
  synapse_sdk/clients/backend/core.py,sha256=oXAQwIz2QFuheaG79vXcYQFSlDDU1hxn2oRc6F5jyOc,2197
37
- synapse_sdk/clients/backend/data_collection.py,sha256=oLs7t_UzCLj3obcuw8K-zBW8UTDgDvmZ0RqHVHd3aPk,7451
37
+ synapse_sdk/clients/backend/data_collection.py,sha256=C3_vWWkzdX0nfHmeTL1KDAejPkIffL9_Yz0JN7Xz1IM,7740
38
38
  synapse_sdk/clients/backend/hitl.py,sha256=1rKczQBKOeEVS0Ynu--mctbBF-zIkSz4_aklfbFY2CU,713
39
39
  synapse_sdk/clients/backend/integration.py,sha256=IdjPkllvHJ_vASHSmxsWO3CRlZz2L4eWkMOch7bHWok,2744
40
40
  synapse_sdk/clients/backend/ml.py,sha256=ynm1UQ-gJkJ-n1wU_Hjcxdhe8VC1LliNWOUJfurOrRE,1197
@@ -220,7 +220,7 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
220
220
  synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
221
221
  synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
222
  synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
223
- synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=oQRMnIFU-vYt2Ahfodxlc7IFvygpbhPtiu7M9UAabQ4,45663
223
+ synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=hWL42Uu3E7rghO8Ayebxs4G_WcUNxn_pEXH5pJOPj3E,46969
224
224
  synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=6_dRa0_J2aS8NSUfO4MKbPxZcdPS2FpJzzp51edYAZc,281
225
225
  synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
226
226
  synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
@@ -277,9 +277,9 @@ synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_n
277
277
  synapse_sdk/utils/storage/providers/http.py,sha256=2DhIulND47JOnS5ZY7MZUex7Su3peAPksGo1Wwg07L4,5828
278
278
  synapse_sdk/utils/storage/providers/s3.py,sha256=ZmqekAvIgcQBdRU-QVJYv1Rlp6VHfXwtbtjTSphua94,2573
279
279
  synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
280
- synapse_sdk-1.0.0b14.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
281
- synapse_sdk-1.0.0b14.dist-info/METADATA,sha256=e4ZLvnHm57yBcrvRt6lcwipZt4nHqbntsg1v17ezbrk,3745
282
- synapse_sdk-1.0.0b14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
283
- synapse_sdk-1.0.0b14.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
284
- synapse_sdk-1.0.0b14.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
285
- synapse_sdk-1.0.0b14.dist-info/RECORD,,
280
+ synapse_sdk-1.0.0b16.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
281
+ synapse_sdk-1.0.0b16.dist-info/METADATA,sha256=FsjML_c4mqFuK23T5yo6xO4IYFFzQAnoFz4G2Mg8eIg,3745
282
+ synapse_sdk-1.0.0b16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
283
+ synapse_sdk-1.0.0b16.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
284
+ synapse_sdk-1.0.0b16.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
285
+ synapse_sdk-1.0.0b16.dist-info/RECORD,,