synapse-sdk 1.0.0a93__py3-none-any.whl → 1.0.0a95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/plugins/categories/upload/actions/upload.py +82 -67
- {synapse_sdk-1.0.0a93.dist-info → synapse_sdk-1.0.0a95.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0a93.dist-info → synapse_sdk-1.0.0a95.dist-info}/RECORD +7 -7
- {synapse_sdk-1.0.0a93.dist-info → synapse_sdk-1.0.0a95.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0a93.dist-info → synapse_sdk-1.0.0a95.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a93.dist-info → synapse_sdk-1.0.0a95.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0a93.dist-info → synapse_sdk-1.0.0a95.dist-info}/top_level.txt +0 -0
|
@@ -192,8 +192,7 @@ class UploadParams(BaseModel):
|
|
|
192
192
|
storage (int): The storage of the action.
|
|
193
193
|
collection (int): The collection of the action.
|
|
194
194
|
project (int | None): The project of the action.
|
|
195
|
-
|
|
196
|
-
excel_metadata_path (str | None): Path to excel file containing metadata.
|
|
195
|
+
excel_metadata_path (str | None): Path to excel file containing metadata. Defaults to 'meta.xlsx' or 'meta.xls' in the path directory.
|
|
197
196
|
"""
|
|
198
197
|
|
|
199
198
|
name: Annotated[str, AfterValidator(non_blank)]
|
|
@@ -202,7 +201,6 @@ class UploadParams(BaseModel):
|
|
|
202
201
|
storage: int
|
|
203
202
|
collection: int
|
|
204
203
|
project: int | None
|
|
205
|
-
use_excel_metadata: bool = False
|
|
206
204
|
excel_metadata_path: str | None = None
|
|
207
205
|
|
|
208
206
|
@field_validator('storage', mode='before')
|
|
@@ -251,7 +249,7 @@ class UploadParams(BaseModel):
|
|
|
251
249
|
@field_validator('excel_metadata_path', mode='before')
|
|
252
250
|
@classmethod
|
|
253
251
|
def check_excel_metadata_path(cls, value: str, info) -> str:
|
|
254
|
-
"""Validate excel metadata file exists and is secure if
|
|
252
|
+
"""Validate excel metadata file exists and is secure if provided.
|
|
255
253
|
|
|
256
254
|
This validator performs comprehensive security checks including:
|
|
257
255
|
- File existence and format validation
|
|
@@ -271,48 +269,45 @@ class UploadParams(BaseModel):
|
|
|
271
269
|
if not value:
|
|
272
270
|
return value
|
|
273
271
|
|
|
274
|
-
|
|
275
|
-
data = info.data
|
|
276
|
-
if data.get('use_excel_metadata', False):
|
|
277
|
-
excel_path = Path(value)
|
|
272
|
+
excel_path = Path(value)
|
|
278
273
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
# Check file extension
|
|
284
|
-
if excel_path.suffix.lower() not in ['.xlsx', '.xls']:
|
|
285
|
-
raise PydanticCustomError('invalid_file_type', _('Excel metadata file must be .xlsx or .xls format.'))
|
|
286
|
-
|
|
287
|
-
# Security check: file size limit
|
|
288
|
-
file_size = excel_path.stat().st_size
|
|
289
|
-
excel_config = ExcelSecurityConfig()
|
|
290
|
-
if file_size > excel_config.MAX_FILE_SIZE_BYTES:
|
|
291
|
-
raise PydanticCustomError(
|
|
292
|
-
'file_too_large',
|
|
293
|
-
_('Excel metadata file is too large. Maximum size is {}MB.').format(excel_config.MAX_FILE_SIZE_MB),
|
|
294
|
-
)
|
|
274
|
+
# Check file existence
|
|
275
|
+
if not excel_path.exists():
|
|
276
|
+
raise PydanticCustomError('file_not_found', _('Excel metadata file not found.'))
|
|
295
277
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
278
|
+
# Check file extension
|
|
279
|
+
if excel_path.suffix.lower() not in ['.xlsx', '.xls']:
|
|
280
|
+
raise PydanticCustomError('invalid_file_type', _('Excel metadata file must be .xlsx or .xls format.'))
|
|
281
|
+
|
|
282
|
+
# Security check: file size limit
|
|
283
|
+
file_size = excel_path.stat().st_size
|
|
284
|
+
excel_config = ExcelSecurityConfig()
|
|
285
|
+
if file_size > excel_config.MAX_FILE_SIZE_BYTES:
|
|
286
|
+
raise PydanticCustomError(
|
|
287
|
+
'file_too_large',
|
|
288
|
+
_('Excel metadata file is too large. Maximum size is {}MB.').format(excel_config.MAX_FILE_SIZE_MB),
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
# Basic security check: ensure file is readable and not corrupted
|
|
292
|
+
try:
|
|
293
|
+
with open(excel_path, 'rb') as f:
|
|
294
|
+
# Read first few bytes to check if it's a valid Excel file
|
|
295
|
+
header = f.read(8)
|
|
296
|
+
if not header:
|
|
297
|
+
raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be empty.'))
|
|
298
|
+
|
|
299
|
+
# Check for valid Excel file signatures
|
|
300
|
+
if excel_path.suffix.lower() == '.xlsx':
|
|
301
|
+
# XLSX files start with PK (ZIP signature)
|
|
302
|
+
if not header.startswith(b'PK'):
|
|
303
|
+
raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be corrupted.'))
|
|
304
|
+
elif excel_path.suffix.lower() == '.xls':
|
|
305
|
+
# XLS files have specific OLE signatures
|
|
306
|
+
if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
|
|
307
|
+
raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be corrupted.'))
|
|
308
|
+
|
|
309
|
+
except (OSError, IOError):
|
|
310
|
+
raise PydanticCustomError('file_access_error', _('Cannot access Excel metadata file.'))
|
|
316
311
|
|
|
317
312
|
return value
|
|
318
313
|
|
|
@@ -354,12 +349,12 @@ class UploadAction(Action):
|
|
|
354
349
|
},
|
|
355
350
|
}
|
|
356
351
|
metrics_categories = {
|
|
357
|
-
'
|
|
352
|
+
'data_files': {
|
|
358
353
|
'stand_by': 0,
|
|
359
354
|
'failed': 0,
|
|
360
355
|
'success': 0,
|
|
361
356
|
},
|
|
362
|
-
'
|
|
357
|
+
'data_units': {
|
|
363
358
|
'stand_by': 0,
|
|
364
359
|
'failed': 0,
|
|
365
360
|
'success': 0,
|
|
@@ -543,6 +538,24 @@ class UploadAction(Action):
|
|
|
543
538
|
if row_count > self.excel_config.MAX_ROWS:
|
|
544
539
|
raise ExcelParsingError(f'Too many rows: {row_count} (max: {self.excel_config.MAX_ROWS})')
|
|
545
540
|
|
|
541
|
+
def _find_excel_metadata_file(self, pathlib_cwd: Path) -> Optional[Path]:
|
|
542
|
+
"""Find Excel metadata file in the directory.
|
|
543
|
+
|
|
544
|
+
Checks for meta.xlsx and meta.xls in the given directory.
|
|
545
|
+
|
|
546
|
+
Args:
|
|
547
|
+
pathlib_cwd (Path): The pathlib object representing the current working directory.
|
|
548
|
+
|
|
549
|
+
Returns:
|
|
550
|
+
Optional[Path]: Path to the Excel metadata file if found, None otherwise.
|
|
551
|
+
"""
|
|
552
|
+
# Check for xlsx first, then xls
|
|
553
|
+
for extension in ['.xlsx', '.xls']:
|
|
554
|
+
excel_path = pathlib_cwd / f'meta{extension}'
|
|
555
|
+
if excel_path.exists() and excel_path.is_file():
|
|
556
|
+
return excel_path
|
|
557
|
+
return None
|
|
558
|
+
|
|
546
559
|
def _read_excel_metadata(self, pathlib_cwd: Path) -> Dict[str, Dict[str, Any]]:
|
|
547
560
|
"""Read metadata from excel file with comprehensive security validation.
|
|
548
561
|
|
|
@@ -560,13 +573,20 @@ class UploadAction(Action):
|
|
|
560
573
|
ExcelSecurityError: If security validation fails
|
|
561
574
|
ExcelParsingError: If Excel content is invalid or exceeds limits
|
|
562
575
|
"""
|
|
563
|
-
|
|
564
|
-
return {}
|
|
576
|
+
excel_path = None
|
|
565
577
|
|
|
566
|
-
|
|
567
|
-
if
|
|
568
|
-
|
|
569
|
-
|
|
578
|
+
# Check if user provided a specific excel_metadata_path
|
|
579
|
+
if self.params.get('excel_metadata_path'):
|
|
580
|
+
excel_path = pathlib_cwd / self.params['excel_metadata_path']
|
|
581
|
+
if not excel_path.exists():
|
|
582
|
+
self.run.log_message(f'Excel metadata file not found: {excel_path}', context=Context.WARNING.value)
|
|
583
|
+
return {}
|
|
584
|
+
else:
|
|
585
|
+
# Look for default meta.xlsx or meta.xls
|
|
586
|
+
excel_path = self._find_excel_metadata_file(pathlib_cwd)
|
|
587
|
+
if not excel_path:
|
|
588
|
+
# No Excel metadata file found, return empty dict (not an error)
|
|
589
|
+
return {}
|
|
570
590
|
|
|
571
591
|
try:
|
|
572
592
|
# Prepare Excel file with security validation
|
|
@@ -611,28 +631,26 @@ class UploadAction(Action):
|
|
|
611
631
|
storage = self.client.get_storage(self.params['storage'])
|
|
612
632
|
pathlib_cwd = get_pathlib(storage, self.params['path'])
|
|
613
633
|
|
|
614
|
-
# Read excel metadata if configured
|
|
634
|
+
# Read excel metadata if configured or default file exists
|
|
615
635
|
excel_metadata: Dict[str, Dict[str, Any]] = {}
|
|
616
636
|
try:
|
|
617
637
|
excel_metadata = self._read_excel_metadata(pathlib_cwd)
|
|
618
638
|
if excel_metadata:
|
|
619
639
|
self.run.log_message(f'Excel metadata loaded for {len(excel_metadata)} files')
|
|
620
|
-
elif self.params.get('use_excel_metadata', False):
|
|
621
|
-
self.run.log_message('Excel metadata enabled but no entries found')
|
|
622
|
-
# Don't log anything if Excel metadata is not being used
|
|
623
640
|
except ExcelSecurityError as e:
|
|
624
641
|
# Security violations should stop the process entirely
|
|
625
642
|
self.run.log_message(f'Excel security validation failed: {str(e)}', context=Context.ERROR.value)
|
|
626
643
|
self.run.log_message('Upload aborted due to Excel security concerns.', context=Context.ERROR.value)
|
|
627
644
|
return result
|
|
628
645
|
except ExcelParsingError as e:
|
|
629
|
-
# Parsing errors can be non-critical if
|
|
630
|
-
if self.params.get('
|
|
646
|
+
# Parsing errors can be non-critical if user didn't explicitly provide Excel file
|
|
647
|
+
if self.params.get('excel_metadata_path'):
|
|
648
|
+
# User explicitly provided Excel file, treat as error
|
|
631
649
|
self.run.log_message(f'Excel parsing failed: {str(e)}', context=Context.ERROR.value)
|
|
632
650
|
self.run.log_message('Upload aborted due to Excel parsing failure.', context=Context.ERROR.value)
|
|
633
651
|
return result
|
|
634
652
|
else:
|
|
635
|
-
#
|
|
653
|
+
# Default Excel file found but failed to parse, treat as warning and continue
|
|
636
654
|
self.run.log_message(f'Excel parsing failed (continuing): {str(e)}', context=Context.WARNING.value)
|
|
637
655
|
excel_metadata = {}
|
|
638
656
|
|
|
@@ -693,7 +711,6 @@ class UploadAction(Action):
|
|
|
693
711
|
|
|
694
712
|
collection = self.run.client.get_data_collection(collection_id)
|
|
695
713
|
self.run.set_progress(2, 2, category='analyze_collection')
|
|
696
|
-
self.run.log_message('Collection analysis completed.')
|
|
697
714
|
|
|
698
715
|
return collection['file_specifications']
|
|
699
716
|
|
|
@@ -716,7 +733,7 @@ class UploadAction(Action):
|
|
|
716
733
|
failed_count = 0
|
|
717
734
|
|
|
718
735
|
# Initialize metrics
|
|
719
|
-
self._update_metrics(organized_files_count, success_count, failed_count, '
|
|
736
|
+
self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
|
|
720
737
|
|
|
721
738
|
for organized_file in organized_files:
|
|
722
739
|
try:
|
|
@@ -730,12 +747,11 @@ class UploadAction(Action):
|
|
|
730
747
|
failed_count += 1
|
|
731
748
|
|
|
732
749
|
current_progress += 1
|
|
733
|
-
self._update_metrics(organized_files_count, success_count, failed_count, '
|
|
750
|
+
self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
|
|
734
751
|
self.run.set_progress(current_progress, organized_files_count, category='upload_data_files')
|
|
735
752
|
|
|
736
753
|
# Finish progress
|
|
737
754
|
self.run.set_progress(organized_files_count, organized_files_count, category='upload_data_files')
|
|
738
|
-
self.run.log_message(f'Upload data files completed. Success: {success_count}, Failed: {failed_count}')
|
|
739
755
|
|
|
740
756
|
return upload_result
|
|
741
757
|
|
|
@@ -762,7 +778,7 @@ class UploadAction(Action):
|
|
|
762
778
|
batches_count = len(batches)
|
|
763
779
|
|
|
764
780
|
# Initialize metrics
|
|
765
|
-
self._update_metrics(upload_result_count, success_count, failed_count, '
|
|
781
|
+
self._update_metrics(upload_result_count, success_count, failed_count, 'data_units')
|
|
766
782
|
|
|
767
783
|
for batch in batches:
|
|
768
784
|
try:
|
|
@@ -780,12 +796,11 @@ class UploadAction(Action):
|
|
|
780
796
|
self.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)
|
|
781
797
|
|
|
782
798
|
current_progress += 1
|
|
783
|
-
self._update_metrics(upload_result_count, success_count, failed_count, '
|
|
799
|
+
self._update_metrics(upload_result_count, success_count, failed_count, 'data_units')
|
|
784
800
|
self.run.set_progress(current_progress, batches_count, category='generate_data_units')
|
|
785
801
|
|
|
786
802
|
# Finish progress
|
|
787
803
|
self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
|
|
788
|
-
self.run.log_message(f'Data units generation completed. Success: {success_count}, Failed: {failed_count}')
|
|
789
804
|
|
|
790
805
|
return sum(generated_data_units, [])
|
|
791
806
|
|
|
@@ -899,7 +914,7 @@ class UploadAction(Action):
|
|
|
899
914
|
else:
|
|
900
915
|
missing = [req for req in required_specs if req not in files_dict]
|
|
901
916
|
self.run.log_message(
|
|
902
|
-
f'
|
|
917
|
+
f'{file_name} missing required files: {", ".join(missing)}',
|
|
903
918
|
context=Context.WARNING.value,
|
|
904
919
|
)
|
|
905
920
|
|
|
@@ -165,7 +165,7 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
|
|
|
165
165
|
synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
|
|
166
166
|
synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
167
|
synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
-
synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=
|
|
168
|
+
synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=W2GYvzzV1ZPljUezwZiFYlihq1muIJVdinnsNW7Nyw8,37673
|
|
169
169
|
synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=6_dRa0_J2aS8NSUfO4MKbPxZcdPS2FpJzzp51edYAZc,281
|
|
170
170
|
synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
171
|
synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
|
|
@@ -221,9 +221,9 @@ synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_n
|
|
|
221
221
|
synapse_sdk/utils/storage/providers/http.py,sha256=2DhIulND47JOnS5ZY7MZUex7Su3peAPksGo1Wwg07L4,5828
|
|
222
222
|
synapse_sdk/utils/storage/providers/s3.py,sha256=ZmqekAvIgcQBdRU-QVJYv1Rlp6VHfXwtbtjTSphua94,2573
|
|
223
223
|
synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
|
|
224
|
-
synapse_sdk-1.0.
|
|
225
|
-
synapse_sdk-1.0.
|
|
226
|
-
synapse_sdk-1.0.
|
|
227
|
-
synapse_sdk-1.0.
|
|
228
|
-
synapse_sdk-1.0.
|
|
229
|
-
synapse_sdk-1.0.
|
|
224
|
+
synapse_sdk-1.0.0a95.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
|
|
225
|
+
synapse_sdk-1.0.0a95.dist-info/METADATA,sha256=xvnJ6ZPqDHNbb1yBCb0jmh456ZLZTJjoWjhmGGXlSgk,3837
|
|
226
|
+
synapse_sdk-1.0.0a95.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
227
|
+
synapse_sdk-1.0.0a95.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
|
|
228
|
+
synapse_sdk-1.0.0a95.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
|
|
229
|
+
synapse_sdk-1.0.0a95.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|