synapse-sdk 1.0.0a93__py3-none-any.whl → 1.0.0a95__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. (The original page linked to more details here; the link target is not preserved in this text.)

@@ -192,8 +192,7 @@ class UploadParams(BaseModel):
192
192
  storage (int): The storage of the action.
193
193
  collection (int): The collection of the action.
194
194
  project (int | None): The project of the action.
195
- use_excel_metadata (bool): Whether to use excel file for additional metadata.
196
- excel_metadata_path (str | None): Path to excel file containing metadata.
195
+ excel_metadata_path (str | None): Path to excel file containing metadata. Defaults to 'meta.xlsx' or 'meta.xls' in the path directory.
197
196
  """
198
197
 
199
198
  name: Annotated[str, AfterValidator(non_blank)]
@@ -202,7 +201,6 @@ class UploadParams(BaseModel):
202
201
  storage: int
203
202
  collection: int
204
203
  project: int | None
205
- use_excel_metadata: bool = False
206
204
  excel_metadata_path: str | None = None
207
205
 
208
206
  @field_validator('storage', mode='before')
@@ -251,7 +249,7 @@ class UploadParams(BaseModel):
251
249
  @field_validator('excel_metadata_path', mode='before')
252
250
  @classmethod
253
251
  def check_excel_metadata_path(cls, value: str, info) -> str:
254
- """Validate excel metadata file exists and is secure if use_excel_metadata is True.
252
+ """Validate excel metadata file exists and is secure if provided.
255
253
 
256
254
  This validator performs comprehensive security checks including:
257
255
  - File existence and format validation
@@ -271,48 +269,45 @@ class UploadParams(BaseModel):
271
269
  if not value:
272
270
  return value
273
271
 
274
- # Check if use_excel_metadata is True
275
- data = info.data
276
- if data.get('use_excel_metadata', False):
277
- excel_path = Path(value)
272
+ excel_path = Path(value)
278
273
 
279
- # Check file existence
280
- if not excel_path.exists():
281
- raise PydanticCustomError('file_not_found', _('Excel metadata file not found.'))
282
-
283
- # Check file extension
284
- if excel_path.suffix.lower() not in ['.xlsx', '.xls']:
285
- raise PydanticCustomError('invalid_file_type', _('Excel metadata file must be .xlsx or .xls format.'))
286
-
287
- # Security check: file size limit
288
- file_size = excel_path.stat().st_size
289
- excel_config = ExcelSecurityConfig()
290
- if file_size > excel_config.MAX_FILE_SIZE_BYTES:
291
- raise PydanticCustomError(
292
- 'file_too_large',
293
- _('Excel metadata file is too large. Maximum size is {}MB.').format(excel_config.MAX_FILE_SIZE_MB),
294
- )
274
+ # Check file existence
275
+ if not excel_path.exists():
276
+ raise PydanticCustomError('file_not_found', _('Excel metadata file not found.'))
295
277
 
296
- # Basic security check: ensure file is readable and not corrupted
297
- try:
298
- with open(excel_path, 'rb') as f:
299
- # Read first few bytes to check if it's a valid Excel file
300
- header = f.read(8)
301
- if not header:
302
- raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be empty.'))
303
-
304
- # Check for valid Excel file signatures
305
- if excel_path.suffix.lower() == '.xlsx':
306
- # XLSX files start with PK (ZIP signature)
307
- if not header.startswith(b'PK'):
308
- raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be corrupted.'))
309
- elif excel_path.suffix.lower() == '.xls':
310
- # XLS files have specific OLE signatures
311
- if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
312
- raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be corrupted.'))
313
-
314
- except (OSError, IOError):
315
- raise PydanticCustomError('file_access_error', _('Cannot access Excel metadata file.'))
278
+ # Check file extension
279
+ if excel_path.suffix.lower() not in ['.xlsx', '.xls']:
280
+ raise PydanticCustomError('invalid_file_type', _('Excel metadata file must be .xlsx or .xls format.'))
281
+
282
+ # Security check: file size limit
283
+ file_size = excel_path.stat().st_size
284
+ excel_config = ExcelSecurityConfig()
285
+ if file_size > excel_config.MAX_FILE_SIZE_BYTES:
286
+ raise PydanticCustomError(
287
+ 'file_too_large',
288
+ _('Excel metadata file is too large. Maximum size is {}MB.').format(excel_config.MAX_FILE_SIZE_MB),
289
+ )
290
+
291
+ # Basic security check: ensure file is readable and not corrupted
292
+ try:
293
+ with open(excel_path, 'rb') as f:
294
+ # Read first few bytes to check if it's a valid Excel file
295
+ header = f.read(8)
296
+ if not header:
297
+ raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be empty.'))
298
+
299
+ # Check for valid Excel file signatures
300
+ if excel_path.suffix.lower() == '.xlsx':
301
+ # XLSX files start with PK (ZIP signature)
302
+ if not header.startswith(b'PK'):
303
+ raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be corrupted.'))
304
+ elif excel_path.suffix.lower() == '.xls':
305
+ # XLS files have specific OLE signatures
306
+ if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
307
+ raise PydanticCustomError('invalid_file', _('Excel metadata file appears to be corrupted.'))
308
+
309
+ except (OSError, IOError):
310
+ raise PydanticCustomError('file_access_error', _('Cannot access Excel metadata file.'))
316
311
 
317
312
  return value
318
313
 
@@ -354,12 +349,12 @@ class UploadAction(Action):
354
349
  },
355
350
  }
356
351
  metrics_categories = {
357
- 'data_file': {
352
+ 'data_files': {
358
353
  'stand_by': 0,
359
354
  'failed': 0,
360
355
  'success': 0,
361
356
  },
362
- 'data_unit': {
357
+ 'data_units': {
363
358
  'stand_by': 0,
364
359
  'failed': 0,
365
360
  'success': 0,
@@ -543,6 +538,24 @@ class UploadAction(Action):
543
538
  if row_count > self.excel_config.MAX_ROWS:
544
539
  raise ExcelParsingError(f'Too many rows: {row_count} (max: {self.excel_config.MAX_ROWS})')
545
540
 
541
+ def _find_excel_metadata_file(self, pathlib_cwd: Path) -> Optional[Path]:
542
+ """Find Excel metadata file in the directory.
543
+
544
+ Checks for meta.xlsx and meta.xls in the given directory.
545
+
546
+ Args:
547
+ pathlib_cwd (Path): The pathlib object representing the current working directory.
548
+
549
+ Returns:
550
+ Optional[Path]: Path to the Excel metadata file if found, None otherwise.
551
+ """
552
+ # Check for xlsx first, then xls
553
+ for extension in ['.xlsx', '.xls']:
554
+ excel_path = pathlib_cwd / f'meta{extension}'
555
+ if excel_path.exists() and excel_path.is_file():
556
+ return excel_path
557
+ return None
558
+
546
559
  def _read_excel_metadata(self, pathlib_cwd: Path) -> Dict[str, Dict[str, Any]]:
547
560
  """Read metadata from excel file with comprehensive security validation.
548
561
 
@@ -560,13 +573,20 @@ class UploadAction(Action):
560
573
  ExcelSecurityError: If security validation fails
561
574
  ExcelParsingError: If Excel content is invalid or exceeds limits
562
575
  """
563
- if not self.params.get('use_excel_metadata', False) or not self.params.get('excel_metadata_path'):
564
- return {}
576
+ excel_path = None
565
577
 
566
- excel_path = pathlib_cwd / self.params['excel_metadata_path']
567
- if not excel_path.exists():
568
- self.run.log_message(f'Excel metadata file not found: {excel_path}', context=Context.WARNING.value)
569
- return {}
578
+ # Check if user provided a specific excel_metadata_path
579
+ if self.params.get('excel_metadata_path'):
580
+ excel_path = pathlib_cwd / self.params['excel_metadata_path']
581
+ if not excel_path.exists():
582
+ self.run.log_message(f'Excel metadata file not found: {excel_path}', context=Context.WARNING.value)
583
+ return {}
584
+ else:
585
+ # Look for default meta.xlsx or meta.xls
586
+ excel_path = self._find_excel_metadata_file(pathlib_cwd)
587
+ if not excel_path:
588
+ # No Excel metadata file found, return empty dict (not an error)
589
+ return {}
570
590
 
571
591
  try:
572
592
  # Prepare Excel file with security validation
@@ -611,28 +631,26 @@ class UploadAction(Action):
611
631
  storage = self.client.get_storage(self.params['storage'])
612
632
  pathlib_cwd = get_pathlib(storage, self.params['path'])
613
633
 
614
- # Read excel metadata if configured
634
+ # Read excel metadata if configured or default file exists
615
635
  excel_metadata: Dict[str, Dict[str, Any]] = {}
616
636
  try:
617
637
  excel_metadata = self._read_excel_metadata(pathlib_cwd)
618
638
  if excel_metadata:
619
639
  self.run.log_message(f'Excel metadata loaded for {len(excel_metadata)} files')
620
- elif self.params.get('use_excel_metadata', False):
621
- self.run.log_message('Excel metadata enabled but no entries found')
622
- # Don't log anything if Excel metadata is not being used
623
640
  except ExcelSecurityError as e:
624
641
  # Security violations should stop the process entirely
625
642
  self.run.log_message(f'Excel security validation failed: {str(e)}', context=Context.ERROR.value)
626
643
  self.run.log_message('Upload aborted due to Excel security concerns.', context=Context.ERROR.value)
627
644
  return result
628
645
  except ExcelParsingError as e:
629
- # Parsing errors can be non-critical if Excel metadata is optional
630
- if self.params.get('use_excel_metadata', False):
646
+ # Parsing errors can be non-critical if user didn't explicitly provide Excel file
647
+ if self.params.get('excel_metadata_path'):
648
+ # User explicitly provided Excel file, treat as error
631
649
  self.run.log_message(f'Excel parsing failed: {str(e)}', context=Context.ERROR.value)
632
650
  self.run.log_message('Upload aborted due to Excel parsing failure.', context=Context.ERROR.value)
633
651
  return result
634
652
  else:
635
- # If Excel metadata is not explicitly enabled, treat as warning and continue
653
+ # Default Excel file found but failed to parse, treat as warning and continue
636
654
  self.run.log_message(f'Excel parsing failed (continuing): {str(e)}', context=Context.WARNING.value)
637
655
  excel_metadata = {}
638
656
 
@@ -693,7 +711,6 @@ class UploadAction(Action):
693
711
 
694
712
  collection = self.run.client.get_data_collection(collection_id)
695
713
  self.run.set_progress(2, 2, category='analyze_collection')
696
- self.run.log_message('Collection analysis completed.')
697
714
 
698
715
  return collection['file_specifications']
699
716
 
@@ -716,7 +733,7 @@ class UploadAction(Action):
716
733
  failed_count = 0
717
734
 
718
735
  # Initialize metrics
719
- self._update_metrics(organized_files_count, success_count, failed_count, 'data_file')
736
+ self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
720
737
 
721
738
  for organized_file in organized_files:
722
739
  try:
@@ -730,12 +747,11 @@ class UploadAction(Action):
730
747
  failed_count += 1
731
748
 
732
749
  current_progress += 1
733
- self._update_metrics(organized_files_count, success_count, failed_count, 'data_file')
750
+ self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
734
751
  self.run.set_progress(current_progress, organized_files_count, category='upload_data_files')
735
752
 
736
753
  # Finish progress
737
754
  self.run.set_progress(organized_files_count, organized_files_count, category='upload_data_files')
738
- self.run.log_message(f'Upload data files completed. Success: {success_count}, Failed: {failed_count}')
739
755
 
740
756
  return upload_result
741
757
 
@@ -762,7 +778,7 @@ class UploadAction(Action):
762
778
  batches_count = len(batches)
763
779
 
764
780
  # Initialize metrics
765
- self._update_metrics(upload_result_count, success_count, failed_count, 'data_unit')
781
+ self._update_metrics(upload_result_count, success_count, failed_count, 'data_units')
766
782
 
767
783
  for batch in batches:
768
784
  try:
@@ -780,12 +796,11 @@ class UploadAction(Action):
780
796
  self.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)
781
797
 
782
798
  current_progress += 1
783
- self._update_metrics(upload_result_count, success_count, failed_count, 'data_unit')
799
+ self._update_metrics(upload_result_count, success_count, failed_count, 'data_units')
784
800
  self.run.set_progress(current_progress, batches_count, category='generate_data_units')
785
801
 
786
802
  # Finish progress
787
803
  self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
788
- self.run.log_message(f'Data units generation completed. Success: {success_count}, Failed: {failed_count}')
789
804
 
790
805
  return sum(generated_data_units, [])
791
806
 
@@ -899,7 +914,7 @@ class UploadAction(Action):
899
914
  else:
900
915
  missing = [req for req in required_specs if req not in files_dict]
901
916
  self.run.log_message(
902
- f'Dataset ID {file_name} missing required files: {", ".join(missing)}',
917
+ f'{file_name} missing required files: {", ".join(missing)}',
903
918
  context=Context.WARNING.value,
904
919
  )
905
920
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synapse-sdk
3
- Version: 1.0.0a93
3
+ Version: 1.0.0a95
4
4
  Summary: synapse sdk
5
5
  Author-email: datamaker <developer@datamaker.io>
6
6
  License: MIT
@@ -165,7 +165,7 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
165
165
  synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
166
166
  synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
167
  synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
- synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=Nhlq1aIvnhybxKhm7KlJoTGP3NO-JsE2Ya7Dgi7HVOg,37427
168
+ synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=W2GYvzzV1ZPljUezwZiFYlihq1muIJVdinnsNW7Nyw8,37673
169
169
  synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=6_dRa0_J2aS8NSUfO4MKbPxZcdPS2FpJzzp51edYAZc,281
170
170
  synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
171
  synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
@@ -221,9 +221,9 @@ synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_n
221
221
  synapse_sdk/utils/storage/providers/http.py,sha256=2DhIulND47JOnS5ZY7MZUex7Su3peAPksGo1Wwg07L4,5828
222
222
  synapse_sdk/utils/storage/providers/s3.py,sha256=ZmqekAvIgcQBdRU-QVJYv1Rlp6VHfXwtbtjTSphua94,2573
223
223
  synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
224
- synapse_sdk-1.0.0a93.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
225
- synapse_sdk-1.0.0a93.dist-info/METADATA,sha256=NQ5zn-ID4jYmBauhJUpI351Eyfos0i9pRmIqPFuOZUY,3837
226
- synapse_sdk-1.0.0a93.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
227
- synapse_sdk-1.0.0a93.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
228
- synapse_sdk-1.0.0a93.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
229
- synapse_sdk-1.0.0a93.dist-info/RECORD,,
224
+ synapse_sdk-1.0.0a95.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
225
+ synapse_sdk-1.0.0a95.dist-info/METADATA,sha256=xvnJ6ZPqDHNbb1yBCb0jmh456ZLZTJjoWjhmGGXlSgk,3837
226
+ synapse_sdk-1.0.0a95.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
227
+ synapse_sdk-1.0.0a95.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
228
+ synapse_sdk-1.0.0a95.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
229
+ synapse_sdk-1.0.0a95.dist-info/RECORD,,