synapse-sdk 1.0.0b21__py3-none-any.whl → 1.0.0b23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synapse-sdk might be problematic.
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +680 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +897 -0
- synapse_sdk/devtools/docs/sidebars.ts +1 -0
- synapse_sdk/plugins/README.md +934 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +20 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +623 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +221 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +149 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +178 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +139 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +6 -1
- synapse_sdk/plugins/models.py +13 -7
- {synapse_sdk-1.0.0b21.dist-info → synapse_sdk-1.0.0b23.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0b21.dist-info → synapse_sdk-1.0.0b23.dist-info}/RECORD +19 -10
- synapse_sdk/plugins/categories/upload/actions/upload.py +0 -1368
- {synapse_sdk-1.0.0b21.dist-info → synapse_sdk-1.0.0b23.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b21.dist-info → synapse_sdk-1.0.0b23.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b21.dist-info → synapse_sdk-1.0.0b23.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b21.dist-info → synapse_sdk-1.0.0b23.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/enums.py (new file, +221 lines):

```python
from enum import Enum

from synapse_sdk.shared.enums import Context


class UploadStatus(str, Enum):
    """Upload processing status enumeration.

    Defines the possible states for upload operations, data files, and data units
    throughout the upload process.

    Attributes:
        SUCCESS: Upload completed successfully
        FAILED: Upload failed with errors
    """

    SUCCESS = 'success'
    FAILED = 'failed'


class LogCode(str, Enum):
    """Type-safe logging codes for upload operations.

    Enumeration of all possible log events during upload processing. Each code
    corresponds to a specific event or error state with predefined message
    templates and log levels.

    The codes are organized by category:
    - Validation codes (VALIDATION_FAILED, STORAGE_VALIDATION_FAILED, etc.)
    - File processing codes (NO_FILES_FOUND, FILES_DISCOVERED, etc.)
    - Excel processing codes (EXCEL_SECURITY_VIOLATION, EXCEL_PARSING_ERROR, etc.)
    - Progress tracking codes (UPLOADING_DATA_FILES, GENERATING_DATA_UNITS, etc.)

    Each code maps to a configuration in LOG_MESSAGES with message template
    and appropriate log level.
    """

    STORAGE_VALIDATION_FAILED = 'STORAGE_VALIDATION_FAILED'
    COLLECTION_VALIDATION_FAILED = 'COLLECTION_VALIDATION_FAILED'
    PROJECT_VALIDATION_FAILED = 'PROJECT_VALIDATION_FAILED'
    VALIDATION_FAILED = 'VALIDATION_FAILED'
    NO_FILES_FOUND = 'NO_FILES_FOUND'
    NO_FILES_UPLOADED = 'NO_FILES_UPLOADED'
    NO_DATA_UNITS_GENERATED = 'NO_DATA_UNITS_GENERATED'
    NO_TYPE_DIRECTORIES = 'NO_TYPE_DIRECTORIES'
    EXCEL_SECURITY_VIOLATION = 'EXCEL_SECURITY_VIOLATION'
    EXCEL_PARSING_ERROR = 'EXCEL_PARSING_ERROR'
    EXCEL_METADATA_LOADED = 'EXCEL_METADATA_LOADED'
    UPLOADING_DATA_FILES = 'UPLOADING_DATA_FILES'
    GENERATING_DATA_UNITS = 'GENERATING_DATA_UNITS'
    IMPORT_COMPLETED = 'IMPORT_COMPLETED'
    TYPE_DIRECTORIES_FOUND = 'TYPE_DIRECTORIES_FOUND'
    TYPE_STRUCTURE_DETECTED = 'TYPE_STRUCTURE_DETECTED'
    FILES_DISCOVERED = 'FILES_DISCOVERED'
    NO_FILES_FOUND_WARNING = 'NO_FILES_FOUND_WARNING'
    FILE_UPLOAD_FAILED = 'FILE_UPLOAD_FAILED'
    DATA_UNIT_BATCH_FAILED = 'DATA_UNIT_BATCH_FAILED'
    FILENAME_TOO_LONG = 'FILENAME_TOO_LONG'
    MISSING_REQUIRED_FILES = 'MISSING_REQUIRED_FILES'
    EXCEL_FILE_NOT_FOUND = 'EXCEL_FILE_NOT_FOUND'
    EXCEL_FILE_VALIDATION_STARTED = 'EXCEL_FILE_VALIDATION_STARTED'
    EXCEL_WORKBOOK_LOADED = 'EXCEL_WORKBOOK_LOADED'
    FILE_ORGANIZATION_STARTED = 'FILE_ORGANIZATION_STARTED'
    BATCH_PROCESSING_STARTED = 'BATCH_PROCESSING_STARTED'
    EXCEL_SECURITY_VALIDATION_STARTED = 'EXCEL_SECURITY_VALIDATION_STARTED'
    EXCEL_MEMORY_ESTIMATION = 'EXCEL_MEMORY_ESTIMATION'
    EXCEL_FILE_NOT_FOUND_PATH = 'EXCEL_FILE_NOT_FOUND_PATH'
    EXCEL_SECURITY_VALIDATION_FAILED = 'EXCEL_SECURITY_VALIDATION_FAILED'
    EXCEL_PARSING_FAILED = 'EXCEL_PARSING_FAILED'
    EXCEL_INVALID_FILE_FORMAT = 'EXCEL_INVALID_FILE_FORMAT'
    EXCEL_FILE_TOO_LARGE = 'EXCEL_FILE_TOO_LARGE'
    EXCEL_FILE_ACCESS_ERROR = 'EXCEL_FILE_ACCESS_ERROR'
    EXCEL_UNEXPECTED_ERROR = 'EXCEL_UNEXPECTED_ERROR'


LOG_MESSAGES = {
    LogCode.STORAGE_VALIDATION_FAILED: {
        'message': 'Storage validation failed.',
        'level': Context.DANGER,
    },
    LogCode.COLLECTION_VALIDATION_FAILED: {
        'message': 'Collection validation failed.',
        'level': Context.DANGER,
    },
    LogCode.PROJECT_VALIDATION_FAILED: {
        'message': 'Project validation failed.',
        'level': Context.DANGER,
    },
    LogCode.VALIDATION_FAILED: {
        'message': 'Validation failed.',
        'level': Context.DANGER,
    },
    LogCode.NO_FILES_FOUND: {
        'message': 'Files not found on the path.',
        'level': Context.WARNING,
    },
    LogCode.NO_FILES_UPLOADED: {
        'message': 'No files were uploaded.',
        'level': Context.WARNING,
    },
    LogCode.NO_DATA_UNITS_GENERATED: {
        'message': 'No data units were generated.',
        'level': Context.WARNING,
    },
    LogCode.NO_TYPE_DIRECTORIES: {
        'message': 'No type-based directory structure found.',
        'level': Context.INFO,
    },
    LogCode.EXCEL_SECURITY_VIOLATION: {
        'message': 'Excel security validation failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_PARSING_ERROR: {
        'message': 'Excel parsing failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_METADATA_LOADED: {
        'message': 'Excel metadata loaded for {} files',
        'level': None,
    },
    LogCode.UPLOADING_DATA_FILES: {
        'message': 'Uploading data files...',
        'level': None,
    },
    LogCode.GENERATING_DATA_UNITS: {
        'message': 'Generating data units...',
        'level': None,
    },
    LogCode.IMPORT_COMPLETED: {
        'message': 'Import completed.',
        'level': None,
    },
    LogCode.TYPE_DIRECTORIES_FOUND: {
        'message': 'Found type directories: {}',
        'level': None,
    },
    LogCode.TYPE_STRUCTURE_DETECTED: {
        'message': 'Detected type-based directory structure',
        'level': None,
    },
    LogCode.FILES_DISCOVERED: {
        'message': 'Discovered {} files',
        'level': None,
    },
    LogCode.NO_FILES_FOUND_WARNING: {
        'message': 'No files found.',
        'level': Context.WARNING,
    },
    LogCode.FILE_UPLOAD_FAILED: {
        'message': 'Failed to upload file: {}',
        'level': Context.DANGER,
    },
    LogCode.DATA_UNIT_BATCH_FAILED: {
        'message': 'Failed to create data units batch: {}',
        'level': Context.DANGER,
    },
    LogCode.FILENAME_TOO_LONG: {
        'message': 'Skipping file with overly long name: {}...',
        'level': Context.WARNING,
    },
    LogCode.MISSING_REQUIRED_FILES: {
        'message': '{} missing required files: {}',
        'level': Context.WARNING,
    },
    LogCode.EXCEL_FILE_NOT_FOUND: {
        'message': 'Excel metadata file not found: {}',
        'level': Context.WARNING,
    },
    LogCode.EXCEL_FILE_VALIDATION_STARTED: {
        'message': 'Excel file validation started',
        'level': Context.INFO,
    },
    LogCode.EXCEL_WORKBOOK_LOADED: {
        'message': 'Excel workbook loaded successfully',
        'level': Context.INFO,
    },
    LogCode.FILE_ORGANIZATION_STARTED: {
        'message': 'File organization started',
        'level': Context.INFO,
    },
    LogCode.BATCH_PROCESSING_STARTED: {
        'message': 'Batch processing started: {} batches of {} items each',
        'level': Context.INFO,
    },
    LogCode.EXCEL_SECURITY_VALIDATION_STARTED: {
        'message': 'Excel security validation started for file size: {} bytes',
        'level': Context.INFO,
    },
    LogCode.EXCEL_MEMORY_ESTIMATION: {
        'message': 'Excel memory estimation: {} bytes (file) * 3 = {} bytes (estimated)',
        'level': Context.INFO,
    },
    LogCode.EXCEL_FILE_NOT_FOUND_PATH: {
        'message': 'Excel metadata file not found',
        'level': Context.WARNING,
    },
    LogCode.EXCEL_SECURITY_VALIDATION_FAILED: {
        'message': 'Excel security validation failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_PARSING_FAILED: {
        'message': 'Excel parsing failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_INVALID_FILE_FORMAT: {
        'message': 'Invalid Excel file format: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_FILE_TOO_LARGE: {
        'message': 'Excel file too large to process (memory limit exceeded)',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_FILE_ACCESS_ERROR: {
        'message': 'File access error reading excel metadata: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_UNEXPECTED_ERROR: {
        'message': 'Unexpected error reading excel metadata: {}',
        'level': Context.DANGER,
    },
}
```
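Each LOG_MESSAGES entry pairs a positional `{}` template with an optional Context level, and entries whose level is None defer to a default chosen at log time. A minimal standalone sketch of that lookup; `format_log` itself is hypothetical, but it mirrors how `UploadRun.log_message_with_code` (in run.py, further below) resolves a message and level from a LogCode:

```python
# Minimal sketch of the LOG_MESSAGES lookup; `format_log` is hypothetical,
# while LogCode, LOG_MESSAGES, and Context are the definitions above.
def format_log(code: LogCode, *args, default_level: Context = Context.INFO) -> tuple[str, Context]:
    config = LOG_MESSAGES[code]
    # Templates use positional '{}' slots, filled in argument order.
    message = config['message'].format(*args) if args else config['message']
    # Entries with 'level': None fall back to the caller-supplied default.
    return message, config['level'] or default_level


# format_log(LogCode.FILES_DISCOVERED, 42) -> ('Discovered 42 files', Context.INFO)
```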
synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py (new file, +36 lines):

```python
class ExcelSecurityError(Exception):
    """Exception raised when Excel file security validation fails.

    This exception is raised when an Excel file violates security constraints
    such as file size limits, memory usage limits, or contains potentially
    dangerous content.

    Used during Excel metadata processing to enforce security policies
    and prevent processing of files that could pose security risks.

    Example:
        >>> if file_size > max_size:
        ...     raise ExcelSecurityError(f"File size {file_size} exceeds limit {max_size}")
    """

    pass


class ExcelParsingError(Exception):
    """Exception raised when Excel file parsing encounters errors.

    This exception is raised when an Excel file cannot be parsed due to
    format issues, corruption, or other parsing-related problems that
    prevent successful metadata extraction.

    Used during Excel metadata loading to distinguish parsing errors
    from security violations or other types of errors.

    Example:
        >>> try:
        ...     workbook = load_workbook(excel_file)
        ... except InvalidFileException as e:
        ...     raise ExcelParsingError(f"Failed to parse Excel file: {e}")
    """

    pass
```
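Splitting these into two exception types lets callers treat policy violations (size or memory limits) differently from malformed workbooks. A hedged sketch of the catch side; `load_excel_metadata` is a hypothetical helper standing in for whatever the action module actually calls:

```python
# Hypothetical caller illustrating the intended error split;
# `load_excel_metadata` is illustrative, not part of this diff.
def read_metadata_safely(path: str) -> dict | None:
    try:
        return load_excel_metadata(path)
    except ExcelSecurityError:
        # Security policy violation (e.g. file or memory limits): skip, do not retry.
        return None
    except ExcelParsingError:
        # Corrupted or malformed workbook: treat as a data problem instead.
        return None
```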
synapse_sdk/plugins/categories/upload/actions/upload/models.py (new file, +149 lines):

```python
from pathlib import Path
from typing import Annotated

from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator
from pydantic_core import PydanticCustomError

from synapse_sdk.clients.exceptions import ClientError
from synapse_sdk.utils.pydantic.validators import non_blank

from .utils import ExcelSecurityConfig


class UploadParams(BaseModel):
    """Upload action parameter validation model.

    Defines and validates all parameters required for upload operations.
    Uses Pydantic for type validation and custom validators to ensure
    storage, data_collection, and project resources exist before processing.

    Attributes:
        name (str): Human-readable name for the upload operation
        description (str | None): Optional description of the upload
        path (str): File system path to upload from
        storage (int): Storage ID where files will be uploaded
        data_collection (int): Data collection ID for organizing uploads
        project (int | None): Optional project ID for grouping
        excel_metadata_path (str | None): Path to Excel metadata file
        is_recursive (bool): Whether to recursively process subdirectories
        max_file_size_mb (int): Maximum file size limit in megabytes
        creating_data_unit_batch_size (int): Batch size for data unit creation
        use_async_upload (bool): Whether to use asynchronous upload processing
        extra_params (dict | None): Extra parameters for the action.
            Example: {"include_metadata": True, "compression": "gzip"}

    Validation:
        - name: Must be non-blank after validation
        - storage: Must exist and be accessible via client API
        - data_collection: Must exist and be accessible via client API
        - project: Must exist if specified, or can be None
        - excel_metadata_path: Must be valid Excel file if specified

    Example:
        >>> params = UploadParams(
        ...     name="Data Upload",
        ...     path="/data/files",
        ...     storage=1,
        ...     data_collection=5
        ... )
    """

    name: Annotated[str, AfterValidator(non_blank)]
    description: str | None = None
    path: str
    storage: int
    data_collection: int
    project: int | None = None
    excel_metadata_path: str | None = None
    is_recursive: bool = True
    max_file_size_mb: int = 50
    creating_data_unit_batch_size: int = 1
    use_async_upload: bool = True
    extra_params: dict | None = None

    @field_validator('storage', mode='before')
    @classmethod
    def check_storage_exists(cls, value, info: ValidationInfo) -> int:
        if info.context is None:
            raise PydanticCustomError('missing_context', 'Validation context is required.')

        action = info.context['action']
        client = action.client
        try:
            client.get_storage(value)
        except ClientError:
            raise PydanticCustomError('client_error', 'Error occurred while checking storage exists.')
        return value

    @field_validator('data_collection', mode='before')
    @classmethod
    def check_data_collection_exists(cls, value, info: ValidationInfo) -> int:
        if info.context is None:
            raise PydanticCustomError('missing_context', 'Validation context is required.')

        action = info.context['action']
        client = action.client
        try:
            client.get_data_collection(value)
        except ClientError:
            raise PydanticCustomError('client_error', 'Error occurred while checking data_collection exists.')
        return value

    @field_validator('project', mode='before')
    @classmethod
    def check_project_exists(cls, value, info: ValidationInfo) -> int | None:
        if not value:
            return value

        if info.context is None:
            raise PydanticCustomError('missing_context', 'Validation context is required.')

        action = info.context['action']
        client = action.client
        try:
            client.get_project(value)
        except ClientError:
            raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
        return value

    @field_validator('excel_metadata_path', mode='before')
    @classmethod
    def check_excel_metadata_path(cls, value, info: ValidationInfo) -> str | None:
        if not value:
            return value

        excel_path = Path(value)

        if not excel_path.exists():
            raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')

        if excel_path.suffix.lower() not in ['.xlsx', '.xls']:
            raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')

        file_size = excel_path.stat().st_size
        excel_config = ExcelSecurityConfig()
        if file_size > excel_config.MAX_FILE_SIZE_BYTES:
            max_size_mb = excel_config.MAX_FILE_SIZE_MB
            raise PydanticCustomError(
                'file_too_large',
                'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
                {'max_size_mb': max_size_mb},
            )

        try:
            with open(excel_path, 'rb') as f:
                header = f.read(8)
                if not header:
                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be empty.')

                if excel_path.suffix.lower() == '.xlsx':
                    if not header.startswith(b'PK'):
                        raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
                elif excel_path.suffix.lower() == '.xls':
                    if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
                        raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')

        except (OSError, IOError):
            raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')

        return value
```
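Because check_storage_exists, check_data_collection_exists, and check_project_exists all read `info.context['action']`, the model has to be validated with a context dict carrying the action; without one, those validators raise the missing_context error. A sketch, assuming an `action` object that exposes a `.client` with the get_storage / get_data_collection / get_project methods the validators call:

```python
# Sketch: validating UploadParams with the context its validators expect.
# `action` is an assumed object exposing `.client` with get_storage /
# get_data_collection / get_project, as called by the validators above.
params = UploadParams.model_validate(
    {
        'name': 'Data Upload',
        'path': '/data/files',
        'storage': 1,
        'data_collection': 5,
    },
    context={'action': action},  # read via info.context['action']
)
```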
synapse_sdk/plugins/categories/upload/actions/upload/run.py (new file, +178 lines):

```python
import json
from datetime import datetime
from typing import Optional

from pydantic import BaseModel

from synapse_sdk.plugins.models import Run
from synapse_sdk.shared.enums import Context

from .enums import LOG_MESSAGES, LogCode, UploadStatus
from .utils import PathAwareJSONEncoder


class UploadRun(Run):
    """Upload-specific run management class.

    Extends the base Run class with upload-specific logging capabilities
    and event tracking. Provides type-safe logging using LogCode enums
    and specialized methods for tracking upload progress.

    Manages logging for upload events, data files, data units, and tasks
    throughout the upload lifecycle. Each log entry includes status,
    timestamps, and relevant metadata.

    Attributes:
        Inherits all attributes from base Run class plus upload-specific
        logging methods and nested model classes for structured logging.

    Example:
        >>> run = UploadRun(job_id, context)
        >>> run.log_message_with_code(LogCode.UPLOADING_DATA_FILES)
        >>> run.log_upload_event(LogCode.FILES_DISCOVERED, file_count)
    """

    class UploadEventLog(BaseModel):
        """Model for upload event log entries.

        Records significant events during upload processing with
        status information and timestamps.

        Attributes:
            info (str | None): Optional additional information
            status (Context): Event status/severity level
            created (str): Timestamp when event occurred
        """

        info: Optional[str] = None
        status: Context
        created: str

    class DataFileLog(BaseModel):
        """Model for data file processing log entries.

        Tracks the processing status of individual data files
        during upload operations.

        Attributes:
            data_file_info (str | None): Information about the data file
            status (UploadStatus): Processing status (SUCCESS/FAILED)
            created (str): Timestamp when log entry was created
        """

        data_file_info: str | None
        status: UploadStatus
        created: str

    class DataUnitLog(BaseModel):
        """Model for data unit creation log entries.

        Records the creation status of data units generated from
        uploaded files, including metadata and identifiers.

        Attributes:
            data_unit_id (int | None): ID of created data unit
            status (UploadStatus): Creation status (SUCCESS/FAILED)
            created (str): Timestamp when log entry was created
            data_unit_meta (dict | None): Metadata associated with data unit
        """

        data_unit_id: int | None
        status: UploadStatus
        created: str
        data_unit_meta: dict | None

    class TaskLog(BaseModel):
        """Model for task execution log entries.

        Tracks the execution status of background tasks related
        to upload processing.

        Attributes:
            task_id (int | None): ID of the executed task
            status (UploadStatus): Task execution status (SUCCESS/FAILED)
            created (str): Timestamp when log entry was created
        """

        task_id: int | None
        status: UploadStatus
        created: str

    class MetricsRecord(BaseModel):
        """Model for upload metrics tracking.

        Records count-based metrics for monitoring upload
        progress and success rates.

        Attributes:
            stand_by (int): Number of items waiting to be processed
            failed (int): Number of items that failed processing
            success (int): Number of items successfully processed
        """

        stand_by: int
        failed: int
        success: int

    def log_message_with_code(self, code: LogCode, *args, level: Optional[Context] = None):
        if code not in LOG_MESSAGES:
            self.log_message(f'Unknown log code: {code}')
            return

        log_config = LOG_MESSAGES[code]
        message = log_config['message'].format(*args) if args else log_config['message']
        log_level = level or log_config['level'] or Context.INFO

        # Always call log_message for basic logging
        if log_level:
            self.log_message(message, context=log_level.value)
        else:
            self.log_message(message)

    def log_upload_event(self, code: LogCode, *args, level: Optional[Context] = None):
        # Call log_message_with_code to handle the basic logging
        self.log_message_with_code(code, *args, level=level)

        # Also log the event for upload-specific tracking
        if code not in LOG_MESSAGES:
            now = datetime.now().isoformat()
            self.log(
                'upload_event',
                self.UploadEventLog(info=f'Unknown log code: {code}', status=Context.DANGER, created=now).model_dump(),
            )
            return

        log_config = LOG_MESSAGES[code]
        message = log_config['message'].format(*args) if args else log_config['message']
        log_level = level or log_config['level'] or Context.INFO

        now = datetime.now().isoformat()
        self.log(
            'upload_event',
            self.UploadEventLog(info=message, status=log_level, created=now).model_dump(),
        )

    def log_data_file(self, data_file_info: dict, status: UploadStatus):
        now = datetime.now().isoformat()
        data_file_info_str = json.dumps(data_file_info, ensure_ascii=False, cls=PathAwareJSONEncoder)
        self.log(
            'upload_data_file',
            self.DataFileLog(data_file_info=data_file_info_str, status=status, created=now).model_dump(),
        )

    def log_data_unit(self, data_unit_id: int, status: UploadStatus, data_unit_meta: dict | None = None):
        now = datetime.now().isoformat()
        self.log(
            'upload_data_unit',
            self.DataUnitLog(
                data_unit_id=data_unit_id, status=status, created=now, data_unit_meta=data_unit_meta
            ).model_dump(),
        )

    def log_task(self, task_id: int, status: UploadStatus):
        now = datetime.now().isoformat()
        self.log('upload_task', self.TaskLog(task_id=task_id, status=status, created=now).model_dump())

    def log_metrics(self, record: MetricsRecord, category: str):
        record = self.MetricsRecord.model_validate(record)
        self.set_metrics(value=record.model_dump(), category=category)
```
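Taken together, UploadRun gives an upload action one call per record type. A hedged end-to-end sketch; the constructor arguments follow the class docstring's `UploadRun(job_id, context)` example, since the base Run signature is not part of this diff:

```python
# Sketch of the UploadRun logging surface. Construction follows the class
# docstring (`UploadRun(job_id, context)`); the base Run signature is not
# shown in this diff, so treat it as an assumption.
run = UploadRun(job_id, context)

run.log_upload_event(LogCode.FILES_DISCOVERED, 42)  # message + 'upload_event' record
run.log_data_file({'path': '/data/a.jpg'}, UploadStatus.SUCCESS)
run.log_data_unit(data_unit_id=101, status=UploadStatus.SUCCESS, data_unit_meta={'group': 'a'})
run.log_task(task_id=7, status=UploadStatus.SUCCESS)

# log_metrics re-validates its input, so a dict that MetricsRecord can
# coerce works as well as a MetricsRecord instance.
run.log_metrics(UploadRun.MetricsRecord(stand_by=0, failed=1, success=41), category='data_units')
```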
|