synapse-sdk 2025.9.5__py3-none-any.whl → 2025.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of synapse-sdk has been flagged as a potentially problematic release.
- synapse_sdk/clients/base.py +129 -9
- synapse_sdk/devtools/docs/docs/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/docs/api/plugins/models.md +58 -3
- synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +663 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +39 -0
- synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/plugins/models.md +114 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +621 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +39 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
- synapse_sdk/devtools/docs/sidebars.ts +45 -1
- synapse_sdk/plugins/README.md +487 -80
- synapse_sdk/plugins/categories/base.py +1 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +8 -3
- synapse_sdk/plugins/categories/export/actions/export/utils.py +108 -8
- synapse_sdk/plugins/categories/export/templates/config.yaml +18 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +97 -0
- synapse_sdk/plugins/categories/neural_net/actions/train.py +592 -22
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +150 -3
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +8 -1
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +0 -1
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +6 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +24 -9
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +130 -18
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +147 -37
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +10 -5
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +31 -6
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +65 -37
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +17 -2
- synapse_sdk/plugins/categories/upload/templates/README.md +394 -0
- synapse_sdk/plugins/models.py +62 -0
- synapse_sdk/utils/file/download.py +261 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/METADATA +15 -2
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/RECORD +74 -43
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py (new file):

```diff
@@ -0,0 +1,127 @@
+"""Pre-processor management strategies for ToTask action."""
+
+from typing import Any, Dict
+
+from .base import PreProcessorStrategy, ToTaskContext
+
+
+class PreProcessorManagementStrategy(PreProcessorStrategy):
+    """Strategy for managing pre-processor lifecycle."""
+
+    def get_preprocessor_info(self, context: ToTaskContext, preprocessor_id: int) -> Dict[str, Any]:
+        """Get pre-processor information from the backend.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_id: The pre-processor ID
+
+        Returns:
+            Dict with pre-processor info or error
+        """
+        try:
+            client = context.client
+            pre_processor_response = client.get_plugin_release(preprocessor_id)
+            if isinstance(pre_processor_response, str):
+                return {'success': False, 'error': 'Invalid pre-processor response received'}
+
+            pre_processor: Dict[str, Any] = pre_processor_response
+            config = pre_processor.get('config', {})
+            code = config.get('code')
+            version = pre_processor.get('version')
+
+            if not code or not version:
+                return {'success': False, 'error': 'Invalid pre-processor configuration'}
+
+            return {'success': True, 'code': code, 'version': version}
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to get pre-processor info: {str(e)}'}
+
+    def ensure_preprocessor_running(self, context: ToTaskContext, preprocessor_code: str) -> Dict[str, Any]:
+        """Ensure the pre-processor is running, restart if necessary.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_code: The pre-processor code
+
+        Returns:
+            Dict indicating success or failure
+        """
+        try:
+            client = context.client
+
+            # Check if pre-processor is running
+            serve_applications_response = client.list_serve_applications(params={'plugin_code': preprocessor_code})
+            if isinstance(serve_applications_response, str):
+                return {'success': False, 'error': 'Invalid serve applications response'}
+
+            # Handle the response properly - it should be a dict with 'results' key
+            if not isinstance(serve_applications_response, dict):
+                return {'success': False, 'error': 'Unexpected serve applications response format'}
+
+            serve_applications: Dict[str, Any] = serve_applications_response
+            results = serve_applications.get('results', [])
+            running_serve_apps = [app for app in results if isinstance(app, dict) and app.get('status') == 'RUNNING']
+
+            # If not running, restart the pre-processor
+            if not running_serve_apps:
+                restart_result = self._restart_preprocessor(context, preprocessor_code)
+                if not restart_result['success']:
+                    return restart_result
+
+                # Verify restart was successful
+                serve_applications_response = client.list_serve_applications(params={'plugin_code': preprocessor_code})
+                if isinstance(serve_applications_response, str):
+                    return {'success': False, 'error': 'Failed to verify pre-processor restart'}
+
+                serve_applications = serve_applications_response
+                results = serve_applications.get('results', [])
+                running_serve_apps = [
+                    app for app in results if isinstance(app, dict) and app.get('status') == 'RUNNING'
+                ]
+
+                if not running_serve_apps:
+                    return {'success': False, 'error': 'Pre-processor failed to start after restart'}
+
+            return {'success': True}
+
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to ensure pre-processor running: {str(e)}'}
+
+    def _restart_preprocessor(self, context: ToTaskContext, preprocessor_code: str) -> Dict[str, Any]:
+        """Restart the pre-processor.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_code: The pre-processor code
+
+        Returns:
+            Dict indicating success or failure
+        """
+        try:
+            client = context.client
+
+            # Start the serve application
+            inference_options = context.config.get('inference_options', {})
+            serve_application_deployment_payload = {
+                'agent': context.params.get('agent') if context.params else None,
+                'action': 'deployment',
+                'params': {
+                    'num_cpus': inference_options.get('required_cpu_count', 2),
+                    'num_gpus': inference_options.get('required_gpu_count', 1),
+                },
+                'debug': True,
+            }
+
+            deployment_result = client.run_plugin(
+                preprocessor_code,
+                serve_application_deployment_payload,
+            )
+
+            deployment_job_id = deployment_result.get('job_id')
+            if not deployment_job_id:
+                return {'success': False, 'error': 'No deployment job ID returned'}
+
+            return {'success': True, 'error': 'Pre-processor restarted successfully'}
+
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to restart pre-processor: {str(e)}'}
```
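For orientation, a minimal sketch of how calling code might drive this strategy. The `context` object is assumed to be a populated `ToTaskContext` (defined in `strategies/base.py`, which is not shown in this diff); only the two public methods and their result-dict shapes come from the file above.

```python
# Hypothetical driver; `context` and the preprocessor ID are placeholders.
strategy = PreProcessorManagementStrategy()

info = strategy.get_preprocessor_info(context, preprocessor_id=42)
if not info['success']:
    raise RuntimeError(info['error'])

# On success the dict carries the plugin 'code' and 'version'.
status = strategy.ensure_preprocessor_running(context, preprocessor_code=info['code'])
if not status['success']:
    raise RuntimeError(status['error'])
```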
synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py (new file):

```diff
@@ -0,0 +1,143 @@
+"""Validation strategies for ToTask action."""
+
+from typing import Any, Dict
+
+from ..enums import LogCode
+from .base import ToTaskContext, ValidationStrategy
+
+
+class ProjectValidationStrategy(ValidationStrategy):
+    """Strategy for validating project and data collection."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate project and data collection exist and are accessible.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+            project_id = context.params['project']
+
+            # Validate project response
+            project_response = client.get_project(project_id)
+            if isinstance(project_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_PROJECT_RESPONSE)
+                return {'success': False, 'error': 'Invalid project response received'}
+
+            project: Dict[str, Any] = project_response
+            context.project = project
+
+            # Validate data collection exists
+            data_collection_id = project.get('data_collection')
+            if not data_collection_id:
+                context.logger.log_message_with_code(LogCode.NO_DATA_COLLECTION)
+                return {'success': False, 'error': 'Project does not have a data collection'}
+
+            # Validate data collection response
+            data_collection_response = client.get_data_collection(data_collection_id)
+            if isinstance(data_collection_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_DATA_COLLECTION_RESPONSE)
+                return {'success': False, 'error': 'Invalid data collection response received'}
+
+            data_collection: Dict[str, Any] = data_collection_response
+            context.data_collection = data_collection
+
+            return {'success': True}
+
+        except Exception as e:
+            error_msg = f'Project validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+
+
+class TaskValidationStrategy(ValidationStrategy):
+    """Strategy for validating and discovering tasks."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Discover and validate tasks for processing.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+
+            # Build task query parameters
+            task_ids_query_params = {
+                'project': context.params['project'],
+                'fields': 'id',
+            }
+            if context.params.get('task_filters'):
+                task_ids_query_params.update(context.params['task_filters'])
+
+            # Get tasks
+            task_ids_generator, task_ids_count = client.list_tasks(params=task_ids_query_params, list_all=True)
+            task_ids = [
+                int(item.get('id', 0)) for item in task_ids_generator if isinstance(item, dict) and item.get('id')
+            ]
+
+            # Validate tasks found
+            if not task_ids_count:
+                context.logger.log_message_with_code(LogCode.NO_TASKS_FOUND)
+                return {'success': False, 'error': 'No tasks found to annotate'}
+
+            context.task_ids = task_ids
+            return {'success': True, 'task_count': len(task_ids)}
+
+        except Exception as e:
+            error_msg = f'Task validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+
+
+class TargetSpecificationValidationStrategy(ValidationStrategy):
+    """Strategy for validating target specification for file annotation."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate target specification exists in file specifications.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            # Only validate if using FILE annotation method
+            from ..enums import AnnotationMethod
+
+            if context.annotation_method != AnnotationMethod.FILE:
+                return {'success': True}
+
+            target_specification_name = context.params.get('target_specification_name')
+            if not target_specification_name:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_REQUIRED)
+                return {'success': False, 'error': 'Target specification name is required for file annotation method'}
+
+            # Check if target specification exists in file specifications
+            if not context.data_collection:
+                return {'success': False, 'error': 'Data collection not available for validation'}
+
+            file_specifications = context.data_collection.get('file_specifications', [])
+            target_spec_exists = any(spec.get('name') == target_specification_name for spec in file_specifications)
+
+            if not target_spec_exists:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_NOT_FOUND, target_specification_name)
+                return {
+                    'success': False,
+                    'error': f"Target specification '{target_specification_name}' not found in file specifications",
+                }
+
+            return {'success': True}
+
+        except Exception as e:
+            error_msg = f'Target specification validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
```
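The three strategies share one result-dict convention, and order matters: `ProjectValidationStrategy` populates `context.data_collection`, which `TargetSpecificationValidationStrategy` later reads. The real call site is `orchestrator.py` (not shown in this diff), so the loop below is only an illustrative sketch of that contract.

```python
# Illustrative sketch; `context` is an assumed, fully populated ToTaskContext.
validators = [
    ProjectValidationStrategy(),  # sets context.project and context.data_collection
    TaskValidationStrategy(),  # sets context.task_ids
    TargetSpecificationValidationStrategy(),  # reads context.data_collection
]

for validator in validators:
    result = validator.validate(context)
    if not result['success']:
        # Each strategy has already logged a LogCode before returning.
        raise RuntimeError(result['error'])
```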
synapse_sdk/plugins/categories/upload/actions/upload/__init__.py:

```diff
@@ -1,7 +1,7 @@
 from .action import UploadAction
 from .enums import LOG_MESSAGES, LogCode, UploadStatus
 from .exceptions import ExcelParsingError, ExcelSecurityError
-from .models import UploadParams
+from .models import ExcelMetadataFile, UploadParams
 from .run import UploadRun
 from .utils import ExcelSecurityConfig, PathAwareJSONEncoder
 
@@ -9,6 +9,7 @@ __all__ = [
     'UploadAction',
     'UploadRun',
     'UploadParams',
+    'ExcelMetadataFile',
     'UploadStatus',
     'LogCode',
     'LOG_MESSAGES',
```
synapse_sdk/plugins/categories/upload/actions/upload/action.py:

```diff
@@ -173,11 +173,18 @@ class UploadAction(Action):
         organized_files = context.get('organized_files', [])
         file_specification_template = context.get('file_specification_template', {})
         pathlib_cwd = context.get('pathlib_cwd')
+        use_single_path = context.get_param('use_single_path', True)
 
-        if not organized_files or not file_specification_template or not pathlib_cwd:
+        # Validate required data based on mode
+        if not organized_files or not file_specification_template:
             raise ActionError('Required data not available from workflow steps')
 
+        # In single-path mode, pathlib_cwd is required
+        if use_single_path and not pathlib_cwd:
+            raise ActionError('pathlib_cwd is required in single-path mode')
+
         # CRITICAL: Integrate with existing uploader mechanism
+        # In multi-path mode, pathlib_cwd may be None, but uploader should still work
         uploader = self.get_uploader(pathlib_cwd, file_specification_template, organized_files, self.params)
         organized_files = uploader.handle_upload_files()
 
```
synapse_sdk/plugins/categories/upload/actions/upload/models.py:

```diff
@@ -1,14 +1,53 @@
-from pathlib import Path
 from typing import Annotated
 
-from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator
+from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator, model_validator
 from pydantic_core import PydanticCustomError
 
 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.utils.pydantic.validators import non_blank
-from synapse_sdk.utils.storage import get_pathlib
 
-
+
+class ExcelMetadataFile(BaseModel):
+    """Excel metadata configuration for base64 encoded data.
+
+    This model is used specifically for base64-encoded Excel metadata files,
+    typically from web frontends or API integrations.
+
+    Attributes:
+        data: Base64 encoded content of the Excel file
+        filename: Name of the original file before base64 encoding
+
+    Examples:
+        Base64 mode:
+        >>> config = ExcelMetadataFile(
+        ...     data="UEsDBBQABgAI...",
+        ...     filename="metadata.xlsx"
+        ... )
+    """
+
+    data: str
+    filename: str
+
+
+class AssetConfig(BaseModel):
+    """Configuration for individual asset in multi-path mode.
+
+    Used when use_single_path=False to specify unique paths
+    and recursive settings for each file specification.
+
+    Attributes:
+        path (str): File system path for this specific asset
+        is_recursive (bool): Whether to recursively search subdirectories for this asset
+
+    Example:
+        >>> asset_config = AssetConfig(
+        ...     path="/sensors/camera/front",
+        ...     is_recursive=True
+        ... )
+    """
+
+    path: str
+    is_recursive: bool = True
 
 
 class UploadParams(BaseModel):
```
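Since `data` carries a raw base64 payload, building an `ExcelMetadataFile` from a local workbook is a thin wrapper around `base64.b64encode`; a minimal sketch (the path is a placeholder):

```python
import base64
from pathlib import Path

# Encode a local workbook into the base64 form ExcelMetadataFile expects.
xlsx_path = Path('metadata.xlsx')  # placeholder path
excel_metadata = ExcelMetadataFile(
    data=base64.b64encode(xlsx_path.read_bytes()).decode('ascii'),
    filename=xlsx_path.name,
)
```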
synapse_sdk/plugins/categories/upload/actions/upload/models.py (continued):

```diff
@@ -18,45 +57,93 @@ class UploadParams(BaseModel):
     Uses Pydantic for type validation and custom validators to ensure
     storage, data_collection, and project resources exist before processing.
 
+    Supports two modes controlled by use_single_path flag:
+
+    1. Single Path Mode (use_single_path=True, DEFAULT):
+       Traditional mode - all file specifications share one base path.
+       Requires: path, is_recursive
+       Ignores: assets
+
+    2. Multi-Path Mode (use_single_path=False):
+       Advanced mode - each file specification has its own path.
+       Requires: assets (dict with file spec names as keys)
+       Ignores: path, is_recursive
+
     Attributes:
         name (str): Human-readable name for the upload operation
         description (str | None): Optional description of the upload
-
+        use_single_path (bool): Mode selector (True=single path, False=multi-path)
+        path (str | None): Base path for single path mode
+        is_recursive (bool): Global recursive setting for single path mode
+        assets (dict[str, AssetConfig] | None): Per-asset configs for multi-path mode
         storage (int): Storage ID where files will be uploaded
-        data_collection (int): Data
+        data_collection (int): Data collection ID for organizing uploads
         project (int | None): Optional project ID for grouping
-        excel_metadata_path (str | None): Path to Excel metadata file
-
+        excel_metadata_path (str | None): Path to Excel metadata file (traditional, backward compatible)
+            Note: This parameter will be deprecated in a future version. Consider using excel_metadata instead.
+        excel_metadata (ExcelMetadataFile | None): Base64 encoded Excel metadata (for web/API integration)
+            Note: Cannot use both excel_metadata_path and excel_metadata simultaneously
         max_file_size_mb (int): Maximum file size limit in megabytes
         creating_data_unit_batch_size (int): Batch size for data unit creation
         use_async_upload (bool): Whether to use asynchronous upload processing
-        extra_params (dict | None): Extra parameters for the action
-            Example: {"include_metadata": True, "compression": "gzip"}
+        extra_params (dict | None): Extra parameters for the action
 
     Validation:
         - name: Must be non-blank after validation
         - storage: Must exist and be accessible via client API
         - data_collection: Must exist and be accessible via client API
         - project: Must exist if specified, or can be None
-
-
-
-
-
-
-
-
-
+        - use_single_path mode: Validates required fields per mode
+
+    Examples:
+        Single Path Mode (Traditional):
+        >>> params = UploadParams(
+        ...     name="Standard Upload",
+        ...     use_single_path=True,
+        ...     path="/data/experiment_1",
+        ...     is_recursive=True,
+        ...     storage=1,
+        ...     data_collection=5
+        ... )
+
+        Multi-Path Mode (Advanced):
+        >>> params = UploadParams(
+        ...     name="Multi-Source Upload",
+        ...     use_single_path=False,
+        ...     assets={
+        ...         "image_1": AssetConfig(path="/sensors/camera", is_recursive=True),
+        ...         "pcd_1": AssetConfig(path="/sensors/lidar", is_recursive=False)
+        ...     },
+        ...     storage=1,
+        ...     data_collection=5
+        ... )
     """
 
     name: Annotated[str, AfterValidator(non_blank)]
     description: str | None = None
-
+
+    # Mode selector flag (True = single path mode, False = multi-path mode)
+    use_single_path: bool = True
+
+    # Single path mode fields (used when use_single_path=True)
+    path: str | None = None
+    is_recursive: bool = True
+
+    # Multi-path mode fields (used when use_single_path=False)
+    assets: dict[str, AssetConfig] | None = None
+
     storage: int
     data_collection: int
     project: int | None = None
+
+    # Excel metadata - two separate parameters for clarity:
+    # 1. excel_metadata_path: Simple file path string (backward compatible, traditional usage)
+    #    NOTE: Will be deprecated in a future version. Consider using excel_metadata instead.
+    # 2. excel_metadata: Dictionary with base64 encoded data (new, for web/API integration)
+    # TODO: Plan to deprecate excel_metadata_path in a few versions for backward compatibility
     excel_metadata_path: str | None = None
-
+    excel_metadata: ExcelMetadataFile | None = None
+
     max_file_size_mb: int = 50
     creating_data_unit_batch_size: int = 1
     use_async_upload: bool = True
@@ -107,80 +194,33 @@ class UploadParams(BaseModel):
             raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
         return value
 
-    @
-
-
-        if
-
-
-        # Validate file extension
-        if not value.lower().endswith(('.xlsx', '.xls')):
-            raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')
-
-        # Get storage and path from validation data
-        if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
-            # If we don't have storage/path data yet, just validate extension
-            return value
-
-        if info.context is None:
-            raise PydanticCustomError('missing_context', 'Validation context is required.')
-
-        action = info.context['action']
-        client = action.client
-
-        try:
-            # Get storage configuration
-            storage_id = info.data['storage']
-            storage = client.get_storage(storage_id)
-
-            # Skip file system validation if storage doesn't have provider (likely test environment)
-            if not isinstance(storage, dict) or 'provider' not in storage:
-                # Basic validation only - likely in test environment
-                return value
-
-            # Get the actual file system path using storage + path
-            base_path = get_pathlib(storage, info.data['path'])
-
-            # Support both absolute and relative paths
-            if Path(value).is_absolute():
-                excel_path = Path(value)
-            else:
-                excel_path = base_path / value
-
-            if not excel_path.exists():
-                raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')
-
-            # Validate file size
-            file_size = excel_path.stat().st_size
-            excel_config = ExcelSecurityConfig()
-            if file_size > excel_config.MAX_FILE_SIZE_BYTES:
-                max_size_mb = excel_config.MAX_FILE_SIZE_MB
+    @model_validator(mode='after')
+    def validate_path_configuration(self) -> 'UploadParams':
+        """Validate path configuration based on use_single_path mode."""
+        if self.use_single_path:
+            # Single path mode: requires path
+            if not self.path:
                 raise PydanticCustomError(
-                    '
-                    'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
-                    {'max_size_mb': max_size_mb},
+                    'missing_path', "When use_single_path=true (single path mode), 'path' is required"
                 )
+            # Warn if assets is provided in single path mode (it will be ignored)
+            # For now, we'll silently ignore it
+        else:
+            # Multi-path mode: requires assets
+            if not self.assets:
+                raise PydanticCustomError(
+                    'missing_assets',
+                    "When use_single_path=false (multi-path mode), 'assets' must be provided "
+                    'with path configurations for each file specification',
+                )
+            # path and is_recursive are ignored in multi-path mode
 
-
-
-
-
-
-
-
-            if excel_path.suffix.lower() == '.xlsx':
-                if not header.startswith(b'PK'):
-                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
-            elif excel_path.suffix.lower() == '.xls':
-                if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
-                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
-
-        except (OSError, IOError):
-            raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')
-
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
-        except Exception as e:
-            raise PydanticCustomError('validation_error', f'Error validating Excel metadata file: {str(e)}')
+        # Validate excel metadata parameters - cannot use both at the same time
+        if self.excel_metadata_path and self.excel_metadata:
+            raise PydanticCustomError(
+                'conflicting_excel_metadata',
+                "Cannot specify both 'excel_metadata_path' and 'excel_metadata'. "
+                "Use 'excel_metadata_path' for file paths or 'excel_metadata' for base64 encoded data.",
+            )
 
-        return
+        return self
```
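Because this is an `@model_validator(mode='after')`, the mode checks run after field parsing and surface as ordinary pydantic `ValidationError`s. A sketch of the two failure cases (the IDs are placeholders; note that the storage and data_collection field validators shown earlier call the backend, so constructing `UploadParams` outside a real action context may fail before these checks are reached):

```python
from pydantic import ValidationError

try:
    # Single-path mode without a path -> 'missing_path'
    UploadParams(name='demo', use_single_path=True, storage=1, data_collection=5)
except ValidationError as e:
    print(e)

try:
    # Multi-path mode without assets -> 'missing_assets'
    UploadParams(name='demo', use_single_path=False, storage=1, data_collection=5)
except ValidationError as e:
    print(e)
```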
synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py:

```diff
@@ -21,8 +21,8 @@ class CleanupStep(BaseStep):
     def execute(self, context: UploadContext) -> StepResult:
         """Execute cleanup step."""
         try:
-            # Cleanup temporary directory
-            self._cleanup_temp_directory(context)
+            # Cleanup temporary directory - commented out because duplicated process with ray cleanup process
+            # self._cleanup_temp_directory(context)
 
             # Log completion
             context.run.log_message_with_code(LogCode.IMPORT_COMPLETED)
```
synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py:

```diff
@@ -31,7 +31,9 @@ class GenerateDataUnitsStep(BaseStep):
         context.run.log_message_with_code(LogCode.GENERATING_DATA_UNITS)
 
         # Initialize metrics
-
+        initial_metrics = {'stand_by': upload_result_count, 'success': 0, 'failed': 0}
+        context.update_metrics('data_units', initial_metrics)
+        context.run.set_metrics(initial_metrics, category='data_units')
 
         # Get batch size from parameters
         batch_size = context.get_param('creating_data_unit_batch_size', 1)
@@ -49,7 +51,9 @@ class GenerateDataUnitsStep(BaseStep):
         )
 
         # Update final metrics
-
+        final_metrics = {'stand_by': 0, 'success': len(generated_data_units), 'failed': 0}
+        context.update_metrics('data_units', final_metrics)
+        context.run.set_metrics(final_metrics, category='data_units')
 
         # Complete progress
         context.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
```
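Both updates keep the same three-bucket shape, so counts are conserved: every data unit starts in `stand_by` and ends in `success` (or, in principle, `failed`). A toy illustration of that invariant, assuming all units succeed:

```python
# Toy illustration of the data_units metric lifecycle above.
upload_result_count = 10

metrics = {'stand_by': upload_result_count, 'success': 0, 'failed': 0}  # initialize
# ... data units are generated in batches ...
metrics = {'stand_by': 0, 'success': upload_result_count, 'failed': 0}  # finalize

assert sum(metrics.values()) == upload_result_count
```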