PyPI - synapse-sdk - Versions diffs - 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl - Mend

synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""Validation strategies for ToTask action."""
+from typing import Any, Dict
+from ..enums import LogCode
+from .base import ToTaskContext, ValidationStrategy
+class ProjectValidationStrategy(ValidationStrategy):
+    """Strategy for validating project and data collection."""
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate project and data collection exist and are accessible.
+        Args:
+            context: Shared context for the action execution
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+            project_id = context.params['project']
+            # Validate project response
+            project_response = client.get_project(project_id)
+            if isinstance(project_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_PROJECT_RESPONSE)
+                return {'success': False, 'error': 'Invalid project response received'}
+            project: Dict[str, Any] = project_response
+            context.project = project
+            # Validate data collection exists
+            data_collection_id = project.get('data_collection')
+            if not data_collection_id:
+                context.logger.log_message_with_code(LogCode.NO_DATA_COLLECTION)
+                return {'success': False, 'error': 'Project does not have a data collection'}
+            # Validate data collection response
+            data_collection_response = client.get_data_collection(data_collection_id)
+            if isinstance(data_collection_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_DATA_COLLECTION_RESPONSE)
+                return {'success': False, 'error': 'Invalid data collection response received'}
+            data_collection: Dict[str, Any] = data_collection_response
+            context.data_collection = data_collection
+            return {'success': True}
+        except Exception as e:
+            error_msg = f'Project validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+class TaskValidationStrategy(ValidationStrategy):
+    """Strategy for validating and discovering tasks."""
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Discover and validate tasks for processing.
+        Args:
+            context: Shared context for the action execution
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+            # Build task query parameters
+            task_ids_query_params = {
+                'project': context.params['project'],
+                'fields': 'id',
+            }
+            if context.params.get('task_filters'):
+                task_ids_query_params.update(context.params['task_filters'])
+            # Get tasks
+            task_ids_generator, task_ids_count = client.list_tasks(params=task_ids_query_params, list_all=True)
+            task_ids = [
+                int(item.get('id', 0)) for item in task_ids_generator if isinstance(item, dict) and item.get('id')
+            ]
+            # Validate tasks found
+            if not task_ids_count:
+                context.logger.log_message_with_code(LogCode.NO_TASKS_FOUND)
+                return {'success': False, 'error': 'No tasks found to annotate'}
+            context.task_ids = task_ids
+            return {'success': True, 'task_count': len(task_ids)}
+        except Exception as e:
+            error_msg = f'Task validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+class TargetSpecificationValidationStrategy(ValidationStrategy):
+    """Strategy for validating target specification for file annotation."""
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate target specification exists in file specifications.
+        Args:
+            context: Shared context for the action execution
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            # Only validate if using FILE annotation method
+            from ..enums import AnnotationMethod
+            if context.annotation_method != AnnotationMethod.FILE:
+                return {'success': True}
+            target_specification_name = context.params.get('target_specification_name')
+            if not target_specification_name:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_REQUIRED)
+                return {'success': False, 'error': 'Target specification name is required for file annotation method'}
+            # Check if target specification exists in file specifications
+            if not context.data_collection:
+                return {'success': False, 'error': 'Data collection not available for validation'}
+            file_specifications = context.data_collection.get('file_specifications', [])
+            target_spec_exists = any(spec.get('name') == target_specification_name for spec in file_specifications)
+            if not target_spec_exists:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_NOT_FOUND, target_specification_name)
+                return {
+                    'success': False,
+                    'error': f"Target specification '{target_specification_name}' not found in file specifications",
+                }
+            return {'success': True}
+        except Exception as e:
+            error_msg = f'Target specification validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}

synapse_sdk/plugins/categories/upload/actions/upload/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+from .action import UploadAction
+from .enums import LOG_MESSAGES, LogCode, UploadStatus
+from .exceptions import ExcelParsingError, ExcelSecurityError
+from .models import UploadParams
+from .run import UploadRun
+from .utils import ExcelSecurityConfig, PathAwareJSONEncoder
+# Names re-exported by this package; each one is imported from a submodule above.
+__all__ = [
+    'UploadAction',
+    'UploadRun',
+    'UploadParams',
+    'UploadStatus',
+    'LogCode',
+    'LOG_MESSAGES',
+    'ExcelSecurityError',
+    'ExcelParsingError',
+    'PathAwareJSONEncoder',
+    'ExcelSecurityConfig',
+]

synapse_sdk/plugins/categories/upload/actions/upload/action.py ADDED Viewed

@@ -0,0 +1,236 @@
+from typing import Any, Dict, Optional
+from synapse_sdk.plugins.categories.base import Action
+from synapse_sdk.plugins.categories.decorators import register_action
+from synapse_sdk.plugins.enums import PluginCategory, RunMethod
+from synapse_sdk.plugins.exceptions import ActionError
+from .context import UploadContext
+from .enums import LogCode
+from .factory import StrategyFactory
+from .models import UploadParams
+from .orchestrator import UploadOrchestrator
+from .registry import StepRegistry
+from .run import UploadRun
+from .steps.cleanup import CleanupStep
+from .steps.collection import AnalyzeCollectionStep
+from .steps.generate import GenerateDataUnitsStep
+from .steps.initialize import InitializeStep
+from .steps.metadata import ProcessMetadataStep
+from .steps.organize import OrganizeFilesStep
+from .steps.upload import UploadFilesStep
+from .steps.validate import ValidateFilesStep
+from .utils import ExcelSecurityConfig
+@register_action
+class UploadAction(Action):
+    """Upload action for processing and uploading files to storage.
+    This implementation uses Strategy and Facade patterns to provide a clean,
+    extensible architecture for upload operations. The monolithic legacy
+    implementation has been refactored into pluggable strategies and workflow steps.
+    Features:
+    - Strategy pattern for pluggable behaviors (validation, file discovery, etc.)
+    - Facade pattern with UploadOrchestrator for simplified workflow management
+    - Step-based workflow with automatic rollback on failures
+    - Comprehensive error handling and progress tracking
+    - Easy extensibility for new strategies and workflow steps
+    Class Attributes:
+        name (str): Action identifier ('upload')
+        category (PluginCategory): UPLOAD category
+        method (RunMethod): JOB execution method
+        run_class (type): UploadRun for specialized logging
+        params_model (type): UploadParams for parameter validation
+        progress_categories (dict): Progress tracking configuration
+        metrics_categories (dict): Metrics collection configuration
+    Example:
+        >>> action = UploadAction(
+        ...     params={
+        ...         'name': 'Data Upload',
+        ...         'path': '/data/files',
+        ...         'storage': 1,
+        ...         'data_collection': 5
+        ...     },
+        ...     plugin_config=config
+        ... )
+        >>> result = action.start()
+    """
+    name = 'upload'
+    category = PluginCategory.UPLOAD
+    method = RunMethod.JOB
+    run_class = UploadRun
+    params_model = UploadParams
+    progress_categories = {
+        'analyze_collection': {
+            'proportion': 2,
+        },
+        'upload_data_files': {
+            'proportion': 38,
+        },
+        'generate_data_units': {
+            'proportion': 60,
+        },
+    }
+    metrics_categories = {
+        'data_files': {
+            'stand_by': 0,
+            'failed': 0,
+            'success': 0,
+        },
+        'data_units': {
+            'stand_by': 0,
+            'failed': 0,
+            'success': 0,
+        },
+    }
+    def __init__(self, *args, **kwargs):
+        """Initialize the upload action."""
+        super().__init__(*args, **kwargs)
+        # Initialize Excel configuration from config.yaml
+        self.excel_config = ExcelSecurityConfig.from_action_config(self.config)
+        self.strategy_factory = StrategyFactory()
+        self.step_registry = StepRegistry()
+        self._configure_workflow()
+    def _configure_workflow(self) -> None:
+        """Configure workflow steps based on parameters.
+        Registers all workflow steps in the correct order. Steps can be
+        dynamically added, removed, or reordered for different use cases.
+        """
+        # Register steps in execution order
+        self.step_registry.register(InitializeStep())
+        self.step_registry.register(ProcessMetadataStep())
+        self.step_registry.register(AnalyzeCollectionStep())
+        self.step_registry.register(OrganizeFilesStep())
+        self.step_registry.register(ValidateFilesStep())
+        self.step_registry.register(UploadFilesStep())
+        self.step_registry.register(GenerateDataUnitsStep())
+        self.step_registry.register(CleanupStep())
+    def start(self) -> Dict[str, Any]:
+        """Execute upload workflow with uploader integration.
+        This method integrates the essential uploader mechanism with the new
+        strategy pattern architecture while maintaining backward compatibility.
+        Returns:
+            Dict[str, Any]: Upload result with file counts, success status, and metrics
+        Raises:
+            ActionError: If upload workflow fails
+        """
+        try:
+            # Ensure params is not None
+            params = self.params or {}
+            # Create upload context for sharing state between steps
+            context = UploadContext(params, self.run, self.client, action=self)
+            # Configure strategies based on parameters with context
+            strategies = self._configure_strategies(context)
+            # Create orchestrator but run it with uploader integration
+            orchestrator = UploadOrchestrator(context, self.step_registry, strategies)
+            # Execute the workflow steps, but intercept after organize step
+            result = self._execute_with_uploader_integration(orchestrator, context)
+            return result
+        except Exception as e:
+            # Log the error and re-raise as ActionError
+            if self.run:
+                self.run.log_message_with_code(LogCode.UPLOAD_WORKFLOW_FAILED, str(e))
+            raise ActionError(f'Upload failed: {str(e)}')
+        finally:
+            # Always emit completion log so backend can record end time even on failures
+            if self.run:
+                self.run.end_log()
+    def _execute_with_uploader_integration(self, orchestrator, context) -> Dict[str, Any]:
+        """Execute workflow with proper uploader integration."""
+        # Inject strategies into context before executing steps
+        orchestrator._inject_strategies_into_context()
+        # Run initial steps up to file organization
+        steps = orchestrator.step_registry.get_steps()
+        # Execute steps one by one until we reach the organization step
+        for i, step in enumerate(steps):
+            if step.name in ['initialize', 'process_metadata', 'analyze_collection', 'organize_files']:
+                try:
+                    result = step.safe_execute(context)
+                    context.update(result)
+                    if not result.success:
+                        raise Exception(f"Step '{step.name}' failed: {result.error}")
+                except Exception as e:
+                    raise ActionError(f"Failed at step '{step.name}': {str(e)}")
+        # Execute remaining steps
+        for step in steps:
+            if step.name in ['validate_files', 'upload_files', 'generate_data_units', 'cleanup']:
+                try:
+                    result = step.safe_execute(context)
+                    context.update(result)
+                    if not result.success:
+                        raise Exception(f"Step '{step.name}' failed: {result.error}")
+                except Exception as e:
+                    raise ActionError(f"Failed at step '{step.name}': {str(e)}")
+        # Return the final result from context
+        return context.get_result()
+    def _configure_strategies(self, context=None) -> Dict[str, Any]:
+        """Configure strategies based on parameters.
+        Uses the Strategy pattern to create appropriate strategy implementations
+        based on the action parameters. This allows for runtime selection of
+        different behaviors (recursive vs flat discovery, batch vs single data unit creation, etc.).
+        Args:
+            context: UploadContext for strategies that need access to client/run
+        Returns:
+            Dict[str, Any]: Dictionary of strategy instances keyed by type
+        """
+        # Ensure params is not None
+        params = self.params or {}
+        return {
+            'validation': self.strategy_factory.create_validation_strategy(params, context),
+            'file_discovery': self.strategy_factory.create_file_discovery_strategy(params, context),
+            'metadata': self.strategy_factory.create_metadata_strategy(params, context),
+            'upload': self.strategy_factory.create_upload_strategy(params, context),
+            'data_unit': self.strategy_factory.create_data_unit_strategy(params, context),
+        }
+    def get_uploader(self, path, file_specification, organized_files, params: Dict = {}):
+        """Get uploader from entrypoint (compatibility method).
+        This method is kept for backward compatibility with existing code
+        that may still call it directly.
+        """
+        return self.entrypoint(
+            self.run, path, file_specification, organized_files, extra_params=params.get('extra_params')
+        )
+    def get_workflow_summary(self) -> Dict[str, Any]:
+        """Get summary of configured workflow.
+        Returns:
+            Dict[str, Any]: Summary of steps and strategies
+        """
+        return {
+            'steps': [step.name for step in self.step_registry.get_steps()],
+            'step_count': len(self.step_registry),
+            'total_progress_weight': self.step_registry.get_total_progress_weight(),
+            'available_strategies': self.strategy_factory.get_available_strategies(),
+        }

synapse_sdk/plugins/categories/upload/actions/upload/context.py ADDED Viewed

@@ -0,0 +1,185 @@
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from .run import UploadRun
+class StepResult:
+    """Result of a workflow step execution."""
+    def __init__(
+        self,
+        success: bool = True,
+        data: Dict[str, Any] = None,
+        error: str = None,
+        rollback_data: Dict[str, Any] = None,
+        skipped: bool = False,
+        original_exception: Optional[Exception] = None,
+    ):
+        self.success = success
+        self.data = data or {}
+        self.error = error
+        self.rollback_data = rollback_data or {}
+        self.skipped = skipped
+        self.original_exception = original_exception
+        self.timestamp = datetime.now()
+    def __bool__(self):
+        return self.success
+class UploadContext:
+    """Shared context for all upload workflow steps."""
+    def __init__(self, params: Dict, run: UploadRun, client: Any, action: Any = None):
+        self.params = params
+        self.run = run
+        self.client = client
+        self._action = action  # Reference to parent action for uploader access
+        # Core state
+        self.storage = None
+        self.pathlib_cwd = None
+        self.metadata: Dict[str, Dict[str, Any]] = {}
+        self.file_specifications: Dict[str, Any] = {}
+        self.organized_files: List[Dict[str, Any]] = []
+        self.uploaded_files: List[Dict[str, Any]] = []
+        self.data_units: List[Dict[str, Any]] = []
+        # Progress and metrics
+        self.metrics: Dict[str, Any] = {}
+        self.errors: List[str] = []
+        self.step_results: List[StepResult] = []
+        # Strategies (injected by orchestrator)
+        self.strategies: Dict[str, Any] = {}
+        # Rollback information
+        self.rollback_data: Dict[str, Any] = {}
+    def update(self, result: StepResult) -> None:
+        """Update context with step results."""
+        self.step_results.append(result)
+        if result.success:
+            # Update context state with step data
+            for key, value in result.data.items():
+                if hasattr(self, key):
+                    setattr(self, key, value)
+                else:
+                    # Store in a general data dictionary
+                    if not hasattr(self, 'step_data'):
+                        self.step_data = {}
+                    self.step_data[key] = value
+            # Store rollback data
+            if result.rollback_data:
+                self.rollback_data.update(result.rollback_data)
+        else:
+            # Record error
+            if result.error:
+                self.errors.append(result.error)
+    def get_result(self) -> Dict[str, Any]:
+        """Get final result dictionary."""
+        return {
+            'uploaded_files_count': len(self.uploaded_files),
+            'generated_data_units_count': len(self.data_units),
+            'success': len(self.errors) == 0,
+            'errors': self.errors,
+        }
+    def has_errors(self) -> bool:
+        """Check if context has any errors."""
+        return len(self.errors) > 0
+    def get_last_step_result(self) -> Optional[StepResult]:
+        """Get the result of the last executed step."""
+        return self.step_results[-1] if self.step_results else None
+    def get_step_result_by_name(self, step_name: str) -> Optional[StepResult]:
+        """Get step result by step name (stored in rollback_data)."""
+        for result in self.step_results:
+            if result.rollback_data.get('step_name') == step_name:
+                return result
+        return None
+    def clear_errors(self) -> None:
+        """Clear all errors (useful for retry scenarios)."""
+        self.errors.clear()
+    def add_error(self, error: str) -> None:
+        """Add an error to the context."""
+        self.errors.append(error)
+    def get_param(self, key: str, default: Any = None) -> Any:
+        """Get parameter value with default."""
+        return self.params.get(key, default)
+    def set_storage(self, storage: Any) -> None:
+        """Set storage object."""
+        self.storage = storage
+    def set_pathlib_cwd(self, path: Path) -> None:
+        """Set current working directory path."""
+        self.pathlib_cwd = path
+    def set_file_specifications(self, specs: Dict[str, Any]) -> None:
+        """Set file specifications."""
+        self.file_specifications = specs
+    def add_organized_files(self, files: List[Dict[str, Any]]) -> None:
+        """Add organized files to context."""
+        self.organized_files.extend(files)
+    def add_uploaded_files(self, files: List[Dict[str, Any]]) -> None:
+        """Add uploaded files to context."""
+        self.uploaded_files.extend(files)
+    def add_data_units(self, units: List[Dict[str, Any]]) -> None:
+        """Add data units to context."""
+        self.data_units.extend(units)
+    def update_metrics(self, category: str, metrics: Dict[str, Any]) -> None:
+        """Update metrics for a specific category."""
+        if category not in self.metrics:
+            self.metrics[category] = {}
+        self.metrics[category].update(metrics)
+    def get(self, key: str, default: Any = None) -> Any:
+        """Get value from context by key."""
+        # First check direct attributes
+        if hasattr(self, key):
+            return getattr(self, key)
+        # Then check step_data if it exists
+        if hasattr(self, 'step_data') and key in self.step_data:
+            return self.step_data[key]
+        # Special mappings for expected keys
+        if key == 'file_specification_template':
+            return self.file_specifications
+        elif key == 'pathlib_cwd':
+            return self.pathlib_cwd
+        elif key == 'organized_files':
+            return self.organized_files
+        return default
+    def set(self, key: str, value: Any) -> None:
+        """Set value in context by key."""
+        # Special mappings for expected keys
+        if key == 'file_specification_template':
+            self.file_specifications = value
+        elif key == 'pathlib_cwd':
+            self.pathlib_cwd = value
+        elif key == 'organized_files':
+            self.organized_files = value
+        elif hasattr(self, key):
+            setattr(self, key, value)
+        else:
+            # Store in step_data
+            if not hasattr(self, 'step_data'):
+                self.step_data = {}
+            self.step_data[key] = value

synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl