synapse-sdk 2025.9.1__py3-none-any.whl → 2025.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
- synapse_sdk/devtools/docs/docs/api/clients/backend.md +368 -1
- synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
- synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
- synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
- synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
- synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +1497 -213
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +378 -11
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +1752 -572
- synapse_sdk/devtools/docs/sidebars.ts +7 -0
- synapse_sdk/plugins/README.md +1 -2
- synapse_sdk/plugins/categories/base.py +7 -0
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +160 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +1 -1
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +1 -2
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +154 -531
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +66 -29
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +182 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +106 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +80 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +66 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +101 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +89 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +96 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +61 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +86 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +34 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +233 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +253 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/async_upload.py +109 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +43 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +45 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +194 -83
- synapse_sdk/plugins/categories/upload/templates/config.yaml +4 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +269 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +71 -27
- synapse_sdk/plugins/models.py +7 -0
- synapse_sdk/shared/__init__.py +21 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/METADATA +2 -1
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/RECORD +79 -28
- synapse_sdk/plugins/categories/export/actions/export.py +0 -385
- synapse_sdk/plugins/categories/export/enums.py +0 -7
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from .steps.base import BaseStep
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StepRegistry:
    """Ordered registry of workflow steps, addressable by unique step name.

    Steps are stored twice: in ``_steps`` (a list that defines execution
    order) and in ``_step_by_name`` (a dict used for name lookups). Every
    mutating method keeps both containers in sync.
    """

    def __init__(self):
        self._steps: List[BaseStep] = []
        self._step_by_name: Dict[str, BaseStep] = {}

    def register(self, step: BaseStep) -> None:
        """Append a step to the end of the workflow.

        Raises:
            ValueError: If a step with the same name is already registered.
        """
        if step.name in self._step_by_name:
            raise ValueError(f"Step with name '{step.name}' already registered")

        self._steps.append(step)
        self._step_by_name[step.name] = step

    def unregister(self, step_name: str) -> bool:
        """Remove a step by name.

        Returns:
            True if the step was found and removed, False if no step with
            that name is registered.
        """
        step = self._step_by_name.pop(step_name, None)
        if step is None:
            return False

        self._steps.remove(step)
        return True

    def get_steps(self) -> List[BaseStep]:
        """Return a shallow copy of the registered steps in execution order."""
        return self._steps.copy()

    def get_step(self, name: str) -> Optional[BaseStep]:
        """Return the step registered under ``name``, or None if there is none."""
        return self._step_by_name.get(name)

    def has_step(self, name: str) -> bool:
        """Return True if a step named ``name`` is registered."""
        return name in self._step_by_name

    def clear(self) -> None:
        """Remove every registered step."""
        self._steps.clear()
        self._step_by_name.clear()

    def get_step_names(self) -> List[str]:
        """Return the names of all registered steps.

        The names come from the lookup dict, so they are listed in the order
        they were added to the registry; after an insert or a reorder this
        can differ from the execution order returned by ``get_steps``.
        """
        return list(self._step_by_name)

    def get_total_progress_weight(self) -> float:
        """Return the sum of ``progress_weight`` over all registered steps."""
        return sum(step.progress_weight for step in self._steps)

    def _insert_relative(self, anchor_name: str, new_step: BaseStep, offset: int) -> None:
        """Insert ``new_step`` at the anchor step's position plus ``offset``.

        Raises:
            ValueError: If the anchor step is not registered, or if a step
                named like ``new_step`` is already registered.
        """
        if anchor_name not in self._step_by_name:
            raise ValueError(f"Step '{anchor_name}' not found")

        if new_step.name in self._step_by_name:
            raise ValueError(f"Step with name '{new_step.name}' already registered")

        position = self._steps.index(self._step_by_name[anchor_name]) + offset
        self._steps.insert(position, new_step)
        self._step_by_name[new_step.name] = new_step

    def insert_step_after(self, after_step_name: str, new_step: BaseStep) -> None:
        """Insert ``new_step`` directly after the step named ``after_step_name``.

        Raises:
            ValueError: If ``after_step_name`` is not registered or the new
                step's name is already taken.
        """
        self._insert_relative(after_step_name, new_step, offset=1)

    def insert_step_before(self, before_step_name: str, new_step: BaseStep) -> None:
        """Insert ``new_step`` directly before the step named ``before_step_name``.

        Raises:
            ValueError: If ``before_step_name`` is not registered or the new
                step's name is already taken.
        """
        self._insert_relative(before_step_name, new_step, offset=0)

    def reorder_steps(self, step_names: List[str]) -> None:
        """Reorder the steps to follow ``step_names``.

        ``step_names`` must name every registered step exactly once.

        Raises:
            ValueError: If the names do not match the registered steps, or
                if any name appears more than once.
        """
        if set(step_names) != set(self._step_by_name):
            raise ValueError('Step names list must contain all registered steps')

        # A set comparison alone lets duplicates through, which would put the
        # same step into the execution list twice and desync it from the dict.
        if len(step_names) != len(self._step_by_name):
            raise ValueError('Step names list must not contain duplicates')

        self._steps = [self._step_by_name[name] for name in step_names]

    def __len__(self) -> int:
        """Return number of registered steps."""
        return len(self._steps)

    def __iter__(self):
        """Iterate over registered steps in execution order."""
        return iter(self._steps)

    def __contains__(self, step_name: str) -> bool:
        """Check if step name is registered."""
        return step_name in self._step_by_name

    def __str__(self) -> str:
        step_names = [step.name for step in self._steps]
        return f'StepRegistry({step_names})'

    def __repr__(self) -> str:
        return f'StepRegistry(steps={len(self._steps)})'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Step-based workflow implementations for upload actions
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
|
|
4
|
+
from ..context import StepResult, UploadContext
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseStep(ABC):
    """Abstract base class for all workflow steps.

    Subclasses supply ``name``, ``progress_weight``, ``execute``, ``can_skip``
    and ``rollback``. ``safe_execute`` is the wrapper that runs a step with
    prerequisite validation, skip handling, logging and exception capture.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Step name for logging and tracking."""

    @property
    @abstractmethod
    def progress_weight(self) -> float:
        """Relative weight for progress calculation (0.0 to 1.0)."""

    @abstractmethod
    def execute(self, context: UploadContext) -> StepResult:
        """Execute the step logic."""

    @abstractmethod
    def can_skip(self, context: UploadContext) -> bool:
        """Determine if this step can be skipped based on context."""

    @abstractmethod
    def rollback(self, context: UploadContext) -> None:
        """Rollback changes made by this step."""

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Validate step prerequisites. Raises exception if not met.

        The default implementation accepts any context; subclasses override
        it to raise when something they need is missing.
        """

    def log_step_start(self, context: UploadContext) -> None:
        """Log step start."""
        context.run.log_message(f'Starting step: {self.name}')

    def log_step_complete(self, context: UploadContext) -> None:
        """Log step completion."""
        context.run.log_message(f'Completed step: {self.name}')

    def log_step_skipped(self, context: UploadContext) -> None:
        """Log step skipped."""
        context.run.log_message(f'Skipped step: {self.name}')

    def log_step_error(self, context: UploadContext, error: str) -> None:
        """Log step error."""
        context.run.log_message(f'Error in step {self.name}: {error}')

    def _tagged_rollback_data(self, rollback_data: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a copy of ``rollback_data`` with this step's name added."""
        # Copy first so the caller's dict is never mutated as a side effect.
        tagged = dict(rollback_data) if rollback_data else {}
        tagged['step_name'] = self.name
        return tagged

    def create_success_result(
        self,
        data: Optional[Dict[str, Any]] = None,
        rollback_data: Optional[Dict[str, Any]] = None,
        skipped: bool = False,
    ) -> StepResult:
        """Create a successful step result.

        Args:
            data: Payload produced by the step; an empty dict is used if omitted.
            rollback_data: Extra information recorded for rollback. The step
                name is added under ``'step_name'`` on a copy of this dict.
            skipped: Whether the step was skipped rather than executed.
        """
        return StepResult(
            success=True,
            data=data or {},
            rollback_data=self._tagged_rollback_data(rollback_data),
            skipped=skipped,
        )

    def create_error_result(
        self, error: str, rollback_data: Optional[Dict[str, Any]] = None, original_exception: Optional[Exception] = None
    ) -> StepResult:
        """Create an error step result.

        Args:
            error: Human-readable error message.
            rollback_data: Extra information recorded for rollback. The step
                name is added under ``'step_name'`` on a copy of this dict.
            original_exception: The exception that caused the failure, if any.
        """
        return StepResult(
            success=False,
            error=error,
            rollback_data=self._tagged_rollback_data(rollback_data),
            original_exception=original_exception,
        )

    def safe_execute(self, context: UploadContext) -> StepResult:
        """Execute step with error handling and logging.

        Validates prerequisites, returns a skipped success result when
        ``can_skip`` says so, otherwise runs ``execute`` and logs the
        outcome. Any exception raised along the way is converted into an
        error result carrying the original exception.
        """
        try:
            self.validate_prerequisites(context)

            if self.can_skip(context):
                self.log_step_skipped(context)
                return self.create_success_result(skipped=True)

            self.log_step_start(context)
            result = self.execute(context)

            if result.success:
                self.log_step_complete(context)
            else:
                self.log_step_error(context, result.error or 'Unknown error')

            return result

        except Exception as e:
            error_msg = f'Exception in step {self.name}: {str(e)}'
            try:
                self.log_step_error(context, error_msg)
            except Exception:
                # Logging is best-effort here: if the logger itself is broken
                # (for example the context has no run), the original failure
                # must still reach the caller as an error result.
                pass
            return self.create_error_result(error_msg, original_exception=e)

    def __str__(self):
        return f'{self.__class__.__name__}(name={self.name})'

    def __repr__(self):
        return f"{self.__class__.__name__}(name='{self.name}', weight={self.progress_weight})"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ..context import StepResult, UploadContext
|
|
6
|
+
from ..enums import LogCode
|
|
7
|
+
from .base import BaseStep
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CleanupStep(BaseStep):
    """Cleanup temporary resources and finalize workflow."""

    @property
    def name(self) -> str:
        return 'cleanup'

    @property
    def progress_weight(self) -> float:
        return 0.05

    def execute(self, context: UploadContext) -> StepResult:
        """Execute cleanup step.

        Removes the temporary directory and logs the completion code. This
        step always returns a success result: a cleanup failure is logged as
        a warning and reported through ``cleanup_completed=False``.
        """
        try:
            self._cleanup_temp_directory(context)

            context.run.log_message_with_code(LogCode.IMPORT_COMPLETED)

            return self.create_success_result(data={'cleanup_completed': True}, rollback_data={'temp_cleaned': True})

        except Exception as e:
            # Cleanup failures shouldn't stop the workflow
            context.run.log_message(f'Cleanup warning: {str(e)}')
            return self.create_success_result(
                data={'cleanup_completed': False}, rollback_data={'cleanup_error': str(e)}
            )

    def can_skip(self, context: UploadContext) -> bool:
        """Cleanup step can be skipped if the ``skip_cleanup`` parameter is set."""
        return context.get_param('skip_cleanup', False)

    def rollback(self, context: UploadContext) -> None:
        """Rollback cleanup (nothing to rollback for cleanup)."""
        context.run.log_message('Cleanup step rollback - no action needed')

    def _cleanup_temp_directory(self, context: UploadContext, temp_path: 'Path | None' = None) -> None:
        """Remove the temporary directory on a best-effort basis.

        Args:
            context: Upload context whose run is used for logging.
            temp_path: Directory to remove. Defaults to ``temp`` under the
                current working directory.
        """
        if temp_path is None:
            try:
                temp_path = Path(os.getcwd()) / 'temp'
            except OSError:
                # The working directory could not be resolved, so there is no
                # default location to clean.
                return

        if not temp_path.exists():
            return

        try:
            shutil.rmtree(temp_path, ignore_errors=True)
            # ignore_errors=True suppresses removal errors, so success has to
            # be checked explicitly instead of assumed.
            still_present = temp_path.exists()
        except Exception as e:
            context.run.log_message(f'Failed to cleanup temporary directory: {str(e)}')
            return

        if still_present:
            context.run.log_message(f'Failed to cleanup temporary directory: {temp_path} could not be fully removed')
        else:
            context.run.log_message(f'Cleaned up temporary directory: {temp_path}')
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from synapse_sdk.plugins.exceptions import ActionError
|
|
2
|
+
|
|
3
|
+
from ..context import StepResult, UploadContext
|
|
4
|
+
from .base import BaseStep
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AnalyzeCollectionStep(BaseStep):
    """Workflow step that fetches the data collection and records its file specifications."""

    @property
    def name(self) -> str:
        return 'analyze_collection'

    @property
    def progress_weight(self) -> float:
        return 0.05

    def execute(self, context: UploadContext) -> StepResult:
        """Fetch the collection named by ``data_collection`` and store its file specifications.

        Returns an error result when the parameter is missing or when any
        call made while analyzing the collection raises.
        """
        collection_id = context.get_param('data_collection')
        if collection_id is None:
            return self.create_error_result('Data collection parameter is required')

        def report(done: int) -> None:
            # Progress for this step is tracked as "done out of 2" stages.
            context.run.set_progress(done, 2, category='analyze_collection')

        try:
            report(0)

            fetched = context.client.get_data_collection(collection_id)
            report(1)

            specs = fetched.get('file_specifications', [])
            context.set_file_specifications(specs)
            report(2)

            payload = {'file_specifications': specs}
            undo_info = {'collection_id': collection_id}
            return self.create_success_result(data=payload, rollback_data=undo_info)

        except Exception as exc:
            return self.create_error_result(f'Failed to analyze collection {collection_id}: {str(exc)}')

    def can_skip(self, context: UploadContext) -> bool:
        """Always False: collection analysis is never skipped."""
        return False

    def rollback(self, context: UploadContext) -> None:
        """Empty the context's file specifications and log the rollback."""
        context.file_specifications.clear()
        context.run.log_message('Rolled back collection analysis')

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Require a client and a ``data_collection`` parameter.

        Raises:
            ActionError: If the client or the parameter is missing.
        """
        if context.client is None:
            raise ActionError('Client is required for collection analysis')

        collection_param = context.get_param('data_collection')
        if collection_param is None:
            raise ActionError('Data collection parameter is required')
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from ..context import StepResult, UploadContext
|
|
2
|
+
from ..enums import LogCode, UploadStatus
|
|
3
|
+
from .base import BaseStep
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GenerateDataUnitsStep(BaseStep):
    """Workflow step that builds data units from the uploaded files via the configured strategy."""

    @property
    def name(self) -> str:
        return 'generate_data_units'

    @property
    def progress_weight(self) -> float:
        return 0.20

    def execute(self, context: UploadContext) -> StepResult:
        """Generate data units from ``context.uploaded_files``.

        Returns an error result when no ``data_unit`` strategy is configured,
        when there are no uploaded files, or when generation raises.
        """
        strategy = context.strategies.get('data_unit')
        if not strategy:
            return self.create_error_result('Data unit strategy not found')

        if not context.uploaded_files:
            context.run.log_message_with_code(LogCode.NO_DATA_UNITS_GENERATED)
            return self.create_error_result('No uploaded files to generate data units from')

        try:
            # Progress is measured against the number of uploaded files.
            total = len(context.uploaded_files)
            context.run.set_progress(0, total, category='generate_data_units')
            context.run.log_message_with_code(LogCode.GENERATING_DATA_UNITS)

            # Everything starts out as pending.
            context.update_metrics('data_units', {'stand_by': total, 'success': 0, 'failed': 0})

            batch_size = context.get_param('creating_data_unit_batch_size', 1)
            units = strategy.generate(context.uploaded_files, batch_size)
            context.add_data_units(units)

            # Record each generated unit as a success in the run log.
            for unit in units:
                context.run.log_data_unit(unit.get('id'), UploadStatus.SUCCESS, data_unit_meta=unit.get('meta'))

            created = len(units)
            context.update_metrics('data_units', {'stand_by': 0, 'success': created, 'failed': 0})
            context.run.set_progress(total, total, category='generate_data_units')

            payload = {'generated_data_units': units}
            undo_info = {'data_units_count': created, 'batch_size': batch_size}
            return self.create_success_result(data=payload, rollback_data=undo_info)

        except Exception as exc:
            context.run.log_message_with_code(LogCode.DATA_UNIT_BATCH_FAILED, str(exc))
            return self.create_error_result(f'Data unit generation failed: {str(exc)}')

    def can_skip(self, context: UploadContext) -> bool:
        """Always False: data unit generation is never skipped."""
        return False

    def rollback(self, context: UploadContext) -> None:
        """Clear the context's data unit list and log the rollback.

        Only the in-memory list is cleared; nothing here deletes the
        generated data units themselves.
        """
        context.data_units.clear()
        context.run.log_message('Rolled back data unit generation')

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Require at least one uploaded file.

        Raises:
            ValueError: If ``context.uploaded_files`` is empty.
        """
        has_uploads = bool(context.uploaded_files)
        if not has_uploads:
            raise ValueError('No uploaded files available for data unit generation')
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from synapse_sdk.plugins.exceptions import ActionError
|
|
2
|
+
from synapse_sdk.utils.storage import get_pathlib
|
|
3
|
+
|
|
4
|
+
from ..context import StepResult, UploadContext
|
|
5
|
+
from .base import BaseStep
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class InitializeStep(BaseStep):
    """Workflow step that resolves the storage and working path for the upload."""

    @property
    def name(self) -> str:
        return 'initialize'

    @property
    def progress_weight(self) -> float:
        return 0.05

    def execute(self, context: UploadContext) -> StepResult:
        """Look up the storage, resolve the working path and store both on the context.

        Returns an error result when the ``storage`` or ``path`` parameter is
        missing, or when resolving either of them raises.
        """
        # Stage 1: storage lookup.
        storage_id = context.get_param('storage')
        if storage_id is None:
            return self.create_error_result('Storage parameter is required')

        try:
            storage = context.client.get_storage(storage_id)
            context.set_storage(storage)
        except Exception as exc:
            return self.create_error_result(f'Failed to get storage {storage_id}: {str(exc)}')

        # Stage 2: working path inside that storage.
        path = context.get_param('path')
        if path is None:
            return self.create_error_result('Path parameter is required')

        try:
            working_dir = get_pathlib(storage, path)
            context.set_pathlib_cwd(working_dir)
        except Exception as exc:
            return self.create_error_result(f'Failed to get path {path}: {str(exc)}')

        payload = {'storage': storage, 'pathlib_cwd': working_dir}
        undo_info = {'storage_id': storage_id, 'path': path}
        return self.create_success_result(data=payload, rollback_data=undo_info)

    def can_skip(self, context: UploadContext) -> bool:
        """Always False: initialization is never skipped."""
        return False

    def rollback(self, context: UploadContext) -> None:
        """Log the rollback; initialization performs no other rollback action."""
        context.run.log_message('Rolling back initialization step')

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Require both a client and a run instance on the context.

        Raises:
            ActionError: If either of them is missing.
        """
        if context.client is None:
            raise ActionError('Client is required for initialization')

        if context.run is None:
            raise ActionError('Run instance is required for initialization')
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from ..context import StepResult, UploadContext
|
|
4
|
+
from ..enums import LogCode
|
|
5
|
+
from ..exceptions import ExcelParsingError, ExcelSecurityError
|
|
6
|
+
from .base import BaseStep
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ProcessMetadataStep(BaseStep):
    """Process metadata from Excel files or other sources."""

    @property
    def name(self) -> str:
        return 'process_metadata'

    @property
    def progress_weight(self) -> float:
        return 0.10

    def execute(self, context: UploadContext) -> StepResult:
        """Execute metadata processing step.

        Uses the ``excel_metadata_path`` parameter when it is set, otherwise
        looks for a default ``meta.xlsx`` / ``meta.xls`` in the working
        directory. Extracted metadata is validated by the metadata strategy
        before it is returned.

        Returns:
            A success result carrying the metadata (empty when there is no
            strategy, no file, or a default file failed to parse), or an
            error result on a security violation, failed validation, a
            parsing error for an explicitly given path, or any other error.
        """
        metadata_strategy = context.strategies.get('metadata')
        if not metadata_strategy:
            context.run.log_message('No metadata strategy configured - skipping metadata processing')
            return self.create_success_result(data={'metadata': {}})

        excel_metadata = {}

        try:
            excel_metadata_path = context.get_param('excel_metadata_path')
            if excel_metadata_path:
                if isinstance(excel_metadata_path, str):
                    excel_metadata_path = Path(excel_metadata_path)

                if excel_metadata_path.exists() and excel_metadata_path.is_file():
                    # The parameter already points at an existing file.
                    excel_path = excel_metadata_path
                else:
                    # Otherwise try it relative to the upload working directory.
                    excel_path = context.pathlib_cwd / excel_metadata_path
                    if not excel_path.exists():
                        context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
                        return self.create_success_result(data={'metadata': {}})
                excel_metadata = metadata_strategy.extract(excel_path)
            else:
                # Look for default metadata files (meta.xlsx, meta.xls)
                excel_path = self._find_excel_metadata_file(context.pathlib_cwd)
                if excel_path is not None:
                    excel_metadata = metadata_strategy.extract(excel_path)

            # Validate extracted metadata
            if excel_metadata:
                validation_result = metadata_strategy.validate(excel_metadata)
                if not validation_result.valid:
                    error_msg = f'Metadata validation failed: {", ".join(validation_result.errors)}'
                    return self.create_error_result(error_msg)
                context.run.log_message_with_code(LogCode.EXCEL_METADATA_LOADED, len(excel_metadata))

            return self.create_success_result(
                data={'metadata': excel_metadata}, rollback_data={'metadata_processed': len(excel_metadata) > 0}
            )

        except ExcelSecurityError as e:
            context.run.log_message_with_code(LogCode.EXCEL_SECURITY_VIOLATION, str(e))
            return self.create_error_result(f'Excel security violation: {str(e)}')

        except ExcelParsingError as e:
            # A parsing failure is an error only when the caller explicitly
            # asked for a file; a broken default file is logged and ignored.
            explicit_path_given = bool(context.get_param('excel_metadata_path'))
            context.run.log_message_with_code(LogCode.EXCEL_PARSING_ERROR, str(e))
            if explicit_path_given:
                return self.create_error_result(f'Excel parsing error: {str(e)}')
            return self.create_success_result(data={'metadata': {}})

        except Exception as e:
            return self.create_error_result(f'Unexpected error processing metadata: {str(e)}')

    def can_skip(self, context: UploadContext) -> bool:
        """Metadata step can be skipped if no metadata strategy is configured."""
        return 'metadata' not in context.strategies

    def rollback(self, context: UploadContext) -> None:
        """Rollback metadata processing by clearing any loaded metadata."""
        context.metadata.clear()

    def _find_excel_metadata_file(self, pathlib_cwd: Path) -> 'Path | None':
        """Find the default Excel metadata file in ``pathlib_cwd``.

        Returns:
            The path to ``meta.xlsx`` if it exists, else ``meta.xls`` if it
            exists, else None.
        """
        # Check .xlsx first as it's more common
        for filename in ('meta.xlsx', 'meta.xls'):
            candidate = pathlib_cwd / filename
            if candidate.exists():
                return candidate

        return None
|