synapse-sdk 2025.9.1 → 2025.9.3 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic.
- synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
- synapse_sdk/devtools/docs/docs/api/clients/backend.md +368 -1
- synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
- synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
- synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
- synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
- synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +1497 -213
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +378 -11
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +1752 -572
- synapse_sdk/devtools/docs/sidebars.ts +7 -0
- synapse_sdk/plugins/README.md +1 -2
- synapse_sdk/plugins/categories/base.py +23 -0
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +160 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +1 -1
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +1 -2
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +154 -531
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +66 -29
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +182 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +106 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +80 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +66 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +101 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +89 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +96 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +61 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +86 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +34 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +233 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +238 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/async_upload.py +109 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +43 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +45 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +194 -83
- synapse_sdk/plugins/categories/upload/templates/config.yaml +4 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +269 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +71 -27
- synapse_sdk/plugins/models.py +5 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.3.dist-info}/METADATA +2 -1
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.3.dist-info}/RECORD +78 -27
- synapse_sdk/plugins/categories/export/actions/export.py +0 -385
- synapse_sdk/plugins/categories/export/enums.py +0 -7
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.3.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.3.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.3.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/context.py (new file, +185)

@@ -0,0 +1,185 @@
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

from .run import UploadRun


class StepResult:
    """Result of a workflow step execution."""

    def __init__(
        self,
        success: bool = True,
        data: Dict[str, Any] = None,
        error: str = None,
        rollback_data: Dict[str, Any] = None,
        skipped: bool = False,
        original_exception: Optional[Exception] = None,
    ):
        self.success = success
        self.data = data or {}
        self.error = error
        self.rollback_data = rollback_data or {}
        self.skipped = skipped
        self.original_exception = original_exception
        self.timestamp = datetime.now()

    def __bool__(self):
        return self.success


class UploadContext:
    """Shared context for all upload workflow steps."""

    def __init__(self, params: Dict, run: UploadRun, client: Any):
        self.params = params
        self.run = run
        self.client = client

        # Core state
        self.storage = None
        self.pathlib_cwd = None
        self.metadata: Dict[str, Dict[str, Any]] = {}
        self.file_specifications: Dict[str, Any] = {}
        self.organized_files: List[Dict[str, Any]] = []
        self.uploaded_files: List[Dict[str, Any]] = []
        self.data_units: List[Dict[str, Any]] = []

        # Progress and metrics
        self.metrics: Dict[str, Any] = {}
        self.errors: List[str] = []
        self.step_results: List[StepResult] = []

        # Strategies (injected by orchestrator)
        self.strategies: Dict[str, Any] = {}

        # Rollback information
        self.rollback_data: Dict[str, Any] = {}

    def update(self, result: StepResult) -> None:
        """Update context with step results."""
        self.step_results.append(result)

        if result.success:
            # Update context state with step data
            for key, value in result.data.items():
                if hasattr(self, key):
                    setattr(self, key, value)
                else:
                    # Store in a general data dictionary
                    if not hasattr(self, 'step_data'):
                        self.step_data = {}
                    self.step_data[key] = value

            # Store rollback data
            if result.rollback_data:
                self.rollback_data.update(result.rollback_data)
        else:
            # Record error
            if result.error:
                self.errors.append(result.error)

    def get_result(self) -> Dict[str, Any]:
        """Get final result dictionary."""
        return {
            'uploaded_files_count': len(self.uploaded_files),
            'generated_data_units_count': len(self.data_units),
            'success': len(self.errors) == 0,
            'errors': self.errors,
            'metrics': self.metrics,
        }

    def has_errors(self) -> bool:
        """Check if context has any errors."""
        return len(self.errors) > 0

    def get_last_step_result(self) -> Optional[StepResult]:
        """Get the result of the last executed step."""
        return self.step_results[-1] if self.step_results else None

    def get_step_result_by_name(self, step_name: str) -> Optional[StepResult]:
        """Get step result by step name (stored in rollback_data)."""
        for result in self.step_results:
            if result.rollback_data.get('step_name') == step_name:
                return result
        return None

    def clear_errors(self) -> None:
        """Clear all errors (useful for retry scenarios)."""
        self.errors.clear()

    def add_error(self, error: str) -> None:
        """Add an error to the context."""
        self.errors.append(error)

    def get_param(self, key: str, default: Any = None) -> Any:
        """Get parameter value with default."""
        return self.params.get(key, default)

    def set_storage(self, storage: Any) -> None:
        """Set storage object."""
        self.storage = storage

    def set_pathlib_cwd(self, path: Path) -> None:
        """Set current working directory path."""
        self.pathlib_cwd = path

    def set_file_specifications(self, specs: Dict[str, Any]) -> None:
        """Set file specifications."""
        self.file_specifications = specs

    def add_organized_files(self, files: List[Dict[str, Any]]) -> None:
        """Add organized files to context."""
        self.organized_files.extend(files)

    def add_uploaded_files(self, files: List[Dict[str, Any]]) -> None:
        """Add uploaded files to context."""
        self.uploaded_files.extend(files)

    def add_data_units(self, units: List[Dict[str, Any]]) -> None:
        """Add data units to context."""
        self.data_units.extend(units)

    def update_metrics(self, category: str, metrics: Dict[str, Any]) -> None:
        """Update metrics for a specific category."""
        if category not in self.metrics:
            self.metrics[category] = {}
        self.metrics[category].update(metrics)

    def get(self, key: str, default: Any = None) -> Any:
        """Get value from context by key."""
        # First check direct attributes
        if hasattr(self, key):
            return getattr(self, key)

        # Then check step_data if it exists
        if hasattr(self, 'step_data') and key in self.step_data:
            return self.step_data[key]

        # Special mappings for expected keys
        if key == 'file_specification_template':
            return self.file_specifications
        elif key == 'pathlib_cwd':
            return self.pathlib_cwd
        elif key == 'organized_files':
            return self.organized_files

        return default

    def set(self, key: str, value: Any) -> None:
        """Set value in context by key."""
        # Special mappings for expected keys
        if key == 'file_specification_template':
            self.file_specifications = value
        elif key == 'pathlib_cwd':
            self.pathlib_cwd = value
        elif key == 'organized_files':
            self.organized_files = value
        elif hasattr(self, key):
            setattr(self, key, value)
        else:
            # Store in step_data
            if not hasattr(self, 'step_data'):
                self.step_data = {}
            self.step_data[key] = value
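For orientation, a minimal sketch of the StepResult / UploadContext contract as defined above. The step output is hypothetical, and placeholder run/client objects are used since UploadContext only stores them for later use by steps:

from synapse_sdk.plugins.categories.upload.actions.upload.context import StepResult, UploadContext

# Placeholders: UploadContext stores run/client for the steps but does not call them itself.
context = UploadContext(params={'is_recursive': True}, run=None, client=None)

result = StepResult(
    success=True,
    data={'organized_files': [{'path': 'images/a.jpg'}]},  # hypothetical step output
    rollback_data={'step_name': 'organize'},
)
context.update(result)

context.get('organized_files')               # [{'path': 'images/a.jpg'}], mapped onto the attribute
context.get_step_result_by_name('organize')  # the StepResult above
context.get_result()['success']              # True, since no errors were recorded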
synapse_sdk/plugins/categories/upload/actions/upload/factory.py (new file, +143)

@@ -0,0 +1,143 @@
from typing import Any, Dict

from .strategies.base import (
    DataUnitStrategy,
    FileDiscoveryStrategy,
    MetadataStrategy,
    UploadStrategy,
    ValidationStrategy,
)


class StrategyFactory:
    """Factory for creating strategy instances based on configuration."""

    def __init__(self):
        self._validation_strategies = {}
        self._file_discovery_strategies = {}
        self._metadata_strategies = {}
        self._upload_strategies = {}
        self._data_unit_strategies = {}

    def register_validation_strategy(self, name: str, strategy_class: type) -> None:
        """Register a validation strategy class."""
        self._validation_strategies[name] = strategy_class

    def register_file_discovery_strategy(self, name: str, strategy_class: type) -> None:
        """Register a file discovery strategy class."""
        self._file_discovery_strategies[name] = strategy_class

    def register_metadata_strategy(self, name: str, strategy_class: type) -> None:
        """Register a metadata strategy class."""
        self._metadata_strategies[name] = strategy_class

    def register_upload_strategy(self, name: str, strategy_class: type) -> None:
        """Register an upload strategy class."""
        self._upload_strategies[name] = strategy_class

    def register_data_unit_strategy(self, name: str, strategy_class: type) -> None:
        """Register a data unit strategy class."""
        self._data_unit_strategies[name] = strategy_class

    def create_validation_strategy(self, params: Dict[str, Any], context=None) -> ValidationStrategy:
        """Create validation strategy based on parameters."""
        strategy_name = params.get('validation_strategy', 'default')

        if strategy_name not in self._validation_strategies:
            # Import default strategy if not registered
            from .strategies.validation.default import DefaultValidationStrategy

            self.register_validation_strategy('default', DefaultValidationStrategy)
            strategy_name = 'default'

        strategy_class = self._validation_strategies[strategy_name]
        return strategy_class()

    def create_file_discovery_strategy(self, params: Dict[str, Any], context=None) -> FileDiscoveryStrategy:
        """Create file discovery strategy based on parameters."""
        is_recursive = params.get('is_recursive', True)
        strategy_name = 'recursive' if is_recursive else 'flat'

        if strategy_name not in self._file_discovery_strategies:
            # Import default strategies if not registered
            if strategy_name == 'recursive':
                from .strategies.file_discovery.recursive import RecursiveFileDiscoveryStrategy

                self.register_file_discovery_strategy('recursive', RecursiveFileDiscoveryStrategy)
            else:
                from .strategies.file_discovery.flat import FlatFileDiscoveryStrategy

                self.register_file_discovery_strategy('flat', FlatFileDiscoveryStrategy)

        strategy_class = self._file_discovery_strategies[strategy_name]
        return strategy_class()

    def create_metadata_strategy(self, params: Dict[str, Any], context=None) -> MetadataStrategy:
        """Create metadata strategy based on parameters."""
        # Always use Excel strategy for metadata processing
        # It will handle both specified paths and default meta.xlsx/meta.xls files
        strategy_name = 'excel'

        if strategy_name not in self._metadata_strategies:
            from .strategies.metadata.excel import ExcelMetadataStrategy

            self.register_metadata_strategy('excel', ExcelMetadataStrategy)

        strategy_class = self._metadata_strategies[strategy_name]
        return strategy_class()

    def create_upload_strategy(self, params: Dict[str, Any], context=None) -> UploadStrategy:
        """Create upload strategy based on parameters."""
        if context is None:
            raise ValueError('Upload strategies require context parameter')

        use_async = params.get('use_async_upload', True)
        strategy_name = 'async' if use_async else 'sync'

        if strategy_name not in self._upload_strategies:
            # Import default strategies if not registered
            if strategy_name == 'async':
                from .strategies.upload.async_upload import AsyncUploadStrategy

                self.register_upload_strategy('async', AsyncUploadStrategy)
            else:
                from .strategies.upload.sync import SyncUploadStrategy

                self.register_upload_strategy('sync', SyncUploadStrategy)

        strategy_class = self._upload_strategies[strategy_name]
        # Upload strategies always need context for client access
        return strategy_class(context)

    def create_data_unit_strategy(self, params: Dict[str, Any], context=None) -> DataUnitStrategy:
        """Create data unit strategy based on parameters."""
        if context is None:
            raise ValueError('Data unit strategies require context parameter')

        batch_size = params.get('creating_data_unit_batch_size', 1)
        strategy_name = 'batch' if batch_size > 1 else 'single'

        if strategy_name not in self._data_unit_strategies:
            # Import default strategies if not registered
            if strategy_name == 'batch':
                from .strategies.data_unit.batch import BatchDataUnitStrategy

                self.register_data_unit_strategy('batch', BatchDataUnitStrategy)
            else:
                from .strategies.data_unit.single import SingleDataUnitStrategy

                self.register_data_unit_strategy('single', SingleDataUnitStrategy)

        strategy_class = self._data_unit_strategies[strategy_name]
        # Data unit strategies always need context for client access
        return strategy_class(context)

    def get_available_strategies(self) -> Dict[str, list]:
        """Get all available strategy types and their registered names."""
        return {
            'validation': list(self._validation_strategies.keys()),
            'file_discovery': list(self._file_discovery_strategies.keys()),
            'metadata': list(self._metadata_strategies.keys()),
            'upload': list(self._upload_strategies.keys()),
            'data_unit': list(self._data_unit_strategies.keys()),
        }
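A short sketch of how the factory's defaults resolve from params; the choices follow the factory code above, and the import path is taken from the file list:

from synapse_sdk.plugins.categories.upload.actions.upload.factory import StrategyFactory

factory = StrategyFactory()
params = {'is_recursive': False, 'use_async_upload': False}

discovery = factory.create_file_discovery_strategy(params)  # FlatFileDiscoveryStrategy instance
validation = factory.create_validation_strategy(params)     # DefaultValidationStrategy instance

# Upload and data-unit strategies are context-bound and refuse to build without one:
# factory.create_upload_strategy(params)  # ValueError: Upload strategies require context parameter

factory.get_available_strategies()
# {'validation': ['default'], 'file_discovery': ['flat'], 'metadata': [], 'upload': [], 'data_unit': []}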
synapse_sdk/plugins/categories/upload/actions/upload/models.py (+66 -29; removed lines are partly truncated in the source diff view and are kept elided here)

@@ -6,6 +6,7 @@ from pydantic_core import PydanticCustomError
 
 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.utils.pydantic.validators import non_blank
+from synapse_sdk.utils.storage import get_pathlib
 
 from .utils import ExcelSecurityConfig
 

@@ -106,44 +107,80 @@ class UploadParams(BaseModel):
             raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
         return value
 
-    @field_validator('excel_metadata_path', mode='…
+    @field_validator('excel_metadata_path', mode='after')
     @classmethod
     def check_excel_metadata_path(cls, value, info: ValidationInfo) -> str | None:
         if not value:
             return value
 
-        …
+        # Validate file extension
+        if not value.lower().endswith(('.xlsx', '.xls')):
+            raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')
 
-        …
+        # Get storage and path from validation data
+        if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
+            # If we don't have storage/path data yet, just validate extension
+            return value
 
-        if …
-            raise PydanticCustomError('…
+        if info.context is None:
+            raise PydanticCustomError('missing_context', 'Validation context is required.')
 
-        …
-        if file_size > excel_config.MAX_FILE_SIZE_BYTES:
-            max_size_mb = excel_config.MAX_FILE_SIZE_MB
-            raise PydanticCustomError(
-                'file_too_large',
-                'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
-                {'max_size_mb': max_size_mb},
-            )
+        action = info.context['action']
+        client = action.client
 
         try:
-            … (old implementation truncated in the source diff view)
+            # Get storage configuration
+            storage_id = info.data['storage']
+            storage = client.get_storage(storage_id)
+
+            # Skip file system validation if storage doesn't have provider (likely test environment)
+            if not isinstance(storage, dict) or 'provider' not in storage:
+                # Basic validation only - likely in test environment
+                return value
+
+            # Get the actual file system path using storage + path
+            base_path = get_pathlib(storage, info.data['path'])
+
+            # Support both absolute and relative paths
+            if Path(value).is_absolute():
+                excel_path = Path(value)
+            else:
+                excel_path = base_path / value
+
+            if not excel_path.exists():
+                raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')
+
+            # Validate file size
+            file_size = excel_path.stat().st_size
+            excel_config = ExcelSecurityConfig()
+            if file_size > excel_config.MAX_FILE_SIZE_BYTES:
+                max_size_mb = excel_config.MAX_FILE_SIZE_MB
+                raise PydanticCustomError(
+                    'file_too_large',
+                    'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
+                    {'max_size_mb': max_size_mb},
+                )
+
+            # Validate file format
+            try:
+                with open(excel_path, 'rb') as f:
+                    header = f.read(8)
+                if not header:
+                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be empty.')
+
+                if excel_path.suffix.lower() == '.xlsx':
+                    if not header.startswith(b'PK'):
+                        raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
+                elif excel_path.suffix.lower() == '.xls':
+                    if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
+                        raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
+
+            except (OSError, IOError):
+                raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')
+
+        except ClientError:
+            raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
+        except Exception as e:
+            raise PydanticCustomError('validation_error', f'Error validating Excel metadata file: {str(e)}')
 
         return value
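Because the validator reads info.context['action'], callers presumably pass a pydantic v2 validation context when validating UploadParams. A hedged sketch of that wiring; the concrete field values, the omitted remaining fields, and the surrounding action object are assumptions, not shown in this diff:

from synapse_sdk.plugins.categories.upload.actions.upload.models import UploadParams

params = UploadParams.model_validate(
    {
        'storage': 3,                    # hypothetical storage id
        'path': 'datasets/batch-01',     # hypothetical base path on that storage
        'excel_metadata_path': 'meta.xlsx',
        # ...remaining UploadParams fields omitted here
    },
    context={'action': action},          # `action` must expose `.client` with `.get_storage()`
)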
synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py (new file, +182)

@@ -0,0 +1,182 @@
import traceback
from typing import Any, Dict, List

from .context import UploadContext
from .registry import StepRegistry
from .steps.base import BaseStep


class UploadOrchestrator:
    """Facade that orchestrates the upload workflow using strategies and steps."""

    def __init__(self, context: UploadContext, step_registry: StepRegistry, strategies: Dict[str, Any]):
        self.context = context
        self.step_registry = step_registry
        self.strategies = strategies
        self.executed_steps: List[BaseStep] = []
        self.current_step_index = 0

    def execute(self) -> Dict[str, Any]:
        """Execute the complete upload workflow."""
        try:
            self._log_workflow_start()
            self._inject_strategies_into_context()

            steps = self.step_registry.get_steps()
            total_steps = len(steps)

            for i, step in enumerate(steps):
                self.current_step_index = i

                try:
                    result = step.safe_execute(self.context)
                    self.context.update(result)

                    if result.success:
                        if not result.skipped:
                            self.executed_steps.append(step)
                        self._update_progress(i + 1, total_steps)
                    else:
                        # Step failed, initiate rollback
                        self._log_step_failure(step, result.error)
                        self._rollback()
                        # Re-raise original exception if available, otherwise create new one
                        if result.original_exception:
                            raise result.original_exception
                        else:
                            raise Exception(f"Step '{step.name}' failed: {result.error}")

                except Exception as e:
                    self._log_step_exception(step, str(e))
                    self._rollback()
                    raise

            self._log_workflow_complete()
            return self.context.get_result()

        except Exception as e:
            self._log_workflow_error(str(e))
            # Ensure rollback is called if not already done
            if not hasattr(self, '_rollback_executed'):
                self._rollback()
            raise

    def _inject_strategies_into_context(self) -> None:
        """Inject strategies into context for steps to use."""
        if not hasattr(self.context, 'strategies'):
            self.context.strategies = {}
        self.context.strategies.update(self.strategies)

    def _rollback(self) -> None:
        """Rollback executed steps in reverse order."""
        if hasattr(self, '_rollback_executed'):
            return  # Prevent multiple rollbacks

        self._rollback_executed = True
        self._log_rollback_start()

        # Rollback in reverse order
        for step in reversed(self.executed_steps):
            try:
                self._log_step_rollback(step)
                step.rollback(self.context)
            except Exception as e:
                # Log rollback error but continue with other steps
                self._log_rollback_error(step, str(e))

        self._log_rollback_complete()

    def _update_progress(self, current_step: int, total_steps: int) -> None:
        """Update overall progress based on step completion."""
        if total_steps == 0:
            return

        # Calculate progress based on step weights
        completed_weight = 0.0
        total_weight = self.step_registry.get_total_progress_weight()

        for i, step in enumerate(self.executed_steps):
            completed_weight += step.progress_weight

        progress_percentage = (completed_weight / total_weight) * 100 if total_weight > 0 else 0

        # Update context with progress information
        self.context.update_metrics(
            'workflow',
            {
                'current_step': current_step,
                'total_steps': total_steps,
                'progress_percentage': progress_percentage,
                'completed_weight': completed_weight,
                'total_weight': total_weight,
            },
        )

    def _log_workflow_start(self) -> None:
        """Log workflow start."""
        steps = self.step_registry.get_steps()
        step_names = [step.name for step in steps]
        self.context.run.log_message(f'Starting upload workflow with {len(steps)} steps: {step_names}')

    def _log_workflow_complete(self) -> None:
        """Log workflow completion."""
        self.context.run.log_message('Upload workflow completed successfully')

    def _log_workflow_error(self, error: str) -> None:
        """Log workflow error."""
        self.context.run.log_message(f'Upload workflow failed: {error}')

    def _log_step_failure(self, step: BaseStep, error: str) -> None:
        """Log step failure."""
        self.context.run.log_message(f"Step '{step.name}' failed: {error}")

    def _log_step_exception(self, step: BaseStep, error: str) -> None:
        """Log step exception."""
        self.context.run.log_message(f"Exception in step '{step.name}': {error}")
        # Log full traceback for debugging
        self.context.run.log_message(f'Traceback: {traceback.format_exc()}')

    def _log_rollback_start(self) -> None:
        """Log rollback start."""
        self.context.run.log_message(f'Starting rollback of {len(self.executed_steps)} executed steps')

    def _log_rollback_complete(self) -> None:
        """Log rollback completion."""
        self.context.run.log_message('Rollback completed')

    def _log_step_rollback(self, step: BaseStep) -> None:
        """Log step rollback."""
        self.context.run.log_message(f'Rolling back step: {step.name}')

    def _log_rollback_error(self, step: BaseStep, error: str) -> None:
        """Log rollback error."""
        self.context.run.log_message(f"Error rolling back step '{step.name}': {error}")

    def get_executed_steps(self) -> List[BaseStep]:
        """Get list of successfully executed steps."""
        return self.executed_steps.copy()

    def get_current_step_index(self) -> int:
        """Get current step index."""
        return self.current_step_index

    def get_total_steps(self) -> int:
        """Get total number of steps."""
        return len(self.step_registry.get_steps())

    def is_rollback_executed(self) -> bool:
        """Check if rollback has been executed."""
        return hasattr(self, '_rollback_executed')

    def get_workflow_summary(self) -> Dict[str, Any]:
        """Get workflow execution summary."""
        steps = self.step_registry.get_steps()
        return {
            'total_steps': len(steps),
            'executed_steps': len(self.executed_steps),
            'current_step_index': self.current_step_index,
            'step_names': [step.name for step in steps],
            'executed_step_names': [step.name for step in self.executed_steps],
            'rollback_executed': self.is_rollback_executed(),
            'strategies': list(self.strategies.keys()) if self.strategies else [],
        }