synapse-sdk 1.0.0b24__py3-none-any.whl → 2025.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. Click here for more details.

Files changed (83) hide show
  1. synapse_sdk/clients/agent/ray.py +50 -0
  2. synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
  3. synapse_sdk/devtools/docs/docs/api/clients/backend.md +368 -1
  4. synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
  5. synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
  6. synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
  7. synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
  8. synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
  9. synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
  10. synapse_sdk/devtools/docs/docs/api/clients/ray.md +23 -2
  11. synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +1463 -0
  12. synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +161 -34
  13. synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +1497 -213
  14. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
  15. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +378 -11
  16. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
  17. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
  18. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
  19. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
  20. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
  21. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
  22. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ray.md +23 -2
  23. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +1463 -0
  24. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +161 -34
  25. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +1752 -572
  26. synapse_sdk/devtools/docs/sidebars.ts +7 -0
  27. synapse_sdk/plugins/README.md +1 -2
  28. synapse_sdk/plugins/categories/base.py +23 -0
  29. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  30. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  31. synapse_sdk/plugins/categories/export/actions/export/action.py +160 -0
  32. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  33. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  34. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  35. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  36. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  37. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +1 -1
  38. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +1 -2
  39. synapse_sdk/plugins/categories/upload/actions/upload/action.py +154 -531
  40. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  41. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +143 -0
  42. synapse_sdk/plugins/categories/upload/actions/upload/models.py +66 -29
  43. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +182 -0
  44. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  45. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  46. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +106 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +62 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +80 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +66 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +101 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +89 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +96 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +61 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +86 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +34 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +233 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +238 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/async_upload.py +109 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +43 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +45 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +194 -83
  72. synapse_sdk/plugins/categories/upload/templates/config.yaml +4 -0
  73. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +269 -0
  74. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +71 -27
  75. synapse_sdk/plugins/models.py +5 -0
  76. {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/METADATA +3 -2
  77. {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/RECORD +81 -30
  78. synapse_sdk/plugins/categories/export/actions/export.py +0 -385
  79. synapse_sdk/plugins/categories/export/enums.py +0 -7
  80. {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/WHEEL +0 -0
  81. {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/entry_points.txt +0 -0
  82. {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/licenses/LICENSE +0 -0
  83. {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,185 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from .run import UploadRun
6
+
7
+
8
class StepResult:
    """Result of a workflow step execution.

    Instance attributes:
        success: Whether the step completed successfully.
        data: Values produced by the step; an empty dict when none were given.
        error: Error message describing a failure, if any.
        rollback_data: Information recorded for rollback; an empty dict when
            none was given.
        skipped: Whether the step was skipped.
        original_exception: Exception object associated with a failure, if any.
        timestamp: Local (naive) time at which this result object was created.

    The truth value of a result is the truth value of ``success``.
    """

    def __init__(
        self,
        success: bool = True,
        data: Optional[Dict[str, Any]] = None,
        error: Optional[str] = None,
        rollback_data: Optional[Dict[str, Any]] = None,
        skipped: bool = False,
        original_exception: Optional[Exception] = None,
    ):
        self.success = success
        # Fresh dicts per instance so results never share mutable state.
        self.data = data or {}
        self.error = error
        self.rollback_data = rollback_data or {}
        self.skipped = skipped
        self.original_exception = original_exception
        self.timestamp = datetime.now()

    def __bool__(self) -> bool:
        # __bool__ must return a real bool; coercing avoids a TypeError when a
        # caller passed a truthy/falsy non-bool (e.g. 1 or None) as ``success``.
        return bool(self.success)

    def __repr__(self) -> str:
        return (
            f'{type(self).__name__}(success={self.success!r}, skipped={self.skipped!r}, '
            f'error={self.error!r})'
        )
30
+
31
+
32
class UploadContext:
    """Shared context for all upload workflow steps.

    Holds the action parameters together with the run and client handles, and
    accumulates the state produced while steps execute: storage, working
    directory, metadata, file specifications, organized/uploaded files, data
    units, metrics, errors, per-step results and rollback information.

    Values that do not correspond to an existing data attribute are kept in a
    lazily created ``step_data`` dictionary.
    """

    def __init__(self, params: Dict, run: 'UploadRun', client: Any):
        self.params = params
        self.run = run
        self.client = client

        # Core state
        self.storage = None
        self.pathlib_cwd = None
        self.metadata: Dict[str, Dict[str, Any]] = {}
        self.file_specifications: Dict[str, Any] = {}
        self.organized_files: List[Dict[str, Any]] = []
        self.uploaded_files: List[Dict[str, Any]] = []
        self.data_units: List[Dict[str, Any]] = []

        # Progress and metrics
        self.metrics: Dict[str, Any] = {}
        self.errors: List[str] = []
        self.step_results: List['StepResult'] = []

        # Strategies (injected by orchestrator)
        self.strategies: Dict[str, Any] = {}

        # Rollback information
        self.rollback_data: Dict[str, Any] = {}

    def _is_state_attribute(self, key: str) -> bool:
        """Return True when ``key`` names an existing data attribute.

        Names that resolve to a callable on the class (i.e. methods) are
        excluded so that step data can never overwrite the context's own API.
        """
        return hasattr(self, key) and not callable(getattr(type(self), key, None))

    def _store_step_data(self, key: str, value: Any) -> None:
        """Store ``value`` in the general ``step_data`` dictionary, creating it on first use."""
        if not hasattr(self, 'step_data'):
            self.step_data = {}
        self.step_data[key] = value

    def update(self, result: 'StepResult') -> None:
        """Update context with step results.

        The result is always appended to ``step_results``. For a successful
        result each ``data`` item is written to the matching data attribute, or
        to ``step_data`` when no such attribute exists, and any rollback data is
        merged into ``rollback_data``. For a failed result the error message
        (if any) is appended to ``errors``.
        """
        self.step_results.append(result)

        if not result.success:
            if result.error:
                self.errors.append(result.error)
            return

        for key, value in result.data.items():
            if self._is_state_attribute(key):
                setattr(self, key, value)
            else:
                self._store_step_data(key, value)

        if result.rollback_data:
            self.rollback_data.update(result.rollback_data)

    def get_result(self) -> Dict[str, Any]:
        """Get final result dictionary (counts, success flag, errors and metrics)."""
        return {
            'uploaded_files_count': len(self.uploaded_files),
            'generated_data_units_count': len(self.data_units),
            'success': not self.errors,
            'errors': self.errors,
            'metrics': self.metrics,
        }

    def has_errors(self) -> bool:
        """Check if context has any errors."""
        return bool(self.errors)

    def get_last_step_result(self) -> Optional['StepResult']:
        """Get the result of the last executed step, or None when no step has run."""
        return self.step_results[-1] if self.step_results else None

    def get_step_result_by_name(self, step_name: str) -> Optional['StepResult']:
        """Get the first step result whose ``rollback_data['step_name']`` equals ``step_name``."""
        for result in self.step_results:
            if result.rollback_data.get('step_name') == step_name:
                return result
        return None

    def clear_errors(self) -> None:
        """Clear all errors (useful for retry scenarios)."""
        self.errors.clear()

    def add_error(self, error: str) -> None:
        """Add an error to the context."""
        self.errors.append(error)

    def get_param(self, key: str, default: Any = None) -> Any:
        """Get parameter value with default."""
        return self.params.get(key, default)

    def set_storage(self, storage: Any) -> None:
        """Set storage object."""
        self.storage = storage

    def set_pathlib_cwd(self, path: Path) -> None:
        """Set current working directory path."""
        self.pathlib_cwd = path

    def set_file_specifications(self, specs: Dict[str, Any]) -> None:
        """Set file specifications."""
        self.file_specifications = specs

    def add_organized_files(self, files: List[Dict[str, Any]]) -> None:
        """Add organized files to context."""
        self.organized_files.extend(files)

    def add_uploaded_files(self, files: List[Dict[str, Any]]) -> None:
        """Add uploaded files to context."""
        self.uploaded_files.extend(files)

    def add_data_units(self, units: List[Dict[str, Any]]) -> None:
        """Add data units to context."""
        self.data_units.extend(units)

    def update_metrics(self, category: str, metrics: Dict[str, Any]) -> None:
        """Merge ``metrics`` into the metrics stored under ``category``."""
        self.metrics.setdefault(category, {}).update(metrics)

    def get(self, key: str, default: Any = None) -> Any:
        """Get value from context by key.

        Lookup order: data attribute, then ``step_data``, then the
        ``file_specification_template`` alias, then ``default``.
        """
        if self._is_state_attribute(key):
            return getattr(self, key)

        step_data = getattr(self, 'step_data', None)
        if step_data is not None and key in step_data:
            return step_data[key]

        # 'file_specification_template' is an alias for ``file_specifications``.
        # ('pathlib_cwd' and 'organized_files' are real attributes and are
        # already served by the attribute lookup above.)
        if key == 'file_specification_template':
            return self.file_specifications

        return default

    def set(self, key: str, value: Any) -> None:
        """Set value in context by key.

        ``file_specification_template`` is written to ``file_specifications``;
        existing data attributes are assigned directly; anything else goes to
        ``step_data``.
        """
        if key == 'file_specification_template':
            self.file_specifications = value
        elif key in ('pathlib_cwd', 'organized_files') or self._is_state_attribute(key):
            setattr(self, key, value)
        else:
            self._store_step_data(key, value)
@@ -0,0 +1,143 @@
1
+ from typing import Any, Dict
2
+
3
+ from .strategies.base import (
4
+ DataUnitStrategy,
5
+ FileDiscoveryStrategy,
6
+ MetadataStrategy,
7
+ UploadStrategy,
8
+ ValidationStrategy,
9
+ )
10
+
11
+
12
class StrategyFactory:
    """Factory for creating strategy instances based on configuration.

    One name -> class registry is kept per strategy kind (validation, file
    discovery, metadata, upload, data unit). Built-in strategy classes are
    imported and registered lazily the first time they are needed; classes
    registered by the caller under the same name take precedence.
    """

    def __init__(self):
        self._validation_strategies = {}
        self._file_discovery_strategies = {}
        self._metadata_strategies = {}
        self._upload_strategies = {}
        self._data_unit_strategies = {}

    def register_validation_strategy(self, name: str, strategy_class: type) -> None:
        """Register a validation strategy class."""
        self._validation_strategies[name] = strategy_class

    def register_file_discovery_strategy(self, name: str, strategy_class: type) -> None:
        """Register a file discovery strategy class."""
        self._file_discovery_strategies[name] = strategy_class

    def register_metadata_strategy(self, name: str, strategy_class: type) -> None:
        """Register a metadata strategy class."""
        self._metadata_strategies[name] = strategy_class

    def register_upload_strategy(self, name: str, strategy_class: type) -> None:
        """Register an upload strategy class."""
        self._upload_strategies[name] = strategy_class

    def register_data_unit_strategy(self, name: str, strategy_class: type) -> None:
        """Register a data unit strategy class."""
        self._data_unit_strategies[name] = strategy_class

    def create_validation_strategy(self, params: Dict[str, Any], context=None) -> 'ValidationStrategy':
        """Create validation strategy based on parameters.

        Uses ``params['validation_strategy']`` (default ``'default'``). A name
        that is not registered falls back to ``'default'``. ``context`` is
        accepted for signature parity with the other factories and is not used.
        """
        strategy_name = params.get('validation_strategy', 'default')

        if strategy_name not in self._validation_strategies:
            strategy_name = 'default'
            # Only import the built-in when nothing is registered as 'default',
            # so a caller-registered default is never overwritten.
            if strategy_name not in self._validation_strategies:
                from .strategies.validation.default import DefaultValidationStrategy

                self.register_validation_strategy('default', DefaultValidationStrategy)

        return self._validation_strategies[strategy_name]()

    def create_file_discovery_strategy(self, params: Dict[str, Any], context=None) -> 'FileDiscoveryStrategy':
        """Create file discovery strategy based on parameters.

        Selects ``'recursive'`` when ``params['is_recursive']`` is truthy
        (default ``True``), otherwise ``'flat'``. ``context`` is not used.
        """
        strategy_name = 'recursive' if params.get('is_recursive', True) else 'flat'

        if strategy_name not in self._file_discovery_strategies:
            # Import default strategies if not registered
            if strategy_name == 'recursive':
                from .strategies.file_discovery.recursive import RecursiveFileDiscoveryStrategy

                self.register_file_discovery_strategy('recursive', RecursiveFileDiscoveryStrategy)
            else:
                from .strategies.file_discovery.flat import FlatFileDiscoveryStrategy

                self.register_file_discovery_strategy('flat', FlatFileDiscoveryStrategy)

        return self._file_discovery_strategies[strategy_name]()

    def create_metadata_strategy(self, params: Dict[str, Any], context=None) -> 'MetadataStrategy':
        """Create metadata strategy based on parameters.

        The ``'excel'`` strategy is always used; ``params`` and ``context`` do
        not influence the selection.
        """
        # Always use Excel strategy for metadata processing
        # It will handle both specified paths and default meta.xlsx/meta.xls files
        strategy_name = 'excel'

        if strategy_name not in self._metadata_strategies:
            from .strategies.metadata.excel import ExcelMetadataStrategy

            self.register_metadata_strategy('excel', ExcelMetadataStrategy)

        return self._metadata_strategies[strategy_name]()

    def create_upload_strategy(self, params: Dict[str, Any], context=None) -> 'UploadStrategy':
        """Create upload strategy based on parameters.

        Selects ``'async'`` when ``params['use_async_upload']`` is truthy
        (default ``True``), otherwise ``'sync'``. The strategy class is
        instantiated with ``context``.

        Raises:
            ValueError: If ``context`` is None.
        """
        if context is None:
            raise ValueError('Upload strategies require context parameter')

        strategy_name = 'async' if params.get('use_async_upload', True) else 'sync'

        if strategy_name not in self._upload_strategies:
            # Import default strategies if not registered
            if strategy_name == 'async':
                from .strategies.upload.async_upload import AsyncUploadStrategy

                self.register_upload_strategy('async', AsyncUploadStrategy)
            else:
                from .strategies.upload.sync import SyncUploadStrategy

                self.register_upload_strategy('sync', SyncUploadStrategy)

        # Upload strategies always need context for client access
        return self._upload_strategies[strategy_name](context)

    def create_data_unit_strategy(self, params: Dict[str, Any], context=None) -> 'DataUnitStrategy':
        """Create data unit strategy based on parameters.

        Selects ``'batch'`` when ``params['creating_data_unit_batch_size']`` is
        greater than 1, otherwise ``'single'``. A missing, ``None`` or zero
        batch size is treated as 1. The strategy class is instantiated with
        ``context``.

        Raises:
            ValueError: If ``context`` is None.
        """
        if context is None:
            raise ValueError('Data unit strategies require context parameter')

        # ``or 1`` guards against an explicit None, which would otherwise make
        # the ``> 1`` comparison raise TypeError.
        batch_size = params.get('creating_data_unit_batch_size') or 1
        strategy_name = 'batch' if batch_size > 1 else 'single'

        if strategy_name not in self._data_unit_strategies:
            # Import default strategies if not registered
            if strategy_name == 'batch':
                from .strategies.data_unit.batch import BatchDataUnitStrategy

                self.register_data_unit_strategy('batch', BatchDataUnitStrategy)
            else:
                from .strategies.data_unit.single import SingleDataUnitStrategy

                self.register_data_unit_strategy('single', SingleDataUnitStrategy)

        # Data unit strategies always need context for client access
        return self._data_unit_strategies[strategy_name](context)

    def get_available_strategies(self) -> Dict[str, list]:
        """Get all available strategy types and their registered names."""
        return {
            'validation': list(self._validation_strategies),
            'file_discovery': list(self._file_discovery_strategies),
            'metadata': list(self._metadata_strategies),
            'upload': list(self._upload_strategies),
            'data_unit': list(self._data_unit_strategies),
        }
@@ -6,6 +6,7 @@ from pydantic_core import PydanticCustomError
6
6
 
7
7
  from synapse_sdk.clients.exceptions import ClientError
8
8
  from synapse_sdk.utils.pydantic.validators import non_blank
9
+ from synapse_sdk.utils.storage import get_pathlib
9
10
 
10
11
  from .utils import ExcelSecurityConfig
11
12
 
@@ -106,44 +107,80 @@ class UploadParams(BaseModel):
106
107
  raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
107
108
  return value
108
109
 
109
@field_validator('excel_metadata_path', mode='after')
@classmethod
def check_excel_metadata_path(cls, value, info: ValidationInfo) -> str | None:
    """Validate the optional Excel metadata file path.

    An empty value is returned unchanged. Otherwise the extension must be
    ``.xlsx`` or ``.xls``. When ``storage`` and ``path`` are available in the
    already-validated data, the storage is fetched through the action's client
    and the file is resolved (absolute paths as given, relative paths against
    the storage base path) and checked for existence, maximum size and a
    plausible file signature. If the fetched storage is not a dict with a
    ``provider`` key, only the extension check is applied.

    Raises:
        PydanticCustomError: With one of the types ``invalid_file_type``,
            ``missing_context``, ``file_not_found``, ``file_too_large``,
            ``invalid_file``, ``file_access_error``, ``client_error`` or
            ``validation_error``.
    """
    if not value:
        return value

    # Validate file extension
    if not value.lower().endswith(('.xlsx', '.xls')):
        raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')

    # Get storage and path from validation data
    if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
        # If we don't have storage/path data yet, just validate extension
        return value

    if info.context is None:
        raise PydanticCustomError('missing_context', 'Validation context is required.')

    action = info.context['action']
    client = action.client

    try:
        # Get storage configuration
        storage = client.get_storage(info.data['storage'])

        # Skip file system validation if storage doesn't have provider (likely test environment)
        if not isinstance(storage, dict) or 'provider' not in storage:
            return value

        # Get the actual file system path using storage + path
        base_path = get_pathlib(storage, info.data['path'])

        # Support both absolute and relative paths
        candidate = Path(value)
        excel_path = candidate if candidate.is_absolute() else base_path / value

        if not excel_path.exists():
            raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')

        # Validate file size
        file_size = excel_path.stat().st_size
        excel_config = ExcelSecurityConfig()
        if file_size > excel_config.MAX_FILE_SIZE_BYTES:
            raise PydanticCustomError(
                'file_too_large',
                'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
                {'max_size_mb': excel_config.MAX_FILE_SIZE_MB},
            )

        # Validate file format by inspecting the leading bytes
        try:
            with open(excel_path, 'rb') as f:
                header = f.read(8)
        except OSError:
            raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')

        if not header:
            raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be empty.')

        suffix = excel_path.suffix.lower()
        if suffix == '.xlsx':
            if not header.startswith(b'PK'):
                raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
        elif suffix == '.xls':
            if not header.startswith((b'\xd0\xcf\x11\xe0', b'\x09\x08')):
                raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')

    except PydanticCustomError:
        # Our own specific validation errors must reach the caller unchanged;
        # without this clause the generic handler below would re-label them
        # all as 'validation_error'.
        raise
    except ClientError:
        raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
    except Exception as e:
        # Pass the detail through the template context rather than an f-string
        # so braces in the underlying message are not treated as placeholders.
        raise PydanticCustomError(
            'validation_error',
            'Error validating Excel metadata file: {error}',
            {'error': str(e)},
        ) from e

    return value
@@ -0,0 +1,182 @@
1
+ import traceback
2
+ from typing import Any, Dict, List
3
+
4
+ from .context import UploadContext
5
+ from .registry import StepRegistry
6
+ from .steps.base import BaseStep
7
+
8
+
9
class UploadOrchestrator:
    """Facade that orchestrates the upload workflow using strategies and steps.

    Steps supplied by the step registry are executed in order against a shared
    context. When a step reports failure or raises, the steps that were
    executed (and not skipped) are rolled back in reverse order and the error
    is re-raised to the caller. Rollback runs at most once per orchestrator.
    """

    def __init__(self, context: 'UploadContext', step_registry: 'StepRegistry', strategies: Dict[str, Any]):
        self.context = context
        self.step_registry = step_registry
        self.strategies = strategies
        self.executed_steps: List['BaseStep'] = []
        self.current_step_index = 0
        # Explicit boolean flag rather than "does the attribute exist" checks.
        self._rollback_executed = False

    def execute(self) -> Dict[str, Any]:
        """Execute the complete upload workflow.

        Returns:
            The dictionary produced by ``context.get_result()``.

        Raises:
            Exception: The failing step's ``original_exception`` when it has
                one, otherwise a generic ``Exception`` describing the failed
                step; exceptions raised while running a step propagate as-is.
        """
        try:
            self._log_workflow_start()
            self._inject_strategies_into_context()

            steps = self.step_registry.get_steps()
            total_steps = len(steps)

            for index, step in enumerate(steps):
                self.current_step_index = index

                try:
                    result = step.safe_execute(self.context)
                    self.context.update(result)

                    if result.success:
                        # Skipped steps did no work, so they are not rolled back
                        # and do not contribute to the completed weight.
                        if not result.skipped:
                            self.executed_steps.append(step)
                        self._update_progress(index + 1, total_steps)
                    else:
                        # Step failed, initiate rollback
                        self._log_step_failure(step, result.error)
                        self._rollback()
                        # Re-raise original exception if available, otherwise create new one.
                        # The raise is picked up by the handler below, which logs the
                        # traceback before propagating it.
                        if result.original_exception:
                            raise result.original_exception
                        raise Exception(f"Step '{step.name}' failed: {result.error}")

                except Exception as e:
                    self._log_step_exception(step, str(e))
                    self._rollback()
                    raise

            self._log_workflow_complete()
            return self.context.get_result()

        except Exception as e:
            self._log_workflow_error(str(e))
            # Ensure rollback is called if not already done
            if not self._rollback_executed:
                self._rollback()
            raise

    def _inject_strategies_into_context(self) -> None:
        """Inject strategies into context for steps to use."""
        if not hasattr(self.context, 'strategies'):
            self.context.strategies = {}
        self.context.strategies.update(self.strategies)

    def _rollback(self) -> None:
        """Rollback executed steps in reverse order (no-op after the first call)."""
        if self._rollback_executed:
            return  # Prevent multiple rollbacks

        self._rollback_executed = True
        self._log_rollback_start()

        for step in reversed(self.executed_steps):
            try:
                self._log_step_rollback(step)
                step.rollback(self.context)
            except Exception as e:
                # Log rollback error but continue with other steps
                self._log_rollback_error(step, str(e))

        self._log_rollback_complete()

    def _update_progress(self, current_step: int, total_steps: int) -> None:
        """Record workflow progress in the context metrics under ``'workflow'``.

        Progress is the summed ``progress_weight`` of the executed steps
        relative to the registry's total progress weight.
        """
        if total_steps == 0:
            return

        total_weight = self.step_registry.get_total_progress_weight()
        completed_weight = sum((step.progress_weight for step in self.executed_steps), 0.0)
        progress_percentage = (completed_weight / total_weight) * 100 if total_weight > 0 else 0

        self.context.update_metrics(
            'workflow',
            {
                'current_step': current_step,
                'total_steps': total_steps,
                'progress_percentage': progress_percentage,
                'completed_weight': completed_weight,
                'total_weight': total_weight,
            },
        )

    def _log_workflow_start(self) -> None:
        """Log workflow start."""
        steps = self.step_registry.get_steps()
        step_names = [step.name for step in steps]
        self.context.run.log_message(f'Starting upload workflow with {len(steps)} steps: {step_names}')

    def _log_workflow_complete(self) -> None:
        """Log workflow completion."""
        self.context.run.log_message('Upload workflow completed successfully')

    def _log_workflow_error(self, error: str) -> None:
        """Log workflow error."""
        self.context.run.log_message(f'Upload workflow failed: {error}')

    def _log_step_failure(self, step: 'BaseStep', error: str) -> None:
        """Log step failure."""
        self.context.run.log_message(f"Step '{step.name}' failed: {error}")

    def _log_step_exception(self, step: 'BaseStep', error: str) -> None:
        """Log step exception together with the traceback of the exception being handled."""
        self.context.run.log_message(f"Exception in step '{step.name}': {error}")
        # Log full traceback for debugging
        self.context.run.log_message(f'Traceback: {traceback.format_exc()}')

    def _log_rollback_start(self) -> None:
        """Log rollback start."""
        self.context.run.log_message(f'Starting rollback of {len(self.executed_steps)} executed steps')

    def _log_rollback_complete(self) -> None:
        """Log rollback completion."""
        self.context.run.log_message('Rollback completed')

    def _log_step_rollback(self, step: 'BaseStep') -> None:
        """Log step rollback."""
        self.context.run.log_message(f'Rolling back step: {step.name}')

    def _log_rollback_error(self, step: 'BaseStep', error: str) -> None:
        """Log rollback error."""
        self.context.run.log_message(f"Error rolling back step '{step.name}': {error}")

    def get_executed_steps(self) -> List['BaseStep']:
        """Get a copy of the list of successfully executed (non-skipped) steps."""
        return self.executed_steps.copy()

    def get_current_step_index(self) -> int:
        """Get current step index."""
        return self.current_step_index

    def get_total_steps(self) -> int:
        """Get total number of steps."""
        return len(self.step_registry.get_steps())

    def is_rollback_executed(self) -> bool:
        """Check if rollback has been executed."""
        return self._rollback_executed

    def get_workflow_summary(self) -> Dict[str, Any]:
        """Get workflow execution summary."""
        steps = self.step_registry.get_steps()
        return {
            'total_steps': len(steps),
            'executed_steps': len(self.executed_steps),
            'current_step_index': self.current_step_index,
            'step_names': [step.name for step in steps],
            'executed_step_names': [step.name for step in self.executed_steps],
            'rollback_executed': self.is_rollback_executed(),
            'strategies': list(self.strategies.keys()) if self.strategies else [],
        }