synapse-sdk 2025.9.1__py3-none-any.whl → 2025.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synapse-sdk might be problematic.
- synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
- synapse_sdk/devtools/docs/docs/api/clients/backend.md +368 -1
- synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
- synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
- synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
- synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
- synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +1497 -213
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +378 -11
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +1752 -572
- synapse_sdk/devtools/docs/sidebars.ts +7 -0
- synapse_sdk/plugins/README.md +1 -2
- synapse_sdk/plugins/categories/base.py +7 -0
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +160 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +1 -1
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +1 -2
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +154 -531
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +66 -29
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +182 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +106 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +80 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +66 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +101 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +89 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +96 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +61 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +86 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +34 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +233 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +253 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/async_upload.py +109 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +43 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +45 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +194 -83
- synapse_sdk/plugins/categories/upload/templates/config.yaml +4 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +269 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +71 -27
- synapse_sdk/plugins/models.py +7 -0
- synapse_sdk/shared/__init__.py +21 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/METADATA +2 -1
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/RECORD +79 -28
- synapse_sdk/plugins/categories/export/actions/export.py +0 -385
- synapse_sdk/plugins/categories/export/enums.py +0 -7
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/action.py (+154 -531):

```diff
@@ -1,49 +1,41 @@
-import asyncio
-import os
-import shutil
-from datetime import datetime
-from io import BytesIO
-from pathlib import Path
-from typing import Any, Awaitable, Dict, List, Optional, TypeVar
-
-from openpyxl import load_workbook
-from openpyxl.utils.exceptions import InvalidFileException
-
-from synapse_sdk.clients.exceptions import ClientError
-from synapse_sdk.clients.utils import get_batched_list
-from synapse_sdk.clients.validators.collections import FileSpecificationValidator
+from typing import Any, Dict
+
 from synapse_sdk.plugins.categories.base import Action
 from synapse_sdk.plugins.categories.decorators import register_action
-from synapse_sdk.plugins.categories.upload.actions.upload.models import UploadParams
 from synapse_sdk.plugins.enums import PluginCategory, RunMethod
 from synapse_sdk.plugins.exceptions import ActionError
-from synapse_sdk.utils.storage import get_pathlib

-from .
-from .
+from .context import UploadContext
+from .factory import StrategyFactory
+from .models import UploadParams
+from .orchestrator import UploadOrchestrator
+from .registry import StepRegistry
 from .run import UploadRun
-from .
-
-
+from .steps.cleanup import CleanupStep
+from .steps.collection import AnalyzeCollectionStep
+from .steps.generate import GenerateDataUnitsStep
+from .steps.initialize import InitializeStep
+from .steps.metadata import ProcessMetadataStep
+from .steps.organize import OrganizeFilesStep
+from .steps.upload import UploadFilesStep
+from .steps.validate import ValidateFilesStep
+from .utils import ExcelSecurityConfig


 @register_action
 class UploadAction(Action):
-    """
+    """Upload action for processing and uploading files to storage.

-
-
-
-    progress tracking and error handling.
+    This implementation uses Strategy and Facade patterns to provide a clean,
+    extensible architecture for upload operations. The monolithic legacy
+    implementation has been refactored into pluggable strategies and workflow steps.

     Features:
-    -
-    -
-    -
-    -
-    -
-    - Progress tracking with detailed metrics
-    - Comprehensive error logging
+    - Strategy pattern for pluggable behaviors (validation, file discovery, etc.)
+    - Facade pattern with UploadOrchestrator for simplified workflow management
+    - Step-based workflow with automatic rollback on failures
+    - Comprehensive error handling and progress tracking
+    - Easy extensibility for new strategies and workflow steps

     Class Attributes:
         name (str): Action identifier ('upload')
```
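The rewritten import block and class docstring above map out the refactor: each workflow phase now lives in its own `steps/` module, a `StepRegistry` fixes execution order, and a `StrategyFactory` supplies pluggable behaviors. A minimal sketch of how such a step/registry pair composes (the `StepResult` shape and the `execute` signature here are illustrative assumptions, not the SDK's actual interfaces):

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class StepResult:
    """Outcome of one workflow step (assumed shape)."""
    success: bool
    data: Dict[str, Any] = field(default_factory=dict)
    error: str = ''


class Step:
    """Base step: a named unit of work over a shared context."""
    name = 'base'

    def execute(self, context: Dict[str, Any]) -> StepResult:
        raise NotImplementedError


class StepRegistry:
    """Holds steps; execution order is registration order."""

    def __init__(self) -> None:
        self._steps: List[Step] = []

    def register(self, step: Step) -> None:
        self._steps.append(step)

    def get_steps(self) -> List[Step]:
        return list(self._steps)


class InitializeStep(Step):
    name = 'initialize'

    def execute(self, context: Dict[str, Any]) -> StepResult:
        context['initialized'] = True
        return StepResult(success=True, data={'initialized': True})


registry = StepRegistry()
registry.register(InitializeStep())
for step in registry.get_steps():
    print(step.name, step.execute({}).success)
```

Giving every step a stable `name` is what lets the new `start()` implementation further down address steps individually and intercept the workflow between the organize and validate phases.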
```diff
@@ -60,11 +52,11 @@ class UploadAction(Action):
         ...     'name': 'Data Upload',
         ...     'path': '/data/files',
         ...     'storage': 1,
-        ...     '
+        ...     'data_collection': 5
         ...     },
         ...     plugin_config=config
         ... )
-        >>> result = action.
+        >>> result = action.start()
         """

     name = 'upload'
```
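The restored doctest shows the intended entry point. Spelled out as a plain script (a sketch: the constructor call and result keys follow the docstring and the counters the old `start()` maintained; `plugin_config` here is a stand-in for the real config loaded from the plugin's config.yaml):

```python
from synapse_sdk.plugins.categories.upload.actions.upload.action import UploadAction

action = UploadAction(
    params={
        'name': 'Data Upload',
        'path': '/data/files',   # resolved relative to the chosen storage
        'storage': 1,            # storage backend ID
        'data_collection': 5,    # target data collection ID
    },
    plugin_config={},  # placeholder; real config comes from the plugin's config.yaml
)

result = action.start()
# The legacy implementation tracked counters such as 'uploaded_files_count'
# and 'generated_data_units_count'; the orchestrator result is assumed to
# carry equivalent metrics.
print(result)
```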
```diff
@@ -97,527 +89,158 @@ class UploadAction(Action):
     }

     def __init__(self, *args, **kwargs):
+        """Initialize the upload action."""
         super().__init__(*args, **kwargs)
-        self.excel_config = ExcelSecurityConfig()
-        self.excel_utils = ExcelMetadataUtils(self.excel_config)
-
-    def get_uploader(self, path, file_specification, organized_files, params: Dict = {}):
-        """Get uploader from entrypoint."""
-        return self.entrypoint(
-            self.run, path, file_specification, organized_files, extra_params=params.get('extra_params')
-        )
-
-    def _discover_files_recursive(self, dir_path: Path) -> List[Path]:
-        return [file_path for file_path in dir_path.rglob('*') if file_path.is_file()]
-
-    def _discover_files_non_recursive(self, dir_path: Path) -> List[Path]:
-        return [file_path for file_path in dir_path.glob('*') if file_path.is_file()]
-
-    def _validate_excel_security(self, excel_path: Path) -> None:
-        file_size = excel_path.stat().st_size
-        if file_size > self.excel_config.MAX_FILE_SIZE_BYTES:
-            raise ExcelSecurityError(
-                f'Excel file too large: {file_size} bytes (max: {self.excel_config.MAX_FILE_SIZE_BYTES})'
-            )
-
-        estimated_memory = file_size * 3
-        if estimated_memory > self.excel_config.MAX_MEMORY_USAGE_BYTES:
-            raise ExcelSecurityError(
-                f'Excel file may consume too much memory: ~{estimated_memory} bytes '
-                f'(max: {self.excel_config.MAX_MEMORY_USAGE_BYTES})'
-            )
-
-    def _prepare_excel_file(self, excel_path: Path) -> BytesIO:
-        self._validate_excel_security(excel_path)
-        excel_bytes = excel_path.read_bytes()
-        return BytesIO(excel_bytes)
-
-    def _process_excel_headers(self, headers: tuple) -> tuple:
-        if len(headers) < 2:
-            raise ExcelParsingError('Excel file must have at least 2 columns (file name and metadata)')
-        self._validate_excel_content(headers, 0)
-        return headers
-
-    def _process_excel_data_row(self, row: tuple, headers: tuple) -> Optional[Dict[str, Any]]:
-        if not row[0] or str(row[0]).strip() == '':
-            return None
-
-        file_name = str(row[0]).strip()
-        if not self.excel_utils.is_valid_filename_length(file_name):
-            self.run.log_message_with_code(LogCode.FILENAME_TOO_LONG, file_name[:50])
-            return None
-
-        file_metadata: Dict[str, Any] = {}
-        for i, value in enumerate(row[1:], start=1):
-            if value is not None and i < len(headers):
-                header_value = headers[i]
-                column_name = str(header_value).strip() if header_value is not None else f'column_{i}'
-
-                column_name = self.excel_utils.validate_and_truncate_string(
-                    column_name, self.excel_config.MAX_COLUMN_NAME_LENGTH
-                )
-                str_value = self.excel_utils.validate_and_truncate_string(
-                    str(value), self.excel_config.MAX_METADATA_VALUE_LENGTH
-                )
-                file_metadata[column_name] = str_value
-
-        return {file_name: file_metadata} if file_metadata else None
-
-    def _process_excel_worksheet(self, worksheet) -> Dict[str, Dict[str, Any]]:
-        if worksheet is None:
-            raise ExcelParsingError('Excel file has no active worksheet')
-
-        metadata_dict: Dict[str, Dict[str, Any]] = {}
-        headers: Optional[tuple] = None
-        data_row_count = 0
-        validation_interval = getattr(self.excel_config, 'VALIDATION_CHECK_INTERVAL', 1000)
-
-        for row_idx, row in enumerate(worksheet.iter_rows(values_only=True)):
-            if not row or all(cell is None or str(cell).strip() == '' for cell in row):
-                continue
-
-            if row_idx == 0:
-                headers = self._process_excel_headers(row)
-                continue
-
-            if headers is None:
-                raise ExcelParsingError('Excel file missing header row')
-
-            data_row_count += 1
-
-            if data_row_count % validation_interval == 0:
-                self._validate_excel_content(headers, data_row_count)
-
-            row_result = self._process_excel_data_row(row, headers)
-            if row_result:
-                metadata_dict.update(row_result)
-
-        self._validate_excel_content(headers or (), data_row_count)
-
-        return metadata_dict
-
-    def _validate_excel_content(self, headers: tuple, row_count: int) -> None:
-        if len(headers) > self.excel_config.MAX_COLUMNS:
-            raise ExcelParsingError(f'Too many columns: {len(headers)} (max: {self.excel_config.MAX_COLUMNS})')
-
-        if row_count > self.excel_config.MAX_ROWS:
-            raise ExcelParsingError(f'Too many rows: {row_count} (max: {self.excel_config.MAX_ROWS})')
-
-    def _find_excel_metadata_file(self, pathlib_cwd: Path) -> Optional[Path]:
-        for extension in ['.xlsx', '.xls']:
-            excel_path = pathlib_cwd / f'meta{extension}'
-            if excel_path.exists() and excel_path.is_file():
-                return excel_path
-        return None
-
-    def _read_excel_metadata(self, pathlib_cwd: Path) -> Dict[str, Dict[str, Any]]:
-        excel_path = None
-
-        excel_metadata_path = self.params.get('excel_metadata_path')
-        if excel_metadata_path:
-            excel_path = pathlib_cwd / excel_metadata_path
-            if not excel_path.exists():
-                self.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
-                return {}
-        else:
-            excel_path = self._find_excel_metadata_file(pathlib_cwd)
-            if not excel_path:
-                return {}

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        except MemoryError:
-            self.run.log_message_with_code(LogCode.EXCEL_FILE_TOO_LARGE)
-            raise ExcelSecurityError('Excel file exceeds memory limits')
-        except (OSError, IOError) as e:
-            self.run.log_message_with_code(LogCode.EXCEL_FILE_ACCESS_ERROR, str(e))
-            raise ExcelParsingError(f'File access error: {str(e)}')
-        except Exception as e:
-            self.run.log_message_with_code(LogCode.EXCEL_UNEXPECTED_ERROR, str(e))
-            raise ExcelParsingError(f'Unexpected error: {str(e)}')
+        # Initialize Excel configuration from config.yaml
+        self.excel_config = ExcelSecurityConfig.from_action_config(self.config)
+        self.strategy_factory = StrategyFactory()
+        self.step_registry = StepRegistry()
+        self._configure_workflow()
+
+    def _configure_workflow(self) -> None:
+        """Configure workflow steps based on parameters.
+
+        Registers all workflow steps in the correct order. Steps can be
+        dynamically added, removed, or reordered for different use cases.
+        """
+        # Register steps in execution order
+        self.step_registry.register(InitializeStep())
+        self.step_registry.register(ProcessMetadataStep())
+        self.step_registry.register(AnalyzeCollectionStep())
+        self.step_registry.register(OrganizeFilesStep())
+        self.step_registry.register(ValidateFilesStep())
+        self.step_registry.register(UploadFilesStep())
+        self.step_registry.register(GenerateDataUnitsStep())
+        self.step_registry.register(CleanupStep())

     def start(self) -> Dict[str, Any]:
-
+        """Execute upload workflow with uploader integration.

-
-
-            raise ActionError('Storage parameter is required')
-        storage = self.client.get_storage(storage_id)
+        This method integrates the essential uploader mechanism with the new
+        strategy pattern architecture while maintaining backward compatibility.

-
-
-            raise ActionError('Path parameter is required')
-        pathlib_cwd = get_pathlib(storage, path)
+        Returns:
+            Dict[str, Any]: Upload result with file counts, success status, and metrics

-
+        Raises:
+            ActionError: If upload workflow fails
+        """
         try:
-
-
-            self.run.log_message_with_code(LogCode.EXCEL_METADATA_LOADED, len(excel_metadata))
-        except ExcelSecurityError as e:
-            self.run.log_message_with_code(LogCode.EXCEL_SECURITY_VIOLATION, str(e))
-            return result
-        except ExcelParsingError as e:
-            if self.params.get('excel_metadata_path'):
-                self.run.log_message_with_code(LogCode.EXCEL_PARSING_ERROR, str(e))
-                return result
-            else:
-                self.run.log_message_with_code(LogCode.EXCEL_PARSING_ERROR, str(e))
-                excel_metadata = {}
-
-        file_specification_template = self._analyze_collection()
-        organized_files = self._organize_files(pathlib_cwd, file_specification_template, excel_metadata)
-
-        uploader = self.get_uploader(pathlib_cwd, file_specification_template, organized_files)
-
-        organized_files = uploader.handle_upload_files()
-
-        if not self._validate_organized_files(organized_files, file_specification_template):
-            self.run.log_message_with_code(LogCode.VALIDATION_FAILED)
-            raise ActionError('Upload is aborted due to validation errors.')
-
-        if not organized_files:
-            self.run.log_message_with_code(LogCode.NO_FILES_FOUND)
-            raise ActionError('Upload is aborted due to missing files.')
-
-        if self.params.get('use_async_upload', True):
-            uploaded_files = self.run_async(self._upload_files_async(organized_files, 10))
-        else:
-            uploaded_files = self._upload_files(organized_files)
-        result['uploaded_files_count'] = len(uploaded_files)
-
-        if not uploaded_files:
-            self.run.log_message_with_code(LogCode.NO_FILES_UPLOADED)
-            raise ActionError('Upload is aborted due to no uploaded files.')
-        generated_data_units = self._generate_data_units(
-            uploaded_files, self.params.get('creating_data_unit_batch_size', 1)
-        )
-        result['generated_data_units_count'] = len(generated_data_units)
+            # Ensure params is not None
+            params = self.params or {}

-
-            self.run.
-            raise ActionError('Upload is aborted due to no generated data units.')
+            # Create upload context for sharing state between steps
+            context = UploadContext(params, self.run, self.client)

-
+            # Configure strategies based on parameters with context
+            strategies = self._configure_strategies(context)

-
-
+            # Create orchestrator but run it with uploader integration
+            orchestrator = UploadOrchestrator(context, self.step_registry, strategies)

-
-
+            # Execute the workflow steps, but intercept after organize step
+            result = self._execute_with_uploader_integration(orchestrator, context)

-
-        if collection_id is None:
-            raise ActionError('Data collection parameter is required')
-        self.run.set_progress(1, 2, category='analyze_collection')
-
-        collection = self.run.client.get_data_collection(collection_id)
-        self.run.set_progress(2, 2, category='analyze_collection')
-
-        return collection['file_specifications']
-
-    def _upload_files(self, organized_files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-        organized_files_count = len(organized_files)
-        self.run.set_progress(0, organized_files_count, category='upload_data_files')
-        self.run.log_message_with_code(LogCode.UPLOADING_DATA_FILES)
-
-        client = self.run.client
-        collection_id = self.params.get('data_collection')
-        if collection_id is None:
-            raise ActionError('Data collection parameter is required')
-        upload_result = []
-        current_progress = 0
-        success_count = 0
-        failed_count = 0
-
-        self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
-
-        for organized_file in organized_files:
-            try:
-                use_chunked_upload = self._requires_chunked_upload(organized_file)
-                uploaded_data_file = client.upload_data_file(organized_file, collection_id, use_chunked_upload)
-                self.run.log_data_file(organized_file, UploadStatus.SUCCESS)
-                success_count += 1
-                upload_result.append(uploaded_data_file)
-            except Exception as e:
-                self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                self.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, str(e))
-                failed_count += 1
+            return result

-
-
-            self.run
+        except Exception as e:
+            # Log the error and re-raise as ActionError
+            if self.run:
+                self.run.log_message(f'Upload workflow failed: {str(e)}')
+            raise ActionError(f'Upload failed: {str(e)}')
+
+    def _execute_with_uploader_integration(self, orchestrator, context) -> Dict[str, Any]:
+        """Execute workflow with proper uploader integration."""
+        # Inject strategies into context before executing steps
+        orchestrator._inject_strategies_into_context()
+
+        # Run initial steps up to file organization
+        steps = orchestrator.step_registry.get_steps()
+
+        # Execute steps one by one until we reach the organization step
+        for i, step in enumerate(steps):
+            if step.name in ['initialize', 'process_metadata', 'analyze_collection', 'organize_files']:
+                try:
+                    result = step.safe_execute(context)
+                    context.update(result)
+                    if not result.success:
+                        raise Exception(f"Step '{step.name}' failed: {result.error}")
+                except Exception as e:
+                    raise ActionError(f"Failed at step '{step.name}': {str(e)}")

-
+        # Get organized files from context
+        organized_files = context.get('organized_files', [])
+        file_specification_template = context.get('file_specification_template', {})
+        pathlib_cwd = context.get('pathlib_cwd')

-
+        if not organized_files or not file_specification_template or not pathlib_cwd:
+            raise ActionError('Required data not available from workflow steps')

-
-
+        # CRITICAL: Integrate with existing uploader mechanism
+        uploader = self.get_uploader(pathlib_cwd, file_specification_template, organized_files, self.params)
+        organized_files = uploader.handle_upload_files()

-
-
+        # Update context with processed files
+        context.set('organized_files', organized_files)

-
-
-
-                    future = executor.submit(_run_in_thread)
-                    return future.result()
-            except RuntimeError:
-                return asyncio.run(coro)
-
-    async def _upload_files_async(
-        self, organized_files: List[Dict[str, Any]], max_concurrent: int = 10
-    ) -> List[Dict[str, Any]]:
-        organized_files_count = len(organized_files)
-        self.run.set_progress(0, organized_files_count, category='upload_data_files')
-        self.run.log_message_with_code(LogCode.UPLOADING_DATA_FILES)
-
-        client = self.run.client
-        collection_id = self.params.get('data_collection')
-        if collection_id is None:
-            raise ActionError('Data collection parameter is required')
-        upload_result = []
-        success_count = 0
-        failed_count = 0
-
-        self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
-
-        semaphore = asyncio.Semaphore(max_concurrent)
-
-        async def upload_single_file(organized_file):
-            async with semaphore:
-                loop = asyncio.get_event_loop()
+        # Execute remaining steps
+        for step in steps:
+            if step.name in ['validate_files', 'upload_files', 'generate_data_units', 'cleanup']:
                 try:
-
-
-
-
-                    self.run.log_data_file(organized_file, UploadStatus.SUCCESS)
-                    return {'status': 'success', 'result': uploaded_data_file}
-                except ClientError as e:
-                    self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                    self.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, f'Client error: {str(e)}')
-                    return {'status': 'failed', 'error': str(e), 'error_type': 'client_error', 'retryable': True}
-                except (OSError, IOError) as e:
-                    self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                    self.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, f'File system error: {str(e)}')
-                    return {'status': 'failed', 'error': str(e), 'error_type': 'file_error', 'retryable': False}
-                except MemoryError as e:
-                    self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                    self.run.log_message_with_code(
-                        LogCode.FILE_UPLOAD_FAILED, f'Memory error (file too large): {str(e)}'
-                    )
-                    return {'status': 'failed', 'error': str(e), 'error_type': 'memory_error', 'retryable': False}
-                except asyncio.TimeoutError as e:
-                    self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                    self.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, f'Upload timeout: {str(e)}')
-                    return {'status': 'failed', 'error': str(e), 'error_type': 'timeout_error', 'retryable': True}
-                except ValueError as e:
-                    self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                    self.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, f'Data validation error: {str(e)}')
-                    return {'status': 'failed', 'error': str(e), 'error_type': 'validation_error', 'retryable': False}
+                    result = step.safe_execute(context)
+                    context.update(result)
+                    if not result.success:
+                        raise Exception(f"Step '{step.name}' failed: {result.error}")
                 except Exception as e:
-
-                    self.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, f'Unexpected error: {str(e)}')
-                    return {'status': 'failed', 'error': str(e), 'error_type': 'unknown_error', 'retryable': False}
-
-        tasks = [upload_single_file(organized_file) for organized_file in organized_files]
-
-        current_progress = 0
-        for completed_task in asyncio.as_completed(tasks):
-            result = await completed_task
-            current_progress += 1
-
-            if result['status'] == 'success':
-                success_count += 1
-                upload_result.append(result['result'])
-            else:
-                failed_count += 1
-
-            self._update_metrics(organized_files_count, success_count, failed_count, 'data_files')
-            self.run.set_progress(current_progress, organized_files_count, category='upload_data_files')
-
-        self.run.set_progress(organized_files_count, organized_files_count, category='upload_data_files')
-
-        return upload_result
-
-    def _generate_data_units(self, uploaded_files: List[Dict[str, Any]], batch_size: int) -> List[Dict[str, Any]]:
-        upload_result_count = len(uploaded_files)
-        self.run.set_progress(0, upload_result_count, category='generate_data_units')
-        self.run.log_message_with_code(LogCode.GENERATING_DATA_UNITS)
-
-        client = self.run.client
-        generated_data_units = []
-        current_progress = 0
-        success_count = 0
-        failed_count = 0
-
-        batches = get_batched_list(uploaded_files, batch_size)
-        batches_count = len(batches)
-
-        self._update_metrics(upload_result_count, success_count, failed_count, 'data_units')
-
-        for batch in batches:
-            try:
-                created_data_units = client.create_data_units(batch)
-                success_count += len(created_data_units)
-                generated_data_units.append(created_data_units)
-                for created_data_unit in created_data_units:
-                    self.run.log_data_unit(
-                        created_data_unit['id'], UploadStatus.SUCCESS, data_unit_meta=created_data_unit.get('meta')
-                    )
-            except Exception as e:
-                failed_count += len(batch)
-                self.run.log_message_with_code(LogCode.DATA_UNIT_BATCH_FAILED, str(e))
-                for _ in batch:
-                    self.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)
-
-            current_progress += 1
-            self._update_metrics(upload_result_count, success_count, failed_count, 'data_units')
-            self.run.set_progress(current_progress, batches_count, category='generate_data_units')
-
-        self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
-
-        return sum(generated_data_units, [])
-
-    def _validate_organized_files(
-        self, organized_files: List[Dict[str, Any]], file_specification_template: Dict[str, Any]
-    ) -> bool:
-        validator = FileSpecificationValidator(file_specification_template, organized_files)
-        return validator.validate()
-
-    def _organize_files(
-        self,
-        directory: Path,
-        file_specification: List[Dict[str, Any]],
-        excel_metadata: Optional[Dict[str, Dict[str, Any]]] = None,
-    ) -> List[Dict[str, Any]]:
-        organized_files: List[Dict[str, Any]] = []
-
-        type_dirs: Dict[str, Path] = {}
-
-        for spec in file_specification:
-            spec_name = spec['name']
-            spec_dir = directory / spec_name
-            if spec_dir.exists() and spec_dir.is_dir():
-                type_dirs[spec_name] = spec_dir
-
-        if type_dirs:
-            self.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
-
-        if not type_dirs:
-            self.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
-            return organized_files
-
-        self.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
-        self.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
-
-        dataset_files = {}
-        required_specs = [spec['name'] for spec in file_specification if spec.get('is_required', False)]
-
-        is_recursive = self.params.get('is_recursive', True)
-
-        for spec_name, dir_path in type_dirs.items():
-            if is_recursive:
-                files_list = self._discover_files_recursive(dir_path)
-            else:
-                files_list = self._discover_files_non_recursive(dir_path)
-
-            for file_path in files_list:
-                file_name = file_path.stem
-
-                if file_name not in dataset_files:
-                    dataset_files[file_name] = {}
-
-                if spec_name not in dataset_files[file_name]:
-                    dataset_files[file_name][spec_name] = file_path
-                else:
-                    existing_file = dataset_files[file_name][spec_name]
-                    if file_path.stat().st_mtime > existing_file.stat().st_mtime:
-                        dataset_files[file_name][spec_name] = file_path
-
-        if not dataset_files:
-            self.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
-            return organized_files
-
-        self.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(dataset_files))
-
-        for file_name, files_dict in sorted(dataset_files.items()):
-            if all(req in files_dict for req in required_specs):
-                file_extensions = {}
-                for file_path in files_dict.values():
-                    ext = file_path.suffix.lower()
-                    if ext:
-                        file_extensions[ext] = file_extensions.get(ext, 0) + 1
-
-                origin_file_extension = max(file_extensions.items(), key=lambda x: x[1])[0] if file_extensions else ''
-
-                meta_data: Dict[str, Any] = {
-                    'origin_file_stem': file_name,
-                    'origin_file_extension': origin_file_extension,
-                    'created_at': datetime.now().isoformat(),
-                }
-
-                if excel_metadata and file_name in excel_metadata:
-                    meta_data.update(excel_metadata[file_name])
-
-                organized_files.append({'files': files_dict, 'meta': meta_data})
-            else:
-                missing = [req for req in required_specs if req not in files_dict]
-                self.run.log_message_with_code(LogCode.MISSING_REQUIRED_FILES, file_name, ', '.join(missing))
-
-        return organized_files
-
-    def _get_file_size_mb(self, file_path: Path) -> float:
-        return file_path.stat().st_size / (1024 * 1024)
+                    raise ActionError(f"Failed at step '{step.name}': {str(e)}")

-
-
-        for file_path in organized_file.get('files', {}).values():
-            if isinstance(file_path, Path) and self._get_file_size_mb(file_path) > max_file_size_mb:
-                return True
-        return False
+        # Return the final result from context
+        return context.get_result()

-    def
-
-        try:
-            temp_path = Path(os.getcwd()) / 'temp'
-        except (FileNotFoundError, OSError):
-            return
+    def _configure_strategies(self, context=None) -> Dict[str, Any]:
+        """Configure strategies based on parameters.

-
-
-
-            shutil.rmtree(temp_path, ignore_errors=True)
-            self.run.log_message(f'Cleaned up temporary directory: {temp_path}')
+        Uses the Strategy pattern to create appropriate strategy implementations
+        based on the action parameters. This allows for runtime selection of
+        different behaviors (sync vs async upload, recursive vs flat discovery, etc.).

-
-
-            raise ValueError('Run instance not properly initialized')
+        Args:
+            context: UploadContext for strategies that need access to client/run

-
+        Returns:
+            Dict[str, Any]: Dictionary of strategy instances keyed by type
+        """
+        # Ensure params is not None
+        params = self.params or {}

-
-
+        return {
+            'validation': self.strategy_factory.create_validation_strategy(params, context),
+            'file_discovery': self.strategy_factory.create_file_discovery_strategy(params, context),
+            'metadata': self.strategy_factory.create_metadata_strategy(params, context),
+            'upload': self.strategy_factory.create_upload_strategy(params, context),
+            'data_unit': self.strategy_factory.create_data_unit_strategy(params, context),
+        }
+
+    def get_uploader(self, path, file_specification, organized_files, params: Dict = {}):
+        """Get uploader from entrypoint (compatibility method).
+
+        This method is kept for backward compatibility with existing code
+        that may still call it directly.
+        """
+        return self.entrypoint(
+            self.run, path, file_specification, organized_files, extra_params=params.get('extra_params')
         )
-
+
+    def get_workflow_summary(self) -> Dict[str, Any]:
+        """Get summary of configured workflow.
+
+        Returns:
+            Dict[str, Any]: Summary of steps and strategies
+        """
+        return {
+            'steps': [step.name for step in self.step_registry.get_steps()],
+            'step_count': len(self.step_registry),
+            'total_progress_weight': self.step_registry.get_total_progress_weight(),
+            'available_strategies': self.strategy_factory.get_available_strategies(),
+        }
```
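Beyond `start()`, the new `get_workflow_summary()` makes the configured pipeline inspectable. Assuming an action instance as in the earlier sketch, the call would return a mapping along these lines (the step names are taken from the step lists in `_execute_with_uploader_integration`; the weight and strategy values are illustrative):

```python
summary = action.get_workflow_summary()
# Expected shape (values illustrative):
# {
#     'steps': ['initialize', 'process_metadata', 'analyze_collection',
#               'organize_files', 'validate_files', 'upload_files',
#               'generate_data_units', 'cleanup'],
#     'step_count': 8,
#     'total_progress_weight': ...,   # sum of per-step progress weights
#     'available_strategies': {...},  # per-type options from StrategyFactory
# }
```

A summary like this is useful for spotting a misconfigured workflow before any file is touched.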