synapse-sdk 2025.9.5__py3-none-any.whl → 2025.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of synapse-sdk has been flagged as a potentially problematic release.
- synapse_sdk/clients/base.py +129 -9
- synapse_sdk/devtools/docs/docs/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/docs/api/plugins/models.md +58 -3
- synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +663 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +39 -0
- synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/plugins/models.md +114 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +621 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +39 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
- synapse_sdk/devtools/docs/sidebars.ts +45 -1
- synapse_sdk/plugins/README.md +487 -80
- synapse_sdk/plugins/categories/base.py +1 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +8 -3
- synapse_sdk/plugins/categories/export/actions/export/utils.py +108 -8
- synapse_sdk/plugins/categories/export/templates/config.yaml +18 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +97 -0
- synapse_sdk/plugins/categories/neural_net/actions/train.py +592 -22
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +150 -3
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +8 -1
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +0 -1
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +6 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +24 -9
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +130 -18
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +147 -37
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +10 -5
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +31 -6
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +65 -37
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +17 -2
- synapse_sdk/plugins/categories/upload/templates/README.md +394 -0
- synapse_sdk/plugins/models.py +62 -0
- synapse_sdk/utils/file/download.py +261 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/METADATA +15 -2
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/RECORD +74 -43
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py (new file):

```diff
@@ -0,0 +1,127 @@
+"""Pre-processor management strategies for ToTask action."""
+
+from typing import Any, Dict
+
+from .base import PreProcessorStrategy, ToTaskContext
+
+
+class PreProcessorManagementStrategy(PreProcessorStrategy):
+    """Strategy for managing pre-processor lifecycle."""
+
+    def get_preprocessor_info(self, context: ToTaskContext, preprocessor_id: int) -> Dict[str, Any]:
+        """Get pre-processor information from the backend.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_id: The pre-processor ID
+
+        Returns:
+            Dict with pre-processor info or error
+        """
+        try:
+            client = context.client
+            pre_processor_response = client.get_plugin_release(preprocessor_id)
+            if isinstance(pre_processor_response, str):
+                return {'success': False, 'error': 'Invalid pre-processor response received'}
+
+            pre_processor: Dict[str, Any] = pre_processor_response
+            config = pre_processor.get('config', {})
+            code = config.get('code')
+            version = pre_processor.get('version')
+
+            if not code or not version:
+                return {'success': False, 'error': 'Invalid pre-processor configuration'}
+
+            return {'success': True, 'code': code, 'version': version}
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to get pre-processor info: {str(e)}'}
+
+    def ensure_preprocessor_running(self, context: ToTaskContext, preprocessor_code: str) -> Dict[str, Any]:
+        """Ensure the pre-processor is running, restart if necessary.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_code: The pre-processor code
+
+        Returns:
+            Dict indicating success or failure
+        """
+        try:
+            client = context.client
+
+            # Check if pre-processor is running
+            serve_applications_response = client.list_serve_applications(params={'plugin_code': preprocessor_code})
+            if isinstance(serve_applications_response, str):
+                return {'success': False, 'error': 'Invalid serve applications response'}
+
+            # Handle the response properly - it should be a dict with 'results' key
+            if not isinstance(serve_applications_response, dict):
+                return {'success': False, 'error': 'Unexpected serve applications response format'}
+
+            serve_applications: Dict[str, Any] = serve_applications_response
+            results = serve_applications.get('results', [])
+            running_serve_apps = [app for app in results if isinstance(app, dict) and app.get('status') == 'RUNNING']
+
+            # If not running, restart the pre-processor
+            if not running_serve_apps:
+                restart_result = self._restart_preprocessor(context, preprocessor_code)
+                if not restart_result['success']:
+                    return restart_result
+
+                # Verify restart was successful
+                serve_applications_response = client.list_serve_applications(params={'plugin_code': preprocessor_code})
+                if isinstance(serve_applications_response, str):
+                    return {'success': False, 'error': 'Failed to verify pre-processor restart'}
+
+                serve_applications = serve_applications_response
+                results = serve_applications.get('results', [])
+                running_serve_apps = [
+                    app for app in results if isinstance(app, dict) and app.get('status') == 'RUNNING'
+                ]
+
+                if not running_serve_apps:
+                    return {'success': False, 'error': 'Pre-processor failed to start after restart'}
+
+            return {'success': True}
+
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to ensure pre-processor running: {str(e)}'}
+
+    def _restart_preprocessor(self, context: ToTaskContext, preprocessor_code: str) -> Dict[str, Any]:
+        """Restart the pre-processor.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_code: The pre-processor code
+
+        Returns:
+            Dict indicating success or failure
+        """
+        try:
+            client = context.client
+
+            # Start the serve application
+            inference_options = context.config.get('inference_options', {})
+            serve_application_deployment_payload = {
+                'agent': context.params.get('agent') if context.params else None,
+                'action': 'deployment',
+                'params': {
+                    'num_cpus': inference_options.get('required_cpu_count', 2),
+                    'num_gpus': inference_options.get('required_gpu_count', 1),
+                },
+                'debug': True,
+            }
+
+            deployment_result = client.run_plugin(
+                preprocessor_code,
+                serve_application_deployment_payload,
+            )
+
+            deployment_job_id = deployment_result.get('job_id')
+            if not deployment_job_id:
+                return {'success': False, 'error': 'No deployment job ID returned'}
+
+            return {'success': True, 'error': 'Pre-processor restarted successfully'}
+
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to restart pre-processor: {str(e)}'}
```
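For orientation, a minimal sketch of how calling code might drive this strategy. The `context` object is assumed to be a populated `ToTaskContext` (defined in `strategies/base.py`, which is not shown in this diff); only the two public methods and their result-dict shapes come from the file above.

```python
# Hypothetical driver; `context` and the preprocessor ID are placeholders.
strategy = PreProcessorManagementStrategy()

info = strategy.get_preprocessor_info(context, preprocessor_id=42)
if not info['success']:
    raise RuntimeError(info['error'])

# On success the dict carries the plugin 'code' and 'version'.
status = strategy.ensure_preprocessor_running(context, preprocessor_code=info['code'])
if not status['success']:
    raise RuntimeError(status['error'])
```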
synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py (new file):

```diff
@@ -0,0 +1,143 @@
+"""Validation strategies for ToTask action."""
+
+from typing import Any, Dict
+
+from ..enums import LogCode
+from .base import ToTaskContext, ValidationStrategy
+
+
+class ProjectValidationStrategy(ValidationStrategy):
+    """Strategy for validating project and data collection."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate project and data collection exist and are accessible.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+            project_id = context.params['project']
+
+            # Validate project response
+            project_response = client.get_project(project_id)
+            if isinstance(project_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_PROJECT_RESPONSE)
+                return {'success': False, 'error': 'Invalid project response received'}
+
+            project: Dict[str, Any] = project_response
+            context.project = project
+
+            # Validate data collection exists
+            data_collection_id = project.get('data_collection')
+            if not data_collection_id:
+                context.logger.log_message_with_code(LogCode.NO_DATA_COLLECTION)
+                return {'success': False, 'error': 'Project does not have a data collection'}
+
+            # Validate data collection response
+            data_collection_response = client.get_data_collection(data_collection_id)
+            if isinstance(data_collection_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_DATA_COLLECTION_RESPONSE)
+                return {'success': False, 'error': 'Invalid data collection response received'}
+
+            data_collection: Dict[str, Any] = data_collection_response
+            context.data_collection = data_collection
+
+            return {'success': True}
+
+        except Exception as e:
+            error_msg = f'Project validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+
+
+class TaskValidationStrategy(ValidationStrategy):
+    """Strategy for validating and discovering tasks."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Discover and validate tasks for processing.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+
+            # Build task query parameters
+            task_ids_query_params = {
+                'project': context.params['project'],
+                'fields': 'id',
+            }
+            if context.params.get('task_filters'):
+                task_ids_query_params.update(context.params['task_filters'])
+
+            # Get tasks
+            task_ids_generator, task_ids_count = client.list_tasks(params=task_ids_query_params, list_all=True)
+            task_ids = [
+                int(item.get('id', 0)) for item in task_ids_generator if isinstance(item, dict) and item.get('id')
+            ]
+
+            # Validate tasks found
+            if not task_ids_count:
+                context.logger.log_message_with_code(LogCode.NO_TASKS_FOUND)
+                return {'success': False, 'error': 'No tasks found to annotate'}
+
+            context.task_ids = task_ids
+            return {'success': True, 'task_count': len(task_ids)}
+
+        except Exception as e:
+            error_msg = f'Task validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+
+
+class TargetSpecificationValidationStrategy(ValidationStrategy):
+    """Strategy for validating target specification for file annotation."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate target specification exists in file specifications.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            # Only validate if using FILE annotation method
+            from ..enums import AnnotationMethod
+
+            if context.annotation_method != AnnotationMethod.FILE:
+                return {'success': True}
+
+            target_specification_name = context.params.get('target_specification_name')
+            if not target_specification_name:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_REQUIRED)
+                return {'success': False, 'error': 'Target specification name is required for file annotation method'}
+
+            # Check if target specification exists in file specifications
+            if not context.data_collection:
+                return {'success': False, 'error': 'Data collection not available for validation'}
+
+            file_specifications = context.data_collection.get('file_specifications', [])
+            target_spec_exists = any(spec.get('name') == target_specification_name for spec in file_specifications)
+
+            if not target_spec_exists:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_NOT_FOUND, target_specification_name)
+                return {
+                    'success': False,
+                    'error': f"Target specification '{target_specification_name}' not found in file specifications",
+                }
+
+            return {'success': True}
+
+        except Exception as e:
+            error_msg = f'Target specification validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
```
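The three strategies share one result-dict convention, and order matters: `ProjectValidationStrategy` populates `context.data_collection`, which `TargetSpecificationValidationStrategy` later reads. The real call site is `orchestrator.py` (not shown in this diff), so the loop below is only an illustrative sketch of that contract.

```python
# Illustrative sketch; `context` is an assumed, fully populated ToTaskContext.
validators = [
    ProjectValidationStrategy(),  # sets context.project and context.data_collection
    TaskValidationStrategy(),  # sets context.task_ids
    TargetSpecificationValidationStrategy(),  # reads context.data_collection
]

for validator in validators:
    result = validator.validate(context)
    if not result['success']:
        # Each strategy has already logged a LogCode before returning.
        raise RuntimeError(result['error'])
```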
synapse_sdk/plugins/categories/upload/actions/upload/__init__.py:

```diff
@@ -1,7 +1,7 @@
 from .action import UploadAction
 from .enums import LOG_MESSAGES, LogCode, UploadStatus
 from .exceptions import ExcelParsingError, ExcelSecurityError
-from .models import UploadParams
+from .models import ExcelMetadataFile, UploadParams
 from .run import UploadRun
 from .utils import ExcelSecurityConfig, PathAwareJSONEncoder
 
@@ -9,6 +9,7 @@ __all__ = [
     'UploadAction',
     'UploadRun',
     'UploadParams',
+    'ExcelMetadataFile',
     'UploadStatus',
     'LogCode',
     'LOG_MESSAGES',
```
synapse_sdk/plugins/categories/upload/actions/upload/action.py:

```diff
@@ -173,11 +173,18 @@ class UploadAction(Action):
         organized_files = context.get('organized_files', [])
         file_specification_template = context.get('file_specification_template', {})
         pathlib_cwd = context.get('pathlib_cwd')
+        use_single_path = context.get_param('use_single_path', True)
 
-        if not organized_files or not file_specification_template or not pathlib_cwd:
+        # Validate required data based on mode
+        if not organized_files or not file_specification_template:
             raise ActionError('Required data not available from workflow steps')
 
+        # In single-path mode, pathlib_cwd is required
+        if use_single_path and not pathlib_cwd:
+            raise ActionError('pathlib_cwd is required in single-path mode')
+
         # CRITICAL: Integrate with existing uploader mechanism
+        # In multi-path mode, pathlib_cwd may be None, but uploader should still work
         uploader = self.get_uploader(pathlib_cwd, file_specification_template, organized_files, self.params)
         organized_files = uploader.handle_upload_files()
 
```
synapse_sdk/plugins/categories/upload/actions/upload/models.py:

```diff
@@ -1,14 +1,53 @@
-from pathlib import Path
 from typing import Annotated
 
-from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator
+from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator, model_validator
 from pydantic_core import PydanticCustomError
 
 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.utils.pydantic.validators import non_blank
-from synapse_sdk.utils.storage import get_pathlib
 
-
+
+class ExcelMetadataFile(BaseModel):
+    """Excel metadata configuration for base64 encoded data.
+
+    This model is used specifically for base64-encoded Excel metadata files,
+    typically from web frontends or API integrations.
+
+    Attributes:
+        data: Base64 encoded content of the Excel file
+        filename: Name of the original file before base64 encoding
+
+    Examples:
+        Base64 mode:
+        >>> config = ExcelMetadataFile(
+        ...     data="UEsDBBQABgAI...",
+        ...     filename="metadata.xlsx"
+        ... )
+    """
+
+    data: str
+    filename: str
+
+
+class AssetConfig(BaseModel):
+    """Configuration for individual asset in multi-path mode.
+
+    Used when use_single_path=False to specify unique paths
+    and recursive settings for each file specification.
+
+    Attributes:
+        path (str): File system path for this specific asset
+        is_recursive (bool): Whether to recursively search subdirectories for this asset
+
+    Example:
+        >>> asset_config = AssetConfig(
+        ...     path="/sensors/camera/front",
+        ...     is_recursive=True
+        ... )
+    """
+
+    path: str
+    is_recursive: bool = True
 
 
 class UploadParams(BaseModel):
```
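Since `data` carries a raw base64 payload, building an `ExcelMetadataFile` from a local workbook is a thin wrapper around `base64.b64encode`; a minimal sketch (the path is a placeholder):

```python
import base64
from pathlib import Path

# Encode a local workbook into the base64 form ExcelMetadataFile expects.
xlsx_path = Path('metadata.xlsx')  # placeholder path
excel_metadata = ExcelMetadataFile(
    data=base64.b64encode(xlsx_path.read_bytes()).decode('ascii'),
    filename=xlsx_path.name,
)
```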
synapse_sdk/plugins/categories/upload/actions/upload/models.py (continued):

```diff
@@ -18,45 +57,93 @@ class UploadParams(BaseModel):
     Uses Pydantic for type validation and custom validators to ensure
     storage, data_collection, and project resources exist before processing.
 
+    Supports two modes controlled by use_single_path flag:
+
+    1. Single Path Mode (use_single_path=True, DEFAULT):
+       Traditional mode - all file specifications share one base path.
+       Requires: path, is_recursive
+       Ignores: assets
+
+    2. Multi-Path Mode (use_single_path=False):
+       Advanced mode - each file specification has its own path.
+       Requires: assets (dict with file spec names as keys)
+       Ignores: path, is_recursive
+
     Attributes:
         name (str): Human-readable name for the upload operation
         description (str | None): Optional description of the upload
-
+        use_single_path (bool): Mode selector (True=single path, False=multi-path)
+        path (str | None): Base path for single path mode
+        is_recursive (bool): Global recursive setting for single path mode
+        assets (dict[str, AssetConfig] | None): Per-asset configs for multi-path mode
         storage (int): Storage ID where files will be uploaded
-        data_collection (int): Data
+        data_collection (int): Data collection ID for organizing uploads
         project (int | None): Optional project ID for grouping
-        excel_metadata_path (str | None): Path to Excel metadata file
-
+        excel_metadata_path (str | None): Path to Excel metadata file (traditional, backward compatible)
+            Note: This parameter will be deprecated in a future version. Consider using excel_metadata instead.
+        excel_metadata (ExcelMetadataFile | None): Base64 encoded Excel metadata (for web/API integration)
+            Note: Cannot use both excel_metadata_path and excel_metadata simultaneously
         max_file_size_mb (int): Maximum file size limit in megabytes
         creating_data_unit_batch_size (int): Batch size for data unit creation
         use_async_upload (bool): Whether to use asynchronous upload processing
-        extra_params (dict | None): Extra parameters for the action
-            Example: {"include_metadata": True, "compression": "gzip"}
+        extra_params (dict | None): Extra parameters for the action
 
     Validation:
         - name: Must be non-blank after validation
         - storage: Must exist and be accessible via client API
         - data_collection: Must exist and be accessible via client API
         - project: Must exist if specified, or can be None
-
-
-
-
-
-
-
-
-
+        - use_single_path mode: Validates required fields per mode
+
+    Examples:
+        Single Path Mode (Traditional):
+        >>> params = UploadParams(
+        ...     name="Standard Upload",
+        ...     use_single_path=True,
+        ...     path="/data/experiment_1",
+        ...     is_recursive=True,
+        ...     storage=1,
+        ...     data_collection=5
+        ... )
+
+        Multi-Path Mode (Advanced):
+        >>> params = UploadParams(
+        ...     name="Multi-Source Upload",
+        ...     use_single_path=False,
+        ...     assets={
+        ...         "image_1": AssetConfig(path="/sensors/camera", is_recursive=True),
+        ...         "pcd_1": AssetConfig(path="/sensors/lidar", is_recursive=False)
+        ...     },
+        ...     storage=1,
+        ...     data_collection=5
+        ... )
     """
 
     name: Annotated[str, AfterValidator(non_blank)]
     description: str | None = None
-
+
+    # Mode selector flag (True = single path mode, False = multi-path mode)
+    use_single_path: bool = True
+
+    # Single path mode fields (used when use_single_path=True)
+    path: str | None = None
+    is_recursive: bool = True
+
+    # Multi-path mode fields (used when use_single_path=False)
+    assets: dict[str, AssetConfig] | None = None
+
     storage: int
     data_collection: int
     project: int | None = None
+
+    # Excel metadata - two separate parameters for clarity:
+    # 1. excel_metadata_path: Simple file path string (backward compatible, traditional usage)
+    #    NOTE: Will be deprecated in a future version. Consider using excel_metadata instead.
+    # 2. excel_metadata: Dictionary with base64 encoded data (new, for web/API integration)
+    # TODO: Plan to deprecate excel_metadata_path in a few versions for backward compatibility
     excel_metadata_path: str | None = None
-
+    excel_metadata: ExcelMetadataFile | None = None
+
     max_file_size_mb: int = 50
     creating_data_unit_batch_size: int = 1
     use_async_upload: bool = True
@@ -107,80 +194,33 @@ class UploadParams(BaseModel):
             raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
         return value
 
-    @
-
-
-        if
-
-
-        # Validate file extension
-        if not value.lower().endswith(('.xlsx', '.xls')):
-            raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')
-
-        # Get storage and path from validation data
-        if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
-            # If we don't have storage/path data yet, just validate extension
-            return value
-
-        if info.context is None:
-            raise PydanticCustomError('missing_context', 'Validation context is required.')
-
-        action = info.context['action']
-        client = action.client
-
-        try:
-            # Get storage configuration
-            storage_id = info.data['storage']
-            storage = client.get_storage(storage_id)
-
-            # Skip file system validation if storage doesn't have provider (likely test environment)
-            if not isinstance(storage, dict) or 'provider' not in storage:
-                # Basic validation only - likely in test environment
-                return value
-
-            # Get the actual file system path using storage + path
-            base_path = get_pathlib(storage, info.data['path'])
-
-            # Support both absolute and relative paths
-            if Path(value).is_absolute():
-                excel_path = Path(value)
-            else:
-                excel_path = base_path / value
-
-            if not excel_path.exists():
-                raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')
-
-            # Validate file size
-            file_size = excel_path.stat().st_size
-            excel_config = ExcelSecurityConfig()
-            if file_size > excel_config.MAX_FILE_SIZE_BYTES:
-                max_size_mb = excel_config.MAX_FILE_SIZE_MB
+    @model_validator(mode='after')
+    def validate_path_configuration(self) -> 'UploadParams':
+        """Validate path configuration based on use_single_path mode."""
+        if self.use_single_path:
+            # Single path mode: requires path
+            if not self.path:
                 raise PydanticCustomError(
-                    '
-                    'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
-                    {'max_size_mb': max_size_mb},
+                    'missing_path', "When use_single_path=true (single path mode), 'path' is required"
                 )
+            # Warn if assets is provided in single path mode (it will be ignored)
+            # For now, we'll silently ignore it
+        else:
+            # Multi-path mode: requires assets
+            if not self.assets:
+                raise PydanticCustomError(
+                    'missing_assets',
+                    "When use_single_path=false (multi-path mode), 'assets' must be provided "
+                    'with path configurations for each file specification',
+                )
+            # path and is_recursive are ignored in multi-path mode
 
-
-
-
-
-
-
-
-            if excel_path.suffix.lower() == '.xlsx':
-                if not header.startswith(b'PK'):
-                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
-            elif excel_path.suffix.lower() == '.xls':
-                if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
-                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
-
-        except (OSError, IOError):
-            raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')
-
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
-        except Exception as e:
-            raise PydanticCustomError('validation_error', f'Error validating Excel metadata file: {str(e)}')
+        # Validate excel metadata parameters - cannot use both at the same time
+        if self.excel_metadata_path and self.excel_metadata:
+            raise PydanticCustomError(
+                'conflicting_excel_metadata',
+                "Cannot specify both 'excel_metadata_path' and 'excel_metadata'. "
+                "Use 'excel_metadata_path' for file paths or 'excel_metadata' for base64 encoded data.",
+            )
 
-        return
+        return self
```
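Because this is an `@model_validator(mode='after')`, the mode checks run after field parsing and surface as ordinary pydantic `ValidationError`s. A sketch of the two failure cases (the IDs are placeholders; note that the storage and data_collection field validators shown earlier call the backend, so constructing `UploadParams` outside a real action context may fail before these checks are reached):

```python
from pydantic import ValidationError

try:
    # Single-path mode without a path -> 'missing_path'
    UploadParams(name='demo', use_single_path=True, storage=1, data_collection=5)
except ValidationError as e:
    print(e)

try:
    # Multi-path mode without assets -> 'missing_assets'
    UploadParams(name='demo', use_single_path=False, storage=1, data_collection=5)
except ValidationError as e:
    print(e)
```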
synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py:

```diff
@@ -21,8 +21,8 @@ class CleanupStep(BaseStep):
     def execute(self, context: UploadContext) -> StepResult:
         """Execute cleanup step."""
         try:
-            # Cleanup temporary directory
-            self._cleanup_temp_directory(context)
+            # Cleanup temporary directory - commented out because duplicated process with ray cleanup process
+            # self._cleanup_temp_directory(context)
 
             # Log completion
             context.run.log_message_with_code(LogCode.IMPORT_COMPLETED)
```
synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py:

```diff
@@ -31,7 +31,9 @@ class GenerateDataUnitsStep(BaseStep):
         context.run.log_message_with_code(LogCode.GENERATING_DATA_UNITS)
 
         # Initialize metrics
-
+        initial_metrics = {'stand_by': upload_result_count, 'success': 0, 'failed': 0}
+        context.update_metrics('data_units', initial_metrics)
+        context.run.set_metrics(initial_metrics, category='data_units')
 
         # Get batch size from parameters
         batch_size = context.get_param('creating_data_unit_batch_size', 1)
@@ -49,7 +51,9 @@ class GenerateDataUnitsStep(BaseStep):
         )
 
         # Update final metrics
-
+        final_metrics = {'stand_by': 0, 'success': len(generated_data_units), 'failed': 0}
+        context.update_metrics('data_units', final_metrics)
+        context.run.set_metrics(final_metrics, category='data_units')
 
         # Complete progress
         context.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
```
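Both updates keep the same three-bucket shape, so counts are conserved: every data unit starts in `stand_by` and ends in `success` (or, in principle, `failed`). A toy illustration of that invariant, assuming all units succeed:

```python
# Toy illustration of the data_units metric lifecycle above.
upload_result_count = 10

metrics = {'stand_by': upload_result_count, 'success': 0, 'failed': 0}  # initialize
# ... data units are generated in batches ...
metrics = {'stand_by': 0, 'success': upload_result_count, 'failed': 0}  # finalize

assert sum(metrics.values()) == upload_result_count
```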