synapse-sdk 2025.10.1__py3-none-any.whl → 2025.10.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synapse-sdk might be problematic.
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
- synapse_sdk/devtools/docs/sidebars.ts +27 -1
- synapse_sdk/plugins/README.md +487 -80
- synapse_sdk/plugins/categories/export/actions/export/action.py +8 -3
- synapse_sdk/plugins/categories/export/actions/export/utils.py +108 -8
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +106 -14
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +113 -36
- synapse_sdk/plugins/categories/upload/templates/README.md +365 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/METADATA +1 -1
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/RECORD +50 -22
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py

@@ -1,8 +1,11 @@
+import base64
+import tempfile
 from pathlib import Path

 from ..context import StepResult, UploadContext
 from ..enums import LogCode
 from ..exceptions import ExcelParsingError, ExcelSecurityError
+from ..models import ExcelMetadataFile
 from .base import BaseStep

@@ -25,22 +28,36 @@ class ProcessMetadataStep(BaseStep):
             return self.create_success_result(data={'metadata': {}})

         excel_metadata = {}
+        temp_file_to_cleanup = None

         try:
-            # Check if Excel metadata
-            excel_metadata_path
-
-
-
-
-
-
-
-
-                excel_path = context.pathlib_cwd / excel_metadata_path
-                if not excel_path.exists():
+            # Check if Excel metadata is specified - try both parameters
+            # TODO: Plan to deprecate excel_metadata_path in a few versions (backward compatibility)
+            excel_metadata_path_config = context.get_param('excel_metadata_path')
+            excel_metadata_config = context.get_param('excel_metadata')
+
+            if excel_metadata_path_config:
+                # Traditional path-based approach (will be deprecated in future)
+                excel_path, is_temp = self._resolve_excel_path_from_string(excel_metadata_path_config, context)
+
+                if not excel_path or not excel_path.exists():
                     context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
                     return self.create_success_result(data={'metadata': {}})
+
+                excel_metadata = metadata_strategy.extract(excel_path)
+
+            elif excel_metadata_config:
+                # Base64 encoded approach
+                excel_path, is_temp = self._resolve_excel_path_from_base64(excel_metadata_config, context)
+
+                if not excel_path or not excel_path.exists():
+                    context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
+                    return self.create_success_result(data={'metadata': {}})
+
+                # Track temp file for cleanup
+                if is_temp:
+                    temp_file_to_cleanup = excel_path
+
                 excel_metadata = metadata_strategy.extract(excel_path)
             else:
                 # Look for default metadata files (meta.xlsx, meta.xls)
@@ -65,9 +82,9 @@ class ProcessMetadataStep(BaseStep):
             return self.create_error_result(f'Excel security violation: {str(e)}')

         except ExcelParsingError as e:
-            # If excel_metadata_path was specified, this is an error
+            # If excel_metadata_path or excel_metadata was specified, this is an error
             # If we were just looking for default files, it's not an error
-            if context.get_param('excel_metadata_path'):
+            if context.get_param('excel_metadata_path') or context.get_param('excel_metadata'):
                 context.run.log_message_with_code(LogCode.EXCEL_PARSING_ERROR, str(e))
                 return self.create_error_result(f'Excel parsing error: {str(e)}')
             else:
@@ -77,6 +94,15 @@ class ProcessMetadataStep(BaseStep):
         except Exception as e:
             return self.create_error_result(f'Unexpected error processing metadata: {str(e)}')

+        finally:
+            # Clean up temporary file if it was created from base64
+            if temp_file_to_cleanup and temp_file_to_cleanup.exists():
+                try:
+                    temp_file_to_cleanup.unlink()
+                    context.run.log_message(f'Cleaned up temporary Excel file: {temp_file_to_cleanup}')
+                except Exception as e:
+                    context.run.log_message(f'Failed to clean up temporary file {temp_file_to_cleanup}: {str(e)}')
+
     def can_skip(self, context: UploadContext) -> bool:
         """Metadata step can be skipped if no metadata strategy is configured."""
         return 'metadata' not in context.strategies
@@ -86,6 +112,72 @@ class ProcessMetadataStep(BaseStep):
         # Clear any loaded metadata
         context.metadata.clear()

+    def _resolve_excel_path_from_string(self, excel_path_str: str, context: UploadContext) -> tuple[Path | None, bool]:
+        """Resolve Excel metadata path from a string path.
+
+        Note: This method supports the excel_metadata_path parameter which will be deprecated
+        in a future version. Consider using _resolve_excel_path_from_base64 instead.
+
+        Args:
+            excel_path_str: File path string to the Excel metadata file
+            context: Upload context for resolving relative paths
+
+        Returns:
+            Tuple of (resolved_path, is_temporary_file)
+            - resolved_path: Path object pointing to the Excel file, or None if resolution failed
+            - is_temporary_file: Always False for path-based approach
+
+        Examples:
+            >>> path, is_temp = self._resolve_excel_path_from_string("/data/meta.xlsx", context)
+        """
+        # TODO: Plan to deprecate this method in a few versions (backward compatibility)
+        # Try absolute path first
+        path = Path(excel_path_str)
+        if path.exists() and path.is_file():
+            return path, False
+
+        # Try relative to cwd
+        path = context.pathlib_cwd / excel_path_str
+        return (path, False) if path.exists() else (None, False)
+
+    def _resolve_excel_path_from_base64(
+        self, excel_config: dict | ExcelMetadataFile, context: UploadContext
+    ) -> tuple[Path | None, bool]:
+        """Resolve Excel metadata path from base64 encoded data.
+
+        Args:
+            excel_config: Either a dict or an ExcelMetadataFile object with base64 data
+            context: Upload context for logging
+
+        Returns:
+            Tuple of (resolved_path, is_temporary_file)
+            - resolved_path: Path object pointing to the temporary Excel file, or None if decoding failed
+            - is_temporary_file: Always True for base64 approach (requires cleanup)
+
+        Examples:
+            >>> config = ExcelMetadataFile(data="UEsDB...", filename="meta.xlsx")
+            >>> path, is_temp = self._resolve_excel_path_from_base64(config, context)
+        """
+        if isinstance(excel_config, dict):
+            excel_config = ExcelMetadataFile(**excel_config)
+
+        try:
+            # Decode base64 data
+            decoded_data = base64.b64decode(excel_config.data, validate=True)
+
+            # Create temp file
+            temp_dir = Path(tempfile.gettempdir())
+            filename = excel_config.filename
+            temp_file = temp_dir / filename
+            temp_file.write_bytes(decoded_data)
+
+            context.run.log_message(f'Decoded base64 Excel metadata to temporary file: {temp_file}')
+            return temp_file, True
+
+        except Exception as e:
+            context.run.log_message(f'Failed to decode base64 Excel metadata: {str(e)}')
+            return None, False
+
     def _find_excel_metadata_file(self, pathlib_cwd: Path) -> Path:
         """Find default Excel metadata file."""
         # Check .xlsx first as it's more common
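For context on what `_resolve_excel_path_from_base64` expects from callers, here is a minimal client-side sketch (hypothetical file name; the `data`/`filename` fields are those of `ExcelMetadataFile` in the hunk above) that builds the `excel_metadata` payload and round-trips it through the same strict decode:

```python
import base64
from pathlib import Path

source = Path('meta.xlsx')  # hypothetical local Excel file

# b64decode(..., validate=True) in the step rejects any character outside the
# base64 alphabet, so encode with the standard alphabet and send it as str.
payload = {
    'data': base64.b64encode(source.read_bytes()).decode('ascii'),
    'filename': source.name,
}

# Round-trip check mirroring the step's decode path.
assert base64.b64decode(payload['data'], validate=True) == source.read_bytes()
```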
synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py

@@ -24,51 +24,128 @@ class OrganizeFilesStep(BaseStep):
             return self.create_error_result('File specifications not available')

         try:
-            #
-
-
-
-
-
-                    type_dirs[spec_name] = spec_dir
-
-            if type_dirs:
-                context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
+            # Check which mode we're in
+            use_single_path = context.get_param('use_single_path', True)
+
+            if use_single_path:
+                # Single path mode: all assets use same base path
+                return self._execute_single_path_mode(context, file_discovery_strategy)
             else:
-
-                return self.
+                # Multi-path mode: each asset has its own path
+                return self._execute_multi_path_mode(context, file_discovery_strategy)

-
-
+        except Exception as e:
+            return self.create_error_result(f'File organization failed: {str(e)}')

-
-
-
+    def _execute_single_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
+        """Execute file organization in single path mode (traditional)."""
+        # Create type directories mapping
+        type_dirs = {}
+        for spec in context.file_specifications:
+            spec_name = spec['name']
+            spec_dir = context.pathlib_cwd / spec_name
+            if spec_dir.exists() and spec_dir.is_dir():
+                type_dirs[spec_name] = spec_dir
+
+        if type_dirs:
+            context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
+        else:
+            context.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
+            return self.create_success_result(data={'organized_files': []})
+
+        context.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
+        context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
+
+        # Discover files in type directories
+        all_files = []
+        is_recursive = context.get_param('is_recursive', True)
+
+        for spec_name, dir_path in type_dirs.items():
+            files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
+            all_files.extend(files_in_dir)
+
+        if not all_files:
+            context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
+            return self.create_success_result(data={'organized_files': []})
+
+        # Organize files using strategy
+        organized_files = file_discovery_strategy.organize(
+            all_files, context.file_specifications, context.metadata or {}, type_dirs
+        )
+
+        if organized_files:
+            context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(organized_files))
+            context.add_organized_files(organized_files)
+
+        return self.create_success_result(
+            data={'organized_files': organized_files},
+            rollback_data={'files_count': len(organized_files), 'type_dirs': list(type_dirs.keys())},
+        )
+
+    def _execute_multi_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
+        """Execute file organization in multi-path mode (each asset has own path)."""
+        from synapse_sdk.utils.storage import get_pathlib
+
+        assets = context.get_param('assets', {})
+        if not assets:
+            return self.create_error_result('Multi-path mode requires assets configuration')
+
+        context.run.log_message(f'Using multi-path mode with {len(assets)} asset configurations')
+        context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
+
+        all_organized_files = []
+        type_dirs = {}
+
+        for spec in context.file_specifications:
+            spec_name = spec['name']
+
+            # Skip if no asset configuration for this spec
+            if spec_name not in assets:
+                context.run.log_message(f'Skipping {spec_name}: no asset path configured')
+                continue
+
+            asset_config = assets[spec_name]
+
+            # Get the asset path from storage
+            try:
+                asset_path = get_pathlib(context.storage, asset_config.path)
+                type_dirs[spec_name] = asset_path
+            except Exception as e:
+                context.run.log_message(f'Error accessing path for {spec_name}: {str(e)}', 'WARNING')
+                continue
+
+            if not asset_path.exists():
+                context.run.log_message(f'Path does not exist for {spec_name}: {asset_config.path}', 'WARNING')
+                continue
+
+            # Discover files for this asset
+            is_recursive = asset_config.is_recursive
+            context.run.log_message(
+                f'Discovering files for {spec_name} at {asset_config.path} (recursive={is_recursive})'
+            )

-
-            files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
-            all_files.extend(files_in_dir)
+            files = file_discovery_strategy.discover(asset_path, is_recursive)

-            if not
-            context.run.
-
+            if not files:
+                context.run.log_message(f'No files found for {spec_name}', 'WARNING')
+                continue

-            # Organize files
-
-                all_files, context.file_specifications, context.metadata or {}, type_dirs
-            )
+            # Organize files for this specific spec
+            organized = file_discovery_strategy.organize(files, [spec], context.metadata or {}, {spec_name: asset_path})

-
-
-            context.add_organized_files(organized_files)
+            all_organized_files.extend(organized)
+            context.run.log_message(f'Found {len(organized)} files for {spec_name}')

-
-
-
-
+        if all_organized_files:
+            context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(all_organized_files))
+            context.add_organized_files(all_organized_files)
+        else:
+            context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)

-
-
+        return self.create_success_result(
+            data={'organized_files': all_organized_files},
+            rollback_data={'files_count': len(all_organized_files), 'type_dirs': list(type_dirs.keys())},
+        )

     def can_skip(self, context: UploadContext) -> bool:
         """File organization cannot be skipped."""
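The multi-path branch above reads one entry per file specification from the `assets` parameter and accesses it as an object with `path` and `is_recursive` attributes. A rough sketch of that shape, using a stand-in dataclass (the SDK's real model is presumably a Pydantic type, and the README examples below spell the flag `recursive` in JSON):

```python
from dataclasses import dataclass

@dataclass
class AssetConfig:
    """Stand-in for the per-spec asset object read by _execute_multi_path_mode."""
    path: str
    is_recursive: bool = True

# Keyed by file specification name: assets[spec_name].path feeds get_pathlib(),
# and assets[spec_name].is_recursive controls discovery depth. Specs without an
# entry are skipped with a log message.
assets = {
    'images': AssetConfig(path='/cameras/front', is_recursive=True),
    'pointclouds': AssetConfig(path='/pcd', is_recursive=False),
}
```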
synapse_sdk/plugins/categories/upload/templates/README.md

@@ -0,0 +1,365 @@
+# Upload Plugin
+
+The Upload Plugin provides comprehensive file and data upload functionality with support for various storage backends, flexible asset path configuration, and Excel metadata integration.
+
+## Quick Start Usage
+
+### CLI Usage Examples
+
+#### Standard Upload (Single Directory)
+```bash
+synapse plugin run upload '{
+  "name": "Dataset Upload",
+  "storage": 1,
+  "collection": 2,
+  "use_single_path": true,
+  "assets": {
+    "path": "/data/dataset",
+    "recursive": true
+  }
+}'
+```
+
+#### Multi-Path Upload (Different Locations)
+```bash
+synapse plugin run upload '{
+  "name": "Complex Dataset Upload",
+  "storage": 1,
+  "collection": 2,
+  "use_single_path": false,
+  "assets": {
+    "images": {"path": "/images", "recursive": true},
+    "pointclouds": {"path": "/pcd", "recursive": false},
+    "annotations": {"path": "/labels", "recursive": true}
+  },
+  "excel_metadata_path": "/metadata/dataset_info.xlsx"
+}' --debug
+```
+
+### Common Use Cases
+
+#### 1. Simple Dataset Upload
+```json
+{
+  "name": "Training Dataset",
+  "storage": 1,
+  "collection": 2,
+  "assets": {
+    "path": "/datasets/training",
+    "recursive": true
+  }
+}
+```
+
+#### 2. Multi-Source Dataset Upload
+```json
+{
+  "name": "Multi-Camera Dataset",
+  "storage": 1,
+  "collection": 2,
+  "use_single_path": false,
+  "assets": {
+    "front_camera": {"path": "/cameras/front", "recursive": true},
+    "rear_camera": {"path": "/cameras/rear", "recursive": true},
+    "lidar": {"path": "/sensors/lidar", "recursive": false}
+  }
+}
+```
+
+#### 3. Dataset with Metadata
+```json
+{
+  "name": "Annotated Dataset",
+  "storage": 1,
+  "collection": 2,
+  "assets": {
+    "path": "/data/annotated",
+    "recursive": true
+  },
+  "excel_metadata_path": "/data/metadata.xlsx"
+}
+```
+
+## Configuration Parameters
+
+### Required Parameters
+
+| Parameter | Type | Description | Example |
+|-----------|------|-------------|---------|
+| `name` | string | Display name for the upload | `"My Dataset"` |
+| `storage` | integer | Storage backend ID | `1` |
+| `collection` | integer | Collection ID defining file specs | `2` |
+| `assets` | object | Path configuration (varies by mode) | See examples below |
+
+### Optional Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `description` | string | `null` | Upload description |
+| `project` | integer | `null` | Project ID to associate |
+| `use_single_path` | boolean | `true` | Use one base path for all assets; set to `false` for per-asset paths |
+| `is_recursive` | boolean | `true` | Global recursive setting (single path mode) |
+| `excel_metadata_path` | `string` | `null` | File path to an Excel metadata file (will be deprecated; consider using `excel_metadata`) |
+| `excel_metadata` | `object` | `null` | Base64-encoded Excel metadata (recommended) |
+
+## Excel Metadata Support
+
+The upload plugin provides advanced Excel metadata processing with flexible header support, comprehensive filename matching, and two distinct input methods.
+
+### Input Methods
+
+There are two separate parameters for providing Excel metadata:
+
+#### 1. File Path Method (`excel_metadata_path`)
+
+:::info Future Deprecation Notice
+This parameter will be deprecated in a future version.
+We recommend using the `excel_metadata` parameter with base64 encoding for new implementations.
+:::
+
+**Use case:** Traditional file-based uploads where the Excel file exists on the server's file system.
+
+Simple string path to an Excel file:
+
+```json
+{
+  "excel_metadata_path": "/data/metadata.xlsx"
+}
+```
+
+**Advantages:**
+- Backward compatible with existing implementations
+- Simple and straightforward
+- Direct file system access
+
+**Note:** Consider migrating to the base64-encoded method for future compatibility (see Method 2 below).
+
+#### 2. Base64 Encoded Method (`excel_metadata`) - **RECOMMENDED**
+
+**Use case:** Web frontends, APIs, and cloud integrations where files are transmitted as encoded data.
+
+Send the Excel file as base64-encoded data with its original filename:
+
+```json
+{
+  "excel_metadata": {
+    "data": "UEsDBBQABgAIAAAAIQDd4Z...",
+    "filename": "metadata.xlsx"
+  }
+}
+```
+
+**Advantages:**
+- No intermediate file storage required
+- Perfect for web upload forms
+- API-friendly JSON payload
+- Automatic temporary file cleanup
+- **This is the recommended method going forward**
+
+**Important:** Do not supply both `excel_metadata_path` and `excel_metadata` at the same time; if both are set, `excel_metadata_path` takes precedence.
+
+**Future-Proof Migration Example:**
+```python
+import base64
+
+# Current approach (will be deprecated in future)
+params = {
+    "excel_metadata_path": "/data/metadata.xlsx"
+}
+
+# Recommended approach for new implementations
+with open("/data/metadata.xlsx", "rb") as f:
+    encoded = base64.b64encode(f.read()).decode("utf-8")
+params = {
+    "excel_metadata": {
+        "data": encoded,
+        "filename": "metadata.xlsx"
+    }
+}
+```
+
+### Excel Format Example
+| filename | category | quality | notes |
+|----------|----------|---------|-------|
+| sample001 | vehicle | high | Clear visibility |
+| sample002 | pedestrian | medium | Partial occlusion |
+
+### Security Limits
+- Max file size: 10MB
+- Max rows: 10,000
+- Max columns: 50
+
+## File Matching Logic
+
+Files are matched by **stem name** (filename without extension), as the sketch after this section illustrates:
+- `sample001.jpg` → stem: "sample001"
+- `sample001.pcd` → stem: "sample001"
+- `sample001.json` → stem: "sample001"
+
+These files form a single dataset named "sample001".
+
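A minimal sketch of this grouping rule (illustrative only; the actual matching is done by the plugin's file discovery strategy):

```python
from collections import defaultdict
from pathlib import Path

files = ['sample001.jpg', 'sample001.pcd', 'sample001.json', 'sample002.jpg']

# Path('sample001.jpg').stem == 'sample001': only the final extension is stripped.
datasets = defaultdict(list)
for name in files:
    datasets[Path(name).stem].append(name)

assert sorted(datasets['sample001']) == ['sample001.jpg', 'sample001.json', 'sample001.pcd']
```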
+## Troubleshooting Guide
+
+### Common Issues
+
+#### "No Files Found" Error
+```bash
+# Check path exists and is readable
+ls -la /path/to/data
+test -r /path/to/data && echo "Readable" || echo "Not readable"
+
+# Verify files exist
+find /path/to/data -name "*.jpg" | head -10
+```
+
+#### Excel Processing Errors
+```bash
+# Check file format and size
+file /path/to/metadata.xlsx
+ls -lh /path/to/metadata.xlsx
+
+# Validate Excel content
+python -c "
+from openpyxl import load_workbook
+wb = load_workbook('/path/to/metadata.xlsx')
+print(f'Sheets: {wb.sheetnames}')
+print(f'Rows: {wb.active.max_row}')
+"
+```
+
+#### Upload Failures
+```bash
+# Test storage connection
+synapse storage test --storage-id 1
+
+# Verify collection configuration
+synapse collection show --id 2
+
+# Run with debug mode
+synapse plugin run upload '{}' --debug
+```
+
+## Best Practices
+
+### Directory Organization
+- Use clear, descriptive directory names
+- Keep directory sizes reasonable (< 10,000 files)
+- Use absolute paths for reliability
+
+### Performance Optimization
+- Enable recursive discovery only when needed
+- Keep Excel files under 5MB
+- Organize files in balanced directory structures
+
+### Security Considerations
+- Validate all paths before processing
+- Use read-only permissions for source data
+- Set appropriate Excel size limits
+
+## Advanced Features
+
+### Batch Processing
+The plugin automatically optimizes batch sizes based on dataset size (a hypothetical sizing sketch follows this list):
+- Small datasets (< 50 files): batch size 50
+- Large datasets: dynamic batch size (10-100)
+
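The exact sizing rule is not documented here; purely as a hypothetical illustration of a dynamic batch size clamped to the stated 10-100 range:

```python
def batch_size(total_files: int) -> int:
    """Hypothetical rule: small sets fit one 50-file batch; larger sets aim
    for roughly 20 batches, clamped to the documented 10-100 range."""
    if total_files < 50:
        return 50
    return max(10, min(100, total_files // 20))

assert batch_size(30) == 50       # small dataset: single batch of 50
assert batch_size(400) == 20      # mid-sized: 400 // 20
assert batch_size(10_000) == 100  # capped at the upper bound
```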
+### Progress Tracking
+Real-time progress updates with weighted categories:
+- Collection analysis: 2%
+- File upload: 38%
+- Data unit generation: 60%
+
+### Error Handling
+Comprehensive validation at multiple levels (a parameter-model sketch follows this list):
+- Parameter validation (Pydantic)
+- Runtime path validation
+- File format validation
+- Excel security checks
+
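To make the first level concrete, here is a sketch of a Pydantic model covering the parameters tabled above (field names and defaults taken from this README; the SDK's actual model may differ):

```python
from pydantic import BaseModel

class ExcelMetadata(BaseModel):
    """Shape of the base64 excel_metadata payload."""
    data: str
    filename: str

class UploadParams(BaseModel):
    """Sketch of the documented upload parameters, not the SDK's real model."""
    name: str
    storage: int
    collection: int
    assets: dict
    description: str | None = None
    project: int | None = None
    use_single_path: bool = True
    is_recursive: bool = True
    excel_metadata_path: str | None = None
    excel_metadata: ExcelMetadata | None = None

# Invalid payloads raise pydantic.ValidationError before any upload starts.
params = UploadParams(name='Training Dataset', storage=1, collection=2,
                      assets={'path': '/datasets/training', 'recursive': True})
```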
+## Environment Variables
+
+Configure Excel processing limits (a sketch of reading these limits follows the block):
+```bash
+# File size limits
+EXCEL_MAX_FILE_SIZE_MB=10
+EXCEL_MAX_MEMORY_MB=30
+
+# Content limits
+EXCEL_MAX_ROWS=10000
+EXCEL_MAX_COLUMNS=50
+
+# String length limits
+EXCEL_MAX_FILENAME_LENGTH=255
+EXCEL_MAX_METADATA_VALUE_LENGTH=1000
+```
+
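For illustration, one way these limits could be read on the plugin side (variable names from the block above; defaults assumed to match it):

```python
import os

def _int_env(name: str, default: int) -> int:
    """Read an integer limit from the environment, falling back to the documented default."""
    return int(os.environ.get(name, default))

EXCEL_MAX_FILE_SIZE_MB = _int_env('EXCEL_MAX_FILE_SIZE_MB', 10)
EXCEL_MAX_MEMORY_MB = _int_env('EXCEL_MAX_MEMORY_MB', 30)
EXCEL_MAX_ROWS = _int_env('EXCEL_MAX_ROWS', 10_000)
EXCEL_MAX_COLUMNS = _int_env('EXCEL_MAX_COLUMNS', 50)
```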
+## Migration Guide
+
+### Upgrading from Previous Versions
+All existing configurations continue to work; the new features are additive:
+
+#### Test Current Configuration
+```bash
+synapse plugin run upload '{}' --debug
+```
+
+#### Convert to Explicit Mode
+```python
+# Add an explicit mode setting
+config["use_single_path"] = False  # or True for single path mode
+```
+
+#### Gradual Migration to Multi-Path Mode
+```python
+# Start with a subset
+test_config = {
+    "use_single_path": False,
+    "assets": {
+        "test_images": {"path": "/existing/path/images", "recursive": True}
+    }
+}
+
+# Then migrate all assets
+production_config = {
+    "use_single_path": False,
+    "assets": {
+        "images": {"path": "/optimized/path1", "recursive": True},
+        "annotations": {"path": "/optimized/path2", "recursive": False}
+    }
+}
+```
+
+## Storage Backend Support
+
+The plugin supports multiple storage backends:
+- **Local filesystem**: Optimized for high I/O
+- **S3/GCS**: Cloud storage with retry logic
+- **SFTP**: Connection pooling for remote servers
+- **HTTP**: Streaming uploads for large files
+
+## API Reference
+
+### Plugin Class
+```python
+from synapse import Plugin
+
+plugin = Plugin("upload")
+result = plugin.run(config, debug=True)
+```
+
+### Result Structure
+```python
+{
+    "status": "success",
+    "uploaded_files": 150,
+    "data_units_created": 50,
+    "errors": [],
+    "metadata": {}
+}
+```
+
+## Support and Resources
+
+- **Documentation**: Full API documentation at [synapse-docs]
+- **Issues**: Report bugs at [issue-tracker]
+- **Examples**: More examples at [examples-repo]