synapse-sdk 2025.10.1__py3-none-any.whl → 2025.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
- synapse_sdk/devtools/docs/sidebars.ts +13 -1
- synapse_sdk/plugins/README.md +487 -80
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +106 -14
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +113 -36
- synapse_sdk/plugins/categories/upload/templates/README.md +365 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/METADATA +1 -1
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/RECORD +40 -20
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +1,53 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
1
|
from typing import Annotated
|
|
3
2
|
|
|
4
|
-
from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator
|
|
3
|
+
from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator, model_validator
|
|
5
4
|
from pydantic_core import PydanticCustomError
|
|
6
5
|
|
|
7
6
|
from synapse_sdk.clients.exceptions import ClientError
|
|
8
7
|
from synapse_sdk.utils.pydantic.validators import non_blank
|
|
9
|
-
from synapse_sdk.utils.storage import get_pathlib
|
|
10
8
|
|
|
11
|
-
|
|
9
|
+
|
|
10
|
+
class ExcelMetadataFile(BaseModel):
|
|
11
|
+
"""Excel metadata configuration for base64 encoded data.
|
|
12
|
+
|
|
13
|
+
This model is used specifically for base64-encoded Excel metadata files,
|
|
14
|
+
typically from web frontends or API integrations.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
data: Base64 encoded content of the Excel file
|
|
18
|
+
filename: Name of the original file before base64 encoding
|
|
19
|
+
|
|
20
|
+
Examples:
|
|
21
|
+
Base64 mode:
|
|
22
|
+
>>> config = ExcelMetadataFile(
|
|
23
|
+
... data="UEsDBBQABgAI...",
|
|
24
|
+
... filename="metadata.xlsx"
|
|
25
|
+
... )
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
data: str
|
|
29
|
+
filename: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AssetConfig(BaseModel):
|
|
33
|
+
"""Configuration for individual asset in multi-path mode.
|
|
34
|
+
|
|
35
|
+
Used when use_single_path=False to specify unique paths
|
|
36
|
+
and recursive settings for each file specification.
|
|
37
|
+
|
|
38
|
+
Attributes:
|
|
39
|
+
path (str): File system path for this specific asset
|
|
40
|
+
is_recursive (bool): Whether to recursively search subdirectories for this asset
|
|
41
|
+
|
|
42
|
+
Example:
|
|
43
|
+
>>> asset_config = AssetConfig(
|
|
44
|
+
... path="/sensors/camera/front",
|
|
45
|
+
... is_recursive=True
|
|
46
|
+
... )
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
path: str
|
|
50
|
+
is_recursive: bool = True
|
|
12
51
|
|
|
13
52
|
|
|
14
53
|
class UploadParams(BaseModel):
|
|
@@ -18,45 +57,93 @@ class UploadParams(BaseModel):
|
|
|
18
57
|
Uses Pydantic for type validation and custom validators to ensure
|
|
19
58
|
storage, data_collection, and project resources exist before processing.
|
|
20
59
|
|
|
60
|
+
Supports two modes controlled by use_single_path flag:
|
|
61
|
+
|
|
62
|
+
1. Single Path Mode (use_single_path=True, DEFAULT):
|
|
63
|
+
Traditional mode - all file specifications share one base path.
|
|
64
|
+
Requires: path, is_recursive
|
|
65
|
+
Ignores: assets
|
|
66
|
+
|
|
67
|
+
2. Multi-Path Mode (use_single_path=False):
|
|
68
|
+
Advanced mode - each file specification has its own path.
|
|
69
|
+
Requires: assets (dict with file spec names as keys)
|
|
70
|
+
Ignores: path, is_recursive
|
|
71
|
+
|
|
21
72
|
Attributes:
|
|
22
73
|
name (str): Human-readable name for the upload operation
|
|
23
74
|
description (str | None): Optional description of the upload
|
|
24
|
-
|
|
75
|
+
use_single_path (bool): Mode selector (True=single path, False=multi-path)
|
|
76
|
+
path (str | None): Base path for single path mode
|
|
77
|
+
is_recursive (bool): Global recursive setting for single path mode
|
|
78
|
+
assets (dict[str, AssetConfig] | None): Per-asset configs for multi-path mode
|
|
25
79
|
storage (int): Storage ID where files will be uploaded
|
|
26
|
-
data_collection (int): Data
|
|
80
|
+
data_collection (int): Data collection ID for organizing uploads
|
|
27
81
|
project (int | None): Optional project ID for grouping
|
|
28
|
-
excel_metadata_path (str | None): Path to Excel metadata file
|
|
29
|
-
|
|
82
|
+
excel_metadata_path (str | None): Path to Excel metadata file (traditional, backward compatible)
|
|
83
|
+
Note: This parameter will be deprecated in a future version. Consider using excel_metadata instead.
|
|
84
|
+
excel_metadata (ExcelMetadataFile | None): Base64 encoded Excel metadata (for web/API integration)
|
|
85
|
+
Note: Cannot use both excel_metadata_path and excel_metadata simultaneously
|
|
30
86
|
max_file_size_mb (int): Maximum file size limit in megabytes
|
|
31
87
|
creating_data_unit_batch_size (int): Batch size for data unit creation
|
|
32
88
|
use_async_upload (bool): Whether to use asynchronous upload processing
|
|
33
|
-
extra_params (dict | None): Extra parameters for the action
|
|
34
|
-
Example: {"include_metadata": True, "compression": "gzip"}
|
|
89
|
+
extra_params (dict | None): Extra parameters for the action
|
|
35
90
|
|
|
36
91
|
Validation:
|
|
37
92
|
- name: Must be non-blank after validation
|
|
38
93
|
- storage: Must exist and be accessible via client API
|
|
39
94
|
- data_collection: Must exist and be accessible via client API
|
|
40
95
|
- project: Must exist if specified, or can be None
|
|
41
|
-
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
96
|
+
- use_single_path mode: Validates required fields per mode
|
|
97
|
+
|
|
98
|
+
Examples:
|
|
99
|
+
Single Path Mode (Traditional):
|
|
100
|
+
>>> params = UploadParams(
|
|
101
|
+
... name="Standard Upload",
|
|
102
|
+
... use_single_path=True,
|
|
103
|
+
... path="/data/experiment_1",
|
|
104
|
+
... is_recursive=True,
|
|
105
|
+
... storage=1,
|
|
106
|
+
... data_collection=5
|
|
107
|
+
... )
|
|
108
|
+
|
|
109
|
+
Multi-Path Mode (Advanced):
|
|
110
|
+
>>> params = UploadParams(
|
|
111
|
+
... name="Multi-Source Upload",
|
|
112
|
+
... use_single_path=False,
|
|
113
|
+
... assets={
|
|
114
|
+
... "image_1": AssetConfig(path="/sensors/camera", is_recursive=True),
|
|
115
|
+
... "pcd_1": AssetConfig(path="/sensors/lidar", is_recursive=False)
|
|
116
|
+
... },
|
|
117
|
+
... storage=1,
|
|
118
|
+
... data_collection=5
|
|
119
|
+
... )
|
|
50
120
|
"""
|
|
51
121
|
|
|
52
122
|
name: Annotated[str, AfterValidator(non_blank)]
|
|
53
123
|
description: str | None = None
|
|
54
|
-
|
|
124
|
+
|
|
125
|
+
# Mode selector flag (True = single path mode, False = multi-path mode)
|
|
126
|
+
use_single_path: bool = True
|
|
127
|
+
|
|
128
|
+
# Single path mode fields (used when use_single_path=True)
|
|
129
|
+
path: str | None = None
|
|
130
|
+
is_recursive: bool = True
|
|
131
|
+
|
|
132
|
+
# Multi-path mode fields (used when use_single_path=False)
|
|
133
|
+
assets: dict[str, AssetConfig] | None = None
|
|
134
|
+
|
|
55
135
|
storage: int
|
|
56
136
|
data_collection: int
|
|
57
137
|
project: int | None = None
|
|
138
|
+
|
|
139
|
+
# Excel metadata - two separate parameters for clarity:
|
|
140
|
+
# 1. excel_metadata_path: Simple file path string (backward compatible, traditional usage)
|
|
141
|
+
# NOTE: Will be deprecated in a future version. Consider using excel_metadata instead.
|
|
142
|
+
# 2. excel_metadata: Dictionary with base64 encoded data (new, for web/API integration)
|
|
143
|
+
# TODO: Plan to deprecate excel_metadata_path in a few versions for backward compatibility
|
|
58
144
|
excel_metadata_path: str | None = None
|
|
59
|
-
|
|
145
|
+
excel_metadata: ExcelMetadataFile | None = None
|
|
146
|
+
|
|
60
147
|
max_file_size_mb: int = 50
|
|
61
148
|
creating_data_unit_batch_size: int = 1
|
|
62
149
|
use_async_upload: bool = True
|
|
@@ -107,80 +194,33 @@ class UploadParams(BaseModel):
|
|
|
107
194
|
raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
|
|
108
195
|
return value
|
|
109
196
|
|
|
110
|
-
@
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
# Validate file extension
|
|
117
|
-
if not value.lower().endswith(('.xlsx', '.xls')):
|
|
118
|
-
raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')
|
|
119
|
-
|
|
120
|
-
# Get storage and path from validation data
|
|
121
|
-
if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
|
|
122
|
-
# If we don't have storage/path data yet, just validate extension
|
|
123
|
-
return value
|
|
124
|
-
|
|
125
|
-
if info.context is None:
|
|
126
|
-
raise PydanticCustomError('missing_context', 'Validation context is required.')
|
|
127
|
-
|
|
128
|
-
action = info.context['action']
|
|
129
|
-
client = action.client
|
|
130
|
-
|
|
131
|
-
try:
|
|
132
|
-
# Get storage configuration
|
|
133
|
-
storage_id = info.data['storage']
|
|
134
|
-
storage = client.get_storage(storage_id)
|
|
135
|
-
|
|
136
|
-
# Skip file system validation if storage doesn't have provider (likely test environment)
|
|
137
|
-
if not isinstance(storage, dict) or 'provider' not in storage:
|
|
138
|
-
# Basic validation only - likely in test environment
|
|
139
|
-
return value
|
|
140
|
-
|
|
141
|
-
# Get the actual file system path using storage + path
|
|
142
|
-
base_path = get_pathlib(storage, info.data['path'])
|
|
143
|
-
|
|
144
|
-
# Support both absolute and relative paths
|
|
145
|
-
if Path(value).is_absolute():
|
|
146
|
-
excel_path = Path(value)
|
|
147
|
-
else:
|
|
148
|
-
excel_path = base_path / value
|
|
149
|
-
|
|
150
|
-
if not excel_path.exists():
|
|
151
|
-
raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')
|
|
152
|
-
|
|
153
|
-
# Validate file size
|
|
154
|
-
file_size = excel_path.stat().st_size
|
|
155
|
-
excel_config = ExcelSecurityConfig()
|
|
156
|
-
if file_size > excel_config.MAX_FILE_SIZE_BYTES:
|
|
157
|
-
max_size_mb = excel_config.MAX_FILE_SIZE_MB
|
|
197
|
+
@model_validator(mode='after')
|
|
198
|
+
def validate_path_configuration(self) -> 'UploadParams':
|
|
199
|
+
"""Validate path configuration based on use_single_path mode."""
|
|
200
|
+
if self.use_single_path:
|
|
201
|
+
# Single path mode: requires path
|
|
202
|
+
if not self.path:
|
|
158
203
|
raise PydanticCustomError(
|
|
159
|
-
'
|
|
160
|
-
'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
|
|
161
|
-
{'max_size_mb': max_size_mb},
|
|
204
|
+
'missing_path', "When use_single_path=true (single path mode), 'path' is required"
|
|
162
205
|
)
|
|
206
|
+
# Warn if assets is provided in single path mode (it will be ignored)
|
|
207
|
+
# For now, we'll silently ignore it
|
|
208
|
+
else:
|
|
209
|
+
# Multi-path mode: requires assets
|
|
210
|
+
if not self.assets:
|
|
211
|
+
raise PydanticCustomError(
|
|
212
|
+
'missing_assets',
|
|
213
|
+
"When use_single_path=false (multi-path mode), 'assets' must be provided "
|
|
214
|
+
'with path configurations for each file specification',
|
|
215
|
+
)
|
|
216
|
+
# path and is_recursive are ignored in multi-path mode
|
|
163
217
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
if excel_path.suffix.lower() == '.xlsx':
|
|
172
|
-
if not header.startswith(b'PK'):
|
|
173
|
-
raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
|
|
174
|
-
elif excel_path.suffix.lower() == '.xls':
|
|
175
|
-
if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
|
|
176
|
-
raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
|
|
177
|
-
|
|
178
|
-
except (OSError, IOError):
|
|
179
|
-
raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')
|
|
180
|
-
|
|
181
|
-
except ClientError:
|
|
182
|
-
raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
|
|
183
|
-
except Exception as e:
|
|
184
|
-
raise PydanticCustomError('validation_error', f'Error validating Excel metadata file: {str(e)}')
|
|
218
|
+
# Validate excel metadata parameters - cannot use both at the same time
|
|
219
|
+
if self.excel_metadata_path and self.excel_metadata:
|
|
220
|
+
raise PydanticCustomError(
|
|
221
|
+
'conflicting_excel_metadata',
|
|
222
|
+
"Cannot specify both 'excel_metadata_path' and 'excel_metadata'. "
|
|
223
|
+
"Use 'excel_metadata_path' for file paths or 'excel_metadata' for base64 encoded data.",
|
|
224
|
+
)
|
|
185
225
|
|
|
186
|
-
return
|
|
226
|
+
return self
|
|
@@ -21,8 +21,8 @@ class CleanupStep(BaseStep):
|
|
|
21
21
|
def execute(self, context: UploadContext) -> StepResult:
|
|
22
22
|
"""Execute cleanup step."""
|
|
23
23
|
try:
|
|
24
|
-
# Cleanup temporary directory
|
|
25
|
-
self._cleanup_temp_directory(context)
|
|
24
|
+
# Cleanup temporary directory - commented out because duplicated process with ray cleanup process
|
|
25
|
+
# self._cleanup_temp_directory(context)
|
|
26
26
|
|
|
27
27
|
# Log completion
|
|
28
28
|
context.run.log_message_with_code(LogCode.IMPORT_COMPLETED)
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import tempfile
|
|
1
3
|
from pathlib import Path
|
|
2
4
|
|
|
3
5
|
from ..context import StepResult, UploadContext
|
|
4
6
|
from ..enums import LogCode
|
|
5
7
|
from ..exceptions import ExcelParsingError, ExcelSecurityError
|
|
8
|
+
from ..models import ExcelMetadataFile
|
|
6
9
|
from .base import BaseStep
|
|
7
10
|
|
|
8
11
|
|
|
@@ -25,22 +28,36 @@ class ProcessMetadataStep(BaseStep):
|
|
|
25
28
|
return self.create_success_result(data={'metadata': {}})
|
|
26
29
|
|
|
27
30
|
excel_metadata = {}
|
|
31
|
+
temp_file_to_cleanup = None
|
|
28
32
|
|
|
29
33
|
try:
|
|
30
|
-
# Check if Excel metadata
|
|
31
|
-
excel_metadata_path
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
excel_path = context.pathlib_cwd / excel_metadata_path
|
|
41
|
-
if not excel_path.exists():
|
|
34
|
+
# Check if Excel metadata is specified - try both parameters
|
|
35
|
+
# TODO: Plan to deprecate excel_metadata_path in a few versions (backward compatibility)
|
|
36
|
+
excel_metadata_path_config = context.get_param('excel_metadata_path')
|
|
37
|
+
excel_metadata_config = context.get_param('excel_metadata')
|
|
38
|
+
|
|
39
|
+
if excel_metadata_path_config:
|
|
40
|
+
# Traditional path-based approach (will be deprecated in future)
|
|
41
|
+
excel_path, is_temp = self._resolve_excel_path_from_string(excel_metadata_path_config, context)
|
|
42
|
+
|
|
43
|
+
if not excel_path or not excel_path.exists():
|
|
42
44
|
context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
|
|
43
45
|
return self.create_success_result(data={'metadata': {}})
|
|
46
|
+
|
|
47
|
+
excel_metadata = metadata_strategy.extract(excel_path)
|
|
48
|
+
|
|
49
|
+
elif excel_metadata_config:
|
|
50
|
+
# Base64 encoded approach
|
|
51
|
+
excel_path, is_temp = self._resolve_excel_path_from_base64(excel_metadata_config, context)
|
|
52
|
+
|
|
53
|
+
if not excel_path or not excel_path.exists():
|
|
54
|
+
context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
|
|
55
|
+
return self.create_success_result(data={'metadata': {}})
|
|
56
|
+
|
|
57
|
+
# Track temp file for cleanup
|
|
58
|
+
if is_temp:
|
|
59
|
+
temp_file_to_cleanup = excel_path
|
|
60
|
+
|
|
44
61
|
excel_metadata = metadata_strategy.extract(excel_path)
|
|
45
62
|
else:
|
|
46
63
|
# Look for default metadata files (meta.xlsx, meta.xls)
|
|
@@ -65,9 +82,9 @@ class ProcessMetadataStep(BaseStep):
|
|
|
65
82
|
return self.create_error_result(f'Excel security violation: {str(e)}')
|
|
66
83
|
|
|
67
84
|
except ExcelParsingError as e:
|
|
68
|
-
# If excel_metadata_path was specified, this is an error
|
|
85
|
+
# If excel_metadata_path or excel_metadata was specified, this is an error
|
|
69
86
|
# If we were just looking for default files, it's not an error
|
|
70
|
-
if context.get_param('excel_metadata_path'):
|
|
87
|
+
if context.get_param('excel_metadata_path') or context.get_param('excel_metadata'):
|
|
71
88
|
context.run.log_message_with_code(LogCode.EXCEL_PARSING_ERROR, str(e))
|
|
72
89
|
return self.create_error_result(f'Excel parsing error: {str(e)}')
|
|
73
90
|
else:
|
|
@@ -77,6 +94,15 @@ class ProcessMetadataStep(BaseStep):
|
|
|
77
94
|
except Exception as e:
|
|
78
95
|
return self.create_error_result(f'Unexpected error processing metadata: {str(e)}')
|
|
79
96
|
|
|
97
|
+
finally:
|
|
98
|
+
# Clean up temporary file if it was created from base64
|
|
99
|
+
if temp_file_to_cleanup and temp_file_to_cleanup.exists():
|
|
100
|
+
try:
|
|
101
|
+
temp_file_to_cleanup.unlink()
|
|
102
|
+
context.run.log_message(f'Cleaned up temporary Excel file: {temp_file_to_cleanup}')
|
|
103
|
+
except Exception as e:
|
|
104
|
+
context.run.log_message(f'Failed to clean up temporary file {temp_file_to_cleanup}: {str(e)}')
|
|
105
|
+
|
|
80
106
|
def can_skip(self, context: UploadContext) -> bool:
|
|
81
107
|
"""Metadata step can be skipped if no metadata strategy is configured."""
|
|
82
108
|
return 'metadata' not in context.strategies
|
|
@@ -86,6 +112,72 @@ class ProcessMetadataStep(BaseStep):
|
|
|
86
112
|
# Clear any loaded metadata
|
|
87
113
|
context.metadata.clear()
|
|
88
114
|
|
|
115
|
+
def _resolve_excel_path_from_string(self, excel_path_str: str, context: UploadContext) -> tuple[Path | None, bool]:
|
|
116
|
+
"""Resolve Excel metadata path from a string path.
|
|
117
|
+
|
|
118
|
+
Note: This method supports the excel_metadata_path parameter which will be deprecated
|
|
119
|
+
in a future version. Consider using _resolve_excel_path_from_base64 instead.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
excel_path_str: File path string to the Excel metadata file
|
|
123
|
+
context: Upload context for resolving relative paths
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
Tuple of (resolved_path, is_temporary_file)
|
|
127
|
+
- resolved_path: Path object pointing to the Excel file, or None if resolution failed
|
|
128
|
+
- is_temporary_file: Always False for path-based approach
|
|
129
|
+
|
|
130
|
+
Examples:
|
|
131
|
+
>>> path, is_temp = self._resolve_excel_path_from_string("/data/meta.xlsx", context)
|
|
132
|
+
"""
|
|
133
|
+
# TODO: Plan to deprecate this method in a few versions (backward compatibility)
|
|
134
|
+
# Try absolute path first
|
|
135
|
+
path = Path(excel_path_str)
|
|
136
|
+
if path.exists() and path.is_file():
|
|
137
|
+
return path, False
|
|
138
|
+
|
|
139
|
+
# Try relative to cwd
|
|
140
|
+
path = context.pathlib_cwd / excel_path_str
|
|
141
|
+
return (path, False) if path.exists() else (None, False)
|
|
142
|
+
|
|
143
|
+
def _resolve_excel_path_from_base64(
|
|
144
|
+
self, excel_config: dict | ExcelMetadataFile, context: UploadContext
|
|
145
|
+
) -> tuple[Path | None, bool]:
|
|
146
|
+
"""Resolve Excel metadata path from base64 encoded data.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
excel_config: Either a dict or an ExcelMetadataFile object with base64 data
|
|
150
|
+
context: Upload context for logging
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
Tuple of (resolved_path, is_temporary_file)
|
|
154
|
+
- resolved_path: Path object pointing to the temporary Excel file, or None if decoding failed
|
|
155
|
+
- is_temporary_file: Always True for base64 approach (requires cleanup)
|
|
156
|
+
|
|
157
|
+
Examples:
|
|
158
|
+
>>> config = ExcelMetadataFile(data="UEsDB...", filename="meta.xlsx")
|
|
159
|
+
>>> path, is_temp = self._resolve_excel_path_from_base64(config, context)
|
|
160
|
+
"""
|
|
161
|
+
if isinstance(excel_config, dict):
|
|
162
|
+
excel_config = ExcelMetadataFile(**excel_config)
|
|
163
|
+
|
|
164
|
+
try:
|
|
165
|
+
# Decode base64 data
|
|
166
|
+
decoded_data = base64.b64decode(excel_config.data, validate=True)
|
|
167
|
+
|
|
168
|
+
# Create temp file
|
|
169
|
+
temp_dir = Path(tempfile.gettempdir())
|
|
170
|
+
filename = excel_config.filename
|
|
171
|
+
temp_file = temp_dir / filename
|
|
172
|
+
temp_file.write_bytes(decoded_data)
|
|
173
|
+
|
|
174
|
+
context.run.log_message(f'Decoded base64 Excel metadata to temporary file: {temp_file}')
|
|
175
|
+
return temp_file, True
|
|
176
|
+
|
|
177
|
+
except Exception as e:
|
|
178
|
+
context.run.log_message(f'Failed to decode base64 Excel metadata: {str(e)}')
|
|
179
|
+
return None, False
|
|
180
|
+
|
|
89
181
|
def _find_excel_metadata_file(self, pathlib_cwd: Path) -> Path:
|
|
90
182
|
"""Find default Excel metadata file."""
|
|
91
183
|
# Check .xlsx first as it's more common
|
|
@@ -24,51 +24,128 @@ class OrganizeFilesStep(BaseStep):
|
|
|
24
24
|
return self.create_error_result('File specifications not available')
|
|
25
25
|
|
|
26
26
|
try:
|
|
27
|
-
#
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
type_dirs[spec_name] = spec_dir
|
|
34
|
-
|
|
35
|
-
if type_dirs:
|
|
36
|
-
context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
|
|
27
|
+
# Check which mode we're in
|
|
28
|
+
use_single_path = context.get_param('use_single_path', True)
|
|
29
|
+
|
|
30
|
+
if use_single_path:
|
|
31
|
+
# Single path mode: all assets use same base path
|
|
32
|
+
return self._execute_single_path_mode(context, file_discovery_strategy)
|
|
37
33
|
else:
|
|
38
|
-
|
|
39
|
-
return self.
|
|
34
|
+
# Multi-path mode: each asset has its own path
|
|
35
|
+
return self._execute_multi_path_mode(context, file_discovery_strategy)
|
|
40
36
|
|
|
41
|
-
|
|
42
|
-
|
|
37
|
+
except Exception as e:
|
|
38
|
+
return self.create_error_result(f'File organization failed: {str(e)}')
|
|
43
39
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
40
|
+
def _execute_single_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
|
|
41
|
+
"""Execute file organization in single path mode (traditional)."""
|
|
42
|
+
# Create type directories mapping
|
|
43
|
+
type_dirs = {}
|
|
44
|
+
for spec in context.file_specifications:
|
|
45
|
+
spec_name = spec['name']
|
|
46
|
+
spec_dir = context.pathlib_cwd / spec_name
|
|
47
|
+
if spec_dir.exists() and spec_dir.is_dir():
|
|
48
|
+
type_dirs[spec_name] = spec_dir
|
|
49
|
+
|
|
50
|
+
if type_dirs:
|
|
51
|
+
context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
|
|
52
|
+
else:
|
|
53
|
+
context.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
|
|
54
|
+
return self.create_success_result(data={'organized_files': []})
|
|
55
|
+
|
|
56
|
+
context.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
|
|
57
|
+
context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
|
|
58
|
+
|
|
59
|
+
# Discover files in type directories
|
|
60
|
+
all_files = []
|
|
61
|
+
is_recursive = context.get_param('is_recursive', True)
|
|
62
|
+
|
|
63
|
+
for spec_name, dir_path in type_dirs.items():
|
|
64
|
+
files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
|
|
65
|
+
all_files.extend(files_in_dir)
|
|
66
|
+
|
|
67
|
+
if not all_files:
|
|
68
|
+
context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
|
|
69
|
+
return self.create_success_result(data={'organized_files': []})
|
|
70
|
+
|
|
71
|
+
# Organize files using strategy
|
|
72
|
+
organized_files = file_discovery_strategy.organize(
|
|
73
|
+
all_files, context.file_specifications, context.metadata or {}, type_dirs
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
if organized_files:
|
|
77
|
+
context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(organized_files))
|
|
78
|
+
context.add_organized_files(organized_files)
|
|
79
|
+
|
|
80
|
+
return self.create_success_result(
|
|
81
|
+
data={'organized_files': organized_files},
|
|
82
|
+
rollback_data={'files_count': len(organized_files), 'type_dirs': list(type_dirs.keys())},
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
def _execute_multi_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
|
|
86
|
+
"""Execute file organization in multi-path mode (each asset has own path)."""
|
|
87
|
+
from synapse_sdk.utils.storage import get_pathlib
|
|
88
|
+
|
|
89
|
+
assets = context.get_param('assets', {})
|
|
90
|
+
if not assets:
|
|
91
|
+
return self.create_error_result('Multi-path mode requires assets configuration')
|
|
92
|
+
|
|
93
|
+
context.run.log_message(f'Using multi-path mode with {len(assets)} asset configurations')
|
|
94
|
+
context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
|
|
95
|
+
|
|
96
|
+
all_organized_files = []
|
|
97
|
+
type_dirs = {}
|
|
98
|
+
|
|
99
|
+
for spec in context.file_specifications:
|
|
100
|
+
spec_name = spec['name']
|
|
101
|
+
|
|
102
|
+
# Skip if no asset configuration for this spec
|
|
103
|
+
if spec_name not in assets:
|
|
104
|
+
context.run.log_message(f'Skipping {spec_name}: no asset path configured')
|
|
105
|
+
continue
|
|
106
|
+
|
|
107
|
+
asset_config = assets[spec_name]
|
|
108
|
+
|
|
109
|
+
# Get the asset path from storage
|
|
110
|
+
try:
|
|
111
|
+
asset_path = get_pathlib(context.storage, asset_config.path)
|
|
112
|
+
type_dirs[spec_name] = asset_path
|
|
113
|
+
except Exception as e:
|
|
114
|
+
context.run.log_message(f'Error accessing path for {spec_name}: {str(e)}', 'WARNING')
|
|
115
|
+
continue
|
|
116
|
+
|
|
117
|
+
if not asset_path.exists():
|
|
118
|
+
context.run.log_message(f'Path does not exist for {spec_name}: {asset_config.path}', 'WARNING')
|
|
119
|
+
continue
|
|
120
|
+
|
|
121
|
+
# Discover files for this asset
|
|
122
|
+
is_recursive = asset_config.is_recursive
|
|
123
|
+
context.run.log_message(
|
|
124
|
+
f'Discovering files for {spec_name} at {asset_config.path} (recursive={is_recursive})'
|
|
125
|
+
)
|
|
47
126
|
|
|
48
|
-
|
|
49
|
-
files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
|
|
50
|
-
all_files.extend(files_in_dir)
|
|
127
|
+
files = file_discovery_strategy.discover(asset_path, is_recursive)
|
|
51
128
|
|
|
52
|
-
if not
|
|
53
|
-
context.run.
|
|
54
|
-
|
|
129
|
+
if not files:
|
|
130
|
+
context.run.log_message(f'No files found for {spec_name}', 'WARNING')
|
|
131
|
+
continue
|
|
55
132
|
|
|
56
|
-
# Organize files
|
|
57
|
-
|
|
58
|
-
all_files, context.file_specifications, context.metadata or {}, type_dirs
|
|
59
|
-
)
|
|
133
|
+
# Organize files for this specific spec
|
|
134
|
+
organized = file_discovery_strategy.organize(files, [spec], context.metadata or {}, {spec_name: asset_path})
|
|
60
135
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
context.add_organized_files(organized_files)
|
|
136
|
+
all_organized_files.extend(organized)
|
|
137
|
+
context.run.log_message(f'Found {len(organized)} files for {spec_name}')
|
|
64
138
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
139
|
+
if all_organized_files:
|
|
140
|
+
context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(all_organized_files))
|
|
141
|
+
context.add_organized_files(all_organized_files)
|
|
142
|
+
else:
|
|
143
|
+
context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
|
|
69
144
|
|
|
70
|
-
|
|
71
|
-
|
|
145
|
+
return self.create_success_result(
|
|
146
|
+
data={'organized_files': all_organized_files},
|
|
147
|
+
rollback_data={'files_count': len(all_organized_files), 'type_dirs': list(type_dirs.keys())},
|
|
148
|
+
)
|
|
72
149
|
|
|
73
150
|
def can_skip(self, context: UploadContext) -> bool:
|
|
74
151
|
"""File organization cannot be skipped."""
|