synapse-sdk 2025.10.1__py3-none-any.whl → 2025.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic; see the package registry page for more details.

Files changed (44)
  1. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
  2. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
  3. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
  4. synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
  5. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
  6. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
  7. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
  8. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
  9. synapse_sdk/devtools/docs/sidebars.ts +13 -1
  10. synapse_sdk/plugins/README.md +487 -80
  11. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  12. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  13. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  14. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  15. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
  16. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  17. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  18. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  19. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
  20. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
  21. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  22. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  23. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
  24. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  25. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  26. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
  27. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
  28. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  29. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
  30. synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
  31. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
  32. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +106 -14
  33. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +113 -36
  34. synapse_sdk/plugins/categories/upload/templates/README.md +365 -0
  35. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/METADATA +1 -1
  36. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/RECORD +40 -20
  37. synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
  38. synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
  39. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
  40. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
  41. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/WHEEL +0 -0
  42. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/entry_points.txt +0 -0
  43. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/licenses/LICENSE +0 -0
  44. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.3.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,53 @@
1
- from pathlib import Path
2
1
  from typing import Annotated
3
2
 
4
- from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator
3
+ from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator, model_validator
5
4
  from pydantic_core import PydanticCustomError
6
5
 
7
6
  from synapse_sdk.clients.exceptions import ClientError
8
7
  from synapse_sdk.utils.pydantic.validators import non_blank
9
- from synapse_sdk.utils.storage import get_pathlib
10
8
 
11
- from .utils import ExcelSecurityConfig
9
+
10
+ class ExcelMetadataFile(BaseModel):
11
+ """Excel metadata configuration for base64 encoded data.
12
+
13
+ This model is used specifically for base64-encoded Excel metadata files,
14
+ typically from web frontends or API integrations.
15
+
16
+ Attributes:
17
+ data: Base64 encoded content of the Excel file
18
+ filename: Name of the original file before base64 encoding
19
+
20
+ Examples:
21
+ Base64 mode:
22
+ >>> config = ExcelMetadataFile(
23
+ ... data="UEsDBBQABgAI...",
24
+ ... filename="metadata.xlsx"
25
+ ... )
26
+ """
27
+
28
+ data: str
29
+ filename: str
30
+
31
+
32
+ class AssetConfig(BaseModel):
33
+ """Configuration for individual asset in multi-path mode.
34
+
35
+ Used when use_single_path=False to specify unique paths
36
+ and recursive settings for each file specification.
37
+
38
+ Attributes:
39
+ path (str): File system path for this specific asset
40
+ is_recursive (bool): Whether to recursively search subdirectories for this asset
41
+
42
+ Example:
43
+ >>> asset_config = AssetConfig(
44
+ ... path="/sensors/camera/front",
45
+ ... is_recursive=True
46
+ ... )
47
+ """
48
+
49
+ path: str
50
+ is_recursive: bool = True
12
51
 
13
52
 
14
53
  class UploadParams(BaseModel):
@@ -18,45 +57,93 @@ class UploadParams(BaseModel):
18
57
  Uses Pydantic for type validation and custom validators to ensure
19
58
  storage, data_collection, and project resources exist before processing.
20
59
 
60
+ Supports two modes controlled by use_single_path flag:
61
+
62
+ 1. Single Path Mode (use_single_path=True, DEFAULT):
63
+ Traditional mode - all file specifications share one base path.
64
+ Requires: path, is_recursive
65
+ Ignores: assets
66
+
67
+ 2. Multi-Path Mode (use_single_path=False):
68
+ Advanced mode - each file specification has its own path.
69
+ Requires: assets (dict with file spec names as keys)
70
+ Ignores: path, is_recursive
71
+
21
72
  Attributes:
22
73
  name (str): Human-readable name for the upload operation
23
74
  description (str | None): Optional description of the upload
24
- path (str): File system path to upload from
75
+ use_single_path (bool): Mode selector (True=single path, False=multi-path)
76
+ path (str | None): Base path for single path mode
77
+ is_recursive (bool): Global recursive setting for single path mode
78
+ assets (dict[str, AssetConfig] | None): Per-asset configs for multi-path mode
25
79
  storage (int): Storage ID where files will be uploaded
26
- data_collection (int): Data data_collection ID for organizing uploads
80
+ data_collection (int): Data collection ID for organizing uploads
27
81
  project (int | None): Optional project ID for grouping
28
- excel_metadata_path (str | None): Path to Excel metadata file
29
- is_recursive (bool): Whether to recursively process subdirectories
82
+ excel_metadata_path (str | None): Path to Excel metadata file (traditional, backward compatible)
83
+ Note: This parameter will be deprecated in a future version. Consider using excel_metadata instead.
84
+ excel_metadata (ExcelMetadataFile | None): Base64 encoded Excel metadata (for web/API integration)
85
+ Note: Cannot use both excel_metadata_path and excel_metadata simultaneously
30
86
  max_file_size_mb (int): Maximum file size limit in megabytes
31
87
  creating_data_unit_batch_size (int): Batch size for data unit creation
32
88
  use_async_upload (bool): Whether to use asynchronous upload processing
33
- extra_params (dict | None): Extra parameters for the action.
34
- Example: {"include_metadata": True, "compression": "gzip"}
89
+ extra_params (dict | None): Extra parameters for the action
35
90
 
36
91
  Validation:
37
92
  - name: Must be non-blank after validation
38
93
  - storage: Must exist and be accessible via client API
39
94
  - data_collection: Must exist and be accessible via client API
40
95
  - project: Must exist if specified, or can be None
41
- - excel_metadata_path: Must be valid Excel file if specified
42
-
43
- Example:
44
- >>> params = UploadParams(
45
- ... name="Data Upload",
46
- ... path="/data/files",
47
- ... storage=1,
48
- ... data_collection=5
49
- ... )
96
+ - use_single_path mode: Validates required fields per mode
97
+
98
+ Examples:
99
+ Single Path Mode (Traditional):
100
+ >>> params = UploadParams(
101
+ ... name="Standard Upload",
102
+ ... use_single_path=True,
103
+ ... path="/data/experiment_1",
104
+ ... is_recursive=True,
105
+ ... storage=1,
106
+ ... data_collection=5
107
+ ... )
108
+
109
+ Multi-Path Mode (Advanced):
110
+ >>> params = UploadParams(
111
+ ... name="Multi-Source Upload",
112
+ ... use_single_path=False,
113
+ ... assets={
114
+ ... "image_1": AssetConfig(path="/sensors/camera", is_recursive=True),
115
+ ... "pcd_1": AssetConfig(path="/sensors/lidar", is_recursive=False)
116
+ ... },
117
+ ... storage=1,
118
+ ... data_collection=5
119
+ ... )
50
120
  """
51
121
 
52
122
  name: Annotated[str, AfterValidator(non_blank)]
53
123
  description: str | None = None
54
- path: str
124
+
125
+ # Mode selector flag (True = single path mode, False = multi-path mode)
126
+ use_single_path: bool = True
127
+
128
+ # Single path mode fields (used when use_single_path=True)
129
+ path: str | None = None
130
+ is_recursive: bool = True
131
+
132
+ # Multi-path mode fields (used when use_single_path=False)
133
+ assets: dict[str, AssetConfig] | None = None
134
+
55
135
  storage: int
56
136
  data_collection: int
57
137
  project: int | None = None
138
+
139
+ # Excel metadata - two separate parameters for clarity:
140
+ # 1. excel_metadata_path: Simple file path string (backward compatible, traditional usage)
141
+ # NOTE: Will be deprecated in a future version. Consider using excel_metadata instead.
142
+ # 2. excel_metadata: Dictionary with base64 encoded data (new, for web/API integration)
143
+ # TODO: Plan to deprecate excel_metadata_path in a few versions for backward compatibility
58
144
  excel_metadata_path: str | None = None
59
- is_recursive: bool = True
145
+ excel_metadata: ExcelMetadataFile | None = None
146
+
60
147
  max_file_size_mb: int = 50
61
148
  creating_data_unit_batch_size: int = 1
62
149
  use_async_upload: bool = True
@@ -107,80 +194,33 @@ class UploadParams(BaseModel):
107
194
  raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
108
195
  return value
109
196
 
110
- @field_validator('excel_metadata_path', mode='after')
111
- @classmethod
112
- def check_excel_metadata_path(cls, value, info: ValidationInfo) -> str | None:
113
- if not value:
114
- return value
115
-
116
- # Validate file extension
117
- if not value.lower().endswith(('.xlsx', '.xls')):
118
- raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')
119
-
120
- # Get storage and path from validation data
121
- if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
122
- # If we don't have storage/path data yet, just validate extension
123
- return value
124
-
125
- if info.context is None:
126
- raise PydanticCustomError('missing_context', 'Validation context is required.')
127
-
128
- action = info.context['action']
129
- client = action.client
130
-
131
- try:
132
- # Get storage configuration
133
- storage_id = info.data['storage']
134
- storage = client.get_storage(storage_id)
135
-
136
- # Skip file system validation if storage doesn't have provider (likely test environment)
137
- if not isinstance(storage, dict) or 'provider' not in storage:
138
- # Basic validation only - likely in test environment
139
- return value
140
-
141
- # Get the actual file system path using storage + path
142
- base_path = get_pathlib(storage, info.data['path'])
143
-
144
- # Support both absolute and relative paths
145
- if Path(value).is_absolute():
146
- excel_path = Path(value)
147
- else:
148
- excel_path = base_path / value
149
-
150
- if not excel_path.exists():
151
- raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')
152
-
153
- # Validate file size
154
- file_size = excel_path.stat().st_size
155
- excel_config = ExcelSecurityConfig()
156
- if file_size > excel_config.MAX_FILE_SIZE_BYTES:
157
- max_size_mb = excel_config.MAX_FILE_SIZE_MB
197
+ @model_validator(mode='after')
198
+ def validate_path_configuration(self) -> 'UploadParams':
199
+ """Validate path configuration based on use_single_path mode."""
200
+ if self.use_single_path:
201
+ # Single path mode: requires path
202
+ if not self.path:
158
203
  raise PydanticCustomError(
159
- 'file_too_large',
160
- 'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
161
- {'max_size_mb': max_size_mb},
204
+ 'missing_path', "When use_single_path=true (single path mode), 'path' is required"
162
205
  )
206
+ # Warn if assets is provided in single path mode (it will be ignored)
207
+ # For now, we'll silently ignore it
208
+ else:
209
+ # Multi-path mode: requires assets
210
+ if not self.assets:
211
+ raise PydanticCustomError(
212
+ 'missing_assets',
213
+ "When use_single_path=false (multi-path mode), 'assets' must be provided "
214
+ 'with path configurations for each file specification',
215
+ )
216
+ # path and is_recursive are ignored in multi-path mode
163
217
 
164
- # Validate file format
165
- try:
166
- with open(excel_path, 'rb') as f:
167
- header = f.read(8)
168
- if not header:
169
- raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be empty.')
170
-
171
- if excel_path.suffix.lower() == '.xlsx':
172
- if not header.startswith(b'PK'):
173
- raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
174
- elif excel_path.suffix.lower() == '.xls':
175
- if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
176
- raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
177
-
178
- except (OSError, IOError):
179
- raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')
180
-
181
- except ClientError:
182
- raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
183
- except Exception as e:
184
- raise PydanticCustomError('validation_error', f'Error validating Excel metadata file: {str(e)}')
218
+ # Validate excel metadata parameters - cannot use both at the same time
219
+ if self.excel_metadata_path and self.excel_metadata:
220
+ raise PydanticCustomError(
221
+ 'conflicting_excel_metadata',
222
+ "Cannot specify both 'excel_metadata_path' and 'excel_metadata'. "
223
+ "Use 'excel_metadata_path' for file paths or 'excel_metadata' for base64 encoded data.",
224
+ )
185
225
 
186
- return value
226
+ return self
@@ -21,8 +21,8 @@ class CleanupStep(BaseStep):
21
21
  def execute(self, context: UploadContext) -> StepResult:
22
22
  """Execute cleanup step."""
23
23
  try:
24
- # Cleanup temporary directory
25
- self._cleanup_temp_directory(context)
24
+ # Cleanup temporary directory - commented out because duplicated process with ray cleanup process
25
+ # self._cleanup_temp_directory(context)
26
26
 
27
27
  # Log completion
28
28
  context.run.log_message_with_code(LogCode.IMPORT_COMPLETED)
@@ -1,8 +1,11 @@
1
+ import base64
2
+ import tempfile
1
3
  from pathlib import Path
2
4
 
3
5
  from ..context import StepResult, UploadContext
4
6
  from ..enums import LogCode
5
7
  from ..exceptions import ExcelParsingError, ExcelSecurityError
8
+ from ..models import ExcelMetadataFile
6
9
  from .base import BaseStep
7
10
 
8
11
 
@@ -25,22 +28,36 @@ class ProcessMetadataStep(BaseStep):
25
28
  return self.create_success_result(data={'metadata': {}})
26
29
 
27
30
  excel_metadata = {}
31
+ temp_file_to_cleanup = None
28
32
 
29
33
  try:
30
- # Check if Excel metadata path is specified
31
- excel_metadata_path = context.get_param('excel_metadata_path')
32
- if excel_metadata_path:
33
- # Convert string to Path object
34
- if isinstance(excel_metadata_path, str):
35
- excel_metadata_path = Path(excel_metadata_path)
36
-
37
- if excel_metadata_path.exists() and excel_metadata_path.is_file():
38
- excel_path = excel_metadata_path
39
- else:
40
- excel_path = context.pathlib_cwd / excel_metadata_path
41
- if not excel_path.exists():
34
+ # Check if Excel metadata is specified - try both parameters
35
+ # TODO: Plan to deprecate excel_metadata_path in a few versions (backward compatibility)
36
+ excel_metadata_path_config = context.get_param('excel_metadata_path')
37
+ excel_metadata_config = context.get_param('excel_metadata')
38
+
39
+ if excel_metadata_path_config:
40
+ # Traditional path-based approach (will be deprecated in future)
41
+ excel_path, is_temp = self._resolve_excel_path_from_string(excel_metadata_path_config, context)
42
+
43
+ if not excel_path or not excel_path.exists():
42
44
  context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
43
45
  return self.create_success_result(data={'metadata': {}})
46
+
47
+ excel_metadata = metadata_strategy.extract(excel_path)
48
+
49
+ elif excel_metadata_config:
50
+ # Base64 encoded approach
51
+ excel_path, is_temp = self._resolve_excel_path_from_base64(excel_metadata_config, context)
52
+
53
+ if not excel_path or not excel_path.exists():
54
+ context.run.log_message_with_code(LogCode.EXCEL_FILE_NOT_FOUND_PATH)
55
+ return self.create_success_result(data={'metadata': {}})
56
+
57
+ # Track temp file for cleanup
58
+ if is_temp:
59
+ temp_file_to_cleanup = excel_path
60
+
44
61
  excel_metadata = metadata_strategy.extract(excel_path)
45
62
  else:
46
63
  # Look for default metadata files (meta.xlsx, meta.xls)
@@ -65,9 +82,9 @@ class ProcessMetadataStep(BaseStep):
65
82
  return self.create_error_result(f'Excel security violation: {str(e)}')
66
83
 
67
84
  except ExcelParsingError as e:
68
- # If excel_metadata_path was specified, this is an error
85
+ # If excel_metadata_path or excel_metadata was specified, this is an error
69
86
  # If we were just looking for default files, it's not an error
70
- if context.get_param('excel_metadata_path'):
87
+ if context.get_param('excel_metadata_path') or context.get_param('excel_metadata'):
71
88
  context.run.log_message_with_code(LogCode.EXCEL_PARSING_ERROR, str(e))
72
89
  return self.create_error_result(f'Excel parsing error: {str(e)}')
73
90
  else:
@@ -77,6 +94,15 @@ class ProcessMetadataStep(BaseStep):
77
94
  except Exception as e:
78
95
  return self.create_error_result(f'Unexpected error processing metadata: {str(e)}')
79
96
 
97
+ finally:
98
+ # Clean up temporary file if it was created from base64
99
+ if temp_file_to_cleanup and temp_file_to_cleanup.exists():
100
+ try:
101
+ temp_file_to_cleanup.unlink()
102
+ context.run.log_message(f'Cleaned up temporary Excel file: {temp_file_to_cleanup}')
103
+ except Exception as e:
104
+ context.run.log_message(f'Failed to clean up temporary file {temp_file_to_cleanup}: {str(e)}')
105
+
80
106
  def can_skip(self, context: UploadContext) -> bool:
81
107
  """Metadata step can be skipped if no metadata strategy is configured."""
82
108
  return 'metadata' not in context.strategies
@@ -86,6 +112,72 @@ class ProcessMetadataStep(BaseStep):
86
112
  # Clear any loaded metadata
87
113
  context.metadata.clear()
88
114
 
115
+ def _resolve_excel_path_from_string(self, excel_path_str: str, context: UploadContext) -> tuple[Path | None, bool]:
116
+ """Resolve Excel metadata path from a string path.
117
+
118
+ Note: This method supports the excel_metadata_path parameter which will be deprecated
119
+ in a future version. Consider using _resolve_excel_path_from_base64 instead.
120
+
121
+ Args:
122
+ excel_path_str: File path string to the Excel metadata file
123
+ context: Upload context for resolving relative paths
124
+
125
+ Returns:
126
+ Tuple of (resolved_path, is_temporary_file)
127
+ - resolved_path: Path object pointing to the Excel file, or None if resolution failed
128
+ - is_temporary_file: Always False for path-based approach
129
+
130
+ Examples:
131
+ >>> path, is_temp = self._resolve_excel_path_from_string("/data/meta.xlsx", context)
132
+ """
133
+ # TODO: Plan to deprecate this method in a few versions (backward compatibility)
134
+ # Try absolute path first
135
+ path = Path(excel_path_str)
136
+ if path.exists() and path.is_file():
137
+ return path, False
138
+
139
+ # Try relative to cwd
140
+ path = context.pathlib_cwd / excel_path_str
141
+ return (path, False) if path.exists() else (None, False)
142
+
143
+ def _resolve_excel_path_from_base64(
144
+ self, excel_config: dict | ExcelMetadataFile, context: UploadContext
145
+ ) -> tuple[Path | None, bool]:
146
+ """Resolve Excel metadata path from base64 encoded data.
147
+
148
+ Args:
149
+ excel_config: Either a dict or an ExcelMetadataFile object with base64 data
150
+ context: Upload context for logging
151
+
152
+ Returns:
153
+ Tuple of (resolved_path, is_temporary_file)
154
+ - resolved_path: Path object pointing to the temporary Excel file, or None if decoding failed
155
+ - is_temporary_file: Always True for base64 approach (requires cleanup)
156
+
157
+ Examples:
158
+ >>> config = ExcelMetadataFile(data="UEsDB...", filename="meta.xlsx")
159
+ >>> path, is_temp = self._resolve_excel_path_from_base64(config, context)
160
+ """
161
+ if isinstance(excel_config, dict):
162
+ excel_config = ExcelMetadataFile(**excel_config)
163
+
164
+ try:
165
+ # Decode base64 data
166
+ decoded_data = base64.b64decode(excel_config.data, validate=True)
167
+
168
+ # Create temp file
169
+ temp_dir = Path(tempfile.gettempdir())
170
+ filename = excel_config.filename
171
+ temp_file = temp_dir / filename
172
+ temp_file.write_bytes(decoded_data)
173
+
174
+ context.run.log_message(f'Decoded base64 Excel metadata to temporary file: {temp_file}')
175
+ return temp_file, True
176
+
177
+ except Exception as e:
178
+ context.run.log_message(f'Failed to decode base64 Excel metadata: {str(e)}')
179
+ return None, False
180
+
89
181
  def _find_excel_metadata_file(self, pathlib_cwd: Path) -> Path:
90
182
  """Find default Excel metadata file."""
91
183
  # Check .xlsx first as it's more common
@@ -24,51 +24,128 @@ class OrganizeFilesStep(BaseStep):
24
24
  return self.create_error_result('File specifications not available')
25
25
 
26
26
  try:
27
- # Create type directories mapping
28
- type_dirs = {}
29
- for spec in context.file_specifications:
30
- spec_name = spec['name']
31
- spec_dir = context.pathlib_cwd / spec_name
32
- if spec_dir.exists() and spec_dir.is_dir():
33
- type_dirs[spec_name] = spec_dir
34
-
35
- if type_dirs:
36
- context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
27
+ # Check which mode we're in
28
+ use_single_path = context.get_param('use_single_path', True)
29
+
30
+ if use_single_path:
31
+ # Single path mode: all assets use same base path
32
+ return self._execute_single_path_mode(context, file_discovery_strategy)
37
33
  else:
38
- context.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
39
- return self.create_success_result(data={'organized_files': []})
34
+ # Multi-path mode: each asset has its own path
35
+ return self._execute_multi_path_mode(context, file_discovery_strategy)
40
36
 
41
- context.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
42
- context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
37
+ except Exception as e:
38
+ return self.create_error_result(f'File organization failed: {str(e)}')
43
39
 
44
- # Discover files in type directories
45
- all_files = []
46
- is_recursive = context.get_param('is_recursive', True)
40
+ def _execute_single_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
41
+ """Execute file organization in single path mode (traditional)."""
42
+ # Create type directories mapping
43
+ type_dirs = {}
44
+ for spec in context.file_specifications:
45
+ spec_name = spec['name']
46
+ spec_dir = context.pathlib_cwd / spec_name
47
+ if spec_dir.exists() and spec_dir.is_dir():
48
+ type_dirs[spec_name] = spec_dir
49
+
50
+ if type_dirs:
51
+ context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
52
+ else:
53
+ context.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
54
+ return self.create_success_result(data={'organized_files': []})
55
+
56
+ context.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
57
+ context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
58
+
59
+ # Discover files in type directories
60
+ all_files = []
61
+ is_recursive = context.get_param('is_recursive', True)
62
+
63
+ for spec_name, dir_path in type_dirs.items():
64
+ files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
65
+ all_files.extend(files_in_dir)
66
+
67
+ if not all_files:
68
+ context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
69
+ return self.create_success_result(data={'organized_files': []})
70
+
71
+ # Organize files using strategy
72
+ organized_files = file_discovery_strategy.organize(
73
+ all_files, context.file_specifications, context.metadata or {}, type_dirs
74
+ )
75
+
76
+ if organized_files:
77
+ context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(organized_files))
78
+ context.add_organized_files(organized_files)
79
+
80
+ return self.create_success_result(
81
+ data={'organized_files': organized_files},
82
+ rollback_data={'files_count': len(organized_files), 'type_dirs': list(type_dirs.keys())},
83
+ )
84
+
85
+ def _execute_multi_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
86
+ """Execute file organization in multi-path mode (each asset has own path)."""
87
+ from synapse_sdk.utils.storage import get_pathlib
88
+
89
+ assets = context.get_param('assets', {})
90
+ if not assets:
91
+ return self.create_error_result('Multi-path mode requires assets configuration')
92
+
93
+ context.run.log_message(f'Using multi-path mode with {len(assets)} asset configurations')
94
+ context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
95
+
96
+ all_organized_files = []
97
+ type_dirs = {}
98
+
99
+ for spec in context.file_specifications:
100
+ spec_name = spec['name']
101
+
102
+ # Skip if no asset configuration for this spec
103
+ if spec_name not in assets:
104
+ context.run.log_message(f'Skipping {spec_name}: no asset path configured')
105
+ continue
106
+
107
+ asset_config = assets[spec_name]
108
+
109
+ # Get the asset path from storage
110
+ try:
111
+ asset_path = get_pathlib(context.storage, asset_config.path)
112
+ type_dirs[spec_name] = asset_path
113
+ except Exception as e:
114
+ context.run.log_message(f'Error accessing path for {spec_name}: {str(e)}', 'WARNING')
115
+ continue
116
+
117
+ if not asset_path.exists():
118
+ context.run.log_message(f'Path does not exist for {spec_name}: {asset_config.path}', 'WARNING')
119
+ continue
120
+
121
+ # Discover files for this asset
122
+ is_recursive = asset_config.is_recursive
123
+ context.run.log_message(
124
+ f'Discovering files for {spec_name} at {asset_config.path} (recursive={is_recursive})'
125
+ )
47
126
 
48
- for spec_name, dir_path in type_dirs.items():
49
- files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
50
- all_files.extend(files_in_dir)
127
+ files = file_discovery_strategy.discover(asset_path, is_recursive)
51
128
 
52
- if not all_files:
53
- context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
54
- return self.create_success_result(data={'organized_files': []})
129
+ if not files:
130
+ context.run.log_message(f'No files found for {spec_name}', 'WARNING')
131
+ continue
55
132
 
56
- # Organize files using strategy
57
- organized_files = file_discovery_strategy.organize(
58
- all_files, context.file_specifications, context.metadata or {}, type_dirs
59
- )
133
+ # Organize files for this specific spec
134
+ organized = file_discovery_strategy.organize(files, [spec], context.metadata or {}, {spec_name: asset_path})
60
135
 
61
- if organized_files:
62
- context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(organized_files))
63
- context.add_organized_files(organized_files)
136
+ all_organized_files.extend(organized)
137
+ context.run.log_message(f'Found {len(organized)} files for {spec_name}')
64
138
 
65
- return self.create_success_result(
66
- data={'organized_files': organized_files},
67
- rollback_data={'files_count': len(organized_files), 'type_dirs': list(type_dirs.keys())},
68
- )
139
+ if all_organized_files:
140
+ context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(all_organized_files))
141
+ context.add_organized_files(all_organized_files)
142
+ else:
143
+ context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
69
144
 
70
- except Exception as e:
71
- return self.create_error_result(f'File organization failed: {str(e)}')
145
+ return self.create_success_result(
146
+ data={'organized_files': all_organized_files},
147
+ rollback_data={'files_count': len(all_organized_files), 'type_dirs': list(type_dirs.keys())},
148
+ )
72
149
 
73
150
  def can_skip(self, context: UploadContext) -> bool:
74
151
  """File organization cannot be skipped."""