synapse-sdk 2025.10.1__py3-none-any.whl → 2025.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synapse-sdk might be problematic.

Files changed (54)
  1. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
  2. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
  3. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
  4. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
  5. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
  6. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
  7. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
  8. synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
  9. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
  10. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
  11. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
  12. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
  13. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
  14. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +560 -0
  15. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
  16. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
  17. synapse_sdk/devtools/docs/sidebars.ts +27 -1
  18. synapse_sdk/plugins/README.md +487 -80
  19. synapse_sdk/plugins/categories/export/actions/export/action.py +8 -3
  20. synapse_sdk/plugins/categories/export/actions/export/utils.py +108 -8
  21. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  22. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  23. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  24. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  25. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
  26. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  27. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  28. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  29. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  39. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
  40. synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
  41. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
  42. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +106 -14
  43. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +113 -36
  44. synapse_sdk/plugins/categories/upload/templates/README.md +365 -0
  45. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/METADATA +1 -1
  46. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/RECORD +50 -22
  47. synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
  48. synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
  49. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
  50. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
  51. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/WHEEL +0 -0
  52. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/entry_points.txt +0 -0
  53. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/licenses/LICENSE +0 -0
  54. {synapse_sdk-2025.10.1.dist-info → synapse_sdk-2025.10.4.dist-info}/top_level.txt +0 -0

synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py
@@ -0,0 +1,127 @@
+"""Pre-processor management strategies for ToTask action."""
+
+from typing import Any, Dict
+
+from .base import PreProcessorStrategy, ToTaskContext
+
+
+class PreProcessorManagementStrategy(PreProcessorStrategy):
+    """Strategy for managing pre-processor lifecycle."""
+
+    def get_preprocessor_info(self, context: ToTaskContext, preprocessor_id: int) -> Dict[str, Any]:
+        """Get pre-processor information from the backend.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_id: The pre-processor ID
+
+        Returns:
+            Dict with pre-processor info or error
+        """
+        try:
+            client = context.client
+            pre_processor_response = client.get_plugin_release(preprocessor_id)
+            if isinstance(pre_processor_response, str):
+                return {'success': False, 'error': 'Invalid pre-processor response received'}
+
+            pre_processor: Dict[str, Any] = pre_processor_response
+            config = pre_processor.get('config', {})
+            code = config.get('code')
+            version = pre_processor.get('version')
+
+            if not code or not version:
+                return {'success': False, 'error': 'Invalid pre-processor configuration'}
+
+            return {'success': True, 'code': code, 'version': version}
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to get pre-processor info: {str(e)}'}
+
+    def ensure_preprocessor_running(self, context: ToTaskContext, preprocessor_code: str) -> Dict[str, Any]:
+        """Ensure the pre-processor is running, restart if necessary.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_code: The pre-processor code
+
+        Returns:
+            Dict indicating success or failure
+        """
+        try:
+            client = context.client
+
+            # Check if pre-processor is running
+            serve_applications_response = client.list_serve_applications(params={'plugin_code': preprocessor_code})
+            if isinstance(serve_applications_response, str):
+                return {'success': False, 'error': 'Invalid serve applications response'}
+
+            # Handle the response properly - it should be a dict with 'results' key
+            if not isinstance(serve_applications_response, dict):
+                return {'success': False, 'error': 'Unexpected serve applications response format'}
+
+            serve_applications: Dict[str, Any] = serve_applications_response
+            results = serve_applications.get('results', [])
+            running_serve_apps = [app for app in results if isinstance(app, dict) and app.get('status') == 'RUNNING']
+
+            # If not running, restart the pre-processor
+            if not running_serve_apps:
+                restart_result = self._restart_preprocessor(context, preprocessor_code)
+                if not restart_result['success']:
+                    return restart_result
+
+                # Verify restart was successful
+                serve_applications_response = client.list_serve_applications(params={'plugin_code': preprocessor_code})
+                if isinstance(serve_applications_response, str):
+                    return {'success': False, 'error': 'Failed to verify pre-processor restart'}
+
+                serve_applications = serve_applications_response
+                results = serve_applications.get('results', [])
+                running_serve_apps = [
+                    app for app in results if isinstance(app, dict) and app.get('status') == 'RUNNING'
+                ]
+
+                if not running_serve_apps:
+                    return {'success': False, 'error': 'Pre-processor failed to start after restart'}
+
+            return {'success': True}
+
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to ensure pre-processor running: {str(e)}'}
+
+    def _restart_preprocessor(self, context: ToTaskContext, preprocessor_code: str) -> Dict[str, Any]:
+        """Restart the pre-processor.
+
+        Args:
+            context: Shared context for the action execution
+            preprocessor_code: The pre-processor code
+
+        Returns:
+            Dict indicating success or failure
+        """
+        try:
+            client = context.client
+
+            # Start the serve application
+            inference_options = context.config.get('inference_options', {})
+            serve_application_deployment_payload = {
+                'agent': context.params.get('agent') if context.params else None,
+                'action': 'deployment',
+                'params': {
+                    'num_cpus': inference_options.get('required_cpu_count', 2),
+                    'num_gpus': inference_options.get('required_gpu_count', 1),
+                },
+                'debug': True,
+            }
+
+            deployment_result = client.run_plugin(
+                preprocessor_code,
+                serve_application_deployment_payload,
+            )
+
+            deployment_job_id = deployment_result.get('job_id')
+            if not deployment_job_id:
+                return {'success': False, 'error': 'No deployment job ID returned'}
+
+            return {'success': True, 'error': 'Pre-processor restarted successfully'}
+
+        except Exception as e:
+            return {'success': False, 'error': f'Failed to restart pre-processor: {str(e)}'}
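
For orientation, here is a minimal sketch of driving the new strategy from caller code. The stub client and the ToTaskContext keyword arguments are assumptions for illustration; in the SDK the context is prepared by the to_task orchestrator.

```python
# Hedged sketch: exercising PreProcessorManagementStrategy against a stub
# backend client. StubClient and the ToTaskContext kwargs are assumptions;
# only the result-dict contract comes from the code above.
from synapse_sdk.plugins.categories.pre_annotation.actions.to_task.strategies.base import ToTaskContext
from synapse_sdk.plugins.categories.pre_annotation.actions.to_task.strategies.preprocessor import (
    PreProcessorManagementStrategy,
)


class StubClient:
    def get_plugin_release(self, preprocessor_id):
        # Shape inferred from get_preprocessor_info: config.code plus version.
        return {'config': {'code': 'my-preprocessor'}, 'version': '1.0.0'}

    def list_serve_applications(self, params=None):
        # A RUNNING app makes ensure_preprocessor_running skip the restart.
        return {'results': [{'status': 'RUNNING'}]}


context = ToTaskContext(client=StubClient(), params={}, config={})  # field names assumed
strategy = PreProcessorManagementStrategy()

info = strategy.get_preprocessor_info(context, preprocessor_id=42)
if info['success']:
    print(strategy.ensure_preprocessor_running(context, info['code']))  # {'success': True}
```
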
synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py
@@ -0,0 +1,143 @@
+"""Validation strategies for ToTask action."""
+
+from typing import Any, Dict
+
+from ..enums import LogCode
+from .base import ToTaskContext, ValidationStrategy
+
+
+class ProjectValidationStrategy(ValidationStrategy):
+    """Strategy for validating project and data collection."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate project and data collection exist and are accessible.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+            project_id = context.params['project']
+
+            # Validate project response
+            project_response = client.get_project(project_id)
+            if isinstance(project_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_PROJECT_RESPONSE)
+                return {'success': False, 'error': 'Invalid project response received'}
+
+            project: Dict[str, Any] = project_response
+            context.project = project
+
+            # Validate data collection exists
+            data_collection_id = project.get('data_collection')
+            if not data_collection_id:
+                context.logger.log_message_with_code(LogCode.NO_DATA_COLLECTION)
+                return {'success': False, 'error': 'Project does not have a data collection'}
+
+            # Validate data collection response
+            data_collection_response = client.get_data_collection(data_collection_id)
+            if isinstance(data_collection_response, str):
+                context.logger.log_message_with_code(LogCode.INVALID_DATA_COLLECTION_RESPONSE)
+                return {'success': False, 'error': 'Invalid data collection response received'}
+
+            data_collection: Dict[str, Any] = data_collection_response
+            context.data_collection = data_collection
+
+            return {'success': True}
+
+        except Exception as e:
+            error_msg = f'Project validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+
+
+class TaskValidationStrategy(ValidationStrategy):
+    """Strategy for validating and discovering tasks."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Discover and validate tasks for processing.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            client = context.client
+
+            # Build task query parameters
+            task_ids_query_params = {
+                'project': context.params['project'],
+                'fields': 'id',
+            }
+            if context.params.get('task_filters'):
+                task_ids_query_params.update(context.params['task_filters'])
+
+            # Get tasks
+            task_ids_generator, task_ids_count = client.list_tasks(params=task_ids_query_params, list_all=True)
+            task_ids = [
+                int(item.get('id', 0)) for item in task_ids_generator if isinstance(item, dict) and item.get('id')
+            ]
+
+            # Validate tasks found
+            if not task_ids_count:
+                context.logger.log_message_with_code(LogCode.NO_TASKS_FOUND)
+                return {'success': False, 'error': 'No tasks found to annotate'}
+
+            context.task_ids = task_ids
+            return {'success': True, 'task_count': len(task_ids)}
+
+        except Exception as e:
+            error_msg = f'Task validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
+
+
+class TargetSpecificationValidationStrategy(ValidationStrategy):
+    """Strategy for validating target specification for file annotation."""
+
+    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
+        """Validate target specification exists in file specifications.
+
+        Args:
+            context: Shared context for the action execution
+
+        Returns:
+            Dict with 'success' boolean and optional 'error' message
+        """
+        try:
+            # Only validate if using FILE annotation method
+            from ..enums import AnnotationMethod
+
+            if context.annotation_method != AnnotationMethod.FILE:
+                return {'success': True}
+
+            target_specification_name = context.params.get('target_specification_name')
+            if not target_specification_name:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_REQUIRED)
+                return {'success': False, 'error': 'Target specification name is required for file annotation method'}
+
+            # Check if target specification exists in file specifications
+            if not context.data_collection:
+                return {'success': False, 'error': 'Data collection not available for validation'}
+
+            file_specifications = context.data_collection.get('file_specifications', [])
+            target_spec_exists = any(spec.get('name') == target_specification_name for spec in file_specifications)
+
+            if not target_spec_exists:
+                context.logger.log_message_with_code(LogCode.TARGET_SPEC_NOT_FOUND, target_specification_name)
+                return {
+                    'success': False,
+                    'error': f"Target specification '{target_specification_name}' not found in file specifications",
+                }
+
+            return {'success': True}
+
+        except Exception as e:
+            error_msg = f'Target specification validation failed: {str(e)}'
+            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
+            return {'success': False, 'error': error_msg}
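
All three validation strategies honor the same {'success', 'error'} contract, so an orchestrator can run them as a short-circuiting chain. A minimal sketch, assuming a ToTaskContext already prepared by the orchestrator:

```python
# Hedged sketch: chaining the validation strategies and stopping at the first
# failure. `context` is assumed to be built elsewhere (see to_task/orchestrator.py);
# only the validate() contract is taken from the code above.
from synapse_sdk.plugins.categories.pre_annotation.actions.to_task.strategies.base import ToTaskContext
from synapse_sdk.plugins.categories.pre_annotation.actions.to_task.strategies.validation import (
    ProjectValidationStrategy,
    TargetSpecificationValidationStrategy,
    TaskValidationStrategy,
)


def run_validations(context: ToTaskContext) -> None:
    validators = [
        ProjectValidationStrategy(),              # project + data collection checks
        TaskValidationStrategy(),                 # task discovery
        TargetSpecificationValidationStrategy(),  # enforced only for the FILE method
    ]
    for validator in validators:
        result = validator.validate(context)
        if not result['success']:
            raise RuntimeError(result['error'])
```
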
synapse_sdk/plugins/categories/upload/actions/upload/__init__.py
@@ -1,7 +1,7 @@
 from .action import UploadAction
 from .enums import LOG_MESSAGES, LogCode, UploadStatus
 from .exceptions import ExcelParsingError, ExcelSecurityError
-from .models import UploadParams
+from .models import ExcelMetadataFile, UploadParams
 from .run import UploadRun
 from .utils import ExcelSecurityConfig, PathAwareJSONEncoder

@@ -9,6 +9,7 @@ __all__ = [
     'UploadAction',
     'UploadRun',
     'UploadParams',
+    'ExcelMetadataFile',
    'UploadStatus',
    'LogCode',
    'LOG_MESSAGES',
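
The newly exported ExcelMetadataFile (defined in models.py below) carries a base64-encoded workbook. A small sketch of producing that payload on the caller side; the file name is illustrative:

```python
# Hedged sketch: building the base64 payload that ExcelMetadataFile expects,
# e.g. in a web/API integration. 'metadata.xlsx' is an illustrative path.
import base64

from synapse_sdk.plugins.categories.upload.actions.upload import ExcelMetadataFile

with open('metadata.xlsx', 'rb') as f:
    encoded = base64.b64encode(f.read()).decode('ascii')

excel_metadata = ExcelMetadataFile(data=encoded, filename='metadata.xlsx')
```
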
synapse_sdk/plugins/categories/upload/actions/upload/models.py
@@ -1,14 +1,53 @@
-from pathlib import Path
 from typing import Annotated

-from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator
+from pydantic import AfterValidator, BaseModel, ValidationInfo, field_validator, model_validator
 from pydantic_core import PydanticCustomError

 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.utils.pydantic.validators import non_blank
-from synapse_sdk.utils.storage import get_pathlib

-from .utils import ExcelSecurityConfig
+
+class ExcelMetadataFile(BaseModel):
+    """Excel metadata configuration for base64 encoded data.
+
+    This model is used specifically for base64-encoded Excel metadata files,
+    typically from web frontends or API integrations.
+
+    Attributes:
+        data: Base64 encoded content of the Excel file
+        filename: Name of the original file before base64 encoding
+
+    Examples:
+        Base64 mode:
+        >>> config = ExcelMetadataFile(
+        ...     data="UEsDBBQABgAI...",
+        ...     filename="metadata.xlsx"
+        ... )
+    """
+
+    data: str
+    filename: str
+
+
+class AssetConfig(BaseModel):
+    """Configuration for individual asset in multi-path mode.
+
+    Used when use_single_path=False to specify unique paths
+    and recursive settings for each file specification.
+
+    Attributes:
+        path (str): File system path for this specific asset
+        is_recursive (bool): Whether to recursively search subdirectories for this asset
+
+    Example:
+        >>> asset_config = AssetConfig(
+        ...     path="/sensors/camera/front",
+        ...     is_recursive=True
+        ... )
+    """
+
+    path: str
+    is_recursive: bool = True


 class UploadParams(BaseModel):
@@ -18,45 +57,93 @@ class UploadParams(BaseModel):
     Uses Pydantic for type validation and custom validators to ensure
     storage, data_collection, and project resources exist before processing.

+    Supports two modes controlled by use_single_path flag:
+
+    1. Single Path Mode (use_single_path=True, DEFAULT):
+       Traditional mode - all file specifications share one base path.
+       Requires: path, is_recursive
+       Ignores: assets
+
+    2. Multi-Path Mode (use_single_path=False):
+       Advanced mode - each file specification has its own path.
+       Requires: assets (dict with file spec names as keys)
+       Ignores: path, is_recursive
+
     Attributes:
         name (str): Human-readable name for the upload operation
         description (str | None): Optional description of the upload
-        path (str): File system path to upload from
+        use_single_path (bool): Mode selector (True=single path, False=multi-path)
+        path (str | None): Base path for single path mode
+        is_recursive (bool): Global recursive setting for single path mode
+        assets (dict[str, AssetConfig] | None): Per-asset configs for multi-path mode
         storage (int): Storage ID where files will be uploaded
-        data_collection (int): Data data_collection ID for organizing uploads
+        data_collection (int): Data collection ID for organizing uploads
         project (int | None): Optional project ID for grouping
-        excel_metadata_path (str | None): Path to Excel metadata file
-        is_recursive (bool): Whether to recursively process subdirectories
+        excel_metadata_path (str | None): Path to Excel metadata file (traditional, backward compatible)
+            Note: This parameter will be deprecated in a future version. Consider using excel_metadata instead.
+        excel_metadata (ExcelMetadataFile | None): Base64 encoded Excel metadata (for web/API integration)
+            Note: Cannot use both excel_metadata_path and excel_metadata simultaneously
         max_file_size_mb (int): Maximum file size limit in megabytes
         creating_data_unit_batch_size (int): Batch size for data unit creation
         use_async_upload (bool): Whether to use asynchronous upload processing
-        extra_params (dict | None): Extra parameters for the action.
-            Example: {"include_metadata": True, "compression": "gzip"}
+        extra_params (dict | None): Extra parameters for the action

     Validation:
         - name: Must be non-blank after validation
         - storage: Must exist and be accessible via client API
         - data_collection: Must exist and be accessible via client API
         - project: Must exist if specified, or can be None
-        - excel_metadata_path: Must be valid Excel file if specified
-
-    Example:
-        >>> params = UploadParams(
-        ...     name="Data Upload",
-        ...     path="/data/files",
-        ...     storage=1,
-        ...     data_collection=5
-        ... )
+        - use_single_path mode: Validates required fields per mode
+
+    Examples:
+        Single Path Mode (Traditional):
+        >>> params = UploadParams(
+        ...     name="Standard Upload",
+        ...     use_single_path=True,
+        ...     path="/data/experiment_1",
+        ...     is_recursive=True,
+        ...     storage=1,
+        ...     data_collection=5
+        ... )
+
+        Multi-Path Mode (Advanced):
+        >>> params = UploadParams(
+        ...     name="Multi-Source Upload",
+        ...     use_single_path=False,
+        ...     assets={
+        ...         "image_1": AssetConfig(path="/sensors/camera", is_recursive=True),
+        ...         "pcd_1": AssetConfig(path="/sensors/lidar", is_recursive=False)
+        ...     },
+        ...     storage=1,
+        ...     data_collection=5
+        ... )
     """

     name: Annotated[str, AfterValidator(non_blank)]
     description: str | None = None
-    path: str
+
+    # Mode selector flag (True = single path mode, False = multi-path mode)
+    use_single_path: bool = True
+
+    # Single path mode fields (used when use_single_path=True)
+    path: str | None = None
+    is_recursive: bool = True
+
+    # Multi-path mode fields (used when use_single_path=False)
+    assets: dict[str, AssetConfig] | None = None
+
     storage: int
     data_collection: int
     project: int | None = None
+
+    # Excel metadata - two separate parameters for clarity:
+    # 1. excel_metadata_path: Simple file path string (backward compatible, traditional usage)
+    #    NOTE: Will be deprecated in a future version. Consider using excel_metadata instead.
+    # 2. excel_metadata: Dictionary with base64 encoded data (new, for web/API integration)
+    # TODO: Plan to deprecate excel_metadata_path in a few versions for backward compatibility
     excel_metadata_path: str | None = None
-    is_recursive: bool = True
+    excel_metadata: ExcelMetadataFile | None = None
+
     max_file_size_mb: int = 50
     creating_data_unit_batch_size: int = 1
     use_async_upload: bool = True
@@ -107,80 +194,33 @@ class UploadParams(BaseModel):
             raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
         return value

-    @field_validator('excel_metadata_path', mode='after')
-    @classmethod
-    def check_excel_metadata_path(cls, value, info: ValidationInfo) -> str | None:
-        if not value:
-            return value
-
-        # Validate file extension
-        if not value.lower().endswith(('.xlsx', '.xls')):
-            raise PydanticCustomError('invalid_file_type', 'Excel metadata file must be .xlsx or .xls format.')
-
-        # Get storage and path from validation data
-        if not (hasattr(info, 'data') and 'storage' in info.data and 'path' in info.data):
-            # If we don't have storage/path data yet, just validate extension
-            return value
-
-        if info.context is None:
-            raise PydanticCustomError('missing_context', 'Validation context is required.')
-
-        action = info.context['action']
-        client = action.client
-
-        try:
-            # Get storage configuration
-            storage_id = info.data['storage']
-            storage = client.get_storage(storage_id)
-
-            # Skip file system validation if storage doesn't have provider (likely test environment)
-            if not isinstance(storage, dict) or 'provider' not in storage:
-                # Basic validation only - likely in test environment
-                return value
-
-            # Get the actual file system path using storage + path
-            base_path = get_pathlib(storage, info.data['path'])
-
-            # Support both absolute and relative paths
-            if Path(value).is_absolute():
-                excel_path = Path(value)
-            else:
-                excel_path = base_path / value
-
-            if not excel_path.exists():
-                raise PydanticCustomError('file_not_found', 'Excel metadata file not found.')
-
-            # Validate file size
-            file_size = excel_path.stat().st_size
-            excel_config = ExcelSecurityConfig()
-            if file_size > excel_config.MAX_FILE_SIZE_BYTES:
-                max_size_mb = excel_config.MAX_FILE_SIZE_MB
+    @model_validator(mode='after')
+    def validate_path_configuration(self) -> 'UploadParams':
+        """Validate path configuration based on use_single_path mode."""
+        if self.use_single_path:
+            # Single path mode: requires path
+            if not self.path:
                 raise PydanticCustomError(
-                    'file_too_large',
-                    'Excel metadata file is too large. Maximum size is {max_size_mb}MB.',
-                    {'max_size_mb': max_size_mb},
+                    'missing_path', "When use_single_path=true (single path mode), 'path' is required"
                 )
+            # Warn if assets is provided in single path mode (it will be ignored)
+            # For now, we'll silently ignore it
+        else:
+            # Multi-path mode: requires assets
+            if not self.assets:
+                raise PydanticCustomError(
+                    'missing_assets',
+                    "When use_single_path=false (multi-path mode), 'assets' must be provided "
+                    'with path configurations for each file specification',
+                )
+            # path and is_recursive are ignored in multi-path mode

-        # Validate file format
-        try:
-            with open(excel_path, 'rb') as f:
-                header = f.read(8)
-                if not header:
-                    raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be empty.')
-
-                if excel_path.suffix.lower() == '.xlsx':
-                    if not header.startswith(b'PK'):
-                        raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
-                elif excel_path.suffix.lower() == '.xls':
-                    if not (header.startswith(b'\xd0\xcf\x11\xe0') or header.startswith(b'\x09\x08')):
-                        raise PydanticCustomError('invalid_file', 'Excel metadata file appears to be corrupted.')
-
-        except (OSError, IOError):
-            raise PydanticCustomError('file_access_error', 'Cannot access Excel metadata file.')
-
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking storage.')
-        except Exception as e:
-            raise PydanticCustomError('validation_error', f'Error validating Excel metadata file: {str(e)}')
+        # Validate excel metadata parameters - cannot use both at the same time
+        if self.excel_metadata_path and self.excel_metadata:
+            raise PydanticCustomError(
+                'conflicting_excel_metadata',
+                "Cannot specify both 'excel_metadata_path' and 'excel_metadata'. "
+                "Use 'excel_metadata_path' for file paths or 'excel_metadata' for base64 encoded data.",
+            )

-        return value
+        return self
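
The new model_validator makes excel_metadata_path and excel_metadata mutually exclusive. A sketch of that failure mode, assuming the storage/data_collection existence validators (which call the backend) are satisfied or mocked in your environment:

```python
# Hedged sketch: triggering the conflicting_excel_metadata branch of
# validate_path_configuration. Field values are illustrative; backend-backed
# field validators are assumed to pass here.
from pydantic import ValidationError

from synapse_sdk.plugins.categories.upload.actions.upload import ExcelMetadataFile
from synapse_sdk.plugins.categories.upload.actions.upload.models import UploadParams

try:
    UploadParams(
        name='Conflicting Upload',
        path='/data/files',
        storage=1,
        data_collection=5,
        excel_metadata_path='meta/metadata.xlsx',
        excel_metadata=ExcelMetadataFile(data='UEsDBBQABgAI...', filename='metadata.xlsx'),
    )
except ValidationError as exc:
    # Pydantic surfaces the PydanticCustomError raised by the model validator.
    print(exc)
```
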
synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py
@@ -21,8 +21,8 @@ class CleanupStep(BaseStep):
     def execute(self, context: UploadContext) -> StepResult:
         """Execute cleanup step."""
         try:
-            # Cleanup temporary directory
-            self._cleanup_temp_directory(context)
+            # Cleanup temporary directory - commented out because duplicated process with ray cleanup process
+            # self._cleanup_temp_directory(context)

             # Log completion
             context.run.log_message_with_code(LogCode.IMPORT_COMPLETED)