synapse-sdk 1.0.0b24__py3-none-any.whl → 2025.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/clients/agent/ray.py +50 -0
- synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
- synapse_sdk/devtools/docs/docs/api/clients/backend.md +368 -1
- synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
- synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
- synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
- synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
- synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
- synapse_sdk/devtools/docs/docs/api/clients/ray.md +23 -2
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +1497 -213
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +378 -11
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ray.md +23 -2
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +1463 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +161 -34
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +1752 -572
- synapse_sdk/devtools/docs/sidebars.ts +7 -0
- synapse_sdk/plugins/README.md +1 -2
- synapse_sdk/plugins/categories/base.py +23 -0
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +160 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +1 -1
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +1 -2
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +154 -531
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +66 -29
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +182 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +106 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +80 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +66 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +101 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +89 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +96 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +61 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +86 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +34 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +233 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +238 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/async_upload.py +109 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +43 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +45 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +194 -83
- synapse_sdk/plugins/categories/upload/templates/config.yaml +4 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +269 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +71 -27
- synapse_sdk/plugins/models.py +5 -0
- {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/METADATA +3 -2
- {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/RECORD +81 -30
- synapse_sdk/plugins/categories/export/actions/export.py +0 -385
- synapse_sdk/plugins/categories/export/enums.py +0 -7
- {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b24.dist-info → synapse_sdk-2025.9.3.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field, model_validator
|
|
3
5
|
|
|
4
6
|
|
|
5
7
|
class PathAwareJSONEncoder(json.JSONEncoder):
|
|
@@ -29,111 +31,220 @@ class PathAwareJSONEncoder(json.JSONEncoder):
|
|
|
29
31
|
return super().default(obj)
|
|
30
32
|
|
|
31
33
|
|
|
32
|
-
class ExcelSecurityConfig:
|
|
33
|
-
"""
|
|
34
|
+
class ExcelSecurityConfig(BaseModel):
    """Security limits applied while processing Excel metadata files.

    Every limit is a validated integer field, so out-of-range values are
    rejected both at construction time and on later assignment
    (``validate_assignment`` is enabled). Unknown keyword arguments are
    rejected as well (``extra='forbid'``).

    Attributes:
        max_file_size_mb (int): Maximum file size in megabytes (default: 10).
        max_rows (int): Maximum number of rows allowed (default: 100000).
        max_columns (int): Maximum number of columns allowed (default: 50).
        max_memory_usage_mb (int): Maximum memory usage in megabytes (default: 30).
        max_filename_length (int): Maximum filename length (default: 255).
        max_column_name_length (int): Maximum column name length (default: 100).
        max_metadata_value_length (int): Maximum metadata value length (default: 1000).
        validation_check_interval (int): Validation check interval for
            processing (default: 1000).
    """

    max_file_size_mb: int = Field(
        default=10,
        ge=1,
        le=1000,
        description='Maximum file size in megabytes',
    )

    max_rows: int = Field(
        default=100000,
        ge=1,
        le=100000,
        description='Maximum number of rows allowed',
    )

    max_columns: int = Field(
        default=50,
        ge=1,
        le=16384,  # Excel's column limit
        description='Maximum number of columns allowed',
    )

    max_memory_usage_mb: int = Field(
        default=30,
        ge=1,
        le=1000,
        description='Maximum memory usage in megabytes',
    )

    max_filename_length: int = Field(
        default=255,
        ge=1,
        le=1000,
        description='Maximum filename length',
    )

    max_column_name_length: int = Field(
        default=100,
        ge=1,
        le=500,
        description='Maximum column name length',
    )

    max_metadata_value_length: int = Field(
        default=1000,
        ge=1,
        le=10000,
        description='Maximum metadata value length',
    )

    validation_check_interval: int = Field(
        default=1000,
        ge=100,
        le=10000,
        description='Validation check interval for processing',
    )

    model_config = {'validate_assignment': True, 'extra': 'forbid'}

    @model_validator(mode='after')
    def validate_resource_limits(self) -> 'ExcelSecurityConfig':
        """Reject row/column limits whose product allows too many cells.

        Raises:
            ValueError: If ``max_rows * max_columns`` exceeds 50 million.
        """
        # Each limit may be individually in range while the pair is not.
        estimated_cells = self.max_rows * self.max_columns
        if estimated_cells > 50000000:  # 50 million cells
            raise ValueError(
                f'Combination of max_rows ({self.max_rows}) and max_columns ({self.max_columns}) '
                f'would allow too many cells ({estimated_cells:,})'
            )

        return self

    @property
    def max_file_size_bytes(self) -> int:
        """Get maximum file size in bytes."""
        return self.max_file_size_mb * 1024 * 1024

    @property
    def max_memory_usage_bytes(self) -> int:
        """Get maximum memory usage in bytes."""
        return self.max_memory_usage_mb * 1024 * 1024

    # Backward compatibility properties (uppercase versions)
    @property
    def MAX_FILE_SIZE_MB(self) -> int:
        """Backward compatibility property."""
        return self.max_file_size_mb

    @property
    def MAX_FILE_SIZE_BYTES(self) -> int:
        """Backward compatibility property."""
        return self.max_file_size_bytes

    @property
    def MAX_MEMORY_USAGE_MB(self) -> int:
        """Backward compatibility property."""
        return self.max_memory_usage_mb

    @property
    def MAX_MEMORY_USAGE_BYTES(self) -> int:
        """Backward compatibility property."""
        return self.max_memory_usage_bytes

    @property
    def MAX_ROWS(self) -> int:
        """Backward compatibility property."""
        return self.max_rows

    @property
    def MAX_COLUMNS(self) -> int:
        """Backward compatibility property."""
        return self.max_columns

    @property
    def MAX_FILENAME_LENGTH(self) -> int:
        """Backward compatibility property."""
        return self.max_filename_length

    @property
    def MAX_COLUMN_NAME_LENGTH(self) -> int:
        """Backward compatibility property."""
        return self.max_column_name_length

    @property
    def MAX_METADATA_VALUE_LENGTH(self) -> int:
        """Backward compatibility property."""
        return self.max_metadata_value_length

    @property
    def VALIDATION_CHECK_INTERVAL(self) -> int:
        """Backward compatibility property."""
        return self.validation_check_interval

    @classmethod
    def from_action_config(cls, action_config: Optional[Dict[str, Any]]) -> 'ExcelSecurityConfig':
        """Create ExcelSecurityConfig from plugin action configuration (config.yaml).

        Only keys that match a declared field are read from ``excel_config``;
        any other key is ignored. Fields that are not present fall back to the
        defaults declared on the model, so the defaults live in one place.

        Args:
            action_config: Action configuration dictionary from config.yaml.
                May be ``None`` or lack an ``excel_config`` entry.

        Returns:
            New ExcelSecurityConfig instance with config.yaml values

        Raises:
            pydantic.ValidationError: If a supplied value violates a field
                constraint or the combined row/column limit.

        Example config.yaml:
            actions:
              upload:
                excel_config:
                  max_file_size_mb: 25
                  max_rows: 50000
                  max_columns: 100
        """
        # An ``excel_config:`` key with no value parses to None in YAML; treat
        # it like a missing section instead of failing on ``None.get``.
        excel_config = (action_config or {}).get('excel_config')
        if not excel_config:
            return cls()

        overrides = {name: excel_config[name] for name in cls.model_fields if name in excel_config}
        return cls(**overrides)
|
|
80
211
|
|
|
81
|
-
Args:
|
|
82
|
-
config (ExcelSecurityConfig): Security configuration instance
|
|
83
212
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
>>> utils = ExcelMetadataUtils(config)
|
|
87
|
-
>>> safe_value = utils.validate_and_truncate_string("long text", 10)
|
|
88
|
-
>>> is_valid = utils.is_valid_filename_length("file.xlsx")
|
|
89
|
-
"""
|
|
213
|
+
class ExcelMetadataUtils:
    """Length checks and string clean-up for Excel metadata processing.

    All limits are read from the configuration object handed to the
    constructor (``max_filename_length``, ``max_column_name_length`` and
    ``max_metadata_value_length``).
    """

    def __init__(self, config: 'ExcelSecurityConfig'):
        """Initialize with Excel security configuration.

        Args:
            config: Object exposing the ``max_*_length`` limits used below.
        """
        self.config = config

    def is_valid_filename_length(self, filename: str) -> bool:
        """Check if filename length is within limits.

        Args:
            filename: Filename to measure; it is not stripped first.

        Returns:
            True when ``len(filename)`` does not exceed ``max_filename_length``.
        """
        return len(filename) <= self.config.max_filename_length

    def validate_and_truncate_string(self, value: Any, max_length: int) -> str:
        """Convert to string, strip whitespace and truncate to a maximum length.

        Args:
            value: Value to clean; non-string values are converted with ``str``.
            max_length: Maximum number of characters to keep. Negative values
                are treated as 0.

        Returns:
            The stripped string, cut to at most ``max_length`` characters.
        """
        if not isinstance(value, str):
            value = str(value)

        # Strip whitespace
        value = value.strip()

        # A negative slice bound would count from the end of the string and
        # return an over-long result, so clamp the limit at zero.
        limit = max(max_length, 0)

        # Truncate if too long
        if len(value) > limit:
            value = value[:limit]

        return value

    def is_valid_column_name(self, column_name: Any) -> bool:
        """Check if column name is valid.

        A valid name is a string that is non-empty after stripping whitespace
        and whose stripped length does not exceed ``max_column_name_length``.

        Args:
            column_name: Candidate column header.

        Returns:
            True when the name is usable, False otherwise.
        """
        if not column_name or not isinstance(column_name, str):
            return False

        stripped = column_name.strip()
        # Whitespace-only headers are as empty as '' and are rejected too.
        if not stripped:
            return False
        return len(stripped) <= self.config.max_column_name_length

    def is_valid_metadata_value(self, value: Any) -> bool:
        """Check if metadata value is valid.

        Args:
            value: Cell value; ``None`` is always accepted and non-string
                values are measured by their ``str`` form.

        Returns:
            True when the value is ``None`` or its string length does not
            exceed ``max_metadata_value_length``.
        """
        if value is None:
            return True
        if not isinstance(value, str):
            value = str(value)
        return len(value) <= self.config.max_metadata_value_length
|
|
@@ -3,6 +3,10 @@ actions:
|
|
|
3
3
|
entrypoint: plugin.upload.Uploader
|
|
4
4
|
options:
|
|
5
5
|
supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
|
|
6
|
+
excel_config: # Configuration for Excel file uploads
|
|
7
|
+
max_file_size_mb: 10
|
|
8
|
+
max_rows: 100000
|
|
9
|
+
max_columns: 50
|
|
6
10
|
ui_schema: # UI schema for the input of extra params
|
|
7
11
|
- $formkit: "radio"
|
|
8
12
|
name: "file_format"
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Dict, List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BaseUploader:
    """Base class for upload plugins with common functionality.

    This class handles common tasks like file organization, validation, and metadata
    that are shared across all upload plugins. Plugin developers should inherit
    from this class and implement the required methods for their specific logic.

    Core Methods:
        handle_upload_files(): Main upload method - handles the complete upload workflow
        organize_files(): Handle file organization logic (can be overridden)
        validate_files(): Handle file validation logic (can be overridden)

    Required Methods (should be implemented by subclasses):
        process_files(): Transform/process files during upload

    Optional Methods (can be overridden by subclasses):
        before_process(): Pre-process files before main processing
        after_process(): Post-process files after main processing
        setup_directories(): Setup custom directories
        validate_file_types(): Custom file type validation

    Helper Methods:
        _log_validation_warning(): Log validation warnings
        _log_conversion_warning(): Log conversion warnings
        _filter_valid_files(): Filter files based on validation

    Auto-provided Utilities:
        Logging via self.run.log_message() and other run methods
        File path utilities via self.path
        Specification access via self.file_specification
    """

    def __init__(
        self,
        run,
        path: Path,
        file_specification: List = None,
        organized_files: List = None,
        extra_params: Dict = None,
    ):
        """Initialize the base upload class.

        Args:
            run: Plugin run object with logging capabilities.
            path: Path object pointing to the upload target directory.
            file_specification: List of specifications that define the structure of files to be uploaded.
            organized_files: List of pre-organized files based on the default logic.
            extra_params: Additional parameters for customization.
        """
        self.run = run
        self.path = path
        # None (or any falsy value) is replaced by a fresh empty container so
        # instances never share mutable state.
        self.file_specification = file_specification or []
        self.organized_files = organized_files or []
        self.extra_params = extra_params or {}

    def _log_validation_warning(self, spec_name: str, invalid_extensions: List[str], expected_extensions: List[str]):
        """Log validation warning for invalid file extensions."""
        self.run.log_message(
            f"Validation warning in '{spec_name}': File extensions {invalid_extensions} do not match expected extensions {expected_extensions}. These files will be excluded from upload."
        )

    def _log_conversion_warning(self, spec_name: str, extension: str, recommended_formats: str):
        """Log conversion warning for file formats that may need conversion."""
        self.run.log_message(
            f"Conversion warning in '{spec_name}': File extension '{extension}' may require conversion to [{recommended_formats}]."
        )

    def _filter_valid_files(self, files_to_validate: List) -> List:
        """Filter files based on validation criteria.

        Args:
            files_to_validate: List of organized file dictionaries to validate

        Returns:
            List: Filtered list containing only valid files
        """
        return files_to_validate  # Default: return all files

    def get_file_extensions_config(self) -> Dict[str, List[str]]:
        """Get allowed file extensions configuration.

        Returns:
            Dict mapping file categories to allowed extensions
        """
        return {
            'pcd': ['.pcd'],
            'text': ['.txt', '.html'],
            'audio': ['.wav', '.mp3'],
            'data': ['.bin', '.json', '.fbx'],
            'image': ['.jpg', '.jpeg', '.png'],
            'video': ['.mp4'],
        }

    def get_conversion_warnings_config(self) -> Dict[str, str]:
        """Get file conversion warnings configuration.

        Returns:
            Dict mapping problematic extensions to recommended formats
        """
        return {
            '.tif': ' .jpg, .png',
            '.tiff': ' .jpg, .png',
            '.avi': ' .mp4',
            '.mov': ' .mp4',
            '.mkv': ' .mp4',
            '.wmv': ' .mp4',
        }

    # Hook methods; the defaults below are pass-through implementations.
    def process_files(self, organized_files: List) -> List:
        """Process files. Should be implemented by subclasses."""
        return organized_files

    def before_process(self, organized_files: List) -> List:
        """Pre-process files before main processing. Can be overridden by subclasses."""
        return organized_files

    def after_process(self, processed_files: List) -> List:
        """Post-process files after main processing. Can be overridden by subclasses."""
        return processed_files

    def organize_files(self, files: List) -> List:
        """Organize files. Can be overridden by subclasses."""
        return files

    def validate_files(self, files: List) -> List:
        """Validate files. Can be overridden by subclasses."""
        return self._filter_valid_files(files)

    def setup_directories(self) -> None:
        """Setup custom directories. Can be overridden by subclasses."""
        pass

    @staticmethod
    def _record_violation(all_violation_case: Dict, spec_name: str, kind: str, extension: str, expected=None):
        """Add one offending extension to the per-specification violation report."""
        entry = all_violation_case.setdefault(spec_name, {'invalid': {}, 'warning': {}})
        extensions = entry[kind].setdefault(kind, [])
        if extension not in extensions:
            extensions.append(extension)
        if expected is not None:
            entry[kind]['expected'] = expected

    def validate_file_types(self, organized_files: List) -> List:
        """Validate file types against specifications and drop non-matching groups.

        For every file in a group whose key matches a specification name, the
        lower-cased extension is checked in this order:

        1. Extensions listed in ``get_conversion_warnings_config()`` are
           reported as conversion warnings.
        2. Otherwise the extension must be in the allowed list for the
           category (the part of the specification name before the first
           ``_``) or, when that category is unknown, for the specification's
           ``file_type``. When neither is known the file is accepted.

        A group with at least one warning or invalid file is excluded from
        the result. Violations from all groups are accumulated per
        specification name and logged once at the end.

        Args:
            organized_files: List of organized file dictionaries. Each group's
                ``'files'`` entry maps a specification name to a path or to a
                list of paths.

        Returns:
            List: Filtered list containing only valid files that match specifications
        """
        if not organized_files or not self.file_specification:
            return organized_files

        allowed_extensions = self.get_file_extensions_config()
        conversion_warnings = self.get_conversion_warnings_config()

        # First specification wins when names repeat.
        specs_by_name = {}
        for spec in self.file_specification:
            specs_by_name.setdefault(spec['name'], spec)

        valid_files = []
        all_violation_case = {}

        for file_group in organized_files:
            group_is_valid = True

            for spec_name, file_path in file_group.get('files', {}).items():
                file_spec = specs_by_name.get(spec_name)
                if not file_spec:
                    continue

                # Category is checked before file type; the first known one
                # decides which extensions are expected.
                file_category = spec_name.split('_')[0]
                expected = allowed_extensions.get(file_category)
                if expected is None:
                    expected = allowed_extensions.get(file_spec.get('file_type'))

                # A specification may carry one path or several; validate each.
                paths = file_path if isinstance(file_path, (list, tuple)) else [file_path]
                for single_path in paths:
                    # Path() also accepts plain string paths.
                    file_extension = Path(single_path).suffix.lower()

                    if file_extension in conversion_warnings:
                        self._record_violation(all_violation_case, spec_name, 'warning', file_extension)
                        group_is_valid = False
                    elif expected is not None and file_extension not in expected:
                        self._record_violation(all_violation_case, spec_name, 'invalid', file_extension, expected)
                        group_is_valid = False

            if group_is_valid:
                valid_files.append(file_group)

        # Log all violations found during validation
        self._log_all_violations(all_violation_case, conversion_warnings)

        return valid_files

    def _log_all_violations(self, all_violation_case: Dict, conversion_warnings: Dict):
        """Log all validation violations found during file validation.

        Args:
            all_violation_case: Maps a specification name to
                ``{'invalid': {'invalid': [...], 'expected': [...]}, 'warning': {'warning': [...]}}``;
                either inner dict may be empty.
            conversion_warnings: Maps an extension to its recommended formats.
        """
        for spec_name, violation_info in all_violation_case.items():
            invalid_info = violation_info.get('invalid')
            if invalid_info:
                self._log_validation_warning(spec_name, invalid_info['invalid'], invalid_info['expected'])

            warning_info = violation_info.get('warning')
            if warning_info:
                for warning in warning_info['warning']:
                    if warning in conversion_warnings:
                        self._log_conversion_warning(spec_name, warning, conversion_warnings[warning])

    def handle_upload_files(self) -> List:
        """Main upload method that handles the complete upload workflow.

        This method provides the core workflow for upload plugins:
        setup_directories -> organize_files -> before_process -> process_files ->
        after_process -> validate_files

        Returns:
            List: The final processed and validated list of files ready for upload.
        """
        # Setup any required directories
        self.setup_directories()

        # Each stage receives the previous stage's output, starting from the
        # organized files supplied at construction time.
        current_files = self.organized_files
        for stage in (
            self.organize_files,
            self.before_process,
            self.process_files,
            self.after_process,
            self.validate_files,
        ):
            current_files = stage(current_files)

        return current_files
|