synapse-sdk 2025.9.1__py3-none-any.whl → 2025.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. Click here for more details.

Files changed (81) hide show
  1. synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
  2. synapse_sdk/devtools/docs/docs/api/clients/backend.md +368 -1
  3. synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
  4. synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
  5. synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
  6. synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
  7. synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
  8. synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
  9. synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +1463 -0
  10. synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +161 -34
  11. synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +1497 -213
  12. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
  13. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +378 -11
  14. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
  15. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
  16. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
  17. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
  18. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
  19. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
  20. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +1463 -0
  21. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +161 -34
  22. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +1752 -572
  23. synapse_sdk/devtools/docs/sidebars.ts +7 -0
  24. synapse_sdk/plugins/README.md +1 -2
  25. synapse_sdk/plugins/categories/base.py +7 -0
  26. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  27. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  28. synapse_sdk/plugins/categories/export/actions/export/action.py +160 -0
  29. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  30. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  31. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  32. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  33. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  34. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +1 -1
  35. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +1 -2
  36. synapse_sdk/plugins/categories/upload/actions/upload/action.py +154 -531
  37. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  38. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +143 -0
  39. synapse_sdk/plugins/categories/upload/actions/upload/models.py +66 -29
  40. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +182 -0
  41. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  42. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  43. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +106 -0
  44. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  45. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +62 -0
  46. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +80 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +66 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +101 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +89 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +96 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +61 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +86 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +34 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +233 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +253 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/async_upload.py +109 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +43 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +45 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +194 -83
  69. synapse_sdk/plugins/categories/upload/templates/config.yaml +4 -0
  70. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +269 -0
  71. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +71 -27
  72. synapse_sdk/plugins/models.py +7 -0
  73. synapse_sdk/shared/__init__.py +21 -0
  74. {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/METADATA +2 -1
  75. {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/RECORD +79 -28
  76. synapse_sdk/plugins/categories/export/actions/export.py +0 -385
  77. synapse_sdk/plugins/categories/export/enums.py +0 -7
  78. {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/WHEEL +0 -0
  79. {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/entry_points.txt +0 -0
  80. {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/licenses/LICENSE +0 -0
  81. {synapse_sdk-2025.9.1.dist-info → synapse_sdk-2025.9.4.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  import json
2
- import os
2
+ from typing import Any, Dict, Optional
3
+
4
+ from pydantic import BaseModel, Field, model_validator
3
5
 
4
6
 
5
7
  class PathAwareJSONEncoder(json.JSONEncoder):
@@ -29,111 +31,220 @@ class PathAwareJSONEncoder(json.JSONEncoder):
29
31
  return super().default(obj)
30
32
 
31
33
 
32
class ExcelSecurityConfig(BaseModel):
    """Security limits applied while processing Excel files.

    Every limit is a validated Pydantic field, so out-of-range values are
    rejected both at construction time and on later attribute assignment
    (``validate_assignment``). Unknown keyword arguments are rejected
    (``extra='forbid'``).

    Attributes:
        max_file_size_mb (int): Maximum file size in megabytes (default: 10, range 1-1000)
        max_rows (int): Maximum number of rows allowed (default: 100000, range 1-100000)
        max_columns (int): Maximum number of columns allowed (default: 50, range 1-16384)
        max_memory_usage_mb (int): Maximum memory usage in megabytes (default: 30, range 1-1000)
        max_filename_length (int): Maximum filename length (default: 255, range 1-1000)
        max_column_name_length (int): Maximum column name length (default: 100, range 1-500)
        max_metadata_value_length (int): Maximum metadata value length (default: 1000, range 1-10000)
        validation_check_interval (int): Validation check interval for processing
            (default: 1000, range 100-10000)
    """

    max_file_size_mb: int = Field(
        default=10,
        ge=1,
        le=1000,
        description='Maximum file size in megabytes',
    )

    max_rows: int = Field(
        default=100000,
        ge=1,
        le=100000,
        description='Maximum number of rows allowed',
    )

    max_columns: int = Field(
        default=50,
        ge=1,
        le=16384,  # Excel's column limit
        description='Maximum number of columns allowed',
    )

    max_memory_usage_mb: int = Field(
        default=30,
        ge=1,
        le=1000,
        description='Maximum memory usage in megabytes',
    )

    max_filename_length: int = Field(
        default=255,
        ge=1,
        le=1000,
        description='Maximum filename length',
    )

    max_column_name_length: int = Field(
        default=100,
        ge=1,
        le=500,
        description='Maximum column name length',
    )

    max_metadata_value_length: int = Field(
        default=1000,
        ge=1,
        le=10000,
        description='Maximum metadata value length',
    )

    validation_check_interval: int = Field(
        default=1000,
        ge=100,
        le=10000,
        description='Validation check interval for processing',
    )

    model_config = {'validate_assignment': True, 'extra': 'forbid'}

    @model_validator(mode='after')
    def validate_resource_limits(self) -> 'ExcelSecurityConfig':
        """Reject row/column limits whose product allows too many cells.

        Returns:
            The validated instance (required by Pydantic ``after`` validators).

        Raises:
            ValueError: If ``max_rows * max_columns`` exceeds 50 million cells.
        """
        # The per-field bounds alone still permit 100000 x 16384 cells, so the
        # combination has to be checked separately.
        estimated_cells = self.max_rows * self.max_columns
        if estimated_cells > 50_000_000:  # 50 million cells
            raise ValueError(
                f'Combination of max_rows ({self.max_rows}) and max_columns ({self.max_columns}) '
                f'would allow too many cells ({estimated_cells:,})'
            )

        return self

    @property
    def max_file_size_bytes(self) -> int:
        """Get maximum file size in bytes."""
        return self.max_file_size_mb * 1024 * 1024

    @property
    def max_memory_usage_bytes(self) -> int:
        """Get maximum memory usage in bytes."""
        return self.max_memory_usage_mb * 1024 * 1024

    # Backward compatibility properties (uppercase versions). The previous,
    # non-Pydantic implementation exposed these names as plain attributes.
    @property
    def MAX_FILE_SIZE_MB(self) -> int:
        """Backward compatibility alias for ``max_file_size_mb``."""
        return self.max_file_size_mb

    @property
    def MAX_FILE_SIZE_BYTES(self) -> int:
        """Backward compatibility alias for ``max_file_size_bytes``."""
        return self.max_file_size_bytes

    @property
    def MAX_MEMORY_USAGE_MB(self) -> int:
        """Backward compatibility alias for ``max_memory_usage_mb``."""
        return self.max_memory_usage_mb

    @property
    def MAX_MEMORY_USAGE_BYTES(self) -> int:
        """Backward compatibility alias for ``max_memory_usage_bytes``."""
        return self.max_memory_usage_bytes

    @property
    def MAX_ROWS(self) -> int:
        """Backward compatibility alias for ``max_rows``."""
        return self.max_rows

    @property
    def MAX_COLUMNS(self) -> int:
        """Backward compatibility alias for ``max_columns``."""
        return self.max_columns

    @property
    def MAX_FILENAME_LENGTH(self) -> int:
        """Backward compatibility alias for ``max_filename_length``."""
        return self.max_filename_length

    @property
    def MAX_COLUMN_NAME_LENGTH(self) -> int:
        """Backward compatibility alias for ``max_column_name_length``."""
        return self.max_column_name_length

    @property
    def MAX_METADATA_VALUE_LENGTH(self) -> int:
        """Backward compatibility alias for ``max_metadata_value_length``."""
        return self.max_metadata_value_length

    @property
    def VALIDATION_CHECK_INTERVAL(self) -> int:
        """Backward compatibility alias for ``validation_check_interval``."""
        return self.validation_check_interval

    @classmethod
    def from_action_config(cls, action_config: Optional[Dict[str, Any]]) -> 'ExcelSecurityConfig':
        """Create ExcelSecurityConfig from plugin action configuration (config.yaml).

        Only keys that name a model field are read from ``excel_config``; any
        other keys are ignored. Fields missing from ``excel_config`` fall back
        to the defaults declared on the model, so the defaults live in exactly
        one place.

        Args:
            action_config: Action configuration dictionary from config.yaml

        Returns:
            New ExcelSecurityConfig instance with config.yaml values. A
            default instance is returned when ``action_config`` is empty, has
            no ``excel_config`` key, or ``excel_config`` is empty/None.

        Raises:
            TypeError: If ``excel_config`` is present but is not a dictionary.
            pydantic.ValidationError: If a supplied value is out of range.

        Example config.yaml:
            actions:
              upload:
                excel_config:
                  max_file_size_mb: 25
                  max_rows: 50000
                  max_columns: 100
        """
        if not action_config or 'excel_config' not in action_config:
            return cls()

        excel_config = action_config['excel_config']

        # An `excel_config:` key with no children parses to None in YAML;
        # treat that the same as "not configured" instead of crashing.
        if not excel_config:
            return cls()

        if not isinstance(excel_config, dict):
            raise TypeError(f'excel_config must be a mapping of limits, got {type(excel_config).__name__}')

        # Forward only known fields: unknown keys stay ignored (the model
        # itself forbids extras) and omitted keys use the Field defaults.
        overrides = {name: excel_config[name] for name in cls.model_fields if name in excel_config}
        return cls(**overrides)
80
211
 
81
- Args:
82
- config (ExcelSecurityConfig): Security configuration instance
83
212
 
84
- Example:
85
- >>> config = ExcelSecurityConfig()
86
- >>> utils = ExcelMetadataUtils(config)
87
- >>> safe_value = utils.validate_and_truncate_string("long text", 10)
88
- >>> is_valid = utils.is_valid_filename_length("file.xlsx")
89
- """
213
class ExcelMetadataUtils:
    """Helpers for validating Excel metadata against configured limits.

    The limits are read from the ``max_filename_length``,
    ``max_column_name_length`` and ``max_metadata_value_length`` attributes of
    the configuration object passed to the constructor.
    """

    def __init__(self, config: 'ExcelSecurityConfig'):
        """Initialize with Excel security configuration.

        Args:
            config: Configuration object providing the length limits.
        """
        self.config = config

    def is_valid_filename_length(self, filename: str) -> bool:
        """Check if filename length is within limits.

        Surrounding whitespace is ignored when measuring, matching how
        column names are measured by ``is_valid_column_name``.

        Args:
            filename: Filename to check.

        Returns:
            True if the trimmed filename fits within ``max_filename_length``;
            False if it is too long or is not a string.
        """
        if not isinstance(filename, str):
            return False
        return len(filename.strip()) <= self.config.max_filename_length

    def validate_and_truncate_string(self, value: str, max_length: int) -> str:
        """Validate and truncate string to maximum length.

        Args:
            value: Value to normalize; non-string values are converted with ``str()``.
            max_length: Maximum allowed length of the result.

        Returns:
            The whitespace-trimmed value, cut to ``max_length`` characters if longer.
        """
        if not isinstance(value, str):
            value = str(value)

        # Strip whitespace first so padding does not consume the length budget
        value = value.strip()

        # Truncate if too long
        if len(value) > max_length:
            value = value[:max_length]

        return value

    def is_valid_column_name(self, column_name: str) -> bool:
        """Check if column name is valid.

        Args:
            column_name: Column header to check.

        Returns:
            True if the name is a string that is non-empty after trimming
            whitespace and no longer than ``max_column_name_length``.
        """
        if not isinstance(column_name, str):
            return False

        trimmed = column_name.strip()
        # A header made only of whitespace carries no name, so it is rejected
        # the same way an empty string is.
        if not trimmed:
            return False
        return len(trimmed) <= self.config.max_column_name_length

    def is_valid_metadata_value(self, value: Any) -> bool:
        """Check if metadata value is valid.

        Args:
            value: Cell value to check; may be None or a non-string value.

        Returns:
            True if the value is None, or if its string form is no longer
            than ``max_metadata_value_length``.
        """
        if value is None:
            return True
        if not isinstance(value, str):
            value = str(value)
        return len(value) <= self.config.max_metadata_value_length
@@ -3,6 +3,10 @@ actions:
3
3
  entrypoint: plugin.upload.Uploader
4
4
  options:
5
5
  supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
6
+ excel_config: # Configuration for Excel file uploads
7
+ max_file_size_mb: 10
8
+ max_rows: 100000
9
+ max_columns: 50
6
10
  ui_schema: # UI schema for the input of extra params
7
11
  - $formkit: "radio"
8
12
  name: "file_format"
@@ -0,0 +1,269 @@
1
+ from pathlib import Path
2
+ from typing import Dict, List
3
+
4
+
5
class BaseUploader:
    """Base class for upload plugins with common functionality.

    This class handles common tasks like file organization, validation, and metadata
    that are shared across all upload plugins. Plugin developers should inherit
    from this class and implement the required methods for their specific logic.

    Core Methods:
        handle_upload_files(): Main upload method - handles the complete upload workflow
        organize_files(): Handle file organization logic (can be overridden)
        validate_files(): Handle file validation logic (can be overridden)

    Required Methods (should be implemented by subclasses):
        process_files(): Transform/process files during upload

    Optional Methods (can be overridden by subclasses):
        before_process(): Pre-process files before main processing
        after_process(): Post-process files after main processing
        setup_directories(): Setup custom directories
        validate_file_types(): Custom file type validation

    Helper Methods:
        _log_validation_warning(): Log validation warnings
        _log_conversion_warning(): Log conversion warnings
        _filter_valid_files(): Filter files based on validation

    Auto-provided Utilities:
        Logging via self.run.log_message() and other run methods
        File path utilities via self.path
        Specification access via self.file_specification
    """

    def __init__(
        self,
        run,
        path: Path,
        file_specification: List = None,
        organized_files: List = None,
        extra_params: Dict = None,
    ):
        """Initialize the base upload class.

        Args:
            run: Plugin run object with logging capabilities.
            path: Path object pointing to the upload target directory.
            file_specification: List of specifications that define the structure of files to be uploaded.
            organized_files: List of pre-organized files based on the default logic.
            extra_params: Additional parameters for customization.
        """
        self.run = run
        self.path = path
        # None defaults are replaced with fresh containers so instances never
        # share mutable state.
        self.file_specification = file_specification or []
        self.organized_files = organized_files or []
        self.extra_params = extra_params or {}

    def _log_validation_warning(self, spec_name: str, invalid_extensions: List[str], expected_extensions: List[str]):
        """Log validation warning for invalid file extensions."""
        self.run.log_message(
            f"Validation warning in '{spec_name}': File extensions {invalid_extensions} do not match expected extensions {expected_extensions}. These files will be excluded from upload."
        )

    def _log_conversion_warning(self, spec_name: str, extension: str, recommended_formats: str):
        """Log conversion warning for file formats that may need conversion."""
        self.run.log_message(
            f"Conversion warning in '{spec_name}': File extension '{extension}' may require conversion to [{recommended_formats}]."
        )

    def _filter_valid_files(self, files_to_validate: List) -> List:
        """Filter files based on validation criteria.

        Args:
            files_to_validate: List of organized file dictionaries to validate

        Returns:
            List: Filtered list containing only valid files
        """
        return files_to_validate  # Default: return all files

    def get_file_extensions_config(self) -> Dict[str, List[str]]:
        """Get allowed file extensions configuration.

        Returns:
            Dict mapping file categories to allowed extensions
        """
        return {
            'pcd': ['.pcd'],
            'text': ['.txt', '.html'],
            'audio': ['.wav', '.mp3'],
            'data': ['.bin', '.json', '.fbx'],
            'image': ['.jpg', '.jpeg', '.png'],
            'video': ['.mp4'],
        }

    def get_conversion_warnings_config(self) -> Dict[str, str]:
        """Get file conversion warnings configuration.

        Returns:
            Dict mapping problematic extensions to recommended formats
        """
        return {
            '.tif': ' .jpg, .png',
            '.tiff': ' .jpg, .png',
            '.avi': ' .mp4',
            '.mov': ' .mp4',
            '.mkv': ' .mp4',
            '.wmv': ' .mp4',
        }

    # Hook methods. The defaults are pass-through so a subclass only needs to
    # override the steps it cares about.
    def process_files(self, organized_files: List) -> List:
        """Process files. Should be implemented by subclasses."""
        return organized_files

    def before_process(self, organized_files: List) -> List:
        """Pre-process files before main processing. Can be overridden by subclasses."""
        return organized_files

    def after_process(self, processed_files: List) -> List:
        """Post-process files after main processing. Can be overridden by subclasses."""
        return processed_files

    def organize_files(self, files: List) -> List:
        """Organize files. Can be overridden by subclasses."""
        return files

    def validate_files(self, files: List) -> List:
        """Validate files. Can be overridden by subclasses."""
        return self._filter_valid_files(files)

    def setup_directories(self) -> None:
        """Setup custom directories. Can be overridden by subclasses."""
        pass

    @staticmethod
    def _as_path_list(file_path) -> List:
        """Normalize a spec's file entry (None, one path, or a list of paths) to a list of path objects."""
        if file_path is None:
            return []
        entries = file_path if isinstance(file_path, (list, tuple)) else [file_path]
        # Objects that already expose `.suffix` are used as-is; plain strings
        # are wrapped so `.suffix` is available.
        return [entry if hasattr(entry, 'suffix') else Path(entry) for entry in entries if entry is not None]

    def _find_group_violation(
        self,
        files_dict: Dict,
        specs_by_name: Dict,
        allowed_extensions: Dict[str, List[str]],
        conversion_warnings: Dict[str, str],
    ):
        """Return the first violation in a file group as (spec_name, kind, extension, expected), or None."""
        for spec_name, file_path in files_dict.items():
            file_spec = specs_by_name.get(spec_name)
            if not file_spec:
                continue  # No specification for this entry - nothing to check

            # The category prefix of the spec name (e.g. 'image' in 'image_1')
            # takes precedence; the specification's file_type is the fallback.
            file_category = spec_name.split('_')[0]
            if file_category in allowed_extensions:
                expected = allowed_extensions[file_category]
            else:
                expected = allowed_extensions.get(file_spec['file_type'])

            for candidate in self._as_path_list(file_path):
                file_extension = candidate.suffix.lower()

                # Formats that need conversion are excluded before the
                # allowed-extension check.
                if file_extension in conversion_warnings:
                    return spec_name, 'warning', file_extension, None

                # `expected is None` means neither category nor file type is
                # known, in which case the file is accepted.
                if expected is not None and file_extension not in expected:
                    return spec_name, 'invalid', file_extension, expected

        return None

    def validate_file_types(self, organized_files: List) -> List:
        """Validate file types against specifications and drop non-matching groups.

        A file group is excluded as soon as one of its files has an extension
        that requires conversion or that is not allowed for its category /
        file type. A specification entry may hold a single path or a list of
        paths; every path in a list is checked.

        Args:
            organized_files: List of organized file dictionaries

        Returns:
            List: Filtered list containing only valid files that match specifications.
            The input is returned unchanged when it is empty or when no file
            specification is configured.
        """
        if not organized_files or not self.file_specification:
            return organized_files

        allowed_extensions = self.get_file_extensions_config()
        conversion_warnings = self.get_conversion_warnings_config()

        # Index specifications once; setdefault keeps the first specification
        # when a name is duplicated.
        specs_by_name = {}
        for spec in self.file_specification:
            specs_by_name.setdefault(spec['name'], spec)

        valid_files = []
        all_violation_case = {}

        for file_group in organized_files:
            violation = self._find_group_violation(
                file_group.get('files', {}), specs_by_name, allowed_extensions, conversion_warnings
            )
            if violation is None:
                valid_files.append(file_group)
                continue

            # Accumulate per specification so violations from several groups
            # are all reported instead of the last one overwriting the rest.
            spec_name, kind, file_extension, expected = violation
            record = all_violation_case.setdefault(spec_name, {'invalid': {}, 'warning': {}})
            extensions = record[kind].setdefault(kind, [])
            if file_extension not in extensions:
                extensions.append(file_extension)
            if kind == 'invalid':
                record['invalid']['expected'] = expected

        # Log all violations found during validation
        self._log_all_violations(all_violation_case, conversion_warnings)

        return valid_files

    def _log_all_violations(self, all_violation_case: Dict, conversion_warnings: Dict):
        """Log all validation violations found during file validation.

        Args:
            all_violation_case: Mapping of spec name to
                ``{'invalid': {'invalid': [...], 'expected': [...]}, 'warning': {'warning': [...]}}``;
                either inner dict may be empty.
            conversion_warnings: Mapping of extension to recommended formats.
        """
        for spec_name, violation_info in all_violation_case.items():
            invalid_info = violation_info.get('invalid')
            if invalid_info:
                self._log_validation_warning(spec_name, invalid_info['invalid'], invalid_info['expected'])

            warning_info = violation_info.get('warning')
            if warning_info:
                for extension in warning_info['warning']:
                    if extension in conversion_warnings:
                        self._log_conversion_warning(spec_name, extension, conversion_warnings[extension])

    def handle_upload_files(self) -> List:
        """Main upload method that handles the complete upload workflow.

        This method provides the core workflow for upload plugins:
        setup_directories -> organize_files -> before_process -> process_files ->
        after_process -> validate_files

        Returns:
            List: The final processed and validated list of files ready for upload.
        """
        # Setup any required directories
        self.setup_directories()

        # Start with organized files from the workflow, then run each hook in
        # order, feeding the output of one step into the next.
        current_files = self.organized_files
        current_files = self.organize_files(current_files)
        current_files = self.before_process(current_files)
        current_files = self.process_files(current_files)
        current_files = self.after_process(current_files)

        # Final validation
        current_files = self.validate_files(current_files)

        return current_files