synapse-sdk 2025.9.5__py3-none-any.whl → 2025.10.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)
  1. synapse_sdk/clients/base.py +129 -9
  2. synapse_sdk/devtools/docs/docs/api/clients/base.md +230 -8
  3. synapse_sdk/devtools/docs/docs/api/plugins/models.md +58 -3
  4. synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +663 -0
  5. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
  6. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
  7. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
  8. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
  9. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
  10. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
  11. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
  12. synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +39 -0
  13. synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
  14. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +230 -8
  15. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/plugins/models.md +114 -0
  16. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +621 -0
  17. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
  18. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
  19. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
  20. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
  21. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
  22. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
  23. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
  24. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +39 -0
  25. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
  26. synapse_sdk/devtools/docs/sidebars.ts +45 -1
  27. synapse_sdk/plugins/README.md +487 -80
  28. synapse_sdk/plugins/categories/base.py +1 -0
  29. synapse_sdk/plugins/categories/export/actions/export/action.py +8 -3
  30. synapse_sdk/plugins/categories/export/actions/export/utils.py +108 -8
  31. synapse_sdk/plugins/categories/export/templates/config.yaml +18 -0
  32. synapse_sdk/plugins/categories/export/templates/plugin/export.py +97 -0
  33. synapse_sdk/plugins/categories/neural_net/actions/train.py +592 -22
  34. synapse_sdk/plugins/categories/neural_net/actions/tune.py +150 -3
  35. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  47. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
  48. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  49. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  50. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
  51. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
  52. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
  54. synapse_sdk/plugins/categories/upload/actions/upload/action.py +8 -1
  55. synapse_sdk/plugins/categories/upload/actions/upload/context.py +0 -1
  56. synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +6 -2
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +24 -9
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +130 -18
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +147 -37
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +10 -5
  63. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +31 -6
  64. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +65 -37
  65. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +17 -2
  66. synapse_sdk/plugins/categories/upload/templates/README.md +394 -0
  67. synapse_sdk/plugins/models.py +62 -0
  68. synapse_sdk/utils/file/download.py +261 -0
  69. {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/METADATA +15 -2
  70. {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/RECORD +74 -43
  71. synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
  72. synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
  73. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
  74. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
  75. {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/WHEEL +0 -0
  76. {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/entry_points.txt +0 -0
  77. {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/licenses/LICENSE +0 -0
  78. {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py
@@ -10,7 +10,14 @@ class RecursiveFileDiscoveryStrategy(FileDiscoveryStrategy):
 
  def discover(self, path: Path, recursive: bool) -> List[Path]:
  """Discover files recursively in the given path."""
- return [file_path for file_path in path.rglob('*') if file_path.is_file()]
+ # Exclude system directories
+ excluded_dirs = {'@eaDir', '.@__thumb', '@Recycle', '#recycle', '.DS_Store', 'Thumbs.db', '.synology'}
+
+ def exclude_dirs(file_path: Path) -> bool:
+ """Check if file path contains excluded directories."""
+ return any(excluded_dir in file_path.parts for excluded_dir in excluded_dirs)
+
+ return [file_path for file_path in path.rglob('*') if file_path.is_file() and not exclude_dirs(file_path)]
 
  def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
  """Organize files according to specifications with metadata."""
@@ -42,57 +49,78 @@ class RecursiveFileDiscoveryStrategy(FileDiscoveryStrategy):
  # Performance optimization 2: Build metadata index for faster lookups
  metadata_index = self._build_metadata_index(metadata)
 
- # Group files by dataset
+ # Group files by dataset_key (stem-based matching)
+ # Strategy:
+ # 1. Group all files (required + optional) by their file stem
+ # 2. Only create data units for groups that have ALL required files
+ # 3. Optional files are automatically included if they match the stem
  dataset_files = {}
  required_specs = [spec['name'] for spec in specs if spec.get('is_required', False)]
 
  for file_path in files:
  # Determine which type directory this file belongs to
- file_path_str = str(file_path)
+ matched = False
  for spec_name, dir_path in type_dirs.items():
- dir_path_str = path_cache[dir_path]
- if file_path_str.startswith(dir_path_str):
- # Create unique dataset key using relative path from spec directory
+ # Check if file is under this spec's directory
+ # Use try/except for relative_to to ensure proper path matching
+ try:
  relative_path = file_path.relative_to(dir_path)
- # Use parent directory + stem as unique key to group related files
- if relative_path.parent != Path('.'):
- dataset_key = f'{relative_path.parent}_{file_path.stem}'
- else:
- dataset_key = file_path.stem
-
- if dataset_key not in dataset_files:
- dataset_files[dataset_key] = {}
-
- if spec_name not in dataset_files[dataset_key]:
- dataset_files[dataset_key][spec_name] = file_path
- else:
- # Keep the most recent file - only stat when needed
- existing_file = dataset_files[dataset_key][spec_name]
- try:
- if file_path.stat().st_mtime > existing_file.stat().st_mtime:
- dataset_files[dataset_key][spec_name] = file_path
- except (OSError, IOError):
- # If stat fails, keep existing file
- pass
-
- # Create organized files for datasets with all required files
- for dataset_key, files_dict in sorted(dataset_files.items()):
- if all(req in files_dict for req in required_specs):
- # Extract original file stem from dataset_key
- # If dataset_key contains path info (parent_stem), extract just the stem part
- if '_' in dataset_key and len(dataset_key.split('_')) >= 2:
- # Extract the last part after the last underscore as the original stem
- original_stem = dataset_key.split('_')[-1]
+ matched = True
+ except ValueError:
+ # File is not under this directory
+ continue
+
+ # Create unique dataset key using relative path from spec directory
+ # Use parent directory + stem as unique key to group related files
+ if relative_path.parent != Path('.'):
+ dataset_key = f'{relative_path.parent}_{file_path.stem}'
+ else:
+ dataset_key = file_path.stem
+
+ if dataset_key not in dataset_files:
+ dataset_files[dataset_key] = {}
+
+ if spec_name not in dataset_files[dataset_key]:
+ dataset_files[dataset_key][spec_name] = file_path
  else:
- original_stem = dataset_key
+ # Keep the most recent file - only stat when needed
+ existing_file = dataset_files[dataset_key][spec_name]
+ try:
+ if file_path.stat().st_mtime > existing_file.stat().st_mtime:
+ dataset_files[dataset_key][spec_name] = file_path
+ except (OSError, IOError):
+ # If stat fails, keep existing file
+ pass
+
+ # Found matching directory, move to next file
+ break
+
+ # Create organized files ONLY for datasets with ALL required files
+ # Optional files are included automatically if they match the stem
+ for dataset_key, files_dict in sorted(dataset_files.items()):
+ # Check if all required files are present
+ has_all_required = all(req in files_dict for req in required_specs)
 
- # Calculate most common file extension (optimized)
+ if has_all_required:
+ # Extract original file stem from actual file paths (more reliable than parsing dataset_key)
+ # Collect stems from all files in the group
+ file_stems = {}
  file_extensions = {}
+
  for file_path in files_dict.values():
+ stem = file_path.stem
  ext = file_path.suffix.lower()
+
+ # Count stems (to handle multiple files with slightly different names)
+ if stem:
+ file_stems[stem] = file_stems.get(stem, 0) + 1
+
+ # Count extensions
  if ext:
  file_extensions[ext] = file_extensions.get(ext, 0) + 1
 
+ # Use the most common stem (usually they're all the same)
+ original_stem = max(file_stems, key=file_stems.get) if file_stems else dataset_key
  origin_file_extension = max(file_extensions, key=file_extensions.get) if file_extensions else ''
 
  meta_data = {

synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py
@@ -12,12 +12,24 @@ class DefaultValidationStrategy(ValidationStrategy):
  """Validate action parameters."""
  errors = []
 
- # Check required parameters
- required_params = ['storage', 'data_collection', 'path', 'name']
+ # Check required parameters (common to all modes)
+ required_params = ['storage', 'data_collection', 'name']
  for param in required_params:
  if param not in params:
  errors.append(f'Missing required parameter: {param}')
 
+ # Check mode-specific requirements
+ use_single_path = params.get('use_single_path', True)
+
+ if use_single_path:
+ # Single-path mode: 'path' is required
+ if 'path' not in params:
+ errors.append("Missing required parameter 'path' in single-path mode")
+ else:
+ # Multi-path mode: 'assets' is required
+ if 'assets' not in params:
+ errors.append("Missing required parameter 'assets' in multi-path mode")
+
  # Check parameter types
  if 'storage' in params and not isinstance(params['storage'], int):
  errors.append("Parameter 'storage' must be an integer")
@@ -28,6 +40,9 @@ class DefaultValidationStrategy(ValidationStrategy):
  if 'is_recursive' in params and not isinstance(params['is_recursive'], bool):
  errors.append("Parameter 'is_recursive' must be a boolean")
 
+ if 'use_single_path' in params and not isinstance(params['use_single_path'], bool):
+ errors.append("Parameter 'use_single_path' must be a boolean")
+
  return ValidationResult(valid=len(errors) == 0, errors=errors)
 
  def validate_files(self, files: List[Dict], specs: Dict) -> ValidationResult:

synapse_sdk/plugins/categories/upload/templates/README.md
@@ -0,0 +1,394 @@
+ # Upload Plugin
+
+ The Upload Plugin provides comprehensive file and data upload functionality with support for various storage backends, flexible asset path configuration, and Excel metadata integration.
+
+ ## Quick Start Usage
+
+ ### CLI Usage Examples
+
+ #### Standard Upload (Single Directory)
+
+ ```bash
+ synapse plugin run upload '{
+   "name": "Dataset Upload",
+   "storage": 1,
+   "collection": 2,
+   "use_single_path": false,
+   "assets": {
+     "path": "/data/dataset",
+     "recursive": true
+   }
+ }'
+ ```
+
+ #### Multi-Path Upload (Different Locations)
+
+ ```bash
+ synapse plugin run upload '{
+   "name": "Complex Dataset Upload",
+   "storage": 1,
+   "collection": 2,
+   "use_single_path": true,
+   "assets": {
+     "images": {"path": "/images", "recursive": true},
+     "pointclouds": {"path": "/pcd", "recursive": false},
+     "annotations": {"path": "/labels", "recursive": true}
+   },
+   "excel_metadata_path": "/metadata/dataset_info.xlsx"
+ }' --debug
+ ```
+
+ ### Common Use Cases
+
+ #### 1. Simple Dataset Upload
+
+ ```json
+ {
+   "name": "Training Dataset",
+   "storage": 1,
+   "collection": 2,
+   "assets": {
+     "path": "/datasets/training",
+     "recursive": true
+   }
+ }
+ ```
+
+ #### 2. Multi-Source Dataset Upload
+
+ ```json
+ {
+   "name": "Multi-Camera Dataset",
+   "storage": 1,
+   "collection": 2,
+   "use_single_path": true,
+   "assets": {
+     "front_camera": { "path": "/cameras/front", "recursive": true },
+     "rear_camera": { "path": "/cameras/rear", "recursive": true },
+     "lidar": { "path": "/sensors/lidar", "recursive": false }
+   }
+ }
+ ```
+
+ #### 3. Dataset with Metadata
+
+ ```json
+ {
+   "name": "Annotated Dataset",
+   "storage": 1,
+   "collection": 2,
+   "assets": {
+     "path": "/data/annotated",
+     "recursive": true
+   },
+   "excel_metadata_path": "/data/metadata.xlsx"
+ }
+ ```
+
+ ## Configuration Parameters
+
+ ### Required Parameters
+
+ | Parameter | Type | Description | Example |
+ | ------------ | ------- | ----------------------------------- | ------------------ |
+ | `name` | string | Display name for the upload | `"My Dataset"` |
+ | `storage` | integer | Storage backend ID | `1` |
+ | `collection` | integer | Collection ID defining file specs | `2` |
+ | `assets` | object | Path configuration (varies by mode) | See examples below |
+
+ ### Optional Parameters
+
+ | Parameter | Type | Default | Description |
+ | --------------------- | -------- | ------- | -------------------------------------------------------------------------------- |
+ | `description` | string | `null` | Upload description |
+ | `project` | integer | `null` | Project ID to associate |
+ | `use_single_path` | boolean | `false` | Enable individual path mode |
+ | `is_recursive` | boolean | `false` | Global recursive setting |
+ | `excel_metadata_path` | `string` | `null` | **DEPRECATED** - File path to Excel metadata file (use `excel_metadata` instead) |
+ | `excel_metadata` | `object` | `null` | Base64 encoded Excel metadata (recommended) |
+
+ ## Excel Metadata Support
+
+ The upload plugin provides advanced Excel metadata processing with flexible header support, comprehensive filename matching, and two distinct input methods.
+
+ ### Input Methods
+
+ There are two separate parameters for providing Excel metadata:
+
+ #### 1. File Path Method (`excel_metadata_path`) - **DEPRECATED**
+
+ :::warning Deprecation Notice
+ This parameter is **deprecated** and will be removed in a future version.
+ Please migrate to using the `excel_metadata` parameter with base64 encoding instead.
+ :::
+
+ **Use case:** Traditional file-based uploads where the Excel file exists on the server's file system.
+
+ Simple string path to an Excel file:
+
+ ```json
+ {
+   "excel_metadata_path": "/data/metadata.xlsx"
+ }
+ ```
+
+ **Advantages:**
+
+ - Backward compatible with existing implementations
+ - Simple and straightforward
+ - Direct file system access
+
+ #### 2. Base64 Encoded Method (`excel_metadata`)
+
+ **Use case:** Web frontends, APIs, and cloud integrations where files are transmitted as encoded data.
+
+ Send Excel file as base64-encoded data with original filename:
+
+ ```json
+ {
+   "excel_metadata": {
+     "data": "UEsDBBQABgAIAAAAIQDd4Z...",
+     "filename": "metadata.xlsx"
+   }
+ }
+ ```
+
+ **Advantages:**
+
+ - No intermediate file storage required
+ - Perfect for web upload forms
+ - API-friendly JSON payload
+ - Automatic temporary file cleanup
+ - **This is the recommended method going forward**
+
+ **Important:** You cannot use both `excel_metadata_path` and `excel_metadata` at the same time.
+
+ **Migration Example:**
+
+ ```python
+ import base64
+
+ # Old way (deprecated)
+ params = {
+     "excel_metadata_path": "/data/metadata.xlsx"
+ }
+
+ # New way (recommended)
+ with open("/data/metadata.xlsx", "rb") as f:
+     encoded = base64.b64encode(f.read()).decode("utf-8")
+ params = {
+     "excel_metadata": {
+         "data": encoded,
+         "filename": "metadata.xlsx"
+     }
+ }
+ ```
+
+ ### Excel Format Example
+
+ | filename | category | quality | notes |
+ | --------- | ---------- | ------- | ----------------- |
+ | sample001 | vehicle | high | Clear visibility |
+ | sample002 | pedestrian | medium | Partial occlusion |
+
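+ As a rough illustration of how a sheet like this maps to per-file metadata keyed by the `filename` column, a minimal `openpyxl` sketch (illustrative only; the plugin's own parser applies the security limits and matching rules described below) could look like:
+
+ ```python
+ from openpyxl import load_workbook
+
+ # Illustrative sketch: read the header row, then build {filename: row metadata}
+ wb = load_workbook("/data/metadata.xlsx", read_only=True)
+ ws = wb.active
+ headers = [cell.value for cell in next(ws.iter_rows(max_row=1))]
+ metadata_by_filename = {}
+ for row in ws.iter_rows(min_row=2, values_only=True):
+     record = dict(zip(headers, row))
+     metadata_by_filename[str(record["filename"])] = record
+ # metadata_by_filename["sample001"] -> {"filename": "sample001", "category": "vehicle", ...}
+ ```
+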
+ ### Security Limits
+
+ - Max file size: 10MB
+ - Max rows: 10,000
+ - Max columns: 50
+
+ ## File Matching Logic
+
+ Files are matched by **stem name** (filename without extension):
+
+ - `sample001.jpg` → stem: "sample001"
+ - `sample001.pcd` → stem: "sample001"
+ - `sample001.json` → stem: "sample001"
+
+ These files form a single dataset named "sample001".
+
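+ A minimal sketch of this stem-based grouping (illustrative only, not the plugin's internal implementation):
+
+ ```python
+ from collections import defaultdict
+ from pathlib import Path
+
+ files = [Path("sample001.jpg"), Path("sample001.pcd"), Path("sample001.json"), Path("sample002.jpg")]
+
+ # Group files that share a stem into one dataset
+ datasets = defaultdict(list)
+ for file_path in files:
+     datasets[file_path.stem].append(file_path)
+
+ # datasets["sample001"] -> [sample001.jpg, sample001.pcd, sample001.json]
+ # datasets["sample002"] -> [sample002.jpg]
+ ```
+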
+ ## Troubleshooting Guide
+
+ ### Common Issues
+
+ #### "No Files Found" Error
+
+ ```bash
+ # Check path exists and is readable
+ ls -la /path/to/data
+ test -r /path/to/data && echo "Readable" || echo "Not readable"
+
+ # Verify files exist
+ find /path/to/data -name "*.jpg" | head -10
+ ```
+
+ #### Excel Processing Errors
+
+ ```bash
+ # Check file format and size
+ file /path/to/metadata.xlsx
+ ls -lh /path/to/metadata.xlsx
+
+ # Validate Excel content
+ python -c "
+ from openpyxl import load_workbook
+ wb = load_workbook('/path/to/metadata.xlsx')
+ print(f'Sheets: {wb.sheetnames}')
+ print(f'Rows: {wb.active.max_row}')
+ "
+ ```
+
+ #### Upload Failures
+
+ ```bash
+ # Test storage connection
+ synapse storage test --storage-id 1
+
+ # Verify collection configuration
+ synapse collection show --id 2
+
+ # Run with debug mode
+ synapse plugin run upload '{}' --debug
+ ```
+
+ ## Best Practices
+
+ ### Directory Organization
+
+ - Use clear, descriptive directory names
+ - Keep reasonable directory sizes (< 10,000 files)
+ - Use absolute paths for reliability
+
+ ### Performance Optimization
+
+ - Enable recursive only when needed
+ - Keep Excel files under 5MB
+ - Organize files in balanced directory structures
+
+ ### Security Considerations
+
+ - Validate all paths before processing
+ - Use read-only permissions for source data
+ - Set appropriate Excel size limits
+
+ ## Advanced Features
+
+ ### Batch Processing
+
+ The plugin automatically optimizes batch sizes based on dataset size:
+
+ - Small datasets (< 50 files): batch size 50
+ - Large datasets: dynamic batch size (10-100)
+
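+ One way such a heuristic could look (an illustrative sketch: only the 50-file threshold and the 10-100 range come from the behavior listed above, the scaling rule itself is an assumption):
+
+ ```python
+ def pick_batch_size(total_files: int) -> int:
+     """Illustrative batch-size heuristic; the plugin's actual formula may differ."""
+     if total_files < 50:
+         # Small datasets are documented to use a batch size of 50
+         return 50
+     # Assumed scaling for larger datasets, clamped to the documented 10-100 range
+     return max(10, min(100, total_files // 20))
+ ```
+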
+ ### Progress Tracking
+
+ Real-time progress updates with categories:
+
+ - Collection analysis: 2%
+ - File upload: 38%
+ - Data unit generation: 60%
+
+ ### Error Handling
+
+ Comprehensive validation at multiple levels:
+
+ - Parameter validation (Pydantic)
+ - Runtime path validation
+ - File format validation
+ - Excel security checks
+
+ ## Environment Variables
+
+ Configure Excel processing limits:
+
+ ```bash
+ # File size limits
+ EXCEL_MAX_FILE_SIZE_MB=10
+ EXCEL_MAX_MEMORY_MB=30
+
+ # Content limits
+ EXCEL_MAX_ROWS=10000
+ EXCEL_MAX_COLUMNS=50
+
+ # String length limits
+ EXCEL_MAX_FILENAME_LENGTH=255
+ EXCEL_MAX_METADATA_VALUE_LENGTH=1000
+ ```
+
+ ## Migration Guide
+
+ ### Upgrading from Previous Versions
+
+ All existing configurations continue to work. New features are additive:
+
+ #### Test Current Configuration
+
+ ```bash
+ synapse plugin run upload '{}' --debug
+ ```
+
+ #### Convert to Explicit Mode
+
+ ```python
+ # Add explicit mode setting
+ config["use_single_path"] = False  # or True for single path mode
+ ```
+
+ #### Gradual Migration to Single Path Mode
+
+ ```python
+ # Start with subset
+ test_config = {
+     "use_single_path": True,
+     "assets": {
+         "test_images": {"path": "/existing/path/images", "recursive": True}
+     }
+ }
+
+ # Then migrate all assets
+ production_config = {
+     "use_single_path": True,
+     "assets": {
+         "images": {"path": "/optimized/path1", "recursive": True},
+         "annotations": {"path": "/optimized/path2", "recursive": False}
+     }
+ }
+ ```
+
+ ## Storage Backend Support
+
+ The plugin supports multiple storage backends:
+
+ - **Local filesystem**: Optimized for high I/O
+ - **S3/GCS**: Cloud storage with retry logic
+ - **SFTP**: Connection pooling for remote servers
+ - **HTTP**: Streaming uploads for large files
+
+ ## API Reference
+
+ ### Plugin Class
+
+ ```python
+ from synapse import Plugin
+
+ plugin = Plugin("upload")
+ result = plugin.run(config, debug=True)
+ ```
+
+ ### Result Structure
+
+ ```python
+ {
+     "status": "success",
+     "uploaded_files": 150,
+     "data_units_created": 50,
+     "errors": [],
+     "metadata": {}
+ }
+ ```
+
+ ## Support and Resources
+
+ - **Documentation**: Full API documentation at [synapse-docs]
+ - **Issues**: Report bugs at [issue-tracker]
+ - **Examples**: More examples at [examples-repo]

synapse_sdk/plugins/models.py
@@ -1,7 +1,10 @@
  import os
+ from datetime import datetime
  from functools import cached_property
  from typing import Any, Dict
 
+ from pydantic import BaseModel
+
  from synapse_sdk.clients.backend import BackendClient
  from synapse_sdk.devtools.config import get_backend_config
  from synapse_sdk.loggers import BackendLogger, ConsoleLogger
@@ -131,6 +134,26 @@ class Run:
  context = None
  client = None
 
+ class DevLog(BaseModel):
+ """Model for developer log entries.
+
+ Records custom events and information that plugin developers want to track
+ during plugin execution for debugging and monitoring purposes.
+
+ Attributes:
+ event_type (str): Type/category of the development event
+ message (str): Descriptive message about the event
+ data (dict | None): Optional additional data/context
+ level (Context): Event status/severity level
+ created (str): Timestamp when event occurred
+ """
+
+ event_type: str
+ message: str
+ data: dict | None = None
+ level: Context
+ created: str
+
  def __init__(self, job_id, context=None):
  self.job_id = job_id
  self.context = context or {}
@@ -177,5 +200,44 @@ class Run:
  def log_message(self, message, context=Context.INFO.value):
  self.logger.log('message', {'context': context, 'content': message})
 
+ def log_dev_event(self, message: str, data: dict | None = None, level: Context = Context.INFO):
+ """Log development event for plugin developers.
+
+ This function allows plugin developers to log custom events and information
+ during plugin execution for debugging, monitoring, and development purposes.
+ The event_type is automatically constructed as '{action_name}_dev_log' and cannot
+ be modified by plugin developers.
+
+ Args:
+ message (str): Descriptive message about the event
+ data (dict | None): Optional additional data or context to include
+ level (Context): Event severity level (INFO, WARNING, DANGER, SUCCESS)
+
+ Example:
+ >>> run = Run(job_id, context)
+ >>> run.log_dev_event('Data validation completed', {'records_count': 100})
+ >>> run.log_dev_event('Processing time recorded', {'duration_ms': 1500})
+ >>> run.log_dev_event('Variable state at checkpoint', {'variable_x': 42}, level=Context.WARNING)
+ """
+ # Construct event_type from action name - this cannot be modified by developers
+ action_name = self.context.get('action_name', 'unknown')
+ event_type = f'{action_name}_dev_log'
+
+ # Log the message for basic logging
+ self.log_message(f'[{event_type.upper()}] {message}', context=level.value)
+
+ # Also log the structured event for development tracking
+ now = datetime.now().isoformat()
+ self.log(
+ 'dev_event',
+ self.DevLog(
+ event_type=event_type,
+ message=message,
+ data=data,
+ level=level,
+ created=now,
+ ).model_dump(),
+ )
+
  def end_log(self):
  self.log_message('Plugin run is complete.')