synapse-sdk 2025.9.5__py3-none-any.whl → 2025.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synapse_sdk/clients/base.py +129 -9
- synapse_sdk/devtools/docs/docs/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/docs/api/plugins/models.md +58 -3
- synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +663 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +39 -0
- synapse_sdk/devtools/docs/docs/plugins/plugins.md +12 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/plugins/models.md +114 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +621 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +934 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +585 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +715 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +39 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +16 -4
- synapse_sdk/devtools/docs/sidebars.ts +45 -1
- synapse_sdk/plugins/README.md +487 -80
- synapse_sdk/plugins/categories/base.py +1 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +8 -3
- synapse_sdk/plugins/categories/export/actions/export/utils.py +108 -8
- synapse_sdk/plugins/categories/export/templates/config.yaml +18 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +97 -0
- synapse_sdk/plugins/categories/neural_net/actions/train.py +592 -22
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +150 -3
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +284 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +2 -1
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +8 -1
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +0 -1
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +134 -94
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +2 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +6 -2
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +24 -9
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +130 -18
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +147 -37
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +10 -5
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +31 -6
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +65 -37
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +17 -2
- synapse_sdk/plugins/categories/upload/templates/README.md +394 -0
- synapse_sdk/plugins/models.py +62 -0
- synapse_sdk/utils/file/download.py +261 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/METADATA +15 -2
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/RECORD +74 -43
- synapse_sdk/devtools/docs/docs/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +0 -1964
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/developing-upload-template.md +0 -1463
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +0 -2077
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.9.5.dist-info → synapse_sdk-2025.10.6.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py
CHANGED

@@ -10,7 +10,14 @@ class RecursiveFileDiscoveryStrategy(FileDiscoveryStrategy):
 
     def discover(self, path: Path, recursive: bool) -> List[Path]:
         """Discover files recursively in the given path."""
-
+        # Exclude system directories
+        excluded_dirs = {'@eaDir', '.@__thumb', '@Recycle', '#recycle', '.DS_Store', 'Thumbs.db', '.synology'}
+
+        def exclude_dirs(file_path: Path) -> bool:
+            """Check if file path contains excluded directories."""
+            return any(excluded_dir in file_path.parts for excluded_dir in excluded_dirs)
+
+        return [file_path for file_path in path.rglob('*') if file_path.is_file() and not exclude_dirs(file_path)]
 
     def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
         """Organize files according to specifications with metadata."""
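
The exclusion filter introduced here is easy to verify in isolation. A minimal standalone sketch (hypothetical paths; `excluded_dirs` copied from the hunk above) shows how matching against `Path.parts` skips files at any depth under an excluded directory:

```python
from pathlib import Path

# Excluded names copied from the diff above
excluded_dirs = {'@eaDir', '.@__thumb', '@Recycle', '#recycle', '.DS_Store', 'Thumbs.db', '.synology'}

def is_excluded(file_path: Path) -> bool:
    """True when any component of the path is an excluded name."""
    return any(part in excluded_dirs for part in file_path.parts)

print(is_excluded(Path('/data/@eaDir/thumb.jpg')))  # True: inside a Synology thumbnail dir
print(is_excluded(Path('/data/images/cat.jpg')))    # False
```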
@@ -42,57 +49,78 @@ class RecursiveFileDiscoveryStrategy(FileDiscoveryStrategy):
         # Performance optimization 2: Build metadata index for faster lookups
         metadata_index = self._build_metadata_index(metadata)
 
-        # Group files by
+        # Group files by dataset_key (stem-based matching)
+        # Strategy:
+        # 1. Group all files (required + optional) by their file stem
+        # 2. Only create data units for groups that have ALL required files
+        # 3. Optional files are automatically included if they match the stem
         dataset_files = {}
         required_specs = [spec['name'] for spec in specs if spec.get('is_required', False)]
 
         for file_path in files:
             # Determine which type directory this file belongs to
-
+            matched = False
             for spec_name, dir_path in type_dirs.items():
-
-
-
+                # Check if file is under this spec's directory
+                # Use try/except for relative_to to ensure proper path matching
+                try:
                     relative_path = file_path.relative_to(dir_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    except (OSError, IOError):
-                        # If stat fails, keep existing file
-                        pass
-
-        # Create organized files for datasets with all required files
-        for dataset_key, files_dict in sorted(dataset_files.items()):
-            if all(req in files_dict for req in required_specs):
-                # Extract original file stem from dataset_key
-                # If dataset_key contains path info (parent_stem), extract just the stem part
-                if '_' in dataset_key and len(dataset_key.split('_')) >= 2:
-                    # Extract the last part after the last underscore as the original stem
-                    original_stem = dataset_key.split('_')[-1]
+                    matched = True
+                except ValueError:
+                    # File is not under this directory
+                    continue
+
+                # Create unique dataset key using relative path from spec directory
+                # Use parent directory + stem as unique key to group related files
+                if relative_path.parent != Path('.'):
+                    dataset_key = f'{relative_path.parent}_{file_path.stem}'
+                else:
+                    dataset_key = file_path.stem
+
+                if dataset_key not in dataset_files:
+                    dataset_files[dataset_key] = {}
+
+                if spec_name not in dataset_files[dataset_key]:
+                    dataset_files[dataset_key][spec_name] = file_path
                 else:
-
+                    # Keep the most recent file - only stat when needed
+                    existing_file = dataset_files[dataset_key][spec_name]
+                    try:
+                        if file_path.stat().st_mtime > existing_file.stat().st_mtime:
+                            dataset_files[dataset_key][spec_name] = file_path
+                    except (OSError, IOError):
+                        # If stat fails, keep existing file
+                        pass
+
+                # Found matching directory, move to next file
+                break
+
+        # Create organized files ONLY for datasets with ALL required files
+        # Optional files are included automatically if they match the stem
+        for dataset_key, files_dict in sorted(dataset_files.items()):
+            # Check if all required files are present
+            has_all_required = all(req in files_dict for req in required_specs)
 
-
+            if has_all_required:
+                # Extract original file stem from actual file paths (more reliable than parsing dataset_key)
+                # Collect stems from all files in the group
+                file_stems = {}
                 file_extensions = {}
+
                 for file_path in files_dict.values():
+                    stem = file_path.stem
                     ext = file_path.suffix.lower()
+
+                    # Count stems (to handle multiple files with slightly different names)
+                    if stem:
+                        file_stems[stem] = file_stems.get(stem, 0) + 1
+
+                    # Count extensions
                     if ext:
                         file_extensions[ext] = file_extensions.get(ext, 0) + 1
 
+                # Use the most common stem (usually they're all the same)
+                original_stem = max(file_stems, key=file_stems.get) if file_stems else dataset_key
                 origin_file_extension = max(file_extensions, key=file_extensions.get) if file_extensions else ''
 
                 meta_data = {
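
The grouping strategy in this hunk is easier to follow outside the diff. A minimal sketch with hypothetical spec names and paths: files are keyed by parent directory plus stem, and a data unit is produced only when every required spec is present under that key:

```python
from pathlib import Path

# Hypothetical discovered files, keyed by spec name
discovered = {
    'image': [Path('seq1/frame001.jpg'), Path('seq1/frame002.jpg')],
    'pointcloud': [Path('seq1/frame001.pcd')],
    'label': [Path('seq1/frame001.json')],  # optional spec
}
required_specs = ['image', 'pointcloud']

dataset_files = {}
for spec_name, paths in discovered.items():
    for p in paths:
        key = f'{p.parent}_{p.stem}'  # parent dir + stem, as in the hunk above
        dataset_files.setdefault(key, {})[spec_name] = p

# Only groups holding ALL required specs become data units
units = {k: v for k, v in sorted(dataset_files.items())
         if all(req in v for req in required_specs)}
print(list(units))  # ['seq1_frame001']; frame002 is dropped for lacking a pointcloud
```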
synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py
CHANGED

@@ -12,12 +12,24 @@ class DefaultValidationStrategy(ValidationStrategy):
         """Validate action parameters."""
         errors = []
 
-        # Check required parameters
-        required_params = ['storage', 'data_collection', '
+        # Check required parameters (common to all modes)
+        required_params = ['storage', 'data_collection', 'name']
         for param in required_params:
             if param not in params:
                 errors.append(f'Missing required parameter: {param}')
 
+        # Check mode-specific requirements
+        use_single_path = params.get('use_single_path', True)
+
+        if use_single_path:
+            # Single-path mode: 'path' is required
+            if 'path' not in params:
+                errors.append("Missing required parameter 'path' in single-path mode")
+        else:
+            # Multi-path mode: 'assets' is required
+            if 'assets' not in params:
+                errors.append("Missing required parameter 'assets' in multi-path mode")
+
         # Check parameter types
         if 'storage' in params and not isinstance(params['storage'], int):
             errors.append("Parameter 'storage' must be an integer")

@@ -28,6 +40,9 @@ class DefaultValidationStrategy(ValidationStrategy):
         if 'is_recursive' in params and not isinstance(params['is_recursive'], bool):
             errors.append("Parameter 'is_recursive' must be a boolean")
 
+        if 'use_single_path' in params and not isinstance(params['use_single_path'], bool):
+            errors.append("Parameter 'use_single_path' must be a boolean")
+
         return ValidationResult(valid=len(errors) == 0, errors=errors)
 
     def validate_files(self, files: List[Dict], specs: Dict) -> ValidationResult:
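
Condensed, the mode check reads as follows (an illustration, not the SDK class itself; note that the strategy treats a missing `use_single_path` as `True`):

```python
def validate_params(params: dict) -> list[str]:
    errors = [f'Missing required parameter: {p}'
              for p in ('storage', 'data_collection', 'name') if p not in params]
    # Mode-specific requirement mirrors the diff above
    if params.get('use_single_path', True):
        if 'path' not in params:
            errors.append("Missing required parameter 'path' in single-path mode")
    elif 'assets' not in params:
        errors.append("Missing required parameter 'assets' in multi-path mode")
    return errors

print(validate_params({'storage': 1, 'data_collection': 2, 'name': 'x', 'use_single_path': False}))
# ["Missing required parameter 'assets' in multi-path mode"]
```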
synapse_sdk/plugins/categories/upload/templates/README.md
ADDED

@@ -0,0 +1,394 @@
+# Upload Plugin
+
+The Upload Plugin provides comprehensive file and data upload functionality with support for various storage backends, flexible asset path configuration, and Excel metadata integration.
+
+## Quick Start Usage
+
+### CLI Usage Examples
+
+#### Standard Upload (Single Directory)
+
+```bash
+synapse plugin run upload '{
+  "name": "Dataset Upload",
+  "storage": 1,
+  "collection": 2,
+  "use_single_path": false,
+  "assets": {
+    "path": "/data/dataset",
+    "recursive": true
+  }
+}'
+```
+
+#### Multi-Path Upload (Different Locations)
+
+```bash
+synapse plugin run upload '{
+  "name": "Complex Dataset Upload",
+  "storage": 1,
+  "collection": 2,
+  "use_single_path": true,
+  "assets": {
+    "images": {"path": "/images", "recursive": true},
+    "pointclouds": {"path": "/pcd", "recursive": false},
+    "annotations": {"path": "/labels", "recursive": true}
+  },
+  "excel_metadata_path": "/metadata/dataset_info.xlsx"
+}' --debug
+```
+
+### Common Use Cases
+
+#### 1. Simple Dataset Upload
+
+```json
+{
+  "name": "Training Dataset",
+  "storage": 1,
+  "collection": 2,
+  "assets": {
+    "path": "/datasets/training",
+    "recursive": true
+  }
+}
+```
+
+#### 2. Multi-Source Dataset Upload
+
+```json
+{
+  "name": "Multi-Camera Dataset",
+  "storage": 1,
+  "collection": 2,
+  "use_single_path": true,
+  "assets": {
+    "front_camera": { "path": "/cameras/front", "recursive": true },
+    "rear_camera": { "path": "/cameras/rear", "recursive": true },
+    "lidar": { "path": "/sensors/lidar", "recursive": false }
+  }
+}
+```
+
+#### 3. Dataset with Metadata
+
+```json
+{
+  "name": "Annotated Dataset",
+  "storage": 1,
+  "collection": 2,
+  "assets": {
+    "path": "/data/annotated",
+    "recursive": true
+  },
+  "excel_metadata_path": "/data/metadata.xlsx"
+}
+```
+
+## Configuration Parameters
+
+### Required Parameters
+
+| Parameter    | Type    | Description                         | Example            |
+| ------------ | ------- | ----------------------------------- | ------------------ |
+| `name`       | string  | Display name for the upload         | `"My Dataset"`     |
+| `storage`    | integer | Storage backend ID                  | `1`                |
+| `collection` | integer | Collection ID defining file specs   | `2`                |
+| `assets`     | object  | Path configuration (varies by mode) | See examples below |
+
+### Optional Parameters
+
+| Parameter             | Type     | Default | Description                                                                      |
+| --------------------- | -------- | ------- | -------------------------------------------------------------------------------- |
+| `description`         | string   | `null`  | Upload description                                                               |
+| `project`             | integer  | `null`  | Project ID to associate                                                          |
+| `use_single_path`     | boolean  | `false` | Enable individual path mode                                                      |
+| `is_recursive`        | boolean  | `false` | Global recursive setting                                                         |
+| `excel_metadata_path` | `string` | `null`  | **DEPRECATED** - File path to Excel metadata file (use `excel_metadata` instead) |
+| `excel_metadata`      | `object` | `null`  | Base64 encoded Excel metadata (recommended)                                      |
+
+## Excel Metadata Support
+
+The upload plugin provides advanced Excel metadata processing with flexible header support, comprehensive filename matching, and two distinct input methods.
+
+### Input Methods
+
+There are two separate parameters for providing Excel metadata:
+
+#### 1. File Path Method (`excel_metadata_path`) - **DEPRECATED**
+
+:::warning Deprecation Notice
+This parameter is **deprecated** and will be removed in a future version.
+Please migrate to using the `excel_metadata` parameter with base64 encoding instead.
+:::
+
+**Use case:** Traditional file-based uploads where the Excel file exists on the server's file system.
+
+Simple string path to an Excel file:
+
+```json
+{
+  "excel_metadata_path": "/data/metadata.xlsx"
+}
+```
+
+**Advantages:**
+
+- Backward compatible with existing implementations
+- Simple and straightforward
+- Direct file system access
+
+#### 2. Base64 Encoded Method (`excel_metadata`)
+
+**Use case:** Web frontends, APIs, and cloud integrations where files are transmitted as encoded data.
+
+Send the Excel file as base64-encoded data with its original filename:
+
+```json
+{
+  "excel_metadata": {
+    "data": "UEsDBBQABgAIAAAAIQDd4Z...",
+    "filename": "metadata.xlsx"
+  }
+}
+```
+
+**Advantages:**
+
+- No intermediate file storage required
+- Perfect for web upload forms
+- API-friendly JSON payload
+- Automatic temporary file cleanup
+- **This is the recommended method going forward**
+
+**Important:** You cannot use both `excel_metadata_path` and `excel_metadata` at the same time.
+
+**Migration Example:**
+
+```python
+import base64
+
+# Old way (deprecated)
+params = {
+    "excel_metadata_path": "/data/metadata.xlsx"
+}
+
+# New way (recommended)
+with open("/data/metadata.xlsx", "rb") as f:
+    encoded = base64.b64encode(f.read()).decode("utf-8")
+params = {
+    "excel_metadata": {
+        "data": encoded,
+        "filename": "metadata.xlsx"
+    }
+}
+```
+
+### Excel Format Example
+
+| filename  | category   | quality | notes             |
+| --------- | ---------- | ------- | ----------------- |
+| sample001 | vehicle    | high    | Clear visibility  |
+| sample002 | pedestrian | medium  | Partial occlusion |
+
+### Security Limits
+
+- Max file size: 10MB
+- Max rows: 10,000
+- Max columns: 50
+
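
The README does not show how these limits are enforced; a plausible pre-flight check with openpyxl (a sketch built from the documented limits, not the plugin's internal code) might look like this:

```python
import os
from openpyxl import load_workbook

MAX_FILE_SIZE_MB, MAX_ROWS, MAX_COLUMNS = 10, 10_000, 50  # limits listed above

def check_excel_limits(path: str) -> list[str]:
    """Return a list of limit violations for the given workbook."""
    problems = []
    if os.path.getsize(path) > MAX_FILE_SIZE_MB * 1024 * 1024:
        problems.append(f'file exceeds {MAX_FILE_SIZE_MB}MB')
    ws = load_workbook(path, read_only=True).active
    if ws.max_row > MAX_ROWS:
        problems.append(f'more than {MAX_ROWS} rows')
    if ws.max_column > MAX_COLUMNS:
        problems.append(f'more than {MAX_COLUMNS} columns')
    return problems
```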
+## File Matching Logic
+
+Files are matched by **stem name** (filename without extension):
+
+- `sample001.jpg` → stem: "sample001"
+- `sample001.pcd` → stem: "sample001"
+- `sample001.json` → stem: "sample001"
+
+These files form a single dataset named "sample001".
+
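
Since the Excel `filename` column also holds stems, a metadata row joins to every file in its dataset regardless of extension. A tiny illustration with hypothetical data:

```python
from pathlib import Path

# Rows keyed by the Excel 'filename' column (stems, no extension)
metadata = {'sample001': {'category': 'vehicle', 'quality': 'high'}}
files = [Path('sample001.jpg'), Path('sample001.pcd'), Path('sample002.json')]

for f in files:
    row = metadata.get(f.stem)
    print(f.name, '->', row if row is not None else 'no metadata match')
```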
+## Troubleshooting Guide
+
+### Common Issues
+
+#### "No Files Found" Error
+
+```bash
+# Check path exists and is readable
+ls -la /path/to/data
+test -r /path/to/data && echo "Readable" || echo "Not readable"
+
+# Verify files exist
+find /path/to/data -name "*.jpg" | head -10
+```
+
+#### Excel Processing Errors
+
+```bash
+# Check file format and size
+file /path/to/metadata.xlsx
+ls -lh /path/to/metadata.xlsx
+
+# Validate Excel content
+python -c "
+from openpyxl import load_workbook
+wb = load_workbook('/path/to/metadata.xlsx')
+print(f'Sheets: {wb.sheetnames}')
+print(f'Rows: {wb.active.max_row}')
+"
+```
+
+#### Upload Failures
+
+```bash
+# Test storage connection
+synapse storage test --storage-id 1
+
+# Verify collection configuration
+synapse collection show --id 2
+
+# Run with debug mode
+synapse plugin run upload '{}' --debug
+```
+
+## Best Practices
+
+### Directory Organization
+
+- Use clear, descriptive directory names
+- Keep reasonable directory sizes (< 10,000 files)
+- Use absolute paths for reliability
+
+### Performance Optimization
+
+- Enable recursive only when needed
+- Keep Excel files under 5MB
+- Organize files in balanced directory structures
+
+### Security Considerations
+
+- Validate all paths before processing
+- Use read-only permissions for source data
+- Set appropriate Excel size limits
+
+## Advanced Features
+
+### Batch Processing
+
+The plugin automatically optimizes batch sizes based on dataset size:
+
+- Small datasets (< 50 files): batch size 50
+- Large datasets: dynamic batch size (10-100)
+
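
The README does not spell out the sizing formula; one scheme consistent with the documented bounds (purely illustrative, not the plugin's actual algorithm) would be:

```python
def pick_batch_size(total_files: int) -> int:
    # Fixed batch of 50 for small datasets; otherwise scale with
    # dataset size while clamping to the documented 10-100 range.
    if total_files < 50:
        return 50
    return max(10, min(100, total_files // 20))

print(pick_batch_size(30), pick_batch_size(400), pick_batch_size(5000))  # 50 20 100
```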
+### Progress Tracking
+
+Real-time progress updates with categories:
+
+- Collection analysis: 2%
+- File upload: 38%
+- Data unit generation: 60%
+
+### Error Handling
+
+Comprehensive validation at multiple levels:
+
+- Parameter validation (Pydantic)
+- Runtime path validation
+- File format validation
+- Excel security checks
+
+## Environment Variables
+
+Configure Excel processing limits:
+
+```bash
+# File size limits
+EXCEL_MAX_FILE_SIZE_MB=10
+EXCEL_MAX_MEMORY_MB=30
+
+# Content limits
+EXCEL_MAX_ROWS=10000
+EXCEL_MAX_COLUMNS=50
+
+# String length limits
+EXCEL_MAX_FILENAME_LENGTH=255
+EXCEL_MAX_METADATA_VALUE_LENGTH=1000
+```
+
+## Migration Guide
+
+### Upgrading from Previous Versions
+
+All existing configurations continue to work. New features are additive:
+
+#### Test Current Configuration
+
+```bash
+synapse plugin run upload '{}' --debug
+```
+
+#### Convert to Explicit Mode
+
+```python
+# Add explicit mode setting
+config["use_single_path"] = False  # or True for single path mode
+```
+
+#### Gradual Migration to Single Path Mode
+
+```python
+# Start with subset
+test_config = {
+    "use_single_path": True,
+    "assets": {
+        "test_images": {"path": "/existing/path/images", "recursive": True}
+    }
+}
+
+# Then migrate all assets
+production_config = {
+    "use_single_path": True,
+    "assets": {
+        "images": {"path": "/optimized/path1", "recursive": True},
+        "annotations": {"path": "/optimized/path2", "recursive": False}
+    }
+}
+```
+
+## Storage Backend Support
+
+The plugin supports multiple storage backends:
+
+- **Local filesystem**: Optimized for high I/O
+- **S3/GCS**: Cloud storage with retry logic
+- **SFTP**: Connection pooling for remote servers
+- **HTTP**: Streaming uploads for large files
+
+## API Reference
+
+### Plugin Class
+
+```python
+from synapse import Plugin
+
+plugin = Plugin("upload")
+result = plugin.run(config, debug=True)
+```
+
+### Result Structure
+
+```python
+{
+    "status": "success",
+    "uploaded_files": 150,
+    "data_units_created": 50,
+    "errors": [],
+    "metadata": {}
+}
+```
+
+## Support and Resources
+
+- **Documentation**: Full API documentation at [synapse-docs]
+- **Issues**: Report bugs at [issue-tracker]
+- **Examples**: More examples at [examples-repo]
synapse_sdk/plugins/models.py
CHANGED

@@ -1,7 +1,10 @@
 import os
+from datetime import datetime
 from functools import cached_property
 from typing import Any, Dict
 
+from pydantic import BaseModel
+
 from synapse_sdk.clients.backend import BackendClient
 from synapse_sdk.devtools.config import get_backend_config
 from synapse_sdk.loggers import BackendLogger, ConsoleLogger
@@ -131,6 +134,26 @@ class Run:
     context = None
     client = None
 
+    class DevLog(BaseModel):
+        """Model for developer log entries.
+
+        Records custom events and information that plugin developers want to track
+        during plugin execution for debugging and monitoring purposes.
+
+        Attributes:
+            event_type (str): Type/category of the development event
+            message (str): Descriptive message about the event
+            data (dict | None): Optional additional data/context
+            level (Context): Event status/severity level
+            created (str): Timestamp when event occurred
+        """
+
+        event_type: str
+        message: str
+        data: dict | None = None
+        level: Context
+        created: str
+
     def __init__(self, job_id, context=None):
         self.job_id = job_id
         self.context = context or {}
@@ -177,5 +200,44 @@ class Run:
     def log_message(self, message, context=Context.INFO.value):
         self.logger.log('message', {'context': context, 'content': message})
 
+    def log_dev_event(self, message: str, data: dict | None = None, level: Context = Context.INFO):
+        """Log development event for plugin developers.
+
+        This function allows plugin developers to log custom events and information
+        during plugin execution for debugging, monitoring, and development purposes.
+        The event_type is automatically constructed as '{action_name}_dev_log' and cannot
+        be modified by plugin developers.
+
+        Args:
+            message (str): Descriptive message about the event
+            data (dict | None): Optional additional data or context to include
+            level (Context): Event severity level (INFO, WARNING, DANGER, SUCCESS)
+
+        Example:
+            >>> run = Run(job_id, context)
+            >>> run.log_dev_event('Data validation completed', {'records_count': 100})
+            >>> run.log_dev_event('Processing time recorded', {'duration_ms': 1500})
+            >>> run.log_dev_event('Variable state at checkpoint', {'variable_x': 42}, level=Context.WARNING)
+        """
+        # Construct event_type from action name - this cannot be modified by developers
+        action_name = self.context.get('action_name', 'unknown')
+        event_type = f'{action_name}_dev_log'
+
+        # Log the message for basic logging
+        self.log_message(f'[{event_type.upper()}] {message}', context=level.value)
+
+        # Also log the structured event for development tracking
+        now = datetime.now().isoformat()
+        self.log(
+            'dev_event',
+            self.DevLog(
+                event_type=event_type,
+                message=message,
+                data=data,
+                level=level,
+                created=now,
+            ).model_dump(),
+        )
+
     def end_log(self):
         self.log_message('Plugin run is complete.')