synapse-sdk 2025.10.5__py3-none-any.whl → 2025.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/clients/base.py +129 -9
- synapse_sdk/devtools/docs/docs/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +663 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +230 -8
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +621 -0
- synapse_sdk/devtools/docs/sidebars.ts +11 -0
- synapse_sdk/plugins/categories/neural_net/actions/train.py +592 -22
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +150 -3
- synapse_sdk/plugins/categories/upload/templates/README.md +61 -32
- synapse_sdk/utils/file/download.py +261 -0
- {synapse_sdk-2025.10.5.dist-info → synapse_sdk-2025.10.6.dist-info}/METADATA +1 -1
- {synapse_sdk-2025.10.5.dist-info → synapse_sdk-2025.10.6.dist-info}/RECORD +16 -14
- {synapse_sdk-2025.10.5.dist-info → synapse_sdk-2025.10.6.dist-info}/WHEEL +0 -0
- {synapse_sdk-2025.10.5.dist-info → synapse_sdk-2025.10.6.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-2025.10.5.dist-info → synapse_sdk-2025.10.6.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-2025.10.5.dist-info → synapse_sdk-2025.10.6.dist-info}/top_level.txt +0 -0
|
@@ -23,16 +23,86 @@ class TuneRun(TrainRun):
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class SearchAlgo(BaseModel):
|
|
26
|
+
"""
|
|
27
|
+
Configuration for Ray Tune search algorithms.
|
|
28
|
+
|
|
29
|
+
Supported algorithms:
|
|
30
|
+
- 'bayesoptsearch': Bayesian optimization using Gaussian Processes
|
|
31
|
+
- 'hyperoptsearch': Tree-structured Parzen Estimator (TPE)
|
|
32
|
+
- 'basicvariantgenerator': Random search (default)
|
|
33
|
+
|
|
34
|
+
Attributes:
|
|
35
|
+
name (str): Name of the search algorithm (case-insensitive)
|
|
36
|
+
points_to_evaluate (Optional[dict]): Optional initial hyperparameter
|
|
37
|
+
configurations to evaluate before starting optimization
|
|
38
|
+
|
|
39
|
+
Example:
|
|
40
|
+
{
|
|
41
|
+
"name": "hyperoptsearch",
|
|
42
|
+
"points_to_evaluate": [
|
|
43
|
+
{"learning_rate": 0.001, "batch_size": 32}
|
|
44
|
+
]
|
|
45
|
+
}
|
|
46
|
+
"""
|
|
47
|
+
|
|
26
48
|
name: str
|
|
27
49
|
points_to_evaluate: Optional[dict] = None
|
|
28
50
|
|
|
29
51
|
|
|
30
52
|
class Scheduler(BaseModel):
|
|
53
|
+
"""
|
|
54
|
+
Configuration for Ray Tune schedulers.
|
|
55
|
+
|
|
56
|
+
Supported schedulers:
|
|
57
|
+
- 'fifo': First-In-First-Out scheduler (default, runs all trials)
|
|
58
|
+
- 'hyperband': HyperBand early stopping scheduler
|
|
59
|
+
|
|
60
|
+
Attributes:
|
|
61
|
+
name (str): Name of the scheduler (case-insensitive)
|
|
62
|
+
options (Optional[str]): Optional scheduler-specific configuration parameters
|
|
63
|
+
|
|
64
|
+
Example:
|
|
65
|
+
{
|
|
66
|
+
"name": "hyperband",
|
|
67
|
+
"options": {
|
|
68
|
+
"max_t": 100,
|
|
69
|
+
"reduction_factor": 3
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
"""
|
|
73
|
+
|
|
31
74
|
name: str
|
|
32
75
|
options: Optional[str] = None
|
|
33
76
|
|
|
34
77
|
|
|
35
78
|
class TuneConfig(BaseModel):
|
|
79
|
+
"""
|
|
80
|
+
Configuration for Ray Tune hyperparameter optimization.
|
|
81
|
+
|
|
82
|
+
Attributes:
|
|
83
|
+
mode (Optional[str]): Optimization mode - 'max' or 'min'
|
|
84
|
+
metric (Optional[str]): Name of the metric to optimize
|
|
85
|
+
num_samples (int): Number of hyperparameter configurations to try (default: 1)
|
|
86
|
+
max_concurrent_trials (Optional[int]): Maximum number of trials to run in parallel
|
|
87
|
+
search_alg (Optional[SearchAlgo]): Search algorithm configuration
|
|
88
|
+
scheduler (Optional[Scheduler]): Trial scheduler configuration
|
|
89
|
+
|
|
90
|
+
Example:
|
|
91
|
+
{
|
|
92
|
+
"mode": "max",
|
|
93
|
+
"metric": "accuracy",
|
|
94
|
+
"num_samples": 20,
|
|
95
|
+
"max_concurrent_trials": 4,
|
|
96
|
+
"search_alg": {
|
|
97
|
+
"name": "hyperoptsearch"
|
|
98
|
+
},
|
|
99
|
+
"scheduler": {
|
|
100
|
+
"name": "hyperband",
|
|
101
|
+
"options": {"max_t": 100}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
"""
|
|
105
|
+
|
|
36
106
|
mode: Optional[str] = None
|
|
37
107
|
metric: Optional[str] = None
|
|
38
108
|
num_samples: int = 1
|
|
@@ -42,10 +112,51 @@ class TuneConfig(BaseModel):
|
|
|
42
112
|
|
|
43
113
|
|
|
44
114
|
class TuneParams(BaseModel):
|
|
115
|
+
"""
|
|
116
|
+
Parameters for TuneAction (DEPRECATED - use TrainAction with is_tune=True instead).
|
|
117
|
+
|
|
118
|
+
Attributes:
|
|
119
|
+
name (str): Name for the tuning job
|
|
120
|
+
description (str): Description of the job
|
|
121
|
+
checkpoint (int | None): Optional checkpoint ID to resume from
|
|
122
|
+
dataset (int): Dataset ID to use for training
|
|
123
|
+
hyperparameter (list): Hyperparameter search space
|
|
124
|
+
tune_config (TuneConfig): Tune configuration
|
|
125
|
+
|
|
126
|
+
Hyperparameter format:
|
|
127
|
+
Each item in hyperparameter list must have:
|
|
128
|
+
- 'name': Parameter name (string)
|
|
129
|
+
- 'type': Distribution type (string)
|
|
130
|
+
- Type-specific parameters:
|
|
131
|
+
- uniform/quniform: 'min', 'max'
|
|
132
|
+
- loguniform/qloguniform: 'min', 'max', 'base'
|
|
133
|
+
- randn/qrandn: 'mean', 'sd'
|
|
134
|
+
- randint/qrandint: 'min', 'max'
|
|
135
|
+
- lograndint/qlograndint: 'min', 'max', 'base'
|
|
136
|
+
- choice/grid_search: 'options'
|
|
137
|
+
|
|
138
|
+
Example:
|
|
139
|
+
{
|
|
140
|
+
"name": "my_tuning",
|
|
141
|
+
"dataset": 123,
|
|
142
|
+
"hyperparameter": [
|
|
143
|
+
{"name": "batch_size", "type": "choice", "options": [16, 32, 64]},
|
|
144
|
+
{"name": "learning_rate", "type": "loguniform", "min": 0.0001, "max": 0.01, "base": 10},
|
|
145
|
+
{"name": "epochs", "type": "randint", "min": 5, "max": 15}
|
|
146
|
+
],
|
|
147
|
+
"tune_config": {
|
|
148
|
+
"mode": "max",
|
|
149
|
+
"metric": "accuracy",
|
|
150
|
+
"num_samples": 10
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
"""
|
|
154
|
+
|
|
45
155
|
name: Annotated[str, AfterValidator(non_blank)]
|
|
46
156
|
description: str
|
|
47
157
|
checkpoint: int | None
|
|
48
158
|
dataset: int
|
|
159
|
+
hyperparameter: list
|
|
49
160
|
tune_config: TuneConfig
|
|
50
161
|
|
|
51
162
|
@field_validator('name')
|
|
@@ -73,6 +184,23 @@ class TuneParams(BaseModel):
|
|
|
73
184
|
@register_action
|
|
74
185
|
class TuneAction(TrainAction):
|
|
75
186
|
"""
|
|
187
|
+
**DEPRECATED**: This action is deprecated. Please use TrainAction with is_tune=True instead.
|
|
188
|
+
|
|
189
|
+
To migrate from tune to train with tuning:
|
|
190
|
+
- Change action from "tune" to "train"
|
|
191
|
+
- Add "is_tune": true to params
|
|
192
|
+
- Keep tune_config and hyperparameter as they are
|
|
193
|
+
|
|
194
|
+
Example:
|
|
195
|
+
{
|
|
196
|
+
"action": "train",
|
|
197
|
+
"params": {
|
|
198
|
+
"is_tune": true,
|
|
199
|
+
"tune_config": { ... },
|
|
200
|
+
"hyperparameter": [ ... ]
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
76
204
|
**Must read** Important notes before using Tune:
|
|
77
205
|
|
|
78
206
|
1. Path to the model output (which is the return value of your train function)
|
|
@@ -256,6 +384,10 @@ class TuneAction(TrainAction):
|
|
|
256
384
|
|
|
257
385
|
Returns:
|
|
258
386
|
object: Ray Tune scheduler instance.
|
|
387
|
+
|
|
388
|
+
Supported schedulers:
|
|
389
|
+
- 'fifo': FIFOScheduler (default)
|
|
390
|
+
- 'hyperband': HyperBandScheduler
|
|
259
391
|
"""
|
|
260
392
|
|
|
261
393
|
from ray.tune.schedulers import (
|
|
@@ -278,7 +410,12 @@ class TuneAction(TrainAction):
|
|
|
278
410
|
}
|
|
279
411
|
|
|
280
412
|
scheduler_type = tune_config['scheduler'].get('name', 'fifo').lower()
|
|
281
|
-
scheduler_class = scheduler_map.get(scheduler_type
|
|
413
|
+
scheduler_class = scheduler_map.get(scheduler_type)
|
|
414
|
+
|
|
415
|
+
if scheduler_class is None:
|
|
416
|
+
raise ValueError(
|
|
417
|
+
f'Unsupported scheduler: {scheduler_type}. Supported schedulers are: {", ".join(scheduler_map.keys())}'
|
|
418
|
+
)
|
|
282
419
|
|
|
283
420
|
# Pass the options through if provided; otherwise call the default constructor
|
|
284
421
|
options = tune_config['scheduler'].get('options')
|
|
@@ -291,13 +428,18 @@ class TuneAction(TrainAction):
|
|
|
291
428
|
@staticmethod
|
|
292
429
|
def convert_tune_search_alg(tune_config):
|
|
293
430
|
"""
|
|
294
|
-
Convert YAML hyperparameter configuration to Ray Tune search algorithm
|
|
431
|
+
Convert YAML hyperparameter configuration to Ray Tune search algorithm.
|
|
295
432
|
|
|
296
433
|
Args:
|
|
297
434
|
tune_config (dict): Hyperparameter configuration.
|
|
298
435
|
|
|
299
436
|
Returns:
|
|
300
|
-
|
|
437
|
+
object: Ray Tune search algorithm instance or None
|
|
438
|
+
|
|
439
|
+
Supported search algorithms:
|
|
440
|
+
- 'bayesoptsearch': Bayesian optimization
|
|
441
|
+
- 'hyperoptsearch': Tree-structured Parzen Estimator
|
|
442
|
+
- 'basicvariantgenerator': Random search (default)
|
|
301
443
|
"""
|
|
302
444
|
|
|
303
445
|
if tune_config.get('search_alg') is None:
|
|
@@ -328,6 +470,11 @@ class TuneAction(TrainAction):
|
|
|
328
470
|
from ray.tune.search.basic_variant import BasicVariantGenerator
|
|
329
471
|
|
|
330
472
|
search_alg = BasicVariantGenerator(points_to_evaluate=points_to_evaluate)
|
|
473
|
+
else:
|
|
474
|
+
raise ValueError(
|
|
475
|
+
f'Unsupported search algorithm: {search_alg_name}. '
|
|
476
|
+
f'Supported algorithms are: bayesoptsearch, hyperoptsearch, basicvariantgenerator'
|
|
477
|
+
)
|
|
331
478
|
|
|
332
479
|
return search_alg
|
|
333
480
|
|
|
@@ -7,6 +7,7 @@ The Upload Plugin provides comprehensive file and data upload functionality with
|
|
|
7
7
|
### CLI Usage Examples
|
|
8
8
|
|
|
9
9
|
#### Standard Upload (Single Directory)
|
|
10
|
+
|
|
10
11
|
```bash
|
|
11
12
|
synapse plugin run upload '{
|
|
12
13
|
"name": "Dataset Upload",
|
|
@@ -21,6 +22,7 @@ synapse plugin run upload '{
|
|
|
21
22
|
```
|
|
22
23
|
|
|
23
24
|
#### Multi-Path Upload (Different Locations)
|
|
25
|
+
|
|
24
26
|
```bash
|
|
25
27
|
synapse plugin run upload '{
|
|
26
28
|
"name": "Complex Dataset Upload",
|
|
@@ -39,6 +41,7 @@ synapse plugin run upload '{
|
|
|
39
41
|
### Common Use Cases
|
|
40
42
|
|
|
41
43
|
#### 1. Simple Dataset Upload
|
|
44
|
+
|
|
42
45
|
```json
|
|
43
46
|
{
|
|
44
47
|
"name": "Training Dataset",
|
|
@@ -52,6 +55,7 @@ synapse plugin run upload '{
|
|
|
52
55
|
```
|
|
53
56
|
|
|
54
57
|
#### 2. Multi-Source Dataset Upload
|
|
58
|
+
|
|
55
59
|
```json
|
|
56
60
|
{
|
|
57
61
|
"name": "Multi-Camera Dataset",
|
|
@@ -59,14 +63,15 @@ synapse plugin run upload '{
|
|
|
59
63
|
"collection": 2,
|
|
60
64
|
"use_single_path": true,
|
|
61
65
|
"assets": {
|
|
62
|
-
"front_camera": {"path": "/cameras/front", "recursive": true},
|
|
63
|
-
"rear_camera": {"path": "/cameras/rear", "recursive": true},
|
|
64
|
-
"lidar": {"path": "/sensors/lidar", "recursive": false}
|
|
66
|
+
"front_camera": { "path": "/cameras/front", "recursive": true },
|
|
67
|
+
"rear_camera": { "path": "/cameras/rear", "recursive": true },
|
|
68
|
+
"lidar": { "path": "/sensors/lidar", "recursive": false }
|
|
65
69
|
}
|
|
66
70
|
}
|
|
67
71
|
```
|
|
68
72
|
|
|
69
73
|
#### 3. Dataset with Metadata
|
|
74
|
+
|
|
70
75
|
```json
|
|
71
76
|
{
|
|
72
77
|
"name": "Annotated Dataset",
|
|
@@ -84,23 +89,23 @@ synapse plugin run upload '{
|
|
|
84
89
|
|
|
85
90
|
### Required Parameters
|
|
86
91
|
|
|
87
|
-
| Parameter
|
|
88
|
-
|
|
89
|
-
| `name`
|
|
90
|
-
| `storage`
|
|
91
|
-
| `collection` | integer | Collection ID defining file specs
|
|
92
|
-
| `assets`
|
|
92
|
+
| Parameter | Type | Description | Example |
|
|
93
|
+
| ------------ | ------- | ----------------------------------- | ------------------ |
|
|
94
|
+
| `name` | string | Display name for the upload | `"My Dataset"` |
|
|
95
|
+
| `storage` | integer | Storage backend ID | `1` |
|
|
96
|
+
| `collection` | integer | Collection ID defining file specs | `2` |
|
|
97
|
+
| `assets` | object | Path configuration (varies by mode) | See examples below |
|
|
93
98
|
|
|
94
99
|
### Optional Parameters
|
|
95
100
|
|
|
96
|
-
| Parameter
|
|
97
|
-
|
|
98
|
-
| `description`
|
|
99
|
-
| `project`
|
|
100
|
-
| `use_single_path`
|
|
101
|
-
| `is_recursive`
|
|
102
|
-
| `excel_metadata_path` | `string` | `null`
|
|
103
|
-
| `excel_metadata`
|
|
101
|
+
| Parameter | Type | Default | Description |
|
|
102
|
+
| --------------------- | -------- | ------- | -------------------------------------------------------------------------------- |
|
|
103
|
+
| `description` | string | `null` | Upload description |
|
|
104
|
+
| `project` | integer | `null` | Project ID to associate |
|
|
105
|
+
| `use_single_path` | boolean | `false` | Enable individual path mode |
|
|
106
|
+
| `is_recursive` | boolean | `false` | Global recursive setting |
|
|
107
|
+
| `excel_metadata_path` | string | `null` | **DEPRECATED** - File path to Excel metadata file (use `excel_metadata` instead) |
|
|
108
|
+
| `excel_metadata` | object | `null` | Base64 encoded Excel metadata (recommended) |
|
|
104
109
|
|
|
105
110
|
## Excel Metadata Support
|
|
106
111
|
|
|
@@ -110,11 +115,11 @@ The upload plugin provides advanced Excel metadata processing with flexible head
|
|
|
110
115
|
|
|
111
116
|
There are two separate parameters for providing Excel metadata:
|
|
112
117
|
|
|
113
|
-
#### 1. File Path Method (`excel_metadata_path`)
|
|
118
|
+
#### 1. File Path Method (`excel_metadata_path`) - **DEPRECATED**
|
|
114
119
|
|
|
115
|
-
:::
|
|
116
|
-
This parameter will be
|
|
117
|
-
|
|
120
|
+
:::warning Deprecation Notice
|
|
121
|
+
This parameter is **deprecated** and will be removed in a future version.
|
|
122
|
+
Please migrate to using the `excel_metadata` parameter with base64 encoding instead.
|
|
118
123
|
:::
|
|
119
124
|
|
|
120
125
|
**Use case:** Traditional file-based uploads where the Excel file exists on the server's file system.
|
|
@@ -128,13 +133,12 @@ Simple string path to an Excel file:
|
|
|
128
133
|
```
|
|
129
134
|
|
|
130
135
|
**Advantages:**
|
|
136
|
+
|
|
131
137
|
- Backward compatible with existing implementations
|
|
132
138
|
- Simple and straightforward
|
|
133
139
|
- Direct file system access
|
|
134
140
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
#### 2. Base64 Encoded Method (`excel_metadata`) - **RECOMMENDED**
|
|
141
|
+
#### 2. Base64 Encoded Method (`excel_metadata`)
|
|
138
142
|
|
|
139
143
|
**Use case:** Web frontends, APIs, and cloud integrations where files are transmitted as encoded data.
|
|
140
144
|
|
|
@@ -150,6 +154,7 @@ Send Excel file as base64-encoded data with original filename:
|
|
|
150
154
|
```
|
|
151
155
|
|
|
152
156
|
**Advantages:**
|
|
157
|
+
|
|
153
158
|
- No intermediate file storage required
|
|
154
159
|
- Perfect for web upload forms
|
|
155
160
|
- API-friendly JSON payload
|
|
@@ -158,16 +163,17 @@ Send Excel file as base64-encoded data with original filename:
|
|
|
158
163
|
|
|
159
164
|
**Important:** You cannot use both `excel_metadata_path` and `excel_metadata` at the same time.
|
|
160
165
|
|
|
161
|
-
**
|
|
166
|
+
**Migration Example:**
|
|
167
|
+
|
|
162
168
|
```python
|
|
163
169
|
import base64
|
|
164
170
|
|
|
165
|
-
#
|
|
171
|
+
# Old way (deprecated)
|
|
166
172
|
params = {
|
|
167
173
|
"excel_metadata_path": "/data/metadata.xlsx"
|
|
168
174
|
}
|
|
169
175
|
|
|
170
|
-
#
|
|
176
|
+
# New way (recommended)
|
|
171
177
|
with open("/data/metadata.xlsx", "rb") as f:
|
|
172
178
|
encoded = base64.b64encode(f.read()).decode("utf-8")
|
|
173
179
|
params = {
|
|
@@ -179,12 +185,14 @@ params = {
|
|
|
179
185
|
```
|
|
180
186
|
|
|
181
187
|
### Excel Format Example
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
|
185
|
-
|
|
|
188
|
+
|
|
189
|
+
| filename | category | quality | notes |
|
|
190
|
+
| --------- | ---------- | ------- | ----------------- |
|
|
191
|
+
| sample001 | vehicle | high | Clear visibility |
|
|
192
|
+
| sample002 | pedestrian | medium | Partial occlusion |
|
|
186
193
|
|
|
187
194
|
### Security Limits
|
|
195
|
+
|
|
188
196
|
- Max file size: 10MB
|
|
189
197
|
- Max rows: 10,000
|
|
190
198
|
- Max columns: 50
|
|
@@ -192,6 +200,7 @@ params = {
|
|
|
192
200
|
## File Matching Logic
|
|
193
201
|
|
|
194
202
|
Files are matched by **stem name** (filename without extension):
|
|
203
|
+
|
|
195
204
|
- `sample001.jpg` → stem: "sample001"
|
|
196
205
|
- `sample001.pcd` → stem: "sample001"
|
|
197
206
|
- `sample001.json` → stem: "sample001"
|
|
@@ -203,6 +212,7 @@ These files form a single dataset named "sample001".
|
|
|
203
212
|
### Common Issues
|
|
204
213
|
|
|
205
214
|
#### "No Files Found" Error
|
|
215
|
+
|
|
206
216
|
```bash
|
|
207
217
|
# Check path exists and is readable
|
|
208
218
|
ls -la /path/to/data
|
|
@@ -213,6 +223,7 @@ find /path/to/data -name "*.jpg" | head -10
|
|
|
213
223
|
```
|
|
214
224
|
|
|
215
225
|
#### Excel Processing Errors
|
|
226
|
+
|
|
216
227
|
```bash
|
|
217
228
|
# Check file format and size
|
|
218
229
|
file /path/to/metadata.xlsx
|
|
@@ -228,6 +239,7 @@ print(f'Rows: {wb.active.max_row}')
|
|
|
228
239
|
```
|
|
229
240
|
|
|
230
241
|
#### Upload Failures
|
|
242
|
+
|
|
231
243
|
```bash
|
|
232
244
|
# Test storage connection
|
|
233
245
|
synapse storage test --storage-id 1
|
|
@@ -242,16 +254,19 @@ synapse plugin run upload '{}' --debug
|
|
|
242
254
|
## Best Practices
|
|
243
255
|
|
|
244
256
|
### Directory Organization
|
|
257
|
+
|
|
245
258
|
- Use clear, descriptive directory names
|
|
246
259
|
- Keep reasonable directory sizes (< 10,000 files)
|
|
247
260
|
- Use absolute paths for reliability
|
|
248
261
|
|
|
249
262
|
### Performance Optimization
|
|
263
|
+
|
|
250
264
|
- Enable recursive only when needed
|
|
251
265
|
- Keep Excel files under 5MB
|
|
252
266
|
- Organize files in balanced directory structures
|
|
253
267
|
|
|
254
268
|
### Security Considerations
|
|
269
|
+
|
|
255
270
|
- Validate all paths before processing
|
|
256
271
|
- Use read-only permissions for source data
|
|
257
272
|
- Set appropriate Excel size limits
|
|
@@ -259,18 +274,24 @@ synapse plugin run upload '{}' --debug
|
|
|
259
274
|
## Advanced Features
|
|
260
275
|
|
|
261
276
|
### Batch Processing
|
|
277
|
+
|
|
262
278
|
The plugin automatically optimizes batch sizes based on dataset size:
|
|
279
|
+
|
|
263
280
|
- Small datasets (< 50 files): batch size 50
|
|
264
281
|
- Large datasets: dynamic batch size (10-100)
|
|
265
282
|
|
|
266
283
|
### Progress Tracking
|
|
284
|
+
|
|
267
285
|
Real-time progress updates with categories:
|
|
286
|
+
|
|
268
287
|
- Collection analysis: 2%
|
|
269
288
|
- File upload: 38%
|
|
270
289
|
- Data unit generation: 60%
|
|
271
290
|
|
|
272
291
|
### Error Handling
|
|
292
|
+
|
|
273
293
|
Comprehensive validation at multiple levels:
|
|
294
|
+
|
|
274
295
|
- Parameter validation (Pydantic)
|
|
275
296
|
- Runtime path validation
|
|
276
297
|
- File format validation
|
|
@@ -279,6 +300,7 @@ Comprehensive validation at multiple levels:
|
|
|
279
300
|
## Environment Variables
|
|
280
301
|
|
|
281
302
|
Configure Excel processing limits:
|
|
303
|
+
|
|
282
304
|
```bash
|
|
283
305
|
# File size limits
|
|
284
306
|
EXCEL_MAX_FILE_SIZE_MB=10
|
|
@@ -296,20 +318,24 @@ EXCEL_MAX_METADATA_VALUE_LENGTH=1000
|
|
|
296
318
|
## Migration Guide
|
|
297
319
|
|
|
298
320
|
### Upgrading from Previous Versions
|
|
321
|
+
|
|
299
322
|
All existing configurations continue to work. New features are additive:
|
|
300
323
|
|
|
301
324
|
#### Test Current Configuration
|
|
325
|
+
|
|
302
326
|
```bash
|
|
303
327
|
synapse plugin run upload '{}' --debug
|
|
304
328
|
```
|
|
305
329
|
|
|
306
330
|
#### Convert to Explicit Mode
|
|
331
|
+
|
|
307
332
|
```python
|
|
308
333
|
# Add explicit mode setting
|
|
309
334
|
config["use_single_path"] = False # or True for single path mode
|
|
310
335
|
```
|
|
311
336
|
|
|
312
337
|
#### Gradual Migration to Single Path Mode
|
|
338
|
+
|
|
313
339
|
```python
|
|
314
340
|
# Start with subset
|
|
315
341
|
test_config = {
|
|
@@ -332,6 +358,7 @@ production_config = {
|
|
|
332
358
|
## Storage Backend Support
|
|
333
359
|
|
|
334
360
|
The plugin supports multiple storage backends:
|
|
361
|
+
|
|
335
362
|
- **Local filesystem**: Optimized for high I/O
|
|
336
363
|
- **S3/GCS**: Cloud storage with retry logic
|
|
337
364
|
- **SFTP**: Connection pooling for remote servers
|
|
@@ -340,6 +367,7 @@ The plugin supports multiple storage backends:
|
|
|
340
367
|
## API Reference
|
|
341
368
|
|
|
342
369
|
### Plugin Class
|
|
370
|
+
|
|
343
371
|
```python
|
|
344
372
|
from synapse import Plugin
|
|
345
373
|
|
|
@@ -348,6 +376,7 @@ result = plugin.run(config, debug=True)
|
|
|
348
376
|
```
|
|
349
377
|
|
|
350
378
|
### Result Structure
|
|
379
|
+
|
|
351
380
|
```python
|
|
352
381
|
{
|
|
353
382
|
"status": "success",
|
|
@@ -362,4 +391,4 @@ result = plugin.run(config, debug=True)
|
|
|
362
391
|
|
|
363
392
|
- **Documentation**: Full API documentation at [synapse-docs]
|
|
364
393
|
- **Issues**: Report bugs at [issue-tracker]
|
|
365
|
-
- **Examples**: More examples at [examples-repo]
|
|
394
|
+
- **Examples**: More examples at [examples-repo]
|