synapse-sdk 1.0.0b22__py3-none-any.whl → 1.0.0b23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +680 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +897 -0
- synapse_sdk/devtools/docs/sidebars.ts +1 -0
- synapse_sdk/plugins/README.md +934 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +20 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +623 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +221 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +149 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +178 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +139 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +6 -1
- synapse_sdk/plugins/models.py +13 -7
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b23.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b23.dist-info}/RECORD +19 -10
- synapse_sdk/plugins/categories/upload/actions/upload.py +0 -1368
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b23.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b23.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b23.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b23.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,680 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: upload-plugins
|
|
3
|
+
title: Upload Plugins
|
|
4
|
+
sidebar_position: 3
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Upload Plugins
|
|
8
|
+
|
|
9
|
+
Upload plugins provide file upload and data ingestion operations that bring files into the Synapse platform, with comprehensive metadata support, security validation, and organized data unit generation.
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
**Available Actions:**
|
|
14
|
+
|
|
15
|
+
- `upload` - Upload files and directories to storage with optional Excel metadata processing
|
|
16
|
+
|
|
17
|
+
**Use Cases:**
|
|
18
|
+
|
|
19
|
+
- Bulk file uploads with metadata annotation
|
|
20
|
+
- Excel-based metadata mapping and validation
|
|
21
|
+
- Recursive directory processing
|
|
22
|
+
- Type-based file organization
|
|
23
|
+
- Batch data unit creation
|
|
24
|
+
- Secure file processing with size and content validation
|
|
25
|
+
|
|
26
|
+
**Supported Upload Sources:**
|
|
27
|
+
|
|
28
|
+
- Local file system paths (files and directories)
|
|
29
|
+
- Recursive directory scanning
|
|
30
|
+
- Excel metadata files for enhanced file annotation
|
|
31
|
+
- Mixed file types with automatic organization
|
|
32
|
+
|
|
33
|
+
## Upload Action Architecture
|
|
34
|
+
|
|
35
|
+
The upload system uses a modular architecture with specialized components for different aspects of file processing:
|
|
36
|
+
|
|
37
|
+
```mermaid
|
|
38
|
+
classDiagram
|
|
39
|
+
%% Light/Dark mode compatible colors with semi-transparency
|
|
40
|
+
classDef baseClass fill:#e1f5fe80,stroke:#0288d1,stroke-width:2px
|
|
41
|
+
classDef childClass fill:#c8e6c980,stroke:#388e3c,stroke-width:2px
|
|
42
|
+
classDef modelClass fill:#fff9c480,stroke:#f57c00,stroke-width:2px
|
|
43
|
+
classDef utilClass fill:#f5f5f580,stroke:#616161,stroke-width:2px
|
|
44
|
+
classDef enumClass fill:#ffccbc80,stroke:#d32f2f,stroke-width:2px
|
|
45
|
+
|
|
46
|
+
class UploadAction {
|
|
47
|
+
+name: str = "upload"
|
|
48
|
+
+category: PluginCategory.UPLOAD
|
|
49
|
+
+method: RunMethod.JOB
|
|
50
|
+
+run_class: UploadRun
|
|
51
|
+
+params_model: UploadParams
|
|
52
|
+
+progress_categories: dict
|
|
53
|
+
+metrics_categories: dict
|
|
54
|
+
|
|
55
|
+
+start() dict
|
|
56
|
+
+get_uploader(...) object
|
|
57
|
+
+_discover_files_recursive(path) List[Path]
|
|
58
|
+
+_discover_files_non_recursive(path) List[Path]
|
|
59
|
+
+_validate_excel_security(path) None
|
|
60
|
+
+_process_excel_metadata(path) dict
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
class UploadRun {
|
|
64
|
+
+log_message_with_code(code, args, level) None
|
|
65
|
+
+log_upload_event(code, args, level) None
|
|
66
|
+
+UploadEventLog: BaseModel
|
|
67
|
+
+DataFileLog: BaseModel
|
|
68
|
+
+DataUnitLog: BaseModel
|
|
69
|
+
+TaskLog: BaseModel
|
|
70
|
+
+MetricsRecord: BaseModel
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
class UploadParams {
|
|
74
|
+
+name: str
|
|
75
|
+
+description: str | None
|
|
76
|
+
+path: str
|
|
77
|
+
+storage: int
|
|
78
|
+
+collection: int
|
|
79
|
+
+project: int | None
|
|
80
|
+
+excel_metadata_path: str | None
|
|
81
|
+
+is_recursive: bool = False
|
|
82
|
+
+max_file_size_mb: int = 50
|
|
83
|
+
+creating_data_unit_batch_size: int = 100
|
|
84
|
+
+use_async_upload: bool = True
|
|
85
|
+
|
|
86
|
+
+check_storage_exists(value) str
|
|
87
|
+
+check_collection_exists(value) str
|
|
88
|
+
+check_project_exists(value) str
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
class ExcelSecurityConfig {
|
|
92
|
+
+MAX_FILE_SIZE_MB: int
|
|
93
|
+
+MAX_FILE_SIZE_BYTES: int
|
|
94
|
+
+MAX_MEMORY_USAGE_MB: int
|
|
95
|
+
+MAX_MEMORY_USAGE_BYTES: int
|
|
96
|
+
+MAX_ROWS: int
|
|
97
|
+
+MAX_COLUMNS: int
|
|
98
|
+
+MAX_FILENAME_LENGTH: int
|
|
99
|
+
+MAX_COLUMN_NAME_LENGTH: int
|
|
100
|
+
+MAX_METADATA_VALUE_LENGTH: int
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
class ExcelMetadataUtils {
|
|
104
|
+
+config: ExcelSecurityConfig
|
|
105
|
+
+validate_and_truncate_string(value, max_length) str
|
|
106
|
+
+is_valid_filename_length(filename) bool
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
class LogCode {
|
|
110
|
+
+VALIDATION_FAILED: str
|
|
111
|
+
+NO_FILES_FOUND: str
|
|
112
|
+
+EXCEL_SECURITY_VIOLATION: str
|
|
113
|
+
+EXCEL_PARSING_ERROR: str
|
|
114
|
+
+FILES_DISCOVERED: str
|
|
115
|
+
+UPLOADING_DATA_FILES: str
|
|
116
|
+
+GENERATING_DATA_UNITS: str
|
|
117
|
+
+IMPORT_COMPLETED: str
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
class UploadStatus {
|
|
121
|
+
+SUCCESS: str = "success"
|
|
122
|
+
+FAILED: str = "failed"
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
%% Relationships
|
|
126
|
+
UploadAction --> UploadRun : uses
|
|
127
|
+
UploadAction --> UploadParams : validates with
|
|
128
|
+
UploadAction --> ExcelSecurityConfig : configures
|
|
129
|
+
UploadAction --> ExcelMetadataUtils : processes with
|
|
130
|
+
UploadRun --> LogCode : logs with
|
|
131
|
+
UploadRun --> UploadStatus : tracks status
|
|
132
|
+
ExcelMetadataUtils --> ExcelSecurityConfig : validates against
|
|
133
|
+
|
|
134
|
+
%% Apply styles
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Upload Processing Flow
|
|
138
|
+
|
|
139
|
+
This flowchart shows the complete execution flow of upload operations:
|
|
140
|
+
|
|
141
|
+
```mermaid
|
|
142
|
+
flowchart TD
|
|
143
|
+
%% Start
|
|
144
|
+
A[Upload Action Started] --> B[Validate Parameters]
|
|
145
|
+
B --> C[Setup Output Paths]
|
|
146
|
+
C --> D[Discover Files]
|
|
147
|
+
|
|
148
|
+
%% File Discovery
|
|
149
|
+
D --> E{Recursive Mode?}
|
|
150
|
+
E -->|Yes| F[Scan Recursively]
|
|
151
|
+
E -->|No| G[Scan Directory Only]
|
|
152
|
+
F --> H[Collect All Files]
|
|
153
|
+
G --> H
|
|
154
|
+
|
|
155
|
+
%% Excel Processing
|
|
156
|
+
H --> I{Excel Metadata?}
|
|
157
|
+
I -->|Yes| J[Validate Excel Security]
|
|
158
|
+
I -->|No| L[Organize Files by Type]
|
|
159
|
+
|
|
160
|
+
J --> K[Process Excel Metadata]
|
|
161
|
+
K --> L
|
|
162
|
+
|
|
163
|
+
%% File Organization
|
|
164
|
+
L --> M[Create Type Directories]
|
|
165
|
+
M --> N[Batch Files for Processing]
|
|
166
|
+
|
|
167
|
+
%% Upload Processing
|
|
168
|
+
N --> O[Start File Upload]
|
|
169
|
+
O --> P[Process File Batch]
|
|
170
|
+
P --> Q{More Batches?}
|
|
171
|
+
Q -->|Yes| P
|
|
172
|
+
Q -->|No| R[Generate Data Units]
|
|
173
|
+
|
|
174
|
+
%% Data Unit Creation
|
|
175
|
+
R --> S[Create Data Unit Batch]
|
|
176
|
+
S --> T{More Units?}
|
|
177
|
+
T -->|Yes| S
|
|
178
|
+
T -->|No| U[Complete Upload]
|
|
179
|
+
|
|
180
|
+
%% Completion
|
|
181
|
+
U --> V[Update Metrics]
|
|
182
|
+
V --> W[Log Results]
|
|
183
|
+
W --> X[Return Summary]
|
|
184
|
+
|
|
185
|
+
%% Error Handling
|
|
186
|
+
B -->|Error| Y[Log Validation Error]
|
|
187
|
+
J -->|Error| Z[Log Excel Error]
|
|
188
|
+
P -->|Error| AA[Log Upload Error]
|
|
189
|
+
S -->|Error| BB[Log Data Unit Error]
|
|
190
|
+
|
|
191
|
+
Y --> CC[Return Error Result]
|
|
192
|
+
Z --> CC
|
|
193
|
+
AA --> CC
|
|
194
|
+
BB --> CC
|
|
195
|
+
|
|
196
|
+
%% Apply styles with light/dark mode compatibility
|
|
197
|
+
classDef startNode fill:#90caf980,stroke:#1565c0,stroke-width:2px
|
|
198
|
+
classDef processNode fill:#ce93d880,stroke:#6a1b9a,stroke-width:2px
|
|
199
|
+
classDef decisionNode fill:#ffcc8080,stroke:#ef6c00,stroke-width:2px
|
|
200
|
+
classDef errorNode fill:#ef9a9a80,stroke:#c62828,stroke-width:2px
|
|
201
|
+
classDef endNode fill:#a5d6a780,stroke:#2e7d32,stroke-width:2px
|
|
202
|
+
|
|
203
|
+
class A startNode
|
|
204
|
+
class B,C,D,F,G,H,J,K,L,M,N,O,P,R,S,U,V,W processNode
|
|
205
|
+
class E,I,Q,T decisionNode
|
|
206
|
+
class Y,Z,AA,BB,CC errorNode
|
|
207
|
+
class X endNode
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
## Upload Parameters
|
|
211
|
+
|
|
212
|
+
The upload action uses `UploadParams` for comprehensive parameter validation:
|
|
213
|
+
|
|
214
|
+
### Required Parameters
|
|
215
|
+
|
|
216
|
+
| Parameter | Type | Description | Validation |
|
|
217
|
+
| ------------ | ----- | -------------------------- | ------------------ |
|
|
218
|
+
| `name` | `str` | Human-readable upload name | Must be non-blank |
|
|
219
|
+
| `path` | `str` | Source file/directory path | Must be valid path |
|
|
220
|
+
| `storage` | `int` | Target storage ID | Must exist via API |
|
|
221
|
+
| `collection` | `int` | Data collection ID | Must exist via API |
|
|
222
|
+
|
|
223
|
+
### Optional Parameters
|
|
224
|
+
|
|
225
|
+
| Parameter | Type | Default | Description |
|
|
226
|
+
| ------------------------------- | ------------- | ------- | ---------------------------------- |
|
|
227
|
+
| `description` | `str \| None` | `None` | Upload description |
|
|
228
|
+
| `project` | `int \| None` | `None` | Project ID (validated if provided) |
|
|
229
|
+
| `excel_metadata_path` | `str \| None` | `None` | Path to Excel metadata file |
|
|
230
|
+
| `is_recursive` | `bool` | `False` | Scan directories recursively |
|
|
231
|
+
| `max_file_size_mb` | `int` | `50` | Maximum file size in MB |
|
|
232
|
+
| `creating_data_unit_batch_size` | `int` | `100` | Batch size for data units |
|
|
233
|
+
| `use_async_upload` | `bool` | `True` | Use asynchronous processing |
|
|
234
|
+
|
|
235
|
+
### Parameter Validation
|
|
236
|
+
|
|
237
|
+
The system performs real-time validation:
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
# Storage validation
|
|
241
|
+
@field_validator('storage', mode='before')
|
|
242
|
+
@classmethod
|
|
243
|
+
def check_storage_exists(cls, value: str, info) -> str:
|
|
244
|
+
action = info.context['action']
|
|
245
|
+
client = action.client
|
|
246
|
+
try:
|
|
247
|
+
client.get_storage(value)
|
|
248
|
+
except ClientError:
|
|
249
|
+
raise PydanticCustomError('client_error', 'Storage not found')
|
|
250
|
+
return value
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Excel Metadata Processing
|
|
254
|
+
|
|
255
|
+
Upload plugins support Excel files for enhanced metadata annotation:
|
|
256
|
+
|
|
257
|
+
### Excel File Format
|
|
258
|
+
|
|
259
|
+
The Excel file should follow this structure:
|
|
260
|
+
|
|
261
|
+
| filename | category | description | custom_field |
|
|
262
|
+
| ---------- | -------- | ------------------ | ------------ |
|
|
263
|
+
| image1.jpg | nature | Mountain landscape | high_res |
|
|
264
|
+
| image2.png | urban | City skyline | processed |
|
|
265
|
+
|
|
266
|
+
### Security Validation
|
|
267
|
+
|
|
268
|
+
Excel files undergo comprehensive security validation:
|
|
269
|
+
|
|
270
|
+
```python
|
|
271
|
+
class ExcelSecurityConfig:
|
|
272
|
+
MAX_FILE_SIZE_MB = 10 # File size limit
|
|
273
|
+
MAX_MEMORY_USAGE_MB = 30 # Memory usage limit
|
|
274
|
+
MAX_ROWS = 10000 # Row count limit
|
|
275
|
+
MAX_COLUMNS = 50 # Column count limit
|
|
276
|
+
MAX_FILENAME_LENGTH = 255 # Filename length limit
|
|
277
|
+
MAX_COLUMN_NAME_LENGTH = 100 # Column name length
|
|
278
|
+
MAX_METADATA_VALUE_LENGTH = 1000 # Metadata value length
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### Environment Configuration
|
|
282
|
+
|
|
283
|
+
Security limits can be configured via environment variables:
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
export EXCEL_MAX_FILE_SIZE_MB=20
|
|
287
|
+
export EXCEL_MAX_MEMORY_MB=50
|
|
288
|
+
export EXCEL_MAX_ROWS=20000
|
|
289
|
+
export EXCEL_MAX_COLUMNS=100
|
|
290
|
+
export EXCEL_MAX_FILENAME_LENGTH=500
|
|
291
|
+
export EXCEL_MAX_COLUMN_NAME_LENGTH=200
|
|
292
|
+
export EXCEL_MAX_METADATA_VALUE_LENGTH=2000
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### Metadata Processing Flow
|
|
296
|
+
|
|
297
|
+
1. **Security Validation**: File size, memory estimation
|
|
298
|
+
2. **Format Validation**: Header structure, column count
|
|
299
|
+
3. **Content Processing**: Row-by-row metadata extraction
|
|
300
|
+
4. **Data Sanitization**: Length limits, string truncation
|
|
301
|
+
5. **Mapping Creation**: Filename to metadata mapping
|
|
302
|
+
|
|
303
|
+
## File Organization
|
|
304
|
+
|
|
305
|
+
The upload system automatically organizes files based on their types:
|
|
306
|
+
|
|
307
|
+
### Type Detection
|
|
308
|
+
|
|
309
|
+
Files are categorized based on:
|
|
310
|
+
|
|
311
|
+
- File extension patterns
|
|
312
|
+
- MIME type detection
|
|
313
|
+
- Content analysis
|
|
314
|
+
- Custom type rules
|
|
315
|
+
|
|
316
|
+
### Directory Structure
|
|
317
|
+
|
|
318
|
+
```
|
|
319
|
+
upload_output/
|
|
320
|
+
├── images/
|
|
321
|
+
│ ├── image1.jpg
|
|
322
|
+
│ └── image2.png
|
|
323
|
+
├── documents/
|
|
324
|
+
│ ├── report.pdf
|
|
325
|
+
│ └── data.xlsx
|
|
326
|
+
└── videos/
|
|
327
|
+
└── presentation.mp4
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
### Batch Processing
|
|
331
|
+
|
|
332
|
+
Files are processed in configurable batches:
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
# Configure batch size
|
|
336
|
+
params = {
|
|
337
|
+
"creating_data_unit_batch_size": 100,
|
|
338
|
+
"use_async_upload": True
|
|
339
|
+
}
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
## Progress Tracking and Metrics
|
|
343
|
+
|
|
344
|
+
### Progress Categories
|
|
345
|
+
|
|
346
|
+
The upload action tracks progress across three main phases:
|
|
347
|
+
|
|
348
|
+
| Category | Proportion | Description |
|
|
349
|
+
| --------------------- | ---------- | ----------------------------------- |
|
|
350
|
+
| `analyze_collection` | 2% | Parameter validation and setup |
|
|
351
|
+
| `upload_data_files` | 38% | File upload processing |
|
|
352
|
+
| `generate_data_units` | 60% | Data unit creation and finalization |
|
|
353
|
+
|
|
354
|
+
### Metrics Collection
|
|
355
|
+
|
|
356
|
+
Real-time metrics are collected for monitoring:
|
|
357
|
+
|
|
358
|
+
```python
|
|
359
|
+
metrics_categories = {
|
|
360
|
+
'data_files': {
|
|
361
|
+
'stand_by': 0, # Files waiting to be processed
|
|
362
|
+
'failed': 0, # Files that failed upload
|
|
363
|
+
'success': 0, # Successfully uploaded files
|
|
364
|
+
},
|
|
365
|
+
'data_units': {
|
|
366
|
+
'stand_by': 0, # Units waiting to be created
|
|
367
|
+
'failed': 0, # Units that failed creation
|
|
368
|
+
'success': 0, # Successfully created units
|
|
369
|
+
},
|
|
370
|
+
}
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
## Type-Safe Logging
|
|
374
|
+
|
|
375
|
+
The upload system uses enum-based logging for consistency:
|
|
376
|
+
|
|
377
|
+
### Log Codes
|
|
378
|
+
|
|
379
|
+
```python
|
|
380
|
+
class LogCode(str, Enum):
|
|
381
|
+
VALIDATION_FAILED = 'VALIDATION_FAILED'
|
|
382
|
+
NO_FILES_FOUND = 'NO_FILES_FOUND'
|
|
383
|
+
EXCEL_SECURITY_VIOLATION = 'EXCEL_SECURITY_VIOLATION'
|
|
384
|
+
EXCEL_PARSING_ERROR = 'EXCEL_PARSING_ERROR'
|
|
385
|
+
FILES_DISCOVERED = 'FILES_DISCOVERED'
|
|
386
|
+
UPLOADING_DATA_FILES = 'UPLOADING_DATA_FILES'
|
|
387
|
+
GENERATING_DATA_UNITS = 'GENERATING_DATA_UNITS'
|
|
388
|
+
IMPORT_COMPLETED = 'IMPORT_COMPLETED'
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
### Logging Usage
|
|
392
|
+
|
|
393
|
+
```python
|
|
394
|
+
# Basic logging
|
|
395
|
+
run.log_message_with_code(LogCode.FILES_DISCOVERED, file_count)
|
|
396
|
+
|
|
397
|
+
# With custom level
|
|
398
|
+
run.log_message_with_code(
|
|
399
|
+
LogCode.EXCEL_SECURITY_VIOLATION,
|
|
400
|
+
filename,
|
|
401
|
+
level=Context.DANGER
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
# Upload-specific events
|
|
405
|
+
run.log_upload_event(LogCode.UPLOADING_DATA_FILES, batch_size)
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Usage Examples
|
|
409
|
+
|
|
410
|
+
### Basic File Upload
|
|
411
|
+
|
|
412
|
+
```python
|
|
413
|
+
from synapse_sdk.plugins.categories.upload.actions.upload import UploadAction
|
|
414
|
+
|
|
415
|
+
# Basic upload configuration
|
|
416
|
+
params = {
|
|
417
|
+
"name": "Dataset Upload",
|
|
418
|
+
"description": "Training dataset for ML model",
|
|
419
|
+
"path": "/data/training_images",
|
|
420
|
+
"storage": 1,
|
|
421
|
+
"collection": 5,
|
|
422
|
+
"is_recursive": True,
|
|
423
|
+
"max_file_size_mb": 100
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
action = UploadAction(
|
|
427
|
+
params=params,
|
|
428
|
+
plugin_config=plugin_config
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
result = action.run_action()
|
|
432
|
+
print(f"Uploaded {result['uploaded_files_count']} files")
|
|
433
|
+
print(f"Created {result['generated_data_units_count']} data units")
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
### Excel Metadata Upload
|
|
437
|
+
|
|
438
|
+
```python
|
|
439
|
+
# Upload with Excel metadata
|
|
440
|
+
params = {
|
|
441
|
+
"name": "Annotated Dataset Upload",
|
|
442
|
+
"path": "/data/images",
|
|
443
|
+
"storage": 1,
|
|
444
|
+
"collection": 5,
|
|
445
|
+
"excel_metadata_path": "/data/metadata.xlsx",
|
|
446
|
+
"is_recursive": False,
|
|
447
|
+
"creating_data_unit_batch_size": 50
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
action = UploadAction(
|
|
451
|
+
params=params,
|
|
452
|
+
plugin_config=plugin_config
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
result = action.run_action()
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
### Custom Configuration
|
|
459
|
+
|
|
460
|
+
```python
|
|
461
|
+
# Custom environment setup
|
|
462
|
+
import os
|
|
463
|
+
|
|
464
|
+
os.environ['EXCEL_MAX_FILE_SIZE_MB'] = '20'
|
|
465
|
+
os.environ['EXCEL_MAX_ROWS'] = '20000'
|
|
466
|
+
|
|
467
|
+
# Large file upload
|
|
468
|
+
params = {
|
|
469
|
+
"name": "Large Dataset Upload",
|
|
470
|
+
"path": "/data/large_dataset",
|
|
471
|
+
"storage": 2,
|
|
472
|
+
"collection": 10,
|
|
473
|
+
"max_file_size_mb": 500,
|
|
474
|
+
"creating_data_unit_batch_size": 200,
|
|
475
|
+
"use_async_upload": True,
|
|
476
|
+
"is_recursive": True
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
action = UploadAction(
|
|
480
|
+
params=params,
|
|
481
|
+
plugin_config=plugin_config,
|
|
482
|
+
debug=True
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
result = action.run_action()
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
## Error Handling
|
|
489
|
+
|
|
490
|
+
### Exception Types
|
|
491
|
+
|
|
492
|
+
The upload system defines specific exceptions:
|
|
493
|
+
|
|
494
|
+
```python
|
|
495
|
+
# Security violations
|
|
496
|
+
try:
|
|
497
|
+
action.run_action()
|
|
498
|
+
except ExcelSecurityError as e:
|
|
499
|
+
print(f"Excel security violation: {e}")
|
|
500
|
+
|
|
501
|
+
# Parsing errors
|
|
502
|
+
except ExcelParsingError as e:
|
|
503
|
+
print(f"Excel parsing failed: {e}")
|
|
504
|
+
|
|
505
|
+
# General upload errors
|
|
506
|
+
except ActionError as e:
|
|
507
|
+
print(f"Upload action failed: {e}")
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
### Validation Errors
|
|
511
|
+
|
|
512
|
+
Parameter validation provides detailed error messages:
|
|
513
|
+
|
|
514
|
+
```python
|
|
515
|
+
from pydantic import ValidationError
|
|
516
|
+
|
|
517
|
+
try:
|
|
518
|
+
params = UploadParams(**invalid_params)
|
|
519
|
+
except ValidationError as e:
|
|
520
|
+
for error in e.errors():
|
|
521
|
+
print(f"Field {error['loc']}: {error['msg']}")
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
## API Reference
|
|
525
|
+
|
|
526
|
+
### Main Classes
|
|
527
|
+
|
|
528
|
+
#### UploadAction
|
|
529
|
+
|
|
530
|
+
Main upload action class for file processing operations.
|
|
531
|
+
|
|
532
|
+
**Class Attributes:**
|
|
533
|
+
|
|
534
|
+
- `name = 'upload'` - Action identifier
|
|
535
|
+
- `category = PluginCategory.UPLOAD` - Plugin category
|
|
536
|
+
- `method = RunMethod.JOB` - Execution method
|
|
537
|
+
- `run_class = UploadRun` - Specialized run management
|
|
538
|
+
- `params_model = UploadParams` - Parameter validation model
|
|
539
|
+
|
|
540
|
+
**Key Methods:**
|
|
541
|
+
|
|
542
|
+
- `start()` - Main upload processing logic
|
|
543
|
+
- `get_uploader()` - Get configured uploader instance
|
|
544
|
+
- `_discover_files_recursive()` - Recursive file discovery
|
|
545
|
+
- `_process_excel_metadata()` - Excel metadata processing
|
|
546
|
+
|
|
547
|
+
#### UploadRun
|
|
548
|
+
|
|
549
|
+
Specialized run management for upload operations.
|
|
550
|
+
|
|
551
|
+
**Logging Methods:**
|
|
552
|
+
|
|
553
|
+
- `log_message_with_code(code, *args, level=None)` - Type-safe logging
|
|
554
|
+
- `log_upload_event(code, *args, level=None)` - Upload-specific events
|
|
555
|
+
|
|
556
|
+
**Nested Models:**
|
|
557
|
+
|
|
558
|
+
- `UploadEventLog` - Upload event logging
|
|
559
|
+
- `DataFileLog` - Data file processing logs
|
|
560
|
+
- `DataUnitLog` - Data unit creation logs
|
|
561
|
+
- `TaskLog` - Task execution logs
|
|
562
|
+
- `MetricsRecord` - Metrics tracking
|
|
563
|
+
|
|
564
|
+
#### UploadParams
|
|
565
|
+
|
|
566
|
+
Parameter validation model with Pydantic integration.
|
|
567
|
+
|
|
568
|
+
**Validation Features:**
|
|
569
|
+
|
|
570
|
+
- Real-time API validation for storage/collection/project
|
|
571
|
+
- String sanitization and length validation
|
|
572
|
+
- Type checking and conversion
|
|
573
|
+
- Custom validator methods
|
|
574
|
+
|
|
575
|
+
### Utility Classes
|
|
576
|
+
|
|
577
|
+
#### ExcelSecurityConfig
|
|
578
|
+
|
|
579
|
+
Security configuration for Excel file processing.
|
|
580
|
+
|
|
581
|
+
**Configuration Attributes:**
|
|
582
|
+
|
|
583
|
+
- File size and memory limits
|
|
584
|
+
- Row and column count limits
|
|
585
|
+
- String length restrictions
|
|
586
|
+
- Environment variable overrides
|
|
587
|
+
|
|
588
|
+
#### ExcelMetadataUtils
|
|
589
|
+
|
|
590
|
+
Utility methods for Excel metadata processing.
|
|
591
|
+
|
|
592
|
+
**Key Methods:**
|
|
593
|
+
|
|
594
|
+
- `validate_and_truncate_string()` - String sanitization
|
|
595
|
+
- `is_valid_filename_length()` - Filename validation
|
|
596
|
+
|
|
597
|
+
#### PathAwareJSONEncoder
|
|
598
|
+
|
|
599
|
+
Custom JSON encoder for Path and datetime objects.
|
|
600
|
+
|
|
601
|
+
**Supported Types:**
|
|
602
|
+
|
|
603
|
+
- Path objects (converts to string)
|
|
604
|
+
- Datetime objects (ISO format)
|
|
605
|
+
- Standard JSON-serializable types
|
|
606
|
+
|
|
607
|
+
### Enums
|
|
608
|
+
|
|
609
|
+
#### LogCode
|
|
610
|
+
|
|
611
|
+
Type-safe logging codes for upload operations.
|
|
612
|
+
|
|
613
|
+
**Categories:**
|
|
614
|
+
|
|
615
|
+
- Validation codes (VALIDATION_FAILED, STORAGE_VALIDATION_FAILED)
|
|
616
|
+
- File processing codes (NO_FILES_FOUND, FILES_DISCOVERED)
|
|
617
|
+
- Excel processing codes (EXCEL_SECURITY_VIOLATION, EXCEL_PARSING_ERROR)
|
|
618
|
+
- Progress codes (UPLOADING_DATA_FILES, GENERATING_DATA_UNITS)
|
|
619
|
+
|
|
620
|
+
#### UploadStatus
|
|
621
|
+
|
|
622
|
+
Upload processing status enumeration.
|
|
623
|
+
|
|
624
|
+
**Values:**
|
|
625
|
+
|
|
626
|
+
- `SUCCESS = 'success'` - Operation completed successfully
|
|
627
|
+
- `FAILED = 'failed'` - Operation failed with errors
|
|
628
|
+
|
|
629
|
+
### Exceptions
|
|
630
|
+
|
|
631
|
+
#### ExcelSecurityError
|
|
632
|
+
|
|
633
|
+
Raised when Excel files violate security constraints.
|
|
634
|
+
|
|
635
|
+
**Common Causes:**
|
|
636
|
+
|
|
637
|
+
- File size exceeds limits
|
|
638
|
+
- Memory usage estimation too high
|
|
639
|
+
- Content security violations
|
|
640
|
+
|
|
641
|
+
#### ExcelParsingError
|
|
642
|
+
|
|
643
|
+
Raised when Excel files cannot be parsed.
|
|
644
|
+
|
|
645
|
+
**Common Causes:**
|
|
646
|
+
|
|
647
|
+
- File format corruption
|
|
648
|
+
- Invalid Excel structure
|
|
649
|
+
- Missing required columns
|
|
650
|
+
- Content parsing failures
|
|
651
|
+
|
|
652
|
+
## Best Practices
|
|
653
|
+
|
|
654
|
+
### Performance Optimization
|
|
655
|
+
|
|
656
|
+
1. **Batch Processing**: Use appropriate batch sizes for large uploads
|
|
657
|
+
2. **Async Operations**: Enable async processing for better throughput
|
|
658
|
+
3. **Memory Management**: Configure Excel security limits appropriately
|
|
659
|
+
4. **Progress Monitoring**: Track progress categories for user feedback
|
|
660
|
+
|
|
661
|
+
### Security Considerations
|
|
662
|
+
|
|
663
|
+
1. **File Validation**: Always validate file sizes and types
|
|
664
|
+
2. **Excel Security**: Configure appropriate security limits
|
|
665
|
+
3. **Path Sanitization**: Validate and sanitize file paths
|
|
666
|
+
4. **Content Filtering**: Implement content-based security checks
|
|
667
|
+
|
|
668
|
+
### Error Handling
|
|
669
|
+
|
|
670
|
+
1. **Graceful Degradation**: Handle partial upload failures
|
|
671
|
+
2. **Detailed Logging**: Use LogCode enum for consistent logging
|
|
672
|
+
3. **User Feedback**: Provide clear error messages
|
|
673
|
+
4. **Recovery Options**: Implement retry mechanisms where appropriate
|
|
674
|
+
|
|
675
|
+
### Development Guidelines
|
|
676
|
+
|
|
677
|
+
1. **Modular Structure**: Follow the established modular pattern
|
|
678
|
+
2. **Type Safety**: Use Pydantic models and enum logging
|
|
679
|
+
3. **Testing**: Maintain comprehensive unit test coverage
|
|
680
|
+
4. **Documentation**: Document custom validators and methods
|