synapse-sdk 1.0.0b17__py3-none-any.whl → 1.0.0b19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/clients/backend/data_collection.py +2 -2
- synapse_sdk/devtools/docs/docs/contributing.md +1 -1
- synapse_sdk/devtools/docs/docs/features/index.md +4 -4
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +786 -0
- synapse_sdk/devtools/docs/docs/{features/plugins/index.md → plugins/plugins.md} +352 -21
- synapse_sdk/devtools/docs/docusaurus.config.ts +8 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +788 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/plugins.md +71 -0
- synapse_sdk/devtools/docs/package-lock.json +1366 -37
- synapse_sdk/devtools/docs/package.json +2 -1
- synapse_sdk/devtools/docs/sidebars.ts +8 -1
- synapse_sdk/plugins/categories/export/actions/export.py +2 -1
- synapse_sdk/plugins/categories/export/templates/config.yaml +1 -1
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +376 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +56 -190
- synapse_sdk/plugins/categories/upload/actions/upload.py +181 -22
- synapse_sdk/plugins/categories/upload/templates/config.yaml +24 -2
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +9 -2
- {synapse_sdk-1.0.0b17.dist-info → synapse_sdk-1.0.0b19.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0b17.dist-info → synapse_sdk-1.0.0b19.dist-info}/RECORD +24 -22
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/plugins/index.md +0 -30
- {synapse_sdk-1.0.0b17.dist-info → synapse_sdk-1.0.0b19.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b17.dist-info → synapse_sdk-1.0.0b19.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b17.dist-info → synapse_sdk-1.0.0b19.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b17.dist-info → synapse_sdk-1.0.0b19.dist-info}/top_level.txt +0 -0
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"dependencies": {
|
|
18
18
|
"@docusaurus/core": "3.8.1",
|
|
19
19
|
"@docusaurus/preset-classic": "3.8.1",
|
|
20
|
+
"@docusaurus/theme-mermaid": "^3.8.1",
|
|
20
21
|
"@mdx-js/react": "^3.0.0",
|
|
21
22
|
"clsx": "^2.0.0",
|
|
22
23
|
"prism-react-renderer": "^2.3.0",
|
|
@@ -44,4 +45,4 @@
|
|
|
44
45
|
"engines": {
|
|
45
46
|
"node": ">=18.0"
|
|
46
47
|
}
|
|
47
|
-
}
|
|
48
|
+
}
|
|
@@ -22,10 +22,17 @@ const sidebars: SidebarsConfig = {
|
|
|
22
22
|
label: 'Features',
|
|
23
23
|
items: [
|
|
24
24
|
'features/features',
|
|
25
|
-
'features/plugins/plugins',
|
|
26
25
|
'features/converters/converters',
|
|
27
26
|
],
|
|
28
27
|
},
|
|
28
|
+
{
|
|
29
|
+
type: 'category',
|
|
30
|
+
label: 'Plugin System',
|
|
31
|
+
items: [
|
|
32
|
+
'plugins/plugins',
|
|
33
|
+
'plugins/export-plugins',
|
|
34
|
+
],
|
|
35
|
+
},
|
|
29
36
|
{
|
|
30
37
|
type: 'category',
|
|
31
38
|
label: 'API Reference',
|
|
@@ -381,4 +381,5 @@ class ExportAction(Action):
|
|
|
381
381
|
export_items = handler.get_export_item(self.params['results'])
|
|
382
382
|
storage = self.client.get_storage(self.params['storage'])
|
|
383
383
|
pathlib_cwd = get_pathlib(storage, self.params['path'])
|
|
384
|
-
|
|
384
|
+
exporter = self.entrypoint(self.run, export_items, pathlib_cwd, **self.params)
|
|
385
|
+
return exporter.export()
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from itertools import tee
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Generator
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from synapse_sdk.plugins.categories.export.enums import ExportStatus
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseExporter:
    """Base class for export plugins with common functionality.

    This class handles common tasks like progress tracking, logging, and metrics
    that are shared across all export plugins. Plugin developers should inherit
    from this class and override the hooks they need for their specific logic.

    Core Methods:
        export(): Main export method - handles the complete export workflow
        process_data_conversion(): Run the before/convert/after pipeline on one item
        process_file_saving(): Handle file saving operations (can be overridden)
        setup_output_directories(): Setup output directories (can be overridden)

    Conversion Hooks (default to returning the data unchanged):
        before_convert(): Pre-process data before conversion
        convert_data(): Transform data during export
        after_convert(): Post-process data after conversion

    Optional Methods (can be overridden by subclasses):
        get_original_file_name(): Pick the file name used for saved files
        save_original_file(): Save original files from export items
        save_as_json(): Save data as JSON files

    Helper Methods:
        _process_original_file_saving(): Handle original file saving with metrics
        _process_json_file_saving(): Handle JSON file saving with metrics

    Auto-provided Utilities:
        Progress tracking via self.run.set_progress()
        Logging via self.run.log_message() and other run methods
        Error handling and metrics collection via self.run methods
    """

    def __init__(self, run, export_items: Generator, path_root: Path, **params):
        """Initialize the base export class.

        Args:
            run: Plugin run object with logging capabilities.
            export_items (generator): Export items generator
            path_root: pathlib object, the path to export
            **params: Additional parameters
        """
        self.run = run
        self.export_items = export_items
        self.path_root = path_root
        self.params = params

    def _create_unique_export_path(self, base_name: str) -> Path:
        """Create and return a not-yet-existing directory under ``path_root``.

        If ``path_root / base_name`` already exists, ``(1)``, ``(2)``, ... is
        appended to the name until an unused path is found.
        """
        export_path = self.path_root / base_name
        unique_export_path = export_path
        counter = 1
        while unique_export_path.exists():
            unique_export_path = export_path.with_name(f'{export_path.name}({counter})')
            counter += 1
        unique_export_path.mkdir(parents=True)
        return unique_export_path

    def _save_error_list(self, export_path: Path, errors_json_file_list: list, errors_original_file_list: list):
        """Write ``error_file_list.json`` into ``export_path`` if any error was collected."""
        if errors_json_file_list or errors_original_file_list:
            export_error_file = {'json_file_name': errors_json_file_list, 'origin_file_name': errors_original_file_list}
            with (export_path / 'error_file_list.json').open('w', encoding='utf-8') as f:
                json.dump(export_error_file, f, indent=4, ensure_ascii=False)

    def get_original_file_name(self, files):
        """Retrieve the original file name from the given file information.

        Args:
            files (dict): A dictionary containing file information

        Returns:
            file_name (str): The value stored under ``'file_name_original'``.
        """
        return files['file_name_original']

    def save_original_file(self, result, base_path, error_file_list):
        """Download and save the original file of one export item.

        Args:
            result (dict): Export item; ``result['files']['url']`` is downloaded
                and ``result['id']`` is used for logging.
            base_path (Path): The directory where the file will be saved.
            error_file_list (list): ``[file_name, error_msg]`` is appended on failure.

        Returns:
            ExportStatus: ``SUCCESS`` if the file was written, ``FAILED`` otherwise.
        """
        file_url = result['files']['url']
        file_name = self.get_original_file_name(result['files'])
        file_info = {'file_name': file_name}
        error_msg = ''
        try:
            # The download is inside the try block so that a network error or
            # an HTTP error status is recorded as a per-item failure instead of
            # aborting the whole export or saving an error page as the file.
            response = requests.get(file_url)
            response.raise_for_status()
            with (base_path / file_name).open('wb') as file:
                file.write(response.content)
            status = ExportStatus.SUCCESS
        except Exception as e:
            error_msg = str(e)
            error_file_list.append([file_name, error_msg])
            status = ExportStatus.FAILED

        self.run.export_log_original_file(result['id'], file_info, status, error_msg)
        return status

    def save_as_json(self, result, base_path, error_file_list):
        """Save ``result['data']`` as ``<original file stem>.json``.

        Args:
            result (dict): Export item; reads ``'files'``, ``'data'`` and ``'id'``.
            base_path (Path): The directory where the file will be saved.
            error_file_list (list): ``[file_name, error_msg]`` is appended on failure.

        Returns:
            ExportStatus: ``SUCCESS`` if the file was written, ``FAILED`` if the
            data is ``None`` or writing raised.
        """
        file_name = Path(self.get_original_file_name(result['files'])).stem
        json_data = result['data']
        file_info = {'file_name': f'{file_name}.json'}

        if json_data is None:
            error_msg = 'data is Null'
            error_file_list.append([f'{file_name}.json', error_msg])
            status = ExportStatus.FAILED
            self.run.log_export_event('NULL_DATA_DETECTED', result['id'])
            self.run.export_log_json_file(result['id'], file_info, status, error_msg)
            return status

        error_msg = ''
        try:
            with (base_path / f'{file_name}.json').open('w', encoding='utf-8') as f:
                json.dump(json_data, f, indent=4, ensure_ascii=False)
            status = ExportStatus.SUCCESS
        except Exception as e:
            error_msg = str(e)
            error_file_list.append([f'{file_name}.json', error_msg])
            status = ExportStatus.FAILED

        self.run.export_log_json_file(result['id'], file_info, status, error_msg)
        return status

    # Conversion hooks: the defaults pass the data through unchanged.
    def convert_data(self, data):
        """Converts the data. Override in subclasses; the default returns it unchanged."""
        return data

    def before_convert(self, data):
        """Preprocesses the data before conversion. The default returns it unchanged."""
        return data

    def after_convert(self, data):
        """Post-processes the data after conversion. The default returns it unchanged."""
        return data

    def _process_original_file_saving(
        self, final_data, origin_files_output_path, errors_original_file_list, original_file_metrics_record, no
    ):
        """Process original file saving with metrics tracking.

        Args:
            final_data: Converted data to save
            origin_files_output_path: Path to save original files
            errors_original_file_list: List to collect errors
            original_file_metrics_record: Metrics record for tracking
            no: Current item number for logging

        Returns:
            bool: True if processing should continue, False if should skip to next item
        """
        if no == 1:
            self.run.log_message('Saving original file.')
        original_status = self.save_original_file(final_data, origin_files_output_path, errors_original_file_list)

        original_file_metrics_record.stand_by -= 1
        if original_status == ExportStatus.FAILED:
            original_file_metrics_record.failed += 1
            return False  # Skip to next item

        original_file_metrics_record.success += 1
        return True  # Continue processing

    def _process_json_file_saving(
        self, final_data, json_output_path, errors_json_file_list, data_file_metrics_record, no
    ):
        """Process JSON file saving with metrics tracking.

        Args:
            final_data: Converted data to save
            json_output_path: Path to save JSON files
            errors_json_file_list: List to collect errors
            data_file_metrics_record: Metrics record for tracking
            no: Current item number for logging

        Returns:
            bool: True if processing should continue, False if should skip to next item
        """
        if no == 1:
            self.run.log_message('Saving json file.')
        data_status = self.save_as_json(final_data, json_output_path, errors_json_file_list)

        data_file_metrics_record.stand_by -= 1
        if data_status == ExportStatus.FAILED:
            data_file_metrics_record.failed += 1
            return False  # Skip to next item

        data_file_metrics_record.success += 1
        return True  # Continue processing

    def setup_output_directories(self, unique_export_path, save_original_file_flag):
        """Setup output directories for export.

        This method can be overridden by subclasses to customize directory structure.
        The default implementation creates 'json' and, when original files are
        saved, 'origin_files' directories.

        Args:
            unique_export_path: Base path for export
            save_original_file_flag: Whether original files will be saved

        Returns:
            dict: Dictionary containing paths for different file types
                Example: {'json_output_path': Path, 'origin_files_output_path': Path}
        """
        # Path to save JSON files
        json_output_path = unique_export_path / 'json'
        json_output_path.mkdir(parents=True, exist_ok=True)

        output_paths = {'json_output_path': json_output_path}

        # Path to save original files
        if save_original_file_flag:
            origin_files_output_path = unique_export_path / 'origin_files'
            origin_files_output_path.mkdir(parents=True, exist_ok=True)
            output_paths['origin_files_output_path'] = origin_files_output_path

        return output_paths

    def process_data_conversion(self, export_item):
        """Process data conversion pipeline for a single export item.

        Runs before_convert -> convert_data -> after_convert.

        Args:
            export_item: Single export item to process

        Returns:
            Final processed data ready for saving
        """
        preprocessed_data = self.before_convert(export_item)
        converted_data = self.convert_data(preprocessed_data)
        return self.after_convert(converted_data)

    def process_file_saving(
        self,
        final_data,
        unique_export_path,
        save_original_file_flag,
        errors_json_file_list,
        errors_original_file_list,
        original_file_metrics_record,
        data_file_metrics_record,
        no,
    ):
        """Process file saving operations for a single export item.

        This method can be overridden by subclasses to implement custom file saving logic.
        The default implementation saves the original file (if requested) and then
        the JSON file. If the original file fails, the JSON file is not attempted.

        Args:
            final_data: Converted data ready for saving
            unique_export_path: Base path for export
            save_original_file_flag: Whether to save original files
            errors_json_file_list: List to collect JSON file errors
            errors_original_file_list: List to collect original file errors
            original_file_metrics_record: Metrics record for original files
            data_file_metrics_record: Metrics record for JSON files
            no: Current item number for logging

        Returns:
            bool: True if processing should continue, False if should skip to next item
        """
        # These mirror the directories created by the default setup_output_directories().
        json_output_path = unique_export_path / 'json'
        origin_files_output_path = unique_export_path / 'origin_files' if save_original_file_flag else None

        if save_original_file_flag:
            should_continue = self._process_original_file_saving(
                final_data, origin_files_output_path, errors_original_file_list, original_file_metrics_record, no
            )
            # Metrics are logged before the early return so that a failed item
            # is reported immediately, including when it is the last item.
            self.run.log_metrics(record=original_file_metrics_record, category='original_file')
            if not should_continue:
                return False

        # Extract data as JSON files
        should_continue = self._process_json_file_saving(
            final_data, json_output_path, errors_json_file_list, data_file_metrics_record, no
        )
        self.run.log_metrics(record=data_file_metrics_record, category='data_file')
        if not should_continue:
            return False

        return True

    def export(self, export_items=None, results=None, **_kwargs) -> dict:
        """Main export method that can be overridden by subclasses for custom logic.

        This default implementation provides standard file saving functionality.
        Subclasses can override this method, or only process_file_saving(), to
        customize the export while still using the helper methods.

        Reads ``name``, ``count`` and (optionally) ``save_original_file`` from
        ``self.params``.

        Args:
            export_items: Optional export items to process. If not provided, uses self.export_items.
            results: Accepted for interface compatibility; not used by this implementation.
            **_kwargs: Accepted for interface compatibility; not used by this implementation.

        Returns:
            dict: ``{'export_path': str(self.path_root)}``.
        """
        # Use provided export_items or fall back to instance variable
        items_to_process = export_items if export_items is not None else self.export_items

        unique_export_path = self._create_unique_export_path(self.params['name'])

        self.run.log_message('Starting export process.')

        save_original_file_flag = self.params.get('save_original_file')
        errors_json_file_list = []
        errors_original_file_list = []

        # Setup output directories (can be customized by subclasses)
        self.setup_output_directories(unique_export_path, save_original_file_flag)

        total = self.params['count']

        original_file_metrics_record = self.run.MetricsRecord(stand_by=total)
        data_file_metrics_record = self.run.MetricsRecord(stand_by=total)

        # progress init
        self.run.set_progress(0, total, category='dataset_conversion')

        for no, export_item in enumerate(items_to_process, start=1):
            # min() keeps progress within bounds if more items arrive than 'count'.
            self.run.set_progress(min(no, total), total, category='dataset_conversion')
            if no == 1:
                self.run.log_message('Converting dataset.')

            final_data = self.process_data_conversion(export_item)

            # Process file saving (can be overridden by subclasses); its return
            # value only signals that the rest of this item was skipped.
            self.process_file_saving(
                final_data,
                unique_export_path,
                save_original_file_flag,
                errors_json_file_list,
                errors_original_file_list,
                original_file_metrics_record,
                data_file_metrics_record,
                no,
            )

        self.run.end_log()

        # Save error list files
        self._save_error_list(unique_export_path, errors_json_file_list, errors_original_file_list)

        return {'export_path': str(self.path_root)}
|
|
@@ -1,197 +1,63 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from itertools import tee
|
|
3
1
|
from pathlib import Path
|
|
2
|
+
from typing import Generator
|
|
4
3
|
|
|
5
|
-
import
|
|
4
|
+
from . import BaseExporter
|
|
6
5
|
|
|
7
|
-
from synapse_sdk.plugins.categories.export.enums import ExportStatus
|
|
8
6
|
|
|
7
|
+
class Exporter(BaseExporter):
|
|
8
|
+
"""Plugin export action interface for organizing files.
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
Args:
|
|
14
|
-
run : Execution object
|
|
15
|
-
export_items (generator):
|
|
16
|
-
- data (dict): dm_schema_data information.
|
|
17
|
-
- files (dict): File information. Includes file URL, original file path, metadata, etc.
|
|
18
|
-
- id (int): ground_truth ID
|
|
19
|
-
path_root : pathlib object, the path to export
|
|
20
|
-
**params: Additional parameters
|
|
21
|
-
|
|
22
|
-
Returns:
|
|
23
|
-
dict: Result
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
export_path = path_root / params['name']
|
|
27
|
-
unique_export_path = export_path
|
|
28
|
-
counter = 1
|
|
29
|
-
while unique_export_path.exists():
|
|
30
|
-
unique_export_path = export_path.with_name(f'{export_path.name}({counter})')
|
|
31
|
-
counter += 1
|
|
32
|
-
unique_export_path.mkdir(parents=True)
|
|
33
|
-
|
|
34
|
-
run.log_message('Starting export process.')
|
|
35
|
-
|
|
36
|
-
# results contains all information fetched through the list API.
|
|
37
|
-
# example:
|
|
38
|
-
# params.get('results', [])
|
|
39
|
-
|
|
40
|
-
save_original_file_flag = params.get('save_original_file')
|
|
41
|
-
errors_json_file_list = []
|
|
42
|
-
errors_original_file_list = []
|
|
43
|
-
|
|
44
|
-
# Path to save JSON files
|
|
45
|
-
json_output_path = unique_export_path / 'json'
|
|
46
|
-
json_output_path.mkdir(parents=True, exist_ok=True)
|
|
47
|
-
|
|
48
|
-
# Path to save original files
|
|
49
|
-
if save_original_file_flag:
|
|
50
|
-
origin_files_output_path = unique_export_path / 'origin_files'
|
|
51
|
-
origin_files_output_path.mkdir(parents=True, exist_ok=True)
|
|
52
|
-
|
|
53
|
-
export_items_count, export_items_process = tee(export_items)
|
|
54
|
-
total = sum(1 for _ in export_items_count)
|
|
55
|
-
|
|
56
|
-
original_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
|
|
57
|
-
data_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
|
|
58
|
-
# progress init
|
|
59
|
-
run.set_progress(0, total, category='dataset_conversion')
|
|
60
|
-
for no, export_item in enumerate(export_items_process, start=1):
|
|
61
|
-
run.set_progress(no, total, category='dataset_conversion')
|
|
62
|
-
if no == 1:
|
|
63
|
-
run.log_message('Converting dataset.')
|
|
64
|
-
preprocessed_data = before_convert(export_item)
|
|
65
|
-
converted_data = convert_data(preprocessed_data)
|
|
66
|
-
final_data = after_convert(converted_data)
|
|
67
|
-
|
|
68
|
-
# Call if original file extraction is needed
|
|
69
|
-
if save_original_file_flag:
|
|
70
|
-
if no == 1:
|
|
71
|
-
run.log_message('Saving original file.')
|
|
72
|
-
original_status = save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)
|
|
73
|
-
|
|
74
|
-
original_file_metrics_record.stand_by -= 1
|
|
75
|
-
if original_status == ExportStatus.FAILED:
|
|
76
|
-
original_file_metrics_record.failed += 1
|
|
77
|
-
continue
|
|
78
|
-
else:
|
|
79
|
-
original_file_metrics_record.success += 1
|
|
80
|
-
|
|
81
|
-
run.log_metrics(record=original_file_metrics_record, category='original_file')
|
|
82
|
-
|
|
83
|
-
# Extract data as JSON files
|
|
84
|
-
if no == 1:
|
|
85
|
-
run.log_message('Saving json file.')
|
|
86
|
-
data_status = save_as_json(run, final_data, json_output_path, errors_json_file_list)
|
|
87
|
-
|
|
88
|
-
data_file_metrics_record.stand_by -= 1
|
|
89
|
-
if data_status == ExportStatus.FAILED:
|
|
90
|
-
data_file_metrics_record.failed += 1
|
|
91
|
-
continue
|
|
92
|
-
else:
|
|
93
|
-
data_file_metrics_record.success += 1
|
|
94
|
-
|
|
95
|
-
run.log_metrics(record=data_file_metrics_record, category='data_file')
|
|
96
|
-
|
|
97
|
-
run.end_log()
|
|
98
|
-
|
|
99
|
-
# Save error list files
|
|
100
|
-
if len(errors_json_file_list) > 0 or len(errors_original_file_list) > 0:
|
|
101
|
-
export_error_file = {'json_file_name': errors_json_file_list, 'origin_file_name': errors_original_file_list}
|
|
102
|
-
with (unique_export_path / 'error_file_list.json').open('w', encoding='utf-8') as f:
|
|
103
|
-
json.dump(export_error_file, f, indent=4, ensure_ascii=False)
|
|
104
|
-
|
|
105
|
-
return {'export_path': str(path_root)}
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def convert_data(data):
|
|
109
|
-
"""Converts the data."""
|
|
110
|
-
return data
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def before_convert(data):
|
|
114
|
-
"""Preprocesses the data before conversion."""
|
|
115
|
-
return data
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def after_convert(data):
|
|
119
|
-
"""Post-processes the data after conversion."""
|
|
120
|
-
return data
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def get_original_file_name(files):
|
|
124
|
-
"""Retrieve the original file path from the given file information.
|
|
125
|
-
|
|
126
|
-
Args:
|
|
127
|
-
files (dict): A dictionary containing file information, including file URL,
|
|
128
|
-
original file path, metadata, etc.
|
|
129
|
-
|
|
130
|
-
Returns:
|
|
131
|
-
file_name (str): The original file name extracted from the file information.
|
|
10
|
+
This class provides a minimal interface for plugin developers to implement
|
|
11
|
+
their own export logic.
|
|
132
12
|
"""
|
|
133
|
-
return files['file_name_original']
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def save_original_file(run, result, base_path, error_file_list):
|
|
137
|
-
"""Saves the original file.
|
|
138
|
-
|
|
139
|
-
Args:
|
|
140
|
-
run : Execution object
|
|
141
|
-
result (dict): API response data containing file information.
|
|
142
|
-
base_path (Path): The directory where the file will be saved.
|
|
143
|
-
error_file_list (list): A list to store error files.
|
|
144
|
-
"""
|
|
145
|
-
file_url = result['files']['url']
|
|
146
|
-
file_name = get_original_file_name(result['files'])
|
|
147
|
-
response = requests.get(file_url)
|
|
148
|
-
file_info = {'file_name': file_name}
|
|
149
|
-
error_msg = ''
|
|
150
|
-
try:
|
|
151
|
-
with (base_path / file_name).open('wb') as file:
|
|
152
|
-
file.write(response.content)
|
|
153
|
-
status = ExportStatus.SUCCESS
|
|
154
|
-
except Exception as e:
|
|
155
|
-
error_msg = str(e)
|
|
156
|
-
error_file_list.append([file_name, error_msg])
|
|
157
|
-
status = ExportStatus.FAILED
|
|
158
|
-
|
|
159
|
-
run.export_log_original_file(result['id'], file_info, status, error_msg)
|
|
160
|
-
return status
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def save_as_json(run, result, base_path, error_file_list):
|
|
164
|
-
"""Saves the data as a JSON file.
|
|
165
|
-
|
|
166
|
-
Args:
|
|
167
|
-
run : Execution object
|
|
168
|
-
result (dict): API response data containing file information.
|
|
169
|
-
base_path (Path): The directory where the file will be saved.
|
|
170
|
-
error_file_list (list): A list to store error files.
|
|
171
|
-
"""
|
|
172
|
-
# Default save file name: original file name
|
|
173
|
-
file_name = Path(get_original_file_name(result['files'])).stem
|
|
174
|
-
json_data = result['data']
|
|
175
|
-
file_info = {'file_name': f'{file_name}.json'}
|
|
176
|
-
|
|
177
|
-
if json_data is None:
|
|
178
|
-
error_msg = 'data is Null'
|
|
179
|
-
error_file_list.append([f'{file_name}.json', error_msg])
|
|
180
|
-
status = ExportStatus.FAILED
|
|
181
|
-
run.log_export_event('NULL_DATA_DETECTED', result['id'])
|
|
182
|
-
run.export_log_json_file(result['id'], file_info, status, error_msg)
|
|
183
|
-
|
|
184
|
-
return status
|
|
185
|
-
|
|
186
|
-
error_msg = ''
|
|
187
|
-
try:
|
|
188
|
-
with (base_path / f'{file_name}.json').open('w', encoding='utf-8') as f:
|
|
189
|
-
json.dump(json_data, f, indent=4, ensure_ascii=False)
|
|
190
|
-
status = ExportStatus.SUCCESS
|
|
191
|
-
except Exception as e:
|
|
192
|
-
error_msg = str(e)
|
|
193
|
-
error_file_list.append([f'{file_name}.json', str(e)])
|
|
194
|
-
status = ExportStatus.FAILED
|
|
195
13
|
|
|
196
|
-
|
|
197
|
-
|
|
14
|
+
def __init__(self, run, export_items: Generator, path_root: Path, **params):
    """Set up the plugin exporter by delegating to the base class initializer.

    Args:
        run: Plugin run object with logging capabilities.
        export_items (generator): Items to export. Each item provides:
            - data (dict): dm_schema_data information.
            - files (dict): File information. Includes file URL, original file path, metadata, etc.
            - id (int): target ID (ex. assignment id, task id, ground_truth_event id)
        path_root: pathlib object, the path to export
        **params: Additional parameters
            - name (str): The name of the action.
            - description (str | None): The description of the action.
            - storage (int): The storage ID to save the exported data.
            - save_original_file (bool): Whether to save the original file.
            - path (str): The path to save the exported data.
            - target (str): The target source to export data from. (ex. ground_truth, assignment, task)
            - filter (dict): The filter criteria to apply.
            - extra_params (dict | None): Additional parameters for export customization.
                Example: {"include_metadata": True, "compression": "gzip"}
            - count (int): Total number of results.
            - results (list): List of results fetched through the list API.
            - project_id (int): Project ID.
            - configuration (dict): Project configuration.
    """
    # No extra state here; everything is stored by the parent initializer.
    super().__init__(run, export_items, path_root, **params)
|
|
39
|
+
|
|
40
|
+
def export(self, export_items=None, results=None, **kwargs) -> dict:
    """Run the export by forwarding every argument to the base class ``export``.

    Args:
        export_items: Optional export items to process. If not provided, uses self.export_items.
        results: Optional results data, passed through to the base implementation.
        **kwargs: Additional parameters, passed through to the base implementation.

    Returns:
        dict: Whatever the base class ``export`` returns.
    """
    outcome = super().export(export_items, results, **kwargs)
    return outcome
|
|
52
|
+
|
|
53
|
+
def convert_data(self, data):
    """Conversion hook; this template hands the data back unchanged."""
    converted = data
    return converted
|
|
56
|
+
|
|
57
|
+
def before_convert(self, data):
    """Pre-conversion hook; this template hands the data back unchanged."""
    prepared = data
    return prepared
|
|
60
|
+
|
|
61
|
+
def after_convert(self, data):
    """Post-conversion hook; this template hands the data back unchanged."""
    finalized = data
    return finalized
|