synapse-sdk 1.0.0a53__py3-none-any.whl → 1.0.0a55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of synapse-sdk might be problematic.
- synapse_sdk/clients/backend/__init__.py +2 -2
- synapse_sdk/clients/backend/{data_collection.py → dataset.py} +15 -15
- synapse_sdk/clients/ray/serve.py +2 -0
- synapse_sdk/plugins/categories/export/actions/export.py +0 -18
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +2 -24
- synapse_sdk/plugins/categories/upload/actions/upload.py +296 -18
- synapse_sdk/plugins/categories/upload/templates/config.yaml +3 -4
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +20 -24
- {synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/RECORD +14 -16
- synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py +0 -101
- synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py +0 -14
- {synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/top_level.txt +0 -0
synapse_sdk/clients/backend/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 from synapse_sdk.clients.backend.annotation import AnnotationClientMixin
 from synapse_sdk.clients.backend.core import CoreClientMixin
-from synapse_sdk.clients.backend.data_collection import DataCollectionClientMixin
+from synapse_sdk.clients.backend.dataset import DatasetClientMixin
 from synapse_sdk.clients.backend.hitl import HITLClientMixin
 from synapse_sdk.clients.backend.integration import IntegrationClientMixin
 from synapse_sdk.clients.backend.ml import MLClientMixin
@@ -9,7 +9,7 @@ from synapse_sdk.clients.backend.ml import MLClientMixin
 class BackendClient(
     AnnotationClientMixin,
     CoreClientMixin,
-    DataCollectionClientMixin,
+    DatasetClientMixin,
     IntegrationClientMixin,
     MLClientMixin,
     HITLClientMixin,
synapse_sdk/clients/backend/{data_collection.py → dataset.py}
RENAMED
@@ -8,13 +8,13 @@ from synapse_sdk.clients.base import BaseClient
 from synapse_sdk.clients.utils import get_batched_list


-class DataCollectionClientMixin(BaseClient):
-    def list_data_collection(self):
+class DatasetClientMixin(BaseClient):
+    def list_dataset(self):
         path = 'data_collections/'
         return self._list(path)

-    def get_data_collection(self, data_collection_id):
-        """Get data_collection from synapse-backend.
+    def get_dataset(self, data_collection_id):
+        """Get dataset from synapse-backend.

         Args:
             data_collection_id: The data_collection id to get.
@@ -40,19 +40,19 @@ class DataCollectionClientMixin(BaseClient):
         path = 'data_units/'
         return self._post(path, data=data)

-    def upload_data_collection(
+    def upload_dataset(
         self,
-        data_collection_id: int,
-        data_collection: Dict,
+        dataset_id: int,
+        dataset: Dict,
         project_id: Optional[int] = None,
         batch_size: int = 1000,
         process_pool: int = 10,
     ):
-        """Upload data_collection to synapse-backend.
+        """Upload dataset to synapse-backend.

         Args:
-            data_collection_id: The data_collection id to upload the data to.
-            data_collection: The data_collection to upload.
+            dataset_id: The dataset id to upload the data to.
+            dataset: The dataset to upload.
             * structure:
                 - files: The files to upload. (key: file name, value: file pathlib object)
                 - meta: The meta data to upload.
@@ -60,14 +60,14 @@ class DataCollectionClientMixin(BaseClient):
             batch_size: The batch size to upload the data.
             process_pool: The process pool to upload the data.
         """
-        # TODO validate data_collection with schema
+        # TODO validate dataset with schema

-        params = [(data, data_collection_id) for data in data_collection]
+        params = [(data, dataset_id) for data in dataset]

         with Pool(processes=process_pool) as pool:
-            data_collection = pool.starmap(self.upload_data_file, tqdm(params))
+            dataset = pool.starmap(self.upload_data_file, tqdm(params))

-        batches = get_batched_list(data_collection, batch_size)
+        batches = get_batched_list(dataset, batch_size)

         for batch in tqdm(batches):
             data_units = self.create_data_units(batch)
@@ -90,7 +90,7 @@ class DataCollectionClientMixin(BaseClient):
             * structure:
                 - files: The files to upload. (key: file name, value: file pathlib object)
                 - meta: The meta data to upload.
-            data_collection_id: The data_collection id to upload the data to.
+            data_collection_id: The dataset id to upload the data to.

         Returns:
             Dict: The result of the upload.
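The rename is a breaking change for SDK callers: the mixin, its methods, and their keyword arguments drop the data_collection naming while the backend endpoint ('data_collections/') is unchanged. A minimal before/after sketch; the BackendClient constructor arguments shown are hypothetical (its real signature is outside this diff), and the payload follows the structure documented in the docstring above:

    from pathlib import Path

    from synapse_sdk.clients.backend import BackendClient

    # Hypothetical connection details; not part of this diff.
    client = BackendClient('https://backend.example.com', 'api-token')

    dataset = [
        {
            'files': {'image_1': Path('images/0001.jpg')},  # key: file name, value: pathlib object
            'meta': {'source': 'camera-a'},
        },
    ]

    # 1.0.0a53 (removed):
    #   client.get_data_collection(1)
    #   client.upload_data_collection(data_collection_id=1, data_collection=dataset)

    # 1.0.0a55:
    client.get_dataset(1)
    client.upload_dataset(dataset_id=1, dataset=dataset)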
synapse_sdk/clients/ray/serve.py
CHANGED
@@ -8,6 +8,7 @@ class ServeClientMixin(BaseClient):
         response = self._get(path, params=params)
         for key, item in response['applications'].items():
             response['applications'][key]['deployments'] = list(item['deployments'].values())
+            response['applications'][key]['route_prefix'] = item['route_prefix']
         return list(response['applications'].values())

     def get_serve_application(self, pk, params=None):
@@ -15,6 +16,7 @@ class ServeClientMixin(BaseClient):
         response = self._get(path, params=params)
         try:
             response['applications'][pk]['deployments'] = list(response['applications'][pk]['deployments'].values())
+            response['applications'][pk]['route_prefix'] = response['applications'][pk]['route_prefix']
             return response['applications'][pk]
         except KeyError:
             raise ClientError(404, 'Serve Application Not Found')
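Consumers can now read the HTTP route prefix straight off the returned application dict. A small sketch, assuming serve_client is any object that mixes in ServeClientMixin (how it is constructed lies outside this diff):

    def describe_application(serve_client, pk):
        """Fetch one Serve application and summarize the fields this release touches."""
        app = serve_client.get_serve_application(pk)
        # 'deployments' is flattened from a dict to a list; 'route_prefix' is
        # carried through explicitly as of 1.0.0a55.
        return {
            'route_prefix': app['route_prefix'],
            'deployment_count': len(app['deployments']),
        }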
synapse_sdk/plugins/categories/export/actions/export.py
CHANGED
@@ -27,13 +27,6 @@ class ExportRun(Run):
     error: str | None = None
     created: str

-    class MetricsRecord(BaseModel):
-        """Metrics record model."""
-
-        stand_by: int
-        failed: int
-        success: int
-
     def log_file(
         self, log_type: str, target_id: int, data_file_info: dict, status: ExportStatus, error: str | None = None
     ):
@@ -58,16 +51,6 @@ class ExportRun(Run):
            ).model_dump(),
        )

-    def log_metrics(self, record: MetricsRecord, category: str):
-        """Log export metrics.
-
-        Args:
-            record (MetricsRecord): The metrics record to log.
-            category (str): The category of the metrics.
-        """
-        record = self.MetricsRecord.model_validate(record)
-        self.set_metrics(value=record.dict(), category=category)
-
     def export_log_json_file(
         self,
         target_id: int,
@@ -280,7 +263,6 @@ class ExportAction(Action):
             'proportion': 100,
         }
     }
-    metrics_categories = {'data_file', 'original_file'}

     def get_filtered_results(self, filters, handler):
         """Get filtered target results."""
synapse_sdk/plugins/categories/export/templates/plugin/export.py
CHANGED
@@ -42,8 +42,6 @@ def export(run, export_items, path_root, **params):
     origin_files_output_path.mkdir(parents=True, exist_ok=True)

     total = params['count']
-    original_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
-    data_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
     # progress init
     run.set_progress(0, total, category='dataset_conversion')
     for no, export_item in enumerate(export_items, start=1):
@@ -58,30 +56,12 @@ def export(run, export_items, path_root, **params):
         if save_original_file_flag:
             if no == 1:
                 run.log_message('Saving original file.')
-            original_status = save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)
-
-            original_file_metrics_record.stand_by -= 1
-            if original_status == ExportStatus.FAILED:
-                original_file_metrics_record.failed += 1
-                continue
-            else:
-                original_file_metrics_record.success += 1
-
-            run.log_metrics(record=original_file_metrics_record, category='original_file')
+            save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)

         # Extract data as JSON files
         if no == 1:
             run.log_message('Saving json file.')
-        data_status = save_as_json(run, final_data, json_output_path, errors_json_file_list)
-
-        data_file_metrics_record.stand_by -= 1
-        if data_status == ExportStatus.FAILED:
-            data_file_metrics_record.failed += 1
-            continue
-        else:
-            data_file_metrics_record.success += 1
-
-        run.log_metrics(record=data_file_metrics_record, category='data_file')
+        save_as_json(run, final_data, json_output_path, errors_json_file_list)

     run.end_log()

@@ -146,7 +126,6 @@ def save_original_file(run, result, base_path, error_file_list):
         status = ExportStatus.FAILED

     run.export_log_original_file(result['id'], file_info, status, error_msg)
-    return status


 def save_as_json(run, result, base_path, error_file_list):
@@ -173,4 +152,3 @@ def save_as_json(run, result, base_path, error_file_list):
         status = ExportStatus.FAILED

     run.export_log_json_file(result['id'], file_info, status, error_msg)
-    return status
synapse_sdk/plugins/categories/upload/actions/upload.py
CHANGED
@@ -1,6 +1,8 @@
 import json
+import re
 from datetime import datetime
 from enum import Enum
+from pathlib import Path
 from typing import Annotated, Dict, List

 from pydantic import AfterValidator, BaseModel, field_validator
@@ -105,6 +107,8 @@ class UploadParams(BaseModel):
     storage: int
     collection: int
     project: int | None
+    is_generate_tasks: bool = False
+    is_generate_ground_truths: bool = False

     @field_validator('storage', mode='before')
     @classmethod
@@ -129,7 +133,7 @@ class UploadParams(BaseModel):
         action = info.context['action']
         client = action.client
         try:
-            client.get_data_collection(value)
+            client.get_dataset(value)
         except ClientError:
             raise PydanticCustomError('client_error', _('Error occurred while checking collection exists.'))
         return value
@@ -163,6 +167,8 @@ class UploadAction(Action):
        analyze_collection: The progress category for the analyze collection process.
        data_file_upload: The progress category for the upload process.
        generate_data_units: The progress category for the generate data units process.
+       generate_tasks: The progress category for the generate tasks process.
+       generate_ground_truths: The progress category for the generate ground truths process.
     """

     name = 'upload'
@@ -171,19 +177,51 @@ class UploadAction(Action):
     run_class = UploadRun
     progress_categories = {
         'analyze_collection': {
-            'proportion': 5,
+            'proportion': 0,
         },
         'upload_data_files': {
-            'proportion': 60,
+            'proportion': 0,
         },
         'generate_data_units': {
-            'proportion': 35,
+            'proportion': 0,
+        },
+        'generate_tasks': {
+            'proportion': 0,
+        },
+        'generate_ground_truths': {
+            'proportion': 0,
         },
     }

-    def get_uploader(self, path):
+    def __init__(self, *args, **kwargs):
+        """Initialize UploadAction."""
+        super().__init__(*args, **kwargs)
+
+        # Setup progress categories ratio by options.
+        progress_ratios = {
+            'upload_only': (5, 60, 35, 0, 0),
+            'generate_tasks': (5, 45, 25, 25, 0),
+            'generate_ground_truths': (5, 35, 30, 15, 15),
+        }
+        options = self.config.get('options', {})
+        progress_categories = self.progress_categories
+        if options['allow_generate_tasks'] and not kwargs['params']['allow_generate_ground_truths']:
+            ratio_name = 'generate_tasks'
+        elif options['allow_generate_ground_truths'] and kwargs['params']['allow_generate_tasks']:
+            ratio_name = 'generate_ground_truths'
+        else:
+            ratio_name = 'upload_only'
+
+        assert len(progress_categories) == len(progress_ratios[ratio_name]), (
+            'Progress categories and ratios length mismatch.'
+        )
+        for i, category in enumerate(progress_categories):
+            progress_categories[category]['proportion'] = progress_ratios[ratio_name][i]
+        self.progress_categories = progress_categories
+
+    def get_uploader(self, path, file_specification, organized_files):
         """Get uploader from entrypoint."""
-        return self.entrypoint(self.run, path)
+        return self.entrypoint(self.run, path, file_specification, organized_files)

     def start(self) -> Dict:
         """Start upload process.
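The new __init__ keeps the class-level proportions at 0 and assigns one of three ratio profiles by position, relying on dict insertion order, so each profile must list exactly one value per category (hence the assert). A standalone sketch of that assignment step, using the 'generate_tasks' profile from the diff:

    progress_categories = {
        'analyze_collection': {'proportion': 0},
        'upload_data_files': {'proportion': 0},
        'generate_data_units': {'proportion': 0},
        'generate_tasks': {'proportion': 0},
        'generate_ground_truths': {'proportion': 0},
    }
    ratios = (5, 45, 25, 25, 0)  # the 'generate_tasks' profile shown above

    # Dicts preserve insertion order (Python 3.7+), so index i of the tuple
    # lines up with the i-th category.
    for i, category in enumerate(progress_categories):
        progress_categories[category]['proportion'] = ratios[i]

    assert sum(c['proportion'] for c in progress_categories.values()) == 100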
@@ -195,19 +233,23 @@ class UploadAction(Action):
         storage = self.client.get_storage(self.params['storage'])
         pathlib_cwd = get_pathlib(storage, self.params['path'])

-        # Initialize uploader.
-        uploader = self.get_uploader(pathlib_cwd)
-
         # Analyze Collection file specifications to determine the data structure for upload.
         file_specification_template = self._analyze_collection()
+        organized_files = self._organize_files(pathlib_cwd, file_specification_template)
+
+        # Initialize uploader.
+        uploader = self.get_uploader(pathlib_cwd, file_specification_template, organized_files)

         # Setup result dict.
         result = {}

-        #
+        # Get organized files from the uploader (plugin developer's custom implementation)
+        # or use the default organization method if uploader doesn't provide valid files
         organized_files = uploader.handle_upload_files()
-
-        if not organized_files:
+
+        # Validate the organized files
+        if not self._validate_organized_files(organized_files, file_specification_template):
+            self.run.log_message('Validation failed.', context=Context.ERROR.value)
             self.run.end_log()
             return result

@@ -229,6 +271,26 @@ class UploadAction(Action):
         generated_data_units = self._generate_data_units(uploaded_files, upload_result_count)
         result['generated_data_units_count'] = len(generated_data_units)

+        # Setup task with uploaded synapse-backend data units.
+        if not len(generated_data_units):
+            self.run.log_message('No data units were generated.', context=Context.WARNING.value)
+            self.run.end_log()
+            return result
+
+        if self.config['options']['allow_generate_tasks'] and self.params['is_generate_tasks']:
+            generated_tasks = self._generate_tasks(generated_data_units)
+            result['generated_tasks_count'] = len(generated_tasks)
+        else:
+            self.run.log_message('Generating tasks process has passed.')
+
+        # Generate ground truths for the uploaded data.
+        # TODO: Need to add ground truths generation logic later.
+        if self.config['options']['allow_generate_ground_truths'] and self.params['is_generate_ground_truths']:
+            generated_ground_truths = self._generate_ground_truths()
+            result['generated_ground_truths_count'] = len(generated_ground_truths)
+        else:
+            self.run.log_message('Generating ground truths process has passed.')
+
         self.run.end_log()
         return result
@@ -244,18 +306,13 @@ class UploadAction(Action):

         client = self.run.client
         collection_id = self.params['collection']
-        collection = client.get_data_collection(collection_id)
+        collection = client.get_dataset(collection_id)

         # Finish progress
         self.run.set_progress(1, 1, category='analyze_collection')

         return collection['file_specifications']

-    def _validate_organized_files(self, file_specification_template: Dict, organized_files: List) -> bool:
-        """Validate organized files from Uploader."""
-        validator = FileSpecificationValidator(file_specification_template, organized_files)
-        return validator.validate()
-
     def _upload_files(self, organized_files, organized_files_count: int) -> List:
         """Upload files to synapse-backend.
@@ -313,3 +370,224 @@ class UploadAction(Action):
         self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')

         return sum(generated_data_units, [])
+
+    def _generate_tasks(self, generated_data_units: List) -> List:
+        """Setup task with uploaded synapse-backend data units.
+
+        TODO: make batch size configurable.
+        """
+        # Initialize progress
+        self.run.set_progress(0, 1, category='generate_tasks')
+        self.run.log_message('Generating tasks with data files...')
+
+        # Prepare batches for processing
+        client = self.run.client
+        project_id = self.params['project']
+        current_progress = 0
+
+        # Generate tasks
+        generated_tasks = []
+        generated_data_units_count = len(generated_data_units)
+        for data_unit in generated_data_units:
+            tasks_data = []
+            task_data = {'project': project_id, 'data_unit': data_unit['id']}
+            tasks_data.append(task_data)
+            if tasks_data:
+                created_tasks = client.create_tasks(tasks_data)
+                created_task_ids = [created_task['id'] for created_task in created_tasks]
+                generated_tasks.append(created_task_ids)
+                for created_task_id in created_task_ids:
+                    self.run.log_task(created_task_id, UploadStatus.SUCCESS)
+
+            self.run.set_progress(current_progress, generated_data_units_count, category='generate_tasks')
+            current_progress += 1
+
+        # Finish progress
+        self.run.log_message('Generating tasks completed')
+        self.run.set_progress(1, 1, category='generate_tasks')
+
+        return sum(generated_tasks, [])
+
+    def _generate_ground_truths(self):
+        """Generate ground truths for the uploaded data.
+
+        TODO: Need to add ground truths generation logic later.
+        """
+        # Initialize progress
+        self.run.set_progress(0, 1, category='generate_ground_truths')
+        self.run.log_message('Generating ground truths...')
+
+        # Finish progress
+        self.run.log_message('Generating ground truths completed')
+        self.run.set_progress(1, 1, category='generate_ground_truths')
+
+    def _validate_organized_files(self, organized_files: List, file_specification_template: Dict) -> bool:
+        """Validate organized files from Uploader."""
+        validator = FileSpecificationValidator(file_specification_template, organized_files)
+        return validator.validate()
+
+    def _organize_files(self, directory: Path, file_specification: List) -> List:
+        """Organize files according to the file specification.
+        This method handles type-based directory structure where files are organized in
+        directories named after file types (e.g., 'image_1/' directory contains image files
+        like '1.jpg', '2.jpg'). For each dataset ID found in the primary directory, it attempts
+        to find corresponding files in all type directories.
+
+        TODO : Add Logic to handle file specific name patterns and extensions.
+        (e.g. pcd:S_DCH_230725_0156_LR_037.pcd, image_1:S_DCH_230725_0156_FC_037, image_2:S_DCH_230725_0156_LF_037.jpg)
+        Args:
+            directory (Path): Root directory containing files to organize.
+            file_specification (List): File specification list.
+        Returns:
+            List: List of dictionaries containing organized files.
+        """
+        organized_files = []
+        self.run.log_message(f'Looking for files in {directory}...')
+
+        # Check for type-based directory structure (e.g., image_1/, pcd_1/)
+        type_dirs = {}
+        type_extensions = {}  # Store common extensions for each type directory
+
+        for spec in file_specification:
+            spec_name = spec['name']
+
+            spec_dir = directory / spec_name
+            if spec_dir.exists() and spec_dir.is_dir():
+                type_dirs[spec_name] = spec_dir
+
+                # Analyze file extensions in this directory
+                extensions = {}
+                for file_path in spec_dir.glob('*'):
+                    if file_path.is_file():
+                        ext = file_path.suffix.lower()
+                        extensions[ext] = extensions.get(ext, 0) + 1
+
+                # Find the most common extension
+                if extensions:
+                    common_ext = max(extensions.items(), key=lambda x: x[1])[0]
+                    type_extensions[spec_name] = common_ext
+                    self.run.log_message(f'Found type directory: {spec_name} (common extension: {common_ext})')
+
+        # If type-based directories don't exist, exit early
+        if not type_dirs:
+            self.run.log_message('No type-based directory structure found.', context=Context.INFO.value)
+            return organized_files
+
+        self.run.log_message('Detected type-based directory structure')
+
+        # Build a comprehensive map of all dataset IDs across all type directories
+        dataset_files = {}  # Dictionary: file_name -> {spec_name -> file_path}
+
+        # First pass: collect all dataset IDs from all type directories
+        for spec_name, dir_path in type_dirs.items():
+            for file_path in dir_path.glob('*'):
+                if file_path.is_file():
+                    file_name = file_path.stem
+
+                    # Initialize dataset entry if it doesn't exist
+                    if file_name not in dataset_files:
+                        dataset_files[file_name] = {}
+
+                    # Map this file to its specification
+                    if spec_name not in dataset_files[file_name]:
+                        dataset_files[file_name][spec_name] = file_path
+                    else:
+                        # If multiple files with same file_name for same spec, use most recent
+                        existing_file = dataset_files[file_name][spec_name]
+                        if file_path.stat().st_mtime > existing_file.stat().st_mtime:
+                            dataset_files[file_name][spec_name] = file_path
+                            self.run.log_message(
+                                f"Found newer file for name of {file_name}, spec '{spec_name}': "
+                                f'{file_path.name} (replacing {existing_file.name})'
+                            )
+
+        if not dataset_files:
+            self.run.log_message('No dataset files found.', context=Context.WARNING.value)
+            return organized_files
+
+        self.run.log_message(f'Found {len(dataset_files)} potential datasets by ID')
+
+        # Second pass: organize valid datasets
+        for file_name, files_dict in sorted(dataset_files.items()):
+            self.run.log_message(f'Processing file name: {file_name}')
+
+            # Add file spec details for logging
+            for spec_name, file_path in files_dict.items():
+                self.run.log_message(f"Mapped '{spec_name}' to: {file_path.name}")
+
+            # Check if all required files are present
+            required_specs = [spec['name'] for spec in file_specification if spec.get('is_required', False)]
+            if all(req in files_dict for req in required_specs):
+                # Create metadata for this dataset
+                meta_data = {
+                    'origin_file_stem': file_name,
+                    'created_at': datetime.now().isoformat(),
+                }
+
+                # Add the organized dataset
+                organized_files.append({'files': files_dict, 'meta': meta_data})
+                self.run.log_message(f'Successfully organized dataset for ID {file_name}')
+            else:
+                # Missing required files warning
+                missing = [req for req in required_specs if req not in files_dict]
+                self.run.log_message(
+                    f'Dataset ID {file_name} is missing required files: {", ".join(missing)}',
+                    context=Context.WARNING.value,
+                )
+
+        self.run.log_message(f'Total datasets organized: {len(organized_files)}')
+        return organized_files
+
+    def _map_files_to_specification(self, directory: Path, file_specification: List) -> Dict[str, Path]:
+        """Map files in a directory to the file specification.
+
+        Args:
+            directory (Path): Directory containing files to map.
+            file_specification (List): File specification list.
+
+        Returns:
+            Dict[str, Path]: Dictionary mapping file specification names to file paths.
+        """
+        files_dict = {}
+
+        # Get all files in the directory once
+        all_files = [f for f in directory.iterdir() if f.is_file()]
+
+        # Process each file specification
+        for file_spec in file_specification:
+            file_name = file_spec['name']
+            is_required = file_spec.get('is_required', False)
+
+            # Generate name pattern based on the specification
+            name_parts = re.split(r'_(\d+)$', file_name)
+
+            # Find files matching the pattern
+            matching_files = []
+            if len(name_parts) > 1:
+                base_name = name_parts[0]
+                index = name_parts[1]
+                # Match patterns like "pcd_1.ext", "point_cloud_1.ext", etc.
+                for file in all_files:
+                    if base_name in file.stem and f'_{index}' in file.stem:
+                        matching_files.append(file)
+            else:
+                # Simple match - just find files containing the pattern
+                for file in all_files:
+                    if file_name in file.stem:
+                        matching_files.append(file)
+
+            # Process matching files
+            if matching_files:
+                # Sort by modification time (newest first) if multiple files match
+                if len(matching_files) > 1:
+                    matching_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
+                    self.run.log_message(
+                        f"Multiple files match '{file_name}'. Using most recent: {matching_files[0].name}"
+                    )
+
+                files_dict[file_name] = matching_files[0]
+                self.run.log_message(f"Mapped '{file_name}' to: {matching_files[0].name}")
+            elif is_required:
+                self.run.log_message(f"Required file '{file_name}' not found.", context=Context.WARNING.value)
+
+        return files_dict
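_organize_files therefore expects one sub-directory per specification name under the upload root and groups files across those directories by shared stem. A sketch of a matching layout and the structure the method would produce; all paths, spec names, and timestamps below are illustrative:

    from pathlib import Path

    # Example layout, following the docstring above:
    #   upload_root/
    #     image_1/0001.jpg  0002.jpg
    #     pcd_1/0001.pcd    0002.pcd
    #
    # With file_specification = [{'name': 'image_1', 'is_required': True},
    #                            {'name': 'pcd_1', 'is_required': True}],
    # the method would yield one entry per shared stem, e.g.:
    organized = [
        {
            'files': {
                'image_1': Path('upload_root/image_1/0001.jpg'),
                'pcd_1': Path('upload_root/pcd_1/0001.pcd'),
            },
            'meta': {'origin_file_stem': '0001', 'created_at': '2024-01-01T00:00:00'},
        },
    ]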
synapse_sdk/plugins/categories/upload/templates/config.yaml
CHANGED
@@ -1,10 +1,9 @@
 actions:
   upload:
     entrypoint: plugin.upload.Uploader
+    options:
+      allow_generate_tasks: false # Allow the plugin to generate tasks for the uploaded data
+      allow_generate_ground_truths: false # Allow the plugin to generate ground truths for the uploaded data
     supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
     ui_schema: |
       Dumped FormKit Schema for upload plugin custom options
-  task_pre_annotation:
-    entrypoint: plugin.upload.TaskPreAnnotation
-    ui_schema: |
-      Dumped FormKit Schema for upload plugin custom options
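Enabling either step requires two switches that are AND-ed together in start(): the plugin's config option (allow_generate_*, above) and the run parameter (is_generate_*, defined on UploadParams earlier in this diff). A minimal sketch of that gate with hypothetical values:

    # Hypothetical values showing how the gate in UploadAction.start() evaluates:
    config = {'options': {'allow_generate_tasks': True, 'allow_generate_ground_truths': False}}
    params = {'is_generate_tasks': True, 'is_generate_ground_truths': False}

    run_generate_tasks = config['options']['allow_generate_tasks'] and params['is_generate_tasks']
    assert run_generate_tasks  # both the plugin option and the run parameter must be on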
synapse_sdk/plugins/categories/upload/templates/plugin/upload.py
CHANGED
@@ -3,42 +3,38 @@ from typing import List


 class Uploader:
-    """Plugin upload action
+    """Plugin upload action interface for organizing files.

-
+    This class provides a minimal interface for plugin developers to implement
+    their own file organization logic.
     """

-    def __init__(self, run, path: Path):
+    def __init__(self, run, path: Path, file_specification: List = None, organized_files: List = None):
         """Initialize the plugin upload action class.

         Args:
-            run: Plugin run object.
-            path:
+            run: Plugin run object with logging capabilities.
+            path: Path object pointing to the upload target directory.
+            file_specification: List of specifications that define the structure of files to be uploaded.
+                Each specification contains details like file name, type, and requirements.
         """
         self.run = run
         self.path = path
+        self.file_specification = file_specification
+        self.organized_files = organized_files

     def handle_upload_files(self) -> List:
-        """
+        """Customize the organization of files for upload.

-
-
+        This method provides a hook for plugin developers to modify the default file organization.
+        You can override this method to filter files, transform data, or add custom metadata
+        based on your specific requirements.
+
+        Args:
+            organized_files (List): The default organized files structure.
+                Each item is a dictionary with 'files' and 'meta' keys.

         Returns:
-            List:
-
-        Examples:
-            [
-                {
-                    "files": {
-                        'image_1': image_1_pathlib_object,
-                        'image_2': image_2_pathlib_object,
-                        'meta_1': meta_1_pathlib_object,
-                    },
-                    "meta": {
-                        "key": "value"
-                    }
-                }
-            ]
+            List: The modified list of organized files to be uploaded.
         """
-        return
+        return self.organized_files
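A plugin template built on this interface now receives the SDK's default organization result and can return it with adjustments rather than rebuilding it from self.path. A hypothetical subclass sketch (the import path follows the entrypoint declared in the template config):

    from typing import List

    from plugin.upload import Uploader  # the template class shown above


    class FilteringUploader(Uploader):
        """Hypothetical subclass: keeps the SDK's default organization but tags each entry."""

        def handle_upload_files(self) -> List:
            organized = self.organized_files or []
            for entry in organized:
                # 'files' maps spec names to pathlib objects; 'meta' is free-form.
                entry['meta']['tagged_by'] = 'my-upload-plugin'
            return organized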
{synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/RECORD
CHANGED
@@ -28,17 +28,17 @@ synapse_sdk/clients/agent/__init__.py,sha256=Pz8_iTbIbnb7ywGJ3feqoZVmO2I3mEbwpWs
 synapse_sdk/clients/agent/core.py,sha256=x2jgORTjT7pJY67SLuc-5lMG6CD5OWpy8UgGeTf7IhA,270
 synapse_sdk/clients/agent/ray.py,sha256=JrwLyVOUDG2yYsbPrxyUtWbM-FWp9B6Bl_GdDby0rt8,1559
 synapse_sdk/clients/agent/service.py,sha256=s7KuPK_DB1nr2VHrigttV1WyFonaGHNrPvU8loRxHcE,478
-synapse_sdk/clients/backend/__init__.py,sha256=
+synapse_sdk/clients/backend/__init__.py,sha256=Fiehino2n3voaHTdpJHXSY7K_CDnMkQeokapbgeoTBk,1187
 synapse_sdk/clients/backend/annotation.py,sha256=f4jS4qlXH7M7mQ3EuCq-NrjJ_hJNDz8pEFAYqf-e008,996
 synapse_sdk/clients/backend/core.py,sha256=5XAOdo6JZ0drfk-FMPJ96SeTd9oja-VnTwzGXdvK7Bg,1027
-synapse_sdk/clients/backend/data_collection.py,sha256=
+synapse_sdk/clients/backend/dataset.py,sha256=eQ0O43Ck91z5Om7mb-vW_P5NIaX1OZKirjGs-WQHdM4,3480
 synapse_sdk/clients/backend/hitl.py,sha256=na2mSXFud92p4zUEuagcDWk2klxO7xn-e86cm0VZEvs,709
 synapse_sdk/clients/backend/integration.py,sha256=9LjkYcBpi7aog-MODSDS4RlmYahypu65qxBj-AcY7xc,2683
 synapse_sdk/clients/backend/ml.py,sha256=JoPH9Ly2E3HJ7S5mdGLtcGq7ruQVVrYfWArogwZLlms,1193
 synapse_sdk/clients/backend/models.py,sha256=s5d9sGGQ0Elj0HOGC1TuwE-eBkY1aTfJPl6ls11bNCk,1961
 synapse_sdk/clients/ray/__init__.py,sha256=9ZSPXVVxlJ8Wp8ku7l021ENtPjVrGgQDgqifkkVAXgM,187
 synapse_sdk/clients/ray/core.py,sha256=a4wyCocAma2HAm-BHlbZnoVbpfdR-Aad2FM0z6vPFvw,731
-synapse_sdk/clients/ray/serve.py,sha256=
+synapse_sdk/clients/ray/serve.py,sha256=eFhCYIv_irc_2RyuV3bzeWIVyz_1NlqwoNVh5KSWilY,1092
 synapse_sdk/clients/validators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/clients/validators/collections.py,sha256=LtnwvutsScubOUcZ2reGHLCzseXxtNIdnH2nv098aUU,1195
 synapse_sdk/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,10 +61,10 @@ synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py,sh
 synapse_sdk/plugins/categories/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/export/enums.py,sha256=gtyngvQ1DKkos9iKGcbecwTVQQ6sDwbrBPSGPNb5Am0,127
 synapse_sdk/plugins/categories/export/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/export/actions/export.py,sha256=
+synapse_sdk/plugins/categories/export/actions/export.py,sha256=xqPB_MufeMP3riaKCbGVFGukV8RdXcg6-zUrkw4t1-A,9922
 synapse_sdk/plugins/categories/export/templates/config.yaml,sha256=N7YmnFROb3s3M35SA9nmabyzoSb5O2t2TRPicwFNN2o,56
 synapse_sdk/plugins/categories/export/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/export/templates/plugin/export.py,sha256=
+synapse_sdk/plugins/categories/export/templates/plugin/export.py,sha256=JA2Y_A30QyJekSqDq8PeRuFR9k0yjQjOG-Xy6C8zPew,5196
 synapse_sdk/plugins/categories/neural_net/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/neural_net/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/neural_net/actions/deployment.py,sha256=y2LrS-pwazqRI5O0q1NUy45NQYsBj6ykbrXnDMs_fqE,1987
@@ -100,12 +100,10 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
 synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
 synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py,sha256=
-synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=
-synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=1O0kMfkFMGYwnpBcttrlC9bu4xzU9docw2MBOq_Elmo,417
+synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=L9OqHWNyzO5qXi9-afkgRI1hfL7ysJjY0z0a5kujJrQ,24202
+synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=kwHNWHFYbzDi1mEh40KozatPZbZGH44dlP0t0J7ejJw,483
 synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py,sha256=
-synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=dnK8gy33GjG5ettayawDJv1gM3xCm1K6lM-PfeeTjQw,1163
+synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
 synapse_sdk/plugins/templates/cookiecutter.json,sha256=NxOWk9A_v1pO0Ny4IYT9Cj5iiJ16--cIQrGC67QdR0I,396
 synapse_sdk/plugins/templates/hooks/post_gen_project.py,sha256=jqlYkY1O2TxIR-Vh3gnwILYy8k-D39Xx66d2KNQVMCs,147
 synapse_sdk/plugins/templates/hooks/pre_prompt.py,sha256=aOAMM623s0sKFGjTZaotAOYFvsNMxeii4tPyhOAFKVE,539
@@ -136,9 +134,9 @@ synapse_sdk/utils/storage/providers/__init__.py,sha256=x7RGwZryT2FpVxS7fGWryRVpq
 synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_ncGITZrL0u5wEA,363
 synapse_sdk/utils/storage/providers/s3.py,sha256=W94rQvhGRXti3R4mYP7gmU5pcyCQpGFIBLvxxqLVdRM,2231
 synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
-synapse_sdk-1.0.0a53.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
-synapse_sdk-1.0.0a53.dist-info/METADATA,sha256=
-synapse_sdk-1.0.0a53.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-synapse_sdk-1.0.0a53.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
-synapse_sdk-1.0.0a53.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
-synapse_sdk-1.0.0a53.dist-info/RECORD,,
+synapse_sdk-1.0.0a55.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
+synapse_sdk-1.0.0a55.dist-info/METADATA,sha256=d2fJLQn1LuXa0NeLokHzsjeE8zgIyOJrOH3bRBUoIw0,1303
+synapse_sdk-1.0.0a55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+synapse_sdk-1.0.0a55.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
+synapse_sdk-1.0.0a55.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
+synapse_sdk-1.0.0a55.dist-info/RECORD,,
synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py
DELETED
@@ -1,101 +0,0 @@
-from enum import Enum
-from typing import Annotated
-
-from pydantic import AfterValidator, BaseModel, field_validator
-from pydantic_core import PydanticCustomError
-
-from synapse_sdk.clients.exceptions import ClientError
-from synapse_sdk.plugins.categories.base import Action
-from synapse_sdk.plugins.categories.decorators import register_action
-from synapse_sdk.plugins.enums import PluginCategory, RunMethod
-from synapse_sdk.plugins.models import Run
-from synapse_sdk.utils.pydantic.validators import non_blank
-
-
-class TaskDataAnnotationType(str, Enum):
-    FILE = 'file'
-    INFERENCE = 'inference'
-
-
-class TaskPreAnnotationRun(Run):
-    pass
-
-
-class TaskPreAnnotationParams(BaseModel):
-    """TaskPreAnnotation action parameters.
-
-    Args:
-        name (str): The name of the action.
-        description (str | None): The description of the action.
-        project (int): The project ID.
-        data_collection (int): The data collection ID.
-        task_data_annotation_type (TaskDataAnnotationType): The type of task data annotation.
-    """
-
-    name: Annotated[str, AfterValidator(non_blank)]
-    description: str | None
-    project: int
-    data_collection: int
-    task_data_annotation_type: TaskDataAnnotationType
-
-    @field_validator('data_collection', mode='before')
-    @classmethod
-    def check_data_collection_exists(cls, value: str, info) -> str:
-        """Validate synapse-backend collection exists."""
-        action = info.context['action']
-        client = action.client
-        try:
-            client.get_data_collection(value)
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking data collection exists.')
-        return value
-
-    @field_validator('project', mode='before')
-    @classmethod
-    def check_project_exists(cls, value: str, info) -> str:
-        """Validate synapse-backend project exists."""
-        if not value:
-            return value
-
-        action = info.context['action']
-        client = action.client
-        try:
-            client.get_project(value)
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
-        return value
-
-
-@register_action
-class TaskPreAnnotationAction(Action):
-    """TaskPreAnnotation action class.
-
-    * Annotate data to tasks.
-    """
-
-    name = 'task_pre_annotation'
-    category = PluginCategory.UPLOAD
-    method = RunMethod.JOB
-    run_class = TaskPreAnnotationRun
-    progress_categories = {
-        'generate_tasks': {
-            'proportion': 10,
-        },
-        'annotate_task_data': {
-            'proportion': 90,
-        },
-    }
-
-    def start(self):
-        """Start task_pre_annotation action.
-
-        * Generate tasks.
-        * Annotate data to tasks.
-        """
-        task_pre_annotation = self.get_task_pre_annotation()
-        task_pre_annotation.handle_annotate_data_from_files()
-        return {}
-
-    def get_task_pre_annotation(self):
-        """Get task pre annotation entrypoint."""
-        return self.entrypoint()
synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py
DELETED
@@ -1,14 +0,0 @@
-class TaskPreAnnotation:
-    def __init__(self, run, *args, **kwargs):
-        """Initialize the plugin task pre annotation action class.
-
-        Args:
-            run: Plugin run object.
-        """
-        self.run = run
-
-    def handle_annotate_data_from_files(self):
-        pass
-
-    def handle_annotate_data_with_inference(self):
-        pass
{synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/WHEEL
File without changes
{synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/entry_points.txt
File without changes
{synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/licenses/LICENSE
File without changes
{synapse_sdk-1.0.0a53.dist-info → synapse_sdk-1.0.0a55.dist-info}/top_level.txt
File without changes