synapse-sdk 1.0.0a57__py3-none-any.whl → 1.0.0a59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of synapse-sdk might be problematic.
- synapse_sdk/clients/backend/annotation.py +8 -0
- synapse_sdk/clients/backend/data_collection.py +10 -1
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +210 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +4 -0
- synapse_sdk/plugins/categories/{upload/templates/plugin/task_pre_annotation.py → pre_annotation/templates/plugin/to_task.py} +1 -7
- synapse_sdk/plugins/categories/upload/actions/upload.py +161 -5
- synapse_sdk/plugins/categories/upload/templates/config.yaml +3 -4
- {synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/RECORD +13 -13
- synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py +0 -101
- {synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/top_level.txt +0 -0
synapse_sdk/clients/backend/annotation.py
@@ -7,6 +7,14 @@ class AnnotationClientMixin(BaseClient):
         path = f'projects/{pk}/'
         return self._get(path)
 
+    def get_task(self, pk, params):
+        path = f'tasks/{pk}/'
+        return self._get(path, params=params)
+
+    def patch_task(self, pk, data):
+        path = f'tasks/{pk}/'
+        return self._patch(path, data=data)
+
     def get_task_tag(self, pk):
         path = f'task_tags/{pk}/'
         return self._get(path)
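The two new task endpoints follow the existing mixin pattern: build a path, then delegate to an HTTP helper. A minimal usage sketch, assuming a composed synapse-backend client object that includes AnnotationClientMixin (client construction is not part of this diff, and the patch payload shape is illustrative):

    # 'client' is assumed: any client instance exposing the mixin methods above.
    # The fields/expand values mirror those used by the new to_task action below.
    task = client.get_task(123, params={'fields': 'id,data,data_unit', 'expand': 'data_unit'})
    client.patch_task(123, data={'data': {'annotations': []}})  # hypothetical payload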
synapse_sdk/clients/backend/data_collection.py
@@ -5,7 +5,7 @@ from typing import Dict, Optional
 from tqdm import tqdm
 
 from synapse_sdk.clients.base import BaseClient
-from synapse_sdk.clients.utils import get_batched_list
+from synapse_sdk.clients.utils import get_batched_list, get_default_url_conversion
 
 
 class DataCollectionClientMixin(BaseClient):
@@ -31,6 +31,10 @@ class DataCollectionClientMixin(BaseClient):
         path = 'data_files/'
         return self._post(path, files={'file': file_path})
 
+    def get_data_unit(self, data_unit_id: int, params=None):
+        path = f'data_units/{data_unit_id}/'
+        return self._get(path, params=params)
+
     def create_data_units(self, data):
         """Create data units to synapse-backend.
 
@@ -40,6 +44,11 @@ class DataCollectionClientMixin(BaseClient):
         path = 'data_units/'
         return self._post(path, data=data)
 
+    def list_data_units(self, params=None, url_conversion=None, list_all=False):
+        path = 'data_units/'
+        url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
+        return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)
+
     def upload_data_collection(
         self,
         data_collection_id: int,
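The new list_data_units applies get_default_url_conversion so that each result's 'files' fields come back with converted URLs. A sketch, assuming (by analogy with the client.list_tasks call in to_task.py below) that list_all=True returns a (generator, count) pair; the 'data_collection' filter key is also an assumption:

    # Hypothetical filter; returns lazily-paged data units plus a total count.
    data_units, count = client.list_data_units(params={'data_collection': 45}, list_all=True)
    for data_unit in data_units:
        print(data_unit['id'], data_unit['files'])  # 'files' URLs rewritten for download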
synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py
@@ -0,0 +1,210 @@
+import json
+from datetime import datetime
+from enum import Enum
+from typing import Annotated
+
+import requests
+from pydantic import AfterValidator, BaseModel, field_validator
+from pydantic_core import PydanticCustomError
+
+from synapse_sdk.clients.exceptions import ClientError
+from synapse_sdk.plugins.categories.base import Action
+from synapse_sdk.plugins.categories.decorators import register_action
+from synapse_sdk.plugins.enums import PluginCategory, RunMethod
+from synapse_sdk.plugins.models import Run
+from synapse_sdk.utils.pydantic.validators import non_blank
+
+
+class AnnotationMethod(str, Enum):
+    FILE = 'file'
+    INFERENCE = 'inference'
+
+
+class AnnotateTaskDataStatus(str, Enum):
+    SUCCESS = 'success'
+    FAILED = 'failed'
+
+
+class ToTaskRun(Run):
+    class AnnotateTaskDataLog(BaseModel):
+        """Log model for annotate task data."""
+
+        task_info: str | None
+        status: AnnotateTaskDataStatus
+        created: str
+
+    class MetricsRecord(BaseModel):
+        """Metrics record model."""
+
+        stand_by: int
+        failed: int
+        success: int
+
+    def log_annotate_task_data(self, task_info: dict, status: AnnotateTaskDataStatus):
+        """Log annotate task data."""
+        now = datetime.now().isoformat()
+        self.log(
+            'annotate_task_data',
+            self.AnnotateTaskDataLog(task_info=json.dumps(task_info), status=status, created=now).model_dump(),
+        )
+
+    def log_metrics(self, record: MetricsRecord, category: str):
+        """Log FileToTask metrics.
+
+        Args:
+            record (MetricsRecord): The metrics record to log.
+            category (str): The category of the metrics.
+        """
+        record = self.MetricsRecord.model_validate(record)
+        self.set_metrics(value=record.dict(), category=category)
+
+
+class ToTaskParams(BaseModel):
+    """ToTask action parameters.
+
+    Args:
+        name (str): The name of the action.
+        description (str | None): The description of the action.
+        project (int): The project ID.
+        task_filter (dict): The filter of tasks.
+        method (AnnotationMethod): The method of annotation.
+        target_specification_name (str | None): The name of the target specification.
+        pre_processor (int | None): The pre processor ID.
+        pre_processor_params (dict): The params of the pre processor.
+    """
+
+    name: Annotated[str, AfterValidator(non_blank)]
+    description: str | None = None
+    project: int
+    task_filter: dict
+    method: AnnotationMethod | None = None
+    target_specification_name: str | None = None
+    pre_processor: int | None = None
+    pre_processor_params: dict
+
+    @field_validator('project', mode='before')
+    @classmethod
+    def check_project_exists(cls, value: str, info) -> str:
+        """Validate synapse-backend project exists."""
+        if not value:
+            return value
+
+        action = info.context['action']
+        client = action.client
+        try:
+            client.get_project(value)
+        except ClientError:
+            raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
+        return value
+
+
+@register_action
+class ToTaskAction(Action):
+    """ToTask action class.
+
+    * Annotate data to tasks.
+    """
+
+    name = 'to_task'
+    category = PluginCategory.PRE_ANNOTATION
+    method = RunMethod.JOB
+    run_class = ToTaskRun
+    progress_categories = {
+        'annotate_task_data': {
+            'proportion': 100,
+        },
+    }
+    metrics_categories = {'annotate_task_data'}
+
+    def start(self):
+        """Start file_to_task action.
+
+        * Generate tasks.
+        * Annotate data to tasks.
+        """
+
+        # entrypoint = self.entrypoint(self.run)
+        client = self.run.client
+        # project_id = self.params['project']
+        # project = client.get_project(project_id)
+        # data_collection_id = project['dataset']
+        # data_collection = client.get_dataset(data_collection_id)
+
+        # Generate tasks if provided project is empty.
+        task_ids_query_params = {
+            'project': self.params['project'],
+            'fields': 'id',
+        }
+        if self.params.get('task_filter'):
+            task_ids_query_params.update(self.params['task_filter'])
+        task_ids_generator, task_ids_count = client.list_tasks(params=task_ids_query_params, list_all=True)
+        task_ids = [item['id'] for item in task_ids_generator]
+
+        # If no tasks found, break the job.
+        if not task_ids_count:
+            self.run.log_message('Tasks to annotate not found.')
+            self.run.end_log()
+
+        # Annotate data to tasks.
+        task_data_annotation_type = self.params['task_data_annotation_type']
+        if task_data_annotation_type == AnnotationMethod.FILE:
+            self._handle_annotate_data_from_files(task_ids)
+        elif task_data_annotation_type == AnnotationMethod.INFERENCE:
+            self._handle_annotate_data_with_inference(task_ids)
+
+        return {}
+
+    def _handle_annotate_data_from_files(self, task_ids: list[int]):
+        """Handle annotate data from files to tasks.
+
+        Args:
+            task_ids (list[int]): List of task IDs to annotate data to.
+        """
+        client = self.run.client
+        if not (target_task_data_specification_code := self.params.get('target_task_data_specification_code')):
+            self.run.log_message('Target task data specification code not found.')
+            self.run.end_log()
+        task_params = {
+            'fields': 'id,data,data_unit',
+            'expand': 'data_unit',
+        }
+        for task_id in task_ids:
+            task = client.get_task(task_id, params=task_params)
+            data_file = task['data_unit']['files'].get(target_task_data_specification_code)
+            if not data_file:
+                self.run.log_message(f'File specification not found for task {task_id}')
+                self.run.log_annotate_task_data(
+                    {'task_id': task_id, 'error': 'File specification not found'}, AnnotateTaskDataStatus.FAILED
+                )
+                continue
+            url = data_file.get('url')
+            if not url:
+                self.run.log_message(f'URL not found for task {task_id}')
+                self.run.log_annotate_task_data(
+                    {'task_id': task_id, 'error': 'URL not found'}, AnnotateTaskDataStatus.FAILED
+                )
+                continue
+
+            try:
+                response = requests.get(url)
+                response.raise_for_status()  # Raise an exception for 4XX/5XX responses
+                data = json.loads(response.content)
+                client.patch_task(task_id, data={'data': data})
+
+                # Log success
+                self.run.log_annotate_task_data({'task_id': task_id, 'url': url}, AnnotateTaskDataStatus.SUCCESS)
+            except Exception as e:
+                self.run.log_message(f'Failed to get content from URL for task {task_id}: {str(e)}')
+                self.run.log_annotate_task_data(
+                    {'task_id': task_id, 'url': url, 'error': str(e)}, AnnotateTaskDataStatus.FAILED
+                )
+                continue
+
+    def _handle_annotate_data_with_inference(self, task_ids: list[int]):
+        """Handle annotate data with inference to tasks.
+
+        Args:
+            task_ids (list[int]): List of task IDs to annotate data to.
+        """
+        self.run.log_message('Pre annotation with inference is not supported.')
+        self.run.end_log()
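One wrinkle worth flagging: ToTaskParams declares method and target_specification_name, but start() reads self.params['task_data_annotation_type'] and _handle_annotate_data_from_files reads self.params.get('target_task_data_specification_code'), so the keys consumed at runtime differ from the declared model. An illustrative payload that satisfies the declared model (all values hypothetical):

    params = {
        'name': 'pre-annotate-from-files',      # must be non-blank (non_blank validator)
        'project': 42,                          # verified against the backend via client.get_project
        'task_filter': {'status': 'stand_by'},  # merged into the task-listing query
        'method': 'file',                       # AnnotationMethod.FILE
        'target_specification_name': 'annotation_json',
        'pre_processor': None,
        'pre_processor_params': {},             # required: no default on the model
    }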
synapse_sdk/plugins/categories/{upload/templates/plugin/task_pre_annotation.py → pre_annotation/templates/plugin/to_task.py}
@@ -1,4 +1,4 @@
-class TaskPreAnnotation:
+class AnnotationToTask:
     def __init__(self, run, *args, **kwargs):
        """Initialize the plugin task pre annotation action class.
 
@@ -6,9 +6,3 @@ class TaskPreAnnotation:
             run: Plugin run object.
        """
        self.run = run
-
-    def handle_annotate_data_from_files(self):
-        pass
-
-    def handle_annotate_data_with_inference(self):
-        pass
synapse_sdk/plugins/categories/upload/actions/upload.py
@@ -1,6 +1,7 @@
 import json
 from datetime import datetime
 from enum import Enum
+from pathlib import Path
 from typing import Annotated, Dict, List
 
 from pydantic import AfterValidator, BaseModel, field_validator
@@ -8,6 +9,7 @@ from pydantic_core import PydanticCustomError
 
 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.clients.utils import get_batched_list
+from synapse_sdk.clients.validators.collections import FileSpecificationValidator
 from synapse_sdk.i18n import gettext as _
 from synapse_sdk.plugins.categories.base import Action
 from synapse_sdk.plugins.categories.decorators import register_action
@@ -45,6 +47,13 @@ class UploadRun(Run):
         status: UploadStatus
         created: str
 
+    class MetricsRecord(BaseModel):
+        """Metrics record model."""
+
+        stand_by: int
+        failed: int
+        success: int
+
     def log_data_file(self, data_file_info: dict, status: UploadStatus):
         """Upload data_file log.
 
@@ -82,6 +91,15 @@ class UploadRun(Run):
         now = datetime.now().isoformat()
         self.log('upload_task', self.TaskLog(task_id=task_id, status=status.value, created=now).model_dump())
 
+    def log_metrics(self, record: MetricsRecord, category: str):
+        """Log upload metrics.
+        Args:
+            record (MetricsRecord): The metrics record to log.
+            category (str): The category of the metrics.
+        """
+        record = self.MetricsRecord.model_validate(record)
+        self.set_metrics(value=record.model_dump(), category=category)
+
 
 class UploadParams(BaseModel):
     """Upload action parameters.
@@ -94,8 +112,6 @@ class UploadParams(BaseModel):
         storage (int): The storage of the action.
         collection (int): The collection of the action.
         project (int | None): The project of the action.
-        is_generate_tasks (bool): The flag to generate tasks.
-        is_generate_ground_truths (bool): The flag to generate ground truths
     """
 
     name: Annotated[str, AfterValidator(non_blank)]
@@ -104,6 +120,8 @@ class UploadParams(BaseModel):
     storage: int
     collection: int
     project: int | None
+    is_generate_tasks: bool = False
+    is_generate_ground_truths: bool = False
 
     @field_validator('storage', mode='before')
     @classmethod
@@ -162,6 +180,12 @@ class UploadAction(Action):
         analyze_collection: The progress category for the analyze collection process.
         data_file_upload: The progress category for the upload process.
         generate_data_units: The progress category for the generate data units process.
+        generate_tasks: The progress category for the generate tasks process.
+        generate_ground_truths: The progress category for the generate ground truths process.
+
+    Metrics Categories:
+        data_file: The metrics category for the data file.
+        data_unit: The metrics category for the data unit.
     """
 
     name = 'upload'
@@ -170,15 +194,16 @@ class UploadAction(Action):
     run_class = UploadRun
     progress_categories = {
         'analyze_collection': {
-            'proportion':
+            'proportion': 0,
         },
         'upload_data_files': {
-            'proportion':
+            'proportion': 0,
         },
         'generate_data_units': {
-            'proportion':
+            'proportion': 0,
         },
     }
+    metrics_categories = {'data_file', 'data_unit'}
 
     def get_uploader(self, path, file_specification, organized_files):
         """Get uploader from entrypoint."""
@@ -232,6 +257,12 @@ class UploadAction(Action):
         generated_data_units = self._generate_data_units(uploaded_files, upload_result_count)
         result['generated_data_units_count'] = len(generated_data_units)
 
+        # Setup task with uploaded synapse-backend data units.
+        if not len(generated_data_units):
+            self.run.log_message('No data units were generated.', context=Context.WARNING.value)
+            self.run.end_log()
+            return result
+
         self.run.end_log()
         return result
 
@@ -263,6 +294,7 @@ class UploadAction(Action):
         # Initialize progress
         self.run.set_progress(0, organized_files_count, category='upload_data_files')
         self.run.log_message('Uploading data files...')
+        data_file_metrics_record = self.run.MetricsRecord(stand_by=organized_files_count, success=0, failed=0)
 
         client = self.run.client
         collection_id = self.params['data_collection']
@@ -272,6 +304,9 @@ class UploadAction(Action):
         for organized_file in organized_files:
             uploaded_data_file = client.upload_data_file(organized_file, collection_id)
             self.run.log_data_file(organized_file, UploadStatus.SUCCESS)
+            data_file_metrics_record.stand_by -= 1
+            data_file_metrics_record.success += 1
+            self.run.log_metrics(record=data_file_metrics_record, category='data_file')
             upload_result.append(uploaded_data_file)
             self.run.set_progress(current_progress, organized_files_count, category='upload_data_files')
             current_progress += 1
@@ -292,6 +327,7 @@ class UploadAction(Action):
         """
         # Initialize progress
         self.run.set_progress(0, upload_result_count, category='generate_data_units')
+        data_unit_metrics_record = self.run.MetricsRecord(stand_by=upload_result_count, success=0, failed=0)
 
         client = self.run.client
 
@@ -301,6 +337,9 @@ class UploadAction(Action):
         batches_count = len(batches)
         for batch in batches:
             created_data_units = client.create_data_units(batch)
+            data_unit_metrics_record.stand_by -= len(created_data_units)
+            data_unit_metrics_record.success += len(created_data_units)
+            self.run.log_metrics(record=data_unit_metrics_record, category='data_unit')
             generated_data_units.append(created_data_units)
             self.run.set_progress(current_progress, batches_count, category='generate_data_units')
             current_progress += 1
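The metrics additions keep one MetricsRecord per category and re-log the whole record after each unit of work, so the backend always sees current stand_by/success/failed counts. A standalone sketch of that counter flow, using a plain pydantic model equivalent to UploadRun.MetricsRecord:

    from pydantic import BaseModel

    class MetricsRecord(BaseModel):
        stand_by: int
        failed: int
        success: int

    record = MetricsRecord(stand_by=3, success=0, failed=0)
    for _ in range(3):  # one iteration per uploaded data file
        record.stand_by -= 1
        record.success += 1
        # in the action this is: self.run.log_metrics(record=record, category='data_file')
    print(record.model_dump())  # {'stand_by': 0, 'failed': 0, 'success': 3}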
@@ -311,3 +350,120 @@
         self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
 
         return sum(generated_data_units, [])
+
+    def _validate_organized_files(self, organized_files: List, file_specification_template: Dict) -> bool:
+        """Validate organized files from Uploader."""
+        validator = FileSpecificationValidator(file_specification_template, organized_files)
+        return validator.validate()
+
+    def _organize_files(self, directory: Path, file_specification: List) -> List:
+        """Organize files according to the file specification.
+        This method handles type-based directory structure where files are organized in
+        directories named after file types (e.g., 'image_1/' directory contains image files
+        like '1.jpg', '2.jpg'). For each dataset ID found in the primary directory, it attempts
+        to find corresponding files in all type directories.
+
+        TODO : Add Logic to handle file specific name patterns and extensions.
+        (e.g. pcd:S_DCH_230725_0156_LR_037.pcd, image_1:S_DCH_230725_0156_FC_037, image_2:S_DCH_230725_0156_LF_037.jpg)
+        Args:
+            directory (Path): Root directory containing files to organize.
+            file_specification (List): File specification list.
+        Returns:
+            List: List of dictionaries containing organized files.
+        """
+        organized_files = []
+        self.run.log_message(f'Looking for files in {directory}...')
+
+        # Check for type-based directory structure (e.g., image_1/, pcd_1/)
+        type_dirs = {}
+        type_extensions = {}  # Store common extensions for each type directory
+
+        for spec in file_specification:
+            spec_name = spec['name']
+
+            spec_dir = directory / spec_name
+            if spec_dir.exists() and spec_dir.is_dir():
+                type_dirs[spec_name] = spec_dir
+
+                # Analyze file extensions in this directory
+                extensions = {}
+                for file_path in spec_dir.glob('*'):
+                    if file_path.is_file():
+                        ext = file_path.suffix.lower()
+                        extensions[ext] = extensions.get(ext, 0) + 1
+
+                # Find the most common extension
+                if extensions:
+                    common_ext = max(extensions.items(), key=lambda x: x[1])[0]
+                    type_extensions[spec_name] = common_ext
+                    self.run.log_message(f'Found type directory: {spec_name} (common extension: {common_ext})')
+
+        # If type-based directories don't exist, exit early
+        if not type_dirs:
+            self.run.log_message('No type-based directory structure found.', context=Context.INFO.value)
+            return organized_files
+
+        self.run.log_message('Detected type-based directory structure')
+
+        # Build a comprehensive map of all dataset IDs across all type directories
+        dataset_files = {}  # Dictionary: file_name -> {spec_name -> file_path}
+
+        # First pass: collect all dataset IDs from all type directories
+        for spec_name, dir_path in type_dirs.items():
+            for file_path in dir_path.glob('*'):
+                if file_path.is_file():
+                    file_name = file_path.stem
+
+                    # Initialize dataset entry if it doesn't exist
+                    if file_name not in dataset_files:
+                        dataset_files[file_name] = {}
+
+                    # Map this file to its specification
+                    if spec_name not in dataset_files[file_name]:
+                        dataset_files[file_name][spec_name] = file_path
+                    else:
+                        # If multiple files with same file_name for same spec, use most recent
+                        existing_file = dataset_files[file_name][spec_name]
+                        if file_path.stat().st_mtime > existing_file.stat().st_mtime:
+                            dataset_files[file_name][spec_name] = file_path
+                            self.run.log_message(
+                                f"Found newer file for name of {file_name}, spec '{spec_name}': "
+                                f'{file_path.name} (replacing {existing_file.name})'
+                            )
+
+        if not dataset_files:
+            self.run.log_message('No dataset files found.', context=Context.WARNING.value)
+            return organized_files
+
+        self.run.log_message(f'Found {len(dataset_files)} potential datasets by ID')
+
+        # Second pass: organize valid datasets
+        for file_name, files_dict in sorted(dataset_files.items()):
+            self.run.log_message(f'Processing file name: {file_name}')
+
+            # Add file spec details for logging
+            for spec_name, file_path in files_dict.items():
+                self.run.log_message(f"Mapped '{spec_name}' to: {file_path.name}")
+
+            # Check if all required files are present
+            required_specs = [spec['name'] for spec in file_specification if spec.get('is_required', False)]
+            if all(req in files_dict for req in required_specs):
+                # Create metadata for this dataset
+                meta_data = {
+                    'origin_file_stem': file_name,
+                    'created_at': datetime.now().isoformat(),
+                }
+
+                # Add the organized dataset
+                organized_files.append({'files': files_dict, 'meta': meta_data})
+                self.run.log_message(f'Successfully organized dataset for ID {file_name}')
+            else:
+                # Missing required files warning
+                missing = [req for req in required_specs if req not in files_dict]
+                self.run.log_message(
+                    f'Dataset ID {file_name} is missing required files: {", ".join(missing)}',
+                    context=Context.WARNING.value,
+                )
+
+        self.run.log_message(f'Total datasets organized: {len(organized_files)}')
+        return organized_files
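_organize_files groups files across per-spec subdirectories by file stem, emitting one dataset per stem that has every is_required spec. A sketch of the expected layout and call (paths, spec names, and the 'action' instance are hypothetical):

    from pathlib import Path

    # Layout: /data/batch_01/image_1/0001.jpg, 0002.jpg
    #         /data/batch_01/pcd/0001.pcd
    file_specification = [
        {'name': 'image_1', 'is_required': True},
        {'name': 'pcd', 'is_required': False},
    ]
    datasets = action._organize_files(Path('/data/batch_01'), file_specification)
    # One entry per qualifying stem, e.g.:
    # {'files': {'image_1': Path('.../image_1/0001.jpg'), 'pcd': Path('.../pcd/0001.pcd')},
    #  'meta': {'origin_file_stem': '0001', 'created_at': '...'}}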
synapse_sdk/plugins/categories/upload/templates/config.yaml
@@ -1,10 +1,9 @@
 actions:
   upload:
     entrypoint: plugin.upload.Uploader
+    options:
+      allow_generate_tasks: false # Allow the plugin to generate tasks for the uploaded data
+      allow_generate_ground_truths: false # Allow the plugin to generate ground truths for the uploaded data
     supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
     ui_schema: |
       Dumped FormKit Schema for upload plugin custom options
-  task_pre_annotation:
-    entrypoint: plugin.upload.TaskPreAnnotation
-    ui_schema: |
-      Dumped FormKit Schema for upload plugin custom options
{synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/RECORD
@@ -29,9 +29,9 @@ synapse_sdk/clients/agent/core.py,sha256=x2jgORTjT7pJY67SLuc-5lMG6CD5OWpy8UgGeTf
 synapse_sdk/clients/agent/ray.py,sha256=JrwLyVOUDG2yYsbPrxyUtWbM-FWp9B6Bl_GdDby0rt8,1559
 synapse_sdk/clients/agent/service.py,sha256=s7KuPK_DB1nr2VHrigttV1WyFonaGHNrPvU8loRxHcE,478
 synapse_sdk/clients/backend/__init__.py,sha256=MC3pndBk-SPyW9L6WnrTozoub9-EK7auXFvPHCaxeFU,1209
-synapse_sdk/clients/backend/annotation.py,sha256=
+synapse_sdk/clients/backend/annotation.py,sha256=rtof8YVQ6M1wNm66EGbg6GRGDMAyY1YzkTnzN3GRT5o,1220
 synapse_sdk/clients/backend/core.py,sha256=5XAOdo6JZ0drfk-FMPJ96SeTd9oja-VnTwzGXdvK7Bg,1027
-synapse_sdk/clients/backend/data_collection.py,sha256=
+synapse_sdk/clients/backend/data_collection.py,sha256=uI-_ByLh-Xez4VIIVRBO8FCNUpDcxhBcLxCVFb_aG7o,4104
 synapse_sdk/clients/backend/hitl.py,sha256=na2mSXFud92p4zUEuagcDWk2klxO7xn-e86cm0VZEvs,709
 synapse_sdk/clients/backend/integration.py,sha256=9LjkYcBpi7aog-MODSDS4RlmYahypu65qxBj-AcY7xc,2683
 synapse_sdk/clients/backend/ml.py,sha256=JoPH9Ly2E3HJ7S5mdGLtcGq7ruQVVrYfWArogwZLlms,1193
@@ -89,9 +89,11 @@ synapse_sdk/plugins/categories/post_annotation/templates/plugin/post_annotation.
 synapse_sdk/plugins/categories/pre_annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation.py,sha256=6ib3RmnGrjpsQ0e_G-mRH1lfFunQ3gh2M831vuDn7HU,344
-synapse_sdk/plugins/categories/pre_annotation/
+synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py,sha256=J_buXjizGGjme1U3Kn8QtvXIEbfJN_Iud6uX8t0AWq8,7490
+synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml,sha256=A_yhaLm505FrOkNuy_mhqa6UUN3GSZphzJUiAN7rEoQ,239
 synapse_sdk/plugins/categories/pre_annotation/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/pre_annotation/templates/plugin/pre_annotation.py,sha256=HBHxHuv2gMBzDB2alFfrzI_SZ1Ztk6mo7eFbR5GqHKw,106
+synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py,sha256=fgmTNaPgk8MnElEds4A6LiSmktNTVTNR7jO7dM7FwbI,223
 synapse_sdk/plugins/categories/smart_tool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/smart_tool/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/smart_tool/actions/auto_label.py,sha256=fHiqA8ntmzjs2GMVMuByR7Clh2zhLie8OPF9B8OmwxM,1279
@@ -100,11 +102,9 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
 synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
 synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/upload/actions/
-synapse_sdk/plugins/categories/upload/
-synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=1O0kMfkFMGYwnpBcttrlC9bu4xzU9docw2MBOq_Elmo,417
+synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=zZx14F3789IJUH7BmZdXI43HXy0RdLknqT3aZk0WIHQ,18965
+synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=kwHNWHFYbzDi1mEh40KozatPZbZGH44dlP0t0J7ejJw,483
 synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py,sha256=9XkUZu7USjVjDPufM0NlYmkdKfV7Hf_9v5GN1RgZzS0,350
 synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
 synapse_sdk/plugins/templates/cookiecutter.json,sha256=NxOWk9A_v1pO0Ny4IYT9Cj5iiJ16--cIQrGC67QdR0I,396
 synapse_sdk/plugins/templates/hooks/post_gen_project.py,sha256=jqlYkY1O2TxIR-Vh3gnwILYy8k-D39Xx66d2KNQVMCs,147
@@ -136,9 +136,9 @@ synapse_sdk/utils/storage/providers/__init__.py,sha256=x7RGwZryT2FpVxS7fGWryRVpq
 synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_ncGITZrL0u5wEA,363
 synapse_sdk/utils/storage/providers/s3.py,sha256=W94rQvhGRXti3R4mYP7gmU5pcyCQpGFIBLvxxqLVdRM,2231
 synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
+synapse_sdk-1.0.0a59.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
+synapse_sdk-1.0.0a59.dist-info/METADATA,sha256=o12nDh_7n--A4zsPrm14q8c1ccDYZULnD6SgLIr8q3g,1303
+synapse_sdk-1.0.0a59.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+synapse_sdk-1.0.0a59.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
+synapse_sdk-1.0.0a59.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
+synapse_sdk-1.0.0a59.dist-info/RECORD,,
synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py
@@ -1,101 +0,0 @@
-from enum import Enum
-from typing import Annotated
-
-from pydantic import AfterValidator, BaseModel, field_validator
-from pydantic_core import PydanticCustomError
-
-from synapse_sdk.clients.exceptions import ClientError
-from synapse_sdk.plugins.categories.base import Action
-from synapse_sdk.plugins.categories.decorators import register_action
-from synapse_sdk.plugins.enums import PluginCategory, RunMethod
-from synapse_sdk.plugins.models import Run
-from synapse_sdk.utils.pydantic.validators import non_blank
-
-
-class TaskDataAnnotationType(str, Enum):
-    FILE = 'file'
-    INFERENCE = 'inference'
-
-
-class TaskPreAnnotationRun(Run):
-    pass
-
-
-class TaskPreAnnotationParams(BaseModel):
-    """TaskPreAnnotation action parameters.
-
-    Args:
-        name (str): The name of the action.
-        description (str | None): The description of the action.
-        project (int): The project ID.
-        data_collection (int): The data collection ID.
-        task_data_annotation_type (TaskDataAnnotationType): The type of task data annotation.
-    """
-
-    name: Annotated[str, AfterValidator(non_blank)]
-    description: str | None
-    project: int
-    data_collection: int
-    task_data_annotation_type: TaskDataAnnotationType
-
-    @field_validator('data_collection', mode='before')
-    @classmethod
-    def check_data_collection_exists(cls, value: str, info) -> str:
-        """Validate synapse-backend collection exists."""
-        action = info.context['action']
-        client = action.client
-        try:
-            client.get_data_collection(value)
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking data collection exists.')
-        return value
-
-    @field_validator('project', mode='before')
-    @classmethod
-    def check_project_exists(cls, value: str, info) -> str:
-        """Validate synapse-backend project exists."""
-        if not value:
-            return value
-
-        action = info.context['action']
-        client = action.client
-        try:
-            client.get_project(value)
-        except ClientError:
-            raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
-        return value
-
-
-@register_action
-class TaskPreAnnotationAction(Action):
-    """TaskPreAnnotation action class.
-
-    * Annotate data to tasks.
-    """
-
-    name = 'task_pre_annotation'
-    category = PluginCategory.UPLOAD
-    method = RunMethod.JOB
-    run_class = TaskPreAnnotationRun
-    progress_categories = {
-        'generate_tasks': {
-            'proportion': 10,
-        },
-        'annotate_task_data': {
-            'proportion': 90,
-        },
-    }
-
-    def start(self):
-        """Start task_pre_annotation action.
-
-        * Generate tasks.
-        * Annotate data to tasks.
-        """
-        task_pre_annotation = self.get_task_pre_annotation()
-        task_pre_annotation.handle_annotate_data_from_files()
-        return {}
-
-    def get_task_pre_annotation(self):
-        """Get task pre annotation entrypoint."""
-        return self.entrypoint()
{synapse_sdk-1.0.0a57.dist-info → synapse_sdk-1.0.0a59.dist-info}/WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt: files without changes.