synapse-sdk 1.0.0a79__py3-none-any.whl → 1.0.0a81__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/shared/enums.py +60 -0
- synapse_sdk/utils/converters/coco/from_dm.py +56 -3
- synapse_sdk/utils/converters/coco/to_dm.py +102 -0
- synapse_sdk/utils/converters/dm/__init__.py +24 -76
- synapse_sdk/utils/converters/dm/from_v1.py +300 -88
- synapse_sdk/utils/converters/dm/to_v1.py +135 -22
- synapse_sdk/utils/converters/pascal/from_dm.py +48 -3
- synapse_sdk/utils/converters/pascal/to_dm.py +80 -1
- synapse_sdk/utils/converters/yolo/to_dm.py +12 -8
- {synapse_sdk-1.0.0a79.dist-info → synapse_sdk-1.0.0a81.dist-info}/METADATA +1 -1
- {synapse_sdk-1.0.0a79.dist-info → synapse_sdk-1.0.0a81.dist-info}/RECORD +15 -15
- {synapse_sdk-1.0.0a79.dist-info → synapse_sdk-1.0.0a81.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0a79.dist-info → synapse_sdk-1.0.0a81.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a79.dist-info → synapse_sdk-1.0.0a81.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0a79.dist-info → synapse_sdk-1.0.0a81.dist-info}/top_level.txt +0 -0
synapse_sdk/shared/enums.py
CHANGED
|
@@ -7,3 +7,63 @@ class Context(str, Enum):
|
|
|
7
7
|
WARNING = 'warning'
|
|
8
8
|
DANGER = 'danger'
|
|
9
9
|
ERROR = 'error'
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SupportedTools(Enum):
    """Annotation tools known to the converters.

    Each member's value is the pair ``(annotation_tool, method_name)``; both
    parts are also stored on the member as attributes of the same names.

    * TODO: Need dynamic configuration by referencing apps/annotation/categories/{file_type}/settings.py.
    * Currently difficult to configure due to non-standardized prompt file types.
    """

    BOUNDING_BOX = ('bounding_box', 'bounding_box')
    NAMED_ENTITY = ('named_entity', 'named_entity')
    CLASSIFICATION = ('classification', 'classification')
    POLYLINE = ('polyline', 'polyline')
    KEYPOINT = ('keypoint', 'keypoint')
    BOUNDING_BOX_3D = ('3d_bounding_box', '3d_bounding_box')
    # Both segmentation members share the 'segmentation' annotation tool and
    # differ only in their method name.
    IMAGE_SEGMENTATION = ('segmentation', 'image_segmentation')
    VIDEO_SEGMENTATION = ('segmentation', 'video_segmentation')
    SEGMENTATION_3D = ('3d_segmentation', '3d_segmentation')
    POLYGON = ('polygon', 'polygon')
    RELATION = ('relation', 'relation')
    GROUP = ('group', 'group')
    PROMPT = ('prompt', 'prompt')
    ANSWER = ('answer', 'answer')

    def __init__(self, annotation_tool, method_name):
        # Enum unpacks the member's tuple value into these two arguments.
        self.annotation_tool, self.method_name = annotation_tool, method_name

    @classmethod
    def get_all_values(cls):
        """Return the ``(annotation_tool, method_name)`` value of every member, in definition order."""
        values = []
        for member in cls:
            values.append(member.value)
        return values

    @classmethod
    def get_tools_for_file_type(cls, file_type):
        """Return the members usable for ``file_type``.

        RELATION, GROUP and CLASSIFICATION are always included first; the
        members specific to 'image', 'video', 'pcd' or 'text' follow. Any other
        ``file_type`` yields only the three common members.
        """
        planar = (cls.BOUNDING_BOX, cls.POLYLINE, cls.KEYPOINT)
        extras_by_type = (
            ('image', planar + (cls.IMAGE_SEGMENTATION, cls.POLYGON)),
            ('video', planar + (cls.VIDEO_SEGMENTATION, cls.POLYGON)),
            ('pcd', (cls.BOUNDING_BOX_3D, cls.SEGMENTATION_3D)),
            ('text', (cls.PROMPT, cls.ANSWER, cls.NAMED_ENTITY)),
        )

        selected = [cls.RELATION, cls.GROUP, cls.CLASSIFICATION]
        for type_name, extras in extras_by_type:
            if file_type == type_name:
                selected.extend(extras)
                break
        return selected
|
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
import os
|
|
4
4
|
import shutil
|
|
5
5
|
from glob import glob
|
|
6
|
-
from typing import Any, Dict
|
|
6
|
+
from typing import Any, Dict, IO
|
|
7
7
|
|
|
8
8
|
from PIL import Image
|
|
9
9
|
from tqdm import tqdm
|
|
@@ -22,14 +22,23 @@ class FromDMToCOCOConverter(FromDMConverter):
|
|
|
22
22
|
# 'audio': ['.wav', '.mp3', ...]
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
def __init__(
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
root_dir=None,
|
|
28
|
+
info_dict=None,
|
|
29
|
+
licenses_list=None,
|
|
30
|
+
data_type='img',
|
|
31
|
+
is_categorized_dataset=False,
|
|
32
|
+
is_single_conversion=False,
|
|
33
|
+
):
|
|
26
34
|
"""Args:
|
|
27
35
|
root_dir (str): Root directory containing data.
|
|
28
36
|
info_dict, licenses_list: COCO metadata.
|
|
29
37
|
data_type (str): Which data type to use (default: 'img').
|
|
30
38
|
is_categorized_dataset (bool): Whether to handle train, test, valid splits.
|
|
39
|
+
is_single_conversion (bool): Whether to use single file conversion mode.
|
|
31
40
|
"""
|
|
32
|
-
super().__init__(root_dir, is_categorized_dataset)
|
|
41
|
+
super().__init__(root_dir, is_categorized_dataset, is_single_conversion)
|
|
33
42
|
self.data_type = data_type
|
|
34
43
|
self.info_dict = info_dict or self._default_info()
|
|
35
44
|
self.licenses_list = licenses_list or self._default_licenses()
|
|
@@ -267,3 +276,47 @@ class FromDMToCOCOConverter(FromDMConverter):
|
|
|
267
276
|
shutil.copy(src_path, dst_path)
|
|
268
277
|
else:
|
|
269
278
|
print(f'[WARNING] Image not found: {src_path}')
|
|
279
|
+
|
|
280
|
+
def convert_single_file(self, data: Dict[str, Any], original_file: IO) -> Dict[str, Any]:
    """Produce a COCO dictionary from one DM data dict and its image file object.

    Args:
        data: DM format data dictionary (JSON content).
        original_file: File object of the matching original image; it is opened
            with PIL to read the image size, and its ``name`` attribute (or
            'image.jpg' when absent) becomes the COCO ``file_name``.

    Returns:
        The freshly built ``self.coco_dict`` for this single file.

    Raises:
        RuntimeError: If the converter was not created with is_single_conversion=True.
    """
    if not self.is_single_conversion:
        raise RuntimeError('convert_single_file is only available when is_single_conversion=True')

    # Start from a clean converter state and an empty COCO skeleton.
    self.reset_state()
    self.coco_dict = {
        'info': self.info_dict,
        'licenses': self.licenses_list,
        'images': [],
        'annotations': [],
        'categories': [],
    }

    # Only the pixel dimensions are needed from the image itself.
    with Image.open(original_file) as opened_image:
        width, height = opened_image.size

    self.coco_dict['images'].append(
        {
            'id': self.img_id,
            'file_name': getattr(original_file, 'name', 'image.jpg'),
            'width': width,
            'height': height,
            'license': self.license_id,
        }
    )

    # Nothing to annotate when the DM data carries no image entry.
    if 'images' not in data or len(data['images']) == 0:
        return self.coco_dict

    # Single-file mode: only the first image entry is converted.
    first_image = data['images'][0]
    self._process_polylines(first_image)
    self._process_bboxes(first_image)
    self._process_keypoints(first_image)

    return self.coco_dict
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
+
from typing import IO, Any, Dict
|
|
3
4
|
|
|
4
5
|
from synapse_sdk.utils.converters import ToDMConverter
|
|
5
6
|
|
|
@@ -7,6 +8,9 @@ from synapse_sdk.utils.converters import ToDMConverter
|
|
|
7
8
|
class COCOToDMConverter(ToDMConverter):
|
|
8
9
|
"""Convert COCO format annotations to DM (Data Manager) format."""
|
|
9
10
|
|
|
11
|
+
def __init__(self, root_dir: str = None, is_categorized_dataset: bool = False, is_single_conversion: bool = False):
    """Create the converter by forwarding all options to the parent initializer.

    Args:
        root_dir: Passed through unchanged to the parent class.
        is_categorized_dataset: Passed through unchanged; ``convert`` checks it
            before validating dataset splits.
        is_single_conversion: Passed through unchanged; ``convert_single_file``
            raises RuntimeError unless this is true.
    """
    super().__init__(root_dir, is_categorized_dataset, is_single_conversion)
|
|
13
|
+
|
|
10
14
|
def convert(self):
|
|
11
15
|
if self.is_categorized_dataset:
|
|
12
16
|
splits = self._validate_splits(['train', 'valid'], ['test'])
|
|
@@ -111,3 +115,101 @@ class COCOToDMConverter(ToDMConverter):
|
|
|
111
115
|
dm_json = {'images': [dm_img]}
|
|
112
116
|
result[img_filename] = (dm_json, img_path)
|
|
113
117
|
return result
|
|
118
|
+
|
|
119
|
+
def convert_single_file(self, data: Dict[str, Any], original_file: IO) -> Dict[str, Any]:
    """Convert the COCO annotations belonging to one image into DM format.

    The image entry is looked up in ``data['images']`` by comparing the basename
    of ``original_file.name`` with the basename stored under 'file_name',
    'filename' or 'name'.

    Args:
        data: COCO format data dictionary (JSON content).
        original_file: File object for the corresponding original image. It must
            expose a non-empty ``name`` attribute.

    Returns:
        A dictionary with the keys 'dm_json' (``{'images': [dm_img]}``),
        'image_path' (``original_file.name``) and 'image_name' (its basename).

    Raises:
        RuntimeError: If the converter was not created with is_single_conversion=True.
        ValueError: If ``data`` has no images, ``original_file`` has no name, or
            no image entry matches the file name.
    """
    if not self.is_single_conversion:
        raise RuntimeError('convert_single_file is only available when is_single_conversion=True')

    images = data.get('images', [])
    annotations = data.get('annotations', [])
    categories = data.get('categories', [])

    if not images:
        raise ValueError('No images found in COCO data')

    # The file object's name is the only link between the image and the COCO data.
    img_path = getattr(original_file, 'name', None)
    if not img_path:
        raise ValueError('original_file must have a "name" attribute representing its path or filename.')
    img_basename = os.path.basename(img_path)

    # COCO image dicts might use 'file_name', 'filename', or similar.
    name_keys = ('file_name', 'filename', 'name')
    matched_img = next(
        (
            img
            for img in images
            if any(key in img and os.path.basename(img[key]) == img_basename for key in name_keys)
        ),
        None,
    )
    if matched_img is None:
        raise ValueError(f'No matching image found in COCO data for file: {img_basename}')

    img_id = matched_img['id']
    cat_map = {cat['id']: cat for cat in categories}
    anns = [ann for ann in annotations if ann['image_id'] == img_id]

    dm_img = {
        'bounding_box': [],
        'keypoint': [],
        'relation': [],
        'group': [],
    }

    # First pass: bounding boxes. All ids are collected before any group is built.
    bbox_ids = []
    for ann in anns:
        cat = cat_map.get(ann['category_id'], {})
        if 'bbox' in ann and ann['bbox']:
            bbox_id = self._generate_unique_id()
            bbox_ids.append(bbox_id)
            dm_img['bounding_box'].append({
                'id': bbox_id,
                'classification': cat.get('name', str(ann['category_id'])),
                'attrs': ann.get('attrs', []),
                'data': list(ann['bbox']),
            })

    # Second pass: keypoints, plus one group per annotation that has keypoints.
    for ann in anns:
        if 'keypoints' not in ann or not ann['keypoints']:
            continue

        cat = cat_map.get(ann['category_id'], {})
        attrs = ann.get('attrs', [])
        kp_names = cat.get('keypoints', [])
        kps = ann['keypoints']

        # Keypoints are stored flat as (x, y, visibility) triplets; only those
        # with a name in the category definition are converted.
        keypoint_ids = []
        for idx, kp_name in zip(range(len(kps) // 3), kp_names):
            kp_id = self._generate_unique_id()
            keypoint_ids.append(kp_id)
            dm_img['keypoint'].append({
                'id': kp_id,
                'classification': kp_name,
                'attrs': attrs,
                'data': [kps[idx * 3], kps[idx * 3 + 1]],
            })

        # NOTE(review): every group receives ALL bounding-box ids of the image,
        # not just this annotation's own box - confirm this is intended.
        group_ids = bbox_ids + keypoint_ids
        if group_ids:
            dm_img['group'].append({
                'id': self._generate_unique_id(),
                'classification': cat.get('name', str(ann['category_id'])),
                'attrs': attrs,
                'data': group_ids,
            })

    return {
        'dm_json': {'images': [dm_img]},
        'image_path': img_path,
        'image_name': img_basename,
    }
|
|
@@ -1,95 +1,43 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
from synapse_sdk.shared.enums import SupportedTools
|
|
2
5
|
|
|
3
6
|
|
|
4
7
|
class BaseDMConverter(ABC):
|
|
5
8
|
"""Base class for DM format converters."""
|
|
6
9
|
|
|
7
|
-
SUPPORTED_TOOLS =
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
'polyline',
|
|
12
|
-
'keypoint',
|
|
13
|
-
'3d_bounding_box',
|
|
14
|
-
'segmentation',
|
|
15
|
-
'polygon',
|
|
16
|
-
'relation',
|
|
17
|
-
'group',
|
|
18
|
-
]
|
|
10
|
+
SUPPORTED_TOOLS = SupportedTools.get_all_values()
|
|
11
|
+
|
|
12
|
+
def __init__(self, file_type=None):
|
|
13
|
+
"""Initialize the base converter.
|
|
19
14
|
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
Args:
|
|
16
|
+
file_type (str, optional): Type of file being converted (image, video, pcd, text, audio)
|
|
17
|
+
"""
|
|
18
|
+
self.file_type = file_type
|
|
22
19
|
self.tool_processors = self._setup_tool_processors()
|
|
23
20
|
|
|
24
21
|
def _setup_tool_processors(self):
|
|
25
|
-
"""Setup tool processor mapping."""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
'named_entity': self._process_named_entity,
|
|
29
|
-
'classification': self._process_classification,
|
|
30
|
-
'polyline': self._process_polyline,
|
|
31
|
-
'keypoint': self._process_keypoint,
|
|
32
|
-
'3d_bounding_box': self._process_3d_bounding_box,
|
|
33
|
-
'segmentation': self._process_segmentation,
|
|
34
|
-
'polygon': self._process_polygon,
|
|
35
|
-
'relation': self._process_relation,
|
|
36
|
-
'group': self._process_group,
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
@abstractmethod
|
|
40
|
-
def convert(self):
|
|
41
|
-
"""Convert data from one format to another."""
|
|
42
|
-
pass
|
|
43
|
-
|
|
44
|
-
@abstractmethod
|
|
45
|
-
def _process_bounding_box(self, *args, **kwargs):
|
|
46
|
-
"""Process bounding box annotation."""
|
|
47
|
-
pass
|
|
48
|
-
|
|
49
|
-
@abstractmethod
|
|
50
|
-
def _process_named_entity(self, *args, **kwargs):
|
|
51
|
-
"""Process named entity annotation."""
|
|
52
|
-
pass
|
|
53
|
-
|
|
54
|
-
@abstractmethod
|
|
55
|
-
def _process_classification(self, *args, **kwargs):
|
|
56
|
-
"""Process classification annotation."""
|
|
57
|
-
pass
|
|
22
|
+
"""Setup tool processor mapping dynamically based on file_type."""
|
|
23
|
+
if not self.file_type:
|
|
24
|
+
return {}
|
|
58
25
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
"""Process polyline annotation."""
|
|
62
|
-
pass
|
|
26
|
+
processors = {}
|
|
27
|
+
tools = SupportedTools.get_tools_for_file_type(self.file_type)
|
|
63
28
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
pass
|
|
29
|
+
for tool in tools:
|
|
30
|
+
# For other tools, use generic method names
|
|
31
|
+
method_name = f'_convert_{tool.method_name}'
|
|
68
32
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
"""Process 3D bounding box annotation."""
|
|
72
|
-
pass
|
|
33
|
+
if hasattr(self, method_name):
|
|
34
|
+
processors[tool.annotation_tool] = getattr(self, method_name)
|
|
73
35
|
|
|
74
|
-
|
|
75
|
-
def _process_segmentation(self, *args, **kwargs):
|
|
76
|
-
"""Process segmentation annotation."""
|
|
77
|
-
pass
|
|
36
|
+
return processors
|
|
78
37
|
|
|
79
38
|
@abstractmethod
|
|
80
|
-
def
|
|
81
|
-
"""
|
|
82
|
-
pass
|
|
83
|
-
|
|
84
|
-
@abstractmethod
|
|
85
|
-
def _process_relation(self, *args, **kwargs):
|
|
86
|
-
"""Process relation annotation."""
|
|
87
|
-
pass
|
|
88
|
-
|
|
89
|
-
@abstractmethod
|
|
90
|
-
def _process_group(self, *args, **kwargs):
|
|
91
|
-
"""Process group annotation."""
|
|
92
|
-
pass
|
|
39
|
+
def convert(self):
|
|
40
|
+
"""Convert data from one format to another."""
|
|
93
41
|
|
|
94
42
|
def _handle_unknown_tool(self, tool_type, item_id=None):
|
|
95
43
|
"""Handle unknown tool types with consistent warning message."""
|