synapse-sdk 1.0.0a73__py3-none-any.whl → 1.0.0a75__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/clients/backend/annotation.py +0 -4
- synapse_sdk/clients/backend/models.py +2 -0
- synapse_sdk/devtools/docs/sidebars.ts +8 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +208 -32
- synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +3 -0
- synapse_sdk/shared/enums.py +1 -0
- synapse_sdk/utils/converters/__init__.py +3 -1
- synapse_sdk/utils/converters/dm/__init__.py +109 -0
- synapse_sdk/utils/converters/dm/from_v1.py +415 -0
- synapse_sdk/utils/converters/dm/to_v1.py +254 -0
- synapse_sdk/utils/converters/pascal/__init__.py +0 -0
- synapse_sdk/utils/converters/pascal/from_dm.py +177 -0
- synapse_sdk/utils/converters/pascal/to_dm.py +135 -0
- synapse_sdk/utils/converters/yolo/from_dm.py +24 -18
- synapse_sdk/utils/converters/yolo/to_dm.py +185 -0
- synapse_sdk-1.0.0a75.dist-info/METADATA +72 -0
- {synapse_sdk-1.0.0a73.dist-info → synapse_sdk-1.0.0a75.dist-info}/RECORD +21 -14
- synapse_sdk-1.0.0a73.dist-info/METADATA +0 -37
- {synapse_sdk-1.0.0a73.dist-info → synapse_sdk-1.0.0a75.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0a73.dist-info → synapse_sdk-1.0.0a75.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a73.dist-info → synapse_sdk-1.0.0a75.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0a73.dist-info → synapse_sdk-1.0.0a75.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import xml.etree.ElementTree as ET
|
|
5
|
+
from glob import glob
|
|
6
|
+
from typing import Any, List, Optional
|
|
7
|
+
|
|
8
|
+
from PIL import Image
|
|
9
|
+
|
|
10
|
+
from synapse_sdk.utils.converters import FromDMConverter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FromDMToPascalConverter(FromDMConverter):
|
|
14
|
+
"""Convert DM format to Pascal VOC format."""
|
|
15
|
+
|
|
16
|
+
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']
|
|
17
|
+
|
|
18
|
+
def __init__(self, root_dir: str, is_categorized_dataset: bool = False):
    """Initialise the converter with an empty set of collected class names.

    Args:
        root_dir: Passed through unchanged to ``FromDMConverter.__init__``.
        is_categorized_dataset: Passed through unchanged to ``FromDMConverter.__init__``.
    """
    super().__init__(root_dir, is_categorized_dataset)
    # Filled by parse_dm_annotations() and written to classes.txt by save_to_folder().
    self.class_names = set()
|
|
21
|
+
|
|
22
|
+
def find_image_for_base(self, img_dir: str, base: str) -> Optional[str]:
    """Return the first existing ``<base><extension>`` path inside ``img_dir``.

    Extensions are tried in the order listed in ``IMG_EXTENSIONS``.

    Args:
        img_dir: Directory that is searched for the image.
        base: File name without extension.

    Returns:
        The path of the first candidate that exists, or ``None`` when none does.
    """
    candidates = (os.path.join(img_dir, f'{base}{suffix}') for suffix in self.IMG_EXTENSIONS)
    return next((path for path in candidates if os.path.exists(path)), None)
|
|
29
|
+
|
|
30
|
+
def build_pascal_xml(self, img_filename: str, img_size: tuple, objects: List[dict]) -> ET.ElementTree:
    """Assemble the Pascal VOC ``<annotation>`` document for a single image.

    Args:
        img_filename: Written to both the ``<filename>`` and the ``<path>`` element.
        img_size: ``(width, height, depth)`` triple written under ``<size>``.
        objects: Dicts with ``name``, ``xmin``, ``ymin``, ``xmax`` and ``ymax`` keys; each one
            becomes an ``<object>`` element with fixed pose/truncated/difficult values.

    Returns:
        An ``ElementTree`` whose root is the ``<annotation>`` element.
    """

    def add_leaf(parent, tag, text):
        # Create <tag>text</tag> under parent.
        ET.SubElement(parent, tag).text = text

    width, height, depth = img_size

    root = ET.Element('annotation')
    add_leaf(root, 'folder', 'Images')
    add_leaf(root, 'filename', img_filename)
    add_leaf(root, 'path', img_filename)
    add_leaf(ET.SubElement(root, 'source'), 'database', 'Unknown')

    size_node = ET.SubElement(root, 'size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        add_leaf(size_node, tag, str(value))

    add_leaf(root, 'segmented', '0')

    for entry in objects:
        object_node = ET.SubElement(root, 'object')
        add_leaf(object_node, 'name', entry['name'])
        add_leaf(object_node, 'pose', 'Unspecified')
        add_leaf(object_node, 'truncated', '0')
        add_leaf(object_node, 'difficult', '0')
        box_node = ET.SubElement(object_node, 'bndbox')
        for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
            add_leaf(box_node, corner, str(entry[corner]))

    return ET.ElementTree(root)
|
|
57
|
+
|
|
58
|
+
def parse_dm_annotations(self, annotation: dict):
    """Convert the bounding boxes of one DM image annotation into Pascal VOC objects.

    Only ``bounding_box`` entries are converted, because Pascal VOC does not support
    polyline/keypoint annotations by default; every other annotation type is ignored.
    Each class name encountered is added to ``self.class_names``.

    Args:
        annotation: DM annotation dict of a single image. The ``bounding_box`` key may be
            missing, empty, or explicitly ``None``. Each box carries a ``classification``
            and ``data`` as ``[x, y, w, h]``.

    Returns:
        A list of dicts with ``name``, ``xmin``, ``ymin``, ``xmax`` and ``ymax`` (rounded ints).
    """
    objects = []
    # `or ()` also covers an explicit null value, which would otherwise fail on iteration.
    for box in annotation.get('bounding_box') or ():
        class_name = box['classification']
        x, y, w, h = box['data']
        objects.append({
            'name': class_name,
            'xmin': int(round(x)),
            'ymin': int(round(y)),
            'xmax': int(round(x + w)),
            'ymax': int(round(y + h)),
        })
        self.class_names.add(class_name)
    # polyline, keypoint and other annotation types are ignored
    return objects
|
|
74
|
+
|
|
75
|
+
def _convert_split_dir(self, split_dir: str, split_name: str):
    """Convert one split directory into Pascal VOC entries.

    Reads every ``*.json`` file under ``<split_dir>/json`` and pairs it with the image of the
    same base name under ``<split_dir>/original_files``. JSON files without a matching image
    are reported on stdout and skipped.

    Args:
        split_dir: Directory containing the ``json`` and ``original_files`` sub-directories.
        split_name: Label used as the prefix of the "image not found" message.

    Returns:
        A list of ``(xml_tree, xml_filename, img_src, img_name)`` tuples, ordered by JSON path.
    """
    json_dir = os.path.join(split_dir, 'json')
    img_dir = os.path.join(split_dir, 'original_files')
    results = []
    # glob() returns matches in arbitrary order; sorting keeps repeated exports reproducible.
    for jfile in sorted(glob(os.path.join(json_dir, '*.json'))):
        base = os.path.splitext(os.path.basename(jfile))[0]
        img_path = self.find_image_for_base(img_dir, base)
        if not img_path:
            print(f'[{split_name}] Image for {base} not found, skipping.')
            continue
        with open(jfile, encoding='utf-8') as jf:
            data = json.load(jf)
        # Only the first entry of `images` is converted.
        img_ann = data['images'][0]
        with Image.open(img_path) as img:
            width, height = img.size
            # One band per channel, e.g. 3 for RGB.
            depth = len(img.getbands())
        objects = self.parse_dm_annotations(img_ann)
        img_name = os.path.basename(img_path)
        xml_tree = self.build_pascal_xml(img_name, (width, height, depth), objects)
        results.append((xml_tree, base + '.xml', img_path, img_name))
    return results
|
|
97
|
+
|
|
98
|
+
def _convert_root_dir(self):
    """Convert a non-categorized dataset rooted at ``self.root_dir``.

    The root directory has the same ``json`` / ``original_files`` layout as a split directory,
    so the work is delegated to ``_convert_split_dir`` instead of duplicating its loop. The
    label ``'Pascal'`` keeps the "image not found" message prefix as ``[Pascal]``.

    Returns:
        A list of ``(xml_tree, xml_filename, img_src, img_name)`` tuples.
    """
    return self._convert_split_dir(self.root_dir, 'Pascal')
|
|
120
|
+
|
|
121
|
+
def convert(self) -> Any:
    """Convert the DM dataset to Pascal VOC entries and cache them on ``converted_data``.

    The set of collected class names is reset before every run.

    Returns:
        - Categorized dataset: dict ``{split: list of (xml_tree, xml_filename, img_src, img_name)}``
        - Otherwise: list of ``(xml_tree, xml_filename, img_src, img_name)``
    """
    self.class_names = set()
    if not self.is_categorized_dataset:
        self._validate_splits([], [])
        converted = self._convert_root_dir()
    else:
        split_dirs = self._validate_splits(['train', 'valid'], ['test'])
        converted = {name: self._convert_split_dir(path, name) for name, path in split_dirs.items()}
    self.converted_data = converted
    return converted
|
|
141
|
+
|
|
142
|
+
def save_to_folder(self, output_dir: Optional[str] = None):
    """Write the converted Pascal VOC XML files and images, plus ``classes.txt``.

    - Categorized dataset: files go to ``output_dir/{split}/{Annotations, Images}``
    - Otherwise: files go to ``output_dir/{Annotations, Images}``

    ``convert()`` is run first when no converted data is cached yet. ``classes.txt`` is
    written directly under the output directory with one sorted class name per line.

    Args:
        output_dir: Destination directory; falls back to ``self.root_dir`` when falsy.
    """
    outdir = output_dir or self.root_dir
    self.ensure_dir(outdir)
    if self.converted_data is None:
        self.converted_data = self.convert()

    # Normalise both layouts to (base directory, entries) pairs so one loop writes them.
    if self.is_categorized_dataset:
        targets = [(os.path.join(outdir, split), entries) for split, entries in self.converted_data.items()]
    else:
        targets = [(outdir, self.converted_data)]

    for base_dir, entries in targets:
        ann_dir = os.path.join(base_dir, 'Annotations')
        img_dir = os.path.join(base_dir, 'Images')
        os.makedirs(ann_dir, exist_ok=True)
        os.makedirs(img_dir, exist_ok=True)
        for xml_tree, xml_filename, img_src, img_name in entries:
            xml_tree.write(os.path.join(ann_dir, xml_filename), encoding='utf-8', xml_declaration=True)
            dst_path = os.path.join(img_dir, img_name)
            # Skip the copy when the image already sits at its destination.
            if os.path.abspath(img_src) != os.path.abspath(dst_path):
                shutil.copy(img_src, dst_path)

    # Save classes.txt
    with open(os.path.join(outdir, 'classes.txt'), 'w', encoding='utf-8') as f:
        f.writelines(f'{c}\n' for c in sorted(self.class_names))
    print(f'Pascal VOC data exported to {outdir}')
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import xml.etree.ElementTree as ET
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
from synapse_sdk.utils.converters import ToDMConverter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PascalToDMConverter(ToDMConverter):
|
|
11
|
+
"""Convert Pascal VOC formatted datasets to DM format."""
|
|
12
|
+
|
|
13
|
+
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']
|
|
14
|
+
|
|
15
|
+
def convert(self):
    """Convert the Pascal VOC dataset to DM format and cache it on ``converted_data``.

    Returns:
        For a categorized dataset, a dict mapping each split name to its converted data;
        otherwise the converted data of ``self.root_dir``.
    """
    if self.is_categorized_dataset:
        split_dirs = self._validate_splits(['train', 'valid'], ['test'])
        converted = {name: self._convert_pascal_split_to_dm(path) for name, path in split_dirs.items()}
    else:
        converted = self._convert_pascal_split_to_dm(self.root_dir)
    self.converted_data = converted
    return converted
|
|
29
|
+
|
|
30
|
+
def _find_image_path(self, images_dir: str, filename: str) -> Optional[str]:
    """Locate the image referenced by an annotation inside ``images_dir``.

    The exact ``filename`` is tried first; after that its base name is combined with each
    entry of ``IMG_EXTENSIONS`` in order.

    Returns:
        The first candidate path that exists, or ``None`` when none does.
    """
    stem = os.path.splitext(filename)[0]
    candidates = [os.path.join(images_dir, filename)]
    candidates.extend(os.path.join(images_dir, stem + suffix) for suffix in self.IMG_EXTENSIONS)
    return next((path for path in candidates if os.path.exists(path)), None)
|
|
41
|
+
|
|
42
|
+
@staticmethod
def _get_image_size(image_path: str) -> Tuple[int, int]:
    """Return the ``size`` attribute of the image stored at ``image_path``.

    The file is opened with ``PIL.Image.open`` inside a ``with`` block, so the handle is
    released before returning.

    NOTE(review): presumably ``(width, height)`` per Pillow's convention - confirm.
    """
    with Image.open(image_path) as img:
        return img.size
|
|
47
|
+
|
|
48
|
+
def _parse_pascal_xml(self, xml_path: str) -> Tuple[Optional[str], List[Dict[str, Any]]]:
    """Parse a Pascal VOC XML file into its image file name and bounding boxes.

    Objects lacking a ``<name>`` or ``<bndbox>`` element are skipped. An object whose
    ``xmin``/``ymin``/``xmax``/``ymax`` tag is missing, empty or not numeric is skipped as
    well, so that one broken object does not discard the remaining objects of the file.

    Args:
        xml_path: Path of the annotation file.

    Returns:
        ``(filename, objects)``. ``filename`` is the text of ``<filename>`` or ``None`` when
        the element is absent or empty. Each object is
        ``{'classification': name, 'data': [xmin, ymin, width, height]}`` with int values.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()
    filename_elem = root.find('filename')
    filename = filename_elem.text if filename_elem is not None else None

    objects = []
    for obj in root.findall('object'):
        name_elem = obj.find('name')
        bndbox_elem = obj.find('bndbox')
        if name_elem is None or bndbox_elem is None:
            continue
        try:
            # findtext() gives None for a missing tag and '' for an empty one; both fail the
            # float() conversion and are handled below together with non-numeric text.
            xmin, ymin, xmax, ymax = (
                int(float(bndbox_elem.findtext(tag))) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError, OverflowError):
            continue
        objects.append({'classification': name_elem.text, 'data': [xmin, ymin, xmax - xmin, ymax - ymin]})
    return filename, objects
|
|
75
|
+
|
|
76
|
+
def _convert_pascal_split_to_dm(self, split_dir: str) -> Dict[str, Any]:
    """Convert one Pascal VOC split directory into DM annotations.

    Annotation files are read from the first existing directory among ``Annotations`` and
    ``annotations``; images from the first existing one among ``Images``, ``images`` and
    ``JPEGImages``. Files that cannot be processed are reported on stdout and skipped.

    Args:
        split_dir: Directory holding the annotation and image sub-directories.

    Returns:
        Dict mapping image base name to ``(dm_json, image_path)``.

    Raises:
        FileNotFoundError: If no annotation or no image directory exists in ``split_dir``.
    """

    def first_existing_dir(names):
        # First sub-directory of split_dir, in the given order, that exists.
        paths = (os.path.join(split_dir, name) for name in names)
        return next((path for path in paths if os.path.isdir(path)), None)

    annotations_dir = first_existing_dir(['Annotations', 'annotations'])
    if annotations_dir is None:
        raise FileNotFoundError(
            f"No annotations directory found in {split_dir} (tried 'Annotations', 'annotations')."
        )
    images_dir = first_existing_dir(['Images', 'images', 'JPEGImages'])
    if images_dir is None:
        raise FileNotFoundError(
            f"No images directory found in {split_dir} (tried 'Images', 'images', 'JPEGImages')."
        )

    result = {}
    for xml_filename in os.listdir(annotations_dir):
        if not xml_filename.endswith('.xml'):
            continue
        xml_path = os.path.join(annotations_dir, xml_filename)
        try:
            filename, objects = self._parse_pascal_xml(xml_path)
            if filename is None:
                print(f'[WARNING] No filename found in {xml_filename}, skipping.')
                continue
            img_path = self._find_image_path(images_dir, filename)
            if img_path is None:
                print(f'[WARNING] Image not found for {filename}, skipping.')
                continue
            # Pascal VOC only provides boxes; the other DM annotation types stay empty.
            boxes = [
                {
                    'id': self._generate_unique_id(),
                    'classification': obj['classification'],
                    'attrs': [],
                    'data': obj['data'],
                }
                for obj in objects
            ]
            dm_img = {'bounding_box': boxes, 'polygon': [], 'keypoint': [], 'relation': [], 'group': []}
            result[os.path.basename(img_path)] = ({'images': [dm_img]}, img_path)
        except ET.ParseError as e:
            print(f'[WARNING] Failed to parse {xml_filename}: {e}, skipping.')
        except Exception as e:
            # Deliberate best effort: one bad file must not abort the whole split.
            print(f'[WARNING] Error processing {xml_filename}: {e}, skipping.')
    return result
|
|
@@ -58,6 +58,22 @@ class FromDMToYOLOConverter(FromDMConverter):
|
|
|
58
58
|
h = y_max - y_min
|
|
59
59
|
return [cx, cy, w, h]
|
|
60
60
|
|
|
61
|
+
@staticmethod
|
|
62
|
+
def polygon_to_yolo_string(polygon: list, width: int, height: int):
|
|
63
|
+
"""Convert polygon points to normalized YOLO polygon format string (x1 y1 x2 y2 ...)."""
|
|
64
|
+
if not polygon or len(polygon) == 0:
|
|
65
|
+
return ''
|
|
66
|
+
|
|
67
|
+
coords = []
|
|
68
|
+
for point in polygon:
|
|
69
|
+
x, y = point
|
|
70
|
+
# Normalize coordinates to 0-1 range
|
|
71
|
+
x_norm = x / width
|
|
72
|
+
y_norm = y / height
|
|
73
|
+
coords.extend([f'{x_norm:.6f}', f'{y_norm:.6f}'])
|
|
74
|
+
|
|
75
|
+
return ' '.join(coords)
|
|
76
|
+
|
|
61
77
|
@staticmethod
|
|
62
78
|
def keypoints_to_yolo_string(keypoints: list, width: int, height: int):
|
|
63
79
|
"""Convert keypoints to normalized YOLO keypoint format string (x1 y1 v1 x2 y2 v2 ...)."""
|
|
@@ -112,16 +128,11 @@ class FromDMToYOLOConverter(FromDMConverter):
|
|
|
112
128
|
if 'polygon' in img_ann:
|
|
113
129
|
for poly in img_ann['polygon']:
|
|
114
130
|
cidx = self.class_map[poly['classification']]
|
|
115
|
-
|
|
116
|
-
if
|
|
131
|
+
poly_str = self.polygon_to_yolo_string(poly['data'], width, height)
|
|
132
|
+
if poly_str: # Only add if polygon is valid
|
|
133
|
+
label_lines.append(f'{cidx} {poly_str}')
|
|
134
|
+
else:
|
|
117
135
|
print(f'[{split_name}] Polygon for {base} is empty, skipping this polygon.')
|
|
118
|
-
continue
|
|
119
|
-
cx, cy, w, h = bbox
|
|
120
|
-
cx /= width
|
|
121
|
-
cy /= height
|
|
122
|
-
w /= width
|
|
123
|
-
h /= height
|
|
124
|
-
label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}')
|
|
125
136
|
|
|
126
137
|
# keypoint
|
|
127
138
|
if 'keypoint' in img_ann:
|
|
@@ -189,16 +200,11 @@ class FromDMToYOLOConverter(FromDMConverter):
|
|
|
189
200
|
if 'polygon' in img_ann:
|
|
190
201
|
for poly in img_ann['polygon']:
|
|
191
202
|
cidx = self.class_map[poly['classification']]
|
|
192
|
-
|
|
193
|
-
if
|
|
203
|
+
poly_str = self.polygon_to_yolo_string(poly['data'], width, height)
|
|
204
|
+
if poly_str: # Only add if polygon is valid
|
|
205
|
+
label_lines.append(f'{cidx} {poly_str}')
|
|
206
|
+
else:
|
|
194
207
|
print(f'[single] Polygon for {base} is empty, skipping this polygon.')
|
|
195
|
-
continue
|
|
196
|
-
cx, cy, w, h = bbox
|
|
197
|
-
cx /= width
|
|
198
|
-
cy /= height
|
|
199
|
-
w /= width
|
|
200
|
-
h /= height
|
|
201
|
-
label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}')
|
|
202
208
|
|
|
203
209
|
# keypoint
|
|
204
210
|
if 'keypoint' in img_ann:
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any, Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
import yaml
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
from synapse_sdk.utils.converters import ToDMConverter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class YOLOToDMConverter(ToDMConverter):
|
|
11
|
+
"""Convert YOLO formatted datasets to DM format."""
|
|
12
|
+
|
|
13
|
+
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']
|
|
14
|
+
|
|
15
|
+
def convert(self):
    """Convert the YOLO dataset to DM format and cache it on ``converted_data``.

    Returns:
        For a categorized dataset, a dict mapping each split name to its converted data;
        otherwise the converted data of ``self.root_dir``.
    """
    if self.is_categorized_dataset:
        split_dirs = self._validate_splits(['train', 'valid'], ['test'])
        converted = {name: self._convert_yolo_split_to_dm(path) for name, path in split_dirs.items()}
    else:
        converted = self._convert_yolo_split_to_dm(self.root_dir)
    self.converted_data = converted
    return converted
|
|
29
|
+
|
|
30
|
+
def _find_image_path(self, images_dir, base):
    """Return the first existing ``<base><extension>`` path inside ``images_dir``.

    Extensions are tried in the order listed in ``IMG_EXTENSIONS``; ``None`` is returned
    when no candidate exists.
    """
    candidates = (os.path.join(images_dir, f'{base}{suffix}') for suffix in self.IMG_EXTENSIONS)
    return next((path for path in candidates if os.path.exists(path)), None)
|
|
37
|
+
|
|
38
|
+
@staticmethod
def _get_image_size(image_path: str) -> Tuple[int, int]:
    """Return the ``size`` attribute of the image stored at ``image_path``.

    The file is opened with ``PIL.Image.open`` inside a ``with`` block, so the handle is
    released before returning. ``_parse_yolo_line`` unpacks the result as
    ``(img_w, img_h)``.
    """
    with Image.open(image_path) as img:
        return img.size
|
|
43
|
+
|
|
44
|
+
def _parse_yolo_line(self, line: str, class_names: List[str], img_size: Tuple[int, int]):
    """Parse one line of a YOLO label file into an annotation dict with pixel coordinates.

    Supported layouts (all coordinates normalized to 0-1):
        - bounding box: ``class_id x_center y_center w h``
        - polygon: ``class_id x1 y1 x2 y2 ...``
        - keypoints: ``class_id x_center y_center w h x1 y1 v1 x2 y2 v2 ...``

    Pixel values are rounded to the nearest integer. Truncating with ``int()`` would lose
    a pixel to float error, e.g. ``0.29 * 100 == 28.999999999999996``.

    NOTE(review): the polygon test runs first, so a keypoint line with an even number of
    keypoints (5 + 3k fields with k even) is read as a polygon. The layouts cannot be told
    apart from the field count alone.

    Args:
        line: Raw label line.
        class_names: Class names indexed by class id; ids beyond the end are named
            ``class_<id>``.
        img_size: ``(width, height)`` of the image in pixels.

    Returns:
        A dict with ``type`` (``'bounding_box'``, ``'polygon'`` or ``'keypoint'``),
        ``classification`` and ``data`` (keypoints also carry ``bounding_box``), or ``None``
        when the line has fewer than five fields or matches no layout.
    """
    parts = line.strip().split()
    if len(parts) < 5:
        return None  # skip malformed

    class_idx = int(parts[0])
    class_name = class_names[class_idx] if class_idx < len(class_names) else f'class_{class_idx}'
    img_w, img_h = img_size

    def to_pixels(value: str, extent: int) -> int:
        # Convert one normalized coordinate to the nearest pixel.
        return round(float(value) * extent)

    def denormalize_box(fields) -> list:
        # YOLO (x_center, y_center, w, h) -> (left, top, w, h) in pixels.
        x_center, y_center, width, height = map(float, fields)
        return [
            round((x_center - width / 2) * img_w),
            round((y_center - height / 2) * img_h),
            round(width * img_w),
            round(height * img_h),
        ]

    count = len(parts)

    # Polygon: more than 5 values and an even number of coordinates after class_id.
    if count > 5 and (count - 1) % 2 == 0:
        coords = [[to_pixels(parts[i], img_w), to_pixels(parts[i + 1], img_h)] for i in range(1, count, 2)]
        return {'type': 'polygon', 'classification': class_name, 'data': coords}

    # Standard bounding box.
    if count == 5:
        return {'type': 'bounding_box', 'classification': class_name, 'data': denormalize_box(parts[1:5])}

    # Keypoints: a bounding box followed by (x, y, visibility) triples.
    if count > 5 and (count - 5) % 3 == 0:
        box = denormalize_box(parts[1:5])
        keypoints = [
            # int(float(...)) also accepts visibility flags written as '2.0'.
            [to_pixels(parts[i], img_w), to_pixels(parts[i + 1], img_h), int(float(parts[i + 2]))]
            for i in range(5, count, 3)
        ]
        return {'type': 'keypoint', 'classification': class_name, 'data': keypoints, 'bounding_box': box}

    return None
|
|
105
|
+
|
|
106
|
+
def _convert_yolo_split_to_dm(self, split_dir: str) -> Dict[str, Any]:
    """Convert one YOLO split directory into DM annotations.

    Images are read from the first existing directory among ``images``, ``img`` and
    ``imgs``; labels from ``labels``. Class names come from the ``names`` entry of
    ``dataset.yaml`` in ``self.root_dir`` (not in the split directory). Label files without
    a matching image are reported on stdout and skipped.

    Args:
        split_dir: Directory holding the image and label sub-directories.

    Returns:
        Dict mapping image base name to ``(dm_json, image_path)``.

    Raises:
        FileNotFoundError: If the images directory, the labels directory or
            ``dataset.yaml`` is missing.
    """
    # Find image and label directories
    candidates = (os.path.join(split_dir, name) for name in ['images', 'img', 'imgs'])
    images_dir = next((path for path in candidates if os.path.isdir(path)), None)
    if images_dir is None:
        raise FileNotFoundError(f"No images directory found in {split_dir} (tried 'images', 'img', 'imgs').")

    labels_dir = os.path.join(split_dir, 'labels')
    if not os.path.isdir(labels_dir):
        raise FileNotFoundError(f"No labels directory found in {split_dir} (expected 'labels').")

    # Load dataset.yaml
    dataset_yaml_path = os.path.join(self.root_dir, 'dataset.yaml')
    if not os.path.exists(dataset_yaml_path):
        # Report the directory that was actually searched.
        raise FileNotFoundError(f'No dataset.yaml file found in {self.root_dir}.')
    with open(dataset_yaml_path, 'r', encoding='utf-8') as f:
        # safe_load() returns None for an empty document and `names:` may be null;
        # fall back to no class names so ids are rendered as class_<id>.
        class_names = (yaml.safe_load(f) or {}).get('names') or []

    # Build DM data
    result = {}
    for label_filename in os.listdir(labels_dir):
        if not label_filename.endswith('.txt'):
            continue
        base = os.path.splitext(label_filename)[0]
        img_path = self._find_image_path(images_dir, base)
        if img_path is None:
            print(f'[WARNING] Image not found for label {label_filename}, skipping.')
            continue
        img_size = self._get_image_size(img_path)
        with open(os.path.join(labels_dir, label_filename), 'r', encoding='utf-8') as f:
            label_lines = [line.strip() for line in f if line.strip()]

        # Prepare DM annotation structure
        dm_img = {'bounding_box': [], 'polygon': [], 'keypoint': [], 'relation': [], 'group': []}

        for line in label_lines:
            ann = self._parse_yolo_line(line, class_names, img_size)
            if ann is None or ann['type'] not in ('bounding_box', 'polygon', 'keypoint'):
                continue
            entry = {
                'id': self._generate_unique_id(),
                'classification': ann['classification'],
                'attrs': [],
                'data': ann['data'],
            }
            if ann['type'] == 'keypoint':
                # Keypoint annotations also keep their enclosing box.
                entry['bounding_box'] = ann['bounding_box']
            dm_img[ann['type']].append(entry)

        result[os.path.basename(img_path)] = ({'images': [dm_img]}, img_path)
    return result
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synapse-sdk
|
|
3
|
+
Version: 1.0.0a75
|
|
4
|
+
Summary: synapse sdk
|
|
5
|
+
Author-email: datamaker <developer@datamaker.io>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: boto3
|
|
12
|
+
Requires-Dist: click
|
|
13
|
+
Requires-Dist: cookiecutter
|
|
14
|
+
Requires-Dist: requests
|
|
15
|
+
Requires-Dist: tqdm
|
|
16
|
+
Requires-Dist: python-dotenv
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: pydantic
|
|
19
|
+
Requires-Dist: pyjwt
|
|
20
|
+
Requires-Dist: universal-pathlib
|
|
21
|
+
Requires-Dist: fsspec[gcs,s3,sftp]
|
|
22
|
+
Requires-Dist: inquirer
|
|
23
|
+
Requires-Dist: pillow
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: ray[all]; extra == "all"
|
|
26
|
+
Requires-Dist: python-nmap; extra == "all"
|
|
27
|
+
Requires-Dist: hyperopt; extra == "all"
|
|
28
|
+
Requires-Dist: bayesian-optimization==1.4.3; extra == "all"
|
|
29
|
+
Provides-Extra: dashboard
|
|
30
|
+
Requires-Dist: fastapi>=0.115.13; extra == "dashboard"
|
|
31
|
+
Requires-Dist: uvicorn[standard]>=0.34.3; extra == "dashboard"
|
|
32
|
+
Requires-Dist: websockets>=15.0.1; extra == "dashboard"
|
|
33
|
+
Requires-Dist: aiofiles>=24.1.0; extra == "dashboard"
|
|
34
|
+
Provides-Extra: test
|
|
35
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
37
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "test"
|
|
38
|
+
Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
|
|
39
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == "test"
|
|
40
|
+
Requires-Dist: pytest-html>=3.1.0; extra == "test"
|
|
41
|
+
Requires-Dist: pytest-json-report>=1.5.0; extra == "test"
|
|
42
|
+
Requires-Dist: requests-mock>=1.10.0; extra == "test"
|
|
43
|
+
Requires-Dist: responses>=0.25.0; extra == "test"
|
|
44
|
+
Provides-Extra: dev
|
|
45
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
# 🧠 Synapse SDK
|
|
49
|
+
|
|
50
|
+

|
|
51
|
+

|
|
52
|
+
|
|
53
|
+
A Python SDK for building and managing ML plugins, data annotation workflows, and AI agents.
|
|
54
|
+
|
|
55
|
+
## ✨ Features
|
|
56
|
+
|
|
57
|
+
- **🔌 Plugin System**: Create and manage ML plugins with categories like neural networks, data validation, and export tools
|
|
58
|
+
- **🤖 Agent Management**: Backend and Ray-based agent clients for distributed AI workflows
|
|
59
|
+
- **🔄 Data Converters**: Convert between formats (COCO, Pascal VOC, YOLO) and annotation schemas
|
|
60
|
+
- **🛠️ Development Tools**: Interactive web dashboard for monitoring and debugging
|
|
61
|
+
- **⚡ CLI Interface**: Command-line tool for configuration, plugin management, and development
|
|
62
|
+
|
|
63
|
+
## 🚀 Quick Start
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install synapse-sdk
|
|
67
|
+
synapse --help
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## 📚 Documentation
|
|
71
|
+
|
|
72
|
+
*Docs [https://docs.synapse.sh](https://docs.synapse.sh)*
|