synapse-sdk 1.0.0a79__py3-none-any.whl → 1.0.0a81__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. More details were linked from the original page (the link target is not preserved in this text).

@@ -4,13 +4,14 @@ from . import BaseDMConverter
4
4
  class DMV1ToV2Converter(BaseDMConverter):
5
5
  """DM v1 to v2 format converter class."""
6
6
 
7
- def __init__(self, old_dm_data={}):
7
+ def __init__(self, old_dm_data={}, file_type=None):
8
8
  """Initialize the converter.
9
9
 
10
10
  Args:
11
11
  old_dm_data (dict): DM v1 format data to be converted
12
+ file_type (str, optional): Type of file being converted
12
13
  """
13
- super().__init__()
14
+ super().__init__(file_type)
14
15
  self.old_dm_data = old_dm_data
15
16
  self.classification_info = {}
16
17
  self.media_data = {}
@@ -29,8 +30,16 @@ class DMV1ToV2Converter(BaseDMConverter):
29
30
  # Extract media IDs from annotations key
30
31
  media_ids = list(old_dm_data.get('annotations', {}).keys())
31
32
 
33
+ # If file_type is not specified, try to detect from media_ids
34
+ if not self.file_type and media_ids:
35
+ detected_file_type = self._detect_file_type(media_ids[0])
36
+ if detected_file_type:
37
+ self.file_type = detected_file_type
38
+ # Re-setup tool processors with detected file_type
39
+ self.tool_processors = self._setup_tool_processors()
40
+
32
41
  for media_id in media_ids:
33
- self._process_media_item(old_dm_data, media_id)
42
+ self._convert_media_item(old_dm_data, media_id)
34
43
 
35
44
  # Build final result (put classification at the front)
36
45
  result = {'classification': self.classification_info}
@@ -38,7 +47,13 @@ class DMV1ToV2Converter(BaseDMConverter):
38
47
 
39
48
  return result
40
49
 
41
- def _process_media_item(self, old_dm_data, media_id):
50
+ def _detect_file_type(self, media_id):
51
+ """Detect file type from media ID."""
52
+ if '_' in media_id:
53
+ return media_id.split('_')[0]
54
+ return media_id
55
+
56
+ def _convert_media_item(self, old_dm_data, media_id):
42
57
  """Process a single media item.
43
58
 
44
59
  Args:
@@ -53,17 +68,28 @@ class DMV1ToV2Converter(BaseDMConverter):
53
68
  self.media_data[media_type_plural] = []
54
69
 
55
70
  # Create id -> class and tool mappings
56
- id_to_class = {
57
- annotation['id']: annotation['classification']['class']
58
- for annotation in old_dm_data['annotations'][media_id]
59
- }
71
+ annotations = old_dm_data.get('annotations', {}).get(media_id, [])
60
72
 
61
- id_to_tool = {annotation['id']: annotation['tool'] for annotation in old_dm_data['annotations'][media_id]}
73
+ id_to_class = {}
74
+ id_to_tool = {}
75
+ for annotation in annotations:
76
+ id_to_class[annotation['id']] = annotation['classification']['class']
77
+ id_to_tool[annotation['id']] = annotation['tool']
62
78
 
63
79
  # Create id -> full classification mapping (including additional attributes)
64
- id_to_full_classification = {
65
- annotation['id']: annotation['classification'] for annotation in old_dm_data['annotations'][media_id]
66
- }
80
+ id_to_full_classification = {annotation['id']: annotation['classification'] for annotation in annotations}
81
+
82
+ # Collect all classifications from annotations (regardless of whether they have data)
83
+ for annotation in annotations:
84
+ tool_type = annotation['tool']
85
+ classification = annotation['classification']['class']
86
+
87
+ if tool_type not in self.classification_info:
88
+ self.classification_info[tool_type] = []
89
+
90
+ # Add only non-duplicate classifications
91
+ if classification and classification not in self.classification_info[tool_type]:
92
+ self.classification_info[tool_type].append(classification)
67
93
 
68
94
  # Initialize current media item
69
95
  media_item = {}
@@ -80,17 +106,9 @@ class DMV1ToV2Converter(BaseDMConverter):
80
106
  tool_type = id_to_tool.get(item_id, '')
81
107
  classification = id_to_class.get(item_id, '')
82
108
 
83
- # Collect classification info (maintain existing ID)
84
- if tool_type not in self.classification_info:
85
- self.classification_info[tool_type] = []
86
-
87
- # Add only non-duplicate classifications
88
- if classification and classification not in self.classification_info[tool_type]:
89
- self.classification_info[tool_type].append(classification)
90
-
91
109
  # Process by each tool type
92
- self._process_annotation_item(
93
- item, item_id, tool_type, classification, id_to_full_classification, tools_data
110
+ self._convert_annotation_item(
111
+ item, item_id, tool_type, classification, id_to_full_classification, tools_data, media_type
94
112
  )
95
113
 
96
114
  # Add processed tool data to media item
@@ -102,8 +120,10 @@ class DMV1ToV2Converter(BaseDMConverter):
102
120
  if media_item:
103
121
  self.media_data[media_type_plural].append(media_item)
104
122
 
105
- def _process_annotation_item(self, item, item_id, tool_type, classification, id_to_full_classification, tools_data):
106
- """Process a single annotation item based on its tool type.
123
+ def _convert_annotation_item(
124
+ self, item, item_id, tool_type, classification, id_to_full_classification, tools_data, media_type
125
+ ):
126
+ """Process a single annotation item based on its tool type and media type.
107
127
 
108
128
  Args:
109
129
  item (dict): Annotation item data
@@ -112,16 +132,45 @@ class DMV1ToV2Converter(BaseDMConverter):
112
132
  classification (str): Classification label
113
133
  id_to_full_classification (dict): Mapping of ID to full classification data
114
134
  tools_data (dict): Dictionary to store processed tool data
135
+ media_type (str): Type of media (image, video, pcd, text)
115
136
  """
116
- processor = self.tool_processors.get(tool_type)
117
- if processor:
118
- processor(item, item_id, classification, tools_data, id_to_full_classification)
137
+ # Check if tool_processors is available and contains the tool_type
138
+ if hasattr(self, 'tool_processors') and self.tool_processors:
139
+ processor = self.tool_processors.get(tool_type)
140
+ if processor:
141
+ processor(item, item_id, classification, tools_data, id_to_full_classification)
142
+ else:
143
+ self._handle_unknown_tool(tool_type, item_id)
119
144
  else:
120
- # Handle unknown tool_type
121
- self._handle_unknown_tool(tool_type, item_id)
122
-
123
- def _process_bounding_box(self, item, item_id, classification, tools_data, id_to_full_classification=None):
124
- """Process bounding box annotation.
145
+ # Use file_type + tool_type pattern for method names
146
+ method_name = f'_convert_{media_type}_{tool_type}'
147
+ if hasattr(self, method_name):
148
+ method = getattr(self, method_name)
149
+ method(item, item_id, classification, tools_data, id_to_full_classification)
150
+ else:
151
+ self._handle_unknown_tool(tool_type, item_id, media_type)
152
+
153
+ def _handle_unknown_tool(self, tool_type, item_id=None, media_type=None):
154
+ """Handle unknown tool types with consistent warning message."""
155
+ warning_msg = f"Warning: Unknown tool type '{tool_type}'"
156
+ if media_type:
157
+ warning_msg += f' for media type {media_type}'
158
+ if item_id:
159
+ warning_msg += f' for item {item_id}'
160
+ print(warning_msg)
161
+
162
+ def _extract_media_type_info(self, media_id):
163
+ """Extract media type information from media ID."""
164
+ media_type = media_id.split('_')[0] if '_' in media_id else media_id
165
+ media_type_plural = media_type + 's' if not media_type.endswith('s') else media_type
166
+ return media_type, media_type_plural
167
+
168
+ def _singularize_media_type(self, media_type_plural):
169
+ """Convert plural media type to singular."""
170
+ return media_type_plural.rstrip('s')
171
+
172
+ def _process_bounding_box_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
173
+ """Process bounding box annotation - common logic.
125
174
 
126
175
  Args:
127
176
  item (dict): Annotation item data
@@ -161,7 +210,11 @@ class DMV1ToV2Converter(BaseDMConverter):
161
210
  'data': data,
162
211
  })
163
212
 
164
- def _process_named_entity(self, item, item_id, classification, tools_data, id_to_full_classification=None):
213
+ def _convert_bounding_box(self, item, item_id, classification, tools_data, id_to_full_classification=None):
214
+ """Process bounding box annotation."""
215
+ return self._process_bounding_box_common(item, item_id, classification, tools_data, id_to_full_classification)
216
+
217
+ def _convert_named_entity(self, item, item_id, classification, tools_data, id_to_full_classification=None):
165
218
  """Process named entity annotation.
166
219
 
167
220
  Args:
@@ -191,43 +244,7 @@ class DMV1ToV2Converter(BaseDMConverter):
191
244
  'data': entity_data, # Format: {ranges: [...], content: "..."}
192
245
  })
193
246
 
194
- def _process_classification(self, item, item_id, classification, tools_data, id_to_full_classification):
195
- """Process classification annotation.
196
-
197
- Args:
198
- item (dict): Annotation item data
199
- item_id (str): ID of the annotation item
200
- classification (str): Classification label
201
- tools_data (dict): Dictionary to store processed tool data
202
- id_to_full_classification (dict): Full classification mapping
203
- """
204
- if 'classification' not in tools_data:
205
- tools_data['classification'] = []
206
-
207
- # Get full classification info (including additional attributes)
208
- full_classification = id_to_full_classification.get(item_id, {})
209
-
210
- # Store additional attributes in attrs array
211
- attrs = []
212
- classification_data = {}
213
-
214
- for key, value in full_classification.items():
215
- if key != 'class': # class is already stored in classification field
216
- if isinstance(value, list) and len(value) > 0:
217
- # Array attributes like multiple
218
- attrs.append({'name': key, 'value': value})
219
- elif isinstance(value, str) and value.strip():
220
- # String attributes like text, single_radio, single_dropdown
221
- attrs.append({'name': key, 'value': value})
222
-
223
- tools_data['classification'].append({
224
- 'id': item_id,
225
- 'classification': classification,
226
- 'attrs': attrs,
227
- 'data': classification_data, # Empty object for full text classification
228
- })
229
-
230
- def _process_polyline(self, item, item_id, classification, tools_data, id_to_full_classification=None):
247
+ def _process_polyline_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
231
248
  """Process polyline annotation.
232
249
 
233
250
  Args:
@@ -246,16 +263,16 @@ class DMV1ToV2Converter(BaseDMConverter):
246
263
  # Convert each coordinate point to [x, y] format
247
264
  for point in item['coordinate']:
248
265
  if 'x' in point and 'y' in point:
249
- polyline_data.extend([point['x'], point['y']])
266
+ polyline_data.append([point['x'], point['y']])
250
267
 
251
268
  tools_data['polyline'].append({
252
269
  'id': item_id,
253
270
  'classification': classification,
254
271
  'attrs': [],
255
- 'data': polyline_data, # Format: [x1, y1, x2, y2, x3, y3, ...]
272
+ 'data': polyline_data, # Format: [[x1, y1], [x2, y2], [x3, y3], ...]
256
273
  })
257
274
 
258
- def _process_keypoint(self, item, item_id, classification, tools_data, id_to_full_classification=None):
275
+ def _process_keypoint_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
259
276
  """Process keypoint annotation.
260
277
 
261
278
  Args:
@@ -282,7 +299,7 @@ class DMV1ToV2Converter(BaseDMConverter):
282
299
  'data': keypoint_data, # Format: [x, y]
283
300
  })
284
301
 
285
- def _process_3d_bounding_box(self, item, item_id, classification, tools_data, id_to_full_classification=None):
302
+ def _convert_3d_bounding_box(self, item, item_id, classification, tools_data, id_to_full_classification=None):
286
303
  """Process 3D bounding box annotation.
287
304
 
288
305
  Args:
@@ -298,7 +315,16 @@ class DMV1ToV2Converter(BaseDMConverter):
298
315
  # Process 3d_bounding_box psr (position, scale, rotation)
299
316
  psr_data = {}
300
317
  if 'psr' in item and isinstance(item['psr'], dict):
301
- psr_data = item['psr']
318
+ psr = item['psr']
319
+
320
+ # Extract only x, y, z values from position, scale, rotation
321
+ for component in ['position', 'scale', 'rotation']:
322
+ if component in psr and isinstance(psr[component], dict):
323
+ psr_data[component] = {
324
+ 'x': psr[component].get('x'),
325
+ 'y': psr[component].get('y'),
326
+ 'z': psr[component].get('z'),
327
+ }
302
328
 
303
329
  tools_data['3d_bounding_box'].append({
304
330
  'id': item_id,
@@ -307,8 +333,37 @@ class DMV1ToV2Converter(BaseDMConverter):
307
333
  'data': psr_data, # Format: {position: {x,y,z}, scale: {x,y,z}, rotation: {x,y,z}}
308
334
  })
309
335
 
310
- def _process_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
311
- """Process segmentation annotation.
336
+ def _convert_video_segmentation_data(
337
+ self, item, item_id, classification, tools_data, id_to_full_classification=None
338
+ ):
339
+ """Process video segmentation annotation data.
340
+
341
+ Args:
342
+ item (dict): Annotation item data
343
+ item_id (str): ID of the annotation item
344
+ classification (str): Classification label
345
+ tools_data (dict): Dictionary to store processed tool data
346
+ id_to_full_classification (dict, optional): Full classification mapping
347
+ """
348
+ if 'segmentation' not in tools_data:
349
+ tools_data['segmentation'] = []
350
+
351
+ # Process frame section-based segmentation (videos)
352
+ segmentation_data = {}
353
+ if 'section' in item and isinstance(item['section'], dict):
354
+ segmentation_data = item['section']
355
+
356
+ tools_data['segmentation'].append({
357
+ 'id': item_id,
358
+ 'classification': classification,
359
+ 'attrs': [],
360
+ 'data': segmentation_data, # Format: {startFrame: x, endFrame: y}
361
+ })
362
+
363
+ def _convert_image_segmentation_data(
364
+ self, item, item_id, classification, tools_data, id_to_full_classification=None
365
+ ):
366
+ """Process image segmentation annotation data.
312
367
 
313
368
  Args:
314
369
  item (dict): Annotation item data
@@ -320,23 +375,19 @@ class DMV1ToV2Converter(BaseDMConverter):
320
375
  if 'segmentation' not in tools_data:
321
376
  tools_data['segmentation'] = []
322
377
 
323
- # Process segmentation pixel_indices or section
378
+ # Process pixel-based segmentation (images)
324
379
  segmentation_data = {}
325
380
  if 'pixel_indices' in item and isinstance(item['pixel_indices'], list):
326
- # Pixel-based segmentation (images)
327
381
  segmentation_data = item['pixel_indices']
328
- elif 'section' in item and isinstance(item['section'], dict):
329
- # Frame section-based segmentation (videos)
330
- segmentation_data = item['section']
331
382
 
332
383
  tools_data['segmentation'].append({
333
384
  'id': item_id,
334
385
  'classification': classification,
335
386
  'attrs': [],
336
- 'data': segmentation_data, # Format: [pixel_indices...] or {startFrame: x, endFrame: y}
387
+ 'data': segmentation_data, # Format: [pixel_indices...]
337
388
  })
338
389
 
339
- def _process_polygon(self, item, item_id, classification, tools_data, id_to_full_classification=None):
390
+ def _process_polygon_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
340
391
  """Process polygon annotation.
341
392
 
342
393
  Args:
@@ -355,16 +406,16 @@ class DMV1ToV2Converter(BaseDMConverter):
355
406
  # Convert each coordinate point to [x, y] format
356
407
  for point in item['coordinate']:
357
408
  if 'x' in point and 'y' in point:
358
- polygon_data.extend([point['x'], point['y']])
409
+ polygon_data.append([point['x'], point['y']])
359
410
 
360
411
  tools_data['polygon'].append({
361
412
  'id': item_id,
362
413
  'classification': classification,
363
414
  'attrs': [],
364
- 'data': polygon_data, # Format: [x1, y1, x2, y2, x3, y3, ...]
415
+ 'data': polygon_data, # Format: [[x1, y1], [x2, y2], [x3, y3], ...]
365
416
  })
366
417
 
367
- def _process_relation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
418
+ def _process_relation_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
368
419
  """Process relation annotation.
369
420
 
370
421
  Args:
@@ -389,7 +440,7 @@ class DMV1ToV2Converter(BaseDMConverter):
389
440
  'data': relation_data, # Format: ['from_id', 'to_id']
390
441
  })
391
442
 
392
- def _process_group(self, item, item_id, classification, tools_data, id_to_full_classification=None):
443
+ def _convert_group(self, item, item_id, classification, tools_data, id_to_full_classification=None):
393
444
  """Process group annotation.
394
445
 
395
446
  Args:
@@ -413,3 +464,164 @@ class DMV1ToV2Converter(BaseDMConverter):
413
464
  'attrs': [],
414
465
  'data': group_data, # Format: ['id1', 'id2', 'id3', ...]
415
466
  })
467
+
468
+ # Include all the _convert_* methods from previous code...
469
+ def _convert_classification(self, item, item_id, classification, tools_data, id_to_full_classification):
470
+ """Process classification annotation."""
471
+ if 'classification' not in tools_data:
472
+ tools_data['classification'] = []
473
+
474
+ # Get full classification info (including additional attributes)
475
+ full_classification = id_to_full_classification.get(item_id, {})
476
+
477
+ # Store additional attributes in attrs array
478
+ attrs = []
479
+ classification_data = {}
480
+
481
+ for key, value in full_classification.items():
482
+ if key != 'class': # class is already stored in classification field
483
+ if isinstance(value, list) and len(value) > 0:
484
+ # Array attributes like multiple
485
+ attrs.append({'name': key, 'value': value})
486
+ elif isinstance(value, str) and value.strip():
487
+ # String attributes like text, single_radio, single_dropdown
488
+ attrs.append({'name': key, 'value': value})
489
+
490
+ tools_data['classification'].append({
491
+ 'id': item_id,
492
+ 'classification': classification,
493
+ 'attrs': attrs,
494
+ 'data': classification_data, # Empty object for full text classification
495
+ })
496
+
497
+ def _convert_prompt(self, item, item_id, classification, tools_data, id_to_full_classification=None):
498
+ """Process prompt annotation."""
499
+ if 'prompt' not in tools_data:
500
+ tools_data['prompt'] = []
501
+
502
+ # Process prompt input data from annotationsData
503
+ prompt_data = {}
504
+ attrs = []
505
+
506
+ if 'input' in item and isinstance(item['input'], list):
507
+ # Store complete input structure
508
+ input_items = []
509
+ for input_item in item['input']:
510
+ if isinstance(input_item, dict):
511
+ input_items.append(input_item)
512
+ # Extract text value for easy access
513
+ if input_item.get('type') == 'text' and 'value' in input_item:
514
+ prompt_data['text'] = input_item['value']
515
+ attrs.append('text')
516
+
517
+ prompt_data['input'] = input_items
518
+ attrs.append('input')
519
+
520
+ # Include any additional metadata
521
+ for key in ['model', 'displayName', 'generatedBy', 'timestamp']:
522
+ if key in item:
523
+ prompt_data[key] = item[key]
524
+ attrs.append(key)
525
+
526
+ result_item = {
527
+ 'id': item_id,
528
+ 'classification': classification,
529
+ 'attrs': attrs,
530
+ 'data': prompt_data, # Format: {text: "prompt text", input: [...], ...}
531
+ }
532
+ tools_data['prompt'].append(result_item)
533
+
534
+ def _convert_answer(self, item, item_id, classification, tools_data, id_to_full_classification=None):
535
+ """Process answer annotation."""
536
+ if 'answer' not in tools_data:
537
+ tools_data['answer'] = []
538
+
539
+ # Process answer output data from annotationsData
540
+ answer_data = {}
541
+ attrs = []
542
+
543
+ if 'output' in item and isinstance(item['output'], list):
544
+ # Store complete output structure
545
+ output_items = []
546
+ for output_item in item['output']:
547
+ if isinstance(output_item, dict):
548
+ output_items.append(output_item)
549
+ # Extract text value for easy access
550
+ if output_item.get('type') == 'text' and 'value' in output_item:
551
+ answer_data['text'] = output_item['value']
552
+ attrs.append('text')
553
+
554
+ answer_data['output'] = output_items
555
+ attrs.append('output')
556
+
557
+ # Include all additional metadata from annotationsData
558
+ metadata_fields = ['model', 'displayName', 'generatedBy', 'promptAnnotationId', 'timestamp', 'primaryKey']
559
+ for key in metadata_fields:
560
+ if key in item:
561
+ answer_data[key] = item[key]
562
+ attrs.append(key)
563
+
564
+ result_item = {
565
+ 'id': item_id,
566
+ 'classification': classification,
567
+ 'attrs': attrs,
568
+ 'data': answer_data, # Format: {text: "answer text", output: [...], model: "...", ...}
569
+ }
570
+
571
+ tools_data['answer'].append(result_item)
572
+
573
+ def _convert_3d_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
574
+ """Process 3D segmentation annotation."""
575
+ if '3d_segmentation' not in tools_data:
576
+ tools_data['3d_segmentation'] = []
577
+
578
+ # Process 3D segmentation point data from annotationsData
579
+ segmentation_data = {}
580
+ attrs = []
581
+
582
+ if 'points' in item and isinstance(item['points'], list):
583
+ segmentation_data['points'] = item['points']
584
+ attrs.append('points')
585
+
586
+ # Include any additional metadata
587
+ for key in ['tool']:
588
+ if key in item:
589
+ segmentation_data[key] = item[key]
590
+ attrs.append(key)
591
+
592
+ result_item = {
593
+ 'id': item_id,
594
+ 'classification': classification,
595
+ 'attrs': attrs,
596
+ 'data': segmentation_data, # Format: {points: [146534, 146662, ...], ...}
597
+ }
598
+ tools_data['3d_segmentation'].append(result_item)
599
+
600
+ def _convert_polygon(self, item, item_id, classification, tools_data, id_to_full_classification=None):
601
+ """Process polygon annotation."""
602
+ return self._process_polygon_common(item, item_id, classification, tools_data, id_to_full_classification)
603
+
604
+ def _convert_polyline(self, item, item_id, classification, tools_data, id_to_full_classification=None):
605
+ """Process polyline annotation."""
606
+ return self._process_polyline_common(item, item_id, classification, tools_data, id_to_full_classification)
607
+
608
+ def _convert_keypoint(self, item, item_id, classification, tools_data, id_to_full_classification=None):
609
+ """Process keypoint annotation."""
610
+ return self._process_keypoint_common(item, item_id, classification, tools_data, id_to_full_classification)
611
+
612
+ # Segmentation methods
613
+ def _convert_image_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
614
+ """Process segmentation annotation for image."""
615
+ return self._convert_image_segmentation_data(
616
+ item, item_id, classification, tools_data, id_to_full_classification
617
+ )
618
+
619
+ def _convert_video_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
620
+ """Process segmentation annotation for video."""
621
+ return self._convert_video_segmentation_data(
622
+ item, item_id, classification, tools_data, id_to_full_classification
623
+ )
624
+
625
+ def _convert_relation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
626
+ """Process relation annotation."""
627
+ return self._process_relation_common(item, item_id, classification, tools_data, id_to_full_classification)