synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -1,627 +1,273 @@
1
- from . import BaseDMConverter
1
+ """
2
+ DM Schema V1 → V2 Converter
3
+
4
+ Created: 2025-12-11
5
+
6
+ V1→V2 conversion separates the result into annotation_data and annotation_meta.
7
+ """
8
+
9
+ from typing import Any
10
+
11
+ from .base import BaseDMConverter
12
+ from .types import (
13
+ AnnotationMeta,
14
+ V2AnnotationData,
15
+ V2ConversionResult,
16
+ )
2
17
 
3
18
 
4
19
  class DMV1ToV2Converter(BaseDMConverter):
5
- """DM v1 to v2 format converter class."""
20
+ """Converter from DM Schema V1 to V2
6
21
 
7
- def __init__(self, old_dm_data={}, file_type=None):
8
- """Initialize the converter.
22
+ V1→V2 conversion separates the result into annotation_data and annotation_meta.
9
23
 
10
- Args:
11
- old_dm_data (dict): DM v1 format data to be converted
12
- file_type (str, optional): Type of file being converted
13
- """
14
- super().__init__(file_type)
15
- self.old_dm_data = old_dm_data
16
- self.classification_info = {}
17
- self.media_data = {}
24
+ Example:
25
+ >>> converter = DMV1ToV2Converter()
26
+ >>> result = converter.convert(v1_data)
27
+ >>> annotation_data = result["annotation_data"]
28
+ >>> annotation_meta = result["annotation_meta"]
29
+ """
18
30
 
19
- def convert(self):
20
- """Convert DM v1 data to v2 format.
31
+ def _setup_tool_processors(self) -> None:
32
+ """Register tool processors"""
33
+ from .tools.bounding_box import BoundingBoxProcessor
21
34
 
22
- Returns:
23
- dict: Converted data in DM v2 format
24
- """
25
- # Reset state
26
- old_dm_data = self.old_dm_data
27
- self.classification_info = {}
28
- self.media_data = {}
35
+ self.register_processor(BoundingBoxProcessor())
29
36
 
30
- # Extract media IDs from annotations key
31
- media_ids = list(old_dm_data.get('annotations', {}).keys())
37
+ # polygon to be added later
38
+ try:
39
+ from .tools.polygon import PolygonProcessor
32
40
 
33
- # If file_type is not specified, try to detect from media_ids
34
- if not self.file_type and media_ids:
35
- detected_file_type = self._detect_file_type(media_ids[0])
36
- if detected_file_type:
37
- self.file_type = detected_file_type
38
- # Re-setup tool processors with detected file_type
39
- self.tool_processors = self._setup_tool_processors()
41
+ self.register_processor(PolygonProcessor())
42
+ except ImportError:
43
+ pass
40
44
 
41
- for media_id in media_ids:
42
- self._convert_media_item(old_dm_data, media_id)
45
+ try:
46
+ from .tools.polyline import PolylineProcessor
43
47
 
44
- # Build final result (put classification at the front)
45
- result = {'classification': self.classification_info}
46
- result.update(self.media_data)
48
+ self.register_processor(PolylineProcessor())
49
+ except ImportError:
50
+ pass
47
51
 
48
- return result
52
+ try:
53
+ from .tools.keypoint import KeypointProcessor
49
54
 
50
- def _detect_file_type(self, media_id):
51
- """Detect file type from media ID."""
52
- if '_' in media_id:
53
- return media_id.split('_')[0]
54
- return media_id
55
+ self.register_processor(KeypointProcessor())
56
+ except ImportError:
57
+ pass
55
58
 
56
- def _convert_media_item(self, old_dm_data, media_id):
57
- """Process a single media item.
59
+ try:
60
+ from .tools.bounding_box_3d import BoundingBox3DProcessor
58
61
 
59
- Args:
60
- old_dm_data (dict): Original DM v1 data
61
- media_id (str): ID of the media item to process
62
- """
63
- # Extract media type (e.g., "video_1" -> "videos", "image_2" -> "images")
64
- media_type, media_type_plural = self._extract_media_type_info(media_id)
62
+ self.register_processor(BoundingBox3DProcessor())
63
+ except ImportError:
64
+ pass
65
65
 
66
- # Create list for this media type if it doesn't exist
67
- if media_type_plural not in self.media_data:
68
- self.media_data[media_type_plural] = []
66
+ try:
67
+ from .tools.segmentation import SegmentationProcessor
69
68
 
70
- # Create id -> class and tool mappings
71
- annotations = old_dm_data.get('annotations', {}).get(media_id, [])
69
+ self.register_processor(SegmentationProcessor())
70
+ except ImportError:
71
+ pass
72
72
 
73
- id_to_class = {}
74
- id_to_tool = {}
75
- for annotation in annotations:
76
- id_to_class[annotation['id']] = annotation['classification']['class']
77
- id_to_tool[annotation['id']] = annotation['tool']
73
+ try:
74
+ from .tools.named_entity import NamedEntityProcessor
78
75
 
79
- # Create id -> full classification mapping (including additional attributes)
80
- id_to_full_classification = {annotation['id']: annotation['classification'] for annotation in annotations}
76
+ self.register_processor(NamedEntityProcessor())
77
+ except ImportError:
78
+ pass
81
79
 
82
- # Collect all classifications from annotations (regardless of whether they have data)
83
- for annotation in annotations:
84
- tool_type = annotation['tool']
85
- classification = annotation['classification']['class']
80
+ try:
81
+ from .tools.segmentation_3d import Segmentation3DProcessor
86
82
 
87
- if tool_type not in self.classification_info:
88
- self.classification_info[tool_type] = []
83
+ self.register_processor(Segmentation3DProcessor())
84
+ except ImportError:
85
+ pass
89
86
 
90
- # Add only non-duplicate classifications
91
- if classification and classification not in self.classification_info[tool_type]:
92
- self.classification_info[tool_type].append(classification)
87
+ try:
88
+ from .tools.classification import ClassificationProcessor
93
89
 
94
- # Initialize current media item
95
- media_item = {}
90
+ self.register_processor(ClassificationProcessor())
91
+ except ImportError:
92
+ pass
96
93
 
97
- # Process data from annotationsData for this media
98
- annotations_data = old_dm_data.get('annotationsData', {}).get(media_id, [])
94
+ try:
95
+ from .tools.relation import RelationProcessor
99
96
 
100
- # Group by annotation tool type
101
- tools_data = {}
97
+ self.register_processor(RelationProcessor())
98
+ except ImportError:
99
+ pass
102
100
 
103
- for item in annotations_data:
104
- item_id = item.get('id', '')
105
- # Get tool and classification info from annotations
106
- tool_type = id_to_tool.get(item_id, '')
107
- classification = id_to_class.get(item_id, '')
101
+ try:
102
+ from .tools.prompt import PromptProcessor
108
103
 
109
- # Process by each tool type
110
- self._convert_annotation_item(
111
- item, item_id, tool_type, classification, id_to_full_classification, tools_data, media_type
112
- )
104
+ self.register_processor(PromptProcessor())
105
+ except ImportError:
106
+ pass
113
107
 
114
- # Add processed tool data to media item
115
- for tool_type, tool_data in tools_data.items():
116
- if tool_data: # Only add if data exists
117
- media_item[tool_type] = tool_data
108
+ try:
109
+ from .tools.answer import AnswerProcessor
118
110
 
119
- # Add media item to result (only if data exists)
120
- if media_item:
121
- self.media_data[media_type_plural].append(media_item)
111
+ self.register_processor(AnswerProcessor())
112
+ except ImportError:
113
+ pass
122
114
 
123
- def _convert_annotation_item(
124
- self, item, item_id, tool_type, classification, id_to_full_classification, tools_data, media_type
125
- ):
126
- """Process a single annotation item based on its tool type and media type.
115
+ def convert(self, v1_data: dict[str, Any]) -> V2ConversionResult:
116
+ """Convert V1 data to V2 format (separated result)
127
117
 
128
118
  Args:
129
- item (dict): Annotation item data
130
- item_id (str): ID of the annotation item
131
- tool_type (str): Type of annotation tool
132
- classification (str): Classification label
133
- id_to_full_classification (dict): Mapping of ID to full classification data
134
- tools_data (dict): Dictionary to store processed tool data
135
- media_type (str): Type of media (image, video, pcd, text)
136
- """
137
- # Check if tool_processors is available and contains the tool_type
138
- if hasattr(self, 'tool_processors') and self.tool_processors:
139
- processor = self.tool_processors.get(tool_type)
140
- if processor:
141
- processor(item, item_id, classification, tools_data, id_to_full_classification)
142
- else:
143
- self._handle_unknown_tool(tool_type, item_id)
144
- else:
145
- # Use file_type + tool_type pattern for method names
146
- method_name = f'_convert_{media_type}_{tool_type}'
147
- if hasattr(self, method_name):
148
- method = getattr(self, method_name)
149
- method(item, item_id, classification, tools_data, id_to_full_classification)
150
- else:
151
- self._handle_unknown_tool(tool_type, item_id, media_type)
152
-
153
- def _handle_unknown_tool(self, tool_type, item_id=None, media_type=None):
154
- """Handle unknown tool types with consistent warning message."""
155
- warning_msg = f"Warning: Unknown tool type '{tool_type}'"
156
- if media_type:
157
- warning_msg += f' for media type {media_type}'
158
- if item_id:
159
- warning_msg += f' for item {item_id}'
160
- print(warning_msg)
161
-
162
- def _extract_media_type_info(self, media_id):
163
- """Extract media type information from media ID."""
164
- media_type = media_id.split('_')[0] if '_' in media_id else media_id
165
- media_type_plural = media_type + 's' if not media_type.endswith('s') else media_type
166
- return media_type, media_type_plural
167
-
168
- def _singularize_media_type(self, media_type_plural):
169
- """Convert plural media type to singular."""
170
- return media_type_plural.rstrip('s')
171
-
172
- def _process_bounding_box_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
173
- """Process bounding box annotation - common logic.
119
+ v1_data: DM Schema V1 format data
174
120
 
175
- Args:
176
- item (dict): Annotation item data
177
- item_id (str): ID of the annotation item
178
- classification (str): Classification label
179
- tools_data (dict): Dictionary to store processed tool data
180
- id_to_full_classification (dict, optional): Full classification mapping
181
- """
182
- if 'bounding_box' not in tools_data:
183
- tools_data['bounding_box'] = []
184
-
185
- # Process coordinate or coordinates
186
- coord_data = None
187
- if 'coordinate' in item and isinstance(item['coordinate'], dict):
188
- # Single coordinate structure (dictionary)
189
- coord_data = item['coordinate']
190
- elif 'coordinates' in item:
191
- # Multiple coordinates structure (video etc.)
192
- coords_data = item['coordinates']
193
- if coords_data:
194
- # Use coordinate data from first key
195
- first_key = list(coords_data.keys())[0]
196
- coord_data = coords_data[first_key]
197
-
198
- if coord_data and 'width' in coord_data and 'height' in coord_data:
199
- data = [
200
- coord_data['x'],
201
- coord_data['y'],
202
- coord_data['width'],
203
- coord_data['height'],
204
- ]
205
-
206
- tools_data['bounding_box'].append({
207
- 'id': item_id,
208
- 'classification': classification,
209
- 'attrs': [],
210
- 'data': data,
211
- })
212
-
213
- def _convert_bounding_box(self, item, item_id, classification, tools_data, id_to_full_classification=None):
214
- """Process bounding box annotation."""
215
- return self._process_bounding_box_common(item, item_id, classification, tools_data, id_to_full_classification)
216
-
217
- def _convert_named_entity(self, item, item_id, classification, tools_data, id_to_full_classification=None):
218
- """Process named entity annotation.
121
+ Returns:
122
+ V2ConversionResult: Separated conversion result
123
+ - annotation_data: V2 common annotation structure
124
+ - annotation_meta: Preserved V1 top-level structure
219
125
 
220
- Args:
221
- item (dict): Annotation item data
222
- item_id (str): ID of the annotation item
223
- classification (str): Classification label
224
- tools_data (dict): Dictionary to store processed tool data
225
- id_to_full_classification (dict, optional): Full classification mapping
126
+ Raises:
127
+ ValueError: Missing required fields or invalid format
226
128
  """
227
- if 'named_entity' not in tools_data:
228
- tools_data['named_entity'] = []
129
+ # Input validation
130
+ if 'annotations' not in v1_data:
131
+ raise ValueError("V1 data requires 'annotations' field")
132
+ if 'annotationsData' not in v1_data:
133
+ raise ValueError("V1 data requires 'annotationsData' field")
229
134
 
230
- # Process named_entity ranges and content
231
- entity_data = {}
232
- if 'ranges' in item and isinstance(item['ranges'], list):
233
- # Store ranges information
234
- entity_data['ranges'] = item['ranges']
135
+ # Create annotation_data
136
+ annotation_data = self._build_annotation_data(v1_data)
235
137
 
236
- if 'content' in item:
237
- # Store selected text content
238
- entity_data['content'] = item['content']
138
+ # Create annotation_meta (preserve V1 top-level structure)
139
+ annotation_meta = self._build_annotation_meta(v1_data)
239
140
 
240
- tools_data['named_entity'].append({
241
- 'id': item_id,
242
- 'classification': classification,
243
- 'attrs': [],
244
- 'data': entity_data, # Format: {ranges: [...], content: "..."}
245
- })
141
+ return {
142
+ 'annotation_data': annotation_data,
143
+ 'annotation_meta': annotation_meta,
144
+ }
246
145
 
247
- def _process_polyline_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
248
- """Process polyline annotation.
146
+ def _build_annotation_data(self, v1_data: dict[str, Any]) -> V2AnnotationData:
147
+ """Create annotation_data (V2 common structure) from V1 data
249
148
 
250
149
  Args:
251
- item (dict): Annotation item data
252
- item_id (str): ID of the annotation item
253
- classification (str): Classification label
254
- tools_data (dict): Dictionary to store processed tool data
255
- id_to_full_classification (dict, optional): Full classification mapping
256
- """
257
- if 'polyline' not in tools_data:
258
- tools_data['polyline'] = []
259
-
260
- # Process polyline coordinates
261
- polyline_data = []
262
- if 'coordinate' in item and isinstance(item['coordinate'], list):
263
- # Convert each coordinate point to [x, y] format
264
- for point in item['coordinate']:
265
- if 'x' in point and 'y' in point:
266
- polyline_data.append([point['x'], point['y']])
267
-
268
- tools_data['polyline'].append({
269
- 'id': item_id,
270
- 'classification': classification,
271
- 'attrs': [],
272
- 'data': polyline_data, # Format: [[x1, y1], [x2, y2], [x3, y3], ...]
273
- })
274
-
275
- def _process_keypoint_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
276
- """Process keypoint annotation.
150
+ v1_data: V1 data
277
151
 
278
- Args:
279
- item (dict): Annotation item data
280
- item_id (str): ID of the annotation item
281
- classification (str): Classification label
282
- tools_data (dict): Dictionary to store processed tool data
283
- id_to_full_classification (dict, optional): Full classification mapping
152
+ Returns:
153
+ V2 common annotation structure
284
154
  """
285
- if 'keypoint' not in tools_data:
286
- tools_data['keypoint'] = []
287
-
288
- # Process keypoint coordinate (single point)
289
- keypoint_data = []
290
- if 'coordinate' in item and isinstance(item['coordinate'], dict):
291
- coord = item['coordinate']
292
- if 'x' in coord and 'y' in coord:
293
- keypoint_data = [coord['x'], coord['y']]
294
-
295
- tools_data['keypoint'].append({
296
- 'id': item_id,
297
- 'classification': classification,
298
- 'attrs': [],
299
- 'data': keypoint_data, # Format: [x, y]
300
- })
301
-
302
- def _convert_3d_bounding_box(self, item, item_id, classification, tools_data, id_to_full_classification=None):
303
- """Process 3D bounding box annotation.
155
+ annotations = v1_data.get('annotations', {})
156
+ annotations_data = v1_data.get('annotationsData', {})
304
157
 
305
- Args:
306
- item (dict): Annotation item data
307
- item_id (str): ID of the annotation item
308
- classification (str): Classification label
309
- tools_data (dict): Dictionary to store processed tool data
310
- id_to_full_classification (dict, optional): Full classification mapping
311
- """
312
- if '3d_bounding_box' not in tools_data:
313
- tools_data['3d_bounding_box'] = []
314
-
315
- # Process 3d_bounding_box psr (position, scale, rotation)
316
- psr_data = {}
317
- if 'psr' in item and isinstance(item['psr'], dict):
318
- psr = item['psr']
319
-
320
- # Extract only x, y, z values from position, scale, rotation
321
- for component in ['position', 'scale', 'rotation']:
322
- if component in psr and isinstance(psr[component], dict):
323
- psr_data[component] = {
324
- 'x': psr[component].get('x'),
325
- 'y': psr[component].get('y'),
326
- 'z': psr[component].get('z'),
327
- }
328
-
329
- tools_data['3d_bounding_box'].append({
330
- 'id': item_id,
331
- 'classification': classification,
332
- 'attrs': [],
333
- 'data': psr_data, # Format: {position: {x,y,z}, scale: {x,y,z}, rotation: {x,y,z}}
334
- })
335
-
336
- def _convert_video_segmentation_data(
337
- self, item, item_id, classification, tools_data, id_to_full_classification=None
338
- ):
339
- """Process video segmentation annotation data.
158
+ # Build classification map
159
+ classification_map = self._build_classification_map(annotations)
340
160
 
341
- Args:
342
- item (dict): Annotation item data
343
- item_id (str): ID of the annotation item
344
- classification (str): Classification label
345
- tools_data (dict): Dictionary to store processed tool data
346
- id_to_full_classification (dict, optional): Full classification mapping
347
- """
348
- if 'segmentation' not in tools_data:
349
- tools_data['segmentation'] = []
350
-
351
- # Process frame section-based segmentation (videos)
352
- segmentation_data = {}
353
- if 'section' in item and isinstance(item['section'], dict):
354
- segmentation_data = item['section']
355
-
356
- tools_data['segmentation'].append({
357
- 'id': item_id,
358
- 'classification': classification,
359
- 'attrs': [],
360
- 'data': segmentation_data, # Format: {startFrame: x, endFrame: y}
361
- })
362
-
363
- def _convert_image_segmentation_data(
364
- self, item, item_id, classification, tools_data, id_to_full_classification=None
365
- ):
366
- """Process image segmentation annotation data.
161
+ # Convert annotations by media type
162
+ result: V2AnnotationData = {
163
+ 'classification': classification_map,
164
+ }
367
165
 
368
- Args:
369
- item (dict): Annotation item data
370
- item_id (str): ID of the annotation item
371
- classification (str): Classification label
372
- tools_data (dict): Dictionary to store processed tool data
373
- id_to_full_classification (dict, optional): Full classification mapping
374
- """
375
- if 'segmentation' not in tools_data:
376
- tools_data['segmentation'] = []
166
+ # Process by media ID
167
+ for media_id, ann_list in annotations.items():
168
+ # Detect media type
169
+ singular_type, plural_type = self._extract_media_type_info(media_id)
377
170
 
378
- # Process pixel-based segmentation (images)
379
- segmentation_data = {}
380
- if 'pixel_indices' in item and isinstance(item['pixel_indices'], list):
381
- segmentation_data = item['pixel_indices']
171
+ # Initialize media type array
172
+ if plural_type not in result:
173
+ result[plural_type] = []
382
174
 
383
- tools_data['segmentation'].append({
384
- 'id': item_id,
385
- 'classification': classification,
386
- 'attrs': [],
387
- 'data': segmentation_data, # Format: [pixel_indices...]
388
- })
175
+ # Convert media item
176
+ media_item = self._convert_media_item(media_id, ann_list, annotations_data.get(media_id, []))
389
177
 
390
- def _process_polygon_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
391
- """Process polygon annotation.
178
+ result[plural_type].append(media_item)
392
179
 
393
- Args:
394
- item (dict): Annotation item data
395
- item_id (str): ID of the annotation item
396
- classification (str): Classification label
397
- tools_data (dict): Dictionary to store processed tool data
398
- id_to_full_classification (dict, optional): Full classification mapping
399
- """
400
- if 'polygon' not in tools_data:
401
- tools_data['polygon'] = []
402
-
403
- # Process polygon coordinates
404
- polygon_data = []
405
- if 'coordinate' in item and isinstance(item['coordinate'], list):
406
- # Convert each coordinate point to [x, y] format
407
- for point in item['coordinate']:
408
- if 'x' in point and 'y' in point:
409
- polygon_data.append([point['x'], point['y']])
410
-
411
- tools_data['polygon'].append({
412
- 'id': item_id,
413
- 'classification': classification,
414
- 'attrs': [],
415
- 'data': polygon_data, # Format: [[x1, y1], [x2, y2], [x3, y3], ...]
416
- })
417
-
418
- def _process_relation_common(self, item, item_id, classification, tools_data, id_to_full_classification=None):
419
- """Process relation annotation.
180
+ return result
181
+
182
+ def _build_annotation_meta(self, v1_data: dict[str, Any]) -> AnnotationMeta:
183
+ """Create annotation_meta (V1 top-level structure) from V1 data
420
184
 
421
185
  Args:
422
- item (dict): Annotation item data
423
- item_id (str): ID of the annotation item
424
- classification (str): Classification label
425
- tools_data (dict): Dictionary to store processed tool data
426
- id_to_full_classification (dict, optional): Full classification mapping
186
+ v1_data: Complete V1 data
187
+
188
+ Returns:
189
+ V1 top-level structure (preserved as-is)
427
190
  """
428
- if 'relation' not in tools_data:
429
- tools_data['relation'] = []
191
+ return {
192
+ 'extra': v1_data.get('extra', {}),
193
+ 'annotations': v1_data.get('annotations', {}),
194
+ 'annotationsData': v1_data.get('annotationsData', {}),
195
+ 'relations': v1_data.get('relations', {}),
196
+ 'annotationGroups': v1_data.get('annotationGroups', {}),
197
+ 'assignmentId': v1_data.get('assignmentId'),
198
+ }
430
199
 
431
- # Process relation data (needs adjustment based on actual relation data structure)
432
- relation_data = []
433
- if 'data' in item:
434
- relation_data = item['data']
200
+ def _build_classification_map(self, annotations: dict[str, list[dict[str, Any]]]) -> dict[str, list[str]]:
201
+ """Build classification map from annotations
435
202
 
436
- tools_data['relation'].append({
437
- 'id': item_id,
438
- 'classification': classification,
439
- 'attrs': [],
440
- 'data': relation_data, # Format: ['from_id', 'to_id']
441
- })
203
+ Args:
204
+ annotations: V1 annotations data
442
205
 
443
- def _convert_group(self, item, item_id, classification, tools_data, id_to_full_classification=None):
444
- """Process group annotation.
206
+ Returns:
207
+ Class label map by tool
208
+ e.g., {"bounding_box": ["person", "car"], "polygon": ["road"]}
209
+ """
210
+ classification_map: dict[str, set[str]] = {}
211
+
212
+ for media_id, ann_list in annotations.items():
213
+ for ann in ann_list:
214
+ tool = ann.get('tool', '')
215
+ classification_obj = ann.get('classification') or {}
216
+ class_label = classification_obj.get('class', '')
217
+
218
+ if tool and class_label:
219
+ if tool not in classification_map:
220
+ classification_map[tool] = set()
221
+ classification_map[tool].add(class_label)
222
+
223
+ # Convert set to list
224
+ return {tool: sorted(list(labels)) for tool, labels in classification_map.items()}
225
+
226
+ def _convert_media_item(
227
+ self,
228
+ media_id: str,
229
+ annotations: list[dict[str, Any]],
230
+ annotations_data: list[dict[str, Any]],
231
+ ) -> dict[str, list[dict[str, Any]]]:
232
+ """Convert annotations for a single media item
445
233
 
446
234
  Args:
447
- item (dict): Annotation item data
448
- item_id (str): ID of the annotation item
449
- classification (str): Classification label
450
- tools_data (dict): Dictionary to store processed tool data
451
- id_to_full_classification (dict, optional): Full classification mapping
235
+ media_id: Media ID
236
+ annotations: V1 annotations for this media
237
+ annotations_data: V1 annotationsData for this media
238
+
239
+ Returns:
240
+ V2 annotations grouped by tool
452
241
  """
453
- if 'group' not in tools_data:
454
- tools_data['group'] = []
455
-
456
- # Process group data (needs adjustment based on actual group data structure)
457
- group_data = []
458
- if 'data' in item:
459
- group_data = item['data']
460
-
461
- tools_data['group'].append({
462
- 'id': item_id,
463
- 'classification': classification,
464
- 'attrs': [],
465
- 'data': group_data, # Format: ['id1', 'id2', 'id3', ...]
466
- })
467
-
468
- # Include all the _convert_* methods from previous code...
469
- def _convert_classification(self, item, item_id, classification, tools_data, id_to_full_classification):
470
- """Process classification annotation."""
471
- if 'classification' not in tools_data:
472
- tools_data['classification'] = []
473
-
474
- # Get full classification info (including additional attributes)
475
- full_classification = id_to_full_classification.get(item_id, {})
476
-
477
- # Store additional attributes in attrs array
478
- attrs = []
479
- classification_data = {}
480
-
481
- for key, value in full_classification.items():
482
- if key != 'class': # class is already stored in classification field
483
- if isinstance(value, list) and len(value) > 0:
484
- # Array attributes like multiple
485
- attrs.append({'name': key, 'value': value})
486
- elif isinstance(value, str) and value.strip():
487
- # String attributes like text, single_radio, single_dropdown
488
- attrs.append({'name': key, 'value': value})
489
-
490
- tools_data['classification'].append({
491
- 'id': item_id,
492
- 'classification': classification,
493
- 'attrs': attrs,
494
- 'data': classification_data, # Empty object for full text classification
495
- })
496
-
497
- def _convert_prompt(self, item, item_id, classification, tools_data, id_to_full_classification=None):
498
- """Process prompt annotation."""
499
- if 'prompt' not in tools_data:
500
- tools_data['prompt'] = []
501
-
502
- # Process prompt input data from annotationsData
503
- prompt_data = {}
504
- attrs = []
505
-
506
- if 'input' in item and isinstance(item['input'], list):
507
- # Store complete input structure
508
- input_items = []
509
- for input_item in item['input']:
510
- if isinstance(input_item, dict):
511
- input_items.append(input_item)
512
- # Extract text value for easy access
513
- if input_item.get('type') == 'text' and 'value' in input_item:
514
- prompt_data['text'] = input_item['value']
515
- attrs.append('text')
516
-
517
- prompt_data['input'] = input_items
518
- attrs.append('input')
519
-
520
- # Include any additional metadata
521
- for key in ['model', 'displayName', 'generatedBy', 'timestamp']:
522
- if key in item:
523
- prompt_data[key] = item[key]
524
- attrs.append(key)
525
-
526
- result_item = {
527
- 'id': item_id,
528
- 'classification': classification,
529
- 'attrs': attrs,
530
- 'data': prompt_data, # Format: {text: "prompt text", input: [...], ...}
531
- }
532
- tools_data['prompt'].append(result_item)
533
-
534
- def _convert_answer(self, item, item_id, classification, tools_data, id_to_full_classification=None):
535
- """Process answer annotation."""
536
- if 'answer' not in tools_data:
537
- tools_data['answer'] = []
538
-
539
- # Process answer output data from annotationsData
540
- answer_data = {}
541
- attrs = []
542
-
543
- if 'output' in item and isinstance(item['output'], list):
544
- # Store complete output structure
545
- output_items = []
546
- for output_item in item['output']:
547
- if isinstance(output_item, dict):
548
- output_items.append(output_item)
549
- # Extract text value for easy access
550
- if output_item.get('type') == 'text' and 'value' in output_item:
551
- answer_data['text'] = output_item['value']
552
- attrs.append('text')
553
-
554
- answer_data['output'] = output_items
555
- attrs.append('output')
556
-
557
- # Include all additional metadata from annotationsData
558
- metadata_fields = ['model', 'displayName', 'generatedBy', 'promptAnnotationId', 'timestamp', 'primaryKey']
559
- for key in metadata_fields:
560
- if key in item:
561
- answer_data[key] = item[key]
562
- attrs.append(key)
563
-
564
- result_item = {
565
- 'id': item_id,
566
- 'classification': classification,
567
- 'attrs': attrs,
568
- 'data': answer_data, # Format: {text: "answer text", output: [...], model: "...", ...}
569
- }
242
+ # Create ID annotationData mapping
243
+ data_by_id = {item['id']: item for item in annotations_data if 'id' in item}
570
244
 
571
- tools_data['answer'].append(result_item)
245
+ # Group by tool
246
+ result: dict[str, list[dict[str, Any]]] = {}
572
247
 
573
- def _convert_3d_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
574
- """Process 3D segmentation annotation."""
575
- if '3d_segmentation' not in tools_data:
576
- tools_data['3d_segmentation'] = []
248
+ for ann in annotations:
249
+ ann_id = ann.get('id', '')
250
+ tool = ann.get('tool', '')
577
251
 
578
- # Process 3D segmentation point data from annotationsData
579
- segmentation_data = {}
580
- attrs = []
252
+ if not tool:
253
+ continue
581
254
 
582
- if 'points' in item and isinstance(item['points'], list):
583
- segmentation_data['points'] = item['points']
584
- attrs.append('points')
255
+ # Get processor
256
+ processor = self.get_processor(tool)
257
+ if not processor:
258
+ # Raise error for unsupported tool
259
+ supported_tools = list(self._tool_processors.keys())
260
+ raise ValueError(f"Unsupported tool: '{tool}'. Supported tools: {', '.join(sorted(supported_tools))}")
585
261
 
586
- # Include any additional metadata
587
- for key in ['tool']:
588
- if key in item:
589
- segmentation_data[key] = item[key]
590
- attrs.append(key)
262
+ # Find annotationData for this ID
263
+ ann_data = data_by_id.get(ann_id, {})
591
264
 
592
- result_item = {
593
- 'id': item_id,
594
- 'classification': classification,
595
- 'attrs': attrs,
596
- 'data': segmentation_data, # Format: {points: [146534, 146662, ...], ...}
597
- }
598
- tools_data['3d_segmentation'].append(result_item)
599
-
600
- def _convert_polygon(self, item, item_id, classification, tools_data, id_to_full_classification=None):
601
- """Process polygon annotation."""
602
- return self._process_polygon_common(item, item_id, classification, tools_data, id_to_full_classification)
603
-
604
- def _convert_polyline(self, item, item_id, classification, tools_data, id_to_full_classification=None):
605
- """Process polyline annotation."""
606
- return self._process_polyline_common(item, item_id, classification, tools_data, id_to_full_classification)
607
-
608
- def _convert_keypoint(self, item, item_id, classification, tools_data, id_to_full_classification=None):
609
- """Process keypoint annotation."""
610
- return self._process_keypoint_common(item, item_id, classification, tools_data, id_to_full_classification)
611
-
612
- # Segmentation methods
613
- def _convert_image_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
614
- """Process segmentation annotation for image."""
615
- return self._convert_image_segmentation_data(
616
- item, item_id, classification, tools_data, id_to_full_classification
617
- )
618
-
619
- def _convert_video_segmentation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
620
- """Process segmentation annotation for video."""
621
- return self._convert_video_segmentation_data(
622
- item, item_id, classification, tools_data, id_to_full_classification
623
- )
624
-
625
- def _convert_relation(self, item, item_id, classification, tools_data, id_to_full_classification=None):
626
- """Process relation annotation."""
627
- return self._process_relation_common(item, item_id, classification, tools_data, id_to_full_classification)
265
+ # Convert to V2
266
+ v2_annotation = processor.to_v2(ann, ann_data)
267
+
268
+ # Group by tool
269
+ if tool not in result:
270
+ result[tool] = []
271
+ result[tool].append(v2_annotation)
272
+
273
+ return result