synapse-sdk 1.0.0a23__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +310 -5
  3. synapse_sdk/cli/alias/__init__.py +22 -0
  4. synapse_sdk/cli/alias/create.py +36 -0
  5. synapse_sdk/cli/alias/dataclass.py +31 -0
  6. synapse_sdk/cli/alias/default.py +16 -0
  7. synapse_sdk/cli/alias/delete.py +15 -0
  8. synapse_sdk/cli/alias/list.py +19 -0
  9. synapse_sdk/cli/alias/read.py +15 -0
  10. synapse_sdk/cli/alias/update.py +17 -0
  11. synapse_sdk/cli/alias/utils.py +61 -0
  12. synapse_sdk/cli/code_server.py +687 -0
  13. synapse_sdk/cli/config.py +440 -0
  14. synapse_sdk/cli/devtools.py +90 -0
  15. synapse_sdk/cli/plugin/__init__.py +33 -0
  16. synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
  17. synapse_sdk/{plugins/cli → cli/plugin}/publish.py +23 -15
  18. synapse_sdk/clients/agent/__init__.py +9 -3
  19. synapse_sdk/clients/agent/container.py +143 -0
  20. synapse_sdk/clients/agent/core.py +19 -0
  21. synapse_sdk/clients/agent/ray.py +298 -9
  22. synapse_sdk/clients/backend/__init__.py +30 -12
  23. synapse_sdk/clients/backend/annotation.py +13 -5
  24. synapse_sdk/clients/backend/core.py +31 -4
  25. synapse_sdk/clients/backend/data_collection.py +186 -0
  26. synapse_sdk/clients/backend/hitl.py +17 -0
  27. synapse_sdk/clients/backend/integration.py +16 -1
  28. synapse_sdk/clients/backend/ml.py +5 -1
  29. synapse_sdk/clients/backend/models.py +78 -0
  30. synapse_sdk/clients/base.py +384 -41
  31. synapse_sdk/clients/ray/serve.py +2 -0
  32. synapse_sdk/clients/validators/collections.py +31 -0
  33. synapse_sdk/devtools/config.py +94 -0
  34. synapse_sdk/devtools/server.py +41 -0
  35. synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
  36. synapse_sdk/devtools/streamlit_app/app.py +128 -0
  37. synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
  38. synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
  39. synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
  40. synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
  41. synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
  42. synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
  43. synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
  44. synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
  45. synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
  46. synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
  47. synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
  48. synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
  49. synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
  50. synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
  51. synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
  52. synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
  53. synapse_sdk/devtools/streamlit_app.py +10 -0
  54. synapse_sdk/loggers.py +120 -9
  55. synapse_sdk/plugins/README.md +1340 -0
  56. synapse_sdk/plugins/__init__.py +0 -13
  57. synapse_sdk/plugins/categories/base.py +117 -11
  58. synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
  59. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
  60. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  61. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  62. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  63. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  64. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  65. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  66. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  67. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  68. synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
  69. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  70. synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
  71. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +13 -12
  72. synapse_sdk/plugins/categories/neural_net/actions/train.py +1134 -31
  73. synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
  74. synapse_sdk/plugins/categories/neural_net/base/inference.py +1 -1
  75. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +32 -4
  76. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
  77. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  78. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  79. synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
  80. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  81. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  82. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  83. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  84. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  85. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  86. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  87. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  88. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  89. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  90. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  91. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  92. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  93. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  94. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  95. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
  96. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
  97. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
  98. synapse_sdk/plugins/categories/upload/__init__.py +0 -0
  99. synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
  100. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  101. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  102. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  103. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  104. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  105. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  106. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  107. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  108. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  109. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  110. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  111. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  112. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  113. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  114. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  115. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  116. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  117. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  118. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  119. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  120. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  121. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  122. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  123. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  124. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  125. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  126. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  127. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  128. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  129. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  130. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  131. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  132. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  133. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  134. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  135. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  136. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  137. synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
  138. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  139. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
  140. synapse_sdk/plugins/enums.py +3 -1
  141. synapse_sdk/plugins/models.py +148 -11
  142. synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
  143. synapse_sdk/plugins/templates/schema.json +491 -0
  144. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
  145. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
  146. synapse_sdk/plugins/utils/__init__.py +46 -0
  147. synapse_sdk/plugins/utils/actions.py +119 -0
  148. synapse_sdk/plugins/utils/config.py +203 -0
  149. synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
  150. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  151. synapse_sdk/plugins/utils/registry.py +58 -0
  152. synapse_sdk/shared/__init__.py +25 -0
  153. synapse_sdk/shared/enums.py +93 -0
  154. synapse_sdk/types.py +19 -0
  155. synapse_sdk/utils/converters/__init__.py +240 -0
  156. synapse_sdk/utils/converters/coco/__init__.py +0 -0
  157. synapse_sdk/utils/converters/coco/from_dm.py +322 -0
  158. synapse_sdk/utils/converters/coco/to_dm.py +215 -0
  159. synapse_sdk/utils/converters/dm/__init__.py +57 -0
  160. synapse_sdk/utils/converters/dm/base.py +137 -0
  161. synapse_sdk/utils/converters/dm/from_v1.py +273 -0
  162. synapse_sdk/utils/converters/dm/to_v1.py +321 -0
  163. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  164. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  165. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  166. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  167. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  168. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  169. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  170. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  171. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  172. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  173. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  174. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  175. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  176. synapse_sdk/utils/converters/dm/types.py +168 -0
  177. synapse_sdk/utils/converters/dm/utils.py +162 -0
  178. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  179. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  180. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  181. synapse_sdk/utils/converters/pascal/__init__.py +0 -0
  182. synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
  183. synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
  184. synapse_sdk/utils/converters/yolo/__init__.py +0 -0
  185. synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
  186. synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
  187. synapse_sdk/utils/dataset.py +46 -0
  188. synapse_sdk/utils/encryption.py +158 -0
  189. synapse_sdk/utils/file/__init__.py +58 -0
  190. synapse_sdk/utils/file/archive.py +32 -0
  191. synapse_sdk/utils/file/checksum.py +56 -0
  192. synapse_sdk/utils/file/chunking.py +31 -0
  193. synapse_sdk/utils/file/download.py +385 -0
  194. synapse_sdk/utils/file/encoding.py +40 -0
  195. synapse_sdk/utils/file/io.py +22 -0
  196. synapse_sdk/utils/file/upload.py +165 -0
  197. synapse_sdk/utils/file/video/__init__.py +29 -0
  198. synapse_sdk/utils/file/video/transcode.py +307 -0
  199. synapse_sdk/utils/file.py.backup +301 -0
  200. synapse_sdk/utils/http.py +138 -0
  201. synapse_sdk/utils/network.py +309 -0
  202. synapse_sdk/utils/storage/__init__.py +72 -0
  203. synapse_sdk/utils/storage/providers/__init__.py +183 -0
  204. synapse_sdk/utils/storage/providers/file_system.py +134 -0
  205. synapse_sdk/utils/storage/providers/gcp.py +13 -0
  206. synapse_sdk/utils/storage/providers/http.py +190 -0
  207. synapse_sdk/utils/storage/providers/s3.py +91 -0
  208. synapse_sdk/utils/storage/providers/sftp.py +47 -0
  209. synapse_sdk/utils/storage/registry.py +17 -0
  210. synapse_sdk-2025.12.3.dist-info/METADATA +123 -0
  211. synapse_sdk-2025.12.3.dist-info/RECORD +279 -0
  212. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +1 -1
  213. synapse_sdk/clients/backend/dataset.py +0 -51
  214. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  215. synapse_sdk/plugins/cli/__init__.py +0 -21
  216. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  217. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  218. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  219. synapse_sdk/utils/file.py +0 -168
  220. synapse_sdk/utils/storage.py +0 -91
  221. synapse_sdk-1.0.0a23.dist-info/METADATA +0 -44
  222. synapse_sdk-1.0.0a23.dist-info/RECORD +0 -114
  223. /synapse_sdk/{plugins/cli → cli/plugin}/run.py +0 -0
  224. /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
  225. /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
  226. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  227. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info/licenses}/LICENSE +0 -0
  228. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,367 @@
1
+ import random
2
+ import string
3
+
4
+ from . import BaseDMConverter
5
+
6
+
7
class DMV2ToV1Converter(BaseDMConverter):
    """DM v2 to v1 format converter class.

    The converter walks every media list in the v2 payload, and for each
    annotation emits one entry in ``annotations`` (metadata) and, through the
    per-tool ``_convert_*`` processors, one entry in ``annotationsData``
    (geometry / payload).
    """

    def __init__(self, new_dm_data=None, file_type=None):
        """Initialize the converter.

        Args:
            new_dm_data (dict, optional): DM v2 format data to be converted.
                When omitted, a fresh empty dict is used for this instance.
            file_type (str, optional): Type of file being converted (image, video, pcd, text, audio).
                When omitted it is detected from the top-level keys of ``new_dm_data``.
        """
        # A literal ``{}`` default is created once and would be stored on (and
        # therefore shared by) every instance built without an argument.
        if new_dm_data is None:
            new_dm_data = {}

        # Auto-detect file type if not provided
        if file_type is None:
            file_type = self._detect_file_type(new_dm_data)

        super().__init__(file_type=file_type)
        self.new_dm_data = new_dm_data
        self.annotations = {}
        self.annotations_data = {}
        self.extra = {}
        self.relations = {}
        self.annotation_groups = {}

    def _detect_file_type(self, data):
        """Auto-detect file type from the data structure.

        Args:
            data (dict): DM v2 format data

        Returns:
            str | None: Detected file type (image, video, pcd, text, audio), or
            ``None`` when ``data`` is empty or holds none of the known media keys.
        """
        if not data:
            return None

        # Media keys are the plural forms; the first match in this order wins.
        known_media_keys = (
            ('images', 'image'),
            ('videos', 'video'),
            ('pcds', 'pcd'),
            ('texts', 'text'),
            ('audios', 'audio'),
        )
        for plural_key, file_type in known_media_keys:
            if plural_key in data:
                return file_type

        return None

    def convert(self):
        """Convert DM v2 data to v1 format.

        Every top-level key except ``'classification'`` is treated as a list of
        media items. Media ids are built as ``'<singular type>_<index>'`` with a
        1-based index.

        Returns:
            dict: Converted data in DM v1 format with the keys ``extra``,
            ``relations``, ``annotations``, ``annotationsData`` and
            ``annotationGroups``.
        """
        new_dm_data = self.new_dm_data

        # Reset state so that repeated calls do not accumulate results
        self.annotations = {}
        self.annotations_data = {}
        self.extra = {}
        self.relations = {}
        self.annotation_groups = {}

        # Process each media type (images, videos, etc.)
        for media_type_plural, media_items in new_dm_data.items():
            if media_type_plural == 'classification':
                continue

            media_type = self._singularize_media_type(media_type_plural)

            for index, media_item in enumerate(media_items, 1):
                media_id = f'{media_type}_{index}'

                # Initialize structures for this media
                self.annotations[media_id] = []
                self.annotations_data[media_id] = []
                self.extra[media_id] = {}
                self.relations[media_id] = []
                self.annotation_groups[media_id] = []

                # Process each tool type in the media item
                for tool_type, tool_data in media_item.items():
                    self._process_tool_data(media_id, tool_type, tool_data)

        return {
            'extra': self.extra,
            'relations': self.relations,
            'annotations': self.annotations,
            'annotationsData': self.annotations_data,
            'annotationGroups': self.annotation_groups,
        }

    def _process_tool_data(self, media_id, tool_type, tool_data):
        """Process tool data for a specific media item.

        Args:
            media_id (str): ID of the media item
            tool_type (str): Type of annotation tool
            tool_data (list): List of annotation data for this tool

        Raises:
            KeyError: If an annotation lacks the ``'id'`` or ``'classification'`` key.
        """
        for annotation in tool_data:
            annotation_id = annotation['id']
            classification = annotation['classification']
            attrs = annotation.get('attrs', [])
            data = annotation.get('data', {})

            # Create annotation entry
            annotation_entry = {
                'id': annotation_id,
                'tool': tool_type,
                'isLocked': False,
                'isVisible': True,
                'classification': {'class': classification},
            }

            # Add additional classification attributes from attrs; unnamed
            # attributes and attributes whose value is None are skipped.
            for attr in attrs:
                attr_name = attr.get('name')
                attr_value = attr.get('value')
                if attr_name and attr_value is not None:
                    annotation_entry['classification'][attr_name] = attr_value

            # Add special attributes for specific tools
            if tool_type == 'keypoint':
                annotation_entry['shape'] = 'circle'

            self.annotations[media_id].append(annotation_entry)

            # Create annotations data entry using tool processor
            processor = self.tool_processors.get(tool_type)
            if processor:
                processor(annotation_id, data, self.annotations_data[media_id])
            else:
                self._handle_unknown_tool(tool_type, annotation_id)

    # ------------------------------------------------------------------
    # Shared helpers for the per-tool processors
    # ------------------------------------------------------------------

    def _points_to_coordinates(self, data):
        """Build v1 point dicts (``x``, ``y``, random ``id``) from flat or nested point data."""
        if data and isinstance(data[0], list):
            # Nested format: [[x1, y1], [x2, y2], ...]; points with fewer than two values are skipped
            pairs = ((point[0], point[1]) for point in data if len(point) >= 2)
        else:
            # Flat format: [x1, y1, x2, y2, ...]; a trailing unpaired value is ignored
            pairs = ((data[i], data[i + 1]) for i in range(0, len(data) - 1, 2))

        return [{'x': x, 'y': y, 'id': self._generate_random_id()} for x, y in pairs]

    @staticmethod
    def _segmentation_entry(annotation_id, data):
        """Build a segmentation entry: list data -> ``pixel_indices``, dict data -> ``section``."""
        entry = {'id': annotation_id}

        if isinstance(data, list):
            # Pixel-based segmentation
            entry['pixel_indices'] = data
        elif isinstance(data, dict):
            # Section-based segmentation
            entry['section'] = data

        return entry

    @staticmethod
    def _dict_payload_entry(annotation_id, data):
        """Build an entry holding ``id`` plus every key of ``data`` when ``data`` is a dict."""
        entry = {'id': annotation_id}

        if isinstance(data, dict):
            # Keys in ``data`` (including a possible 'id') take precedence, as with dict.update
            entry.update(data)

        return entry

    # ------------------------------------------------------------------
    # Per-tool processors
    # ------------------------------------------------------------------

    def _convert_bounding_box(self, annotation_id, data, annotations_data):
        """Process bounding box annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Bounding box data; the first four values are copied
                verbatim to ``x``, ``y``, ``width`` and ``height``
            annotations_data (list): List to append the processed data

        Note:
            Nothing is appended when ``data`` holds fewer than four values.
        """
        if len(data) >= 4:
            x, y, width, height = data[:4]
            coordinate = {'x': x, 'y': y, 'width': width, 'height': height}

            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})

    def _convert_named_entity(self, annotation_id, data, annotations_data):
        """Process named entity annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Named entity data with optional ``ranges`` and ``content``
            annotations_data (list): List to append the processed data
        """
        entity_data = {'id': annotation_id}

        # Only the keys that are actually present are carried over
        for key in ('ranges', 'content'):
            if key in data:
                entity_data[key] = data[key]

        annotations_data.append(entity_data)

    def _convert_classification(self, annotation_id, data, annotations_data):
        """Process classification annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Classification data (ignored)
            annotations_data (list): List to append the processed data
        """
        # Classification data is typically empty in v2, so we just add the ID
        annotations_data.append({'id': annotation_id})

    def _convert_polyline(self, annotation_id, data, annotations_data):
        """Process polyline annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Polyline data - can be flat [x1, y1, x2, y2, ...] or nested [[x1, y1], [x2, y2], ...]
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'coordinate': self._points_to_coordinates(data)})

    def _convert_keypoint(self, annotation_id, data, annotations_data):
        """Process keypoint annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Keypoint data [x, y]
            annotations_data (list): List to append the processed data

        Note:
            Nothing is appended when ``data`` holds fewer than two values.
        """
        if len(data) >= 2:
            coordinate = {'x': data[0], 'y': data[1]}

            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})

    def _convert_3d_bounding_box(self, annotation_id, data, annotations_data):
        """Process 3D bounding box annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): 3D bounding box PSR data, stored unchanged under ``psr``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'psr': data})

    def _convert_image_segmentation(self, annotation_id, data, annotations_data):
        """Process segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): Segmentation data (pixel_indices or section)
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._segmentation_entry(annotation_id, data))

    def _convert_video_segmentation(self, annotation_id, data, annotations_data):
        """Process video segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): Segmentation data (pixel_indices or section)
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._segmentation_entry(annotation_id, data))

    def _convert_3d_segmentation(self, annotation_id, data, annotations_data):
        """Process 3D segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): 3D segmentation data
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._segmentation_entry(annotation_id, data))

    def _convert_prompt(self, annotation_id, data, annotations_data):
        """Process prompt annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Prompt data, merged into the entry next to ``id``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._dict_payload_entry(annotation_id, data))

    def _convert_answer(self, annotation_id, data, annotations_data):
        """Process answer annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Answer data, merged into the entry next to ``id``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._dict_payload_entry(annotation_id, data))

    def _convert_polygon(self, annotation_id, data, annotations_data):
        """Process polygon annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Polygon data - can be flat [x1, y1, x2, y2, ...] or nested [[x1, y1], [x2, y2], ...]
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'coordinate': self._points_to_coordinates(data)})

    def _convert_relation(self, annotation_id, data, annotations_data):
        """Process relation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Relation data, stored unchanged under ``data``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'data': data})

    def _convert_group(self, annotation_id, data, annotations_data):
        """Process group annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Group data, stored unchanged under ``data``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'data': data})

    def _generate_random_id(self):
        """Generate a random ID similar to the original format.

        Returns:
            str: 10 characters drawn from ASCII letters, digits, ``-`` and ``_``.
            Uses ``random``, so the value is not suitable as a secret.
        """
        chars = string.ascii_letters + string.digits + '-_'
        return ''.join(random.choices(chars, k=10))
File without changes
@@ -0,0 +1,244 @@
1
+ import json
2
+ import os
3
+ import shutil
4
+ import xml.etree.ElementTree as ET
5
+ from glob import glob
6
+ from typing import IO, Any, Dict, List, Optional
7
+
8
+ from PIL import Image
9
+
10
+ from synapse_sdk.utils.converters import FromDMConverter
11
+
12
+
13
+ class FromDMToPascalConverter(FromDMConverter):
14
+ """Convert DM format to Pascal VOC format."""
15
+
16
+ IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']
17
+
18
def __init__(self, root_dir: str = None, is_categorized_dataset: bool = False, is_single_conversion: bool = False):
    """Set up the converter.

    All three arguments are forwarded positionally to the base converter.

    Args:
        root_dir: Dataset root. This class reads ``json`` and
            ``original_files`` beneath it and uses it as the default
            output directory.
        is_categorized_dataset: Whether conversion works per split
            instead of directly on ``root_dir``.
        is_single_conversion: Must be true for ``convert_single_file``
            to be usable.
    """
    super().__init__(root_dir, is_categorized_dataset, is_single_conversion)
    # Class names collected while parsing bounding boxes; written to classes.txt on save.
    self.class_names = set()
21
+
22
def find_image_for_base(self, img_dir: str, base: str) -> Optional[str]:
    """Return the path of the image belonging to ``base`` inside ``img_dir``.

    Candidates are ``base`` plus each entry of ``IMG_EXTENSIONS``, tried in
    that order. The first path that exists is returned, or ``None`` when
    none of them exists.
    """
    candidates = (os.path.join(img_dir, base + ext) for ext in self.IMG_EXTENSIONS)
    return next((path for path in candidates if os.path.exists(path)), None)
29
+
30
def build_pascal_xml(
    self, img_filename: str, img_size: tuple, objects: List[dict], has_segmentation: bool = None
) -> ET.ElementTree:
    """Assemble a Pascal VOC ``<annotation>`` tree for one image.

    Args:
        img_filename: Written to both ``<filename>`` and ``<path>``.
        img_size: ``(width, height, depth)`` written under ``<size>``.
        objects: Dicts with ``name``, ``xmin``, ``ymin``, ``xmax`` and
            ``ymax``; each becomes one ``<object>`` element.
        has_segmentation: Value for ``<segmented>``. When ``None`` it is
            derived from the ``has_segmentation`` flag of the objects.

    Returns:
        The ``ElementTree`` rooted at the ``<annotation>`` element.
    """

    def add_text(parent, tag, text):
        # Create <tag> under parent and set its text in one step.
        child = ET.SubElement(parent, tag)
        child.text = text
        return child

    width, height, depth = img_size

    root = ET.Element('annotation')
    add_text(root, 'folder', 'Images')
    add_text(root, 'filename', img_filename)
    add_text(root, 'path', img_filename)
    add_text(ET.SubElement(root, 'source'), 'database', 'Unknown')

    size_elem = ET.SubElement(root, 'size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        add_text(size_elem, tag, str(value))

    # segmented is 1 when any object carries segmentation, otherwise 0.
    if has_segmentation is None:
        has_segmentation = any(item.get('has_segmentation', False) for item in objects)
    add_text(root, 'segmented', '1' if has_segmentation else '0')

    for item in objects:
        obj_elem = ET.SubElement(root, 'object')
        add_text(obj_elem, 'name', item['name'])
        add_text(obj_elem, 'pose', 'Unspecified')
        add_text(obj_elem, 'truncated', '0')
        add_text(obj_elem, 'difficult', '0')
        bndbox = ET.SubElement(obj_elem, 'bndbox')
        for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
            add_text(bndbox, corner, str(item[corner]))

    return ET.ElementTree(root)
64
+
65
def parse_dm_annotations(self, annotation: dict):
    """Parse the DM annotations of one image into Pascal VOC objects.

    Only ``bounding_box`` entries are converted; other annotation types
    (polyline, keypoint, ...) are ignored because plain Pascal VOC has no
    representation for them. Every class name encountered is added to
    ``self.class_names``.

    Args:
        annotation: DM annotation dict of a single image. Each bounding box
            provides ``classification`` and ``data`` as ``(x, y, w, h)``.

    Returns:
        tuple: ``(objects, has_segmentation)``. ``objects`` is a list of
        dicts with ``name``, ``xmin``, ``ymin``, ``xmax``, ``ymax`` and
        ``has_segmentation``, with coordinates rounded to integers.
    """
    # A missing, null or empty 'segmentation' entry means there is no
    # segmentation data, so the key merely being present is not enough.
    has_segmentation = bool(annotation.get('segmentation'))

    objects = []
    # Tolerate a null 'bounding_box' value as well as a missing key.
    for box in annotation.get('bounding_box') or []:
        class_name = box['classification']
        x, y, w, h = box['data']
        objects.append({
            'name': class_name,
            'xmin': int(round(x)),
            'ymin': int(round(y)),
            'xmax': int(round(x + w)),
            'ymax': int(round(y + h)),
            'has_segmentation': has_segmentation,
        })
        self.class_names.add(class_name)

    return objects, has_segmentation
91
+
92
def _convert_split_dir(self, split_dir: str, split_name: str):
    """Convert one split directory into Pascal VOC entries.

    Every ``*.json`` under ``split_dir/json`` is paired with the image of
    the same base name under ``split_dir/original_files``. JSON files
    without a matching image are reported and skipped.

    Args:
        split_dir: Directory containing ``json`` and ``original_files``.
        split_name: Label used as the prefix of the "not found" message.

    Returns:
        list: ``(xml_tree, xml_filename, img_src, img_name)`` tuples,
        ordered by JSON file path.
    """
    json_dir = os.path.join(split_dir, 'json')
    img_dir = os.path.join(split_dir, 'original_files')
    results = []
    # glob() returns matches in arbitrary order; sort for reproducible output.
    for jfile in sorted(glob(os.path.join(json_dir, '*.json'))):
        base = os.path.splitext(os.path.basename(jfile))[0]
        img_path = self.find_image_for_base(img_dir, base)
        if not img_path:
            print(f'[{split_name}] Image for {base} not found, skipping.')
            continue

        with open(jfile, encoding='utf-8') as jf:
            data = json.load(jf)
        # Same tolerance as convert_single_file: a JSON without image
        # entries yields an annotation file with no objects.
        images = data.get('images') or []
        img_ann = images[0] if images else {}

        with Image.open(img_path) as img:
            width, height = img.size
            depth = len(img.getbands())

        img_name = os.path.basename(img_path)
        objects, has_segmentation = self.parse_dm_annotations(img_ann)
        xml_tree = self.build_pascal_xml(img_name, (width, height, depth), objects, has_segmentation)
        results.append((xml_tree, base + '.xml', img_path, img_name))
    return results
116
+
117
+ def _convert_root_dir(self):
118
+ """Convert non-categorized dataset to list of (xml_tree, xml_filename, img_src, img_name)."""
119
+ json_dir = os.path.join(self.root_dir, 'json')
120
+ img_dir = os.path.join(self.root_dir, 'original_files')
121
+ results = []
122
+ for jfile in glob(os.path.join(json_dir, '*.json')):
123
+ base = os.path.splitext(os.path.basename(jfile))[0]
124
+ img_path = self.find_image_for_base(img_dir, base)
125
+ if not img_path:
126
+ print(f'[Pascal] Image for {base} not found, skipping.')
127
+ continue
128
+ with open(jfile, encoding='utf-8') as jf:
129
+ data = json.load(jf)
130
+ img_ann = data['images'][0]
131
+ with Image.open(img_path) as img:
132
+ width, height = img.size
133
+ depth = len(img.getbands())
134
+ objects, has_segmentation = self.parse_dm_annotations(img_ann)
135
+ xml_tree = self.build_pascal_xml(
136
+ os.path.basename(img_path), (width, height, depth), objects, has_segmentation
137
+ )
138
+ xml_filename = base + '.xml'
139
+ results.append((xml_tree, xml_filename, img_path, os.path.basename(img_path)))
140
+ return results
141
+
142
def convert(self) -> Any:
    """Convert the DM dataset to Pascal VOC format.

    ``self.class_names`` is reset first. The result is also stored on
    ``self.converted_data``.

    Returns:
        - If categorized: dict {split: list of (xml_tree, xml_filename, img_src, img_name)},
          one key per entry of the mapping returned by
          ``_validate_splits(['train', 'valid'], ['test'])``
        - If not: list of (xml_tree, xml_filename, img_src, img_name)
    """
    self.class_names = set()

    if not self.is_categorized_dataset:
        self._validate_splits([], [])
        converted = self._convert_root_dir()
    else:
        split_dirs = self._validate_splits(['train', 'valid'], ['test'])
        converted = {name: self._convert_split_dir(path, name) for name, path in split_dirs.items()}

    self.converted_data = converted
    return converted
162
+
163
def save_to_folder(self, output_dir: Optional[str] = None):
    """Write the converted Pascal VOC data (XML files and images) to disk.

    - If categorized: per split under output_dir/{split}/{Annotations, Images}
    - If not: directly under output_dir/{Annotations, Images}

    ``convert()`` is run first when no converted data is cached. A
    ``classes.txt`` with the sorted class names is written to the output
    root. Images already at their destination path are not copied.

    Args:
        output_dir: Target directory; falls back to ``self.root_dir``.
    """
    outdir = output_dir or self.root_dir
    self.ensure_dir(outdir)
    if self.converted_data is None:
        self.converted_data = self.convert()

    # Normalise both layouts to (base directory, entries) pairs so one
    # write loop serves the categorized and the flat case.
    if self.is_categorized_dataset:
        targets = ((os.path.join(outdir, split), entries) for split, entries in self.converted_data.items())
    else:
        targets = [(outdir, self.converted_data)]

    for base_dir, entries in targets:
        ann_dir = os.path.join(base_dir, 'Annotations')
        img_dir = os.path.join(base_dir, 'Images')
        for needed in (ann_dir, img_dir):
            os.makedirs(needed, exist_ok=True)
        for xml_tree, xml_filename, img_src, img_name in entries:
            xml_tree.write(os.path.join(ann_dir, xml_filename), encoding='utf-8', xml_declaration=True)
            dst_path = os.path.join(img_dir, img_name)
            if os.path.abspath(img_src) == os.path.abspath(dst_path):
                continue
            shutil.copy(img_src, dst_path)

    # Save classes.txt
    with open(os.path.join(outdir, 'classes.txt'), 'w', encoding='utf-8') as f:
        f.writelines(f'{c}\n' for c in sorted(self.class_names))
    print(f'Pascal VOC data exported to {outdir}')
199
+
200
def convert_single_file(self, data: Dict[str, Any], original_file: IO) -> Dict[str, Any]:
    """Convert a single DM data dict and its image file object to Pascal VOC format.

    Args:
        data: DM format data dictionary (JSON content). Only the first
            entry of ``data['images']`` is used; when it is missing or
            empty the result contains no objects.
        original_file: File object for the corresponding original image.
            Its ``name`` attribute, when usable, determines the file names
            in the result; otherwise ``image.jpg`` is used.

    Returns:
        Dictionary with ``xml_tree``, ``xml_content`` (XML as a string),
        ``xml_filename``, ``image_filename`` and ``class_names``. The class
        names are the sorted contents of ``self.class_names``, which this
        method adds to but does not reset.

    Raises:
        RuntimeError: If the converter was not created with
            ``is_single_conversion=True``.
    """
    if not self.is_single_conversion:
        raise RuntimeError('convert_single_file is only available when is_single_conversion=True')

    # Extract image info from file object
    with Image.open(original_file) as img:
        width, height = img.size
        depth = len(img.getbands())

    # File objects do not always carry a usable name: in-memory buffers
    # have none, and files opened from a descriptor expose an int. Fall
    # back to the default rather than failing later in os.path.splitext.
    raw_name = getattr(original_file, 'name', None)
    if isinstance(raw_name, (str, bytes)):
        img_filename = os.path.basename(os.fsdecode(raw_name))
    else:
        img_filename = ''
    img_filename = img_filename or 'image.jpg'

    # Process annotations from the first (and only) image in data
    images = data.get('images')
    if images:
        objects, has_segmentation = self.parse_dm_annotations(images[0])
    else:
        objects, has_segmentation = [], False

    # Build Pascal VOC XML
    xml_tree = self.build_pascal_xml(img_filename, (width, height, depth), objects, has_segmentation)
    xml_filename = os.path.splitext(img_filename)[0] + '.xml'

    # Convert XML tree to string for easy viewing
    xml_string = ET.tostring(xml_tree.getroot(), encoding='unicode', xml_declaration=True)

    return {
        'xml_tree': xml_tree,
        'xml_content': xml_string,
        'xml_filename': xml_filename,
        'image_filename': img_filename,
        'class_names': sorted(self.class_names),
    }