synapse-sdk 1.0.0a23__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +310 -5
  3. synapse_sdk/cli/alias/__init__.py +22 -0
  4. synapse_sdk/cli/alias/create.py +36 -0
  5. synapse_sdk/cli/alias/dataclass.py +31 -0
  6. synapse_sdk/cli/alias/default.py +16 -0
  7. synapse_sdk/cli/alias/delete.py +15 -0
  8. synapse_sdk/cli/alias/list.py +19 -0
  9. synapse_sdk/cli/alias/read.py +15 -0
  10. synapse_sdk/cli/alias/update.py +17 -0
  11. synapse_sdk/cli/alias/utils.py +61 -0
  12. synapse_sdk/cli/code_server.py +687 -0
  13. synapse_sdk/cli/config.py +440 -0
  14. synapse_sdk/cli/devtools.py +90 -0
  15. synapse_sdk/cli/plugin/__init__.py +33 -0
  16. synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
  17. synapse_sdk/{plugins/cli → cli/plugin}/publish.py +23 -15
  18. synapse_sdk/clients/agent/__init__.py +9 -3
  19. synapse_sdk/clients/agent/container.py +143 -0
  20. synapse_sdk/clients/agent/core.py +19 -0
  21. synapse_sdk/clients/agent/ray.py +298 -9
  22. synapse_sdk/clients/backend/__init__.py +30 -12
  23. synapse_sdk/clients/backend/annotation.py +13 -5
  24. synapse_sdk/clients/backend/core.py +31 -4
  25. synapse_sdk/clients/backend/data_collection.py +186 -0
  26. synapse_sdk/clients/backend/hitl.py +17 -0
  27. synapse_sdk/clients/backend/integration.py +16 -1
  28. synapse_sdk/clients/backend/ml.py +5 -1
  29. synapse_sdk/clients/backend/models.py +78 -0
  30. synapse_sdk/clients/base.py +384 -41
  31. synapse_sdk/clients/ray/serve.py +2 -0
  32. synapse_sdk/clients/validators/collections.py +31 -0
  33. synapse_sdk/devtools/config.py +94 -0
  34. synapse_sdk/devtools/server.py +41 -0
  35. synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
  36. synapse_sdk/devtools/streamlit_app/app.py +128 -0
  37. synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
  38. synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
  39. synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
  40. synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
  41. synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
  42. synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
  43. synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
  44. synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
  45. synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
  46. synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
  47. synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
  48. synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
  49. synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
  50. synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
  51. synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
  52. synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
  53. synapse_sdk/devtools/streamlit_app.py +10 -0
  54. synapse_sdk/loggers.py +120 -9
  55. synapse_sdk/plugins/README.md +1340 -0
  56. synapse_sdk/plugins/__init__.py +0 -13
  57. synapse_sdk/plugins/categories/base.py +117 -11
  58. synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
  59. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
  60. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  61. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  62. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  63. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  64. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  65. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  66. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  67. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  68. synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
  69. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  70. synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
  71. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +13 -12
  72. synapse_sdk/plugins/categories/neural_net/actions/train.py +1134 -31
  73. synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
  74. synapse_sdk/plugins/categories/neural_net/base/inference.py +1 -1
  75. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +32 -4
  76. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
  77. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  78. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  79. synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
  80. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  81. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  82. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  83. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  84. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  85. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  86. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  87. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  88. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  89. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  90. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  91. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  92. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  93. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  94. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  95. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
  96. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
  97. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
  98. synapse_sdk/plugins/categories/upload/__init__.py +0 -0
  99. synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
  100. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  101. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  102. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  103. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  104. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  105. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  106. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  107. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  108. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  109. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  110. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  111. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  112. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  113. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  114. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  115. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  116. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  117. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  118. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  119. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  120. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  121. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  122. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  123. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  124. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  125. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  126. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  127. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  128. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  129. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  130. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  131. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  132. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  133. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  134. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  135. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  136. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  137. synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
  138. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  139. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
  140. synapse_sdk/plugins/enums.py +3 -1
  141. synapse_sdk/plugins/models.py +148 -11
  142. synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
  143. synapse_sdk/plugins/templates/schema.json +491 -0
  144. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
  145. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
  146. synapse_sdk/plugins/utils/__init__.py +46 -0
  147. synapse_sdk/plugins/utils/actions.py +119 -0
  148. synapse_sdk/plugins/utils/config.py +203 -0
  149. synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
  150. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  151. synapse_sdk/plugins/utils/registry.py +58 -0
  152. synapse_sdk/shared/__init__.py +25 -0
  153. synapse_sdk/shared/enums.py +93 -0
  154. synapse_sdk/types.py +19 -0
  155. synapse_sdk/utils/converters/__init__.py +240 -0
  156. synapse_sdk/utils/converters/coco/__init__.py +0 -0
  157. synapse_sdk/utils/converters/coco/from_dm.py +322 -0
  158. synapse_sdk/utils/converters/coco/to_dm.py +215 -0
  159. synapse_sdk/utils/converters/dm/__init__.py +57 -0
  160. synapse_sdk/utils/converters/dm/base.py +137 -0
  161. synapse_sdk/utils/converters/dm/from_v1.py +273 -0
  162. synapse_sdk/utils/converters/dm/to_v1.py +321 -0
  163. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  164. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  165. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  166. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  167. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  168. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  169. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  170. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  171. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  172. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  173. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  174. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  175. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  176. synapse_sdk/utils/converters/dm/types.py +168 -0
  177. synapse_sdk/utils/converters/dm/utils.py +162 -0
  178. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  179. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  180. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  181. synapse_sdk/utils/converters/pascal/__init__.py +0 -0
  182. synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
  183. synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
  184. synapse_sdk/utils/converters/yolo/__init__.py +0 -0
  185. synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
  186. synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
  187. synapse_sdk/utils/dataset.py +46 -0
  188. synapse_sdk/utils/encryption.py +158 -0
  189. synapse_sdk/utils/file/__init__.py +58 -0
  190. synapse_sdk/utils/file/archive.py +32 -0
  191. synapse_sdk/utils/file/checksum.py +56 -0
  192. synapse_sdk/utils/file/chunking.py +31 -0
  193. synapse_sdk/utils/file/download.py +385 -0
  194. synapse_sdk/utils/file/encoding.py +40 -0
  195. synapse_sdk/utils/file/io.py +22 -0
  196. synapse_sdk/utils/file/upload.py +165 -0
  197. synapse_sdk/utils/file/video/__init__.py +29 -0
  198. synapse_sdk/utils/file/video/transcode.py +307 -0
  199. synapse_sdk/utils/file.py.backup +301 -0
  200. synapse_sdk/utils/http.py +138 -0
  201. synapse_sdk/utils/network.py +309 -0
  202. synapse_sdk/utils/storage/__init__.py +72 -0
  203. synapse_sdk/utils/storage/providers/__init__.py +183 -0
  204. synapse_sdk/utils/storage/providers/file_system.py +134 -0
  205. synapse_sdk/utils/storage/providers/gcp.py +13 -0
  206. synapse_sdk/utils/storage/providers/http.py +190 -0
  207. synapse_sdk/utils/storage/providers/s3.py +91 -0
  208. synapse_sdk/utils/storage/providers/sftp.py +47 -0
  209. synapse_sdk/utils/storage/registry.py +17 -0
  210. synapse_sdk-2025.12.3.dist-info/METADATA +123 -0
  211. synapse_sdk-2025.12.3.dist-info/RECORD +279 -0
  212. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +1 -1
  213. synapse_sdk/clients/backend/dataset.py +0 -51
  214. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  215. synapse_sdk/plugins/cli/__init__.py +0 -21
  216. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  217. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  218. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  219. synapse_sdk/utils/file.py +0 -168
  220. synapse_sdk/utils/storage.py +0 -91
  221. synapse_sdk-1.0.0a23.dist-info/METADATA +0 -44
  222. synapse_sdk-1.0.0a23.dist-info/RECORD +0 -114
  223. /synapse_sdk/{plugins/cli → cli/plugin}/run.py +0 -0
  224. /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
  225. /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
  226. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  227. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info/licenses}/LICENSE +0 -0
  228. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,214 @@
1
+ import os
2
+ import xml.etree.ElementTree as ET
3
+ from typing import IO, Any, Dict, List, Optional, Tuple
4
+
5
+ from PIL import Image
6
+
7
+ from synapse_sdk.utils.converters import ToDMConverter
8
+
9
+
10
class PascalToDMConverter(ToDMConverter):
    """Convert Pascal VOC formatted datasets to DM format.

    Every Pascal VOC XML annotation becomes a DM JSON document of the form
    ``{'images': [dm_img]}`` where ``dm_img['bounding_box']`` holds one entry
    per VOC ``<object>``. VOC corner coordinates (``xmin``, ``ymin``, ``xmax``,
    ``ymax``) are truncated to integers and stored as ``[x, y, width, height]``.
    """

    IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']

    def __init__(
        self,
        root_dir: Optional[str] = None,
        is_categorized_dataset: bool = False,
        is_single_conversion: bool = False,
    ):
        """Forward the configuration flags unchanged to the base converter."""
        super().__init__(root_dir, is_categorized_dataset, is_single_conversion)

    def convert(self):
        """Convert the Pascal VOC dataset to DM format.

        Returns:
            For a categorized dataset, a dict mapping each split name returned
            by ``_validate_splits`` to that split's conversion result.
            Otherwise the conversion result for ``root_dir`` itself. The same
            value is also stored on ``self.converted_data``.
        """
        if self.is_categorized_dataset:
            splits = self._validate_splits(['train', 'valid'], ['test'])
            all_split_data = {
                split: self._convert_pascal_split_to_dm(split_dir) for split, split_dir in splits.items()
            }
            self.converted_data = all_split_data
            return all_split_data

        split_data = self._convert_pascal_split_to_dm(self.root_dir)
        self.converted_data = split_data
        return split_data

    def _find_image_path(self, images_dir: str, filename: str) -> Optional[str]:
        """Locate the image that belongs to an annotation.

        The exact ``filename`` is tried first; if it does not exist, the same
        base name is retried with each extension in ``IMG_EXTENSIONS``.

        Returns:
            The path of the first existing candidate, or ``None``.
        """
        img_path = os.path.join(images_dir, filename)
        if os.path.exists(img_path):
            return img_path
        base = os.path.splitext(filename)[0]
        for ext in self.IMG_EXTENSIONS:
            img_path = os.path.join(images_dir, base + ext)
            if os.path.exists(img_path):
                return img_path
        return None

    @staticmethod
    def _get_image_size(image_path: str) -> Tuple[int, int]:
        """Open the image with PIL and return its ``size`` attribute."""
        with Image.open(image_path) as img:
            return img.size

    @staticmethod
    def _first_existing_dir(parent: str, candidates: List[str]) -> Optional[str]:
        """Return the first ``parent/candidate`` that is a directory, else ``None``."""
        for candidate in candidates:
            candidate_path = os.path.join(parent, candidate)
            if os.path.isdir(candidate_path):
                return candidate_path
        return None

    @staticmethod
    def _parse_objects(root: ET.Element) -> List[Dict[str, Any]]:
        """Extract bounding boxes from the ``<object>`` children of ``root``.

        Objects missing ``<name>``, ``<bndbox>`` or any of the four corner
        elements are skipped. Non-numeric or empty corner text is not handled
        here; the resulting ``ValueError``/``TypeError`` propagates to the caller.

        Returns:
            A list of ``{'classification': name, 'data': [xmin, ymin, width, height]}``.
        """
        objects = []
        for obj in root.findall('object'):
            name_elem = obj.find('name')
            bndbox_elem = obj.find('bndbox')
            if name_elem is None or bndbox_elem is None:
                continue

            corners = [bndbox_elem.find(tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            if any(elem is None for elem in corners):
                continue

            # Go through float() first so values such as "12.0" are accepted.
            xmin, ymin, xmax, ymax = [int(float(elem.text)) for elem in corners]
            objects.append({
                'classification': name_elem.text,
                'data': [xmin, ymin, xmax - xmin, ymax - ymin],
            })
        return objects

    def _build_dm_json(self, objects: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Wrap parsed objects in the DM structure with a single image entry.

        Each bounding box receives an id from ``self._generate_unique_id()``
        and an empty ``attrs`` list; the other annotation lists stay empty.
        """
        dm_img = {
            'bounding_box': [
                {
                    'id': self._generate_unique_id(),
                    'classification': obj['classification'],
                    'attrs': [],
                    'data': obj['data'],
                }
                for obj in objects
            ],
            'polygon': [],
            'keypoint': [],
            'relation': [],
            'group': [],
        }
        return {'images': [dm_img]}

    def _parse_pascal_xml(self, xml_path: str) -> Tuple[Optional[str], List[Dict[str, Any]]]:
        """Parse a Pascal VOC XML file.

        Returns:
            ``(filename, objects)`` where ``filename`` is the text of the
            ``<filename>`` element (``None`` when the element is absent) and
            ``objects`` is the list produced by ``_parse_objects``.

        Raises:
            xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        """
        # NOTE(review): ElementTree is not hardened against malicious XML;
        # confirm that annotation files come from a trusted source.
        root = ET.parse(xml_path).getroot()
        filename_elem = root.find('filename')
        filename = filename_elem.text if filename_elem is not None else None
        return filename, self._parse_objects(root)

    def _convert_pascal_split_to_dm(self, split_dir: str) -> Dict[str, Any]:
        """Convert a single Pascal VOC split directory to DM format.

        Annotations are read from ``Annotations``/``annotations`` and images
        from ``Images``/``images``/``JPEGImages`` under ``split_dir``. Files
        that cannot be parsed or matched to an image are reported with a
        printed warning and skipped, so one bad file does not abort the split.

        Returns:
            Dict mapping image base name to ``(dm_json, image_path)``.

        Raises:
            FileNotFoundError: If no annotations or images directory exists.
        """
        annotations_dir = self._first_existing_dir(split_dir, ['Annotations', 'annotations'])
        if annotations_dir is None:
            raise FileNotFoundError(
                f"No annotations directory found in {split_dir} (tried 'Annotations', 'annotations')."
            )
        images_dir = self._first_existing_dir(split_dir, ['Images', 'images', 'JPEGImages'])
        if images_dir is None:
            raise FileNotFoundError(
                f"No images directory found in {split_dir} (tried 'Images', 'images', 'JPEGImages')."
            )

        result = {}
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # result's insertion order reproducible across runs and platforms.
        for xml_filename in sorted(os.listdir(annotations_dir)):
            if not xml_filename.endswith('.xml'):
                continue
            xml_path = os.path.join(annotations_dir, xml_filename)
            try:
                filename, objects = self._parse_pascal_xml(xml_path)
                if filename is None:
                    print(f'[WARNING] No filename found in {xml_filename}, skipping.')
                    continue
                img_path = self._find_image_path(images_dir, filename)
                if img_path is None:
                    print(f'[WARNING] Image not found for {filename}, skipping.')
                    continue
                result[os.path.basename(img_path)] = (self._build_dm_json(objects), img_path)
            except ET.ParseError as e:
                print(f'[WARNING] Failed to parse {xml_filename}: {e}, skipping.')
                continue
            except Exception as e:
                # Deliberate best-effort: any other per-file failure is reported and skipped.
                print(f'[WARNING] Error processing {xml_filename}: {e}, skipping.')
                continue
        return result

    def convert_single_file(self, data: str, original_file: IO) -> Dict[str, Any]:
        """Convert a single Pascal VOC XML data and corresponding image to DM format.

        Args:
            data: Pascal VOC XML content as string
            original_file: File object for the corresponding original image;
                only its ``name`` attribute is read.

        Returns:
            Dictionary with keys ``dm_json``, ``image_path`` (the file
            object's ``name``) and ``image_name`` (its base name).

        Raises:
            RuntimeError: If the converter was not created with
                ``is_single_conversion=True``.
            ValueError: If ``original_file`` has no usable ``name`` or
                ``data`` is not well-formed XML.
        """
        if not self.is_single_conversion:
            raise RuntimeError('convert_single_file is only available when is_single_conversion=True')

        # Get filename from original_file
        img_path = getattr(original_file, 'name', None)
        if not img_path:
            raise ValueError('original_file must have a "name" attribute representing its path or filename.')

        img_filename = os.path.basename(img_path)

        # Parse XML data from string
        try:
            root = ET.fromstring(data)
        except ET.ParseError as e:
            # Chain the cause so the parser's original traceback is preserved.
            raise ValueError(f'Failed to parse Pascal VOC XML data: {e}') from e

        dm_json = self._build_dm_json(self._parse_objects(root))
        return {
            'dm_json': dm_json,
            'image_path': img_path,
            'image_name': img_filename,
        }
File without changes
@@ -0,0 +1,384 @@
1
+ import json
2
+ import os
3
+ import shutil
4
+ from glob import glob
5
+ from typing import IO, Any, Dict, List, Optional, Union
6
+
7
+ from PIL import Image
8
+
9
+ from synapse_sdk.utils.converters import FromDMConverter
10
+
11
+
12
+ class FromDMToYOLOConverter(FromDMConverter):
13
+ """Convert DM dataset format to YOLO format."""
14
+
15
+ IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']
16
+
17
def __init__(self, root_dir: str = None, is_categorized_dataset: bool = False, is_single_conversion: bool = False):
    """Set up the converter and its initially empty YOLO state.

    The three arguments are passed straight through to the base converter.
    ``class_names``, ``class_map`` and ``dataset_yaml_content`` start empty
    and are filled in by ``convert()``.
    """
    super().__init__(root_dir, is_categorized_dataset, is_single_conversion)
    # Text of dataset.yaml, assembled by convert().
    self.dataset_yaml_content: str = ''
    # Class name -> YOLO class index.
    self.class_map: Dict[str, int] = {}
    # Class names; the position in the list is the class index.
    self.class_names: List[str] = []
22
+
23
+ @staticmethod
24
+ def get_all_classes(list_of_dirs: List[str]) -> List[str]:
25
+ """Collect all unique class names from all splits or the root."""
26
+ classes = set()
27
+ for d in list_of_dirs:
28
+ if not d or not os.path.isdir(d):
29
+ continue
30
+ json_dir = os.path.join(d, 'json') if os.path.isdir(os.path.join(d, 'json')) else d
31
+ for jfile in glob(os.path.join(json_dir, '*.json')):
32
+ with open(jfile, encoding='utf-8') as jf:
33
+ data = json.load(jf)
34
+ for img_ann in data['images']:
35
+ for k in ['bounding_box', 'polygon', 'keypoint']:
36
+ if k in img_ann:
37
+ for ann in img_ann[k]:
38
+ classes.add(ann['classification'])
39
+ return sorted(list(classes))
40
+
41
@staticmethod
def get_image_size(image_path: str):
    """Open the image with PIL and return its ``size`` (callers unpack it as width, height)."""
    with Image.open(image_path) as opened:
        dimensions = opened.size
    return dimensions
45
+
46
+ @staticmethod
47
+ def polygon_to_bbox(polygon: list):
48
+ """Convert polygon points to bounding box [cx, cy, w, h]."""
49
+ if not polygon or len(polygon) == 0:
50
+ return None
51
+ xs = [p[0] for p in polygon]
52
+ ys = [p[1] for p in polygon]
53
+ x_min, y_min = min(xs), min(ys)
54
+ x_max, y_max = max(xs), max(ys)
55
+ cx = (x_min + x_max) / 2
56
+ cy = (y_min + y_max) / 2
57
+ w = x_max - x_min
58
+ h = y_max - y_min
59
+ return [cx, cy, w, h]
60
+
61
+ @staticmethod
62
+ def polygon_to_yolo_string(polygon: list, width: int, height: int):
63
+ """Convert polygon points to normalized YOLO polygon format string (x1 y1 x2 y2 ...)."""
64
+ if not polygon or len(polygon) == 0:
65
+ return ''
66
+
67
+ coords = []
68
+ for point in polygon:
69
+ x, y = point
70
+ # Normalize coordinates to 0-1 range
71
+ x_norm = x / width
72
+ y_norm = y / height
73
+ coords.extend([f'{x_norm:.6f}', f'{y_norm:.6f}'])
74
+
75
+ return ' '.join(coords)
76
+
77
+ @staticmethod
78
+ def keypoints_to_yolo_string(keypoints: list, width: int, height: int):
79
+ """Convert keypoints to normalized YOLO keypoint format string (x1 y1 v1 x2 y2 v2 ...)."""
80
+ kp_strs = []
81
+ for kp in keypoints:
82
+ # kp: [x, y, visible]
83
+ x, y, v = kp
84
+ x = x / width
85
+ y = y / height
86
+ kp_strs.extend([f'{x:.6f}', f'{y:.6f}', str(v)])
87
+ return ' '.join(kp_strs)
88
+
89
+ def _convert_split_dir(self, split_dir: str, split_name: str) -> List[Dict[str, Any]]:
90
+ """Convert one split folder to YOLO format."""
91
+ if not self.class_map:
92
+ raise ValueError('class_map is not initialized. Ensure get_all_classes() is called before this method.')
93
+
94
+ json_dir = os.path.join(split_dir, 'json')
95
+ img_dir = os.path.join(split_dir, 'original_files')
96
+ entries = []
97
+ for jfile in glob(os.path.join(json_dir, '*.json')):
98
+ base = os.path.splitext(os.path.basename(jfile))[0]
99
+ found_img = None
100
+ for ext in self.IMG_EXTENSIONS:
101
+ img_path = os.path.join(img_dir, base + ext)
102
+ if os.path.exists(img_path):
103
+ found_img = img_path
104
+ break
105
+ if not found_img:
106
+ print(f'[{split_name}] Image for {base} not found, skipping.')
107
+ continue
108
+ width, height = self.get_image_size(found_img)
109
+ with open(jfile, encoding='utf-8') as jf:
110
+ data = json.load(jf)
111
+ img_ann = data['images'][0]
112
+ label_lines = []
113
+
114
+ # bbox
115
+ if 'bounding_box' in img_ann:
116
+ for box in img_ann['bounding_box']:
117
+ cidx = self.class_map[box['classification']]
118
+ x, y, w, h = box['data']
119
+ cx = x + w / 2
120
+ cy = y + h / 2
121
+ cx /= width
122
+ cy /= height
123
+ w /= width
124
+ h /= height
125
+ label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}')
126
+
127
+ # polygon
128
+ if 'polygon' in img_ann:
129
+ for poly in img_ann['polygon']:
130
+ cidx = self.class_map[poly['classification']]
131
+ poly_str = self.polygon_to_yolo_string(poly['data'], width, height)
132
+ if poly_str: # Only add if polygon is valid
133
+ label_lines.append(f'{cidx} {poly_str}')
134
+ else:
135
+ print(f'[{split_name}] Polygon for {base} is empty, skipping this polygon.')
136
+
137
+ # keypoint
138
+ if 'keypoint' in img_ann:
139
+ for kp in img_ann['keypoint']:
140
+ cidx = self.class_map[kp['classification']]
141
+ # Assume bounding box exists for keypoint, or fallback to full image
142
+ if 'bounding_box' in kp:
143
+ x, y, w, h = kp['bounding_box']
144
+ cx = x + w / 2
145
+ cy = y + h / 2
146
+ cx /= width
147
+ cy /= height
148
+ w /= width
149
+ h /= height
150
+ else:
151
+ # fallback to the whole image
152
+ cx, cy, w, h = 0.5, 0.5, 1.0, 1.0
153
+ kp_str = self.keypoints_to_yolo_string(kp['data'], width, height)
154
+ label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f} {kp_str}')
155
+
156
+ entries.append({
157
+ 'img_path': found_img,
158
+ 'img_name': os.path.basename(found_img),
159
+ 'label_name': base + '.txt',
160
+ 'label_lines': label_lines,
161
+ })
162
+ return entries
163
+
164
+ def _convert_root_dir(self) -> List[Dict[str, Any]]:
165
+ """Convert non-categorized dataset to YOLO format."""
166
+ json_dir = os.path.join(self.root_dir, 'json')
167
+ img_dir = os.path.join(self.root_dir, 'original_files')
168
+ entries = []
169
+ for jfile in glob(os.path.join(json_dir, '*.json')):
170
+ base = os.path.splitext(os.path.basename(jfile))[0]
171
+ found_img = None
172
+ for ext in self.IMG_EXTENSIONS:
173
+ img_path = os.path.join(img_dir, base + ext)
174
+ if os.path.exists(img_path):
175
+ found_img = img_path
176
+ break
177
+ if not found_img:
178
+ print(f'[single] Image for {base} not found, skipping.')
179
+ continue
180
+ width, height = self.get_image_size(found_img)
181
+ with open(jfile, encoding='utf-8') as jf:
182
+ data = json.load(jf)
183
+ img_ann = data['images'][0]
184
+ label_lines = []
185
+
186
+ # bbox
187
+ if 'bounding_box' in img_ann:
188
+ for box in img_ann['bounding_box']:
189
+ cidx = self.class_map[box['classification']]
190
+ x, y, w, h = box['data']
191
+ cx = x + w / 2
192
+ cy = y + h / 2
193
+ cx /= width
194
+ cy /= height
195
+ w /= width
196
+ h /= height
197
+ label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}')
198
+
199
+ # polygon
200
+ if 'polygon' in img_ann:
201
+ for poly in img_ann['polygon']:
202
+ cidx = self.class_map[poly['classification']]
203
+ poly_str = self.polygon_to_yolo_string(poly['data'], width, height)
204
+ if poly_str: # Only add if polygon is valid
205
+ label_lines.append(f'{cidx} {poly_str}')
206
+ else:
207
+ print(f'[single] Polygon for {base} is empty, skipping this polygon.')
208
+
209
+ # keypoint
210
+ if 'keypoint' in img_ann:
211
+ for kp in img_ann['keypoint']:
212
+ cidx = self.class_map[kp['classification']]
213
+ if 'bounding_box' in kp:
214
+ x, y, w, h = kp['bounding_box']
215
+ cx = x + w / 2
216
+ cy = y + h / 2
217
+ cx /= width
218
+ cy /= height
219
+ w /= width
220
+ h /= height
221
+ else:
222
+ cx, cy, w, h = 0.5, 0.5, 1.0, 1.0
223
+ kp_str = self.keypoints_to_yolo_string(kp['data'], width, height)
224
+ label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f} {kp_str}')
225
+
226
+ entries.append({
227
+ 'img_path': found_img,
228
+ 'img_name': os.path.basename(found_img),
229
+ 'label_name': base + '.txt',
230
+ 'label_lines': label_lines,
231
+ })
232
+ return entries
233
+
234
def convert(self) -> Union[Dict[str, List[Dict[str, Any]]], List[Dict[str, Any]]]:
    """Convert the DM dataset into YOLO entries and prepare ``dataset.yaml`` text.

    Side effects: sets ``self.class_names``, ``self.class_map``,
    ``self.converted_data`` and ``self.dataset_yaml_content`` (the latter is
    what ``save_to_folder`` writes to ``dataset.yaml``).

    Returns:
        - If the dataset is categorized: dict ``{split: list of entries}``
        - Otherwise: list of entries for the root directory
    """
    # The YAML header always starts with the dataset root.
    yaml_lines = ['path: ' + self.root_dir]

    if not self.is_categorized_dataset:
        # Flat layout: validation is invoked with no splits; its result is unused here.
        self._validate_splits(required_splits=[], optional_splits=[])
        self.class_names = self.get_all_classes([self.root_dir])
        self.class_map = {label: position for position, label in enumerate(self.class_names)}
        converted = self._convert_root_dir()
        self.converted_data = converted
    else:
        # Split layout: train/valid are required, test is optional.
        split_dirs = self._validate_splits(required_splits=['train', 'valid'], optional_splits=['test'])
        self.class_names = self.get_all_classes(list(split_dirs.values()))
        # The class map must exist before the per-split conversion runs.
        self.class_map = {label: position for position, label in enumerate(self.class_names)}
        converted = {
            split_name: self._convert_split_dir(split_path, split_name)
            for split_name, split_path in split_dirs.items()
        }
        self.converted_data = converted

        yaml_lines.extend(['train: train/images', 'val: valid/images'])
        if 'test' in split_dirs:
            yaml_lines.append('test: test/images')

    # Class count and names close the YAML document (blank line before, newline after).
    yaml_lines.extend(['', f'nc: {len(self.class_names)}', f'names: {self.class_names}', ''])
    self.dataset_yaml_content = '\n'.join(yaml_lines)
    return converted
269
+
270
def save_to_folder(self, output_dir: Optional[str] = None) -> None:
    """Write the converted YOLO dataset (images, labels, metadata) to disk.

    Args:
        output_dir: Destination folder. Falls back to ``self.root_dir`` when
            omitted or empty.

    Runs ``convert()`` first if ``self.converted_data`` is still ``None``.
    Images are copied and label files written under ``images/`` and
    ``labels/`` (inside a per-split folder for categorized datasets), then
    ``dataset.yaml`` and ``classes.txt`` are written at the destination root.
    """
    target_root = output_dir or self.root_dir
    self.ensure_dir(target_root)
    if self.converted_data is None:
        self.converted_data = self.convert()

    # Each group pairs the path components of its parent folder with its entries.
    if self.is_categorized_dataset:
        groups = (
            ((target_root, split_name), split_entries)
            for split_name, split_entries in self.converted_data.items()
        )
    else:
        groups = [((target_root,), self.converted_data)]

    for parent_parts, group_entries in groups:
        images_dir = os.path.join(*parent_parts, 'images')
        labels_dir = os.path.join(*parent_parts, 'labels')
        self.ensure_dir(images_dir)
        self.ensure_dir(labels_dir)
        for item in group_entries:
            shutil.copy(item['img_path'], os.path.join(images_dir, item['img_name']))
            label_path = os.path.join(labels_dir, item['label_name'])
            with open(label_path, 'w', encoding='utf-8') as label_file:
                label_file.write('\n'.join(item['label_lines']))

    # Dataset-level metadata lives at the destination root.
    with open(os.path.join(target_root, 'dataset.yaml'), 'w', encoding='utf-8') as yaml_file:
        yaml_file.write(self.dataset_yaml_content)
    with open(os.path.join(target_root, 'classes.txt'), 'w', encoding='utf-8') as classes_file:
        classes_file.writelines(f'{class_name}\n' for class_name in self.class_names)
    print(f'YOLO data exported to {target_root}')
303
+
304
def convert_single_file(
    self, data: Dict[str, Any], original_file: IO, class_names: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Convert a single DM data dict and its image file object to YOLO label lines.

    Only the first entry of ``data['images']`` is converted. When
    ``class_names`` is omitted, classes are collected from every entry of
    ``data['images']`` and sorted. Annotations whose classification is not in
    the resulting class map are skipped.

    Args:
        data: DM format data dictionary (JSON content) with an ``'images'`` list.
        original_file: File object for the corresponding original image; it is
            passed to ``self.get_image_size`` to obtain ``(width, height)``.
        class_names: Optional list of class names. If not provided, classes
            are extracted from ``data``.

    Returns:
        Dictionary with ``'label_lines'`` (list of YOLO label strings),
        ``'class_names'`` and ``'class_map'`` (class name -> index). If
        ``data['images']`` is empty, ``'label_lines'`` is an empty list.

    Raises:
        RuntimeError: If the converter was not created with
            ``is_single_conversion=True``.
    """
    if not self.is_single_conversion:
        raise RuntimeError('convert_single_file is only available when is_single_conversion=True')

    images = data['images']

    if class_names is None:
        found_classes = set()
        for image_entry in images:
            for kind in ('bounding_box', 'polygon', 'keypoint'):
                for ann in image_entry.get(kind, ()):
                    found_classes.add(ann['classification'])
        class_names = sorted(found_classes)

    class_map = {name: idx for idx, name in enumerate(class_names)}
    # NOTE(review): get_image_size receives the file object here; confirm it
    # supports file objects and not only paths.
    width, height = self.get_image_size(original_file)

    def normalized_box(box):
        """Turn an (x, y, w, h) box into (cx, cy, w, h) divided by the image size."""
        x, y, w, h = box
        return (x + w / 2) / width, (y + h / 2) / height, w / width, h / height

    # An empty 'images' list yields no labels instead of an IndexError.
    img_ann = images[0] if images else {}
    label_lines = []

    # bbox
    for box in img_ann.get('bounding_box', ()):
        if box['classification'] not in class_map:
            continue
        cidx = class_map[box['classification']]
        cx, cy, w, h = normalized_box(box['data'])
        label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}')

    # polygon
    for poly in img_ann.get('polygon', ()):
        if poly['classification'] not in class_map:
            continue
        cidx = class_map[poly['classification']]
        poly_str = self.polygon_to_yolo_string(poly['data'], width, height)
        if poly_str:  # Only add if the helper produced a non-empty string
            label_lines.append(f'{cidx} {poly_str}')

    # keypoint
    for kp in img_ann.get('keypoint', ()):
        if kp['classification'] not in class_map:
            continue
        cidx = class_map[kp['classification']]
        if 'bounding_box' in kp:
            cx, cy, w, h = normalized_box(kp['bounding_box'])
        else:
            # No box supplied: fall back to a box covering the whole image.
            cx, cy, w, h = 0.5, 0.5, 1.0, 1.0
        kp_str = self.keypoints_to_yolo_string(kp['data'], width, height)
        label_lines.append(f'{cidx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f} {kp_str}')

    return {
        'label_lines': label_lines,
        'class_names': class_names,
        'class_map': class_map,
    }