synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,367 @@
1
+ import random
2
+ import string
3
+
4
+ from . import BaseDMConverter
5
+
6
+
7
class DMV2ToV1Converter(BaseDMConverter):
    """DM v2 to v1 format converter class.

    The v2 payload is a dict whose keys are plural media types (``images``,
    ``videos``, ...) and whose values are lists of media items. Each media
    item maps a tool name to a list of annotations. ``convert()`` flattens
    that structure into the v1 layout keyed by ``<media_type>_<index>``.
    """

    def __init__(self, new_dm_data=None, file_type=None):
        """Initialize the converter.

        Args:
            new_dm_data (dict, optional): DM v2 format data to be converted.
                ``None`` (the default) is treated as an empty payload. A fresh
                dict is created per instance so that default-constructed
                converters never share state.
            file_type (str, optional): Type of file being converted
                (image, video, pcd, text, audio). Auto-detected from
                ``new_dm_data`` when omitted.
        """
        if new_dm_data is None:
            new_dm_data = {}

        # Auto-detect file type if not provided
        if file_type is None:
            file_type = self._detect_file_type(new_dm_data)

        super().__init__(file_type=file_type)
        self.new_dm_data = new_dm_data
        self.annotations = {}
        self.annotations_data = {}
        self.extra = {}
        self.relations = {}
        self.annotation_groups = {}

    def _detect_file_type(self, data):
        """Auto-detect file type from the data structure.

        Args:
            data (dict): DM v2 format data

        Returns:
            str | None: Detected file type (image, video, pcd, text, audio),
            or None when ``data`` is empty or contains no known media key.
        """
        if not data:
            return None

        # Plural media keys, checked in priority order; the first match wins.
        media_keys = (
            ('images', 'image'),
            ('videos', 'video'),
            ('pcds', 'pcd'),
            ('texts', 'text'),
            ('audios', 'audio'),
        )
        for plural_key, file_type in media_keys:
            if plural_key in data:
                return file_type

        return None

    def convert(self):
        """Convert DM v2 data to v1 format.

        Every call starts from fresh result containers, so the converter can
        be reused; the dicts returned by an earlier call are not mutated.

        Returns:
            dict: Converted data in DM v1 format with the keys ``extra``,
            ``relations``, ``annotations``, ``annotationsData`` and
            ``annotationGroups``, each keyed by ``<media_type>_<index>``
            (index starts at 1).
        """
        # Reset state
        new_dm_data = self.new_dm_data
        self.annotations = {}
        self.annotations_data = {}
        self.extra = {}
        self.relations = {}
        self.annotation_groups = {}

        # Process each media type (images, videos, etc.)
        for media_type_plural, media_items in new_dm_data.items():
            # A top-level 'classification' entry is not a media list; skip it.
            if media_type_plural == 'classification':
                continue

            media_type = self._singularize_media_type(media_type_plural)

            for index, media_item in enumerate(media_items, 1):
                media_id = f'{media_type}_{index}'

                # Initialize structures for this media
                self.annotations[media_id] = []
                self.annotations_data[media_id] = []
                self.extra[media_id] = {}
                self.relations[media_id] = []
                self.annotation_groups[media_id] = []

                # Process each tool type in the media item
                for tool_type, tool_data in media_item.items():
                    self._process_tool_data(media_id, tool_type, tool_data)

        # Build final result
        return {
            'extra': self.extra,
            'relations': self.relations,
            'annotations': self.annotations,
            'annotationsData': self.annotations_data,
            'annotationGroups': self.annotation_groups,
        }

    def _process_tool_data(self, media_id, tool_type, tool_data):
        """Process tool data for a specific media item.

        For every annotation a v1 annotation entry is appended to
        ``self.annotations[media_id]``; the geometry/payload is then delegated
        to the processor registered for ``tool_type`` in
        ``self.tool_processors`` (provided outside this class — presumably by
        the base converter).

        Args:
            media_id (str): ID of the media item
            tool_type (str): Type of annotation tool
            tool_data (list): List of annotation data for this tool

        Raises:
            KeyError: If an annotation lacks ``id`` or ``classification``.
        """
        for annotation in tool_data:
            annotation_id = annotation['id']
            classification = annotation['classification']
            attrs = annotation.get('attrs', [])
            data = annotation.get('data', {})

            # Create annotation entry
            annotation_entry = {
                'id': annotation_id,
                'tool': tool_type,
                'isLocked': False,
                'isVisible': True,
                'classification': {'class': classification},
            }

            # Add additional classification attributes from attrs
            for attr in attrs:
                attr_name = attr.get('name')
                attr_value = attr.get('value')
                # Falsy values such as 0 or '' are kept; only None is dropped.
                if attr_name and attr_value is not None:
                    annotation_entry['classification'][attr_name] = attr_value

            # Add special attributes for specific tools
            if tool_type == 'keypoint':
                annotation_entry['shape'] = 'circle'

            self.annotations[media_id].append(annotation_entry)

            # Create annotations data entry using tool processor
            processor = self.tool_processors.get(tool_type)
            if processor:
                processor(annotation_id, data, self.annotations_data[media_id])
            else:
                self._handle_unknown_tool(tool_type, annotation_id)

    def _points_to_coordinates(self, data):
        """Turn v2 point data into a list of v1 coordinate dicts.

        Args:
            data (list): Either flat ``[x1, y1, x2, y2, ...]`` or nested
                ``[[x1, y1], [x2, y2], ...]``. The format is decided by the
                type of the first element.

        Returns:
            list[dict]: One ``{'x', 'y', 'id'}`` dict per point, each with a
            freshly generated random id. Nested points with fewer than two
            values and a dangling trailing value in flat data are ignored.
        """
        if data and isinstance(data[0], list):
            # Nested format: [[x1, y1], [x2, y2], ...]
            pairs = [(point[0], point[1]) for point in data if len(point) >= 2]
        else:
            # Flat format: [x1, y1, x2, y2, ...]; stop before an unpaired tail.
            pairs = [(data[i], data[i + 1]) for i in range(0, len(data) - 1, 2)]

        return [{'x': x, 'y': y, 'id': self._generate_random_id()} for x, y in pairs]

    @staticmethod
    def _build_segmentation_entry(annotation_id, data):
        """Build the v1 entry shared by all segmentation tools.

        A list is stored under ``pixel_indices`` and a dict under ``section``;
        any other type yields an entry containing only the id.
        """
        entry = {'id': annotation_id}

        if isinstance(data, list):
            # Pixel-based segmentation
            entry['pixel_indices'] = data
        elif isinstance(data, dict):
            # Section-based segmentation
            entry['section'] = data

        return entry

    def _convert_bounding_box(self, annotation_id, data, annotations_data):
        """Process bounding box annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Bounding box data ``[x, y, width, height]``; extra
                trailing values are ignored
            annotations_data (list): List to append the processed data

        Nothing is appended when ``data`` has fewer than four values.
        """
        if len(data) >= 4:
            x1, y1, width, height = data[:4]
            coordinate = {'x': x1, 'y': y1, 'width': width, 'height': height}

            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})

    def _convert_named_entity(self, annotation_id, data, annotations_data):
        """Process named entity annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Named entity data with ranges and content
            annotations_data (list): List to append the processed data
        """
        entity_data = {'id': annotation_id}

        # Only the keys that are actually present are copied over.
        for key in ('ranges', 'content'):
            if key in data:
                entity_data[key] = data[key]

        annotations_data.append(entity_data)

    def _convert_classification(self, annotation_id, data, annotations_data):
        """Process classification annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Classification data (usually empty); not used
            annotations_data (list): List to append the processed data
        """
        # Classification data is typically empty in v2, so we just add the ID
        annotations_data.append({'id': annotation_id})

    def _convert_polyline(self, annotation_id, data, annotations_data):
        """Process polyline annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Polyline data - can be flat [x1, y1, x2, y2, ...] or nested [[x1, y1], [x2, y2], ...]
            annotations_data (list): List to append the processed data
        """
        coordinates = self._points_to_coordinates(data)
        annotations_data.append({'id': annotation_id, 'coordinate': coordinates})

    def _convert_keypoint(self, annotation_id, data, annotations_data):
        """Process keypoint annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Keypoint data [x, y]; extra trailing values are ignored
            annotations_data (list): List to append the processed data

        Nothing is appended when ``data`` has fewer than two values.
        """
        if len(data) >= 2:
            coordinate = {'x': data[0], 'y': data[1]}

            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})

    def _convert_3d_bounding_box(self, annotation_id, data, annotations_data):
        """Process 3D bounding box annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): 3D bounding box PSR data, stored unchanged under ``psr``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'psr': data})

    def _convert_image_segmentation(self, annotation_id, data, annotations_data):
        """Process segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): Segmentation data (pixel_indices or section)
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._build_segmentation_entry(annotation_id, data))

    def _convert_video_segmentation(self, annotation_id, data, annotations_data):
        """Process video segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): Segmentation data (pixel_indices or section)
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._build_segmentation_entry(annotation_id, data))

    def _convert_3d_segmentation(self, annotation_id, data, annotations_data):
        """Process 3D segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): 3D segmentation data
            annotations_data (list): List to append the processed data
        """
        annotations_data.append(self._build_segmentation_entry(annotation_id, data))

    def _convert_prompt(self, annotation_id, data, annotations_data):
        """Process prompt annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Prompt data, merged into the entry (an ``id`` key in
                ``data`` overrides ``annotation_id``); non-dict data is ignored
            annotations_data (list): List to append the processed data
        """
        annotation_data = {'id': annotation_id}

        if isinstance(data, dict):
            annotation_data.update(data)

        annotations_data.append(annotation_data)

    def _convert_answer(self, annotation_id, data, annotations_data):
        """Process answer annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Answer data, merged into the entry (an ``id`` key in
                ``data`` overrides ``annotation_id``); non-dict data is ignored
            annotations_data (list): List to append the processed data
        """
        annotation_data = {'id': annotation_id}

        if isinstance(data, dict):
            annotation_data.update(data)

        annotations_data.append(annotation_data)

    def _convert_polygon(self, annotation_id, data, annotations_data):
        """Process polygon annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Polygon data - can be flat [x1, y1, x2, y2, ...] or nested [[x1, y1], [x2, y2], ...]
            annotations_data (list): List to append the processed data
        """
        coordinates = self._points_to_coordinates(data)
        annotations_data.append({'id': annotation_id, 'coordinate': coordinates})

    def _convert_relation(self, annotation_id, data, annotations_data):
        """Process relation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Relation data, stored unchanged under ``data``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'data': data})

    def _convert_group(self, annotation_id, data, annotations_data):
        """Process group annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Group data, stored unchanged under ``data``
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'data': data})

    def _generate_random_id(self):
        """Generate a random ID similar to the original format.

        Returns:
            str: 10 characters drawn from ASCII letters, digits, ``-`` and
            ``_``. Uses the non-cryptographic ``random`` module, so the value
            is only suitable as a point identifier, not as a secret.
        """
        chars = string.ascii_letters + string.digits + '-_'
        return ''.join(random.choices(chars, k=10))
@@ -0,0 +1,58 @@
1
+ # File utilities module
2
+ # Maintains backward compatibility by re-exporting all functions
3
+
4
+ from .archive import archive, unarchive
5
+ from .checksum import calculate_checksum, get_checksum_from_file
6
+ from .chunking import read_file_in_chunks
7
+ from .download import (
8
+ adownload_file,
9
+ afiles_url_to_path,
10
+ afiles_url_to_path_from_objs,
11
+ download_file,
12
+ files_url_to_path,
13
+ files_url_to_path_from_objs,
14
+ )
15
+ from .encoding import convert_file_to_base64
16
+ from .io import get_dict_from_file, get_temp_path
17
+ from .upload import (
18
+ FilesDict,
19
+ FileProcessingError,
20
+ FileTuple,
21
+ FileUploadError,
22
+ FileValidationError,
23
+ RequestsFile,
24
+ close_file_handles,
25
+ process_files_for_upload,
26
+ )
27
+
28
+ __all__ = [
29
+ # Chunking
30
+ 'read_file_in_chunks',
31
+ # Download
32
+ 'download_file',
33
+ 'adownload_file',
34
+ 'files_url_to_path',
35
+ 'afiles_url_to_path',
36
+ 'files_url_to_path_from_objs',
37
+ 'afiles_url_to_path_from_objs',
38
+ # Checksum
39
+ 'calculate_checksum',
40
+ 'get_checksum_from_file',
41
+ # Archive
42
+ 'archive',
43
+ 'unarchive',
44
+ # Encoding
45
+ 'convert_file_to_base64',
46
+ # I/O
47
+ 'get_dict_from_file',
48
+ 'get_temp_path',
49
+ # Upload
50
+ 'process_files_for_upload',
51
+ 'close_file_handles',
52
+ 'FileUploadError',
53
+ 'FileValidationError',
54
+ 'FileProcessingError',
55
+ 'FileTuple',
56
+ 'FilesDict',
57
+ 'RequestsFile',
58
+ ]
@@ -0,0 +1,32 @@
1
+ import zipfile
2
+ from pathlib import Path
3
+
4
+
5
def archive(input_path, output_path, append=False):
    """Create a ZIP archive (deflate-compressed) from a file or a directory tree.

    Args:
        input_path (str | Path): File or directory to archive. A file is stored
            under its bare name. For a directory, every regular file below it
            is stored under a path relative to the directory's parent, so the
            directory name itself is the top-level entry in the archive.
        output_path (str | Path): Path of the ZIP file to write.
        append (bool): When True and ``output_path`` already exists, entries
            are added to the existing archive; otherwise the archive is
            (re)created.
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    mode = 'a' if append and output_path.exists() else 'w'

    # The archive may live inside the directory being archived. It already
    # exists on disk once ZipFile opens it, so it must be skipped explicitly or
    # the half-written archive would be added to itself.
    archive_target = output_path.resolve()

    with zipfile.ZipFile(output_path, mode=mode, compression=zipfile.ZIP_DEFLATED) as zipf:
        if input_path.is_file():
            zipf.write(input_path, input_path.name)
            return

        for file_path in input_path.rglob('*'):
            if not file_path.is_file():  # Only add files, skip directories
                continue
            if file_path.resolve() == archive_target:
                continue
            zipf.write(file_path, file_path.relative_to(input_path.parent))
18
+
19
+
20
def unarchive(file_path, output_path):
    """
    Extract all members of a ZIP file into a directory.

    The destination directory (including missing parents) is created before
    the archive is opened.

    Parameters:
        file_path (str | Path): The path to the ZIP file.
        output_path (str | Path): The directory where the files will be extracted.
    """
    destination = Path(output_path)
    destination.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(str(file_path), mode='r') as bundle:
        bundle.extractall(path=destination)
@@ -0,0 +1,56 @@
1
+ import hashlib
2
+ from typing import IO, Any, Callable
3
+
4
+
5
def calculate_checksum(file_path, prefix=''):
    """Return the MD5 hex digest of a file, optionally tagged with a prefix.

    The file is read in 4096-byte blocks so large files are never loaded into
    memory at once.

    Args:
        file_path (str | Path): Path of the file to hash.
        prefix (str): When non-empty, the result is ``'<prefix>-<digest>'``.
            For backward compatibility the boolean ``True`` is treated as the
            prefix ``'dev'``.

    Returns:
        str: The hex digest, with ``'<prefix>-'`` prepended when a prefix is given.
    """
    md5_hash = hashlib.md5()
    with open(file_path, 'rb') as f:
        for byte_block in iter(lambda: f.read(4096), b''):
            md5_hash.update(byte_block)
    checksum = md5_hash.hexdigest()

    if not prefix:
        return checksum

    # Previously any truthy prefix produced the hard-coded 'dev-' tag; honour
    # the supplied value while keeping that result for prefix=True / 'dev'.
    label = 'dev' if prefix is True else prefix
    return f'{label}-{checksum}'
14
+
15
+
16
def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
    """
    Calculate checksum for a file-like object.

    The stream is rewound to its beginning first when it is seekable and is
    then read to the end in 4096-byte chunks; the position is left at the end.
    Non-seekable streams (e.g. pipes) are hashed from their current position.
    Text chunks are encoded as UTF-8 before hashing.

    Args:
        file (IO[Any]): File-like object with read() method that supports reading in chunks
        digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)

    Returns:
        str: Hexadecimal digest of the file contents

    Example:
        ```python
        import hashlib
        from io import BytesIO
        from synapse_sdk.utils.file import get_checksum_from_file

        # With BytesIO
        data = BytesIO(b'Hello, world!')
        checksum = get_checksum_from_file(data)

        # With different hash algorithm
        checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
        ```
    """
    digest = digest_mod()
    chunk_size = 4096

    # Reset file pointer to beginning if possible. Every io stream exposes
    # seek(), even non-seekable ones where it raises, so ask seekable() first
    # when the object provides it.
    if hasattr(file, 'seek'):
        seekable = getattr(file, 'seekable', None)
        if not callable(seekable) or seekable():
            file.seek(0)

    while True:
        chunk = file.read(chunk_size)
        if not chunk:
            break
        if isinstance(chunk, str):
            chunk = chunk.encode('utf-8')
        digest.update(chunk)

    return digest.hexdigest()
@@ -0,0 +1,31 @@
1
def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
    """
    Lazily yield the binary contents of a file, one chunk at a time.

    Only a single chunk is held in memory at once, which makes this suitable
    for uploading or hashing files too large to load whole. The file is opened
    on the first iteration and closed when the generator finishes.

    Args:
        file_path (str | Path): Path to the file to read
        chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)

    Yields:
        bytes: File content chunks; every chunk except possibly the last has
        ``chunk_size`` bytes

    Raises:
        FileNotFoundError: If the file doesn't exist
        PermissionError: If the file can't be read due to permissions
        OSError: If there's an OS-level error reading the file

    Example:
        ```python
        from synapse_sdk.utils.file import read_file_in_chunks

        # Read a file in 10MB chunks
        for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
            process_chunk(chunk)
        ```
    """
    with open(file_path, 'rb') as stream:
        # read() returns b'' at end of file, which ends the iteration.
        for block in iter(lambda: stream.read(chunk_size), b''):
            yield block