synapse-sdk 1.0.0a58__py3-none-any.whl → 1.0.0a60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. Click here for more details.

Files changed (89)
  1. synapse_sdk/cli/__init__.py +246 -5
  2. synapse_sdk/cli/alias/utils.py +1 -1
  3. synapse_sdk/cli/config.py +339 -0
  4. synapse_sdk/cli/devtools.py +61 -0
  5. synapse_sdk/cli/plugin/publish.py +3 -4
  6. synapse_sdk/clients/agent/__init__.py +7 -2
  7. synapse_sdk/clients/agent/ray.py +37 -6
  8. synapse_sdk/clients/backend/__init__.py +5 -9
  9. synapse_sdk/clients/backend/annotation.py +12 -0
  10. synapse_sdk/clients/backend/data_collection.py +10 -1
  11. synapse_sdk/clients/base.py +42 -3
  12. synapse_sdk/devtools/__init__.py +0 -0
  13. synapse_sdk/devtools/config.py +94 -0
  14. synapse_sdk/devtools/docs/.gitignore +20 -0
  15. synapse_sdk/devtools/docs/README.md +41 -0
  16. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +12 -0
  17. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +44 -0
  18. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  19. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  20. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +29 -0
  21. synapse_sdk/devtools/docs/blog/authors.yml +25 -0
  22. synapse_sdk/devtools/docs/blog/tags.yml +19 -0
  23. synapse_sdk/devtools/docs/docusaurus.config.ts +138 -0
  24. synapse_sdk/devtools/docs/package-lock.json +17455 -0
  25. synapse_sdk/devtools/docs/package.json +47 -0
  26. synapse_sdk/devtools/docs/sidebars.ts +36 -0
  27. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +71 -0
  28. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  29. synapse_sdk/devtools/docs/src/css/custom.css +30 -0
  30. synapse_sdk/devtools/docs/src/pages/index.module.css +23 -0
  31. synapse_sdk/devtools/docs/src/pages/index.tsx +21 -0
  32. synapse_sdk/devtools/docs/src/pages/markdown-page.md +7 -0
  33. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  34. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  35. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  36. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  37. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  38. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  39. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +170 -0
  40. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  41. synapse_sdk/devtools/docs/tsconfig.json +8 -0
  42. synapse_sdk/devtools/models.py +55 -0
  43. synapse_sdk/devtools/server.py +829 -0
  44. synapse_sdk/devtools/web/.gitignore +2 -0
  45. synapse_sdk/devtools/web/README.md +34 -0
  46. synapse_sdk/devtools/web/dist/index.html +17 -0
  47. synapse_sdk/devtools/web/index.html +16 -0
  48. synapse_sdk/devtools/web/jsconfig.json +15 -0
  49. synapse_sdk/devtools/web/package-lock.json +2609 -0
  50. synapse_sdk/devtools/web/package.json +27 -0
  51. synapse_sdk/devtools/web/pnpm-lock.yaml +1055 -0
  52. synapse_sdk/devtools/web/src/App.jsx +14 -0
  53. synapse_sdk/devtools/web/src/App.module.css +33 -0
  54. synapse_sdk/devtools/web/src/assets/favicon.ico +0 -0
  55. synapse_sdk/devtools/web/src/components/Breadcrumbs.jsx +42 -0
  56. synapse_sdk/devtools/web/src/components/Layout.jsx +12 -0
  57. synapse_sdk/devtools/web/src/components/LogViewer.jsx +266 -0
  58. synapse_sdk/devtools/web/src/components/MessageViewer.jsx +150 -0
  59. synapse_sdk/devtools/web/src/components/NavigationSidebar.jsx +137 -0
  60. synapse_sdk/devtools/web/src/components/ServerStatusBar.jsx +245 -0
  61. synapse_sdk/devtools/web/src/components/icons.jsx +325 -0
  62. synapse_sdk/devtools/web/src/index.css +470 -0
  63. synapse_sdk/devtools/web/src/index.jsx +15 -0
  64. synapse_sdk/devtools/web/src/logo.svg +1 -0
  65. synapse_sdk/devtools/web/src/router.jsx +34 -0
  66. synapse_sdk/devtools/web/src/utils/api.js +425 -0
  67. synapse_sdk/devtools/web/src/views/ApplicationDetailView.jsx +241 -0
  68. synapse_sdk/devtools/web/src/views/ApplicationsView.jsx +224 -0
  69. synapse_sdk/devtools/web/src/views/HomeView.jsx +197 -0
  70. synapse_sdk/devtools/web/src/views/JobDetailView.jsx +310 -0
  71. synapse_sdk/devtools/web/src/views/PluginView.jsx +914 -0
  72. synapse_sdk/devtools/web/vite.config.js +13 -0
  73. synapse_sdk/plugins/categories/neural_net/actions/tune.py +1 -1
  74. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +382 -0
  75. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +16 -0
  76. synapse_sdk/plugins/categories/{upload/templates/plugin/task_pre_annotation.py → pre_annotation/templates/plugin/to_task.py} +1 -7
  77. synapse_sdk/plugins/categories/upload/templates/config.yaml +0 -4
  78. synapse_sdk/plugins/templates/plugin-config-schema.json +409 -0
  79. synapse_sdk/plugins/templates/schema.json +484 -0
  80. synapse_sdk/utils/converters/__init__.py +145 -0
  81. synapse_sdk/utils/converters/coco/__init__.py +0 -0
  82. synapse_sdk/utils/converters/coco/from_dm.py +269 -0
  83. {synapse_sdk-1.0.0a58.dist-info → synapse_sdk-1.0.0a60.dist-info}/METADATA +9 -22
  84. {synapse_sdk-1.0.0a58.dist-info → synapse_sdk-1.0.0a60.dist-info}/RECORD +88 -20
  85. synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py +0 -101
  86. {synapse_sdk-1.0.0a58.dist-info → synapse_sdk-1.0.0a60.dist-info}/WHEEL +0 -0
  87. {synapse_sdk-1.0.0a58.dist-info → synapse_sdk-1.0.0a60.dist-info}/entry_points.txt +0 -0
  88. {synapse_sdk-1.0.0a58.dist-info → synapse_sdk-1.0.0a60.dist-info}/licenses/LICENSE +0 -0
  89. {synapse_sdk-1.0.0a58.dist-info → synapse_sdk-1.0.0a60.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,484 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://synapse.sh/schemas/plugin-config.json",
4
+ "title": "Synapse Plugin Configuration",
5
+ "description": "JSON Schema for validating Synapse plugin config.yaml files",
6
+ "type": "object",
7
+ "required": ["name", "code", "version", "category"],
8
+ "additionalProperties": false,
9
+ "properties": {
10
+ "name": {
11
+ "type": "string",
12
+ "description": "Human-readable name of the plugin",
13
+ "minLength": 1,
14
+ "maxLength": 100
15
+ },
16
+ "code": {
17
+ "type": "string",
18
+ "description": "Unique identifier for the plugin",
19
+ "pattern": "^[a-z0-9_-]+$",
20
+ "minLength": 1,
21
+ "maxLength": 50
22
+ },
23
+ "version": {
24
+ "type": "string",
25
+ "description": "Semantic version of the plugin",
26
+ "pattern": "^\\d+\\.\\d+\\.\\d+(-[a-zA-Z0-9.-]+)?(\\+[a-zA-Z0-9.-]+)?$"
27
+ },
28
+ "description": {
29
+ "type": "string",
30
+ "description": "Brief description of plugin functionality",
31
+ "maxLength": 500
32
+ },
33
+ "readme": {
34
+ "type": "string",
35
+ "description": "Path to README file relative to plugin root",
36
+ "default": "README.md"
37
+ },
38
+ "category": {
39
+ "$ref": "#/$defs/PluginCategory"
40
+ },
41
+ "package_manager": {
42
+ "type": "string",
43
+ "description": "Package manager for plugin dependencies",
44
+ "enum": ["pip", "uv"],
45
+ "default": "pip"
46
+ },
47
+ "data_type": {
48
+ "type": "string",
49
+ "description": "Primary data type handled by the plugin",
50
+ "enum": ["image", "text", "video", "pcd", "audio"]
51
+ },
52
+ "tasks": {
53
+ "type": "array",
54
+ "description": "List of tasks this plugin can perform",
55
+ "items": {
56
+ "type": "string",
57
+ "pattern": "^[a-z]+\\.[a-z_]+$",
58
+ "description": "Task in format: data_type.task_name"
59
+ },
60
+ "uniqueItems": true
61
+ },
62
+ "supported_data_type": {
63
+ "type": "array",
64
+ "description": "Data types supported by upload plugins",
65
+ "items": {
66
+ "type": "string",
67
+ "enum": ["image", "text", "video", "pcd", "audio"]
68
+ },
69
+ "uniqueItems": true
70
+ },
71
+ "annotation_category": {
72
+ "type": "string",
73
+ "description": "Annotation category for smart tools",
74
+ "enum": ["object_detection", "classification", "segmentation", "keypoint", "text"]
75
+ },
76
+ "annotation_type": {
77
+ "type": "string",
78
+ "description": "Specific annotation type for smart tools",
79
+ "enum": ["bbox", "polygon", "point", "line", "mask", "label"]
80
+ },
81
+ "smart_tool": {
82
+ "type": "string",
83
+ "description": "Type of smart tool implementation",
84
+ "enum": ["interactive", "automatic", "semi_automatic"]
85
+ },
86
+ "actions": {
87
+ "type": "object",
88
+ "description": "Available actions for this plugin",
89
+ "patternProperties": {
90
+ "^[a-z_]+$": {
91
+ "$ref": "#/$defs/ActionConfig"
92
+ }
93
+ },
94
+ "additionalProperties": false
95
+ }
96
+ },
97
+ "allOf": [
98
+ {
99
+ "if": {
100
+ "properties": {
101
+ "category": { "const": "neural_net" }
102
+ }
103
+ },
104
+ "then": {
105
+ "required": ["data_type", "tasks"],
106
+ "properties": {
107
+ "actions": {
108
+ "type": "object",
109
+ "patternProperties": {
110
+ "^(train|inference|test|deployment|gradio|tune)$": {
111
+ "$ref": "#/$defs/ActionConfig"
112
+ }
113
+ }
114
+ }
115
+ }
116
+ }
117
+ },
118
+ {
119
+ "if": {
120
+ "properties": {
121
+ "category": { "const": "smart_tool" }
122
+ }
123
+ },
124
+ "then": {
125
+ "required": ["annotation_category", "annotation_type", "smart_tool"],
126
+ "properties": {
127
+ "actions": {
128
+ "type": "object",
129
+ "patternProperties": {
130
+ "^(auto_label)$": {
131
+ "$ref": "#/$defs/ActionConfig"
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+ },
138
+ {
139
+ "if": {
140
+ "properties": {
141
+ "category": { "const": "upload" }
142
+ }
143
+ },
144
+ "then": {
145
+ "required": ["supported_data_type"],
146
+ "properties": {
147
+ "actions": {
148
+ "type": "object",
149
+ "patternProperties": {
150
+ "^(upload)$": {
151
+ "$ref": "#/$defs/ActionConfig"
152
+ }
153
+ }
154
+ }
155
+ }
156
+ }
157
+ }
158
+ ],
159
+ "$defs": {
160
+ "PluginCategory": {
161
+ "type": "string",
162
+ "description": "Category classification for the plugin",
163
+ "enum": [
164
+ "neural_net",
165
+ "export",
166
+ "upload",
167
+ "smart_tool",
168
+ "post_annotation",
169
+ "pre_annotation",
170
+ "data_validation"
171
+ ]
172
+ },
173
+ "RunMethod": {
174
+ "type": "string",
175
+ "description": "Execution method for the action",
176
+ "enum": ["job", "task", "restapi"],
177
+ "default": "job"
178
+ },
179
+ "ActionConfig": {
180
+ "type": "object",
181
+ "description": "Configuration for a plugin action",
182
+ "required": ["entrypoint"],
183
+ "additionalProperties": false,
184
+ "properties": {
185
+ "entrypoint": {
186
+ "type": "string",
187
+ "description": "Python module path to action implementation",
188
+ "pattern": "^[a-zA-Z_][a-zA-Z0-9_.]*$"
189
+ },
190
+ "method": {
191
+ "$ref": "#/$defs/RunMethod"
192
+ },
193
+ "dataset": {
194
+ "type": "string",
195
+ "description": "Dataset identifier for the action"
196
+ },
197
+ "endpoints": {
198
+ "type": "array",
199
+ "description": "REST API endpoints for restapi method",
200
+ "items": {
201
+ "$ref": "#/$defs/EndpointConfig"
202
+ }
203
+ },
204
+ "hyperparameters": {
205
+ "$ref": "#/$defs/HyperparametersSchema"
206
+ },
207
+ "metrics": {
208
+ "$ref": "#/$defs/MetricsConfig"
209
+ },
210
+ "visualizations": {
211
+ "$ref": "#/$defs/VisualizationsConfig"
212
+ },
213
+ "options": {
214
+ "type": "object",
215
+ "description": "Additional configuration options",
216
+ "additionalProperties": true
217
+ },
218
+ "ui_schema": {
219
+ "type": "object",
220
+ "description": "FormKit UI schema for action parameters",
221
+ "additionalProperties": true
222
+ }
223
+ }
224
+ },
225
+ "EndpointConfig": {
226
+ "type": "object",
227
+ "description": "REST API endpoint configuration",
228
+ "required": ["method", "path"],
229
+ "additionalProperties": false,
230
+ "properties": {
231
+ "method": {
232
+ "type": "string",
233
+ "description": "HTTP method",
234
+ "enum": ["GET", "POST", "PUT", "DELETE", "PATCH"]
235
+ },
236
+ "path": {
237
+ "type": "string",
238
+ "description": "API endpoint path",
239
+ "pattern": "^/.*"
240
+ },
241
+ "description": {
242
+ "type": "string",
243
+ "description": "Endpoint description"
244
+ }
245
+ }
246
+ },
247
+ "HyperparametersSchema": {
248
+ "type": "object",
249
+ "description": "FormKit schema for hyperparameter configuration",
250
+ "additionalProperties": true,
251
+ "properties": {
252
+ "$formkit": {
253
+ "type": "string",
254
+ "const": "group"
255
+ },
256
+ "children": {
257
+ "type": "array",
258
+ "items": {
259
+ "$ref": "#/$defs/FormKitField"
260
+ }
261
+ }
262
+ }
263
+ },
264
+ "FormKitField": {
265
+ "type": "object",
266
+ "description": "FormKit field configuration",
267
+ "required": ["$formkit", "name"],
268
+ "properties": {
269
+ "$formkit": {
270
+ "type": "string",
271
+ "enum": ["text", "number", "select", "checkbox", "group", "list"]
272
+ },
273
+ "name": {
274
+ "type": "string",
275
+ "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$"
276
+ },
277
+ "label": {
278
+ "type": "string"
279
+ },
280
+ "help": {
281
+ "type": "string"
282
+ },
283
+ "value": {
284
+ "description": "Default value for the field"
285
+ },
286
+ "validation": {
287
+ "type": "string",
288
+ "description": "Validation rules"
289
+ },
290
+ "options": {
291
+ "type": "array",
292
+ "description": "Options for select fields",
293
+ "items": {
294
+ "type": "object",
295
+ "properties": {
296
+ "label": { "type": "string" },
297
+ "value": {}
298
+ }
299
+ }
300
+ },
301
+ "children": {
302
+ "type": "array",
303
+ "items": {
304
+ "$ref": "#/$defs/FormKitField"
305
+ }
306
+ }
307
+ },
308
+ "additionalProperties": true
309
+ },
310
+ "MetricsConfig": {
311
+ "type": "object",
312
+ "description": "Metrics tracking configuration",
313
+ "additionalProperties": false,
314
+ "properties": {
315
+ "epoch": {
316
+ "type": "array",
317
+ "description": "Per-epoch metrics to track",
318
+ "items": {
319
+ "type": "string"
320
+ }
321
+ },
322
+ "validation": {
323
+ "type": "array",
324
+ "description": "Validation metrics to track",
325
+ "items": {
326
+ "type": "string"
327
+ }
328
+ }
329
+ }
330
+ },
331
+ "VisualizationsConfig": {
332
+ "type": "object",
333
+ "description": "Training visualization configuration",
334
+ "additionalProperties": false,
335
+ "properties": {
336
+ "images": {
337
+ "$ref": "#/$defs/ImageVisualization"
338
+ },
339
+ "charts": {
340
+ "type": "array",
341
+ "description": "Chart visualizations",
342
+ "items": {
343
+ "$ref": "#/$defs/ChartVisualization"
344
+ }
345
+ }
346
+ }
347
+ },
348
+ "ImageVisualization": {
349
+ "type": "object",
350
+ "description": "Image grid visualization configuration",
351
+ "required": ["type"],
352
+ "additionalProperties": false,
353
+ "properties": {
354
+ "type": {
355
+ "type": "string",
356
+ "const": "image_grid"
357
+ },
358
+ "title": {
359
+ "type": "string",
360
+ "description": "Title for the image grid"
361
+ },
362
+ "columns": {
363
+ "type": "integer",
364
+ "description": "Number of columns in grid",
365
+ "minimum": 1,
366
+ "default": 3
367
+ },
368
+ "max_images": {
369
+ "type": "integer",
370
+ "description": "Maximum number of images to display",
371
+ "minimum": 1,
372
+ "default": 9
373
+ }
374
+ }
375
+ },
376
+ "ChartVisualization": {
377
+ "type": "object",
378
+ "description": "Chart visualization configuration",
379
+ "required": ["type", "title", "x_axis", "y_axis"],
380
+ "additionalProperties": false,
381
+ "properties": {
382
+ "type": {
383
+ "type": "string",
384
+ "enum": ["line", "bar", "scatter"]
385
+ },
386
+ "title": {
387
+ "type": "string",
388
+ "description": "Chart title"
389
+ },
390
+ "x_axis": {
391
+ "type": "string",
392
+ "description": "X-axis metric name"
393
+ },
394
+ "y_axis": {
395
+ "type": "string",
396
+ "description": "Y-axis metric name"
397
+ }
398
+ }
399
+ }
400
+ },
401
+ "examples": [
402
+ {
403
+ "name": "YOLO Object Detection",
404
+ "code": "yolo_detection",
405
+ "version": "1.0.0",
406
+ "description": "YOLO-based object detection model",
407
+ "category": "neural_net",
408
+ "package_manager": "uv",
409
+ "data_type": "image",
410
+ "tasks": ["image.object_detection"],
411
+ "actions": {
412
+ "train": {
413
+ "entrypoint": "plugin.train.train",
414
+ "method": "job",
415
+ "dataset": "coco",
416
+ "hyperparameters": {
417
+ "$formkit": "group",
418
+ "children": [
419
+ {
420
+ "$formkit": "number",
421
+ "name": "epochs",
422
+ "label": "Epochs",
423
+ "value": 100,
424
+ "validation": "required|min:1"
425
+ },
426
+ {
427
+ "$formkit": "number",
428
+ "name": "batch_size",
429
+ "label": "Batch Size",
430
+ "value": 16,
431
+ "validation": "required|min:1"
432
+ }
433
+ ]
434
+ }
435
+ },
436
+ "inference": {
437
+ "entrypoint": "plugin.inference.inference",
438
+ "method": "restapi",
439
+ "endpoints": [
440
+ {
441
+ "method": "POST",
442
+ "path": "/predict",
443
+ "description": "Run object detection inference"
444
+ }
445
+ ]
446
+ }
447
+ }
448
+ },
449
+ {
450
+ "name": "Auto Labeling Tool",
451
+ "code": "smart_labeler",
452
+ "version": "1.0.0",
453
+ "description": "Intelligent auto-labeling for object detection",
454
+ "category": "smart_tool",
455
+ "annotation_category": "object_detection",
456
+ "annotation_type": "bbox",
457
+ "smart_tool": "semi_automatic",
458
+ "actions": {
459
+ "auto_label": {
460
+ "entrypoint": "plugin.label.auto_label",
461
+ "method": "job"
462
+ }
463
+ }
464
+ },
465
+ {
466
+ "name": "Dataset Uploader",
467
+ "code": "dataset_upload",
468
+ "version": "1.0.0",
469
+ "description": "Upload datasets to cloud storage",
470
+ "category": "upload",
471
+ "supported_data_type": ["image", "video"],
472
+ "actions": {
473
+ "upload": {
474
+ "entrypoint": "plugin.upload.upload",
475
+ "method": "task",
476
+ "options": {
477
+ "chunk_size": 1024,
478
+ "parallel_uploads": 4
479
+ }
480
+ }
481
+ }
482
+ }
483
+ ]
484
+ }
@@ -0,0 +1,145 @@
1
+ import os
2
+
3
+
4
class FromDMConverter:
    """Base class for converting data from DM format to a specific format.

    Attributes:
        root_dir (str): Root directory containing data.
        is_categorized_dataset (bool): Whether to handle train, test, valid splits.
        version (str): Version of the converter.
        converted_data: Result of `convert()`. Starts as `None`; `save_to_folder()`
            fills it by calling `convert()` when it is still `None`.
        json_dir (str): Path of the `json` folder. Only set after `_set_directories()` is called.
        original_file_dir (str): Path of the `original_file` folder. Only set after
            `_set_directories()` is called.

    Usage:
        1. Subclass this base class and implement the `convert()` and `save_to_folder()` methods.
        2. Instantiate the converter with the required arguments.
        3. Call `convert()` to perform the in-memory conversion and obtain the result as a dict or list of dicts.
        4. Call `save_to_folder(output_dir)` to save the converted data and optionally copy original files.

    Args:
        root_dir (str): Path to the root directory containing data.
            - If `is_categorized_dataset=True`, the directory should contain subdirectories for
              `train`, `valid`, and optionally `test`.
            - Each subdirectory should contain `json` and `original_file` folders.
            - `train` and `valid` are required, while `test` is optional.
        is_categorized_dataset (bool): Whether to handle train, test, valid splits.

    Returns:
        - convert(): Returns the converted data as a Python dict or a dictionary with keys for each split.
        - save_to_folder(): Saves the converted data and optionally copies original files
          to the specified output directory.

    Example usage:
        # Dataset with splits
        converter = MyCustomConverter(root_dir='/path/to/data', is_categorized_dataset=True)
        converted = converter.convert()  # Returns a dict with keys for `train`, `valid`, and optionally `test`
        converter.save_to_folder('/my/target/output')  # Writes files/folders to output location

        # Dataset without splits
        converter = MyCustomConverter(root_dir='/path/to/data', is_categorized_dataset=False)
        converted = converter.convert()  # Returns a dict or a list, depending on the implementation
        converter.save_to_folder('/my/target/output')  # Writes files/folders to output location
    """

    def __init__(self, root_dir: str, is_categorized_dataset: bool = False) -> None:
        """Store the dataset location and layout flag; no filesystem access happens here."""
        self.root_dir: str = root_dir
        self.is_categorized_dataset: bool = is_categorized_dataset
        self.version: str = '1.0'
        self.converted_data = None

    def convert(self):
        """Convert DM format to a specific format.

        This method should be implemented by subclasses to perform the actual conversion.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def save_to_folder(self, output_dir: str) -> None:
        """Prepare `output_dir` and make sure converted data is available.

        The base implementation only creates the output directory and, when
        `converted_data` is still `None`, stores the result of `convert()` in it.
        Writing the actual files is left to subclasses.

        Args:
            output_dir (str): Directory the converted data should be written to.
        """
        self.ensure_dir(output_dir)
        if self.converted_data is None:
            # Automatically call convert() if converted_data is not set
            self.converted_data = self.convert()

    @staticmethod
    def ensure_dir(path: str) -> None:
        """Ensure that the directory exists, creating it (and any parents) if necessary.

        Args:
            path (str): Directory path to create.

        Raises:
            FileExistsError: If `path` already exists but is not a directory.
        """
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test when several workers prepare the same folder.
        os.makedirs(path, exist_ok=True)

    def _validate_required_dirs(self, dirs: dict) -> None:
        """Validate that all required directories exist.

        Args:
            dirs (dict): A dictionary where keys are directory names and values are their paths.

        Raises:
            FileNotFoundError: If any required path is missing or is not a directory.
        """
        for name, path in dirs.items():
            # isdir (not exists) so that a regular file with the expected name is rejected.
            if not os.path.isdir(path):
                raise FileNotFoundError(f'[ERROR] Required directory "{name}" does not exist at {path}')

    def _validate_optional_dirs(self, dirs: dict) -> dict:
        """Validate optional directories and return those that exist.

        A warning is printed for every entry that is skipped.

        Args:
            dirs (dict): A dictionary where keys are directory names and values are their paths.

        Returns:
            dict: The subset of `dirs` whose paths are existing directories.
        """
        existing_dirs = {}
        for name, path in dirs.items():
            if os.path.isdir(path):
                existing_dirs[name] = path
            else:
                print(f'[WARNING] Optional directory "{name}" does not exist. Skipping.')
        return existing_dirs

    def _validate_splits(self, required_splits, optional_splits=None):
        """Validate required and optional splits in the dataset.

        Args:
            required_splits (list): List of required split names (e.g., ['train', 'valid']).
                Only consulted when `is_categorized_dataset` is true.
            optional_splits (list, optional): List of optional split names (e.g., ['test']).
                `None` (the default) means no optional splits.

        Returns:
            dict: A dictionary with split names as keys and their corresponding directories as values.
                For a non-split dataset the only entry is `{'root': self.root_dir}`.

        Raises:
            FileNotFoundError: If a required split directory is missing, or (non-split dataset)
                if the `json` or `original_file` folder is missing under `root_dir`.
        """
        # None instead of a shared mutable `[]` default.
        if optional_splits is None:
            optional_splits = []

        splits = {}

        if self.is_categorized_dataset:
            # Validate required splits
            required_dirs = {split: os.path.join(self.root_dir, split) for split in required_splits}
            self._validate_required_dirs(required_dirs)
            splits.update(required_dirs)

            # Validate optional splits
            optional_dirs = {split: os.path.join(self.root_dir, split) for split in optional_splits}
            splits.update(self._validate_optional_dirs(optional_dirs))
        else:
            # Validate `json` and `original_file` folders for non-split datasets
            required_dirs = {
                'json': os.path.join(self.root_dir, 'json'),
                'original_file': os.path.join(self.root_dir, 'original_file'),
            }
            self._validate_required_dirs(required_dirs)
            splits['root'] = self.root_dir

        return splits

    def _set_directories(self, split=None):
        """Set `self.json_dir` and `self.original_file_dir` based on the dataset split.

        Args:
            split (str, optional): The name of the split (e.g., 'train', 'valid', 'test').
                If None (or any other falsy value), assumes no dataset split and the
                folders are looked up directly under `root_dir`.
        """
        base_dir = os.path.join(self.root_dir, split) if split else self.root_dir
        self.json_dir = os.path.join(base_dir, 'json')
        self.original_file_dir = os.path.join(base_dir, 'original_file')
File without changes