synapse-sdk 1.0.0a11__py3-none-any.whl → 2026.1.1b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. Click here for more details.

Files changed (261)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +9 -8
  3. synapse_sdk/cli/agent/__init__.py +25 -0
  4. synapse_sdk/cli/agent/config.py +104 -0
  5. synapse_sdk/cli/agent/select.py +197 -0
  6. synapse_sdk/cli/auth.py +104 -0
  7. synapse_sdk/cli/main.py +1025 -0
  8. synapse_sdk/cli/plugin/__init__.py +58 -0
  9. synapse_sdk/cli/plugin/create.py +566 -0
  10. synapse_sdk/cli/plugin/job.py +196 -0
  11. synapse_sdk/cli/plugin/publish.py +322 -0
  12. synapse_sdk/cli/plugin/run.py +131 -0
  13. synapse_sdk/cli/plugin/test.py +200 -0
  14. synapse_sdk/clients/README.md +239 -0
  15. synapse_sdk/clients/__init__.py +5 -0
  16. synapse_sdk/clients/_template.py +266 -0
  17. synapse_sdk/clients/agent/__init__.py +84 -29
  18. synapse_sdk/clients/agent/async_ray.py +289 -0
  19. synapse_sdk/clients/agent/container.py +83 -0
  20. synapse_sdk/clients/agent/plugin.py +101 -0
  21. synapse_sdk/clients/agent/ray.py +296 -39
  22. synapse_sdk/clients/backend/__init__.py +152 -12
  23. synapse_sdk/clients/backend/annotation.py +164 -22
  24. synapse_sdk/clients/backend/core.py +101 -0
  25. synapse_sdk/clients/backend/data_collection.py +292 -0
  26. synapse_sdk/clients/backend/hitl.py +87 -0
  27. synapse_sdk/clients/backend/integration.py +374 -46
  28. synapse_sdk/clients/backend/ml.py +134 -22
  29. synapse_sdk/clients/backend/models.py +247 -0
  30. synapse_sdk/clients/base.py +538 -59
  31. synapse_sdk/clients/exceptions.py +35 -7
  32. synapse_sdk/clients/pipeline/__init__.py +5 -0
  33. synapse_sdk/clients/pipeline/client.py +636 -0
  34. synapse_sdk/clients/protocols.py +178 -0
  35. synapse_sdk/clients/utils.py +86 -8
  36. synapse_sdk/clients/validation.py +58 -0
  37. synapse_sdk/enums.py +76 -0
  38. synapse_sdk/exceptions.py +168 -0
  39. synapse_sdk/integrations/__init__.py +74 -0
  40. synapse_sdk/integrations/_base.py +119 -0
  41. synapse_sdk/integrations/_context.py +53 -0
  42. synapse_sdk/integrations/ultralytics/__init__.py +78 -0
  43. synapse_sdk/integrations/ultralytics/_callbacks.py +126 -0
  44. synapse_sdk/integrations/ultralytics/_patches.py +124 -0
  45. synapse_sdk/loggers.py +476 -95
  46. synapse_sdk/mcp/MCP.md +69 -0
  47. synapse_sdk/mcp/__init__.py +48 -0
  48. synapse_sdk/mcp/__main__.py +6 -0
  49. synapse_sdk/mcp/config.py +349 -0
  50. synapse_sdk/mcp/prompts/__init__.py +4 -0
  51. synapse_sdk/mcp/resources/__init__.py +4 -0
  52. synapse_sdk/mcp/server.py +1352 -0
  53. synapse_sdk/mcp/tools/__init__.py +6 -0
  54. synapse_sdk/plugins/__init__.py +133 -9
  55. synapse_sdk/plugins/action.py +229 -0
  56. synapse_sdk/plugins/actions/__init__.py +82 -0
  57. synapse_sdk/plugins/actions/dataset/__init__.py +37 -0
  58. synapse_sdk/plugins/actions/dataset/action.py +471 -0
  59. synapse_sdk/plugins/actions/export/__init__.py +55 -0
  60. synapse_sdk/plugins/actions/export/action.py +183 -0
  61. synapse_sdk/plugins/actions/export/context.py +59 -0
  62. synapse_sdk/plugins/actions/inference/__init__.py +84 -0
  63. synapse_sdk/plugins/actions/inference/action.py +285 -0
  64. synapse_sdk/plugins/actions/inference/context.py +81 -0
  65. synapse_sdk/plugins/actions/inference/deployment.py +322 -0
  66. synapse_sdk/plugins/actions/inference/serve.py +252 -0
  67. synapse_sdk/plugins/actions/train/__init__.py +54 -0
  68. synapse_sdk/plugins/actions/train/action.py +326 -0
  69. synapse_sdk/plugins/actions/train/context.py +57 -0
  70. synapse_sdk/plugins/actions/upload/__init__.py +49 -0
  71. synapse_sdk/plugins/actions/upload/action.py +165 -0
  72. synapse_sdk/plugins/actions/upload/context.py +61 -0
  73. synapse_sdk/plugins/config.py +98 -0
  74. synapse_sdk/plugins/context/__init__.py +109 -0
  75. synapse_sdk/plugins/context/env.py +113 -0
  76. synapse_sdk/plugins/datasets/__init__.py +113 -0
  77. synapse_sdk/plugins/datasets/converters/__init__.py +76 -0
  78. synapse_sdk/plugins/datasets/converters/base.py +347 -0
  79. synapse_sdk/plugins/datasets/converters/yolo/__init__.py +9 -0
  80. synapse_sdk/plugins/datasets/converters/yolo/from_dm.py +468 -0
  81. synapse_sdk/plugins/datasets/converters/yolo/to_dm.py +381 -0
  82. synapse_sdk/plugins/datasets/formats/__init__.py +82 -0
  83. synapse_sdk/plugins/datasets/formats/dm.py +351 -0
  84. synapse_sdk/plugins/datasets/formats/yolo.py +240 -0
  85. synapse_sdk/plugins/decorators.py +83 -0
  86. synapse_sdk/plugins/discovery.py +790 -0
  87. synapse_sdk/plugins/docs/ACTION_DEV_GUIDE.md +933 -0
  88. synapse_sdk/plugins/docs/ARCHITECTURE.md +1225 -0
  89. synapse_sdk/plugins/docs/LOGGING_SYSTEM.md +683 -0
  90. synapse_sdk/plugins/docs/OVERVIEW.md +531 -0
  91. synapse_sdk/plugins/docs/PIPELINE_GUIDE.md +145 -0
  92. synapse_sdk/plugins/docs/README.md +513 -0
  93. synapse_sdk/plugins/docs/STEP.md +656 -0
  94. synapse_sdk/plugins/enums.py +70 -10
  95. synapse_sdk/plugins/errors.py +92 -0
  96. synapse_sdk/plugins/executors/__init__.py +43 -0
  97. synapse_sdk/plugins/executors/local.py +99 -0
  98. synapse_sdk/plugins/executors/ray/__init__.py +18 -0
  99. synapse_sdk/plugins/executors/ray/base.py +282 -0
  100. synapse_sdk/plugins/executors/ray/job.py +298 -0
  101. synapse_sdk/plugins/executors/ray/jobs_api.py +511 -0
  102. synapse_sdk/plugins/executors/ray/packaging.py +137 -0
  103. synapse_sdk/plugins/executors/ray/pipeline.py +792 -0
  104. synapse_sdk/plugins/executors/ray/task.py +257 -0
  105. synapse_sdk/plugins/models/__init__.py +26 -0
  106. synapse_sdk/plugins/models/logger.py +173 -0
  107. synapse_sdk/plugins/models/pipeline.py +25 -0
  108. synapse_sdk/plugins/pipelines/__init__.py +81 -0
  109. synapse_sdk/plugins/pipelines/action_pipeline.py +417 -0
  110. synapse_sdk/plugins/pipelines/context.py +107 -0
  111. synapse_sdk/plugins/pipelines/display.py +311 -0
  112. synapse_sdk/plugins/runner.py +114 -0
  113. synapse_sdk/plugins/schemas/__init__.py +19 -0
  114. synapse_sdk/plugins/schemas/results.py +152 -0
  115. synapse_sdk/plugins/steps/__init__.py +63 -0
  116. synapse_sdk/plugins/steps/base.py +128 -0
  117. synapse_sdk/plugins/steps/context.py +90 -0
  118. synapse_sdk/plugins/steps/orchestrator.py +128 -0
  119. synapse_sdk/plugins/steps/registry.py +103 -0
  120. synapse_sdk/plugins/steps/utils/__init__.py +20 -0
  121. synapse_sdk/plugins/steps/utils/logging.py +85 -0
  122. synapse_sdk/plugins/steps/utils/timing.py +71 -0
  123. synapse_sdk/plugins/steps/utils/validation.py +68 -0
  124. synapse_sdk/plugins/templates/__init__.py +50 -0
  125. synapse_sdk/plugins/templates/base/.gitignore.j2 +26 -0
  126. synapse_sdk/plugins/templates/base/.synapseignore.j2 +11 -0
  127. synapse_sdk/plugins/templates/base/README.md.j2 +26 -0
  128. synapse_sdk/plugins/templates/base/plugin/__init__.py.j2 +1 -0
  129. synapse_sdk/plugins/templates/base/pyproject.toml.j2 +14 -0
  130. synapse_sdk/plugins/templates/base/requirements.txt.j2 +1 -0
  131. synapse_sdk/plugins/templates/custom/plugin/main.py.j2 +18 -0
  132. synapse_sdk/plugins/templates/data_validation/plugin/validate.py.j2 +32 -0
  133. synapse_sdk/plugins/templates/export/plugin/export.py.j2 +36 -0
  134. synapse_sdk/plugins/templates/neural_net/plugin/inference.py.j2 +36 -0
  135. synapse_sdk/plugins/templates/neural_net/plugin/train.py.j2 +33 -0
  136. synapse_sdk/plugins/templates/post_annotation/plugin/post_annotate.py.j2 +32 -0
  137. synapse_sdk/plugins/templates/pre_annotation/plugin/pre_annotate.py.j2 +32 -0
  138. synapse_sdk/plugins/templates/smart_tool/plugin/auto_label.py.j2 +44 -0
  139. synapse_sdk/plugins/templates/upload/plugin/upload.py.j2 +35 -0
  140. synapse_sdk/plugins/testing/__init__.py +25 -0
  141. synapse_sdk/plugins/testing/sample_actions.py +98 -0
  142. synapse_sdk/plugins/types.py +206 -0
  143. synapse_sdk/plugins/upload.py +595 -64
  144. synapse_sdk/plugins/utils.py +325 -37
  145. synapse_sdk/shared/__init__.py +25 -0
  146. synapse_sdk/utils/__init__.py +1 -0
  147. synapse_sdk/utils/auth.py +74 -0
  148. synapse_sdk/utils/file/__init__.py +58 -0
  149. synapse_sdk/utils/file/archive.py +449 -0
  150. synapse_sdk/utils/file/checksum.py +167 -0
  151. synapse_sdk/utils/file/download.py +286 -0
  152. synapse_sdk/utils/file/io.py +129 -0
  153. synapse_sdk/utils/file/requirements.py +36 -0
  154. synapse_sdk/utils/network.py +168 -0
  155. synapse_sdk/utils/storage/__init__.py +238 -0
  156. synapse_sdk/utils/storage/config.py +188 -0
  157. synapse_sdk/utils/storage/errors.py +52 -0
  158. synapse_sdk/utils/storage/providers/__init__.py +13 -0
  159. synapse_sdk/utils/storage/providers/base.py +76 -0
  160. synapse_sdk/utils/storage/providers/gcs.py +168 -0
  161. synapse_sdk/utils/storage/providers/http.py +250 -0
  162. synapse_sdk/utils/storage/providers/local.py +126 -0
  163. synapse_sdk/utils/storage/providers/s3.py +177 -0
  164. synapse_sdk/utils/storage/providers/sftp.py +208 -0
  165. synapse_sdk/utils/storage/registry.py +125 -0
  166. synapse_sdk/utils/websocket.py +99 -0
  167. synapse_sdk-2026.1.1b2.dist-info/METADATA +715 -0
  168. synapse_sdk-2026.1.1b2.dist-info/RECORD +172 -0
  169. {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/WHEEL +1 -1
  170. synapse_sdk-2026.1.1b2.dist-info/licenses/LICENSE +201 -0
  171. locale/en/LC_MESSAGES/messages.mo +0 -0
  172. locale/en/LC_MESSAGES/messages.po +0 -39
  173. locale/ko/LC_MESSAGES/messages.mo +0 -0
  174. locale/ko/LC_MESSAGES/messages.po +0 -34
  175. synapse_sdk/cli/create_plugin.py +0 -10
  176. synapse_sdk/clients/agent/core.py +0 -7
  177. synapse_sdk/clients/agent/service.py +0 -15
  178. synapse_sdk/clients/backend/dataset.py +0 -51
  179. synapse_sdk/clients/ray/__init__.py +0 -6
  180. synapse_sdk/clients/ray/core.py +0 -22
  181. synapse_sdk/clients/ray/serve.py +0 -20
  182. synapse_sdk/i18n.py +0 -35
  183. synapse_sdk/plugins/categories/__init__.py +0 -0
  184. synapse_sdk/plugins/categories/base.py +0 -235
  185. synapse_sdk/plugins/categories/data_validation/__init__.py +0 -0
  186. synapse_sdk/plugins/categories/data_validation/actions/__init__.py +0 -0
  187. synapse_sdk/plugins/categories/data_validation/actions/validation.py +0 -10
  188. synapse_sdk/plugins/categories/data_validation/templates/config.yaml +0 -3
  189. synapse_sdk/plugins/categories/data_validation/templates/plugin/__init__.py +0 -0
  190. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +0 -5
  191. synapse_sdk/plugins/categories/decorators.py +0 -13
  192. synapse_sdk/plugins/categories/export/__init__.py +0 -0
  193. synapse_sdk/plugins/categories/export/actions/__init__.py +0 -0
  194. synapse_sdk/plugins/categories/export/actions/export.py +0 -10
  195. synapse_sdk/plugins/categories/import/__init__.py +0 -0
  196. synapse_sdk/plugins/categories/import/actions/__init__.py +0 -0
  197. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  198. synapse_sdk/plugins/categories/neural_net/__init__.py +0 -0
  199. synapse_sdk/plugins/categories/neural_net/actions/__init__.py +0 -0
  200. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +0 -45
  201. synapse_sdk/plugins/categories/neural_net/actions/inference.py +0 -18
  202. synapse_sdk/plugins/categories/neural_net/actions/test.py +0 -10
  203. synapse_sdk/plugins/categories/neural_net/actions/train.py +0 -143
  204. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +0 -12
  205. synapse_sdk/plugins/categories/neural_net/templates/plugin/__init__.py +0 -0
  206. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +0 -4
  207. synapse_sdk/plugins/categories/neural_net/templates/plugin/test.py +0 -2
  208. synapse_sdk/plugins/categories/neural_net/templates/plugin/train.py +0 -14
  209. synapse_sdk/plugins/categories/post_annotation/__init__.py +0 -0
  210. synapse_sdk/plugins/categories/post_annotation/actions/__init__.py +0 -0
  211. synapse_sdk/plugins/categories/post_annotation/actions/post_annotation.py +0 -10
  212. synapse_sdk/plugins/categories/post_annotation/templates/config.yaml +0 -3
  213. synapse_sdk/plugins/categories/post_annotation/templates/plugin/__init__.py +0 -0
  214. synapse_sdk/plugins/categories/post_annotation/templates/plugin/post_annotation.py +0 -3
  215. synapse_sdk/plugins/categories/pre_annotation/__init__.py +0 -0
  216. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +0 -0
  217. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation.py +0 -10
  218. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +0 -3
  219. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/__init__.py +0 -0
  220. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/pre_annotation.py +0 -3
  221. synapse_sdk/plugins/categories/registry.py +0 -16
  222. synapse_sdk/plugins/categories/smart_tool/__init__.py +0 -0
  223. synapse_sdk/plugins/categories/smart_tool/actions/__init__.py +0 -0
  224. synapse_sdk/plugins/categories/smart_tool/actions/auto_label.py +0 -37
  225. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +0 -7
  226. synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py +0 -0
  227. synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py +0 -11
  228. synapse_sdk/plugins/categories/templates.py +0 -32
  229. synapse_sdk/plugins/cli/__init__.py +0 -21
  230. synapse_sdk/plugins/cli/publish.py +0 -37
  231. synapse_sdk/plugins/cli/run.py +0 -67
  232. synapse_sdk/plugins/exceptions.py +0 -22
  233. synapse_sdk/plugins/models.py +0 -121
  234. synapse_sdk/plugins/templates/cookiecutter.json +0 -11
  235. synapse_sdk/plugins/templates/hooks/post_gen_project.py +0 -3
  236. synapse_sdk/plugins/templates/hooks/pre_prompt.py +0 -21
  237. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  238. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  239. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.gitignore +0 -27
  240. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.pre-commit-config.yaml +0 -7
  241. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/README.md +0 -5
  242. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +0 -6
  243. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  244. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/plugin/__init__.py +0 -0
  245. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/pyproject.toml +0 -13
  246. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +0 -1
  247. synapse_sdk/shared/enums.py +0 -8
  248. synapse_sdk/utils/debug.py +0 -5
  249. synapse_sdk/utils/file.py +0 -87
  250. synapse_sdk/utils/module_loading.py +0 -29
  251. synapse_sdk/utils/pydantic/__init__.py +0 -0
  252. synapse_sdk/utils/pydantic/config.py +0 -4
  253. synapse_sdk/utils/pydantic/errors.py +0 -33
  254. synapse_sdk/utils/pydantic/validators.py +0 -7
  255. synapse_sdk/utils/storage.py +0 -91
  256. synapse_sdk/utils/string.py +0 -11
  257. synapse_sdk-1.0.0a11.dist-info/LICENSE +0 -21
  258. synapse_sdk-1.0.0a11.dist-info/METADATA +0 -43
  259. synapse_sdk-1.0.0a11.dist-info/RECORD +0 -111
  260. {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/entry_points.txt +0 -0
  261. {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,471 @@
1
+ """Dataset action with download and convert operations.
2
+
3
+ A single action class that handles both dataset download and format conversion,
4
+ selected via the operation parameter. Designed for pipeline composition.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import shutil
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+ from enum import StrEnum
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+ from synapse_sdk.plugins.action import BaseAction
19
+ from synapse_sdk.plugins.datasets.converters import DatasetFormat, get_converter
20
+ from synapse_sdk.plugins.datasets.formats.dm import DMVersion
21
+ from synapse_sdk.plugins.enums import PluginCategory
22
+ from synapse_sdk.plugins.types import YOLODataset
23
+
24
+ if TYPE_CHECKING:
25
+ from synapse_sdk.clients.backend import BackendClient
26
+
27
+
28
+ class DatasetOperation(StrEnum):
29
+ """Dataset operation types."""
30
+
31
+ DOWNLOAD = 'download'
32
+ CONVERT = 'convert'
33
+
34
+
35
class DatasetParams(BaseModel):
    """Input parameters consumed by DatasetAction.

    ``operation`` selects what the action does:
        - download: downloads a dataset from the backend
        - convert: converts a dataset from one format to another

    Attributes:
        operation: Operation to run; defaults to download.
        dataset_id: Data collection ID (download only).
        splits: Per-split filter definitions for a categorized download.
        path: Dataset location on disk (input to convert).
        source_format: Format the dataset is currently in (convert only).
        target_format: Format to produce (convert only).
        dm_version: Datamaker version used when converting from DM.
        output_dir: Where to write results; optional for both operations.
        is_categorized: True when the dataset is organised into splits.
    """

    operation: DatasetOperation = DatasetOperation.DOWNLOAD

    # ---- download ----
    dataset_id: int | None = Field(
        default=None,
        description='Data collection ID',
    )
    splits: dict[str, dict[str, Any]] | None = Field(
        default=None,
        description='Split definitions: {"train": {...filters}, "valid": {...}}',
    )

    # ---- convert ----
    path: Path | str | None = Field(
        default=None,
        description='Dataset path',
    )
    source_format: str = Field(
        default='dm_v2',
        description='Source format',
    )
    target_format: str = Field(
        default='yolo',
        description='Target format',
    )
    dm_version: str = Field(
        default='v2',
        description='Datamaker version',
    )

    # ---- shared ----
    output_dir: Path | str | None = Field(
        default=None,
        description='Output directory',
    )
    is_categorized: bool = Field(
        default=False,
        description='Has splits',
    )
72
+
73
+
74
class DatasetResult(BaseModel):
    """Outcome of a DatasetAction run: where the dataset lives plus metadata.

    Attributes:
        path: Directory holding the dataset.
        format: Name of the dataset format (e.g., 'dm_v2', 'yolo').
        is_categorized: True when the dataset is organised into splits.
        config_path: Config file for the dataset (e.g., dataset.yaml for YOLO),
            or None when there is none.
        count: How many items were processed, when known.
        source_path: Location of the original dataset (set by convert).
        data_path: Read-only property; config_path when set, else path.
    """

    path: Path
    format: str
    is_categorized: bool = False
    config_path: Path | None = None
    count: int | None = None
    source_path: Path | None = None

    class Config:
        arbitrary_types_allowed = True

    @property
    def data_path(self) -> Path:
        """Single path a downstream action should consume.

        Prefers the config file (e.g., for training with YOLO format) and
        falls back to the dataset directory when no config file is set.
        """
        if self.config_path is None:
            return self.path
        return self.config_path
107
+
108
+
109
class DatasetAction(BaseAction[DatasetParams]):
    """Dataset action with download and convert operations.

    A unified action for dataset operations that can be used in pipelines.
    The operation is determined by the params.operation field.

    Type declarations:
        - input_type: None (accepts initial params)
        - output_type: YOLODataset. This is a static class attribute, so it
          is the declared type for every operation, even though download
          returns a result whose format is 'dm_v2'.

    For download:
        - Requires: dataset_id
        - Optional: splits, output_dir
        - Returns: path, format='dm_v2', is_categorized, count

    For convert:
        - Requires: path (target_format defaults to 'yolo')
        - Optional: source_format, dm_version, output_dir
        - Returns: path, format, config_path, source_path

    Example:
        >>> # Standalone usage
        >>> action = DatasetAction(
        ...     DatasetParams(operation='download', dataset_id=123),
        ...     ctx,
        ... )
        >>> result = action.execute()
        >>>
        >>> # Pipeline usage
        >>> pipeline = ActionPipeline([DatasetAction, DatasetAction, TrainAction])
        >>> result = pipeline.execute({
        ...     'operation': 'download',
        ...     'dataset_id': 123,
        ...     'target_format': 'yolo',  # Used by second DatasetAction
        ... }, ctx)
    """

    category = PluginCategory.NEURAL_NET

    # Input type is flexible (accepts various initial params)
    input_type = None
    # Declared output type assumes the common "convert to YOLO" case; it is
    # not adjusted per operation. For precise typing, use separate
    # DownloadAction/ConvertAction classes.
    output_type = YOLODataset

    result_model = DatasetResult

    @property
    def client(self) -> BackendClient:
        """Backend client from context.

        Raises:
            RuntimeError: If the context carries no client.
        """
        if self.ctx.client is None:
            raise RuntimeError('No backend client in context')
        return self.ctx.client

    def execute(self) -> DatasetResult:
        """Execute the dataset operation based on params.operation.

        Raises:
            ValueError: If params.operation is neither download nor convert.
        """
        operation = self.params.operation
        if operation == DatasetOperation.DOWNLOAD:
            return self.download()
        if operation == DatasetOperation.CONVERT:
            return self.convert()
        raise ValueError(f'Unknown operation: {operation}')

    def download(self) -> DatasetResult:
        """Download dataset from backend.

        Lists the data units of a data collection and saves them locally
        in Datamaker layout (json/ + original_files/). When params.splits is
        non-empty, each split is written to its own sub-directory of the
        output directory using that split's filters.

        Returns:
            DatasetResult with path, format='dm_v2', is_categorized and the
            number of units saved successfully.

        Raises:
            ValueError: If dataset_id not provided.
        """
        from synapse_sdk.utils.file import get_temp_path

        dataset_id = self.params.dataset_id
        if dataset_id is None:
            raise ValueError('dataset_id is required for download operation')

        splits = self.params.splits
        is_categorized = bool(splits)

        # Fall back to a temp location keyed by dataset ID; Path() wraps both
        # branches so output_dir is a Path whatever get_temp_path returns.
        output_dir = Path(self.params.output_dir or get_temp_path(f'datasets/{dataset_id}'))

        # Report initial progress
        self.set_progress(0, 100, 'init')

        # Get collection info
        collection = self.client.get_data_collection(dataset_id)
        self.log(
            'download_start',
            {
                'dataset_id': dataset_id,
                'collection_name': collection.get('name'),
                'is_categorized': is_categorized,
            },
        )

        # Report collection fetched
        self.set_progress(1, 100, 'init')

        total_downloaded = 0

        if splits:
            # Download each split separately
            for split_name, filters in splits.items():
                count = self._download_split(
                    dataset_id=dataset_id,
                    output_dir=output_dir / split_name,
                    filters=filters or {},
                )
                total_downloaded += count
                self.log(
                    'split_downloaded',
                    {
                        'split': split_name,
                        'count': count,
                    },
                )
        else:
            # Download all data units
            total_downloaded = self._download_split(
                dataset_id=dataset_id,
                output_dir=output_dir,
                filters={},
            )

        self.log(
            'download_complete',
            {
                'path': str(output_dir),
                'total_units': total_downloaded,
            },
        )

        return DatasetResult(
            path=output_dir,
            format='dm_v2',
            is_categorized=is_categorized,
            count=total_downloaded,
        )

    @staticmethod
    def _scale_progress(done: int, total: int | None, start: int, span: int) -> int:
        """Map done/total onto the percentage window [start, start + span].

        A missing or non-positive total maps to the end of the window, and
        the ratio is capped at 1 so an understated total cannot push the
        value past the window.
        """
        if not total or total <= 0:
            return start + span
        fraction = min(done / total, 1.0)
        return int(start + fraction * span)

    def _download_unit(self, unit: dict, json_dir: Path, files_dir: Path) -> bool:
        """Save one data unit's JSON and copy its locally available files.

        Args:
            unit: Data unit record as yielded by the backend client.
            json_dir: Directory receiving the unit's JSON file.
            files_dir: Directory receiving copies of the unit's files.

        Returns:
            True on success; False if anything failed (the failure is logged
            as 'download_unit_error' rather than raised).
        """
        # Bound before the try so the error handler can always reference it.
        unit_id = None
        try:
            unit_id = unit.get('id') or unit.get('data_unit_id')
            # `or {}` also covers a present-but-null 'files' entry.
            files = unit.get('files') or {}

            # Build DM v2 JSON structure
            dm_json = self._build_dm_json(unit)

            # Determine base name from first file or unit ID
            base_name = None
            for file_info in files.values():
                if isinstance(file_info, dict):
                    file_path = file_info.get('path') or file_info.get('url', '')
                else:
                    file_path = str(file_info)
                if file_path:
                    base_name = Path(file_path).stem
                    break
            if not base_name:
                base_name = str(unit_id)

            # Save JSON. ensure_ascii=False emits raw non-ASCII characters,
            # so pin the encoding instead of relying on the platform default.
            json_path = json_dir / f'{base_name}.json'
            json_path.write_text(
                json.dumps(dm_json, indent=2, ensure_ascii=False),
                encoding='utf-8',
            )

            # Copy files that exist on the local filesystem; entries without
            # a local path (e.g. URL-only) are skipped here.
            for file_info in files.values():
                if isinstance(file_info, dict):
                    file_path = file_info.get('path')
                else:
                    file_path = str(file_info)

                if not file_path or not Path(file_path).exists():
                    continue
                dest = files_dir / Path(file_path).name
                if not dest.exists():
                    shutil.copy(file_path, dest)

            return True
        except Exception as e:
            # Deliberately best-effort: one bad unit must not abort the split.
            self.log('download_unit_error', {'unit_id': unit_id, 'error': str(e)})
            return False

    def _download_split(
        self,
        dataset_id: int,
        output_dir: Path,
        filters: dict[str, Any],
        max_workers: int = 10,
    ) -> int:
        """Download a single split of the dataset.

        Args:
            dataset_id: Data collection ID to list units from.
            output_dir: Directory that receives json/ and original_files/.
            filters: Extra query parameters merged into the listing request.
            max_workers: Size of the thread pool that processes units.

        Returns:
            Number of units that were saved successfully.
        """
        # Create output directories
        json_dir = output_dir / 'json'
        files_dir = output_dir / 'original_files'
        json_dir.mkdir(parents=True, exist_ok=True)
        files_dir.mkdir(parents=True, exist_ok=True)

        # Report fetching data units
        self.set_progress(2, 100, 'fetch')

        # List data units
        params = {'data_collection': dataset_id, **filters}
        data_units_gen, total_count = self.client.list_data_units(
            params=params,
            list_all=True,
        )

        # Report data units fetched
        self.set_progress(5, 100, 'fetch')
        self.log('data_units_listed', {'total_count': total_count})

        downloaded = 0

        # Process units with thread pool
        # Note: data_units_gen is a lazy generator that fetches pages from API
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []

            # Submit downloads as we iterate (reports fetch progress, 5-50% range)
            for units_fetched, unit in enumerate(data_units_gen, start=1):
                futures.append(executor.submit(self._download_unit, unit, json_dir, files_dir))
                if units_fetched % 10 == 0 or units_fetched == total_count:
                    self.set_progress(self._scale_progress(units_fetched, total_count, 5, 45), 100, 'fetch')

            # Process completed downloads (50-100% range)
            for completed, future in enumerate(as_completed(futures), start=1):
                if future.result():
                    downloaded += 1
                self.set_progress(self._scale_progress(completed, total_count, 50, 50), 100, 'download')

        return downloaded

    def _build_dm_json(self, unit: dict) -> dict[str, Any]:
        """Build Datamaker v2 JSON from a data unit.

        Copies the known annotation types from unit['annotations'] (when it
        is a dict) and derives, per annotation type, the sorted set of
        'classification' values found on its annotations.

        Returns:
            Dict with 'classification' (type -> sorted values, empty types
            omitted) and 'images' (a one-element list of annotation lists).
        """
        annotations = unit.get('annotations', {})

        dm_image: dict[str, list] = {
            'bounding_box': [],
            'polygon': [],
            'polyline': [],
            'keypoint': [],
            'relation': [],
            'group': [],
        }

        if isinstance(annotations, dict):
            for key in dm_image:
                if key in annotations:
                    dm_image[key] = annotations[key]

        # Build classification map from annotations
        classifications: dict[str, set[str]] = {}
        for ann_type, anns in dm_image.items():
            if not anns:
                continue
            # Only dict-shaped annotations can carry a 'classification' key.
            classifications[ann_type] = {
                ann['classification'] for ann in anns if isinstance(ann, dict) and 'classification' in ann
            }

        return {
            'classification': {k: sorted(v) for k, v in classifications.items() if v},
            'images': [dm_image],
        }

    def convert(self) -> DatasetResult:
        """Convert dataset from one format to another.

        Converts the dataset at params.path to params.target_format and
        writes it to params.output_dir, or to a sibling directory named
        '<source name>_<target format>' when no output_dir is given.

        Returns:
            DatasetResult with converted path, format, config_path and
            source_path. config_path is only set for YOLO output when
            dataset.yaml exists in the output directory.

        Raises:
            ValueError: If path not provided.
            FileNotFoundError: If the source path does not exist.
        """
        if self.params.path is None:
            raise ValueError('path is required for convert operation')

        source_path = Path(self.params.path)
        if not source_path.exists():
            raise FileNotFoundError(f'Dataset path does not exist: {source_path}')

        # Parse formats
        target_format = DatasetFormat(self.params.target_format)
        dm_version = DMVersion.V1 if self.params.dm_version == 'v1' else DMVersion.V2

        # Determine source format
        # NOTE(review): for any DM source the concrete version comes from
        # dm_version, not from the '_v1'/'_v2' suffix of source_format, so
        # source_format='dm_v1' with the default dm_version resolves to
        # DM_V2 -- confirm this is intended.
        source_format_str = self.params.source_format
        if source_format_str in ('dm_v1', 'dm_v2', 'dm'):
            src_format = DatasetFormat.DM_V1 if dm_version == DMVersion.V1 else DatasetFormat.DM_V2
        else:
            src_format = DatasetFormat(source_format_str)

        # Determine output directory
        if self.params.output_dir:
            output_dir = Path(self.params.output_dir)
        else:
            output_dir = source_path.parent / f'{source_path.name}_{target_format.value}'

        self.log(
            'convert_start',
            {
                'source_path': str(source_path),
                'source_format': src_format.value,
                'target_format': target_format.value,
                'is_categorized': self.params.is_categorized,
            },
        )

        # Get converter and run conversion
        converter = get_converter(
            source=src_format,
            target=target_format,
            root_dir=source_path,
            is_categorized=self.params.is_categorized,
            dm_version=dm_version,
        )

        converter.convert()
        converter.save_to_folder(output_dir)

        # Determine config path
        config_path = None
        if target_format == DatasetFormat.YOLO:
            config_path = output_dir / 'dataset.yaml'
            if not config_path.exists():
                config_path = None

        self.log(
            'convert_complete',
            {
                'output_path': str(output_dir),
                'config_path': str(config_path) if config_path else None,
            },
        )

        return DatasetResult(
            path=output_dir,
            format=target_format.value,
            is_categorized=self.params.is_categorized,
            config_path=config_path,
            source_path=source_path,
        )
469
+
470
+
471
+ __all__ = ['DatasetAction', 'DatasetOperation', 'DatasetParams', 'DatasetResult']
@@ -0,0 +1,55 @@
1
+ """Export action module with optional workflow step support.
2
+
3
+ Provides the export action base class:
4
+ - BaseExportAction: Base class for export workflows
5
+ - ExportContext: Export-specific context extending BaseStepContext
6
+ - ExportProgressCategories: Standard progress category names
7
+
8
+ For step infrastructure (BaseStep, StepRegistry, Orchestrator),
9
+ use the steps module:
10
+ from synapse_sdk.plugins.steps import BaseStep, StepRegistry
11
+
12
+ Example (simple execute):
13
+ >>> class MyExportAction(BaseExportAction[MyParams]):
14
+ ... def get_filtered_results(self, filters: dict) -> tuple[Any, int]:
15
+ ... return self.client.get_assignments(filters)
16
+ ...
17
+ ... def execute(self) -> dict[str, Any]:
18
+ ... results, count = self.get_filtered_results(self.params.filter)
19
+ ... # ... export items ...
20
+ ... return {'exported': count}
21
+
22
+ Example (step-based):
23
+ >>> from synapse_sdk.plugins.steps import BaseStep, StepResult
24
+ >>>
25
+ >>> class FetchResultsStep(BaseStep[ExportContext]):
26
+ ... @property
27
+ ... def name(self) -> str:
28
+ ... return 'fetch_results'
29
+ ...
30
+ ... @property
31
+ ... def progress_weight(self) -> float:
32
+ ... return 0.2
33
+ ...
34
+ ... def execute(self, context: ExportContext) -> StepResult:
35
+ ... context.results, context.total_count = fetch_data(context.params)
36
+ ... return StepResult(success=True)
37
+ >>>
38
+ >>> class MyExportAction(BaseExportAction[MyParams]):
39
+ ... def setup_steps(self, registry) -> None:
40
+ ... registry.register(FetchResultsStep())
41
+ ... registry.register(ProcessStep())
42
+ ... registry.register(FinalizeStep())
43
+ """
44
+
45
+ from synapse_sdk.plugins.actions.export.action import (
46
+ BaseExportAction,
47
+ ExportProgressCategories,
48
+ )
49
+ from synapse_sdk.plugins.actions.export.context import ExportContext
50
+
51
+ __all__ = [
52
+ 'BaseExportAction',
53
+ 'ExportContext',
54
+ 'ExportProgressCategories',
55
+ ]