synapse-sdk 1.0.0a23__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228):
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +310 -5
  3. synapse_sdk/cli/alias/__init__.py +22 -0
  4. synapse_sdk/cli/alias/create.py +36 -0
  5. synapse_sdk/cli/alias/dataclass.py +31 -0
  6. synapse_sdk/cli/alias/default.py +16 -0
  7. synapse_sdk/cli/alias/delete.py +15 -0
  8. synapse_sdk/cli/alias/list.py +19 -0
  9. synapse_sdk/cli/alias/read.py +15 -0
  10. synapse_sdk/cli/alias/update.py +17 -0
  11. synapse_sdk/cli/alias/utils.py +61 -0
  12. synapse_sdk/cli/code_server.py +687 -0
  13. synapse_sdk/cli/config.py +440 -0
  14. synapse_sdk/cli/devtools.py +90 -0
  15. synapse_sdk/cli/plugin/__init__.py +33 -0
  16. synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
  17. synapse_sdk/{plugins/cli → cli/plugin}/publish.py +23 -15
  18. synapse_sdk/clients/agent/__init__.py +9 -3
  19. synapse_sdk/clients/agent/container.py +143 -0
  20. synapse_sdk/clients/agent/core.py +19 -0
  21. synapse_sdk/clients/agent/ray.py +298 -9
  22. synapse_sdk/clients/backend/__init__.py +30 -12
  23. synapse_sdk/clients/backend/annotation.py +13 -5
  24. synapse_sdk/clients/backend/core.py +31 -4
  25. synapse_sdk/clients/backend/data_collection.py +186 -0
  26. synapse_sdk/clients/backend/hitl.py +17 -0
  27. synapse_sdk/clients/backend/integration.py +16 -1
  28. synapse_sdk/clients/backend/ml.py +5 -1
  29. synapse_sdk/clients/backend/models.py +78 -0
  30. synapse_sdk/clients/base.py +384 -41
  31. synapse_sdk/clients/ray/serve.py +2 -0
  32. synapse_sdk/clients/validators/collections.py +31 -0
  33. synapse_sdk/devtools/config.py +94 -0
  34. synapse_sdk/devtools/server.py +41 -0
  35. synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
  36. synapse_sdk/devtools/streamlit_app/app.py +128 -0
  37. synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
  38. synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
  39. synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
  40. synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
  41. synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
  42. synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
  43. synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
  44. synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
  45. synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
  46. synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
  47. synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
  48. synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
  49. synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
  50. synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
  51. synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
  52. synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
  53. synapse_sdk/devtools/streamlit_app.py +10 -0
  54. synapse_sdk/loggers.py +120 -9
  55. synapse_sdk/plugins/README.md +1340 -0
  56. synapse_sdk/plugins/__init__.py +0 -13
  57. synapse_sdk/plugins/categories/base.py +117 -11
  58. synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
  59. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
  60. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  61. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  62. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  63. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  64. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  65. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  66. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  67. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  68. synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
  69. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  70. synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
  71. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +13 -12
  72. synapse_sdk/plugins/categories/neural_net/actions/train.py +1134 -31
  73. synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
  74. synapse_sdk/plugins/categories/neural_net/base/inference.py +1 -1
  75. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +32 -4
  76. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
  77. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  78. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  79. synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
  80. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  81. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  82. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  83. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  84. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  85. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  86. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  87. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  88. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  89. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  90. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  91. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  92. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  93. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  94. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  95. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
  96. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
  97. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
  98. synapse_sdk/plugins/categories/upload/__init__.py +0 -0
  99. synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
  100. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  101. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  102. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  103. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  104. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  105. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  106. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  107. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  108. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  109. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  110. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  111. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  112. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  113. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  114. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  115. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  116. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  117. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  118. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  119. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  120. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  121. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  122. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  123. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  124. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  125. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  126. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  127. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  128. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  129. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  130. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  131. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  132. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  133. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  134. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  135. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  136. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  137. synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
  138. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  139. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
  140. synapse_sdk/plugins/enums.py +3 -1
  141. synapse_sdk/plugins/models.py +148 -11
  142. synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
  143. synapse_sdk/plugins/templates/schema.json +491 -0
  144. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
  145. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
  146. synapse_sdk/plugins/utils/__init__.py +46 -0
  147. synapse_sdk/plugins/utils/actions.py +119 -0
  148. synapse_sdk/plugins/utils/config.py +203 -0
  149. synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
  150. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  151. synapse_sdk/plugins/utils/registry.py +58 -0
  152. synapse_sdk/shared/__init__.py +25 -0
  153. synapse_sdk/shared/enums.py +93 -0
  154. synapse_sdk/types.py +19 -0
  155. synapse_sdk/utils/converters/__init__.py +240 -0
  156. synapse_sdk/utils/converters/coco/__init__.py +0 -0
  157. synapse_sdk/utils/converters/coco/from_dm.py +322 -0
  158. synapse_sdk/utils/converters/coco/to_dm.py +215 -0
  159. synapse_sdk/utils/converters/dm/__init__.py +57 -0
  160. synapse_sdk/utils/converters/dm/base.py +137 -0
  161. synapse_sdk/utils/converters/dm/from_v1.py +273 -0
  162. synapse_sdk/utils/converters/dm/to_v1.py +321 -0
  163. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  164. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  165. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  166. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  167. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  168. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  169. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  170. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  171. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  172. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  173. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  174. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  175. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  176. synapse_sdk/utils/converters/dm/types.py +168 -0
  177. synapse_sdk/utils/converters/dm/utils.py +162 -0
  178. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  179. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  180. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  181. synapse_sdk/utils/converters/pascal/__init__.py +0 -0
  182. synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
  183. synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
  184. synapse_sdk/utils/converters/yolo/__init__.py +0 -0
  185. synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
  186. synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
  187. synapse_sdk/utils/dataset.py +46 -0
  188. synapse_sdk/utils/encryption.py +158 -0
  189. synapse_sdk/utils/file/__init__.py +58 -0
  190. synapse_sdk/utils/file/archive.py +32 -0
  191. synapse_sdk/utils/file/checksum.py +56 -0
  192. synapse_sdk/utils/file/chunking.py +31 -0
  193. synapse_sdk/utils/file/download.py +385 -0
  194. synapse_sdk/utils/file/encoding.py +40 -0
  195. synapse_sdk/utils/file/io.py +22 -0
  196. synapse_sdk/utils/file/upload.py +165 -0
  197. synapse_sdk/utils/file/video/__init__.py +29 -0
  198. synapse_sdk/utils/file/video/transcode.py +307 -0
  199. synapse_sdk/utils/file.py.backup +301 -0
  200. synapse_sdk/utils/http.py +138 -0
  201. synapse_sdk/utils/network.py +309 -0
  202. synapse_sdk/utils/storage/__init__.py +72 -0
  203. synapse_sdk/utils/storage/providers/__init__.py +183 -0
  204. synapse_sdk/utils/storage/providers/file_system.py +134 -0
  205. synapse_sdk/utils/storage/providers/gcp.py +13 -0
  206. synapse_sdk/utils/storage/providers/http.py +190 -0
  207. synapse_sdk/utils/storage/providers/s3.py +91 -0
  208. synapse_sdk/utils/storage/providers/sftp.py +47 -0
  209. synapse_sdk/utils/storage/registry.py +17 -0
  210. synapse_sdk-2025.12.3.dist-info/METADATA +123 -0
  211. synapse_sdk-2025.12.3.dist-info/RECORD +279 -0
  212. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +1 -1
  213. synapse_sdk/clients/backend/dataset.py +0 -51
  214. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  215. synapse_sdk/plugins/cli/__init__.py +0 -21
  216. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  217. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  218. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  219. synapse_sdk/utils/file.py +0 -168
  220. synapse_sdk/utils/storage.py +0 -91
  221. synapse_sdk-1.0.0a23.dist-info/METADATA +0 -44
  222. synapse_sdk-1.0.0a23.dist-info/RECORD +0 -114
  223. /synapse_sdk/{plugins/cli → cli/plugin}/run.py +0 -0
  224. /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
  225. /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
  226. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  227. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info/licenses}/LICENSE +0 -0
  228. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,201 @@
1
+ from ..context import StepResult, UploadContext
2
+ from ..enums import LogCode
3
+ from .base import BaseStep
4
+
5
+
6
class OrganizeFilesStep(BaseStep):
    """Workflow step that discovers files and groups them per file specification.

    Discovery and grouping are delegated to the strategy registered in
    ``context.strategies`` under the ``'file_discovery'`` key. Two modes are
    supported, selected by the ``use_single_path`` parameter (default ``True``):

    * single-path: every specification is looked up as a sub-directory of
      ``context.pathlib_cwd`` named after the specification;
    * multi-path: every specification has its own entry in the ``assets``
      parameter, carrying a ``path`` and an optional ``is_recursive`` flag.
    """

    @property
    def name(self) -> str:
        """Identifier of this step."""
        return 'organize_files'

    @property
    def progress_weight(self) -> float:
        """Weight this step reports for progress accounting."""
        return 0.15

    def execute(self, context: UploadContext) -> StepResult:
        """Run file organization in the mode selected by ``use_single_path``.

        Args:
            context (UploadContext): Workflow context providing strategies,
                file specifications, parameters and the run logger.

        Returns:
            StepResult: Result produced by the selected mode, or an error result
            when the strategy or the specifications are missing, or when the
            selected mode raises.
        """
        discovery = context.strategies.get('file_discovery')
        if not discovery:
            return self.create_error_result('File discovery strategy not found')

        if not context.file_specifications:
            return self.create_error_result('File specifications not available')

        try:
            if context.get_param('use_single_path', True):
                # Single path mode: all assets share the same base path.
                return self._execute_single_path_mode(context, discovery)
            # Multi-path mode: each asset brings its own path.
            return self._execute_multi_path_mode(context, discovery)
        except Exception as exc:
            # Any failure inside a mode is reported as an error result, not raised.
            return self.create_error_result(f'File organization failed: {str(exc)}')

    def _execute_single_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
        """Organize files found in per-specification sub-directories of the working directory.

        Args:
            context (UploadContext): Workflow context; ``pathlib_cwd`` is the base path.
            file_discovery_strategy: Strategy exposing ``discover`` and ``organize``.

        Returns:
            StepResult: Success result whose ``organized_files`` entry is empty
            when no type directory or no file was found.
        """
        base_dir = context.pathlib_cwd

        # Map each specification name to its directory, keeping existing directories only.
        type_dirs = {}
        for spec in context.file_specifications:
            spec_name = spec['name']
            candidate = base_dir / spec_name
            if not (candidate.exists() and candidate.is_dir()):
                continue
            type_dirs[spec_name] = candidate

        if not type_dirs:
            context.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
            return self.create_success_result(data={'organized_files': []})

        context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs))
        context.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
        context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)

        # Gather the files of every type directory into one flat list.
        is_recursive = context.get_param('is_recursive', True)
        all_files = [
            found
            for dir_path in type_dirs.values()
            for found in file_discovery_strategy.discover(dir_path, is_recursive)
        ]

        if not all_files:
            context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
            return self.create_success_result(data={'organized_files': []})

        # Let the strategy group the discovered files.
        organized_files = file_discovery_strategy.organize(
            all_files,
            context.file_specifications,
            context.metadata or {},
            type_dirs,
        )

        if organized_files:
            context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(organized_files))
            context.add_organized_files(organized_files)

        rollback_info = {'files_count': len(organized_files), 'type_dirs': list(type_dirs)}
        return self.create_success_result(
            data={'organized_files': organized_files},
            rollback_data=rollback_info,
        )

    def _execute_multi_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
        """Organize files when every specification is backed by its own asset path.

        Assets that cannot be resolved, do not exist, or contain no files are
        logged and skipped; the remaining files are organized together in a
        single strategy call.

        Args:
            context (UploadContext): Workflow context; the ``assets`` parameter
                maps specification names to their asset configuration.
            file_discovery_strategy: Strategy exposing ``discover`` and ``organize``.

        Returns:
            StepResult: Error result when ``assets`` is empty or a required
            specification has no asset entry; otherwise a success result.
        """
        from synapse_sdk.utils.storage import get_pathlib

        assets = context.get_param('assets', {})
        if not assets:
            return self.create_error_result('Multi-path mode requires assets configuration')

        # Every required specification must come with an asset entry.
        required_names = [spec['name'] for spec in context.file_specifications if spec.get('is_required', False)]
        missing_required = [required for required in required_names if required not in assets]
        if missing_required:
            missing_text = ', '.join(missing_required)
            return self.create_error_result(
                f'Multi-path mode requires asset paths for required specs: {missing_text}'
            )

        context.run.log_message_with_code(LogCode.MULTI_PATH_MODE_ENABLED, len(assets))
        context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)

        # Accumulators filled while walking the specifications.
        all_files = []
        type_dirs = {}
        specs_with_files = []

        for spec in context.file_specifications:
            spec_name = spec['name']
            is_required = spec.get('is_required', False)

            # Only optional specifications may lack an asset entry.
            if spec_name not in assets:
                if is_required:
                    # Already excluded by the check above; kept as a second guard.
                    return self.create_error_result(f'Required spec {spec_name} missing asset path')
                context.run.log_message_with_code(LogCode.OPTIONAL_SPEC_SKIPPED, spec_name)
                continue

            asset_config = assets[spec_name]

            # Resolve the asset path through the storage helper.
            try:
                asset_path = get_pathlib(context.storage, asset_config.get('path', ''))
            except Exception as exc:
                context.run.log_message_with_code(LogCode.ASSET_PATH_ACCESS_ERROR, spec_name, str(exc))
                continue
            # The path is recorded as soon as it resolves, before the existence check.
            type_dirs[spec_name] = asset_path

            if not asset_path.exists():
                context.run.log_message_with_code(LogCode.ASSET_PATH_NOT_FOUND, spec_name, asset_config.get('path', ''))
                continue

            # Discover the files that belong to this asset.
            is_recursive = asset_config.get('is_recursive', True)
            context.run.log_message_with_code(LogCode.DISCOVERING_FILES_FOR_ASSET, spec_name, is_recursive)
            files = file_discovery_strategy.discover(asset_path, is_recursive)

            if not files:
                context.run.log_message_with_code(LogCode.NO_FILES_FOUND_FOR_ASSET, spec_name)
                continue

            all_files.extend(files)
            specs_with_files.append(spec)
            context.run.log_message_with_code(LogCode.FILES_FOUND_FOR_ASSET, len(files), spec_name)

        # Organize everything in one call so files are grouped across assets.
        all_organized_files = []
        if all_files and specs_with_files:
            context.run.log_message_with_code(
                LogCode.ORGANIZING_FILES_MULTI_PATH,
                len(all_files),
                len(specs_with_files),
            )
            context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_MULTI_PATH, list(type_dirs))
            all_organized_files = file_discovery_strategy.organize(
                all_files,
                specs_with_files,
                context.metadata or {},
                type_dirs,
            )

        if not all_organized_files:
            context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
        else:
            context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(all_organized_files))
            context.run.log_message_with_code(
                LogCode.DATA_UNITS_CREATED_FROM_FILES,
                len(all_organized_files),
                len(all_files),
            )
            context.add_organized_files(all_organized_files)

        rollback_info = {'files_count': len(all_organized_files), 'type_dirs': list(type_dirs)}
        return self.create_success_result(
            data={'organized_files': all_organized_files},
            rollback_data=rollback_info,
        )

    def can_skip(self, context: UploadContext) -> bool:
        """Report that this step is never skippable."""
        return False

    def rollback(self, context: UploadContext) -> None:
        """Undo the step by emptying ``context.organized_files`` and logging the rollback."""
        context.organized_files.clear()
        context.run.log_message_with_code(LogCode.ROLLBACK_FILE_ORGANIZATION)

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Check that the context carries what the selected mode needs.

        Args:
            context (UploadContext): Workflow context to inspect.

        Raises:
            ValueError: If the working directory is unset in single-path mode,
                if ``assets`` is empty in multi-path mode, or if the file
                specifications are missing.
        """
        if context.get_param('use_single_path', True):
            # Single-path mode resolves everything relative to pathlib_cwd.
            if not context.pathlib_cwd:
                raise ValueError('Working directory path not set in single-path mode')
        elif not context.get_param('assets', {}):
            # Multi-path mode does not need pathlib_cwd but does need assets.
            raise ValueError('Multi-path mode requires assets configuration')

        if not context.file_specifications:
            raise ValueError('File specifications not available')
@@ -0,0 +1,104 @@
1
+ from synapse_sdk.plugins.exceptions import ActionError
2
+
3
+ from ..context import StepResult, UploadContext
4
+ from ..enums import LogCode, UploadStatus
5
+ from ..strategies.base import UploadConfig
6
+ from .base import BaseStep
7
+
8
+
9
class UploadFilesStep(BaseStep):
    """Workflow step that hands the organized files to the upload strategy.

    The strategy is read from ``context.strategies`` under the ``'upload'`` key.
    Progress is reported under the ``'upload_data_files'`` category and metrics
    under the ``'data_files'`` category.
    """

    @property
    def name(self) -> str:
        """Identifier of this step."""
        return 'upload_files'

    @property
    def progress_weight(self) -> float:
        """Weight this step reports for progress accounting."""
        return 0.30

    def execute(self, context: UploadContext) -> StepResult:
        """Upload ``context.organized_files`` and record progress and metrics.

        Args:
            context (UploadContext): Workflow context providing the upload
                strategy, the organized files, parameters and the run logger.

        Returns:
            StepResult: Success result carrying the uploaded files when at least
            one file was uploaded; otherwise an error result. Exceptions raised
            during the upload are converted into an error result.
        """
        strategy = context.strategies.get('upload')
        if not strategy:
            return self.create_error_result('Upload strategy not found')

        if not context.organized_files:
            context.run.log_message_with_code(LogCode.NO_FILES_UPLOADED)
            return self.create_error_result('No organized files to upload')

        progress_category = 'upload_data_files'
        metrics_category = 'data_files'

        try:
            # Start progress tracking at zero out of the number of organized files.
            total = len(context.organized_files)
            context.run.set_progress(0, total, category=progress_category)
            context.run.log_message_with_code(LogCode.UPLOADING_DATA_FILES)

            # Everything is pending before the upload starts.
            initial_metrics = {'stand_by': total, 'success': 0, 'failed': 0}
            context.update_metrics(metrics_category, initial_metrics)
            context.run.set_metrics(initial_metrics, category=metrics_category)

            # Note: Always uses synchronous upload to guarantee file order
            threshold_mb = context.get_param('max_file_size_mb', 50)
            batch_size = context.get_param('upload_batch_size', 1)
            upload_config = UploadConfig(chunked_threshold_mb=threshold_mb, batch_size=batch_size)

            uploaded_files = strategy.upload(context.organized_files, upload_config)
            context.add_uploaded_files(uploaded_files)

            # Every file returned by the strategy is logged as a success.
            for item in uploaded_files:
                context.run.log_data_file(item, UploadStatus.SUCCESS)

            # Files the strategy did not return are counted as failed.
            uploaded_count = len(uploaded_files)
            final_metrics = {
                'stand_by': 0,
                'success': uploaded_count,
                'failed': total - uploaded_count,
            }
            context.update_metrics(metrics_category, final_metrics)
            context.run.set_metrics(final_metrics, category=metrics_category)

            if not uploaded_files:
                # Nothing was uploaded: mark the progress category as failed.
                context.run.set_progress_failed(category=progress_category)
                return self.create_error_result('No files were successfully uploaded')

            # At least one file was uploaded: mark the progress category complete.
            context.run.set_progress(total, total, category=progress_category)
            return self.create_success_result(
                data={'uploaded_files': uploaded_files},
                rollback_data={'uploaded_files_count': uploaded_count},
            )
        except Exception as exc:
            # Any exception marks progress as failed and becomes an error result.
            context.run.set_progress_failed(category=progress_category)
            context.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, str(exc))
            return self.create_error_result(f'File upload failed: {str(exc)}')

    def can_skip(self, context: UploadContext) -> bool:
        """Report that this step is never skippable."""
        return False

    def rollback(self, context: UploadContext) -> None:
        """Undo the step by emptying ``context.uploaded_files`` and logging the rollback.

        Only the in-memory list is cleared here; nothing in this method deletes
        already uploaded files.
        """
        context.uploaded_files.clear()
        context.run.log_message_with_code(LogCode.ROLLBACK_FILE_UPLOADS)

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Check that there is something to upload and a target collection.

        Args:
            context (UploadContext): Workflow context to inspect.

        Raises:
            ValueError: If there are no organized files.
            ActionError: If the ``data_collection`` parameter is ``None``.
        """
        if not context.organized_files:
            raise ValueError('No organized files available for upload')

        if context.get_param('data_collection') is None:
            raise ActionError('Data collection parameter is required for upload')
@@ -0,0 +1,71 @@
1
+ from ..context import StepResult, UploadContext
2
+ from ..enums import LogCode
3
+ from .base import BaseStep
4
+
5
+
6
class ValidateFilesStep(BaseStep):
    """Workflow step that checks the organized files against the file specifications.

    The check itself is delegated to the strategy registered in
    ``context.strategies`` under the ``'validation'`` key.
    """

    @property
    def name(self) -> str:
        """Identifier of this step."""
        return 'validate_files'

    @property
    def progress_weight(self) -> float:
        """Weight this step reports for progress accounting."""
        return 0.10

    def execute(self, context: UploadContext) -> StepResult:
        """Validate ``context.organized_files`` with the validation strategy.

        Args:
            context (UploadContext): Upload workflow context containing organized
                files, specifications, and strategies.

        Returns:
            StepResult: Success result flagged ``validation_passed`` when the
            strategy reports a valid outcome; otherwise an error result listing
            the reported errors. Exceptions raised by the strategy are converted
            into an error result.
        """
        strategy = context.strategies.get('validation')
        if not strategy:
            return self.create_error_result('Validation strategy not found')

        if not context.organized_files:
            context.run.log_message_with_code(LogCode.NO_FILES_FOUND)
            return self.create_error_result('No organized files to validate')

        if not context.file_specifications:
            return self.create_error_result('File specifications not available')

        try:
            outcome = strategy.validate_files(context.organized_files, context.file_specifications)

            if outcome.valid:
                rollback_info = {'validated_files_count': len(context.organized_files)}
                return self.create_success_result(
                    data={'validation_passed': True},
                    rollback_data=rollback_info,
                )

            # Invalid outcome: log it and report every error in a single message.
            context.run.log_message_with_code(LogCode.VALIDATION_FAILED)
            error_text = ', '.join(outcome.errors)
            return self.create_error_result(f'File validation failed: {error_text}')
        except Exception as exc:
            return self.create_error_result(f'File validation failed: {str(exc)}')

    def can_skip(self, context: UploadContext) -> bool:
        """Report that this step is never skippable."""
        return False

    def rollback(self, context: UploadContext) -> None:
        """Log the rollback; this step changes no state that needs undoing."""
        context.run.log_message_with_code(LogCode.ROLLBACK_FILE_VALIDATION)

    def validate_prerequisites(self, context: UploadContext) -> None:
        """Check that both organized files and file specifications are present.

        Args:
            context (UploadContext): Workflow context to inspect.

        Raises:
            ValueError: If the organized files or the file specifications are missing.
        """
        if not context.organized_files:
            raise ValueError('No organized files available for validation')

        if not context.file_specifications:
            raise ValueError('File specifications not available for validation')
@@ -0,0 +1 @@
1
+ # Strategy pattern implementations for upload actions
@@ -0,0 +1,82 @@
1
+ from abc import ABC, abstractmethod
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List
4
+
5
+
6
class ValidationResult:
    """Result of validation operations.

    ``valid`` is stored exactly as passed in; ``errors`` is the supplied
    list, or a new empty list when none (or a falsy value) is given.
    Instances are truthy exactly when ``valid`` is truthy.
    """

    def __init__(self, valid: bool, errors: List[str] = None):
        """Store the validation flag and its error messages.

        Args:
            valid: Whether validation succeeded.
            errors: Messages describing the failures. ``None`` (the default)
                or any other falsy value is replaced by a new empty list.
        """
        self.valid = valid
        self.errors = errors or []

    def __bool__(self) -> bool:
        """Return the truthiness of ``valid`` as a real ``bool``.

        Python requires ``__bool__`` to return ``bool``; coercing here keeps
        ``bool(result)`` / ``if result:`` working when ``valid`` was supplied
        as another truthy or falsy value (e.g. ``1`` or ``None``) instead of
        raising ``TypeError``.
        """
        return bool(self.valid)

    def __repr__(self) -> str:
        """Return a debugging representation showing both attributes."""
        return f'{type(self).__name__}(valid={self.valid!r}, errors={self.errors!r})'
15
+
16
+
17
class ValidationStrategy(ABC):
    """Abstract contract for validation strategies.

    Concrete strategies must implement both methods below; each one reports
    its outcome as a ``ValidationResult``.
    """

    @abstractmethod
    def validate_params(self, params: Dict) -> ValidationResult:
        """Check the action parameters and report the outcome."""

    @abstractmethod
    def validate_files(self, files: List[Dict], specs: Dict) -> ValidationResult:
        """Check organized files against the given specifications."""
29
+
30
+
31
class FileDiscoveryStrategy(ABC):
    """Abstract contract for strategies that discover and organize files."""

    @abstractmethod
    def discover(self, path: Path, recursive: bool) -> List[Path]:
        """Return the files discovered at ``path``."""

    @abstractmethod
    def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
        """Arrange ``files`` according to ``specs``."""
43
+
44
+
45
class MetadataStrategy(ABC):
    """Abstract contract for strategies that extract and validate metadata."""

    @abstractmethod
    def extract(self, source_path: Path) -> Dict[str, Dict[str, Any]]:
        """Pull metadata out of the source at ``source_path`` (e.g., an Excel file)."""

    @abstractmethod
    def validate(self, metadata: Dict) -> ValidationResult:
        """Check previously extracted metadata and report the outcome."""
57
+
58
+
59
class UploadConfig:
    """Settings container for upload operations.

    Attributes:
        chunked_threshold_mb: Defaults to 50. Presumably the size in
            megabytes above which chunked upload is used -- confirm against
            the upload strategy implementations.
        batch_size: Defaults to 1.
    """

    def __init__(self, chunked_threshold_mb: int = 50, batch_size: int = 1):
        """Record both settings on the instance as given."""
        self.chunked_threshold_mb, self.batch_size = chunked_threshold_mb, batch_size
65
+
66
+
67
class UploadStrategy(ABC):
    """Abstract contract for strategies that upload files."""

    @abstractmethod
    def upload(self, files: List[Dict], config: UploadConfig) -> List[Dict]:
        """Send ``files`` to storage using the settings in ``config``."""
74
+
75
+
76
class DataUnitStrategy(ABC):
    """Abstract contract for strategies that generate data units."""

    @abstractmethod
    def generate(self, uploaded_files: List[Dict], batch_size: int) -> List[Dict]:
        """Produce data units from ``uploaded_files``."""
@@ -0,0 +1 @@
1
+ # Data unit strategy implementations
@@ -0,0 +1,39 @@
1
+ from typing import Dict, List
2
+
3
+ from synapse_sdk.clients.utils import get_batched_list
4
+
5
+ from ...enums import LogCode, UploadStatus
6
+ from ..base import DataUnitStrategy
7
+
8
+
9
class BatchDataUnitStrategy(DataUnitStrategy):
    """Data unit strategy that creates data units one batch at a time."""

    def __init__(self, context):
        """Keep the context; ``generate`` reads its ``client`` and ``run``."""
        self.context = context

    def generate(self, uploaded_files: List[Dict], batch_size: int) -> List[Dict]:
        """Create data units batch by batch and return all that were created.

        ``uploaded_files`` is split by ``get_batched_list(uploaded_files,
        batch_size)``. Every data unit created for a batch is logged with
        ``UploadStatus.SUCCESS``. If anything in a batch's processing raises,
        the failure is logged with ``LogCode.DATA_UNIT_BATCH_FAILED`` and one
        ``UploadStatus.FAILED`` entry is logged per item of that batch; the
        remaining batches are still processed.
        """
        api_client = self.context.client
        all_created = []

        # Batching goes through the same helper as the legacy implementation.
        for chunk in get_batched_list(uploaded_files, batch_size):
            try:
                new_units = api_client.create_data_units(chunk)
                all_created.extend(new_units)

                # One success entry per data unit returned for this batch.
                for unit in new_units:
                    self.context.run.log_data_unit(
                        unit['id'],
                        UploadStatus.SUCCESS,
                        data_unit_meta=unit.get('meta'),
                    )
            except Exception as exc:
                self.context.run.log_message_with_code(LogCode.DATA_UNIT_BATCH_FAILED, str(exc))
                # One failure entry per item of the batch that went wrong.
                for _ in chunk:
                    self.context.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)

        return all_created
@@ -0,0 +1,29 @@
1
+ from typing import Dict, List
2
+
3
+ from ...enums import LogCode, UploadStatus
4
+ from ..base import DataUnitStrategy
5
+
6
+
7
class SingleDataUnitStrategy(DataUnitStrategy):
    """Data unit strategy that creates data units one file at a time."""

    def __init__(self, context):
        """Keep the context; ``generate`` reads its ``client`` and ``run``."""
        self.context = context

    def generate(self, uploaded_files: List[Dict], batch_size: int) -> List[Dict]:
        """Create a data unit per uploaded file and return all that were created.

        Each file is sent to ``client.create_data_units`` as a one-element
        list. When that raises, the failure is logged with
        ``LogCode.DATA_UNIT_BATCH_FAILED`` plus a single
        ``UploadStatus.FAILED`` entry, and processing moves on to the next
        file. ``batch_size`` is accepted for interface compatibility and is
        not used here.
        """
        api_client = self.context.client
        all_created = []

        for single_file in uploaded_files:
            try:
                # A "batch" holding exactly this one file.
                new_units = api_client.create_data_units([single_file])
                all_created.extend(new_units)
            except Exception as exc:
                self.context.run.log_message_with_code(LogCode.DATA_UNIT_BATCH_FAILED, str(exc))
                # Record the file that could not be turned into a data unit.
                self.context.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)

        return all_created
@@ -0,0 +1 @@
1
+ # File discovery strategy implementations