synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synapse_sdk/__init__.py +24 -0
- synapse_sdk/cli/code_server.py +305 -33
- synapse_sdk/clients/agent/__init__.py +2 -1
- synapse_sdk/clients/agent/container.py +143 -0
- synapse_sdk/clients/agent/ray.py +296 -38
- synapse_sdk/clients/backend/annotation.py +1 -1
- synapse_sdk/clients/backend/core.py +31 -4
- synapse_sdk/clients/backend/data_collection.py +82 -7
- synapse_sdk/clients/backend/hitl.py +1 -1
- synapse_sdk/clients/backend/ml.py +1 -1
- synapse_sdk/clients/base.py +211 -61
- synapse_sdk/loggers.py +46 -0
- synapse_sdk/plugins/README.md +1340 -0
- synapse_sdk/plugins/categories/base.py +59 -9
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
- synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
- synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
- synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
- synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
- synapse_sdk/plugins/models.py +111 -9
- synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
- synapse_sdk/plugins/templates/schema.json +7 -0
- synapse_sdk/plugins/utils/__init__.py +3 -0
- synapse_sdk/plugins/utils/ray_gcs.py +66 -0
- synapse_sdk/shared/__init__.py +25 -0
- synapse_sdk/utils/converters/dm/__init__.py +42 -41
- synapse_sdk/utils/converters/dm/base.py +137 -0
- synapse_sdk/utils/converters/dm/from_v1.py +208 -562
- synapse_sdk/utils/converters/dm/to_v1.py +258 -304
- synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
- synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
- synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
- synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
- synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
- synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
- synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
- synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
- synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
- synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
- synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
- synapse_sdk/utils/converters/dm/types.py +168 -0
- synapse_sdk/utils/converters/dm/utils.py +162 -0
- synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
- synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
- synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
- synapse_sdk/utils/file/__init__.py +58 -0
- synapse_sdk/utils/file/archive.py +32 -0
- synapse_sdk/utils/file/checksum.py +56 -0
- synapse_sdk/utils/file/chunking.py +31 -0
- synapse_sdk/utils/file/download.py +385 -0
- synapse_sdk/utils/file/encoding.py +40 -0
- synapse_sdk/utils/file/io.py +22 -0
- synapse_sdk/utils/file/upload.py +165 -0
- synapse_sdk/utils/file/video/__init__.py +29 -0
- synapse_sdk/utils/file/video/transcode.py +307 -0
- synapse_sdk/utils/{file.py → file.py.backup} +77 -0
- synapse_sdk/utils/network.py +272 -0
- synapse_sdk/utils/storage/__init__.py +6 -2
- synapse_sdk/utils/storage/providers/file_system.py +6 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
- synapse_sdk/devtools/docs/.gitignore +0 -20
- synapse_sdk/devtools/docs/README.md +0 -41
- synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
- synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
- synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
- synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
- synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
- synapse_sdk/devtools/docs/blog/authors.yml +0 -25
- synapse_sdk/devtools/docs/blog/tags.yml +0 -19
- synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
- synapse_sdk/devtools/docs/package-lock.json +0 -17455
- synapse_sdk/devtools/docs/package.json +0 -47
- synapse_sdk/devtools/docs/sidebars.ts +0 -44
- synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
- synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
- synapse_sdk/devtools/docs/src/css/custom.css +0 -30
- synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
- synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
- synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
- synapse_sdk/devtools/docs/static/.nojekyll +0 -0
- synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
- synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
- synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
- synapse_sdk/devtools/docs/static/img/logo.png +0 -0
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
- synapse_sdk/devtools/docs/tsconfig.json +0 -8
- synapse_sdk/plugins/categories/export/actions/export.py +0 -346
- synapse_sdk/plugins/categories/export/enums.py +0 -7
- synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
- synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0

synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py

@@ -0,0 +1,201 @@
+from ..context import StepResult, UploadContext
+from ..enums import LogCode
+from .base import BaseStep
+
+
+class OrganizeFilesStep(BaseStep):
+    """Organize files according to specifications using file discovery strategy."""
+
+    @property
+    def name(self) -> str:
+        return 'organize_files'
+
+    @property
+    def progress_weight(self) -> float:
+        return 0.15
+
+    def execute(self, context: UploadContext) -> StepResult:
+        """Execute file organization step."""
+        file_discovery_strategy = context.strategies.get('file_discovery')
+        if not file_discovery_strategy:
+            return self.create_error_result('File discovery strategy not found')
+
+        if not context.file_specifications:
+            return self.create_error_result('File specifications not available')
+
+        try:
+            # Check which mode we're in
+            use_single_path = context.get_param('use_single_path', True)
+
+            if use_single_path:
+                # Single path mode: all assets use same base path
+                return self._execute_single_path_mode(context, file_discovery_strategy)
+            else:
+                # Multi-path mode: each asset has its own path
+                return self._execute_multi_path_mode(context, file_discovery_strategy)
+
+        except Exception as e:
+            return self.create_error_result(f'File organization failed: {str(e)}')
+
+    def _execute_single_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
+        """Execute file organization in single path mode (traditional)."""
+        # Create type directories mapping
+        type_dirs = {}
+        for spec in context.file_specifications:
+            spec_name = spec['name']
+            spec_dir = context.pathlib_cwd / spec_name
+            if spec_dir.exists() and spec_dir.is_dir():
+                type_dirs[spec_name] = spec_dir
+
+        if type_dirs:
+            context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_FOUND, list(type_dirs.keys()))
+        else:
+            context.run.log_message_with_code(LogCode.NO_TYPE_DIRECTORIES)
+            return self.create_success_result(data={'organized_files': []})
+
+        context.run.log_message_with_code(LogCode.TYPE_STRUCTURE_DETECTED)
+        context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
+
+        # Discover files in type directories
+        all_files = []
+        is_recursive = context.get_param('is_recursive', True)
+
+        for spec_name, dir_path in type_dirs.items():
+            files_in_dir = file_discovery_strategy.discover(dir_path, is_recursive)
+            all_files.extend(files_in_dir)
+
+        if not all_files:
+            context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
+            return self.create_success_result(data={'organized_files': []})
+
+        # Organize files using strategy
+        organized_files = file_discovery_strategy.organize(
+            all_files, context.file_specifications, context.metadata or {}, type_dirs
+        )
+
+        if organized_files:
+            context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(organized_files))
+            context.add_organized_files(organized_files)
+
+        return self.create_success_result(
+            data={'organized_files': organized_files},
+            rollback_data={'files_count': len(organized_files), 'type_dirs': list(type_dirs.keys())},
+        )
+
+    def _execute_multi_path_mode(self, context: UploadContext, file_discovery_strategy) -> StepResult:
+        """Execute file organization in multi-path mode (each asset has own path)."""
+        from synapse_sdk.utils.storage import get_pathlib
+
+        assets = context.get_param('assets', {})
+        if not assets:
+            return self.create_error_result('Multi-path mode requires assets configuration')
+
+        # Validate that all required specs have asset paths
+        required_specs = [spec['name'] for spec in context.file_specifications if spec.get('is_required', False)]
+        missing_required = [spec for spec in required_specs if spec not in assets]
+
+        if missing_required:
+            return self.create_error_result(
+                f'Multi-path mode requires asset paths for required specs: {", ".join(missing_required)}'
+            )
+
+        context.run.log_message_with_code(LogCode.MULTI_PATH_MODE_ENABLED, len(assets))
+        context.run.log_message_with_code(LogCode.FILE_ORGANIZATION_STARTED)
+
+        # Collect all files and specs first
+        all_files = []
+        type_dirs = {}
+        specs_with_files = []
+
+        for spec in context.file_specifications:
+            spec_name = spec['name']
+            is_required = spec.get('is_required', False)
+
+            # Skip if no asset configuration for this spec (only allowed for optional specs)
+            if spec_name not in assets:
+                if is_required:
+                    # This should not happen due to validation above, but double-check
+                    return self.create_error_result(f'Required spec {spec_name} missing asset path')
+                context.run.log_message_with_code(LogCode.OPTIONAL_SPEC_SKIPPED, spec_name)
+                continue
+
+            asset_config = assets[spec_name]
+
+            # Get the asset path from storage
+            try:
+                asset_path = get_pathlib(context.storage, asset_config.get('path', ''))
+                type_dirs[spec_name] = asset_path
+            except Exception as e:
+                context.run.log_message_with_code(LogCode.ASSET_PATH_ACCESS_ERROR, spec_name, str(e))
+                continue
+
+            if not asset_path.exists():
+                context.run.log_message_with_code(LogCode.ASSET_PATH_NOT_FOUND, spec_name, asset_config.get('path', ''))
+                continue
+
+            # Discover files for this asset
+            is_recursive = asset_config.get('is_recursive', True)
+            context.run.log_message_with_code(LogCode.DISCOVERING_FILES_FOR_ASSET, spec_name, is_recursive)
+
+            files = file_discovery_strategy.discover(asset_path, is_recursive)
+
+            if not files:
+                context.run.log_message_with_code(LogCode.NO_FILES_FOUND_FOR_ASSET, spec_name)
+                continue
+
+            all_files.extend(files)
+            specs_with_files.append(spec)
+            context.run.log_message_with_code(LogCode.FILES_FOUND_FOR_ASSET, len(files), spec_name)
+
+        # Organize all files together to group by dataset_key
+        all_organized_files = []
+        if all_files and specs_with_files:
+            context.run.log_message_with_code(
+                LogCode.ORGANIZING_FILES_MULTI_PATH, len(all_files), len(specs_with_files)
+            )
+            context.run.log_message_with_code(LogCode.TYPE_DIRECTORIES_MULTI_PATH, list(type_dirs.keys()))
+
+            all_organized_files = file_discovery_strategy.organize(
+                all_files, specs_with_files, context.metadata or {}, type_dirs
+            )
+
+        if all_organized_files:
+            context.run.log_message_with_code(LogCode.FILES_DISCOVERED, len(all_organized_files))
+            context.run.log_message_with_code(
+                LogCode.DATA_UNITS_CREATED_FROM_FILES, len(all_organized_files), len(all_files)
+            )
+            context.add_organized_files(all_organized_files)
+        else:
+            context.run.log_message_with_code(LogCode.NO_FILES_FOUND_WARNING)
+
+        return self.create_success_result(
+            data={'organized_files': all_organized_files},
+            rollback_data={'files_count': len(all_organized_files), 'type_dirs': list(type_dirs.keys())},
+        )
+
+    def can_skip(self, context: UploadContext) -> bool:
+        """File organization cannot be skipped."""
+        return False
+
+    def rollback(self, context: UploadContext) -> None:
+        """Rollback file organization."""
+        # Clear organized files
+        context.organized_files.clear()
+        context.run.log_message_with_code(LogCode.ROLLBACK_FILE_ORGANIZATION)
+
+    def validate_prerequisites(self, context: UploadContext) -> None:
+        """Validate prerequisites for file organization."""
+        use_single_path = context.get_param('use_single_path', True)
+
+        # In single-path mode, pathlib_cwd is required
+        if use_single_path and not context.pathlib_cwd:
+            raise ValueError('Working directory path not set in single-path mode')
+
+        # In multi-path mode, pathlib_cwd is optional (each asset has its own path)
+        if not use_single_path:
+            assets = context.get_param('assets', {})
+            if not assets:
+                raise ValueError('Multi-path mode requires assets configuration')
+
+        if not context.file_specifications:
+            raise ValueError('File specifications not available')
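
The step above branches on the `use_single_path` parameter; in multi-path mode it reads an `assets` mapping keyed by file-spec name, each entry carrying a `path` and an optional `is_recursive` flag. A minimal sketch of that parameter shape, inferred from `_execute_multi_path_mode` (the spec names and paths below are illustrative, not taken from the package):

params = {
    'use_single_path': False,
    'assets': {
        # one entry per file specification; entries for required specs must be present
        'image': {'path': 'datasets/run_01/images', 'is_recursive': True},
        'label': {'path': 'datasets/run_01/labels', 'is_recursive': False},
    },
}
# Each path is resolved through get_pathlib(context.storage, ...) and files are
# discovered per spec before everything is organized together.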

synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py

@@ -0,0 +1,104 @@
+from synapse_sdk.plugins.exceptions import ActionError
+
+from ..context import StepResult, UploadContext
+from ..enums import LogCode, UploadStatus
+from ..strategies.base import UploadConfig
+from .base import BaseStep
+
+
+class UploadFilesStep(BaseStep):
+    """Upload organized files using upload strategy."""
+
+    @property
+    def name(self) -> str:
+        return 'upload_files'
+
+    @property
+    def progress_weight(self) -> float:
+        return 0.30
+
+    def execute(self, context: UploadContext) -> StepResult:
+        """Execute file upload step."""
+        upload_strategy = context.strategies.get('upload')
+        if not upload_strategy:
+            return self.create_error_result('Upload strategy not found')
+
+        if not context.organized_files:
+            context.run.log_message_with_code(LogCode.NO_FILES_UPLOADED)
+            return self.create_error_result('No organized files to upload')
+
+        try:
+            # Setup progress tracking
+            organized_files_count = len(context.organized_files)
+            context.run.set_progress(0, organized_files_count, category='upload_data_files')
+            context.run.log_message_with_code(LogCode.UPLOADING_DATA_FILES)
+
+            # Initialize metrics
+            initial_metrics = {'stand_by': organized_files_count, 'success': 0, 'failed': 0}
+            context.update_metrics('data_files', initial_metrics)
+            context.run.set_metrics(initial_metrics, category='data_files')
+
+            # Create upload configuration
+            # Note: Always uses synchronous upload to guarantee file order
+            upload_config = UploadConfig(
+                chunked_threshold_mb=context.get_param('max_file_size_mb', 50),
+                batch_size=context.get_param('upload_batch_size', 1),
+            )
+
+            # Execute upload using strategy
+            uploaded_files = upload_strategy.upload(context.organized_files, upload_config)
+
+            # Update context and metrics
+            context.add_uploaded_files(uploaded_files)
+
+            # Log upload results
+            for uploaded_file in uploaded_files:
+                context.run.log_data_file(uploaded_file, UploadStatus.SUCCESS)
+
+            # Update final metrics
+            final_metrics = {
+                'stand_by': 0,
+                'success': len(uploaded_files),
+                'failed': organized_files_count - len(uploaded_files),
+            }
+            context.update_metrics('data_files', final_metrics)
+            context.run.set_metrics(final_metrics, category='data_files')
+
+            # Handle success vs failure cases
+            if uploaded_files:
+                # Success: Set completion progress with elapsed time
+                context.run.set_progress(organized_files_count, organized_files_count, category='upload_data_files')
+                return self.create_success_result(
+                    data={'uploaded_files': uploaded_files},
+                    rollback_data={'uploaded_files_count': len(uploaded_files)},
+                )
+            else:
+                # Failure: Mark as failed with elapsed time but no completion
+                context.run.set_progress_failed(category='upload_data_files')
+                return self.create_error_result('No files were successfully uploaded')
+
+        except Exception as e:
+            # Exception: Mark as failed with elapsed time
+            context.run.set_progress_failed(category='upload_data_files')
+            context.run.log_message_with_code(LogCode.FILE_UPLOAD_FAILED, str(e))
+            return self.create_error_result(f'File upload failed: {str(e)}')
+
+    def can_skip(self, context: UploadContext) -> bool:
+        """File upload cannot be skipped."""
+        return False
+
+    def rollback(self, context: UploadContext) -> None:
+        """Rollback file upload."""
+        # In a real implementation, this would delete uploaded files
+        # For now, just clear the uploaded files list and log
+        context.uploaded_files.clear()
+        context.run.log_message_with_code(LogCode.ROLLBACK_FILE_UPLOADS)
+
+    def validate_prerequisites(self, context: UploadContext) -> None:
+        """Validate prerequisites for file upload."""
+        if not context.organized_files:
+            raise ValueError('No organized files available for upload')
+
+        collection_id = context.get_param('data_collection')
+        if collection_id is None:
+            raise ActionError('Data collection parameter is required for upload')

synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py

@@ -0,0 +1,71 @@
+from ..context import StepResult, UploadContext
+from ..enums import LogCode
+from .base import BaseStep
+
+
+class ValidateFilesStep(BaseStep):
+    """Validate organized files against specifications."""
+
+    @property
+    def name(self) -> str:
+        return 'validate_files'
+
+    @property
+    def progress_weight(self) -> float:
+        return 0.10
+
+    def execute(self, context: UploadContext) -> StepResult:
+        """Execute file validation step.
+
+        Validates organized files against specifications using validation strategy.
+
+        Args:
+            context (UploadContext): Upload workflow context containing organized files,
+                specifications, and strategies.
+
+        Returns:
+            StepResult: Success result with validation status, or error result if validation fails.
+        """
+        validation_strategy = context.strategies.get('validation')
+        if not validation_strategy:
+            return self.create_error_result('Validation strategy not found')
+
+        if not context.organized_files:
+            context.run.log_message_with_code(LogCode.NO_FILES_FOUND)
+            return self.create_error_result('No organized files to validate')
+
+        if not context.file_specifications:
+            return self.create_error_result('File specifications not available')
+
+        try:
+            # Validate organized files against specifications using strategy
+            validation_result = validation_strategy.validate_files(context.organized_files, context.file_specifications)
+
+            if not validation_result.valid:
+                context.run.log_message_with_code(LogCode.VALIDATION_FAILED)
+                error_msg = f'File validation failed: {", ".join(validation_result.errors)}'
+                return self.create_error_result(error_msg)
+
+            return self.create_success_result(
+                data={'validation_passed': True}, rollback_data={'validated_files_count': len(context.organized_files)}
+            )
+
+        except Exception as e:
+            return self.create_error_result(f'File validation failed: {str(e)}')
+
+    def can_skip(self, context: UploadContext) -> bool:
+        """File validation cannot be skipped."""
+        return False
+
+    def rollback(self, context: UploadContext) -> None:
+        """Rollback file validation."""
+        # Nothing specific to rollback for validation
+        context.run.log_message_with_code(LogCode.ROLLBACK_FILE_VALIDATION)
+
+    def validate_prerequisites(self, context: UploadContext) -> None:
+        """Validate prerequisites for file validation."""
+        if not context.organized_files:
+            raise ValueError('No organized files available for validation')
+
+        if not context.file_specifications:
+            raise ValueError('File specifications not available for validation')

synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py

@@ -0,0 +1 @@
+# Strategy pattern implementations for upload actions

synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py

@@ -0,0 +1,82 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any, Dict, List
+
+
+class ValidationResult:
+    """Result of validation operations."""
+
+    def __init__(self, valid: bool, errors: List[str] = None):
+        self.valid = valid
+        self.errors = errors or []
+
+    def __bool__(self):
+        return self.valid
+
+
+class ValidationStrategy(ABC):
+    """Strategy interface for validation operations."""
+
+    @abstractmethod
+    def validate_params(self, params: Dict) -> ValidationResult:
+        """Validate action parameters."""
+        pass
+
+    @abstractmethod
+    def validate_files(self, files: List[Dict], specs: Dict) -> ValidationResult:
+        """Validate organized files against specifications."""
+        pass
+
+
+class FileDiscoveryStrategy(ABC):
+    """Strategy interface for file discovery and organization."""
+
+    @abstractmethod
+    def discover(self, path: Path, recursive: bool) -> List[Path]:
+        """Discover files in the given path."""
+        pass
+
+    @abstractmethod
+    def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
+        """Organize files according to specifications."""
+        pass
+
+
+class MetadataStrategy(ABC):
+    """Strategy interface for metadata extraction and processing."""
+
+    @abstractmethod
+    def extract(self, source_path: Path) -> Dict[str, Dict[str, Any]]:
+        """Extract metadata from source (e.g., Excel file)."""
+        pass
+
+    @abstractmethod
+    def validate(self, metadata: Dict) -> ValidationResult:
+        """Validate extracted metadata."""
+        pass
+
+
+class UploadConfig:
+    """Configuration for upload operations."""
+
+    def __init__(self, chunked_threshold_mb: int = 50, batch_size: int = 1):
+        self.chunked_threshold_mb = chunked_threshold_mb
+        self.batch_size = batch_size
+
+
+class UploadStrategy(ABC):
+    """Strategy interface for file upload operations."""
+
+    @abstractmethod
+    def upload(self, files: List[Dict], config: UploadConfig) -> List[Dict]:
+        """Upload files to storage."""
+        pass
+
+
+class DataUnitStrategy(ABC):
+    """Strategy interface for data unit generation."""
+
+    @abstractmethod
+    def generate(self, uploaded_files: List[Dict], batch_size: int) -> List[Dict]:
+        """Generate data units from uploaded files."""
+        pass
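
The abstract bases above are the extension points the upload steps resolve through `context.strategies`. A minimal sketch of a custom validation strategy written against this interface; the class name and its rule are hypothetical, shown only to illustrate the contract, and the import assumes the sketch lives alongside these strategy modules:

from .base import ValidationResult, ValidationStrategy


class NonEmptyEntriesValidation(ValidationStrategy):
    """Hypothetical strategy: reject empty parameter sets and empty organized-file entries."""

    def validate_params(self, params):
        errors = [] if params else ['params are empty']
        return ValidationResult(valid=not errors, errors=errors)

    def validate_files(self, files, specs):
        errors = [f'organized file entry {i} is empty' for i, entry in enumerate(files) if not entry]
        return ValidationResult(valid=not errors, errors=errors)

Because ValidationResult defines __bool__, a result can also be tested directly in an if-statement, although ValidateFilesStep above checks the .valid attribute explicitly.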

synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py

@@ -0,0 +1 @@
+# Data unit strategy implementations

synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py

@@ -0,0 +1,39 @@
+from typing import Dict, List
+
+from synapse_sdk.clients.utils import get_batched_list
+
+from ...enums import LogCode, UploadStatus
+from ..base import DataUnitStrategy
+
+
+class BatchDataUnitStrategy(DataUnitStrategy):
+    """Batch data unit generation strategy."""
+
+    def __init__(self, context):
+        self.context = context
+
+    def generate(self, uploaded_files: List[Dict], batch_size: int) -> List[Dict]:
+        """Generate data units in batches."""
+        client = self.context.client
+        generated_data_units = []
+
+        # Use the same batching logic as the legacy implementation
+        batches = get_batched_list(uploaded_files, batch_size)
+
+        for batch in batches:
+            try:
+                created_data_units = client.create_data_units(batch)
+                generated_data_units.extend(created_data_units)
+
+                # Log each created data unit
+                for created_data_unit in created_data_units:
+                    self.context.run.log_data_unit(
+                        created_data_unit['id'], UploadStatus.SUCCESS, data_unit_meta=created_data_unit.get('meta')
+                    )
+            except Exception as e:
+                self.context.run.log_message_with_code(LogCode.DATA_UNIT_BATCH_FAILED, str(e))
+                # Log failed data units
+                for _ in batch:
+                    self.context.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)
+
+        return generated_data_units

synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py

@@ -0,0 +1,29 @@
+from typing import Dict, List
+
+from ...enums import LogCode, UploadStatus
+from ..base import DataUnitStrategy
+
+
+class SingleDataUnitStrategy(DataUnitStrategy):
+    """Single data unit generation strategy."""
+
+    def __init__(self, context):
+        self.context = context
+
+    def generate(self, uploaded_files: List[Dict], batch_size: int) -> List[Dict]:
+        """Generate data units individually."""
+        client = self.context.client
+        generated_data_units = []
+
+        for uploaded_file in uploaded_files:
+            try:
+                # Create data unit for single file (batch of 1)
+                created_data_units = client.create_data_units([uploaded_file])
+                generated_data_units.extend(created_data_units)
+
+            except Exception as e:
+                self.context.run.log_message_with_code(LogCode.DATA_UNIT_BATCH_FAILED, str(e))
+                # Log failed data unit
+                self.context.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)
+
+        return generated_data_units
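
Both data-unit strategies expose the same generate(uploaded_files, batch_size) call; which one is instantiated is decided elsewhere in the package (a factory/registry module appears in the file list but is not part of this excerpt). A hedged sketch of how a caller might choose between them, assuming only the classes shown above and that the helper lives next to these modules; the selection function is hypothetical:

from ..base import DataUnitStrategy
from .batch import BatchDataUnitStrategy
from .single import SingleDataUnitStrategy


def pick_data_unit_strategy(context, batch_size: int) -> DataUnitStrategy:
    # Illustrative selection only; the real wiring is not shown in this diff excerpt.
    if batch_size > 1:
        return BatchDataUnitStrategy(context)
    return SingleDataUnitStrategy(context)


# data_units = pick_data_unit_strategy(context, batch_size=10).generate(uploaded_files, batch_size=10)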

synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py

@@ -0,0 +1 @@
+# File discovery strategy implementations