synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,143 @@
1
+ """Validation strategies for ToTask action."""
2
+
3
+ from typing import Any, Dict
4
+
5
+ from ..enums import LogCode
6
+ from .base import ToTaskContext, ValidationStrategy
7
+
8
+
9
class ProjectValidationStrategy(ValidationStrategy):
    """Validation strategy that resolves the project and its data collection."""

    @staticmethod
    def _reject(context: ToTaskContext, code: Any, message: str) -> Dict[str, Any]:
        """Log ``code`` through the context logger and build a failure result."""
        context.logger.log_message_with_code(code)
        return {'success': False, 'error': message}

    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
        """Fetch the project and its data collection and store both on the context.

        Reads ``context.params['project']``; on success writes ``context.project``
        and ``context.data_collection``.

        Args:
            context: Shared context for the action execution

        Returns:
            ``{'success': True}`` when both lookups pass, otherwise a dict with
            ``'success': False`` and an ``'error'`` message.
        """
        try:
            api = context.client
            requested_project = context.params['project']

            # A bare string from the client is treated as an invalid response.
            fetched_project = api.get_project(requested_project)
            if isinstance(fetched_project, str):
                return self._reject(
                    context, LogCode.INVALID_PROJECT_RESPONSE, 'Invalid project response received'
                )
            context.project = fetched_project

            # The project must point at a data collection before it is fetched.
            collection_id = fetched_project.get('data_collection')
            if not collection_id:
                return self._reject(
                    context, LogCode.NO_DATA_COLLECTION, 'Project does not have a data collection'
                )

            fetched_collection = api.get_data_collection(collection_id)
            if isinstance(fetched_collection, str):
                return self._reject(
                    context,
                    LogCode.INVALID_DATA_COLLECTION_RESPONSE,
                    'Invalid data collection response received',
                )
            context.data_collection = fetched_collection

            return {'success': True}

        except Exception as exc:
            # Any unexpected failure is logged and reported as a validation error.
            failure = f'Project validation failed: {str(exc)}'
            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, failure)
            return {'success': False, 'error': failure}
55
+
56
+
57
class TaskValidationStrategy(ValidationStrategy):
    """Strategy for validating and discovering tasks."""

    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
        """Discover the task IDs to process and store them on the context.

        Queries ``client.list_tasks`` for the project in ``context.params['project']``
        (requesting only the ``id`` field), merges any ``task_filters`` from the
        params into the query, and writes the resulting integer IDs to
        ``context.task_ids``.

        Args:
            context: Shared context for the action execution

        Returns:
            ``{'success': True, 'task_count': <int>}`` when at least one task ID
            was collected, otherwise ``{'success': False, 'error': <message>}``.
        """
        try:
            client = context.client

            # Build task query parameters
            query_params = {
                'project': context.params['project'],
                'fields': 'id',
            }
            task_filters = context.params.get('task_filters')
            if task_filters:
                query_params.update(task_filters)

            # list_tasks is unpacked as (iterable of items, reported count).
            task_items, reported_count = client.list_tasks(params=query_params, list_all=True)

            # Keep only dict items carrying a truthy id.
            task_ids = [int(item['id']) for item in task_items if isinstance(item, dict) and item.get('id')]

            # Fail when the backend reports no tasks OR when no usable ID could be
            # extracted; otherwise the action would "succeed" with nothing to annotate.
            if not reported_count or not task_ids:
                context.logger.log_message_with_code(LogCode.NO_TASKS_FOUND)
                return {'success': False, 'error': 'No tasks found to annotate'}

            context.task_ids = task_ids
            return {'success': True, 'task_count': len(task_ids)}

        except Exception as e:
            error_msg = f'Task validation failed: {str(e)}'
            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, error_msg)
            return {'success': False, 'error': error_msg}
98
+
99
+
100
class TargetSpecificationValidationStrategy(ValidationStrategy):
    """Validation strategy that checks the target file specification name."""

    def validate(self, context: ToTaskContext) -> Dict[str, Any]:
        """Check that the requested target specification is one of the file specifications.

        The check only applies when ``context.annotation_method`` is
        ``AnnotationMethod.FILE``; any other method passes immediately.

        Args:
            context: Shared context for the action execution

        Returns:
            Dict with 'success' boolean and, on failure, an 'error' message
        """
        try:
            from ..enums import AnnotationMethod

            # Nothing to verify unless the FILE annotation method is in use.
            if context.annotation_method != AnnotationMethod.FILE:
                return {'success': True}

            wanted_name = context.params.get('target_specification_name')
            if not wanted_name:
                context.logger.log_message_with_code(LogCode.TARGET_SPEC_REQUIRED)
                return {'success': False, 'error': 'Target specification name is required for file annotation method'}

            collection = context.data_collection
            if not collection:
                return {'success': False, 'error': 'Data collection not available for validation'}

            # Scan the specifications and stop at the first one with a matching name.
            for spec in collection.get('file_specifications', []):
                if spec.get('name') == wanted_name:
                    break
            else:
                context.logger.log_message_with_code(LogCode.TARGET_SPEC_NOT_FOUND, wanted_name)
                return {
                    'success': False,
                    'error': f"Target specification '{wanted_name}' not found in file specifications",
                }

            return {'success': True}

        except Exception as exc:
            failure = f'Target specification validation failed: {str(exc)}'
            context.logger.log_message_with_code(LogCode.VALIDATION_FAILED, failure)
            return {'success': False, 'error': failure}
@@ -0,0 +1,19 @@
1
+ from .action import UploadAction
2
+ from .enums import LOG_MESSAGES, LogCode, UploadStatus
3
+ from .exceptions import ExcelParsingError, ExcelSecurityError
4
+ from .models import UploadParams
5
+ from .run import UploadRun
6
+ from .utils import ExcelSecurityConfig, PathAwareJSONEncoder
7
+
8
+ __all__ = [
9
+ 'UploadAction',
10
+ 'UploadRun',
11
+ 'UploadParams',
12
+ 'UploadStatus',
13
+ 'LogCode',
14
+ 'LOG_MESSAGES',
15
+ 'ExcelSecurityError',
16
+ 'ExcelParsingError',
17
+ 'PathAwareJSONEncoder',
18
+ 'ExcelSecurityConfig',
19
+ ]
@@ -0,0 +1,236 @@
1
+ from typing import Any, Dict, Optional
2
+
3
+ from synapse_sdk.plugins.categories.base import Action
4
+ from synapse_sdk.plugins.categories.decorators import register_action
5
+ from synapse_sdk.plugins.enums import PluginCategory, RunMethod
6
+ from synapse_sdk.plugins.exceptions import ActionError
7
+
8
+ from .context import UploadContext
9
+ from .enums import LogCode
10
+ from .factory import StrategyFactory
11
+ from .models import UploadParams
12
+ from .orchestrator import UploadOrchestrator
13
+ from .registry import StepRegistry
14
+ from .run import UploadRun
15
+ from .steps.cleanup import CleanupStep
16
+ from .steps.collection import AnalyzeCollectionStep
17
+ from .steps.generate import GenerateDataUnitsStep
18
+ from .steps.initialize import InitializeStep
19
+ from .steps.metadata import ProcessMetadataStep
20
+ from .steps.organize import OrganizeFilesStep
21
+ from .steps.upload import UploadFilesStep
22
+ from .steps.validate import ValidateFilesStep
23
+ from .utils import ExcelSecurityConfig
24
+
25
+
26
@register_action
class UploadAction(Action):
    """Upload action for processing and uploading files to storage.

    The action registers a fixed sequence of workflow steps in a ``StepRegistry``
    and builds pluggable strategies (validation, file discovery, metadata,
    upload, data unit) through a ``StrategyFactory``. ``start`` shares state
    between steps through an ``UploadContext``.

    NOTE(review): ``start`` runs the registered steps directly via
    ``_execute_with_uploader_integration`` and only uses the
    ``UploadOrchestrator`` to inject strategies into the context. Whether any
    rollback happens on failure is not visible from this class - confirm
    against the orchestrator and step implementations.

    Class Attributes:
        name (str): Action identifier ('upload')
        category (PluginCategory): UPLOAD category
        method (RunMethod): JOB execution method
        run_class (type): UploadRun for specialized logging
        params_model (type): UploadParams for parameter validation
        progress_categories (dict): Progress tracking configuration
        metrics_categories (dict): Metrics collection configuration

    Example:
        >>> action = UploadAction(
        ...     params={
        ...         'name': 'Data Upload',
        ...         'path': '/data/files',
        ...         'storage': 1,
        ...         'data_collection': 5
        ...     },
        ...     plugin_config=config
        ... )
        >>> result = action.start()
    """

    name = 'upload'
    category = PluginCategory.UPLOAD
    method = RunMethod.JOB
    run_class = UploadRun
    params_model = UploadParams
    progress_categories = {
        'analyze_collection': {
            'proportion': 2,
        },
        'upload_data_files': {
            'proportion': 38,
        },
        'generate_data_units': {
            'proportion': 60,
        },
    }
    metrics_categories = {
        'data_files': {
            'stand_by': 0,
            'failed': 0,
            'success': 0,
        },
        'data_units': {
            'stand_by': 0,
            'failed': 0,
            'success': 0,
        },
    }

    def __init__(self, *args, **kwargs):
        """Initialize the upload action and register its workflow steps."""
        super().__init__(*args, **kwargs)

        # Initialize Excel configuration from config.yaml
        self.excel_config = ExcelSecurityConfig.from_action_config(self.config)
        self.strategy_factory = StrategyFactory()
        self.step_registry = StepRegistry()
        self._configure_workflow()

    def _configure_workflow(self) -> None:
        """Register all workflow steps in execution order."""
        for step in (
            InitializeStep(),
            ProcessMetadataStep(),
            AnalyzeCollectionStep(),
            OrganizeFilesStep(),
            ValidateFilesStep(),
            UploadFilesStep(),
            GenerateDataUnitsStep(),
            CleanupStep(),
        ):
            self.step_registry.register(step)

    def start(self) -> Dict[str, Any]:
        """Execute the upload workflow.

        Builds the shared ``UploadContext``, configures strategies, and runs the
        registered steps.

        Returns:
            Dict[str, Any]: The dictionary produced by ``context.get_result()``

        Raises:
            ActionError: If any part of the workflow fails. The original
                exception is attached as ``__cause__``.
        """
        try:
            # Ensure params is not None
            params = self.params or {}

            # Create upload context for sharing state between steps
            context = UploadContext(params, self.run, self.client, action=self)

            # Configure strategies based on parameters with context
            strategies = self._configure_strategies(context)

            # The orchestrator is only used to inject strategies; steps are run here.
            orchestrator = UploadOrchestrator(context, self.step_registry, strategies)

            return self._execute_with_uploader_integration(orchestrator, context)

        except Exception as e:
            # Log the error and re-raise as ActionError, keeping the cause chained
            if self.run:
                self.run.log_message_with_code(LogCode.UPLOAD_WORKFLOW_FAILED, str(e))
            raise ActionError(f'Upload failed: {str(e)}') from e
        finally:
            # Always emit completion log so backend can record end time even on failures
            if self.run:
                self.run.end_log()

    def _run_named_steps(self, steps, step_names, context) -> None:
        """Run, in registry order, every step whose name is in ``step_names``.

        Each result is recorded on the context before its success flag is checked.

        Raises:
            ActionError: If a step raises or reports an unsuccessful result.
        """
        for step in steps:
            if step.name not in step_names:
                continue
            try:
                result = step.safe_execute(context)
                context.update(result)
                if not result.success:
                    raise Exception(f"Step '{step.name}' failed: {result.error}")
            except Exception as e:
                raise ActionError(f"Failed at step '{step.name}': {str(e)}") from e

    def _execute_with_uploader_integration(self, orchestrator, context) -> Dict[str, Any]:
        """Run the workflow steps in two phases and return the context result.

        Only steps with the names listed below are executed; a registered step
        with any other name is skipped.
        """
        # Inject strategies into context before executing steps
        orchestrator._inject_strategies_into_context()

        steps = orchestrator.step_registry.get_steps()

        # Phase 1: everything up to and including file organization.
        preparation_steps = ('initialize', 'process_metadata', 'analyze_collection', 'organize_files')
        # Phase 2: validation, upload, data unit generation and cleanup.
        upload_steps = ('validate_files', 'upload_files', 'generate_data_units', 'cleanup')

        self._run_named_steps(steps, preparation_steps, context)
        self._run_named_steps(steps, upload_steps, context)

        # Return the final result from context
        return context.get_result()

    def _configure_strategies(self, context=None) -> Dict[str, Any]:
        """Create one strategy instance per strategy type from the action params.

        Args:
            context: UploadContext passed through to each factory method

        Returns:
            Dict[str, Any]: Strategy instances keyed by 'validation',
            'file_discovery', 'metadata', 'upload' and 'data_unit'
        """
        # Ensure params is not None
        params = self.params or {}
        factory = self.strategy_factory

        return {
            'validation': factory.create_validation_strategy(params, context),
            'file_discovery': factory.create_file_discovery_strategy(params, context),
            'metadata': factory.create_metadata_strategy(params, context),
            'upload': factory.create_upload_strategy(params, context),
            'data_unit': factory.create_data_unit_strategy(params, context),
        }

    def get_uploader(self, path, file_specification, organized_files, params: Optional[Dict] = None):
        """Get uploader from entrypoint (compatibility method).

        Args:
            path: Forwarded to the entrypoint unchanged
            file_specification: Forwarded to the entrypoint unchanged
            organized_files: Forwarded to the entrypoint unchanged
            params: Optional mapping; only its ``'extra_params'`` entry is read.
                ``None`` (the default) behaves like an empty mapping.

        Returns:
            Whatever ``self.entrypoint(...)`` returns.
        """
        extra_params = (params or {}).get('extra_params')
        return self.entrypoint(self.run, path, file_specification, organized_files, extra_params=extra_params)

    def get_workflow_summary(self) -> Dict[str, Any]:
        """Get summary of configured workflow.

        Returns:
            Dict[str, Any]: Step names, step count, total progress weight and
            the strategies the factory reports as available
        """
        registry = self.step_registry
        return {
            'steps': [step.name for step in registry.get_steps()],
            'step_count': len(registry),
            'total_progress_weight': registry.get_total_progress_weight(),
            'available_strategies': self.strategy_factory.get_available_strategies(),
        }
@@ -0,0 +1,185 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from .run import UploadRun
6
+
7
+
8
class StepResult:
    """Result of a workflow step execution.

    Attributes are stored exactly as passed, except that ``data`` and
    ``rollback_data`` fall back to a fresh empty dict when falsy. ``timestamp``
    is the naive local time at which the result object was created.
    """

    def __init__(
        self,
        success: bool = True,
        data: Optional[Dict[str, Any]] = None,
        error: Optional[str] = None,
        rollback_data: Optional[Dict[str, Any]] = None,
        skipped: bool = False,
        original_exception: Optional[Exception] = None,
    ):
        """Create a step result.

        Args:
            success: Whether the step succeeded
            data: Values produced by the step (defaults to an empty dict)
            error: Error message for a failed step
            rollback_data: Information recorded for rollback (defaults to an empty dict)
            skipped: Whether the step was skipped
            original_exception: Exception object associated with a failure, if any
        """
        self.success = success
        self.data = data or {}
        self.error = error
        self.rollback_data = rollback_data or {}
        self.skipped = skipped
        self.original_exception = original_exception
        self.timestamp = datetime.now()

    def __bool__(self) -> bool:
        """Return the truthiness of ``success``.

        ``__bool__`` must return a real ``bool``; coercing here keeps truth
        tests working when ``success`` was given as e.g. ``0``, ``1`` or ``None``.
        """
        return bool(self.success)
30
+
31
+
32
class UploadContext:
    """Shared, mutable state passed between upload workflow steps.

    Step output is merged in through ``update``. Keys that match an existing
    instance attribute overwrite that attribute; all other keys are kept in a
    lazily created ``step_data`` dict. Only instance attributes count as
    state, so a key that happens to share a name with a method (``get``,
    ``update``, ...) never replaces that method.
    """

    def __init__(self, params: Dict, run: 'UploadRun', client: Any, action: Any = None):
        """Create a context for one upload run.

        Args:
            params: Action parameters
            run: Run object used by the workflow
            client: Client object used by the workflow
            action: Reference to the parent action for uploader access
        """
        self.params = params
        self.run = run
        self.client = client
        self._action = action  # Reference to parent action for uploader access

        # Core state
        self.storage = None
        self.pathlib_cwd = None
        self.metadata: Dict[str, Dict[str, Any]] = {}
        self.file_specifications: Dict[str, Any] = {}
        self.organized_files: List[Dict[str, Any]] = []
        self.uploaded_files: List[Dict[str, Any]] = []
        self.data_units: List[Dict[str, Any]] = []

        # Progress and metrics
        self.metrics: Dict[str, Any] = {}
        self.errors: List[str] = []
        self.step_results: List['StepResult'] = []

        # Strategies (injected by orchestrator)
        self.strategies: Dict[str, Any] = {}

        # Rollback information
        self.rollback_data: Dict[str, Any] = {}

    def _is_state_key(self, key: str) -> bool:
        """Return True when ``key`` names an attribute stored on this instance."""
        return key in vars(self)

    def _store_step_data(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key`` in ``step_data``, creating the dict on first use."""
        if not self._is_state_key('step_data'):
            self.step_data = {}
        self.step_data[key] = value

    def update(self, result: 'StepResult') -> None:
        """Record a step result and merge its data into the context.

        A successful result has its ``data`` merged (see class docstring) and
        its ``rollback_data`` merged into ``self.rollback_data``. A failed
        result only contributes its ``error`` message, if any.
        """
        self.step_results.append(result)

        if not result.success:
            # Record error
            if result.error:
                self.errors.append(result.error)
            return

        for key, value in result.data.items():
            if self._is_state_key(key):
                setattr(self, key, value)
            else:
                self._store_step_data(key, value)

        if result.rollback_data:
            self.rollback_data.update(result.rollback_data)

    def get_result(self) -> Dict[str, Any]:
        """Get final result dictionary (counts, success flag and errors)."""
        return {
            'uploaded_files_count': len(self.uploaded_files),
            'generated_data_units_count': len(self.data_units),
            'success': len(self.errors) == 0,
            'errors': self.errors,
        }

    def has_errors(self) -> bool:
        """Check if context has any errors."""
        return len(self.errors) > 0

    def get_last_step_result(self) -> Optional['StepResult']:
        """Get the result of the last executed step, or None if there is none."""
        return self.step_results[-1] if self.step_results else None

    def get_step_result_by_name(self, step_name: str) -> Optional['StepResult']:
        """Get the first step result whose ``rollback_data['step_name']`` matches."""
        for result in self.step_results:
            if result.rollback_data.get('step_name') == step_name:
                return result
        return None

    def clear_errors(self) -> None:
        """Clear all errors (useful for retry scenarios)."""
        self.errors.clear()

    def add_error(self, error: str) -> None:
        """Add an error to the context."""
        self.errors.append(error)

    def get_param(self, key: str, default: Any = None) -> Any:
        """Get parameter value with default."""
        return self.params.get(key, default)

    def set_storage(self, storage: Any) -> None:
        """Set storage object."""
        self.storage = storage

    def set_pathlib_cwd(self, path: Path) -> None:
        """Set current working directory path."""
        self.pathlib_cwd = path

    def set_file_specifications(self, specs: Dict[str, Any]) -> None:
        """Set file specifications."""
        self.file_specifications = specs

    def add_organized_files(self, files: List[Dict[str, Any]]) -> None:
        """Add organized files to context."""
        self.organized_files.extend(files)

    def add_uploaded_files(self, files: List[Dict[str, Any]]) -> None:
        """Add uploaded files to context."""
        self.uploaded_files.extend(files)

    def add_data_units(self, units: List[Dict[str, Any]]) -> None:
        """Add data units to context."""
        self.data_units.extend(units)

    def update_metrics(self, category: str, metrics: Dict[str, Any]) -> None:
        """Update metrics for a specific category."""
        self.metrics.setdefault(category, {}).update(metrics)

    def get(self, key: str, default: Any = None) -> Any:
        """Get value from context by key.

        Lookup order: instance attributes, then ``step_data``, then the
        ``'file_specification_template'`` alias for ``file_specifications``,
        then ``default``.
        """
        state = vars(self)
        if key in state:
            return state[key]

        extras = state.get('step_data')
        if extras and key in extras:
            return extras[key]

        # Alias kept for callers that use the template key name.
        if key == 'file_specification_template':
            return self.file_specifications

        return default

    def set(self, key: str, value: Any) -> None:
        """Set value in context by key.

        ``'file_specification_template'`` is an alias for
        ``file_specifications``; other keys overwrite a matching instance
        attribute or are stored in ``step_data``.
        """
        if key == 'file_specification_template':
            self.file_specifications = value
        elif self._is_state_key(key):
            setattr(self, key, value)
        else:
            self._store_step_data(key, value)