synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,493 @@
1
+ from enum import Enum
2
+
3
+ from synapse_sdk.shared.enums import Context
4
+
5
+
6
class ValidationErrorCode(str, Enum):
    """Validation error codes for Pydantic validators.

    Used in field validators to provide consistent, type-safe error codes
    for resource validation failures.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (e.g. ``ValidationErrorCode.MISSING_CONTEXT == 'missing_context'``).
    """

    MISSING_CONTEXT = 'missing_context'
    STORAGE_NOT_FOUND = 'storage_not_found'
    DATA_COLLECTION_NOT_FOUND = 'data_collection_not_found'
    PROJECT_NOT_FOUND = 'project_not_found'
17
+
18
+
19
# Validation error message templates, keyed by ValidationErrorCode.
#
# MISSING_CONTEXT takes no arguments. Each *_NOT_FOUND template has two
# positional placeholders: {0} is the resource ID and {1} is the text appended
# after the colon (presumably the underlying error detail -- confirm against
# the validators that format these).
VALIDATION_ERROR_MESSAGES = {
    ValidationErrorCode.MISSING_CONTEXT: 'Validation context is required.',
    ValidationErrorCode.STORAGE_NOT_FOUND: 'Storage with ID {0} does not exist or is not accessible: {1}',
    ValidationErrorCode.DATA_COLLECTION_NOT_FOUND: 'Data collection with ID {0} does not exist or is not accessible: {1}',
    ValidationErrorCode.PROJECT_NOT_FOUND: 'Project with ID {0} does not exist or is not accessible: {1}',
}
26
+
27
+
28
class UploadStatus(str, Enum):
    """Upload processing status enumeration.

    Defines the possible states for upload operations, data files, and data units
    throughout the upload process.

    Attributes:
        SUCCESS: Upload completed successfully
        FAILED: Upload failed with errors
    """

    SUCCESS = 'success'
    FAILED = 'failed'
41
+
42
+
43
class LogCode(str, Enum):
    """Type-safe logging codes for upload operations.

    Enumeration of all possible log events during upload processing. Each code
    corresponds to a specific event or error state with predefined message
    templates and log levels. Every member's value is identical to its name.

    The codes are organized by category:
        - Validation codes (VALIDATION_FAILED, STORAGE_VALIDATION_FAILED, etc.)
        - File processing codes (NO_FILES_FOUND, FILES_DISCOVERED, etc.)
        - Excel processing codes (EXCEL_SECURITY_VIOLATION, EXCEL_PARSING_ERROR, etc.)
        - Progress tracking codes (UPLOADING_DATA_FILES, GENERATING_DATA_UNITS, etc.)
        - Excel path resolution and asset path codes
        - Step lifecycle and rollback codes
        - Metadata processing codes
        - Multi-path mode codes
        - Cleanup, workflow, and extension filtering codes

    Each code maps to a configuration in LOG_MESSAGES with message template
    and appropriate log level.
    """

    STORAGE_VALIDATION_FAILED = 'STORAGE_VALIDATION_FAILED'
    COLLECTION_VALIDATION_FAILED = 'COLLECTION_VALIDATION_FAILED'
    PROJECT_VALIDATION_FAILED = 'PROJECT_VALIDATION_FAILED'
    VALIDATION_FAILED = 'VALIDATION_FAILED'
    NO_FILES_FOUND = 'NO_FILES_FOUND'
    NO_FILES_UPLOADED = 'NO_FILES_UPLOADED'
    NO_DATA_UNITS_GENERATED = 'NO_DATA_UNITS_GENERATED'
    NO_TYPE_DIRECTORIES = 'NO_TYPE_DIRECTORIES'
    EXCEL_SECURITY_VIOLATION = 'EXCEL_SECURITY_VIOLATION'
    EXCEL_PARSING_ERROR = 'EXCEL_PARSING_ERROR'
    EXCEL_METADATA_LOADED = 'EXCEL_METADATA_LOADED'
    UPLOADING_DATA_FILES = 'UPLOADING_DATA_FILES'
    GENERATING_DATA_UNITS = 'GENERATING_DATA_UNITS'
    IMPORT_COMPLETED = 'IMPORT_COMPLETED'
    TYPE_DIRECTORIES_FOUND = 'TYPE_DIRECTORIES_FOUND'
    TYPE_STRUCTURE_DETECTED = 'TYPE_STRUCTURE_DETECTED'
    FILES_DISCOVERED = 'FILES_DISCOVERED'
    NO_FILES_FOUND_WARNING = 'NO_FILES_FOUND_WARNING'
    FILE_UPLOAD_FAILED = 'FILE_UPLOAD_FAILED'
    DATA_UNIT_BATCH_FAILED = 'DATA_UNIT_BATCH_FAILED'
    FILENAME_TOO_LONG = 'FILENAME_TOO_LONG'
    MISSING_REQUIRED_FILES = 'MISSING_REQUIRED_FILES'
    EXCEL_FILE_NOT_FOUND = 'EXCEL_FILE_NOT_FOUND'
    EXCEL_FILE_VALIDATION_STARTED = 'EXCEL_FILE_VALIDATION_STARTED'
    EXCEL_WORKBOOK_LOADED = 'EXCEL_WORKBOOK_LOADED'
    FILE_ORGANIZATION_STARTED = 'FILE_ORGANIZATION_STARTED'
    BATCH_PROCESSING_STARTED = 'BATCH_PROCESSING_STARTED'
    EXCEL_SECURITY_VALIDATION_STARTED = 'EXCEL_SECURITY_VALIDATION_STARTED'
    EXCEL_MEMORY_ESTIMATION = 'EXCEL_MEMORY_ESTIMATION'
    EXCEL_FILE_NOT_FOUND_PATH = 'EXCEL_FILE_NOT_FOUND_PATH'
    EXCEL_SECURITY_VALIDATION_FAILED = 'EXCEL_SECURITY_VALIDATION_FAILED'
    EXCEL_PARSING_FAILED = 'EXCEL_PARSING_FAILED'
    EXCEL_INVALID_FILE_FORMAT = 'EXCEL_INVALID_FILE_FORMAT'
    EXCEL_FILE_TOO_LARGE = 'EXCEL_FILE_TOO_LARGE'
    EXCEL_FILE_ACCESS_ERROR = 'EXCEL_FILE_ACCESS_ERROR'
    EXCEL_UNEXPECTED_ERROR = 'EXCEL_UNEXPECTED_ERROR'
    # Excel path resolution codes
    EXCEL_PATH_RESOLVED_STORAGE = 'EXCEL_PATH_RESOLVED_STORAGE'
    EXCEL_PATH_RESOLUTION_FAILED = 'EXCEL_PATH_RESOLUTION_FAILED'
    EXCEL_PATH_RESOLUTION_ERROR = 'EXCEL_PATH_RESOLUTION_ERROR'
    # Asset path codes
    ASSET_PATH_ACCESS_ERROR = 'ASSET_PATH_ACCESS_ERROR'
    ASSET_PATH_NOT_FOUND = 'ASSET_PATH_NOT_FOUND'
    # Step lifecycle codes
    STEP_STARTING = 'STEP_STARTING'
    STEP_COMPLETED = 'STEP_COMPLETED'
    STEP_SKIPPED = 'STEP_SKIPPED'
    STEP_ERROR = 'STEP_ERROR'
    # Rollback codes
    ROLLBACK_INITIALIZATION = 'ROLLBACK_INITIALIZATION'
    ROLLBACK_DATA_UNIT_GENERATION = 'ROLLBACK_DATA_UNIT_GENERATION'
    ROLLBACK_FILE_VALIDATION = 'ROLLBACK_FILE_VALIDATION'
    ROLLBACK_FILE_UPLOADS = 'ROLLBACK_FILE_UPLOADS'
    ROLLBACK_COLLECTION_ANALYSIS = 'ROLLBACK_COLLECTION_ANALYSIS'
    ROLLBACK_FILE_ORGANIZATION = 'ROLLBACK_FILE_ORGANIZATION'
    ROLLBACK_CLEANUP = 'ROLLBACK_CLEANUP'
    # Metadata processing codes
    NO_METADATA_STRATEGY = 'NO_METADATA_STRATEGY'
    METADATA_FILE_ATTRIBUTE_PROCESSING = 'METADATA_FILE_ATTRIBUTE_PROCESSING'
    METADATA_TEMP_FILE_CLEANUP = 'METADATA_TEMP_FILE_CLEANUP'
    METADATA_TEMP_FILE_CLEANUP_FAILED = 'METADATA_TEMP_FILE_CLEANUP_FAILED'
    METADATA_BASE64_DECODED = 'METADATA_BASE64_DECODED'
    METADATA_BASE64_DECODE_FAILED = 'METADATA_BASE64_DECODE_FAILED'
    # Multi-path mode codes
    MULTI_PATH_MODE_ENABLED = 'MULTI_PATH_MODE_ENABLED'
    OPTIONAL_SPEC_SKIPPED = 'OPTIONAL_SPEC_SKIPPED'
    DISCOVERING_FILES_FOR_ASSET = 'DISCOVERING_FILES_FOR_ASSET'
    NO_FILES_FOUND_FOR_ASSET = 'NO_FILES_FOUND_FOR_ASSET'
    FILES_FOUND_FOR_ASSET = 'FILES_FOUND_FOR_ASSET'
    ORGANIZING_FILES_MULTI_PATH = 'ORGANIZING_FILES_MULTI_PATH'
    TYPE_DIRECTORIES_MULTI_PATH = 'TYPE_DIRECTORIES_MULTI_PATH'
    DATA_UNITS_CREATED_FROM_FILES = 'DATA_UNITS_CREATED_FROM_FILES'
    # Cleanup codes
    CLEANUP_WARNING = 'CLEANUP_WARNING'
    CLEANUP_TEMP_DIR_SUCCESS = 'CLEANUP_TEMP_DIR_SUCCESS'
    CLEANUP_TEMP_DIR_FAILED = 'CLEANUP_TEMP_DIR_FAILED'
    # Workflow error codes
    UPLOAD_WORKFLOW_FAILED = 'UPLOAD_WORKFLOW_FAILED'
    UNKNOWN_LOG_CODE = 'UNKNOWN_LOG_CODE'
    # Orchestrator workflow codes
    WORKFLOW_STARTING = 'WORKFLOW_STARTING'
    WORKFLOW_COMPLETED = 'WORKFLOW_COMPLETED'
    WORKFLOW_FAILED = 'WORKFLOW_FAILED'
    STEP_FAILED = 'STEP_FAILED'
    STEP_EXCEPTION = 'STEP_EXCEPTION'
    STEP_TRACEBACK = 'STEP_TRACEBACK'
    ROLLBACK_STARTING = 'ROLLBACK_STARTING'
    ROLLBACK_COMPLETED = 'ROLLBACK_COMPLETED'
    STEP_ROLLBACK = 'STEP_ROLLBACK'
    ROLLBACK_ERROR = 'ROLLBACK_ERROR'
    # Extension filtering codes
    FILES_FILTERED_BY_EXTENSION = 'FILES_FILTERED_BY_EXTENSION'
152
+
153
+
154
# Per-code log configuration, keyed by LogCode.
#
# Each entry is a dict with two keys:
#   'message': the message template; positional '{}' placeholders mark the
#              arguments the caller must supply (presumably via str.format --
#              confirm against the run logger that consumes this table).
#   'level':   a Context member (INFO / WARNING / DANGER) or None.
#              NOTE(review): None presumably means "use the logger's default
#              level" -- confirm against the consumer.
LOG_MESSAGES = {
    LogCode.STORAGE_VALIDATION_FAILED: {
        'message': 'Storage validation failed.',
        'level': Context.DANGER,
    },
    LogCode.COLLECTION_VALIDATION_FAILED: {
        'message': 'Collection validation failed.',
        'level': Context.DANGER,
    },
    LogCode.PROJECT_VALIDATION_FAILED: {
        'message': 'Project validation failed.',
        'level': Context.DANGER,
    },
    LogCode.VALIDATION_FAILED: {
        'message': 'Validation failed.',
        'level': Context.DANGER,
    },
    LogCode.NO_FILES_FOUND: {
        'message': 'Files not found on the path.',
        'level': Context.WARNING,
    },
    LogCode.NO_FILES_UPLOADED: {
        'message': 'No files were uploaded.',
        'level': Context.WARNING,
    },
    LogCode.NO_DATA_UNITS_GENERATED: {
        'message': 'No data units were generated.',
        'level': Context.WARNING,
    },
    LogCode.NO_TYPE_DIRECTORIES: {
        'message': 'No type-based directory structure found.',
        'level': Context.INFO,
    },
    LogCode.EXCEL_SECURITY_VIOLATION: {
        'message': 'Excel security validation failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_PARSING_ERROR: {
        'message': 'Excel parsing failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_METADATA_LOADED: {
        'message': 'Excel metadata loaded for {} files',
        'level': None,
    },
    LogCode.UPLOADING_DATA_FILES: {
        'message': 'Uploading data files...',
        'level': None,
    },
    LogCode.GENERATING_DATA_UNITS: {
        'message': 'Generating data units...',
        'level': None,
    },
    LogCode.IMPORT_COMPLETED: {
        'message': 'Import completed.',
        'level': None,
    },
    LogCode.TYPE_DIRECTORIES_FOUND: {
        'message': 'Found type directories: {}',
        'level': None,
    },
    LogCode.TYPE_STRUCTURE_DETECTED: {
        'message': 'Detected type-based directory structure',
        'level': None,
    },
    LogCode.FILES_DISCOVERED: {
        'message': 'Discovered {} files',
        'level': None,
    },
    LogCode.NO_FILES_FOUND_WARNING: {
        'message': 'No files found.',
        'level': Context.WARNING,
    },
    LogCode.FILE_UPLOAD_FAILED: {
        'message': 'Failed to upload file: {}',
        'level': Context.DANGER,
    },
    LogCode.DATA_UNIT_BATCH_FAILED: {
        'message': 'Failed to create data units batch: {}',
        'level': Context.DANGER,
    },
    LogCode.FILENAME_TOO_LONG: {
        'message': 'Skipping file with overly long name: {}...',
        'level': Context.WARNING,
    },
    LogCode.MISSING_REQUIRED_FILES: {
        'message': '{} missing required files: {}',
        'level': Context.WARNING,
    },
    LogCode.EXCEL_FILE_NOT_FOUND: {
        'message': 'Excel metadata file not found: {}',
        'level': Context.WARNING,
    },
    LogCode.EXCEL_FILE_VALIDATION_STARTED: {
        'message': 'Excel file validation started',
        'level': Context.INFO,
    },
    LogCode.EXCEL_WORKBOOK_LOADED: {
        'message': 'Excel workbook loaded successfully',
        'level': Context.INFO,
    },
    LogCode.FILE_ORGANIZATION_STARTED: {
        'message': 'File organization started',
        'level': Context.INFO,
    },
    LogCode.BATCH_PROCESSING_STARTED: {
        'message': 'Batch processing started: {} batches of {} items each',
        'level': Context.INFO,
    },
    LogCode.EXCEL_SECURITY_VALIDATION_STARTED: {
        'message': 'Excel security validation started for file size: {} bytes',
        'level': Context.INFO,
    },
    LogCode.EXCEL_MEMORY_ESTIMATION: {
        'message': 'Excel memory estimation: {} bytes (file) * 3 = {} bytes (estimated)',
        'level': Context.INFO,
    },
    LogCode.EXCEL_FILE_NOT_FOUND_PATH: {
        'message': 'Excel metadata file not found',
        'level': Context.WARNING,
    },
    LogCode.EXCEL_SECURITY_VALIDATION_FAILED: {
        'message': 'Excel security validation failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_PARSING_FAILED: {
        'message': 'Excel parsing failed: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_INVALID_FILE_FORMAT: {
        'message': 'Invalid Excel file format: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_FILE_TOO_LARGE: {
        'message': 'Excel file too large to process (memory limit exceeded)',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_FILE_ACCESS_ERROR: {
        'message': 'File access error reading excel metadata: {}',
        'level': Context.DANGER,
    },
    LogCode.EXCEL_UNEXPECTED_ERROR: {
        'message': 'Unexpected error reading excel metadata: {}',
        'level': Context.DANGER,
    },
    # Excel path resolution messages
    LogCode.EXCEL_PATH_RESOLVED_STORAGE: {
        'message': 'Resolved Excel metadata path relative to storage: {}',
        'level': Context.INFO,
    },
    LogCode.EXCEL_PATH_RESOLUTION_FAILED: {
        'message': 'Storage path resolution failed ({}): {} - trying other strategies',
        'level': Context.INFO,
    },
    LogCode.EXCEL_PATH_RESOLUTION_ERROR: {
        'message': 'Unexpected error resolving storage path ({}): {} - trying other strategies',
        'level': Context.WARNING,
    },
    # Asset path messages
    LogCode.ASSET_PATH_ACCESS_ERROR: {
        'message': 'Error accessing path for {}: {}',
        'level': Context.WARNING,
    },
    LogCode.ASSET_PATH_NOT_FOUND: {
        'message': 'Path does not exist for {}: {}',
        'level': Context.WARNING,
    },
    # Step lifecycle messages
    LogCode.STEP_STARTING: {
        'message': 'Starting step: {}',
        'level': Context.INFO,
    },
    LogCode.STEP_COMPLETED: {
        'message': 'Completed step: {}',
        'level': Context.INFO,
    },
    LogCode.STEP_SKIPPED: {
        'message': 'Skipped step: {}',
        'level': Context.INFO,
    },
    LogCode.STEP_ERROR: {
        'message': 'Error in step {}: {}',
        'level': Context.DANGER,
    },
    # Rollback messages
    LogCode.ROLLBACK_INITIALIZATION: {
        'message': 'Rolling back initialization step',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_DATA_UNIT_GENERATION: {
        'message': 'Rolled back data unit generation',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_FILE_VALIDATION: {
        'message': 'Rolled back file validation',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_FILE_UPLOADS: {
        'message': 'Rolled back file uploads',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_COLLECTION_ANALYSIS: {
        'message': 'Rolled back collection analysis',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_FILE_ORGANIZATION: {
        'message': 'Rolled back file organization',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_CLEANUP: {
        'message': 'Cleanup step rollback - no action needed',
        'level': Context.INFO,
    },
    # Metadata processing messages
    LogCode.NO_METADATA_STRATEGY: {
        'message': 'No metadata strategy configured - skipping metadata processing',
        'level': Context.INFO,
    },
    LogCode.METADATA_FILE_ATTRIBUTE_PROCESSING: {
        'message': 'Processing metadata for file attribute: {}',
        'level': Context.INFO,
    },
    LogCode.METADATA_TEMP_FILE_CLEANUP: {
        'message': 'Cleaned up temporary Excel file: {}',
        'level': Context.INFO,
    },
    LogCode.METADATA_TEMP_FILE_CLEANUP_FAILED: {
        'message': 'Failed to clean up temporary file {}: {}',
        'level': Context.WARNING,
    },
    LogCode.METADATA_BASE64_DECODED: {
        'message': 'Decoded base64 Excel metadata to temporary file: {}',
        'level': Context.INFO,
    },
    LogCode.METADATA_BASE64_DECODE_FAILED: {
        'message': 'Failed to decode base64 Excel metadata: {}',
        'level': Context.DANGER,
    },
    # Multi-path mode messages
    LogCode.MULTI_PATH_MODE_ENABLED: {
        'message': 'Using multi-path mode with {} asset configurations',
        'level': Context.INFO,
    },
    LogCode.OPTIONAL_SPEC_SKIPPED: {
        'message': 'Skipping optional spec {}: no asset path configured',
        'level': Context.INFO,
    },
    LogCode.DISCOVERING_FILES_FOR_ASSET: {
        'message': 'Discovering files for {} (recursive={})',
        'level': Context.INFO,
    },
    LogCode.NO_FILES_FOUND_FOR_ASSET: {
        'message': 'No files found for {}',
        'level': Context.WARNING,
    },
    LogCode.FILES_FOUND_FOR_ASSET: {
        'message': 'Found {} files for {}',
        'level': Context.INFO,
    },
    LogCode.ORGANIZING_FILES_MULTI_PATH: {
        'message': 'Organizing {} files across {} specs',
        'level': Context.INFO,
    },
    LogCode.TYPE_DIRECTORIES_MULTI_PATH: {
        'message': 'Type directories: {}',
        'level': Context.INFO,
    },
    LogCode.DATA_UNITS_CREATED_FROM_FILES: {
        'message': 'Created {} data units from {} files',
        'level': Context.INFO,
    },
    # Cleanup messages
    LogCode.CLEANUP_WARNING: {
        'message': 'Cleanup warning: {}',
        'level': Context.WARNING,
    },
    LogCode.CLEANUP_TEMP_DIR_SUCCESS: {
        'message': 'Cleaned up temporary directory: {}',
        'level': Context.INFO,
    },
    LogCode.CLEANUP_TEMP_DIR_FAILED: {
        'message': 'Failed to cleanup temporary directory: {}',
        'level': Context.WARNING,
    },
    # Workflow error messages
    LogCode.UPLOAD_WORKFLOW_FAILED: {
        'message': 'Upload workflow failed: {}',
        'level': Context.DANGER,
    },
    LogCode.UNKNOWN_LOG_CODE: {
        'message': 'Unknown log code: {}',
        'level': Context.WARNING,
    },
    # Orchestrator workflow messages
    LogCode.WORKFLOW_STARTING: {
        'message': 'Starting upload workflow with {} steps: {}',
        'level': Context.INFO,
    },
    LogCode.WORKFLOW_COMPLETED: {
        'message': 'Upload workflow completed successfully',
        'level': Context.INFO,
    },
    LogCode.WORKFLOW_FAILED: {
        'message': 'Upload workflow failed: {}',
        'level': Context.DANGER,
    },
    LogCode.STEP_FAILED: {
        'message': "Step '{}' failed: {}",
        'level': Context.DANGER,
    },
    LogCode.STEP_EXCEPTION: {
        'message': "Exception in step '{}': {}",
        'level': Context.DANGER,
    },
    LogCode.STEP_TRACEBACK: {
        'message': 'Traceback: {}',
        'level': Context.DANGER,
    },
    LogCode.ROLLBACK_STARTING: {
        'message': 'Starting rollback of {} executed steps',
        'level': Context.WARNING,
    },
    LogCode.ROLLBACK_COMPLETED: {
        'message': 'Rollback completed',
        'level': Context.INFO,
    },
    LogCode.STEP_ROLLBACK: {
        'message': 'Rolling back step: {}',
        'level': Context.INFO,
    },
    LogCode.ROLLBACK_ERROR: {
        'message': "Error rolling back step '{}': {}",
        'level': Context.WARNING,
    },
    # Extension filtering messages
    LogCode.FILES_FILTERED_BY_EXTENSION: {
        'message': 'Filtered {} {} files with unavailable extensions: {} (allowed: {})',
        'level': Context.WARNING,
    },
}
@@ -0,0 +1,36 @@
1
class ExcelSecurityError(Exception):
    """Exception raised when Excel file security validation fails.

    This exception is raised when an Excel file violates security constraints
    such as file size limits, memory usage limits, or contains potentially
    dangerous content.

    Used during Excel metadata processing to enforce security policies
    and prevent processing of files that could pose security risks.

    Example:
        >>> if file_size > max_size:
        ...     raise ExcelSecurityError(f"File size {file_size} exceeds limit {max_size}")
    """
17
+
18
+
19
class ExcelParsingError(Exception):
    """Exception raised when Excel file parsing encounters errors.

    This exception is raised when an Excel file cannot be parsed due to
    format issues, corruption, or other parsing-related problems that
    prevent successful metadata extraction.

    Used during Excel metadata loading to distinguish parsing errors
    from security violations or other types of errors.

    Example:
        >>> try:
        ...     workbook = load_workbook(excel_file)
        ... except InvalidFileException as e:
        ...     raise ExcelParsingError(f"Failed to parse Excel file: {e}")
    """
@@ -0,0 +1,138 @@
1
+ from typing import Any, Dict
2
+
3
+ from .strategies.base import (
4
+ DataUnitStrategy,
5
+ FileDiscoveryStrategy,
6
+ MetadataStrategy,
7
+ UploadStrategy,
8
+ ValidationStrategy,
9
+ )
10
+
11
+
12
class StrategyFactory:
    """Factory for creating strategy instances based on configuration.

    The factory keeps one ``name -> class`` registry per strategy family
    (validation, file discovery, metadata, upload, data unit). Callers may
    register their own classes; any built-in class that is needed but not
    yet registered is imported lazily and registered on first use.
    """

    def __init__(self):
        # One independent registry per strategy family.
        self._validation_strategies = {}
        self._file_discovery_strategies = {}
        self._metadata_strategies = {}
        self._upload_strategies = {}
        self._data_unit_strategies = {}

    def register_validation_strategy(self, name: str, strategy_class: type) -> None:
        """Register a validation strategy class under ``name``."""
        self._validation_strategies[name] = strategy_class

    def register_file_discovery_strategy(self, name: str, strategy_class: type) -> None:
        """Register a file discovery strategy class under ``name``."""
        self._file_discovery_strategies[name] = strategy_class

    def register_metadata_strategy(self, name: str, strategy_class: type) -> None:
        """Register a metadata strategy class under ``name``."""
        self._metadata_strategies[name] = strategy_class

    def register_upload_strategy(self, name: str, strategy_class: type) -> None:
        """Register an upload strategy class under ``name``."""
        self._upload_strategies[name] = strategy_class

    def register_data_unit_strategy(self, name: str, strategy_class: type) -> None:
        """Register a data unit strategy class under ``name``."""
        self._data_unit_strategies[name] = strategy_class

    def create_validation_strategy(self, params: Dict[str, Any], context=None) -> ValidationStrategy:
        """Create a validation strategy based on parameters.

        Args:
            params: Parameters; ``params['validation_strategy']`` selects the
                registered name and defaults to ``'default'``.
            context: Accepted for signature parity with the other factory
                methods; not used here.

        Returns:
            An instance of the selected class, built with no arguments. An
            unregistered name falls back to the ``'default'`` strategy.
        """
        strategy_name = params.get('validation_strategy', 'default')

        if strategy_name not in self._validation_strategies:
            # Unknown names fall back to 'default'.
            strategy_name = 'default'
            # Only load the built-in when no default exists yet, so that a
            # caller-registered 'default' is never overwritten.
            if strategy_name not in self._validation_strategies:
                from .strategies.validation.default import DefaultValidationStrategy

                self.register_validation_strategy('default', DefaultValidationStrategy)

        strategy_class = self._validation_strategies[strategy_name]
        return strategy_class()

    def create_file_discovery_strategy(self, params: Dict[str, Any], context=None) -> FileDiscoveryStrategy:
        """Create a file discovery strategy based on parameters.

        Args:
            params: Parameters; a truthy ``params['is_recursive']`` (default
                ``True``) selects ``'recursive'``, otherwise ``'flat'``.
            context: Accepted for signature parity; not used here.

        Returns:
            An instance of the selected class, built with no arguments.
        """
        is_recursive = params.get('is_recursive', True)
        strategy_name = 'recursive' if is_recursive else 'flat'

        if strategy_name not in self._file_discovery_strategies:
            # Import the matching built-in strategy if not registered.
            if strategy_name == 'recursive':
                from .strategies.file_discovery.recursive import RecursiveFileDiscoveryStrategy

                self.register_file_discovery_strategy('recursive', RecursiveFileDiscoveryStrategy)
            else:
                from .strategies.file_discovery.flat import FlatFileDiscoveryStrategy

                self.register_file_discovery_strategy('flat', FlatFileDiscoveryStrategy)

        strategy_class = self._file_discovery_strategies[strategy_name]
        return strategy_class()

    def create_metadata_strategy(self, params: Dict[str, Any], context=None) -> MetadataStrategy:
        """Create the metadata strategy.

        Args:
            params: Accepted for signature parity; not read here.
            context: Accepted for signature parity; not used here.

        Returns:
            An instance of the class registered as ``'excel'``, built with
            no arguments.
        """
        # Always use Excel strategy for metadata processing
        # It will handle both specified paths and default meta.xlsx/meta.xls files
        strategy_name = 'excel'

        if strategy_name not in self._metadata_strategies:
            from .strategies.metadata.excel import ExcelMetadataStrategy

            self.register_metadata_strategy('excel', ExcelMetadataStrategy)

        strategy_class = self._metadata_strategies[strategy_name]
        return strategy_class()

    def create_upload_strategy(self, params: Dict[str, Any], context=None) -> UploadStrategy:
        """Create upload strategy (always uses synchronous upload for guaranteed ordering).

        Args:
            params: Accepted for signature parity; not read here.
            context: Passed to the strategy constructor; required.

        Returns:
            An instance of the class registered as ``'sync'``.

        Raises:
            ValueError: If ``context`` is ``None``.
        """
        if context is None:
            raise ValueError('Upload strategies require context parameter')

        # Always use sync upload to guarantee file order
        # This is critical for video frame extraction and PDF page extraction
        strategy_name = 'sync'

        if strategy_name not in self._upload_strategies:
            from .strategies.upload.sync import SyncUploadStrategy

            self.register_upload_strategy('sync', SyncUploadStrategy)

        strategy_class = self._upload_strategies[strategy_name]
        # Upload strategies always need context for client access
        return strategy_class(context)

    def create_data_unit_strategy(self, params: Dict[str, Any], context=None) -> DataUnitStrategy:
        """Create a data unit strategy based on parameters.

        Args:
            params: Parameters; ``params['creating_data_unit_batch_size']``
                (default ``1``) selects ``'batch'`` when greater than 1 and
                ``'single'`` otherwise. An explicit ``None`` is treated as 1.
            context: Passed to the strategy constructor; required.

        Returns:
            An instance of the selected class.

        Raises:
            ValueError: If ``context`` is ``None``.
        """
        if context is None:
            raise ValueError('Data unit strategies require context parameter')

        batch_size = params.get('creating_data_unit_batch_size', 1)
        if batch_size is None:
            # A key that is present but None would make the comparison below
            # raise TypeError; treat it like the missing-key default.
            batch_size = 1
        strategy_name = 'batch' if batch_size > 1 else 'single'

        if strategy_name not in self._data_unit_strategies:
            # Import the matching built-in strategy if not registered.
            if strategy_name == 'batch':
                from .strategies.data_unit.batch import BatchDataUnitStrategy

                self.register_data_unit_strategy('batch', BatchDataUnitStrategy)
            else:
                from .strategies.data_unit.single import SingleDataUnitStrategy

                self.register_data_unit_strategy('single', SingleDataUnitStrategy)

        strategy_class = self._data_unit_strategies[strategy_name]
        # Data unit strategies always need context for client access
        return strategy_class(context)

    def get_available_strategies(self) -> Dict[str, list]:
        """Get all strategy families and the names currently registered in each.

        Built-in strategies appear only after they have been lazily
        registered by the corresponding ``create_*`` call.
        """
        return {
            'validation': list(self._validation_strategies),
            'file_discovery': list(self._file_discovery_strategies),
            'metadata': list(self._metadata_strategies),
            'upload': list(self._upload_strategies),
            'data_unit': list(self._data_unit_strategies),
        }