synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
synapse_sdk/utils/file/video/transcode.py
@@ -0,0 +1,307 @@
+import asyncio
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, Optional
+
+import ffmpeg
+
+
+# Exception classes
+class VideoTranscodeError(Exception):
+    """Base exception for video transcoding errors."""
+
+    pass
+
+
+class UnsupportedFormatError(VideoTranscodeError):
+    """Raised when input format is not supported."""
+
+    pass
+
+
+class FFmpegNotFoundError(VideoTranscodeError):
+    """Raised when FFmpeg is not installed or not in PATH."""
+
+    pass
+
+
+class TranscodingFailedError(VideoTranscodeError):
+    """Raised when FFmpeg transcoding process fails."""
+
+    pass
+
+
+@dataclass
+class TranscodeConfig:
+    """Video transcoding configuration."""
+
+    vcodec: str = 'libx264'  # Video codec
+    preset: str = 'medium'  # Encoding preset (ultrafast to veryslow)
+    crf: int = 28  # Constant Rate Factor (0-51, lower=better quality)
+    acodec: str = 'aac'  # Audio codec
+    audio_bitrate: str = '128k'  # Audio bitrate
+    movflags: str = '+faststart'  # MP4 optimization flags
+    resolution: Optional[str] = None  # Target resolution (e.g., '1920x1080')
+    fps: Optional[int] = None  # Target frame rate
+    start_time: Optional[float] = None  # Trim start time in seconds
+    duration: Optional[float] = None  # Trim duration in seconds
+
+
+# Supported input formats
+SUPPORTED_FORMATS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.mpeg', '.mpg', '.m4v', '.3gp', '.ogv'}
+
+
+def _check_ffmpeg_available():
+    """Check if FFmpeg is available in PATH."""
+    if not shutil.which('ffmpeg'):
+        raise FFmpegNotFoundError(
+            'FFmpeg is not installed or not found in PATH. Please install FFmpeg to use video transcoding features.'
+        )
+
+
+def validate_video_format(video_path: str | Path) -> bool:
+    """
+    Check if video format is supported for transcoding.
+
+    Args:
+        video_path (str | Path): Path to the video file
+
+    Returns:
+        bool: True if format is supported, False otherwise
+    """
+    path = Path(video_path)
+    return path.suffix.lower() in SUPPORTED_FORMATS
+
+
+def get_video_info(video_path: str | Path) -> dict:
+    """
+    Extract video metadata (resolution, duration, codecs, etc.).
+
+    Args:
+        video_path (str | Path): Path to the video file
+
+    Returns:
+        dict: Video metadata information
+
+    Raises:
+        VideoTranscodeError: If unable to probe video file
+    """
+    _check_ffmpeg_available()
+
+    try:
+        probe = ffmpeg.probe(str(video_path))
+
+        video_info = {}
+
+        # Get format information
+        if 'format' in probe:
+            format_info = probe['format']
+            video_info['duration'] = float(format_info.get('duration', 0))
+            video_info['size'] = int(format_info.get('size', 0))
+            video_info['bitrate'] = int(format_info.get('bit_rate', 0))
+
+        # Get stream information
+        video_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'video']
+        audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']
+
+        if video_streams:
+            video_stream = video_streams[0]
+            video_info['width'] = int(video_stream.get('width', 0))
+            video_info['height'] = int(video_stream.get('height', 0))
+            video_info['video_codec'] = video_stream.get('codec_name', '')
+            video_info['fps'] = eval(video_stream.get('r_frame_rate', '0/1'))
+
+        if audio_streams:
+            audio_stream = audio_streams[0]
+            video_info['audio_codec'] = audio_stream.get('codec_name', '')
+            video_info['channels'] = int(audio_stream.get('channels', 0))
+            video_info['sample_rate'] = int(audio_stream.get('sample_rate', 0))
+
+        return video_info
+
+    except Exception as e:
+        raise VideoTranscodeError(f'Failed to probe video file: {str(e)}')
+
+
+def _build_ffmpeg_stream(input_path: str | Path, output_path: str | Path, config: TranscodeConfig):
+    """Build FFmpeg stream with configuration."""
+    stream = ffmpeg.input(str(input_path))
+
+    # Apply start time and duration trimming
+    if config.start_time is not None or config.duration is not None:
+        kwargs = {}
+        if config.start_time is not None:
+            kwargs['ss'] = config.start_time
+        if config.duration is not None:
+            kwargs['t'] = config.duration
+        stream = ffmpeg.input(str(input_path), **kwargs)
+
+    # Apply video filters
+    if config.resolution or config.fps:
+        if config.resolution:
+            width, height = config.resolution.split('x')
+            stream = ffmpeg.filter(stream, 'scale', width, height)
+        if config.fps:
+            stream = ffmpeg.filter(stream, 'fps', fps=config.fps)
+
+    # Build output with encoding parameters
+    output_kwargs = {
+        'vcodec': config.vcodec,
+        'preset': config.preset,
+        'crf': config.crf,
+        'acodec': config.acodec,
+        'audio_bitrate': config.audio_bitrate,
+        'movflags': config.movflags,
+    }
+
+    return ffmpeg.output(stream, str(output_path), **output_kwargs)
+
+
+def transcode_video(
+    input_path: str | Path,
+    output_path: str | Path,
+    config: Optional[TranscodeConfig] = None,
+    progress_callback: Optional[Callable[[float], None]] = None,
+) -> Path:
+    """
+    Transcode video with specified configuration.
+
+    Args:
+        input_path (str | Path): Path to input video file
+        output_path (str | Path): Path to output video file
+        config (Optional[TranscodeConfig]): Transcoding configuration
+        progress_callback (Optional[Callable[[float], None]]): Progress callback function
+
+    Returns:
+        Path: Path to the transcoded video file
+
+    Raises:
+        UnsupportedFormatError: If input format is not supported
+        FFmpegNotFoundError: If FFmpeg is not available
+        TranscodingFailedError: If transcoding fails
+    """
+    _check_ffmpeg_available()
+
+    input_path = Path(input_path)
+    output_path = Path(output_path)
+
+    if not validate_video_format(input_path):
+        raise UnsupportedFormatError(f'Unsupported video format: {input_path.suffix}')
+
+    if config is None:
+        config = TranscodeConfig()
+
+    # Ensure output directory exists
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    try:
+        # Build FFmpeg command
+        stream = _build_ffmpeg_stream(input_path, output_path, config)
+
+        # Run FFmpeg
+        if progress_callback:
+            # Get video duration for progress calculation
+            video_info = get_video_info(input_path)
+            total_duration = video_info.get('duration', 0)
+
+            # Run with progress monitoring
+            process = ffmpeg.run_async(stream, pipe_stderr=True, overwrite_output=True)
+
+            while True:
+                output = process.stderr.readline()
+                if output == b'' and process.poll() is not None:
+                    break
+                if output:
+                    line = output.decode('utf-8')
+                    # Parse progress from FFmpeg output
+                    if 'time=' in line and total_duration > 0:
+                        try:
+                            time_str = line.split('time=')[1].split()[0]
+                            hours, minutes, seconds = time_str.split(':')
+                            current_time = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
+                            progress = min(current_time / total_duration, 1.0)
+                            progress_callback(progress)
+                        except (ValueError, IndexError):
+                            pass
+
+            if process.returncode != 0:
+                raise TranscodingFailedError('FFmpeg process failed')
+        else:
+            # Run without progress monitoring
+            ffmpeg.run(stream, overwrite_output=True, quiet=True)
+
+        return output_path
+
+    except ffmpeg.Error as e:
+        error_message = e.stderr.decode('utf-8') if e.stderr else str(e)
+        raise TranscodingFailedError(f'Transcoding failed: {error_message}')
+    except Exception as e:
+        raise VideoTranscodeError(f'Unexpected error during transcoding: {str(e)}')
+
+
+def optimize_for_web(video_path: str | Path, output_path: str | Path) -> Path:
+    """
+    Quick optimization for web streaming with default settings.
+
+    Args:
+        video_path (str | Path): Path to input video file
+        output_path (str | Path): Path to output video file
+
+    Returns:
+        Path: Path to the optimized video file
+    """
+    config = TranscodeConfig(
+        preset='fast',  # Faster encoding for web optimization
+        crf=23,  # Better quality for web
+        movflags='+faststart+frag_keyframe+empty_moov',  # Advanced web optimization
+    )
+    return transcode_video(video_path, output_path, config)
+
+
+async def atranscode_video(
+    input_path: str | Path, output_path: str | Path, config: Optional[TranscodeConfig] = None
+) -> Path:
+    """
+    Async version of transcode_video.
+
+    Args:
+        input_path (str | Path): Path to input video file
+        output_path (str | Path): Path to output video file
+        config (Optional[TranscodeConfig]): Transcoding configuration
+
+    Returns:
+        Path: Path to the transcoded video file
+    """
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, transcode_video, input_path, output_path, config)
+
+
+def transcode_batch(
+    video_paths: list[Path], output_dir: Path, config: Optional[TranscodeConfig] = None, max_workers: int = 4
+) -> list[Path]:
+    """
+    Process multiple videos concurrently.
+
+    Args:
+        video_paths (list[Path]): List of input video file paths
+        output_dir (Path): Directory for output files
+        config (Optional[TranscodeConfig]): Transcoding configuration
+        max_workers (int): Maximum number of concurrent workers
+
+    Returns:
+        list[Path]: List of paths to transcoded video files
+    """
+    import concurrent.futures
+
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    def process_video(video_path):
+        output_path = output_dir / f'{video_path.stem}_transcoded.mp4'
+        return transcode_video(video_path, output_path, config)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        results = list(executor.map(process_video, video_paths))
+
+    return results
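For orientation, here is a minimal usage sketch of the new transcoding helpers; the module path follows the `synapse_sdk/utils/file/video/transcode.py` entry in the file list, and the input/output paths and resolution value are hypothetical. It requires FFmpeg on PATH and the `ffmpeg-python` package.

```python
from synapse_sdk.utils.file.video.transcode import TranscodeConfig, get_video_info, transcode_video

# Probe a (hypothetical) input file for its basic metadata.
info = get_video_info('input.avi')
print(info.get('width'), info.get('height'), info.get('duration'))

# Re-encode to H.264/AAC at 720p, trimming the first 5 seconds,
# and print progress as a fraction between 0.0 and 1.0.
config = TranscodeConfig(resolution='1280x720', crf=23, start_time=5.0)
transcode_video(
    'input.avi',
    'output.mp4',
    config=config,
    progress_callback=lambda p: print(f'{p:.0%}'),
)
```

`optimize_for_web` and `transcode_batch` wrap the same `transcode_video` call with web-friendly defaults and a thread pool, respectively.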
synapse_sdk/utils/{file.py → file.py.backup}
@@ -7,6 +7,7 @@ import operator
 import zipfile
 from functools import reduce
 from pathlib import Path
+from typing import IO, Any, Callable
 
 import aiohttp
 import requests
@@ -16,6 +17,39 @@ from synapse_sdk.utils.network import clean_url
 from synapse_sdk.utils.string import hash_text
 
 
+def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
+    """
+    Read a file in chunks for efficient memory usage during file processing.
+
+    This function is particularly useful for large files or when you need to process
+    files in chunks, such as for uploading or hashing.
+
+    Args:
+        file_path (str | Path): Path to the file to read
+        chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)
+
+    Yields:
+        bytes: File content chunks
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+        PermissionError: If the file can't be read due to permissions
+        OSError: If there's an OS-level error reading the file
+
+    Example:
+        ```python
+        from synapse_sdk.utils.file import read_file_in_chunks
+
+        # Read a file in 10MB chunks
+        for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
+            process_chunk(chunk)
+        ```
+    """
+    with open(file_path, 'rb') as file:
+        while chunk := file.read(chunk_size):
+            yield chunk
+
+
 def download_file(url, path_download, name=None, coerce=None, use_cached=True):
     chunk_size = 1024 * 1024 * 50
     cleaned_url = clean_url(url)  # remove query params and fragment
@@ -150,6 +184,49 @@ def calculate_checksum(file_path, prefix=''):
     return checksum
 
 
+def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
+    """
+    Calculate checksum for a file-like object.
+
+    Args:
+        file (IO[Any]): File-like object with read() method that supports reading in chunks
+        digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)
+
+    Returns:
+        str: Hexadecimal digest of the file contents
+
+    Example:
+        ```python
+        import hashlib
+        from io import BytesIO
+        from synapse_sdk.utils.file import get_checksum_from_file
+
+        # With BytesIO
+        data = BytesIO(b'Hello, world!')
+        checksum = get_checksum_from_file(data)
+
+        # With different hash algorithm
+        checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
+        ```
+    """
+    digest = digest_mod()
+    chunk_size = 4096
+
+    # Reset file pointer to beginning if possible
+    if hasattr(file, 'seek'):
+        file.seek(0)
+
+    while True:
+        chunk = file.read(chunk_size)
+        if not chunk:
+            break
+        if isinstance(chunk, str):
+            chunk = chunk.encode('utf-8')
+        digest.update(chunk)
+
+    return digest.hexdigest()
+
+
 def archive(input_path, output_path, append=False):
     input_path = Path(input_path)
     output_path = Path(output_path)
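The docstrings above already carry small examples; the sketch below simply ties the two new helpers together. The archive name is hypothetical, and the `synapse_sdk.utils.file` import path is the one the docstrings themselves use (presumably re-exported by the new `synapse_sdk/utils/file/__init__.py` package).

```python
import hashlib

from synapse_sdk.utils.file import get_checksum_from_file, read_file_in_chunks

# Stream a (hypothetical) large archive in 10 MB chunks, e.g. to feed an uploader.
total_bytes = 0
for chunk in read_file_in_chunks('dataset.tar', chunk_size=1024 * 1024 * 10):
    total_bytes += len(chunk)

# Checksum an already-open file object, swapping the default SHA-1 for SHA-256.
with open('dataset.tar', 'rb') as fp:
    digest = get_checksum_from_file(fp, digest_mod=hashlib.sha256)

print(total_bytes, digest)
```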
synapse_sdk/utils/network.py
@@ -1,5 +1,277 @@
+import asyncio
+import queue as queue_module
+import re
+import ssl
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from typing import Any, Dict, Generator, Optional
 from urllib.parse import urlparse, urlunparse
 
+import requests
+
+from synapse_sdk.clients.exceptions import ClientError
+
+
+@dataclass
+class StreamLimits:
+    """Configuration for streaming limits."""
+
+    max_messages: int = 10000
+    max_lines: int = 50000
+    max_bytes: int = 50 * 1024 * 1024  # 50MB
+    max_message_size: int = 10240  # 10KB
+    queue_size: int = 1000
+    exception_queue_size: int = 10
+
+
+def validate_resource_id(resource_id: Any, resource_name: str = 'resource') -> str:
+    """Validate resource ID to prevent injection attacks."""
+    if not resource_id:
+        raise ClientError(400, f'{resource_name} ID cannot be empty')
+
+    # Allow numeric IDs and UUID formats
+    id_str = str(resource_id)
+    if not re.match(r'^[a-zA-Z0-9\-_]+$', id_str):
+        raise ClientError(400, f'Invalid {resource_name} ID format')
+
+    if len(id_str) > 100:
+        raise ClientError(400, f'{resource_name} ID too long')
+
+    return id_str
+
+
+def validate_timeout(timeout: Any, max_timeout: int = 300) -> float:
+    """Validate timeout value with bounds checking."""
+    if not isinstance(timeout, (int, float)) or timeout <= 0:
+        raise ClientError(400, 'Timeout must be a positive number')
+
+    if timeout > max_timeout:
+        raise ClientError(400, f'Timeout cannot exceed {max_timeout} seconds')
+
+    return float(timeout)
+
+
+def sanitize_error_message(error_msg: str, context: str = '') -> str:
+    """Sanitize error messages to prevent information disclosure."""
+    sanitized = str(error_msg)[:100]
+    # Remove any potential sensitive information
+    sanitized = re.sub(r'["\']([^"\']*)["\']', '"[REDACTED]"', sanitized)
+
+    if context:
+        return f'{context}: {sanitized}'
+    return sanitized
+
+
+def http_to_websocket_url(url: str) -> str:
+    """Convert HTTP/HTTPS URL to WebSocket URL safely."""
+    try:
+        parsed = urlparse(url)
+        if parsed.scheme == 'http':
+            ws_scheme = 'ws'
+        elif parsed.scheme == 'https':
+            ws_scheme = 'wss'
+        else:
+            raise ClientError(400, f'Invalid URL scheme: {parsed.scheme}')
+
+        ws_url = urlunparse((ws_scheme, parsed.netloc, parsed.path, parsed.params, parsed.query, parsed.fragment))
+        return ws_url
+    except Exception as e:
+        raise ClientError(400, f'Invalid URL format: {str(e)[:50]}')
+
+
+def check_library_available(library_name: str) -> bool:
+    """Check if optional library is available."""
+    try:
+        __import__(library_name)
+        return True
+    except ImportError:
+        return False
+
+
+class WebSocketStreamManager:
+    """Manages secure WebSocket streaming with rate limiting and error handling."""
+
+    def __init__(self, thread_pool: ThreadPoolExecutor, limits: Optional[StreamLimits] = None):
+        self.thread_pool = thread_pool
+        self.limits = limits or StreamLimits()
+
+    def stream_logs(
+        self, ws_url: str, headers: Dict[str, str], timeout: float, context: str
+    ) -> Generator[str, None, None]:
+        """Stream logs from WebSocket with proper error handling and cleanup."""
+        if not check_library_available('websockets'):
+            raise ClientError(500, 'websockets library not available for WebSocket connections')
+
+        try:
+            import websockets
+
+            # Use bounded queues to prevent memory exhaustion
+            message_queue = queue_module.Queue(maxsize=self.limits.queue_size)
+            exception_queue = queue_module.Queue(maxsize=self.limits.exception_queue_size)
+
+            async def websocket_client():
+                try:
+                    # Add SSL verification and proper timeouts
+                    connect_kwargs = {
+                        'extra_headers': headers,
+                        'close_timeout': timeout,
+                        'ping_timeout': timeout,
+                        'ping_interval': timeout // 2,
+                    }
+
+                    # For secure connections, add SSL context
+                    if ws_url.startswith('wss://'):
+                        ssl_context = ssl.create_default_context()
+                        ssl_context.check_hostname = True
+                        ssl_context.verify_mode = ssl.CERT_REQUIRED
+                        connect_kwargs['ssl'] = ssl_context
+
+                    async with websockets.connect(ws_url, **connect_kwargs) as websocket:
+                        message_count = 0
+
+                        async for message in websocket:
+                            message_count += 1
+                            if message_count > self.limits.max_messages:
+                                exception_queue.put_nowait(ClientError(429, f'Message limit exceeded for {context}'))
+                                break
+
+                            # Validate message size
+                            if len(str(message)) > self.limits.max_message_size:
+                                continue
+
+                            try:
+                                message_queue.put_nowait(f'{message}\n')
+                            except queue_module.Full:
+                                exception_queue.put_nowait(ClientError(429, f'Message queue full for {context}'))
+                                break
+
+                    message_queue.put_nowait(None)  # Signal end
+
+                except websockets.exceptions.ConnectionClosed:
+                    exception_queue.put_nowait(ClientError(503, f'WebSocket connection closed for {context}'))
+                except asyncio.TimeoutError:
+                    exception_queue.put_nowait(ClientError(408, f'WebSocket timed out for {context}'))
+                except Exception as e:
+                    sanitized_error = sanitize_error_message(str(e), context)
+                    exception_queue.put_nowait(ClientError(500, sanitized_error))
+
+            # Use thread pool instead of raw threading
+            future = self.thread_pool.submit(lambda: asyncio.run(websocket_client()))
+
+            # Yield messages with proper cleanup
+            try:
+                while True:
+                    # Check for exceptions first
+                    try:
+                        exception = exception_queue.get_nowait()
+                        raise exception
+                    except queue_module.Empty:
+                        pass
+
+                    # Get message with timeout
+                    try:
+                        message = message_queue.get(timeout=1.0)
+                        if message is None:  # End signal
+                            break
+                        yield message
+                    except queue_module.Empty:
+                        # Check if future is done
+                        if future.done():
+                            try:
+                                future.result()  # This will raise any exception
+                                break  # Normal completion
+                            except Exception:
+                                break  # Error already in queue
+                        continue
+
+            finally:
+                # Cleanup: cancel future if still running
+                if not future.done():
+                    future.cancel()
+
+        except ImportError:
+            raise ClientError(500, 'websockets library not available for WebSocket connections')
+        except Exception as e:
+            if isinstance(e, ClientError):
+                raise
+            sanitized_error = sanitize_error_message(str(e), context)
+            raise ClientError(500, sanitized_error)
+
+
+class HTTPStreamManager:
+    """Manages HTTP streaming with rate limiting and proper resource cleanup."""
+
+    def __init__(self, requests_session: requests.Session, limits: Optional[StreamLimits] = None):
+        self.requests_session = requests_session
+        self.limits = limits or StreamLimits()
+
+    def stream_logs(
+        self, url: str, headers: Dict[str, str], timeout: tuple, context: str
+    ) -> Generator[str, None, None]:
+        """Stream logs from HTTP endpoint with proper error handling and cleanup."""
+        response = None
+        try:
+            # Use timeout for streaming to prevent hanging
+            response = self.requests_session.get(url, headers=headers, stream=True, timeout=timeout)
+            response.raise_for_status()
+
+            # Set up streaming with timeout and size limits
+            line_count = 0
+            total_bytes = 0
+
+            try:
+                for line in response.iter_lines(decode_unicode=True, chunk_size=1024):
+                    if line:
+                        line_count += 1
+                        total_bytes += len(line.encode('utf-8'))
+
+                        # Rate limiting checks
+                        if line_count > self.limits.max_lines:
+                            raise ClientError(429, f'Line limit exceeded for {context}')
+
+                        if total_bytes > self.limits.max_bytes:
+                            raise ClientError(429, f'Size limit exceeded for {context}')
+
+                        # Validate line size
+                        if len(line) > self.limits.max_message_size:
+                            continue
+
+                        yield f'{line}\n'
+
+            except requests.exceptions.ChunkedEncodingError:
+                raise ClientError(503, f'Log stream interrupted for {context}')
+            except requests.exceptions.ReadTimeout:
+                raise ClientError(408, f'Log stream timed out for {context}')
+
+        except requests.exceptions.ConnectTimeout:
+            raise ClientError(408, f'Failed to connect to log stream for {context}')
+        except requests.exceptions.ReadTimeout:
+            raise ClientError(408, f'Log stream read timeout for {context}')
+        except requests.exceptions.ConnectionError as e:
+            if 'Connection refused' in str(e):
+                raise ClientError(503, f'Agent connection refused for {context}')
+            else:
+                sanitized_error = sanitize_error_message(str(e), context)
+                raise ClientError(503, f'Agent connection error: {sanitized_error}')
+        except requests.exceptions.HTTPError as e:
+            if hasattr(e.response, 'status_code'):
+                status_code = e.response.status_code
+            else:
+                status_code = 500
+            raise ClientError(status_code, f'HTTP error streaming logs for {context}')
+        except Exception as e:
+            if isinstance(e, ClientError):
+                raise
+            sanitized_error = sanitize_error_message(str(e), context)
+            raise ClientError(500, sanitized_error)
+        finally:
+            # Ensure response is properly closed
+            if response is not None:
+                try:
+                    response.close()
+                except Exception:
+                    pass  # Ignore cleanup errors
+
 
 def clean_url(url, remove_query_params=True, remove_fragment=True):
     parsed = urlparse(url)
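A minimal sketch of how the new HTTP streaming pieces fit together, using `HTTPStreamManager`, `StreamLimits`, and `validate_resource_id` from the hunk above; the agent URL, token, and job id are hypothetical.

```python
import requests

from synapse_sdk.utils.network import HTTPStreamManager, StreamLimits, validate_resource_id

session = requests.Session()
manager = HTTPStreamManager(session, limits=StreamLimits(max_lines=1000))

# Hypothetical log endpoint and credentials.
job_id = validate_resource_id('job-42', resource_name='job')
url = f'https://agent.example.com/jobs/{job_id}/logs'
headers = {'Authorization': 'Token <redacted>'}

# stream_logs yields newline-terminated log lines and enforces the
# line/byte/message-size limits configured in StreamLimits.
for line in manager.stream_logs(url, headers, timeout=(5, 30), context=f'job {job_id}'):
    print(line, end='')
```

`WebSocketStreamManager.stream_logs` follows the same pattern over a `ws://` or `wss://` URL (see `http_to_websocket_url`), but requires the optional `websockets` dependency and a `ThreadPoolExecutor`.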
synapse_sdk/utils/storage/__init__.py
@@ -20,7 +20,11 @@ def get_storage(connection_param: str | dict):
     else:
         storage_scheme = urlparse(connection_param).scheme
 
-    assert storage_scheme in STORAGE_PROVIDERS.keys(), _('Storage provider not supported.')
+    assert storage_scheme in STORAGE_PROVIDERS.keys(), _(
+        f'Storage provider not supported. Got scheme: {storage_scheme}. '
+        f'Valid schemes: {", ".join(STORAGE_PROVIDERS.keys())}. '
+        f'Full connection_param: {connection_param}'
+    )
     return STORAGE_PROVIDERS[storage_scheme](connection_param)
 
 
@@ -29,7 +33,7 @@ def get_pathlib(storage_config: str | dict, path_root: str) -> Path:
 
     Args:
         storage_config (str | dict): The storage config by synapse-backend storage api.
-        path_root (str): The path root.
+        path_root (str): The path root.
 
     Returns:
         pathlib.Path: The pathlib object.
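The reworked assertion above replaces a bare 'Storage provider not supported.' with a message that names the offending scheme, the registered schemes, and the full connection parameter. A hedged sketch of what that looks like to a caller, assuming 'bogus' is not a scheme registered in STORAGE_PROVIDERS:

```python
from synapse_sdk.utils.storage import get_storage

try:
    get_storage('bogus://bucket/exports')  # hypothetical, unsupported scheme
except AssertionError as error:
    # With the new message, the error reports the scheme that failed,
    # the valid schemes, and the connection_param that was passed in.
    print(error)
```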