synapse-sdk 1.0.0a23__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +310 -5
  3. synapse_sdk/cli/alias/__init__.py +22 -0
  4. synapse_sdk/cli/alias/create.py +36 -0
  5. synapse_sdk/cli/alias/dataclass.py +31 -0
  6. synapse_sdk/cli/alias/default.py +16 -0
  7. synapse_sdk/cli/alias/delete.py +15 -0
  8. synapse_sdk/cli/alias/list.py +19 -0
  9. synapse_sdk/cli/alias/read.py +15 -0
  10. synapse_sdk/cli/alias/update.py +17 -0
  11. synapse_sdk/cli/alias/utils.py +61 -0
  12. synapse_sdk/cli/code_server.py +687 -0
  13. synapse_sdk/cli/config.py +440 -0
  14. synapse_sdk/cli/devtools.py +90 -0
  15. synapse_sdk/cli/plugin/__init__.py +33 -0
  16. synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
  17. synapse_sdk/{plugins/cli → cli/plugin}/publish.py +23 -15
  18. synapse_sdk/clients/agent/__init__.py +9 -3
  19. synapse_sdk/clients/agent/container.py +143 -0
  20. synapse_sdk/clients/agent/core.py +19 -0
  21. synapse_sdk/clients/agent/ray.py +298 -9
  22. synapse_sdk/clients/backend/__init__.py +30 -12
  23. synapse_sdk/clients/backend/annotation.py +13 -5
  24. synapse_sdk/clients/backend/core.py +31 -4
  25. synapse_sdk/clients/backend/data_collection.py +186 -0
  26. synapse_sdk/clients/backend/hitl.py +17 -0
  27. synapse_sdk/clients/backend/integration.py +16 -1
  28. synapse_sdk/clients/backend/ml.py +5 -1
  29. synapse_sdk/clients/backend/models.py +78 -0
  30. synapse_sdk/clients/base.py +384 -41
  31. synapse_sdk/clients/ray/serve.py +2 -0
  32. synapse_sdk/clients/validators/collections.py +31 -0
  33. synapse_sdk/devtools/config.py +94 -0
  34. synapse_sdk/devtools/server.py +41 -0
  35. synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
  36. synapse_sdk/devtools/streamlit_app/app.py +128 -0
  37. synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
  38. synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
  39. synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
  40. synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
  41. synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
  42. synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
  43. synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
  44. synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
  45. synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
  46. synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
  47. synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
  48. synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
  49. synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
  50. synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
  51. synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
  52. synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
  53. synapse_sdk/devtools/streamlit_app.py +10 -0
  54. synapse_sdk/loggers.py +120 -9
  55. synapse_sdk/plugins/README.md +1340 -0
  56. synapse_sdk/plugins/__init__.py +0 -13
  57. synapse_sdk/plugins/categories/base.py +117 -11
  58. synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
  59. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
  60. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  61. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  62. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  63. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  64. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  65. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  66. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  67. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  68. synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
  69. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  70. synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
  71. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +13 -12
  72. synapse_sdk/plugins/categories/neural_net/actions/train.py +1134 -31
  73. synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
  74. synapse_sdk/plugins/categories/neural_net/base/inference.py +1 -1
  75. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +32 -4
  76. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
  77. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  78. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  79. synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
  80. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  81. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  82. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  83. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  84. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  85. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  86. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  87. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  88. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  89. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  90. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  91. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  92. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  93. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  94. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  95. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
  96. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
  97. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
  98. synapse_sdk/plugins/categories/upload/__init__.py +0 -0
  99. synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
  100. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  101. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  102. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  103. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  104. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  105. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  106. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  107. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  108. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  109. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  110. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  111. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  112. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  113. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  114. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  115. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  116. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  117. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  118. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  119. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  120. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  121. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  122. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  123. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  124. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  125. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  126. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  127. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  128. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  129. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  130. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  131. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  132. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  133. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  134. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  135. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  136. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  137. synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
  138. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  139. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
  140. synapse_sdk/plugins/enums.py +3 -1
  141. synapse_sdk/plugins/models.py +148 -11
  142. synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
  143. synapse_sdk/plugins/templates/schema.json +491 -0
  144. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
  145. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
  146. synapse_sdk/plugins/utils/__init__.py +46 -0
  147. synapse_sdk/plugins/utils/actions.py +119 -0
  148. synapse_sdk/plugins/utils/config.py +203 -0
  149. synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
  150. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  151. synapse_sdk/plugins/utils/registry.py +58 -0
  152. synapse_sdk/shared/__init__.py +25 -0
  153. synapse_sdk/shared/enums.py +93 -0
  154. synapse_sdk/types.py +19 -0
  155. synapse_sdk/utils/converters/__init__.py +240 -0
  156. synapse_sdk/utils/converters/coco/__init__.py +0 -0
  157. synapse_sdk/utils/converters/coco/from_dm.py +322 -0
  158. synapse_sdk/utils/converters/coco/to_dm.py +215 -0
  159. synapse_sdk/utils/converters/dm/__init__.py +57 -0
  160. synapse_sdk/utils/converters/dm/base.py +137 -0
  161. synapse_sdk/utils/converters/dm/from_v1.py +273 -0
  162. synapse_sdk/utils/converters/dm/to_v1.py +321 -0
  163. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  164. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  165. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  166. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  167. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  168. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  169. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  170. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  171. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  172. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  173. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  174. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  175. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  176. synapse_sdk/utils/converters/dm/types.py +168 -0
  177. synapse_sdk/utils/converters/dm/utils.py +162 -0
  178. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  179. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  180. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  181. synapse_sdk/utils/converters/pascal/__init__.py +0 -0
  182. synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
  183. synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
  184. synapse_sdk/utils/converters/yolo/__init__.py +0 -0
  185. synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
  186. synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
  187. synapse_sdk/utils/dataset.py +46 -0
  188. synapse_sdk/utils/encryption.py +158 -0
  189. synapse_sdk/utils/file/__init__.py +58 -0
  190. synapse_sdk/utils/file/archive.py +32 -0
  191. synapse_sdk/utils/file/checksum.py +56 -0
  192. synapse_sdk/utils/file/chunking.py +31 -0
  193. synapse_sdk/utils/file/download.py +385 -0
  194. synapse_sdk/utils/file/encoding.py +40 -0
  195. synapse_sdk/utils/file/io.py +22 -0
  196. synapse_sdk/utils/file/upload.py +165 -0
  197. synapse_sdk/utils/file/video/__init__.py +29 -0
  198. synapse_sdk/utils/file/video/transcode.py +307 -0
  199. synapse_sdk/utils/file.py.backup +301 -0
  200. synapse_sdk/utils/http.py +138 -0
  201. synapse_sdk/utils/network.py +309 -0
  202. synapse_sdk/utils/storage/__init__.py +72 -0
  203. synapse_sdk/utils/storage/providers/__init__.py +183 -0
  204. synapse_sdk/utils/storage/providers/file_system.py +134 -0
  205. synapse_sdk/utils/storage/providers/gcp.py +13 -0
  206. synapse_sdk/utils/storage/providers/http.py +190 -0
  207. synapse_sdk/utils/storage/providers/s3.py +91 -0
  208. synapse_sdk/utils/storage/providers/sftp.py +47 -0
  209. synapse_sdk/utils/storage/registry.py +17 -0
  210. synapse_sdk-2025.12.3.dist-info/METADATA +123 -0
  211. synapse_sdk-2025.12.3.dist-info/RECORD +279 -0
  212. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +1 -1
  213. synapse_sdk/clients/backend/dataset.py +0 -51
  214. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  215. synapse_sdk/plugins/cli/__init__.py +0 -21
  216. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  217. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  218. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  219. synapse_sdk/utils/file.py +0 -168
  220. synapse_sdk/utils/storage.py +0 -91
  221. synapse_sdk-1.0.0a23.dist-info/METADATA +0 -44
  222. synapse_sdk-1.0.0a23.dist-info/RECORD +0 -114
  223. /synapse_sdk/{plugins/cli → cli/plugin}/run.py +0 -0
  224. /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
  225. /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
  226. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  227. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info/licenses}/LICENSE +0 -0
  228. {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,307 @@
1
+ import asyncio
2
+ import shutil
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Callable, Optional
6
+
7
+ import ffmpeg
8
+
9
+
10
+ # Exception classes
11
class VideoTranscodeError(Exception):
    """Root exception type for all video transcoding failures."""
15
+
16
+
17
class UnsupportedFormatError(VideoTranscodeError):
    """Signals that the input file's format is not one the transcoder accepts."""
21
+
22
+
23
class FFmpegNotFoundError(VideoTranscodeError):
    """Signals that the FFmpeg executable is missing or not reachable via PATH."""
27
+
28
+
29
class TranscodingFailedError(VideoTranscodeError):
    """Signals that the FFmpeg transcoding process itself failed."""
33
+
34
+
35
@dataclass
class TranscodeConfig:
    """Encoding options used when building the FFmpeg command for a transcode.

    The optional fields (``resolution``, ``fps``, ``start_time``, ``duration``)
    default to ``None``, meaning the corresponding option is not applied.
    """

    # Video codec name.
    vcodec: str = 'libx264'
    # Encoding preset (ultrafast to veryslow).
    preset: str = 'medium'
    # Constant Rate Factor (0-51, lower = better quality).
    crf: int = 28
    # Audio codec name.
    acodec: str = 'aac'
    # Audio bitrate.
    audio_bitrate: str = '128k'
    # MP4 optimization flags.
    movflags: str = '+faststart'
    # Target resolution as '<width>x<height>', e.g. '1920x1080'.
    resolution: Optional[str] = None
    # Target frame rate.
    fps: Optional[int] = None
    # Trim start time in seconds.
    start_time: Optional[float] = None
    # Trim duration in seconds.
    duration: Optional[float] = None
49
+
50
+
51
# File extensions (lowercase, with leading dot) accepted as transcoding input.
SUPPORTED_FORMATS = {
    '.mp4',
    '.avi',
    '.mov',
    '.mkv',
    '.webm',
    '.flv',
    '.wmv',
    '.mpeg',
    '.mpg',
    '.m4v',
    '.3gp',
    '.ogv',
}
53
+
54
+
55
def _check_ffmpeg_available():
    """Ensure an ``ffmpeg`` executable can be located on PATH.

    Raises:
        FFmpegNotFoundError: If ``shutil.which('ffmpeg')`` finds nothing
    """
    ffmpeg_binary = shutil.which('ffmpeg')
    if ffmpeg_binary:
        return

    raise FFmpegNotFoundError(
        'FFmpeg is not installed or not found in PATH. Please install FFmpeg to use video transcoding features.'
    )
61
+
62
+
63
def validate_video_format(video_path: str | Path) -> bool:
    """
    Tell whether a file's extension is in the set of supported input formats.

    Only the extension is inspected (case-insensitively); the file is not opened.

    Args:
        video_path (str | Path): Path to the video file

    Returns:
        bool: True if the extension is listed in SUPPORTED_FORMATS, False otherwise
    """
    extension = Path(video_path).suffix.lower()
    return extension in SUPPORTED_FORMATS
75
+
76
+
77
def _parse_frame_rate(rate) -> float:
    """Convert an ffprobe rate such as '30000/1001' or '25' to frames per second.

    Returns 0.0 when the value is malformed or has a zero denominator.
    """
    numerator, _, denominator = str(rate).partition('/')
    try:
        return float(numerator) / float(denominator or 1)
    except (ValueError, ZeroDivisionError):
        return 0.0


def get_video_info(video_path: str | Path) -> dict:
    """
    Extract video metadata (resolution, duration, codecs, etc.).

    Args:
        video_path (str | Path): Path to the video file

    Returns:
        dict: Metadata with any of the keys ``duration``, ``size``, ``bitrate``
            (from the container), ``width``, ``height``, ``video_codec``, ``fps``
            (from the first video stream) and ``audio_codec``, ``channels``,
            ``sample_rate`` (from the first audio stream). Keys are only present
            when the corresponding section exists in the probe result.

    Raises:
        FFmpegNotFoundError: If FFmpeg is not available
        VideoTranscodeError: If unable to probe video file
    """
    _check_ffmpeg_available()

    try:
        probe = ffmpeg.probe(str(video_path))

        video_info = {}

        # Container-level information
        if 'format' in probe:
            format_info = probe['format']
            video_info['duration'] = float(format_info.get('duration', 0))
            video_info['size'] = int(format_info.get('size', 0))
            video_info['bitrate'] = int(format_info.get('bit_rate', 0))

        # Stream-level information; tolerate a probe result without streams
        streams = probe.get('streams', [])
        video_streams = [stream for stream in streams if stream.get('codec_type') == 'video']
        audio_streams = [stream for stream in streams if stream.get('codec_type') == 'audio']

        if video_streams:
            video_stream = video_streams[0]
            video_info['width'] = int(video_stream.get('width', 0))
            video_info['height'] = int(video_stream.get('height', 0))
            video_info['video_codec'] = video_stream.get('codec_name', '')
            # The rate string comes from file metadata, so it is parsed
            # arithmetically instead of being passed to eval().
            video_info['fps'] = _parse_frame_rate(video_stream.get('r_frame_rate', '0/1'))

        if audio_streams:
            audio_stream = audio_streams[0]
            video_info['audio_codec'] = audio_stream.get('codec_name', '')
            video_info['channels'] = int(audio_stream.get('channels', 0))
            video_info['sample_rate'] = int(audio_stream.get('sample_rate', 0))

        return video_info

    except Exception as e:
        raise VideoTranscodeError(f'Failed to probe video file: {str(e)}') from e
125
+
126
+
127
def _build_ffmpeg_stream(input_path: str | Path, output_path: str | Path, config: TranscodeConfig):
    """Build the FFmpeg output node for one transcode.

    Args:
        input_path (str | Path): Path to input video file
        output_path (str | Path): Path to output video file
        config (TranscodeConfig): Trim, filter and encoding settings

    Returns:
        The ffmpeg-python output stream, ready to be passed to ``ffmpeg.run`` /
        ``ffmpeg.run_async``.

    Raises:
        ValueError: If ``config.resolution`` is not of the form '<width>x<height>'
    """
    # Trim options are input options, so collect them before creating the
    # single input node.
    input_kwargs = {}
    if config.start_time is not None:
        input_kwargs['ss'] = config.start_time
    if config.duration is not None:
        input_kwargs['t'] = config.duration
    stream = ffmpeg.input(str(input_path), **input_kwargs)

    # NOTE(review): the filters are applied to the whole input node and only the
    # filtered stream is passed to the output; this may leave audio out of the
    # result when resolution/fps is set - confirm against ffmpeg-python behavior.
    if config.resolution:
        width, height = config.resolution.split('x')
        stream = ffmpeg.filter(stream, 'scale', width, height)
    if config.fps:
        stream = ffmpeg.filter(stream, 'fps', fps=config.fps)

    # Encoding parameters for the output
    output_kwargs = {
        'vcodec': config.vcodec,
        'preset': config.preset,
        'crf': config.crf,
        'acodec': config.acodec,
        'audio_bitrate': config.audio_bitrate,
        'movflags': config.movflags,
    }

    return ffmpeg.output(stream, str(output_path), **output_kwargs)
159
+
160
+
161
def _run_with_progress(stream, total_duration, progress_callback):
    """Run ``stream`` asynchronously and report progress parsed from FFmpeg's stderr.

    Raises:
        TranscodingFailedError: If the FFmpeg process exits with a non-zero code
    """
    process = ffmpeg.run_async(stream, pipe_stderr=True, overwrite_output=True)

    try:
        while True:
            output = process.stderr.readline()
            if output == b'' and process.poll() is not None:
                break
            if not output:
                continue

            # FFmpeg may echo metadata in arbitrary encodings; never let a
            # decode error abort the transcode.
            line = output.decode('utf-8', errors='replace')
            if 'time=' in line and total_duration > 0:
                try:
                    time_str = line.split('time=')[1].split()[0]
                    hours, minutes, seconds = time_str.split(':')
                    current_time = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
                    progress = min(current_time / total_duration, 1.0)
                    progress_callback(progress)
                except (ValueError, IndexError):
                    # Unparseable progress values (e.g. 'time=N/A') are skipped.
                    pass
    except BaseException:
        # Do not leave an orphaned FFmpeg process behind if monitoring fails.
        process.kill()
        process.wait()
        raise

    if process.returncode != 0:
        raise TranscodingFailedError('FFmpeg process failed')


def transcode_video(
    input_path: str | Path,
    output_path: str | Path,
    config: Optional[TranscodeConfig] = None,
    progress_callback: Optional[Callable[[float], None]] = None,
) -> Path:
    """
    Transcode video with specified configuration.

    Args:
        input_path (str | Path): Path to input video file
        output_path (str | Path): Path to output video file; parent directories
            are created when missing and an existing file is overwritten
        config (Optional[TranscodeConfig]): Transcoding configuration; defaults
            to ``TranscodeConfig()``
        progress_callback (Optional[Callable[[float], None]]): Called with a
            fraction in [0.0, 1.0] as FFmpeg reports progress. Only invoked when
            the input duration can be determined.

    Returns:
        Path: Path to the transcoded video file

    Raises:
        UnsupportedFormatError: If input format is not supported
        FFmpegNotFoundError: If FFmpeg is not available
        TranscodingFailedError: If transcoding fails
        VideoTranscodeError: For probing failures or any other unexpected error
    """
    _check_ffmpeg_available()

    input_path = Path(input_path)
    output_path = Path(output_path)

    if not validate_video_format(input_path):
        raise UnsupportedFormatError(f'Unsupported video format: {input_path.suffix}')

    if config is None:
        config = TranscodeConfig()

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        stream = _build_ffmpeg_stream(input_path, output_path, config)

        if progress_callback:
            # Total duration is the denominator for the progress fraction.
            total_duration = get_video_info(input_path).get('duration', 0)
            _run_with_progress(stream, total_duration, progress_callback)
        else:
            ffmpeg.run(stream, overwrite_output=True, quiet=True)

        return output_path

    except VideoTranscodeError:
        # Already a domain error (e.g. TranscodingFailedError); re-wrapping it
        # in the generic handler below would hide its specific type.
        raise
    except ffmpeg.Error as e:
        error_message = e.stderr.decode('utf-8', errors='replace') if e.stderr else str(e)
        raise TranscodingFailedError(f'Transcoding failed: {error_message}') from e
    except Exception as e:
        raise VideoTranscodeError(f'Unexpected error during transcoding: {str(e)}') from e
241
+
242
+
243
def optimize_for_web(video_path: str | Path, output_path: str | Path) -> Path:
    """
    Transcode a video using a fixed preset intended for web streaming.

    All other TranscodeConfig fields keep their defaults.

    Args:
        video_path (str | Path): Path to input video file
        output_path (str | Path): Path to output video file

    Returns:
        Path: Path to the optimized video file
    """
    web_settings = {
        'preset': 'fast',  # quicker encode than the default preset
        'crf': 23,  # lower CRF than the default, i.e. better quality
        'movflags': '+faststart+frag_keyframe+empty_moov',  # MP4 flags for streaming
    }
    return transcode_video(video_path, output_path, TranscodeConfig(**web_settings))
260
+
261
+
262
async def atranscode_video(
    input_path: str | Path, output_path: str | Path, config: Optional[TranscodeConfig] = None
) -> Path:
    """
    Async version of transcode_video.

    The blocking transcode runs in the event loop's default executor so the
    loop stays responsive while FFmpeg works.

    Args:
        input_path (str | Path): Path to input video file
        output_path (str | Path): Path to output video file
        config (Optional[TranscodeConfig]): Transcoding configuration

    Returns:
        Path: Path to the transcoded video file

    Raises:
        Same exceptions as transcode_video.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # supported way to obtain it.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, transcode_video, input_path, output_path, config)
278
+
279
+
280
def transcode_batch(
    video_paths: list[Path], output_dir: Path, config: Optional[TranscodeConfig] = None, max_workers: int = 4
) -> list[Path]:
    """
    Process multiple videos concurrently.

    Each input is written to ``<output_dir>/<input stem>_transcoded.mp4``, so
    inputs that share a file stem write to the same output path.

    Args:
        video_paths (list[Path]): List of input video file paths (strings are
            accepted and converted to Path)
        output_dir (Path): Directory for output files; created when missing
        config (Optional[TranscodeConfig]): Transcoding configuration
        max_workers (int): Maximum number of concurrent workers

    Returns:
        list[Path]: List of paths to transcoded video files, in input order

    Raises:
        Same exceptions as transcode_video, raised while results are collected.
    """
    import concurrent.futures

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    def process_video(video_path):
        # Coerce so plain string paths work as well as Path objects.
        video_path = Path(video_path)
        output_path = output_dir / f'{video_path.stem}_transcoded.mp4'
        return transcode_video(video_path, output_path, config)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(process_video, video_paths))
@@ -0,0 +1,301 @@
1
+ import asyncio
2
+ import base64
3
+ import hashlib
4
+ import json
5
+ import mimetypes
6
+ import operator
7
+ import zipfile
8
+ from functools import reduce
9
+ from pathlib import Path
10
+ from typing import IO, Any, Callable
11
+
12
+ import aiohttp
13
+ import requests
14
+ import yaml
15
+
16
+ from synapse_sdk.utils.network import clean_url
17
+ from synapse_sdk.utils.string import hash_text
18
+
19
+
20
def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
    """
    Lazily yield the binary contents of a file, one chunk at a time.

    Useful for large files that should not be loaded into memory at once,
    for example when uploading or hashing.

    Args:
        file_path (str | Path): Path to the file to read
        chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)

    Yields:
        bytes: Consecutive chunks of the file; the last one may be shorter

    Raises:
        FileNotFoundError: If the file doesn't exist
        PermissionError: If the file can't be read due to permissions
        OSError: If there's an OS-level error reading the file

    Example:
        ```python
        from synapse_sdk.utils.file import read_file_in_chunks

        # Read a file in 10MB chunks
        for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
            process_chunk(chunk)
        ```
    """
    with open(file_path, 'rb') as stream:
        # read() returns b'' at end of file, which ends the iteration.
        yield from iter(lambda: stream.read(chunk_size), b'')
51
+
52
+
53
def download_file(url, path_download, name=None, coerce=None, use_cached=True):
    """
    Download a URL into a directory and return the local path.

    Args:
        url (str): URL to download
        path_download (str | Path): Existing directory to store the file in
        name (str, optional): Base file name. When given, the cache is bypassed
            and the file is always downloaded. When omitted, a hash of the URL
            (without query string and fragment) is used. In both cases the
            URL's file extension is appended.
        coerce (Callable, optional): Applied to the resulting path before returning
        use_cached (bool): Reuse an already downloaded file when present

    Returns:
        Path, or whatever ``coerce`` returns: Location of the downloaded file

    Raises:
        requests.HTTPError: If the server answers with an error status
    """
    chunk_size = 1024 * 1024 * 50
    cleaned_url = clean_url(url)  # remove query params and fragment

    if name:
        use_cached = False
    else:
        name = hash_text(cleaned_url)

    name += Path(cleaned_url).suffix

    path = Path(path_download) / name

    if not use_cached or not path.is_file():
        # Stream into a sibling temp file and rename on success, so an
        # interrupted download never leaves a truncated file that a later call
        # would accept as a valid cache entry.
        partial_path = path.with_name(f'{path.name}.part')
        try:
            # The context manager releases the connection even on errors.
            with requests.get(url, allow_redirects=True, stream=True) as response:
                response.raise_for_status()

                with partial_path.open('wb') as file:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        file.write(chunk)

            partial_path.replace(path)
        finally:
            partial_path.unlink(missing_ok=True)

    if coerce:
        path = coerce(path)

    return path
78
+
79
+
80
def files_url_to_path(files, coerce=None, file_field=None):
    """
    Replace URLs in a mapping with local paths of the downloaded files (in place).

    Files are stored under ``get_temp_path('media')``.

    Args:
        files (dict): Mapping to update. Without ``file_field`` every value is
            processed: a string value is treated as a URL and replaced by the
            downloaded path; any other value has its ``'url'`` key removed and a
            ``'path'`` key set instead.
        coerce (Callable, optional): Passed through to download_file
        file_field (str, optional): When given, only ``files[file_field]`` is
            downloaded and replaced
    """
    download_dir = get_temp_path('media')
    download_dir.mkdir(parents=True, exist_ok=True)

    if file_field:
        files[file_field] = download_file(files[file_field], download_dir, coerce=coerce)
        return

    for key in files:
        entry = files[key]
        if isinstance(entry, str):
            files[key] = download_file(entry, download_dir, coerce=coerce)
        else:
            entry['path'] = download_file(entry.pop('url'), download_dir, coerce=coerce)
91
+
92
+
93
def files_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False, is_async=False):
    """
    Download the files referenced by the given fields of one or more objects (in place).

    Args:
        objs (dict | list[dict]): Object, or list of objects when ``is_list`` is True
        files_fields (list[str]): Dotted key paths (e.g. ``'data.files'``) that
            point either to a URL string or to a mapping of files
        coerce (Callable, optional): Passed through to the download helpers
        is_list (bool): Whether ``objs`` is a list of objects
        is_async (bool): Run the downloads through the asyncio implementation
    """
    if is_async:
        asyncio.run(afiles_url_to_path_from_objs(objs, files_fields, coerce=coerce, is_list=is_list))
        return

    if not is_list:
        objs = [objs]

    for obj in objs:
        for files_field in files_fields:
            *parent_keys, leaf_key = files_field.split('.')
            try:
                # Resolve the container that holds the leaf so a string URL can
                # be replaced in place, also for nested (dotted) fields.
                container = reduce(operator.getitem, parent_keys, obj)
                files = container[leaf_key]
                if isinstance(files, str):
                    files_url_to_path(container, coerce=coerce, file_field=leaf_key)
                else:
                    files_url_to_path(files, coerce=coerce)
            except KeyError:
                # Best effort: objects lacking the field (or entries lacking a
                # 'url' key) are skipped.
                pass
110
+
111
+
112
async def adownload_file(url, path_download, name=None, coerce=None, use_cached=True):
    """
    Asynchronous counterpart of download_file.

    Args:
        url (str): URL to download
        path_download (str | Path): Existing directory to store the file in
        name (str, optional): Base file name. When given, the cache is bypassed.
            When omitted, a hash of the URL (without query string and fragment)
            is used. In both cases the URL's file extension is appended.
        coerce (Callable, optional): Applied to the resulting path before returning
        use_cached (bool): Reuse an already downloaded file when present

    Returns:
        Path, or whatever ``coerce`` returns: Location of the downloaded file

    Raises:
        aiohttp.ClientResponseError: If the server answers with an error status
    """
    chunk_size = 1024 * 1024 * 50
    cleaned_url = clean_url(url)  # remove query params and fragment

    if name:
        use_cached = False
    else:
        name = hash_text(cleaned_url)

    name += Path(cleaned_url).suffix

    path = Path(path_download) / name

    if not use_cached or not path.is_file():
        # Stream into a sibling temp file and rename on success, so a failed
        # download never leaves a truncated file behind as a cache entry.
        partial_path = path.with_name(f'{path.name}.part')
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    # Match download_file: never store an HTTP error body.
                    response.raise_for_status()

                    with partial_path.open('wb') as file:
                        while chunk := await response.content.read(chunk_size):
                            file.write(chunk)

            partial_path.replace(path)
        finally:
            partial_path.unlink(missing_ok=True)

    if coerce:
        path = coerce(path)

    return path
136
+
137
+
138
async def afiles_url_to_path(files, coerce=None):
    """
    Asynchronously replace URLs in a mapping with downloaded local paths (in place).

    Files are stored under ``get_temp_path('media')`` and downloaded one after
    another.

    Args:
        files (dict): Mapping to update. A string value is treated as a URL and
            replaced by the downloaded path; any other value has its ``'url'``
            key removed and a ``'path'`` key set instead.
        coerce (Callable, optional): Passed through to adownload_file
    """
    download_dir = get_temp_path('media')
    download_dir.mkdir(parents=True, exist_ok=True)

    for key in files:
        entry = files[key]
        if isinstance(entry, str):
            files[key] = await adownload_file(entry, download_dir, coerce=coerce)
        else:
            entry['path'] = await adownload_file(entry.pop('url'), download_dir, coerce=coerce)
146
+
147
+
148
async def afiles_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False):
    """
    Concurrently download the files referenced by the given fields of one or more objects (in place).

    Args:
        objs (dict | list[dict]): Object, or list of objects when ``is_list`` is True
        files_fields (list[str]): Dotted key paths (e.g. ``'data.files'``) that
            point either to a URL string or to a mapping of files
        coerce (Callable, optional): Passed through to the download helpers
        is_list (bool): Whether ``objs`` is a list of objects
    """
    if not is_list:
        objs = [objs]

    async def _download_into(container, key):
        # A single URL string: replace it in its container, mirroring what the
        # synchronous implementation does for string-valued fields.
        path_download = get_temp_path('media')
        path_download.mkdir(parents=True, exist_ok=True)
        container[key] = await adownload_file(container[key], path_download, coerce=coerce)

    tasks = []

    for obj in objs:
        for files_field in files_fields:
            *parent_keys, leaf_key = files_field.split('.')
            try:
                container = reduce(operator.getitem, parent_keys, obj)
                files = container[leaf_key]
            except KeyError:
                # Objects lacking the field are skipped.
                continue

            if isinstance(files, str):
                tasks.append(_download_into(container, leaf_key))
            else:
                tasks.append(afiles_url_to_path(files, coerce=coerce))

    await asyncio.gather(*tasks)
163
+
164
+
165
def get_dict_from_file(file_path):
    """
    Load a YAML or JSON file and return the parsed content.

    Files with a ``.yaml`` or ``.yml`` extension (any letter case) are parsed
    as YAML; everything else is parsed as JSON.

    Args:
        file_path (str | os.PathLike): Path to the file

    Returns:
        The deserialized document (typically a dict)
    """
    # Path() accepts str and any os.PathLike, and is a no-op for Path objects.
    file_path = Path(file_path)

    with file_path.open(encoding='utf-8') as f:
        if file_path.suffix.lower() in ('.yaml', '.yml'):
            return yaml.safe_load(f)
        return json.load(f)
174
+
175
+
176
def calculate_checksum(file_path, prefix=''):
    """
    Compute the MD5 hex digest of a file, read in 4096-byte blocks.

    Args:
        file_path (str | Path): Path to the file to hash
        prefix: When truthy, the digest is returned with a ``'dev-'`` prefix

    Returns:
        str: The hex digest, optionally prefixed with ``'dev-'``
    """
    hasher = hashlib.md5()
    with open(file_path, 'rb') as stream:
        while True:
            block = stream.read(4096)
            if block == b'':
                break
            hasher.update(block)

    digest = hasher.hexdigest()
    # NOTE(review): the value of `prefix` is not used, only its truthiness; the
    # literal 'dev-' is always what gets prepended - confirm this is intended.
    return f'dev-{digest}' if prefix else digest
185
+
186
+
187
def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
    """
    Hash the full contents of a file-like object.

    The object is rewound to position 0 first when it has a ``seek`` method.
    Text chunks are encoded as UTF-8 before hashing.

    Args:
        file (IO[Any]): File-like object with read() method that supports reading in chunks
        digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)

    Returns:
        str: Hexadecimal digest of the file contents

    Example:
        ```python
        import hashlib
        from io import BytesIO
        from synapse_sdk.utils.file import get_checksum_from_file

        # With BytesIO
        data = BytesIO(b'Hello, world!')
        checksum = get_checksum_from_file(data)

        # With different hash algorithm
        checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
        ```
    """
    hasher = digest_mod()

    # Start from the beginning whenever the object can be rewound
    if hasattr(file, 'seek'):
        file.seek(0)

    # An empty read marks the end of the stream.
    while piece := file.read(4096):
        hasher.update(piece.encode('utf-8') if isinstance(piece, str) else piece)

    return hasher.hexdigest()
228
+
229
+
230
def archive(input_path, output_path, append=False):
    """
    Write a file or a directory tree into a deflate-compressed ZIP archive.

    A single file is stored under its own name. For a directory, every file
    below it is stored with a name relative to the directory's parent, so the
    directory name is the top-level entry in the archive.

    Parameters:
        input_path (str | Path): File or directory to add.
        output_path (str | Path): The ZIP file to write.
        append (bool): Add to an existing archive instead of overwriting it.
            Has no effect when the archive does not exist yet.
    """
    source = Path(input_path)
    target = Path(output_path)

    if append and target.exists():
        mode = 'a'
    else:
        mode = 'w'

    with zipfile.ZipFile(target, mode=mode, compression=zipfile.ZIP_DEFLATED) as bundle:
        if source.is_file():
            bundle.write(source, source.name)
            return

        base = source.parent
        for member in source.rglob('*'):
            # Directories are not stored as separate entries
            if member.is_file():
                bundle.write(member, member.relative_to(base))
243
+
244
+
245
def unarchive(file_path, output_path):
    """
    Extract every member of a ZIP file into a directory.

    Parameters:
        file_path (str | Path): The path to the ZIP file.
        output_path (str | Path): Destination directory; created (including
            parents) when it does not exist.
    """
    destination = Path(output_path)
    destination.mkdir(parents=True, exist_ok=True)

    archive_file = zipfile.ZipFile(str(file_path), 'r')
    with archive_file:
        archive_file.extractall(destination)
258
+
259
+
260
def get_temp_path(sub_path=None):
    """
    Return the SDK's temporary directory path, optionally extended by a sub path.

    The path is only computed; nothing is created on disk.

    Args:
        sub_path (str | Path, optional): Relative path appended to ``/tmp/datamaker``

    Returns:
        Path: ``/tmp/datamaker`` or ``/tmp/datamaker/<sub_path>``
    """
    base = Path('/tmp/datamaker')
    return base / sub_path if sub_path else base
265
+
266
+
267
def convert_file_to_base64(file_path):
    """
    Convert a file to a base64 ``data:`` URI.

    Args:
        file_path (str | Path): Path to the file to convert. A string that
            already starts with ``data:`` is returned unchanged.

    Returns:
        str: ``data:<mime type>;base64,<encoded contents>``

    Raises:
        FileNotFoundError: If the file does not exist
        Exception: For any other failure, including a MIME type that cannot be
            guessed from the file name
    """
    # FIXME base64 is sent sometimes.
    if isinstance(file_path, str) and file_path.startswith('data:'):
        return file_path

    path = Path(file_path)

    try:
        binary_content = path.read_bytes()
        base64_encoded = base64.b64encode(binary_content).decode('utf-8')

        mime_type, _ = mimetypes.guess_type(path)
        # Explicit check instead of `assert`, which is removed under `python -O`
        # and would let 'data:None;base64,...' through.
        if mime_type is None:
            raise ValueError('MIME type cannot be guessed')

        return f'data:{mime_type};base64,{base64_encoded}'

    except FileNotFoundError:
        raise FileNotFoundError(f'File not found: {file_path}')
    except Exception as e:
        raise Exception(f'Error converting file to base64: {str(e)}') from e