synapse-sdk 1.0.0a11__py3-none-any.whl → 2026.1.1b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. Click here for more details.

Files changed (261) hide show
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +9 -8
  3. synapse_sdk/cli/agent/__init__.py +25 -0
  4. synapse_sdk/cli/agent/config.py +104 -0
  5. synapse_sdk/cli/agent/select.py +197 -0
  6. synapse_sdk/cli/auth.py +104 -0
  7. synapse_sdk/cli/main.py +1025 -0
  8. synapse_sdk/cli/plugin/__init__.py +58 -0
  9. synapse_sdk/cli/plugin/create.py +566 -0
  10. synapse_sdk/cli/plugin/job.py +196 -0
  11. synapse_sdk/cli/plugin/publish.py +322 -0
  12. synapse_sdk/cli/plugin/run.py +131 -0
  13. synapse_sdk/cli/plugin/test.py +200 -0
  14. synapse_sdk/clients/README.md +239 -0
  15. synapse_sdk/clients/__init__.py +5 -0
  16. synapse_sdk/clients/_template.py +266 -0
  17. synapse_sdk/clients/agent/__init__.py +84 -29
  18. synapse_sdk/clients/agent/async_ray.py +289 -0
  19. synapse_sdk/clients/agent/container.py +83 -0
  20. synapse_sdk/clients/agent/plugin.py +101 -0
  21. synapse_sdk/clients/agent/ray.py +296 -39
  22. synapse_sdk/clients/backend/__init__.py +152 -12
  23. synapse_sdk/clients/backend/annotation.py +164 -22
  24. synapse_sdk/clients/backend/core.py +101 -0
  25. synapse_sdk/clients/backend/data_collection.py +292 -0
  26. synapse_sdk/clients/backend/hitl.py +87 -0
  27. synapse_sdk/clients/backend/integration.py +374 -46
  28. synapse_sdk/clients/backend/ml.py +134 -22
  29. synapse_sdk/clients/backend/models.py +247 -0
  30. synapse_sdk/clients/base.py +538 -59
  31. synapse_sdk/clients/exceptions.py +35 -7
  32. synapse_sdk/clients/pipeline/__init__.py +5 -0
  33. synapse_sdk/clients/pipeline/client.py +636 -0
  34. synapse_sdk/clients/protocols.py +178 -0
  35. synapse_sdk/clients/utils.py +86 -8
  36. synapse_sdk/clients/validation.py +58 -0
  37. synapse_sdk/enums.py +76 -0
  38. synapse_sdk/exceptions.py +168 -0
  39. synapse_sdk/integrations/__init__.py +74 -0
  40. synapse_sdk/integrations/_base.py +119 -0
  41. synapse_sdk/integrations/_context.py +53 -0
  42. synapse_sdk/integrations/ultralytics/__init__.py +78 -0
  43. synapse_sdk/integrations/ultralytics/_callbacks.py +126 -0
  44. synapse_sdk/integrations/ultralytics/_patches.py +124 -0
  45. synapse_sdk/loggers.py +476 -95
  46. synapse_sdk/mcp/MCP.md +69 -0
  47. synapse_sdk/mcp/__init__.py +48 -0
  48. synapse_sdk/mcp/__main__.py +6 -0
  49. synapse_sdk/mcp/config.py +349 -0
  50. synapse_sdk/mcp/prompts/__init__.py +4 -0
  51. synapse_sdk/mcp/resources/__init__.py +4 -0
  52. synapse_sdk/mcp/server.py +1352 -0
  53. synapse_sdk/mcp/tools/__init__.py +6 -0
  54. synapse_sdk/plugins/__init__.py +133 -9
  55. synapse_sdk/plugins/action.py +229 -0
  56. synapse_sdk/plugins/actions/__init__.py +82 -0
  57. synapse_sdk/plugins/actions/dataset/__init__.py +37 -0
  58. synapse_sdk/plugins/actions/dataset/action.py +471 -0
  59. synapse_sdk/plugins/actions/export/__init__.py +55 -0
  60. synapse_sdk/plugins/actions/export/action.py +183 -0
  61. synapse_sdk/plugins/actions/export/context.py +59 -0
  62. synapse_sdk/plugins/actions/inference/__init__.py +84 -0
  63. synapse_sdk/plugins/actions/inference/action.py +285 -0
  64. synapse_sdk/plugins/actions/inference/context.py +81 -0
  65. synapse_sdk/plugins/actions/inference/deployment.py +322 -0
  66. synapse_sdk/plugins/actions/inference/serve.py +252 -0
  67. synapse_sdk/plugins/actions/train/__init__.py +54 -0
  68. synapse_sdk/plugins/actions/train/action.py +326 -0
  69. synapse_sdk/plugins/actions/train/context.py +57 -0
  70. synapse_sdk/plugins/actions/upload/__init__.py +49 -0
  71. synapse_sdk/plugins/actions/upload/action.py +165 -0
  72. synapse_sdk/plugins/actions/upload/context.py +61 -0
  73. synapse_sdk/plugins/config.py +98 -0
  74. synapse_sdk/plugins/context/__init__.py +109 -0
  75. synapse_sdk/plugins/context/env.py +113 -0
  76. synapse_sdk/plugins/datasets/__init__.py +113 -0
  77. synapse_sdk/plugins/datasets/converters/__init__.py +76 -0
  78. synapse_sdk/plugins/datasets/converters/base.py +347 -0
  79. synapse_sdk/plugins/datasets/converters/yolo/__init__.py +9 -0
  80. synapse_sdk/plugins/datasets/converters/yolo/from_dm.py +468 -0
  81. synapse_sdk/plugins/datasets/converters/yolo/to_dm.py +381 -0
  82. synapse_sdk/plugins/datasets/formats/__init__.py +82 -0
  83. synapse_sdk/plugins/datasets/formats/dm.py +351 -0
  84. synapse_sdk/plugins/datasets/formats/yolo.py +240 -0
  85. synapse_sdk/plugins/decorators.py +83 -0
  86. synapse_sdk/plugins/discovery.py +790 -0
  87. synapse_sdk/plugins/docs/ACTION_DEV_GUIDE.md +933 -0
  88. synapse_sdk/plugins/docs/ARCHITECTURE.md +1225 -0
  89. synapse_sdk/plugins/docs/LOGGING_SYSTEM.md +683 -0
  90. synapse_sdk/plugins/docs/OVERVIEW.md +531 -0
  91. synapse_sdk/plugins/docs/PIPELINE_GUIDE.md +145 -0
  92. synapse_sdk/plugins/docs/README.md +513 -0
  93. synapse_sdk/plugins/docs/STEP.md +656 -0
  94. synapse_sdk/plugins/enums.py +70 -10
  95. synapse_sdk/plugins/errors.py +92 -0
  96. synapse_sdk/plugins/executors/__init__.py +43 -0
  97. synapse_sdk/plugins/executors/local.py +99 -0
  98. synapse_sdk/plugins/executors/ray/__init__.py +18 -0
  99. synapse_sdk/plugins/executors/ray/base.py +282 -0
  100. synapse_sdk/plugins/executors/ray/job.py +298 -0
  101. synapse_sdk/plugins/executors/ray/jobs_api.py +511 -0
  102. synapse_sdk/plugins/executors/ray/packaging.py +137 -0
  103. synapse_sdk/plugins/executors/ray/pipeline.py +792 -0
  104. synapse_sdk/plugins/executors/ray/task.py +257 -0
  105. synapse_sdk/plugins/models/__init__.py +26 -0
  106. synapse_sdk/plugins/models/logger.py +173 -0
  107. synapse_sdk/plugins/models/pipeline.py +25 -0
  108. synapse_sdk/plugins/pipelines/__init__.py +81 -0
  109. synapse_sdk/plugins/pipelines/action_pipeline.py +417 -0
  110. synapse_sdk/plugins/pipelines/context.py +107 -0
  111. synapse_sdk/plugins/pipelines/display.py +311 -0
  112. synapse_sdk/plugins/runner.py +114 -0
  113. synapse_sdk/plugins/schemas/__init__.py +19 -0
  114. synapse_sdk/plugins/schemas/results.py +152 -0
  115. synapse_sdk/plugins/steps/__init__.py +63 -0
  116. synapse_sdk/plugins/steps/base.py +128 -0
  117. synapse_sdk/plugins/steps/context.py +90 -0
  118. synapse_sdk/plugins/steps/orchestrator.py +128 -0
  119. synapse_sdk/plugins/steps/registry.py +103 -0
  120. synapse_sdk/plugins/steps/utils/__init__.py +20 -0
  121. synapse_sdk/plugins/steps/utils/logging.py +85 -0
  122. synapse_sdk/plugins/steps/utils/timing.py +71 -0
  123. synapse_sdk/plugins/steps/utils/validation.py +68 -0
  124. synapse_sdk/plugins/templates/__init__.py +50 -0
  125. synapse_sdk/plugins/templates/base/.gitignore.j2 +26 -0
  126. synapse_sdk/plugins/templates/base/.synapseignore.j2 +11 -0
  127. synapse_sdk/plugins/templates/base/README.md.j2 +26 -0
  128. synapse_sdk/plugins/templates/base/plugin/__init__.py.j2 +1 -0
  129. synapse_sdk/plugins/templates/base/pyproject.toml.j2 +14 -0
  130. synapse_sdk/plugins/templates/base/requirements.txt.j2 +1 -0
  131. synapse_sdk/plugins/templates/custom/plugin/main.py.j2 +18 -0
  132. synapse_sdk/plugins/templates/data_validation/plugin/validate.py.j2 +32 -0
  133. synapse_sdk/plugins/templates/export/plugin/export.py.j2 +36 -0
  134. synapse_sdk/plugins/templates/neural_net/plugin/inference.py.j2 +36 -0
  135. synapse_sdk/plugins/templates/neural_net/plugin/train.py.j2 +33 -0
  136. synapse_sdk/plugins/templates/post_annotation/plugin/post_annotate.py.j2 +32 -0
  137. synapse_sdk/plugins/templates/pre_annotation/plugin/pre_annotate.py.j2 +32 -0
  138. synapse_sdk/plugins/templates/smart_tool/plugin/auto_label.py.j2 +44 -0
  139. synapse_sdk/plugins/templates/upload/plugin/upload.py.j2 +35 -0
  140. synapse_sdk/plugins/testing/__init__.py +25 -0
  141. synapse_sdk/plugins/testing/sample_actions.py +98 -0
  142. synapse_sdk/plugins/types.py +206 -0
  143. synapse_sdk/plugins/upload.py +595 -64
  144. synapse_sdk/plugins/utils.py +325 -37
  145. synapse_sdk/shared/__init__.py +25 -0
  146. synapse_sdk/utils/__init__.py +1 -0
  147. synapse_sdk/utils/auth.py +74 -0
  148. synapse_sdk/utils/file/__init__.py +58 -0
  149. synapse_sdk/utils/file/archive.py +449 -0
  150. synapse_sdk/utils/file/checksum.py +167 -0
  151. synapse_sdk/utils/file/download.py +286 -0
  152. synapse_sdk/utils/file/io.py +129 -0
  153. synapse_sdk/utils/file/requirements.py +36 -0
  154. synapse_sdk/utils/network.py +168 -0
  155. synapse_sdk/utils/storage/__init__.py +238 -0
  156. synapse_sdk/utils/storage/config.py +188 -0
  157. synapse_sdk/utils/storage/errors.py +52 -0
  158. synapse_sdk/utils/storage/providers/__init__.py +13 -0
  159. synapse_sdk/utils/storage/providers/base.py +76 -0
  160. synapse_sdk/utils/storage/providers/gcs.py +168 -0
  161. synapse_sdk/utils/storage/providers/http.py +250 -0
  162. synapse_sdk/utils/storage/providers/local.py +126 -0
  163. synapse_sdk/utils/storage/providers/s3.py +177 -0
  164. synapse_sdk/utils/storage/providers/sftp.py +208 -0
  165. synapse_sdk/utils/storage/registry.py +125 -0
  166. synapse_sdk/utils/websocket.py +99 -0
  167. synapse_sdk-2026.1.1b2.dist-info/METADATA +715 -0
  168. synapse_sdk-2026.1.1b2.dist-info/RECORD +172 -0
  169. {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/WHEEL +1 -1
  170. synapse_sdk-2026.1.1b2.dist-info/licenses/LICENSE +201 -0
  171. locale/en/LC_MESSAGES/messages.mo +0 -0
  172. locale/en/LC_MESSAGES/messages.po +0 -39
  173. locale/ko/LC_MESSAGES/messages.mo +0 -0
  174. locale/ko/LC_MESSAGES/messages.po +0 -34
  175. synapse_sdk/cli/create_plugin.py +0 -10
  176. synapse_sdk/clients/agent/core.py +0 -7
  177. synapse_sdk/clients/agent/service.py +0 -15
  178. synapse_sdk/clients/backend/dataset.py +0 -51
  179. synapse_sdk/clients/ray/__init__.py +0 -6
  180. synapse_sdk/clients/ray/core.py +0 -22
  181. synapse_sdk/clients/ray/serve.py +0 -20
  182. synapse_sdk/i18n.py +0 -35
  183. synapse_sdk/plugins/categories/__init__.py +0 -0
  184. synapse_sdk/plugins/categories/base.py +0 -235
  185. synapse_sdk/plugins/categories/data_validation/__init__.py +0 -0
  186. synapse_sdk/plugins/categories/data_validation/actions/__init__.py +0 -0
  187. synapse_sdk/plugins/categories/data_validation/actions/validation.py +0 -10
  188. synapse_sdk/plugins/categories/data_validation/templates/config.yaml +0 -3
  189. synapse_sdk/plugins/categories/data_validation/templates/plugin/__init__.py +0 -0
  190. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +0 -5
  191. synapse_sdk/plugins/categories/decorators.py +0 -13
  192. synapse_sdk/plugins/categories/export/__init__.py +0 -0
  193. synapse_sdk/plugins/categories/export/actions/__init__.py +0 -0
  194. synapse_sdk/plugins/categories/export/actions/export.py +0 -10
  195. synapse_sdk/plugins/categories/import/__init__.py +0 -0
  196. synapse_sdk/plugins/categories/import/actions/__init__.py +0 -0
  197. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  198. synapse_sdk/plugins/categories/neural_net/__init__.py +0 -0
  199. synapse_sdk/plugins/categories/neural_net/actions/__init__.py +0 -0
  200. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +0 -45
  201. synapse_sdk/plugins/categories/neural_net/actions/inference.py +0 -18
  202. synapse_sdk/plugins/categories/neural_net/actions/test.py +0 -10
  203. synapse_sdk/plugins/categories/neural_net/actions/train.py +0 -143
  204. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +0 -12
  205. synapse_sdk/plugins/categories/neural_net/templates/plugin/__init__.py +0 -0
  206. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +0 -4
  207. synapse_sdk/plugins/categories/neural_net/templates/plugin/test.py +0 -2
  208. synapse_sdk/plugins/categories/neural_net/templates/plugin/train.py +0 -14
  209. synapse_sdk/plugins/categories/post_annotation/__init__.py +0 -0
  210. synapse_sdk/plugins/categories/post_annotation/actions/__init__.py +0 -0
  211. synapse_sdk/plugins/categories/post_annotation/actions/post_annotation.py +0 -10
  212. synapse_sdk/plugins/categories/post_annotation/templates/config.yaml +0 -3
  213. synapse_sdk/plugins/categories/post_annotation/templates/plugin/__init__.py +0 -0
  214. synapse_sdk/plugins/categories/post_annotation/templates/plugin/post_annotation.py +0 -3
  215. synapse_sdk/plugins/categories/pre_annotation/__init__.py +0 -0
  216. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +0 -0
  217. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation.py +0 -10
  218. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +0 -3
  219. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/__init__.py +0 -0
  220. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/pre_annotation.py +0 -3
  221. synapse_sdk/plugins/categories/registry.py +0 -16
  222. synapse_sdk/plugins/categories/smart_tool/__init__.py +0 -0
  223. synapse_sdk/plugins/categories/smart_tool/actions/__init__.py +0 -0
  224. synapse_sdk/plugins/categories/smart_tool/actions/auto_label.py +0 -37
  225. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +0 -7
  226. synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py +0 -0
  227. synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py +0 -11
  228. synapse_sdk/plugins/categories/templates.py +0 -32
  229. synapse_sdk/plugins/cli/__init__.py +0 -21
  230. synapse_sdk/plugins/cli/publish.py +0 -37
  231. synapse_sdk/plugins/cli/run.py +0 -67
  232. synapse_sdk/plugins/exceptions.py +0 -22
  233. synapse_sdk/plugins/models.py +0 -121
  234. synapse_sdk/plugins/templates/cookiecutter.json +0 -11
  235. synapse_sdk/plugins/templates/hooks/post_gen_project.py +0 -3
  236. synapse_sdk/plugins/templates/hooks/pre_prompt.py +0 -21
  237. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  238. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  239. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.gitignore +0 -27
  240. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.pre-commit-config.yaml +0 -7
  241. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/README.md +0 -5
  242. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +0 -6
  243. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  244. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/plugin/__init__.py +0 -0
  245. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/pyproject.toml +0 -13
  246. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +0 -1
  247. synapse_sdk/shared/enums.py +0 -8
  248. synapse_sdk/utils/debug.py +0 -5
  249. synapse_sdk/utils/file.py +0 -87
  250. synapse_sdk/utils/module_loading.py +0 -29
  251. synapse_sdk/utils/pydantic/__init__.py +0 -0
  252. synapse_sdk/utils/pydantic/config.py +0 -4
  253. synapse_sdk/utils/pydantic/errors.py +0 -33
  254. synapse_sdk/utils/pydantic/validators.py +0 -7
  255. synapse_sdk/utils/storage.py +0 -91
  256. synapse_sdk/utils/string.py +0 -11
  257. synapse_sdk-1.0.0a11.dist-info/LICENSE +0 -21
  258. synapse_sdk-1.0.0a11.dist-info/METADATA +0 -43
  259. synapse_sdk-1.0.0a11.dist-info/RECORD +0 -111
  260. {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/entry_points.txt +0 -0
  261. {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,286 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import hashlib
5
+ import operator
6
+ from functools import reduce
7
+ from pathlib import Path
8
+ from typing import Any, Callable, TypeVar
9
+ from urllib.parse import urlparse, urlunparse
10
+
11
+ import aiohttp
12
+ import requests
13
+
14
+ from .io import get_temp_path
15
+
16
+ T = TypeVar('T')
17
+
18
+ # Default chunk size: 50MB
19
+ _CHUNK_SIZE = 1024 * 1024 * 50
20
+
21
+
22
def _hash_text(text: str) -> str:
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8.

    The digest is used as a cache key, not as a security primitive.
    """
    digest = hashlib.md5()
    digest.update(text.encode('utf-8'))
    return digest.hexdigest()
25
+
26
+
27
def _clean_url(url: str) -> str:
    """Return ``url`` with its query string and fragment stripped.

    Scheme, network location, path and path parameters are kept as parsed.
    """
    parts = urlparse(url)
    stripped = parts._replace(query='', fragment='')
    return urlunparse(stripped)
38
+
39
+
40
def download_file(
    url: str,
    path_download: str | Path,
    *,
    name: str | None = None,
    coerce: Callable[[Path], T] | None = None,
    use_cached: bool = True,
) -> Path | T:
    """Download a file from a URL into a directory.

    The response body is streamed in chunks of ``_CHUNK_SIZE`` bytes. When no
    ``name`` is given, the file name is the MD5 hash of the URL (query string
    and fragment removed), which lets later calls reuse an existing file.

    The body is first written to a uniquely named ``*.part`` file next to the
    target and only renamed onto the target once the transfer has completed.
    A failed or interrupted download therefore never leaves a truncated file
    that a later call would mistake for a valid cache entry.

    Args:
        url: The URL to download from.
        path_download: Directory path where the file will be saved.
        name: Custom filename (without extension). Disables caching if provided.
            The extension of the URL path is appended in either case.
        coerce: Optional function to transform the downloaded Path.
        use_cached: If True, skip download if file already exists.

    Returns:
        Path to the downloaded file, or coerce(path) if coerce is provided.

    Raises:
        requests.HTTPError: If the HTTP request fails.
        OSError: If file write fails.

    Examples:
        >>> path = download_file('https://example.com/image.jpg', '/tmp/downloads')
        >>> path = download_file(url, '/tmp', name='my_file')  # Custom name
        >>> path_str = download_file(url, '/tmp', coerce=str)  # As string
    """
    import uuid  # local import: only needed to name the temporary file

    cleaned_url = _clean_url(url)

    if name:
        use_cached = False
    else:
        name = _hash_text(cleaned_url)

    name += Path(cleaned_url).suffix
    path = Path(path_download) / name

    if not use_cached or not path.is_file():
        # Unique temp name so concurrent downloads of the same URL do not
        # share (and clobber) one partial file.
        partial = path.with_name(f'{path.name}.{uuid.uuid4().hex}.part')
        try:
            # The context manager releases the connection even on errors.
            with requests.get(url, allow_redirects=True, stream=True, timeout=30) as response:
                response.raise_for_status()
                with partial.open('wb') as file:
                    for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                        file.write(chunk)
            # Publish the finished file under its final name.
            partial.replace(path)
        except BaseException:
            partial.unlink(missing_ok=True)
            raise

    if coerce:
        return coerce(path)
    return path
93
+
94
+
95
async def adownload_file(
    url: str,
    path_download: str | Path,
    *,
    name: str | None = None,
    coerce: Callable[[Path], T] | None = None,
    use_cached: bool = True,
) -> Path | T:
    """Asynchronously download a file from a URL.

    Async version of download_file() using aiohttp for concurrent downloads.

    The body is first written to a uniquely named ``*.part`` file next to the
    target and renamed onto the target only after the transfer has completed.
    A failed or cancelled download therefore never leaves a truncated file
    that a later call would mistake for a valid cache entry, and concurrent
    downloads of the same URL do not write into each other's file.

    Args:
        url: The URL to download from.
        path_download: Directory path where the file will be saved.
        name: Custom filename (without extension). Disables caching if provided.
            The extension of the URL path is appended in either case.
        coerce: Optional function to transform the downloaded Path.
        use_cached: If True, skip download if file already exists.

    Returns:
        Path to the downloaded file, or coerce(path) if coerce is provided.

    Examples:
        >>> path = await adownload_file('https://example.com/large.zip', '/tmp')
        >>> paths = await asyncio.gather(*[adownload_file(u, '/tmp') for u in urls])
    """
    import uuid  # local import: only needed to name the temporary file

    cleaned_url = _clean_url(url)

    if name:
        use_cached = False
    else:
        name = _hash_text(cleaned_url)

    name += Path(cleaned_url).suffix
    path = Path(path_download) / name

    if not use_cached or not path.is_file():
        partial = path.with_name(f'{path.name}.{uuid.uuid4().hex}.part')
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    response.raise_for_status()
                    with partial.open('wb') as file:
                        while chunk := await response.content.read(_CHUNK_SIZE):
                            file.write(chunk)
            # Publish the finished file under its final name.
            partial.replace(path)
        except BaseException:
            # BaseException so task cancellation also cleans up the partial file.
            partial.unlink(missing_ok=True)
            raise

    if coerce:
        return coerce(path)
    return path
142
+
143
+
144
def files_url_to_path(
    files: dict[str, Any],
    *,
    coerce: Callable[[Path], Any] | None = None,
    file_field: str | None = None,
) -> None:
    """Convert file URLs to local paths by downloading them in-place.

    Files are downloaded into ``get_temp_path('media')``, which is created
    if necessary. ``files`` is mutated; nothing is returned.

    Args:
        files: Dictionary containing file URLs or file objects.
            - String values: treated as URLs, replaced with local paths
            - Dict values with 'url' key: 'url' is replaced with 'path'
        coerce: Function to transform downloaded paths.
        file_field: If provided, only process this specific field.

    Raises:
        KeyError: If ``file_field`` is not in ``files`` or a dict entry has
            no 'url' key.

    Examples:
        >>> files = {'image': 'https://example.com/img.jpg'}
        >>> files_url_to_path(files)
        >>> files['image']  # Path('/tmp/datamaker/media/abc123.jpg')

        >>> files = {'video': {'url': 'https://example.com/vid.mp4', 'size': 1024}}
        >>> files_url_to_path(files)
        >>> files['video']  # {'path': Path(...), 'size': 1024}
    """
    path_download = get_temp_path('media')
    path_download.mkdir(parents=True, exist_ok=True)

    # Snapshot the keys up front; only existing keys are reassigned below.
    field_names = [file_field] if file_field else list(files)

    for field_name in field_names:
        entry = files[field_name]
        if isinstance(entry, str):
            files[field_name] = download_file(entry, path_download, coerce=coerce)
        else:
            # Remove 'url' only after the download succeeded, so a failed
            # download leaves the entry intact and retryable.
            entry['path'] = download_file(entry['url'], path_download, coerce=coerce)
            del entry['url']
179
+
180
+
181
async def afiles_url_to_path(
    files: dict[str, Any],
    *,
    coerce: Callable[[Path], Any] | None = None,
) -> None:
    """Asynchronously convert file URLs to local paths.

    All files are downloaded concurrently with ``asyncio.gather()``. Each
    distinct URL is downloaded once, even if several fields reference it;
    those fields then share the same result object. ``files`` is only
    mutated after every download has succeeded.

    Args:
        files: Dictionary containing file URLs or file objects.
            - String values: treated as URLs, replaced with local paths
            - Dict values with 'url' key: 'url' is replaced with 'path'
        coerce: Function to transform downloaded paths.

    Raises:
        KeyError: If a dict entry has no 'url' key.
    """
    path_download = get_temp_path('media')
    path_download.mkdir(parents=True, exist_ok=True)

    # Map every field to the URL it needs downloaded.
    url_by_field = {
        field_name: entry if isinstance(entry, str) else entry['url']
        for field_name, entry in files.items()
    }

    # De-duplicate while keeping order so one URL is never fetched twice
    # in parallel into the same cache file.
    unique_urls = list(dict.fromkeys(url_by_field.values()))
    downloaded = await asyncio.gather(
        *(adownload_file(source_url, path_download, coerce=coerce) for source_url in unique_urls)
    )
    result_by_url = dict(zip(unique_urls, downloaded))

    for field_name, source_url in url_by_field.items():
        entry = files[field_name]
        if isinstance(entry, str):
            files[field_name] = result_by_url[source_url]
        else:
            entry['path'] = result_by_url[source_url]
            del entry['url']
202
+
203
+
204
def files_url_to_path_from_objs(
    objs: dict[str, Any] | list[dict[str, Any]],
    files_fields: list[str],
    *,
    coerce: Callable[[Path], Any] | None = None,
    is_list: bool = False,
    is_async: bool = False,
) -> None:
    """Convert file URLs to paths for multiple objects with nested field support.

    Each entry of ``files_fields`` is resolved on every object. If the
    resolved value is a URL string it is replaced in its parent container;
    otherwise it is treated as a files dictionary and converted in-place.
    Fields that are missing on an object are skipped.

    Args:
        objs: Single object or list of objects to process.
        files_fields: List of field paths (supports dot notation like 'data.files').
        coerce: Function to transform downloaded paths.
        is_list: If True, objs is treated as a list.
        is_async: If True, uses async download for better performance.
            This runs ``asyncio.run()``, which cannot be used from a thread
            that already has a running event loop.

    Examples:
        >>> obj = {'files': {'image': 'https://example.com/img.jpg'}}
        >>> files_url_to_path_from_objs(obj, files_fields=['files'])

        >>> objs = [{'data': {'files': {...}}}, ...]
        >>> files_url_to_path_from_objs(objs, ['data.files'], is_list=True, is_async=True)
    """
    if is_async:
        asyncio.run(afiles_url_to_path_from_objs(objs, files_fields, coerce=coerce, is_list=is_list))
        return

    if not is_list:
        objs = [objs]

    for obj in objs:
        for files_field in files_fields:
            # Resolve the parent container and the final key separately, so a
            # dotted path ending in a URL string can be replaced inside its
            # real parent rather than looked up as a literal 'a.b' key.
            *parent_keys, leaf_key = files_field.split('.')
            try:
                parent = reduce(operator.getitem, parent_keys, obj)
                target = parent[leaf_key]
                if isinstance(target, str):
                    files_url_to_path(parent, coerce=coerce, file_field=leaf_key)
                else:
                    files_url_to_path(target, coerce=coerce)
            except KeyError:
                # Best-effort, as before: a missing field (or missing 'url'
                # key) skips this field instead of aborting the whole batch.
                pass
244
+
245
+
246
async def afiles_url_to_path_from_objs(
    objs: dict[str, Any] | list[dict[str, Any]],
    files_fields: list[str],
    *,
    coerce: Callable[[Path], Any] | None = None,
    is_list: bool = False,
) -> None:
    """Asynchronously convert file URLs to paths for multiple objects.

    One conversion task is created per (object, field) pair and all of them
    are awaited together with asyncio.gather(). If a field resolves to a
    URL string it is replaced in its parent container; otherwise it is
    treated as a files dictionary and converted in-place. Fields that are
    missing on an object are skipped.

    Args:
        objs: Single object or list of objects to process.
        files_fields: List of field paths (supports dot notation).
        coerce: Function to transform downloaded paths.
        is_list: If True, objs is treated as a list.
    """
    if not is_list:
        objs = [objs]

    async def _convert_url_field(parent: dict[str, Any], key: str) -> None:
        # Wrap the single URL in a one-item dict so the regular files
        # converter can be reused, then write the result back.
        holder = {key: parent[key]}
        await afiles_url_to_path(holder, coerce=coerce)
        parent[key] = holder[key]

    tasks = []

    for obj in objs:
        for files_field in files_fields:
            *parent_keys, leaf_key = files_field.split('.')
            try:
                parent = reduce(operator.getitem, parent_keys, obj)
                target = parent[leaf_key]
            except KeyError:
                # Field not present on this object.
                continue

            if isinstance(target, str):
                tasks.append(_convert_url_field(parent, leaf_key))
            else:
                tasks.append(afiles_url_to_path(target, coerce=coerce))

    await asyncio.gather(*tasks)
277
+
278
+
279
+ __all__ = [
280
+ 'download_file',
281
+ 'adownload_file',
282
+ 'files_url_to_path',
283
+ 'afiles_url_to_path',
284
+ 'files_url_to_path_from_objs',
285
+ 'afiles_url_to_path_from_objs',
286
+ ]
@@ -0,0 +1,129 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import json
5
+ import mimetypes
6
+ from collections.abc import Generator
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
# Chunk size used when the caller does not pass one: 50MB
DEFAULT_CHUNK_SIZE = 50 * 1024 * 1024


def read_file_in_chunks(
    file_path: str | Path,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
) -> Generator[bytes, None, None]:
    """Lazily yield the contents of a file as consecutive byte chunks.

    The file is opened in binary mode on first iteration and read
    ``chunk_size`` bytes at a time until an empty read signals end of file.

    Args:
        file_path: Path to the file to read.
        chunk_size: Size of each chunk in bytes (default 50MB).

    Yields:
        Bytes chunks of the file.

    Raises:
        FileNotFoundError: If file doesn't exist.
        PermissionError: If file cannot be read.

    Example:
        >>> for chunk in read_file_in_chunks('/path/to/large_file.zip'):
        ...     process(chunk)
    """
    with Path(file_path).open('rb') as stream:
        # iter() with a sentinel stops at the first empty read (EOF).
        yield from iter(lambda: stream.read(chunk_size), b'')
43
+
44
+
45
def convert_file_to_base64(file_path: str | Path) -> str:
    """Encode a file as a base64 data URI.

    A string that already starts with ``data:`` is returned unchanged.

    Args:
        file_path: Path to the file to encode.

    Returns:
        Data URI string: "data:{mime_type};base64,{encoded_content}"

    Raises:
        FileNotFoundError: If file doesn't exist.
        ValueError: If MIME type cannot be determined.

    Example:
        >>> uri = convert_file_to_base64('/path/to/image.png')
        >>> uri.startswith('data:image/png;base64,')
        True
    """
    # Pass through input that is already a data URI.
    if isinstance(file_path, str) and file_path.startswith('data:'):
        return file_path

    source = Path(file_path)

    guessed_type = mimetypes.guess_type(str(source))[0]
    if guessed_type is None:
        raise ValueError(f'Cannot determine MIME type for: {source}')

    payload = base64.b64encode(source.read_bytes()).decode('ascii')
    return 'data:' + guessed_type + ';base64,' + payload
77
+
78
+
79
def get_temp_path(sub_path: str | None = None) -> Path:
    """Build the SDK's temporary directory path.

    The path is only computed; no directory is created here.

    Args:
        sub_path: Optional subdirectory name to append.

    Returns:
        Path object pointing to /tmp/datamaker or /tmp/datamaker/{sub_path}.

    Examples:
        >>> get_temp_path()
        PosixPath('/tmp/datamaker')
        >>> get_temp_path('media')
        PosixPath('/tmp/datamaker/media')
    """
    base = Path('/tmp/datamaker')
    return base / sub_path if sub_path else base
98
+
99
+
100
def get_dict_from_file(file_path: str | Path) -> dict[str, Any]:
    """Load a dictionary from a JSON or YAML file.

    Files whose suffix is ``.yaml`` or ``.yml`` (case-insensitive) are parsed
    with ``yaml.safe_load``; everything else is parsed as JSON. The file is
    always decoded as UTF-8 rather than the platform's locale encoding, so
    non-ASCII content loads the same way on every system.

    Args:
        file_path: Path to the file (JSON or YAML).

    Returns:
        Dictionary parsed from the file.

    Raises:
        FileNotFoundError: If the file doesn't exist.
        json.JSONDecodeError: If JSON parsing fails.
        yaml.YAMLError: If YAML parsing fails.
    """
    # Path() accepts str, Path and any other os.PathLike.
    path = Path(file_path)

    with path.open(encoding='utf-8') as f:
        if path.suffix.lower() in ('.yaml', '.yml'):
            return yaml.safe_load(f)
        return json.load(f)
121
+
122
+
123
+ __all__ = [
124
+ 'DEFAULT_CHUNK_SIZE',
125
+ 'read_file_in_chunks',
126
+ 'convert_file_to_base64',
127
+ 'get_temp_path',
128
+ 'get_dict_from_file',
129
+ ]
@@ -0,0 +1,36 @@
1
+ """Requirements file parsing utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
def read_requirements(path: str | Path) -> list[str] | None:
    """Parse requirements.txt file.

    Reads a requirements.txt file and returns a list of requirement strings,
    filtering out empty lines and lines that start with ``#``. The file is
    decoded as UTF-8.

    Args:
        path: Path to requirements.txt file

    Returns:
        List of requirement strings (whitespace-stripped, in file order).
        None if ``path`` is not an existing regular file, or if the file
        contains no requirement lines.
    """
    path = Path(path)
    # is_file() also rejects directories, which open() could not read.
    if not path.is_file():
        return None

    with path.open(encoding='utf-8') as f:
        stripped_lines = (line.strip() for line in f)
        requirements = [line for line in stripped_lines if line and not line.startswith('#')]

    return requirements or None
34
+
35
+
36
+ __all__ = ['read_requirements']
@@ -0,0 +1,168 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+ from urllib.parse import urlparse, urlunparse
7
+
8
+ from synapse_sdk.exceptions import ClientError
9
+
10
+
11
@dataclass
class StreamLimits:
    """Resource limits applied to streaming operations.

    Bounds how much a long-running stream may consume so that it cannot
    exhaust resources.

    Attributes:
        max_messages: Maximum WebSocket messages before termination.
        max_lines: Maximum lines for HTTP streaming.
        max_bytes: Maximum total bytes to receive.
        max_message_size: Maximum size of a single message/line in bytes.
        queue_size: Bounded queue size for async operations.
    """

    # Count-based limits.
    max_messages: int = 10000
    max_lines: int = 50000
    # Size-based limits, in bytes.
    max_bytes: int = 50 * 1024**2  # 50MB in total
    max_message_size: int = 10 * 1024  # 10KB per message
    # Capacity of the bounded queue.
    queue_size: int = 1000
30
+
31
+
32
# Resource ID validation pattern - alphanumeric, hyphens, underscores
_RESOURCE_ID_PATTERN = re.compile(r'^[a-zA-Z0-9\-_]+$')
_MAX_RESOURCE_ID_LENGTH = 100


def validate_resource_id(resource_id: Any, resource_name: str = 'resource') -> str:
    """Validate resource ID to prevent injection attacks.

    The ID is converted with ``str()`` and must consist solely of ASCII
    letters, digits, hyphens and underscores, with at most
    ``_MAX_RESOURCE_ID_LENGTH`` characters. Any falsy value (``''``, ``None``,
    ``0``) is rejected as empty.

    Args:
        resource_id: The ID to validate.
        resource_name: Human-readable name for error messages.

    Returns:
        Validated ID as string.

    Raises:
        ClientError: If ID is invalid (400 status code).

    Example:
        >>> validate_resource_id('job-abc123', 'job')
        'job-abc123'
        >>> validate_resource_id('', 'job')
        Traceback (most recent call last):
            ...
        ClientError: job ID cannot be empty
    """
    if not resource_id:
        raise ClientError(400, f'{resource_name} ID cannot be empty')

    id_str = str(resource_id)

    # fullmatch(), not match(): `$` also matches just before a trailing
    # newline, so match() would accept an ID such as 'abc\n'.
    if not _RESOURCE_ID_PATTERN.fullmatch(id_str):
        raise ClientError(400, f'Invalid {resource_name} ID format')

    if len(id_str) > _MAX_RESOURCE_ID_LENGTH:
        raise ClientError(400, f'{resource_name} ID too long')

    return id_str
70
+
71
+
72
def validate_timeout(timeout: Any, max_timeout: float = 300.0) -> float:
    """Validate timeout value with bounds checking.

    Accepts an ``int`` or ``float`` that is strictly positive and not greater
    than ``max_timeout``. NaN is rejected.

    Args:
        timeout: Timeout value to validate.
        max_timeout: Maximum allowed timeout in seconds.

    Returns:
        Validated timeout as float.

    Raises:
        ClientError: If timeout is invalid (400 status code).

    Example:
        >>> validate_timeout(30.0)
        30.0
        >>> validate_timeout(-1)
        Traceback (most recent call last):
            ...
        ClientError: Timeout must be a positive number
    """
    # `not timeout > 0` rather than `timeout <= 0`: every comparison with NaN
    # is False, so the latter would let NaN through as a "valid" timeout.
    if not isinstance(timeout, (int, float)) or not timeout > 0:
        raise ClientError(400, 'Timeout must be a positive number')

    if timeout > max_timeout:
        raise ClientError(400, f'Timeout cannot exceed {max_timeout} seconds')

    return float(timeout)
100
+
101
+
102
def sanitize_error_message(error_msg: str, context: str = '') -> str:
    """Sanitize error messages to prevent information disclosure.

    Every quoted substring (single or double quotes) is replaced with
    ``"[REDACTED]"``, then the result is cut to at most 200 characters.
    Redaction happens before truncation so that a quoted value straddling
    the cut cannot lose its closing quote and leak its first characters.

    Args:
        error_msg: Raw error message.
        context: Optional context prefix.

    Returns:
        Sanitized error message, prefixed with ``"{context}: "`` when a
        context is given.

    Example:
        >>> sanitize_error_message('Failed with token="secret123"', 'connection')
        'connection: Failed with token="[REDACTED]"'
    """
    # Redact quoted strings which may contain sensitive data
    redacted = re.sub(r'["\']([^"\']*)["\']', '"[REDACTED]"', str(error_msg))
    sanitized = redacted[:200]

    if context:
        return f'{context}: {sanitized}'
    return sanitized
125
+
126
+
127
def http_to_websocket_url(url: str) -> str:
    """Translate an HTTP(S) URL into the matching WebSocket URL.

    ``http`` becomes ``ws`` and ``https`` becomes ``wss``; every other URL
    component is carried over unchanged. A URL that already uses ``ws`` or
    ``wss`` is returned as given.

    Args:
        url: HTTP or HTTPS URL.

    Returns:
        WebSocket URL (ws:// or wss://).

    Raises:
        ClientError: If URL scheme is invalid.

    Example:
        >>> http_to_websocket_url('https://example.com/ws/')
        'wss://example.com/ws/'
        >>> http_to_websocket_url('http://localhost:8000/ws/')
        'ws://localhost:8000/ws/'
    """
    scheme_map = {'http': 'ws', 'https': 'wss'}

    try:
        parts = urlparse(url)
        scheme = parts.scheme

        # Already a WebSocket URL
        if scheme in ('ws', 'wss'):
            return url

        if scheme not in scheme_map:
            raise ClientError(400, f'Invalid URL scheme: {scheme}')

        return urlunparse(parts._replace(scheme=scheme_map[scheme]))
    except ClientError:
        # Our own validation error: propagate untouched.
        raise
    except Exception as e:
        raise ClientError(400, f'Invalid URL format: {str(e)[:50]}')