synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/balance.py +3 -15
  3. synth_ai/config/base_url.py +47 -0
  4. synth_ai/http.py +102 -0
  5. synth_ai/inference/__init__.py +7 -0
  6. synth_ai/inference/client.py +20 -0
  7. synth_ai/jobs/client.py +246 -0
  8. synth_ai/learning/__init__.py +24 -0
  9. synth_ai/learning/client.py +149 -0
  10. synth_ai/learning/config.py +43 -0
  11. synth_ai/learning/constants.py +29 -0
  12. synth_ai/learning/ft_client.py +59 -0
  13. synth_ai/learning/health.py +43 -0
  14. synth_ai/learning/jobs.py +205 -0
  15. synth_ai/learning/rl_client.py +256 -0
  16. synth_ai/learning/sse.py +58 -0
  17. synth_ai/learning/validators.py +48 -0
  18. synth_ai/lm/core/main_v3.py +13 -0
  19. synth_ai/lm/core/synth_models.py +48 -0
  20. synth_ai/lm/core/vendor_clients.py +9 -6
  21. synth_ai/lm/vendors/core/openai_api.py +31 -3
  22. synth_ai/lm/vendors/openai_standard.py +45 -14
  23. synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
  24. synth_ai/lm/vendors/synth_client.py +372 -28
  25. synth_ai/rl/__init__.py +30 -0
  26. synth_ai/rl/contracts.py +32 -0
  27. synth_ai/rl/env_keys.py +137 -0
  28. synth_ai/rl/secrets.py +19 -0
  29. synth_ai/scripts/verify_rewards.py +100 -0
  30. synth_ai/task/__init__.py +10 -0
  31. synth_ai/task/contracts.py +120 -0
  32. synth_ai/task/health.py +28 -0
  33. synth_ai/task/validators.py +12 -0
  34. synth_ai/tracing_v3/hooks.py +3 -1
  35. synth_ai/tracing_v3/session_tracer.py +123 -2
  36. synth_ai/tracing_v3/turso/manager.py +218 -0
  37. synth_ai/tracing_v3/turso/models.py +53 -0
  38. synth_ai-0.2.4.dev8.dist-info/METADATA +635 -0
  39. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/RECORD +43 -25
  40. synth_ai/tui/__init__.py +0 -1
  41. synth_ai/tui/__main__.py +0 -13
  42. synth_ai/tui/cli/__init__.py +0 -1
  43. synth_ai/tui/cli/query_experiments.py +0 -164
  44. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  45. synth_ai/tui/dashboard.py +0 -340
  46. synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
  47. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/WHEEL +0 -0
  48. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/entry_points.txt +0 -0
  49. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/licenses/LICENSE +0 -0
  50. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/top_level.txt +0 -0
synth_ai/__init__.py CHANGED
@@ -23,7 +23,7 @@ from synth_ai.tracing_v1.abstractions import (
23
23
  from synth_ai.tracing_v1.decorators import trace_event_async, trace_event_sync
24
24
  from synth_ai.tracing_v1.upload import upload
25
25
 
26
- __version__ = "0.2.4.dev7"
26
+ __version__ = "0.2.4.dev8"
27
27
  __all__ = [
28
28
  "LM",
29
29
  "tracing",
synth_ai/cli/balance.py CHANGED
@@ -19,10 +19,10 @@ PROD_BACKEND_BASE = "https://agent-learning.onrender.com/api/v1"
19
19
 
20
20
 
21
21
  def _get_default_base_url() -> str:
22
- # Prefer explicit backend variables that are NOT modal; else default to prod backend
22
+ # Prefer explicit backend variables; else default to prod backend
23
23
  for var in ("SYNTH_BACKEND_BASE_URL", "BACKEND_BASE_URL", "SYNTH_BASE_URL"):
24
24
  val = os.getenv(var)
25
- if val and ("modal" not in val.lower() and "modal.run" not in val.lower()):
25
+ if val:
26
26
  return val
27
27
  return PROD_BACKEND_BASE
28
28
 
@@ -87,19 +87,7 @@ def register(cli):
87
87
 
88
88
  base = _ensure_api_v1_prefix(base_url)
89
89
 
90
- # Hard guard: never hit Modal URLs for account balance
91
- try:
92
- parsed = urlparse(base)
93
- host = (parsed.hostname or "").lower()
94
- except Exception:
95
- host = ""
96
- if "modal" in host or "modal.run" in base.lower():
97
- # Override to prod backend unconditionally
98
- fallback = PROD_BACKEND_BASE
99
- console.print(
100
- f"[yellow]Detected remote Modal URL ({base}). Using backend instead:[/yellow] {fallback}"
101
- )
102
- base = fallback
90
+ # No special-casing for modal.run domains; honor the provided base URL
103
91
 
104
92
  try:
105
93
  resp: Response = requests.get(
synth_ai/config/base_url.py CHANGED
@@ -49,3 +49,50 @@ def get_learning_v2_base_url(mode: Literal["dev", "prod"] = "prod") -> str:
49
49
  return _normalize_base(dev)
50
50
 
51
51
  raise Exception()
52
+
53
+
54
+ def _resolve_override_mode() -> str:
55
+ """Return one of 'local', 'dev', 'prod' based on SYNTH_BACKEND_URL_OVERRIDE.
56
+
57
+ Defaults to 'prod' when unset or unrecognized.
58
+ """
59
+ ov = (os.getenv("SYNTH_BACKEND_URL_OVERRIDE", "") or "").strip().lower()
60
+ if ov in {"local", "dev", "prod"}:
61
+ return ov
62
+ return "prod"
63
+
64
+
65
+ def get_backend_from_env() -> tuple[str, str]:
66
+ """Resolve (base_url, api_key) using a simple LOCAL/DEV/PROD override scheme.
67
+
68
+ Env vars consulted:
69
+ - SYNTH_BACKEND_URL_OVERRIDE = local|dev|prod (case-insensitive)
70
+ - LOCAL_BACKEND_URL, TESTING_LOCAL_SYNTH_API_KEY
71
+ - DEV_BACKEND_URL, DEV_SYNTH_API_KEY
72
+ - PROD_BACKEND_URL, TESTING_PROD_SYNTH_API_KEY (fallback to SYNTH_API_KEY)
73
+
74
+ Base URL is normalized to end with '/api'.
75
+ Defaults: prod base URL → https://agent-learning.onrender.com/api
76
+ """
77
+ mode = _resolve_override_mode()
78
+ if mode == "local":
79
+ base = os.getenv("LOCAL_BACKEND_URL", "http://localhost:8000")
80
+ key = os.getenv("TESTING_LOCAL_SYNTH_API_KEY", "")
81
+ return base.rstrip("/"), key
82
+ if mode == "dev":
83
+ base = os.getenv("DEV_BACKEND_URL", "") or "http://localhost:8000"
84
+ key = os.getenv("DEV_SYNTH_API_KEY", "")
85
+ return base.rstrip("/"), key
86
+ # prod
87
+ base = os.getenv("PROD_BACKEND_URL", f"{PROD_BASE_URL_DEFAULT}")
88
+ # Ensure we return the root (no trailing /api). If default includes /api, strip it.
89
+ base = base.rstrip("/")
90
+ if base.endswith("/api"):
91
+ base = base[: -len("/api")]
92
+ # Prefer explicit PROD key, then testing key, then generic fallback
93
+ key = (
94
+ os.getenv("PROD_SYNTH_API_KEY", "")
95
+ or os.getenv("TESTING_PROD_SYNTH_API_KEY", "")
96
+ or os.getenv("SYNTH_API_KEY", "")
97
+ )
98
+ return base, key
synth_ai/http.py ADDED
@@ -0,0 +1,102 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict, Optional
6
+
7
+ import aiohttp
8
+
9
+
10
+ @dataclass
11
+ class HTTPError(Exception):
12
+ status: int
13
+ url: str
14
+ message: str
15
+ body_snippet: str | None = None
16
+ detail: Any | None = None
17
+
18
+ def __str__(self) -> str: # pragma: no cover - trivial
19
+ base = f"HTTP {self.status} for {self.url}: {self.message}"
20
+ if self.body_snippet:
21
+ base += f" | body[0:200]={self.body_snippet[:200]}"
22
+ return base
23
+
24
+
25
+ class AsyncHttpClient:
26
+ def __init__(self, base_url: str, api_key: str, timeout: float = 30.0) -> None:
27
+ self._base_url = base_url.rstrip("/")
28
+ self._api_key = api_key
29
+ self._timeout = aiohttp.ClientTimeout(total=timeout)
30
+ self._session: Optional[aiohttp.ClientSession] = None
31
+
32
+ async def __aenter__(self) -> "AsyncHttpClient":
33
+ if self._session is None:
34
+ headers = {"authorization": f"Bearer {self._api_key}"}
35
+ self._session = aiohttp.ClientSession(headers=headers, timeout=self._timeout)
36
+ return self
37
+
38
+ async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: ANN001
39
+ if self._session is not None:
40
+ await self._session.close()
41
+ self._session = None
42
+
43
+ def _abs(self, path: str) -> str:
44
+ if path.startswith("http://") or path.startswith("https://"):
45
+ return path
46
+ # If base_url already ends with /api and path starts with /api, remove duplicate
47
+ if self._base_url.endswith("/api") and path.startswith("/api"):
48
+ path = path[4:] # Remove leading /api
49
+ return f"{self._base_url}/{path.lstrip('/')}"
50
+
51
+ async def get(self, path: str, *, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None) -> Any:
52
+ url = self._abs(path)
53
+ assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
54
+ async with self._session.get(url, params=params, headers=headers) as resp:
55
+ return await self._handle_response(resp, url)
56
+
57
+ async def post_json(self, path: str, *, json: Dict[str, Any], headers: Optional[Dict[str, str]] = None) -> Any:
58
+ url = self._abs(path)
59
+ assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
60
+ async with self._session.post(url, json=json, headers=headers) as resp:
61
+ return await self._handle_response(resp, url)
62
+
63
+ async def post_multipart(self, path: str, *, data: Dict[str, Any], files: Dict[str, tuple[str, bytes, str | None]], headers: Optional[Dict[str, str]] = None) -> Any:
64
+ url = self._abs(path)
65
+ assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
66
+ form = aiohttp.FormData()
67
+ for k, v in data.items():
68
+ form.add_field(k, str(v))
69
+ for field, (filename, content, content_type) in files.items():
70
+ form.add_field(field, content, filename=filename, content_type=content_type or "application/octet-stream")
71
+ async with self._session.post(url, data=form, headers=headers) as resp:
72
+ return await self._handle_response(resp, url)
73
+
74
+ async def delete(self, path: str, *, headers: Optional[Dict[str, str]] = None) -> Any:
75
+ url = self._abs(path)
76
+ assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
77
+ async with self._session.delete(url, headers=headers) as resp:
78
+ return await self._handle_response(resp, url)
79
+
80
+ async def _handle_response(self, resp: aiohttp.ClientResponse, url: str) -> Any:
81
+ text = await resp.text()
82
+ body_snippet = text[:200] if text else None
83
+ if 200 <= resp.status < 300:
84
+ ctype = resp.headers.get("content-type", "")
85
+ if "application/json" in ctype:
86
+ try:
87
+ return await resp.json()
88
+ except Exception:
89
+ # Fallback to text
90
+ return text
91
+ return text
92
+ # error
93
+ detail: Any | None = None
94
+ try:
95
+ detail = await resp.json()
96
+ except Exception:
97
+ detail = None
98
+ raise HTTPError(status=resp.status, url=url, message="request_failed", body_snippet=body_snippet, detail=detail)
99
+
100
+
101
+ async def sleep(seconds: float) -> None:
102
+ await asyncio.sleep(seconds)
synth_ai/inference/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from .client import InferenceClient
2
+
3
+ __all__ = [
4
+ "InferenceClient",
5
+ ]
6
+
7
+
synth_ai/inference/client.py ADDED
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict
4
+
5
+ from ..http import AsyncHttpClient
6
+
7
+
8
+ class InferenceClient:
9
+ def __init__(self, base_url: str, api_key: str, *, timeout: float = 30.0) -> None:
10
+ self._base_url = base_url.rstrip("/")
11
+ self._api_key = api_key
12
+ self._timeout = timeout
13
+
14
+ async def create_chat_completion(self, *, model: str, messages: list[dict], **kwargs: Any) -> Dict[str, Any]:
15
+ body: Dict[str, Any] = {"model": model, "messages": messages}
16
+ body.update(kwargs)
17
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
18
+ return await http.post_json("/v1/chat/completions", json=body)
19
+
20
+
synth_ai/jobs/client.py ADDED
@@ -0,0 +1,246 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+ from synth_ai.http import AsyncHttpClient
6
+
7
+
8
+ class FilesApi:
9
+ def __init__(self, http: AsyncHttpClient) -> None:
10
+ self._http = http
11
+
12
+ async def upload(self, *, filename: str, content: bytes, purpose: str, content_type: Optional[str] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
13
+ data = {"purpose": purpose}
14
+ files = {"file": (filename, content, content_type)}
15
+ headers = {"Idempotency-Key": idempotency_key} if idempotency_key else None
16
+ return await self._http.post_multipart("/api/files", data=data, files=files, headers=headers)
17
+
18
+ async def list(self, *, purpose: Optional[str] = None, after: Optional[str] = None, limit: int = 20) -> Dict[str, Any]:
19
+ params: Dict[str, Any] = {}
20
+ if purpose is not None:
21
+ params["purpose"] = purpose
22
+ if after is not None:
23
+ params["after"] = after
24
+ params["limit"] = limit
25
+ return await self._http.get("/api/files", params=params)
26
+
27
+ async def retrieve(self, file_id: str) -> Dict[str, Any]:
28
+ return await self._http.get(f"/api/files/{file_id}")
29
+
30
+ async def delete(self, file_id: str) -> Any:
31
+ return await self._http.delete(f"/api/files/{file_id}")
32
+
33
+ async def list_jobs(self, file_id: str, *, after: Optional[str] = None, limit: int = 20) -> Dict[str, Any]:
34
+ params: Dict[str, Any] = {"limit": limit}
35
+ if after is not None:
36
+ params["after"] = after
37
+ return await self._http.get(f"/api/files/{file_id}/jobs", params=params)
38
+
39
+
40
+ class SftJobsApi:
41
+ def __init__(self, http: AsyncHttpClient) -> None:
42
+ self._http = http
43
+
44
+ async def create(
45
+ self,
46
+ *,
47
+ training_file: str,
48
+ model: str,
49
+ validation_file: Optional[str] = None,
50
+ hyperparameters: Optional[Dict[str, Any]] = None,
51
+ suffix: Optional[str] = None,
52
+ integrations: Optional[Dict[str, Any]] = None,
53
+ metadata: Optional[Dict[str, Any]] = None,
54
+ idempotency_key: Optional[str] = None,
55
+ ) -> Dict[str, Any]:
56
+ payload: Dict[str, Any] = {
57
+ "training_file": training_file,
58
+ "model": model,
59
+ }
60
+ if validation_file is not None:
61
+ payload["validation_file"] = validation_file
62
+ if hyperparameters is not None:
63
+ payload["hyperparameters"] = hyperparameters
64
+ if suffix is not None:
65
+ payload["suffix"] = suffix
66
+ if integrations is not None:
67
+ payload["integrations"] = integrations
68
+ if metadata is not None:
69
+ payload["metadata"] = metadata
70
+ headers = {"Idempotency-Key": idempotency_key} if idempotency_key else None
71
+ return await self._http.post_json("/api/sft/jobs", json=payload, headers=headers)
72
+
73
+ async def list(
74
+ self,
75
+ *,
76
+ status: Optional[str] = None,
77
+ model: Optional[str] = None,
78
+ file_id: Optional[str] = None,
79
+ created_after: Optional[int] = None,
80
+ created_before: Optional[int] = None,
81
+ after: Optional[str] = None,
82
+ limit: int = 20,
83
+ ) -> Dict[str, Any]:
84
+ params: Dict[str, Any] = {"limit": limit}
85
+ if status is not None:
86
+ params["status"] = status
87
+ if model is not None:
88
+ params["model"] = model
89
+ if file_id is not None:
90
+ params["file_id"] = file_id
91
+ if created_after is not None:
92
+ params["created_after"] = created_after
93
+ if created_before is not None:
94
+ params["created_before"] = created_before
95
+ if after is not None:
96
+ params["after"] = after
97
+ return await self._http.get("/api/sft/jobs", params=params)
98
+
99
+ async def retrieve(self, job_id: str) -> Dict[str, Any]:
100
+ return await self._http.get(f"/api/sft/jobs/{job_id}")
101
+
102
+ async def cancel(self, job_id: str) -> Dict[str, Any]:
103
+ return await self._http.post_json(f"/api/sft/jobs/{job_id}/cancel", json={})
104
+
105
+ async def list_events(self, job_id: str, *, since_seq: int = 0, limit: int = 200) -> Dict[str, Any]:
106
+ params = {"since_seq": since_seq, "limit": limit}
107
+ return await self._http.get(f"/api/sft/jobs/{job_id}/events", params=params)
108
+
109
+ async def checkpoints(self, job_id: str, *, after: Optional[str] = None, limit: int = 10) -> Dict[str, Any]:
110
+ params: Dict[str, Any] = {"limit": limit}
111
+ if after is not None:
112
+ params["after"] = after
113
+ return await self._http.get(f"/api/sft/jobs/{job_id}/checkpoints", params=params)
114
+
115
+
116
+ class RlJobsApi:
117
+ def __init__(self, http: AsyncHttpClient) -> None:
118
+ self._http = http
119
+
120
+ async def create(
121
+ self,
122
+ *,
123
+ model: str,
124
+ endpoint_base_url: str,
125
+ trainer_id: str,
126
+ trainer: Optional[Dict[str, Any]] = None,
127
+ job_config_id: Optional[str] = None,
128
+ config: Optional[Dict[str, Any]] = None,
129
+ metadata: Optional[Dict[str, Any]] = None,
130
+ idempotency_key: Optional[str] = None,
131
+ ) -> Dict[str, Any]:
132
+ payload: Dict[str, Any] = {
133
+ "model": model,
134
+ "endpoint_base_url": endpoint_base_url,
135
+ "trainer_id": trainer_id,
136
+ }
137
+ if trainer is not None:
138
+ payload["trainer"] = trainer
139
+ if job_config_id is not None:
140
+ payload["job_config_id"] = job_config_id
141
+ if config is not None:
142
+ payload["config"] = config
143
+ if metadata is not None:
144
+ payload["metadata"] = metadata
145
+ headers = {"Idempotency-Key": idempotency_key} if idempotency_key else None
146
+ return await self._http.post_json("/api/rl/jobs", json=payload, headers=headers)
147
+
148
+ async def list(
149
+ self,
150
+ *,
151
+ status: Optional[str] = None,
152
+ model: Optional[str] = None,
153
+ created_after: Optional[int] = None,
154
+ created_before: Optional[int] = None,
155
+ after: Optional[str] = None,
156
+ limit: int = 20,
157
+ ) -> Dict[str, Any]:
158
+ params: Dict[str, Any] = {"limit": limit}
159
+ if status is not None:
160
+ params["status"] = status
161
+ if model is not None:
162
+ params["model"] = model
163
+ if created_after is not None:
164
+ params["created_after"] = created_after
165
+ if created_before is not None:
166
+ params["created_before"] = created_before
167
+ if after is not None:
168
+ params["after"] = after
169
+ return await self._http.get("/api/rl/jobs", params=params)
170
+
171
+ async def retrieve(self, job_id: str) -> Dict[str, Any]:
172
+ return await self._http.get(f"/api/rl/jobs/{job_id}")
173
+
174
+ async def cancel(self, job_id: str) -> Dict[str, Any]:
175
+ return await self._http.post_json(f"/api/rl/jobs/{job_id}/cancel", json={})
176
+
177
+ async def list_events(self, job_id: str, *, since_seq: int = 0, limit: int = 200) -> Dict[str, Any]:
178
+ params = {"since_seq": since_seq, "limit": limit}
179
+ return await self._http.get(f"/api/rl/jobs/{job_id}/events", params=params)
180
+
181
+ async def metrics(self, job_id: str, *, after_step: int = -1, limit: int = 200) -> Dict[str, Any]:
182
+ params = {"after_step": after_step, "limit": limit}
183
+ return await self._http.get(f"/api/rl/jobs/{job_id}/metrics", params=params)
184
+
185
+
186
+ class ModelsApi:
187
+ def __init__(self, http: AsyncHttpClient) -> None:
188
+ self._http = http
189
+
190
+ async def list(
191
+ self,
192
+ *,
193
+ source: Optional[str] = None,
194
+ base_model: Optional[str] = None,
195
+ status: Optional[str] = None,
196
+ after: Optional[str] = None,
197
+ limit: int = 20,
198
+ ) -> Dict[str, Any]:
199
+ params: Dict[str, Any] = {"limit": limit}
200
+ if source is not None:
201
+ params["source"] = source
202
+ if base_model is not None:
203
+ params["base_model"] = base_model
204
+ if status is not None:
205
+ params["status"] = status
206
+ if after is not None:
207
+ params["after"] = after
208
+ return await self._http.get("/api/models", params=params)
209
+
210
+ async def retrieve(self, model_id: str) -> Dict[str, Any]:
211
+ return await self._http.get(f"/api/models/{model_id}")
212
+
213
+ async def delete(self, model_id: str) -> Any:
214
+ return await self._http.delete(f"/api/models/{model_id}")
215
+
216
+ async def list_jobs(self, model_id: str, *, after: Optional[str] = None, limit: int = 20) -> Dict[str, Any]:
217
+ params: Dict[str, Any] = {"limit": limit}
218
+ if after is not None:
219
+ params["after"] = after
220
+ return await self._http.get(f"/api/models/{model_id}/jobs", params=params)
221
+
222
+
223
+ class JobsClient:
224
+ """High-level client aggregating job APIs.
225
+
226
+ Usage:
227
+ async with JobsClient(base_url, api_key) as c:
228
+ await c.files.list()
229
+ """
230
+
231
+ def __init__(self, base_url: str, api_key: str, timeout: float = 30.0, http: Optional[AsyncHttpClient] = None) -> None:
232
+ self._base_url = base_url
233
+ self._api_key = api_key
234
+ self._timeout = timeout
235
+ self._http = http or AsyncHttpClient(base_url, api_key, timeout=timeout)
236
+ self.files = FilesApi(self._http)
237
+ self.sft = SftJobsApi(self._http)
238
+ self.rl = RlJobsApi(self._http)
239
+ self.models = ModelsApi(self._http)
240
+
241
+ async def __aenter__(self) -> "JobsClient":
242
+ await self._http.__aenter__()
243
+ return self
244
+
245
+ async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: ANN001
246
+ await self._http.__aexit__(exc_type, exc, tb)
synth_ai/learning/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ from .client import LearningClient
2
+ from .rl_client import RlClient
3
+ from .ft_client import FtClient
4
+ from .validators import validate_training_jsonl, validate_trainer_cfg_rl
5
+ from synth_ai.task import validate_task_app_url, task_app_health
6
+ from .health import backend_health, pricing_preflight, balance_autumn_normalized
7
+ from .sse import stream_events as stream_job_events
8
+ from .jobs import JobHandle, JobsApiResolver
9
+
10
+ __all__ = [
11
+ "LearningClient",
12
+ "RlClient",
13
+ "FtClient",
14
+ "validate_training_jsonl",
15
+ "validate_trainer_cfg_rl",
16
+ "validate_task_app_url",
17
+ "backend_health",
18
+ "task_app_health",
19
+ "pricing_preflight",
20
+ "balance_autumn_normalized",
21
+ "stream_job_events",
22
+ "JobHandle",
23
+ "JobsApiResolver",
24
+ ]
synth_ai/learning/client.py ADDED
@@ -0,0 +1,149 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Callable, Dict, List, Optional
5
+
6
+ from ..http import AsyncHttpClient, HTTPError, sleep
7
+
8
+
9
+ class LearningClient:
10
+ def __init__(self, base_url: str, api_key: str, *, timeout: float = 30.0) -> None:
11
+ self._base_url = base_url.rstrip("/")
12
+ self._api_key = api_key
13
+ self._timeout = timeout
14
+
15
+ async def upload_training_file(self, path: str | Path, *, purpose: str = "fine-tune") -> str:
16
+ p = Path(path)
17
+ content = p.read_bytes()
18
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
19
+ data = {"purpose": purpose}
20
+ files = {"file": (p.name, content, _infer_content_type(p.name))}
21
+ js = await http.post_multipart("/api/learning/files", data=data, files=files)
22
+ if not isinstance(js, dict) or "id" not in js:
23
+ raise HTTPError(status=500, url="/api/learning/files", message="invalid_upload_response", body_snippet=str(js)[:200])
24
+ return str(js["id"])
25
+
26
+ async def create_job(
27
+ self,
28
+ *,
29
+ training_type: str,
30
+ model: str,
31
+ training_file_id: str,
32
+ hyperparameters: Optional[Dict[str, Any]] = None,
33
+ metadata: Optional[Dict[str, Any]] = None,
34
+ ) -> Dict[str, Any]:
35
+ body = {
36
+ "training_type": training_type,
37
+ "model": model,
38
+ "training_file_id": training_file_id,
39
+ "hyperparameters": hyperparameters or {},
40
+ "metadata": metadata or {},
41
+ }
42
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
43
+ return await http.post_json("/api/learning/jobs", json=body)
44
+
45
+ async def start_job(self, job_id: str) -> Dict[str, Any]:
46
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
47
+ return await http.post_json(f"/api/learning/jobs/{job_id}/start", json={})
48
+
49
+ async def get_job(self, job_id: str) -> Dict[str, Any]:
50
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
51
+ return await http.get(f"/api/learning/jobs/{job_id}")
52
+
53
+ async def get_events(self, job_id: str, *, since_seq: int = 0, limit: int = 200) -> List[Dict[str, Any]]:
54
+ params = {"since_seq": since_seq, "limit": limit}
55
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
56
+ js = await http.get(f"/api/learning/jobs/{job_id}/events", params=params)
57
+ if isinstance(js, dict) and isinstance(js.get("events"), list):
58
+ return js["events"]
59
+ return []
60
+
61
+ async def get_metrics(self, job_id: str, *, name: str | None = None, after_step: int | None = None, limit: int = 500, run_id: str | None = None) -> List[Dict[str, Any]]:
62
+ params: Dict[str, Any] = {"limit": limit}
63
+ if name is not None:
64
+ params["name"] = name
65
+ if after_step is not None:
66
+ params["after_step"] = after_step
67
+ if run_id is not None:
68
+ params["run_id"] = run_id
69
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
70
+ js = await http.get(f"/api/learning/jobs/{job_id}/metrics", params=params)
71
+ if isinstance(js, dict) and isinstance(js.get("points"), list):
72
+ return js["points"]
73
+ return []
74
+
75
+ async def get_timeline(self, job_id: str, *, limit: int = 200) -> List[Dict[str, Any]]:
76
+ params = {"limit": limit}
77
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
78
+ js = await http.get(f"/api/learning/jobs/{job_id}/timeline", params=params)
79
+ if isinstance(js, dict) and isinstance(js.get("events"), list):
80
+ return js["events"]
81
+ return []
82
+
83
+ async def poll_until_terminal(
84
+ self,
85
+ job_id: str,
86
+ *,
87
+ interval_seconds: float = 2.0,
88
+ max_seconds: float | None = 3600,
89
+ on_event: Callable[[Dict[str, Any]], None] | None = None,
90
+ ) -> Dict[str, Any]:
91
+ last_seq = 0
92
+ elapsed = 0.0
93
+ while True:
94
+ # Events
95
+ events = await self.get_events(job_id, since_seq=last_seq, limit=200)
96
+ for e in events:
97
+ if isinstance(e, dict) and isinstance(e.get("seq"), int):
98
+ last_seq = max(last_seq, int(e["seq"]))
99
+ if on_event:
100
+ try:
101
+ on_event(e)
102
+ except Exception:
103
+ pass
104
+
105
+ # Status
106
+ job = await self.get_job(job_id)
107
+ status = str(job.get("status") or "").lower()
108
+ if status in {"succeeded", "failed", "canceled", "cancelled"}:
109
+ return job
110
+
111
+ # Sleep and time budget
112
+ await sleep(interval_seconds)
113
+ elapsed += interval_seconds
114
+ if max_seconds is not None and elapsed >= max_seconds:
115
+ raise TimeoutError(f"Polling timed out after {elapsed} seconds for job {job_id}")
116
+
117
+ # --- Optional diagnostics ---
118
+ async def pricing_preflight(self, *, job_type: str, gpu_type: str, estimated_seconds: float, container_count: int) -> Dict[str, Any]:
119
+ body = {
120
+ "job_type": job_type,
121
+ "gpu_type": gpu_type,
122
+ "estimated_seconds": float(estimated_seconds or 0.0),
123
+ "container_count": int(container_count or 1),
124
+ }
125
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
126
+ js = await http.post_json("/api/v1/pricing/preflight", json=body)
127
+ if not isinstance(js, dict):
128
+ raise HTTPError(status=500, url="/api/v1/pricing/preflight", message="invalid_preflight_response", body_snippet=str(js)[:200])
129
+ return js
130
+
131
+ async def balance_autumn_normalized(self) -> Dict[str, Any]:
132
+ async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
133
+ js = await http.get("/api/v1/balance/autumn-normalized")
134
+ if not isinstance(js, dict):
135
+ raise HTTPError(status=500, url="/api/v1/balance/autumn-normalized", message="invalid_balance_response", body_snippet=str(js)[:200])
136
+ return js
137
+
138
+
139
+ def _infer_content_type(filename: str) -> str:
140
+ name = filename.lower()
141
+ if name.endswith(".jsonl"):
142
+ return "application/jsonl"
143
+ if name.endswith(".json"):
144
+ return "application/json"
145
+ if name.endswith(".txt"):
146
+ return "text/plain"
147
+ return "application/octet-stream"
148
+
149
+