synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +1 -1
- synth_ai/cli/balance.py +3 -15
- synth_ai/config/base_url.py +47 -0
- synth_ai/http.py +102 -0
- synth_ai/inference/__init__.py +7 -0
- synth_ai/inference/client.py +20 -0
- synth_ai/jobs/client.py +246 -0
- synth_ai/learning/__init__.py +24 -0
- synth_ai/learning/client.py +149 -0
- synth_ai/learning/config.py +43 -0
- synth_ai/learning/constants.py +29 -0
- synth_ai/learning/ft_client.py +59 -0
- synth_ai/learning/health.py +43 -0
- synth_ai/learning/jobs.py +205 -0
- synth_ai/learning/rl_client.py +256 -0
- synth_ai/learning/sse.py +58 -0
- synth_ai/learning/validators.py +48 -0
- synth_ai/lm/core/main_v3.py +13 -0
- synth_ai/lm/core/synth_models.py +48 -0
- synth_ai/lm/core/vendor_clients.py +9 -6
- synth_ai/lm/vendors/core/openai_api.py +31 -3
- synth_ai/lm/vendors/openai_standard.py +45 -14
- synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
- synth_ai/lm/vendors/synth_client.py +372 -28
- synth_ai/rl/__init__.py +30 -0
- synth_ai/rl/contracts.py +32 -0
- synth_ai/rl/env_keys.py +137 -0
- synth_ai/rl/secrets.py +19 -0
- synth_ai/scripts/verify_rewards.py +100 -0
- synth_ai/task/__init__.py +10 -0
- synth_ai/task/contracts.py +120 -0
- synth_ai/task/health.py +28 -0
- synth_ai/task/validators.py +12 -0
- synth_ai/tracing_v3/hooks.py +3 -1
- synth_ai/tracing_v3/session_tracer.py +123 -2
- synth_ai/tracing_v3/turso/manager.py +218 -0
- synth_ai/tracing_v3/turso/models.py +53 -0
- synth_ai-0.2.4.dev8.dist-info/METADATA +635 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/RECORD +43 -25
- synth_ai/tui/__init__.py +0 -1
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -340
- synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/top_level.txt +0 -0
synth_ai/__init__.py
CHANGED
@@ -23,7 +23,7 @@ from synth_ai.tracing_v1.abstractions import (
|
|
23
23
|
from synth_ai.tracing_v1.decorators import trace_event_async, trace_event_sync
|
24
24
|
from synth_ai.tracing_v1.upload import upload
|
25
25
|
|
26
|
-
__version__ = "0.2.4.dev7"
|
26
|
+
__version__ = "0.2.4.dev8"
|
27
27
|
__all__ = [
|
28
28
|
"LM",
|
29
29
|
"tracing",
|
synth_ai/cli/balance.py
CHANGED
@@ -19,10 +19,10 @@ PROD_BACKEND_BASE = "https://agent-learning.onrender.com/api/v1"
|
|
19
19
|
|
20
20
|
|
21
21
|
def _get_default_base_url() -> str:
|
22
|
-
# Prefer explicit backend variables
|
22
|
+
# Prefer explicit backend variables; else default to prod backend
|
23
23
|
for var in ("SYNTH_BACKEND_BASE_URL", "BACKEND_BASE_URL", "SYNTH_BASE_URL"):
|
24
24
|
val = os.getenv(var)
|
25
|
-
if val
|
25
|
+
if val:
|
26
26
|
return val
|
27
27
|
return PROD_BACKEND_BASE
|
28
28
|
|
@@ -87,19 +87,7 @@ def register(cli):
|
|
87
87
|
|
88
88
|
base = _ensure_api_v1_prefix(base_url)
|
89
89
|
|
90
|
-
#
|
91
|
-
try:
|
92
|
-
parsed = urlparse(base)
|
93
|
-
host = (parsed.hostname or "").lower()
|
94
|
-
except Exception:
|
95
|
-
host = ""
|
96
|
-
if "modal" in host or "modal.run" in base.lower():
|
97
|
-
# Override to prod backend unconditionally
|
98
|
-
fallback = PROD_BACKEND_BASE
|
99
|
-
console.print(
|
100
|
-
f"[yellow]Detected remote Modal URL ({base}). Using backend instead:[/yellow] {fallback}"
|
101
|
-
)
|
102
|
-
base = fallback
|
90
|
+
# No special-casing for modal.run domains; honor the provided base URL
|
103
91
|
|
104
92
|
try:
|
105
93
|
resp: Response = requests.get(
|
synth_ai/config/base_url.py
CHANGED
@@ -49,3 +49,50 @@ def get_learning_v2_base_url(mode: Literal["dev", "prod"] = "prod") -> str:
|
|
49
49
|
return _normalize_base(dev)
|
50
50
|
|
51
51
|
raise Exception()
|
52
|
+
|
53
|
+
|
54
|
+
def _resolve_override_mode() -> str:
|
55
|
+
"""Return one of 'local', 'dev', 'prod' based on SYNTH_BACKEND_URL_OVERRIDE.
|
56
|
+
|
57
|
+
Defaults to 'prod' when unset or unrecognized.
|
58
|
+
"""
|
59
|
+
ov = (os.getenv("SYNTH_BACKEND_URL_OVERRIDE", "") or "").strip().lower()
|
60
|
+
if ov in {"local", "dev", "prod"}:
|
61
|
+
return ov
|
62
|
+
return "prod"
|
63
|
+
|
64
|
+
|
65
|
+
def get_backend_from_env() -> tuple[str, str]:
|
66
|
+
"""Resolve (base_url, api_key) using a simple LOCAL/DEV/PROD override scheme.
|
67
|
+
|
68
|
+
Env vars consulted:
|
69
|
+
- SYNTH_BACKEND_URL_OVERRIDE = local|dev|prod (case-insensitive)
|
70
|
+
- LOCAL_BACKEND_URL, TESTING_LOCAL_SYNTH_API_KEY
|
71
|
+
- DEV_BACKEND_URL, DEV_SYNTH_API_KEY
|
72
|
+
- PROD_BACKEND_URL, TESTING_PROD_SYNTH_API_KEY (fallback to SYNTH_API_KEY)
|
73
|
+
|
74
|
+
Base URL is normalized to end with '/api'.
|
75
|
+
Defaults: prod base URL → https://agent-learning.onrender.com/api
|
76
|
+
"""
|
77
|
+
mode = _resolve_override_mode()
|
78
|
+
if mode == "local":
|
79
|
+
base = os.getenv("LOCAL_BACKEND_URL", "http://localhost:8000")
|
80
|
+
key = os.getenv("TESTING_LOCAL_SYNTH_API_KEY", "")
|
81
|
+
return base.rstrip("/"), key
|
82
|
+
if mode == "dev":
|
83
|
+
base = os.getenv("DEV_BACKEND_URL", "") or "http://localhost:8000"
|
84
|
+
key = os.getenv("DEV_SYNTH_API_KEY", "")
|
85
|
+
return base.rstrip("/"), key
|
86
|
+
# prod
|
87
|
+
base = os.getenv("PROD_BACKEND_URL", f"{PROD_BASE_URL_DEFAULT}")
|
88
|
+
# Ensure we return the root (no trailing /api). If default includes /api, strip it.
|
89
|
+
base = base.rstrip("/")
|
90
|
+
if base.endswith("/api"):
|
91
|
+
base = base[: -len("/api")]
|
92
|
+
# Prefer explicit PROD key, then testing key, then generic fallback
|
93
|
+
key = (
|
94
|
+
os.getenv("PROD_SYNTH_API_KEY", "")
|
95
|
+
or os.getenv("TESTING_PROD_SYNTH_API_KEY", "")
|
96
|
+
or os.getenv("SYNTH_API_KEY", "")
|
97
|
+
)
|
98
|
+
return base, key
|
synth_ai/http.py
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from typing import Any, Dict, Optional
|
6
|
+
|
7
|
+
import aiohttp
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
|
11
|
+
class HTTPError(Exception):
|
12
|
+
status: int
|
13
|
+
url: str
|
14
|
+
message: str
|
15
|
+
body_snippet: str | None = None
|
16
|
+
detail: Any | None = None
|
17
|
+
|
18
|
+
def __str__(self) -> str: # pragma: no cover - trivial
|
19
|
+
base = f"HTTP {self.status} for {self.url}: {self.message}"
|
20
|
+
if self.body_snippet:
|
21
|
+
base += f" | body[0:200]={self.body_snippet[:200]}"
|
22
|
+
return base
|
23
|
+
|
24
|
+
|
25
|
+
class AsyncHttpClient:
|
26
|
+
def __init__(self, base_url: str, api_key: str, timeout: float = 30.0) -> None:
|
27
|
+
self._base_url = base_url.rstrip("/")
|
28
|
+
self._api_key = api_key
|
29
|
+
self._timeout = aiohttp.ClientTimeout(total=timeout)
|
30
|
+
self._session: Optional[aiohttp.ClientSession] = None
|
31
|
+
|
32
|
+
async def __aenter__(self) -> "AsyncHttpClient":
|
33
|
+
if self._session is None:
|
34
|
+
headers = {"authorization": f"Bearer {self._api_key}"}
|
35
|
+
self._session = aiohttp.ClientSession(headers=headers, timeout=self._timeout)
|
36
|
+
return self
|
37
|
+
|
38
|
+
async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: ANN001
|
39
|
+
if self._session is not None:
|
40
|
+
await self._session.close()
|
41
|
+
self._session = None
|
42
|
+
|
43
|
+
def _abs(self, path: str) -> str:
|
44
|
+
if path.startswith("http://") or path.startswith("https://"):
|
45
|
+
return path
|
46
|
+
# If base_url already ends with /api and path starts with /api, remove duplicate
|
47
|
+
if self._base_url.endswith("/api") and path.startswith("/api"):
|
48
|
+
path = path[4:] # Remove leading /api
|
49
|
+
return f"{self._base_url}/{path.lstrip('/')}"
|
50
|
+
|
51
|
+
async def get(self, path: str, *, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None) -> Any:
|
52
|
+
url = self._abs(path)
|
53
|
+
assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
|
54
|
+
async with self._session.get(url, params=params, headers=headers) as resp:
|
55
|
+
return await self._handle_response(resp, url)
|
56
|
+
|
57
|
+
async def post_json(self, path: str, *, json: Dict[str, Any], headers: Optional[Dict[str, str]] = None) -> Any:
|
58
|
+
url = self._abs(path)
|
59
|
+
assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
|
60
|
+
async with self._session.post(url, json=json, headers=headers) as resp:
|
61
|
+
return await self._handle_response(resp, url)
|
62
|
+
|
63
|
+
async def post_multipart(self, path: str, *, data: Dict[str, Any], files: Dict[str, tuple[str, bytes, str | None]], headers: Optional[Dict[str, str]] = None) -> Any:
|
64
|
+
url = self._abs(path)
|
65
|
+
assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
|
66
|
+
form = aiohttp.FormData()
|
67
|
+
for k, v in data.items():
|
68
|
+
form.add_field(k, str(v))
|
69
|
+
for field, (filename, content, content_type) in files.items():
|
70
|
+
form.add_field(field, content, filename=filename, content_type=content_type or "application/octet-stream")
|
71
|
+
async with self._session.post(url, data=form, headers=headers) as resp:
|
72
|
+
return await self._handle_response(resp, url)
|
73
|
+
|
74
|
+
async def delete(self, path: str, *, headers: Optional[Dict[str, str]] = None) -> Any:
|
75
|
+
url = self._abs(path)
|
76
|
+
assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
|
77
|
+
async with self._session.delete(url, headers=headers) as resp:
|
78
|
+
return await self._handle_response(resp, url)
|
79
|
+
|
80
|
+
async def _handle_response(self, resp: aiohttp.ClientResponse, url: str) -> Any:
|
81
|
+
text = await resp.text()
|
82
|
+
body_snippet = text[:200] if text else None
|
83
|
+
if 200 <= resp.status < 300:
|
84
|
+
ctype = resp.headers.get("content-type", "")
|
85
|
+
if "application/json" in ctype:
|
86
|
+
try:
|
87
|
+
return await resp.json()
|
88
|
+
except Exception:
|
89
|
+
# Fallback to text
|
90
|
+
return text
|
91
|
+
return text
|
92
|
+
# error
|
93
|
+
detail: Any | None = None
|
94
|
+
try:
|
95
|
+
detail = await resp.json()
|
96
|
+
except Exception:
|
97
|
+
detail = None
|
98
|
+
raise HTTPError(status=resp.status, url=url, message="request_failed", body_snippet=body_snippet, detail=detail)
|
99
|
+
|
100
|
+
|
101
|
+
async def sleep(seconds: float) -> None:
|
102
|
+
await asyncio.sleep(seconds)
|
synth_ai/inference/client.py
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Any, Dict
|
4
|
+
|
5
|
+
from ..http import AsyncHttpClient
|
6
|
+
|
7
|
+
|
8
|
+
class InferenceClient:
|
9
|
+
def __init__(self, base_url: str, api_key: str, *, timeout: float = 30.0) -> None:
|
10
|
+
self._base_url = base_url.rstrip("/")
|
11
|
+
self._api_key = api_key
|
12
|
+
self._timeout = timeout
|
13
|
+
|
14
|
+
async def create_chat_completion(self, *, model: str, messages: list[dict], **kwargs: Any) -> Dict[str, Any]:
|
15
|
+
body: Dict[str, Any] = {"model": model, "messages": messages}
|
16
|
+
body.update(kwargs)
|
17
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
18
|
+
return await http.post_json("/v1/chat/completions", json=body)
|
19
|
+
|
20
|
+
|
synth_ai/jobs/client.py
ADDED
@@ -0,0 +1,246 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Any, Dict, Optional
|
4
|
+
|
5
|
+
from synth_ai.http import AsyncHttpClient
|
6
|
+
|
7
|
+
|
8
|
+
class FilesApi:
|
9
|
+
def __init__(self, http: AsyncHttpClient) -> None:
|
10
|
+
self._http = http
|
11
|
+
|
12
|
+
async def upload(self, *, filename: str, content: bytes, purpose: str, content_type: Optional[str] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
|
13
|
+
data = {"purpose": purpose}
|
14
|
+
files = {"file": (filename, content, content_type)}
|
15
|
+
headers = {"Idempotency-Key": idempotency_key} if idempotency_key else None
|
16
|
+
return await self._http.post_multipart("/api/files", data=data, files=files, headers=headers)
|
17
|
+
|
18
|
+
async def list(self, *, purpose: Optional[str] = None, after: Optional[str] = None, limit: int = 20) -> Dict[str, Any]:
|
19
|
+
params: Dict[str, Any] = {}
|
20
|
+
if purpose is not None:
|
21
|
+
params["purpose"] = purpose
|
22
|
+
if after is not None:
|
23
|
+
params["after"] = after
|
24
|
+
params["limit"] = limit
|
25
|
+
return await self._http.get("/api/files", params=params)
|
26
|
+
|
27
|
+
async def retrieve(self, file_id: str) -> Dict[str, Any]:
|
28
|
+
return await self._http.get(f"/api/files/{file_id}")
|
29
|
+
|
30
|
+
async def delete(self, file_id: str) -> Any:
|
31
|
+
return await self._http.delete(f"/api/files/{file_id}")
|
32
|
+
|
33
|
+
async def list_jobs(self, file_id: str, *, after: Optional[str] = None, limit: int = 20) -> Dict[str, Any]:
|
34
|
+
params: Dict[str, Any] = {"limit": limit}
|
35
|
+
if after is not None:
|
36
|
+
params["after"] = after
|
37
|
+
return await self._http.get(f"/api/files/{file_id}/jobs", params=params)
|
38
|
+
|
39
|
+
|
40
|
+
class SftJobsApi:
|
41
|
+
def __init__(self, http: AsyncHttpClient) -> None:
|
42
|
+
self._http = http
|
43
|
+
|
44
|
+
async def create(
|
45
|
+
self,
|
46
|
+
*,
|
47
|
+
training_file: str,
|
48
|
+
model: str,
|
49
|
+
validation_file: Optional[str] = None,
|
50
|
+
hyperparameters: Optional[Dict[str, Any]] = None,
|
51
|
+
suffix: Optional[str] = None,
|
52
|
+
integrations: Optional[Dict[str, Any]] = None,
|
53
|
+
metadata: Optional[Dict[str, Any]] = None,
|
54
|
+
idempotency_key: Optional[str] = None,
|
55
|
+
) -> Dict[str, Any]:
|
56
|
+
payload: Dict[str, Any] = {
|
57
|
+
"training_file": training_file,
|
58
|
+
"model": model,
|
59
|
+
}
|
60
|
+
if validation_file is not None:
|
61
|
+
payload["validation_file"] = validation_file
|
62
|
+
if hyperparameters is not None:
|
63
|
+
payload["hyperparameters"] = hyperparameters
|
64
|
+
if suffix is not None:
|
65
|
+
payload["suffix"] = suffix
|
66
|
+
if integrations is not None:
|
67
|
+
payload["integrations"] = integrations
|
68
|
+
if metadata is not None:
|
69
|
+
payload["metadata"] = metadata
|
70
|
+
headers = {"Idempotency-Key": idempotency_key} if idempotency_key else None
|
71
|
+
return await self._http.post_json("/api/sft/jobs", json=payload, headers=headers)
|
72
|
+
|
73
|
+
async def list(
|
74
|
+
self,
|
75
|
+
*,
|
76
|
+
status: Optional[str] = None,
|
77
|
+
model: Optional[str] = None,
|
78
|
+
file_id: Optional[str] = None,
|
79
|
+
created_after: Optional[int] = None,
|
80
|
+
created_before: Optional[int] = None,
|
81
|
+
after: Optional[str] = None,
|
82
|
+
limit: int = 20,
|
83
|
+
) -> Dict[str, Any]:
|
84
|
+
params: Dict[str, Any] = {"limit": limit}
|
85
|
+
if status is not None:
|
86
|
+
params["status"] = status
|
87
|
+
if model is not None:
|
88
|
+
params["model"] = model
|
89
|
+
if file_id is not None:
|
90
|
+
params["file_id"] = file_id
|
91
|
+
if created_after is not None:
|
92
|
+
params["created_after"] = created_after
|
93
|
+
if created_before is not None:
|
94
|
+
params["created_before"] = created_before
|
95
|
+
if after is not None:
|
96
|
+
params["after"] = after
|
97
|
+
return await self._http.get("/api/sft/jobs", params=params)
|
98
|
+
|
99
|
+
async def retrieve(self, job_id: str) -> Dict[str, Any]:
|
100
|
+
return await self._http.get(f"/api/sft/jobs/{job_id}")
|
101
|
+
|
102
|
+
async def cancel(self, job_id: str) -> Dict[str, Any]:
|
103
|
+
return await self._http.post_json(f"/api/sft/jobs/{job_id}/cancel", json={})
|
104
|
+
|
105
|
+
async def list_events(self, job_id: str, *, since_seq: int = 0, limit: int = 200) -> Dict[str, Any]:
|
106
|
+
params = {"since_seq": since_seq, "limit": limit}
|
107
|
+
return await self._http.get(f"/api/sft/jobs/{job_id}/events", params=params)
|
108
|
+
|
109
|
+
async def checkpoints(self, job_id: str, *, after: Optional[str] = None, limit: int = 10) -> Dict[str, Any]:
|
110
|
+
params: Dict[str, Any] = {"limit": limit}
|
111
|
+
if after is not None:
|
112
|
+
params["after"] = after
|
113
|
+
return await self._http.get(f"/api/sft/jobs/{job_id}/checkpoints", params=params)
|
114
|
+
|
115
|
+
|
116
|
+
class RlJobsApi:
|
117
|
+
def __init__(self, http: AsyncHttpClient) -> None:
|
118
|
+
self._http = http
|
119
|
+
|
120
|
+
async def create(
|
121
|
+
self,
|
122
|
+
*,
|
123
|
+
model: str,
|
124
|
+
endpoint_base_url: str,
|
125
|
+
trainer_id: str,
|
126
|
+
trainer: Optional[Dict[str, Any]] = None,
|
127
|
+
job_config_id: Optional[str] = None,
|
128
|
+
config: Optional[Dict[str, Any]] = None,
|
129
|
+
metadata: Optional[Dict[str, Any]] = None,
|
130
|
+
idempotency_key: Optional[str] = None,
|
131
|
+
) -> Dict[str, Any]:
|
132
|
+
payload: Dict[str, Any] = {
|
133
|
+
"model": model,
|
134
|
+
"endpoint_base_url": endpoint_base_url,
|
135
|
+
"trainer_id": trainer_id,
|
136
|
+
}
|
137
|
+
if trainer is not None:
|
138
|
+
payload["trainer"] = trainer
|
139
|
+
if job_config_id is not None:
|
140
|
+
payload["job_config_id"] = job_config_id
|
141
|
+
if config is not None:
|
142
|
+
payload["config"] = config
|
143
|
+
if metadata is not None:
|
144
|
+
payload["metadata"] = metadata
|
145
|
+
headers = {"Idempotency-Key": idempotency_key} if idempotency_key else None
|
146
|
+
return await self._http.post_json("/api/rl/jobs", json=payload, headers=headers)
|
147
|
+
|
148
|
+
async def list(
|
149
|
+
self,
|
150
|
+
*,
|
151
|
+
status: Optional[str] = None,
|
152
|
+
model: Optional[str] = None,
|
153
|
+
created_after: Optional[int] = None,
|
154
|
+
created_before: Optional[int] = None,
|
155
|
+
after: Optional[str] = None,
|
156
|
+
limit: int = 20,
|
157
|
+
) -> Dict[str, Any]:
|
158
|
+
params: Dict[str, Any] = {"limit": limit}
|
159
|
+
if status is not None:
|
160
|
+
params["status"] = status
|
161
|
+
if model is not None:
|
162
|
+
params["model"] = model
|
163
|
+
if created_after is not None:
|
164
|
+
params["created_after"] = created_after
|
165
|
+
if created_before is not None:
|
166
|
+
params["created_before"] = created_before
|
167
|
+
if after is not None:
|
168
|
+
params["after"] = after
|
169
|
+
return await self._http.get("/api/rl/jobs", params=params)
|
170
|
+
|
171
|
+
async def retrieve(self, job_id: str) -> Dict[str, Any]:
|
172
|
+
return await self._http.get(f"/api/rl/jobs/{job_id}")
|
173
|
+
|
174
|
+
async def cancel(self, job_id: str) -> Dict[str, Any]:
|
175
|
+
return await self._http.post_json(f"/api/rl/jobs/{job_id}/cancel", json={})
|
176
|
+
|
177
|
+
async def list_events(self, job_id: str, *, since_seq: int = 0, limit: int = 200) -> Dict[str, Any]:
|
178
|
+
params = {"since_seq": since_seq, "limit": limit}
|
179
|
+
return await self._http.get(f"/api/rl/jobs/{job_id}/events", params=params)
|
180
|
+
|
181
|
+
async def metrics(self, job_id: str, *, after_step: int = -1, limit: int = 200) -> Dict[str, Any]:
|
182
|
+
params = {"after_step": after_step, "limit": limit}
|
183
|
+
return await self._http.get(f"/api/rl/jobs/{job_id}/metrics", params=params)
|
184
|
+
|
185
|
+
|
186
|
+
class ModelsApi:
|
187
|
+
def __init__(self, http: AsyncHttpClient) -> None:
|
188
|
+
self._http = http
|
189
|
+
|
190
|
+
async def list(
|
191
|
+
self,
|
192
|
+
*,
|
193
|
+
source: Optional[str] = None,
|
194
|
+
base_model: Optional[str] = None,
|
195
|
+
status: Optional[str] = None,
|
196
|
+
after: Optional[str] = None,
|
197
|
+
limit: int = 20,
|
198
|
+
) -> Dict[str, Any]:
|
199
|
+
params: Dict[str, Any] = {"limit": limit}
|
200
|
+
if source is not None:
|
201
|
+
params["source"] = source
|
202
|
+
if base_model is not None:
|
203
|
+
params["base_model"] = base_model
|
204
|
+
if status is not None:
|
205
|
+
params["status"] = status
|
206
|
+
if after is not None:
|
207
|
+
params["after"] = after
|
208
|
+
return await self._http.get("/api/models", params=params)
|
209
|
+
|
210
|
+
async def retrieve(self, model_id: str) -> Dict[str, Any]:
|
211
|
+
return await self._http.get(f"/api/models/{model_id}")
|
212
|
+
|
213
|
+
async def delete(self, model_id: str) -> Any:
|
214
|
+
return await self._http.delete(f"/api/models/{model_id}")
|
215
|
+
|
216
|
+
async def list_jobs(self, model_id: str, *, after: Optional[str] = None, limit: int = 20) -> Dict[str, Any]:
|
217
|
+
params: Dict[str, Any] = {"limit": limit}
|
218
|
+
if after is not None:
|
219
|
+
params["after"] = after
|
220
|
+
return await self._http.get(f"/api/models/{model_id}/jobs", params=params)
|
221
|
+
|
222
|
+
|
223
|
+
class JobsClient:
|
224
|
+
"""High-level client aggregating job APIs.
|
225
|
+
|
226
|
+
Usage:
|
227
|
+
async with JobsClient(base_url, api_key) as c:
|
228
|
+
await c.files.list()
|
229
|
+
"""
|
230
|
+
|
231
|
+
def __init__(self, base_url: str, api_key: str, timeout: float = 30.0, http: Optional[AsyncHttpClient] = None) -> None:
|
232
|
+
self._base_url = base_url
|
233
|
+
self._api_key = api_key
|
234
|
+
self._timeout = timeout
|
235
|
+
self._http = http or AsyncHttpClient(base_url, api_key, timeout=timeout)
|
236
|
+
self.files = FilesApi(self._http)
|
237
|
+
self.sft = SftJobsApi(self._http)
|
238
|
+
self.rl = RlJobsApi(self._http)
|
239
|
+
self.models = ModelsApi(self._http)
|
240
|
+
|
241
|
+
async def __aenter__(self) -> "JobsClient":
|
242
|
+
await self._http.__aenter__()
|
243
|
+
return self
|
244
|
+
|
245
|
+
async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: ANN001
|
246
|
+
await self._http.__aexit__(exc_type, exc, tb)
|
synth_ai/learning/__init__.py
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
from .client import LearningClient
|
2
|
+
from .rl_client import RlClient
|
3
|
+
from .ft_client import FtClient
|
4
|
+
from .validators import validate_training_jsonl, validate_trainer_cfg_rl
|
5
|
+
from synth_ai.task import validate_task_app_url, task_app_health
|
6
|
+
from .health import backend_health, pricing_preflight, balance_autumn_normalized
|
7
|
+
from .sse import stream_events as stream_job_events
|
8
|
+
from .jobs import JobHandle, JobsApiResolver
|
9
|
+
|
10
|
+
__all__ = [
|
11
|
+
"LearningClient",
|
12
|
+
"RlClient",
|
13
|
+
"FtClient",
|
14
|
+
"validate_training_jsonl",
|
15
|
+
"validate_trainer_cfg_rl",
|
16
|
+
"validate_task_app_url",
|
17
|
+
"backend_health",
|
18
|
+
"task_app_health",
|
19
|
+
"pricing_preflight",
|
20
|
+
"balance_autumn_normalized",
|
21
|
+
"stream_job_events",
|
22
|
+
"JobHandle",
|
23
|
+
"JobsApiResolver",
|
24
|
+
]
|
synth_ai/learning/client.py
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Any, Callable, Dict, List, Optional
|
5
|
+
|
6
|
+
from ..http import AsyncHttpClient, HTTPError, sleep
|
7
|
+
|
8
|
+
|
9
|
+
class LearningClient:
|
10
|
+
def __init__(self, base_url: str, api_key: str, *, timeout: float = 30.0) -> None:
|
11
|
+
self._base_url = base_url.rstrip("/")
|
12
|
+
self._api_key = api_key
|
13
|
+
self._timeout = timeout
|
14
|
+
|
15
|
+
async def upload_training_file(self, path: str | Path, *, purpose: str = "fine-tune") -> str:
|
16
|
+
p = Path(path)
|
17
|
+
content = p.read_bytes()
|
18
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
19
|
+
data = {"purpose": purpose}
|
20
|
+
files = {"file": (p.name, content, _infer_content_type(p.name))}
|
21
|
+
js = await http.post_multipart("/api/learning/files", data=data, files=files)
|
22
|
+
if not isinstance(js, dict) or "id" not in js:
|
23
|
+
raise HTTPError(status=500, url="/api/learning/files", message="invalid_upload_response", body_snippet=str(js)[:200])
|
24
|
+
return str(js["id"])
|
25
|
+
|
26
|
+
async def create_job(
|
27
|
+
self,
|
28
|
+
*,
|
29
|
+
training_type: str,
|
30
|
+
model: str,
|
31
|
+
training_file_id: str,
|
32
|
+
hyperparameters: Optional[Dict[str, Any]] = None,
|
33
|
+
metadata: Optional[Dict[str, Any]] = None,
|
34
|
+
) -> Dict[str, Any]:
|
35
|
+
body = {
|
36
|
+
"training_type": training_type,
|
37
|
+
"model": model,
|
38
|
+
"training_file_id": training_file_id,
|
39
|
+
"hyperparameters": hyperparameters or {},
|
40
|
+
"metadata": metadata or {},
|
41
|
+
}
|
42
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
43
|
+
return await http.post_json("/api/learning/jobs", json=body)
|
44
|
+
|
45
|
+
async def start_job(self, job_id: str) -> Dict[str, Any]:
|
46
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
47
|
+
return await http.post_json(f"/api/learning/jobs/{job_id}/start", json={})
|
48
|
+
|
49
|
+
async def get_job(self, job_id: str) -> Dict[str, Any]:
|
50
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
51
|
+
return await http.get(f"/api/learning/jobs/{job_id}")
|
52
|
+
|
53
|
+
async def get_events(self, job_id: str, *, since_seq: int = 0, limit: int = 200) -> List[Dict[str, Any]]:
|
54
|
+
params = {"since_seq": since_seq, "limit": limit}
|
55
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
56
|
+
js = await http.get(f"/api/learning/jobs/{job_id}/events", params=params)
|
57
|
+
if isinstance(js, dict) and isinstance(js.get("events"), list):
|
58
|
+
return js["events"]
|
59
|
+
return []
|
60
|
+
|
61
|
+
async def get_metrics(self, job_id: str, *, name: str | None = None, after_step: int | None = None, limit: int = 500, run_id: str | None = None) -> List[Dict[str, Any]]:
|
62
|
+
params: Dict[str, Any] = {"limit": limit}
|
63
|
+
if name is not None:
|
64
|
+
params["name"] = name
|
65
|
+
if after_step is not None:
|
66
|
+
params["after_step"] = after_step
|
67
|
+
if run_id is not None:
|
68
|
+
params["run_id"] = run_id
|
69
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
70
|
+
js = await http.get(f"/api/learning/jobs/{job_id}/metrics", params=params)
|
71
|
+
if isinstance(js, dict) and isinstance(js.get("points"), list):
|
72
|
+
return js["points"]
|
73
|
+
return []
|
74
|
+
|
75
|
+
async def get_timeline(self, job_id: str, *, limit: int = 200) -> List[Dict[str, Any]]:
|
76
|
+
params = {"limit": limit}
|
77
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
78
|
+
js = await http.get(f"/api/learning/jobs/{job_id}/timeline", params=params)
|
79
|
+
if isinstance(js, dict) and isinstance(js.get("events"), list):
|
80
|
+
return js["events"]
|
81
|
+
return []
|
82
|
+
|
83
|
+
async def poll_until_terminal(
|
84
|
+
self,
|
85
|
+
job_id: str,
|
86
|
+
*,
|
87
|
+
interval_seconds: float = 2.0,
|
88
|
+
max_seconds: float | None = 3600,
|
89
|
+
on_event: Callable[[Dict[str, Any]], None] | None = None,
|
90
|
+
) -> Dict[str, Any]:
|
91
|
+
last_seq = 0
|
92
|
+
elapsed = 0.0
|
93
|
+
while True:
|
94
|
+
# Events
|
95
|
+
events = await self.get_events(job_id, since_seq=last_seq, limit=200)
|
96
|
+
for e in events:
|
97
|
+
if isinstance(e, dict) and isinstance(e.get("seq"), int):
|
98
|
+
last_seq = max(last_seq, int(e["seq"]))
|
99
|
+
if on_event:
|
100
|
+
try:
|
101
|
+
on_event(e)
|
102
|
+
except Exception:
|
103
|
+
pass
|
104
|
+
|
105
|
+
# Status
|
106
|
+
job = await self.get_job(job_id)
|
107
|
+
status = str(job.get("status") or "").lower()
|
108
|
+
if status in {"succeeded", "failed", "canceled", "cancelled"}:
|
109
|
+
return job
|
110
|
+
|
111
|
+
# Sleep and time budget
|
112
|
+
await sleep(interval_seconds)
|
113
|
+
elapsed += interval_seconds
|
114
|
+
if max_seconds is not None and elapsed >= max_seconds:
|
115
|
+
raise TimeoutError(f"Polling timed out after {elapsed} seconds for job {job_id}")
|
116
|
+
|
117
|
+
# --- Optional diagnostics ---
|
118
|
+
async def pricing_preflight(self, *, job_type: str, gpu_type: str, estimated_seconds: float, container_count: int) -> Dict[str, Any]:
|
119
|
+
body = {
|
120
|
+
"job_type": job_type,
|
121
|
+
"gpu_type": gpu_type,
|
122
|
+
"estimated_seconds": float(estimated_seconds or 0.0),
|
123
|
+
"container_count": int(container_count or 1),
|
124
|
+
}
|
125
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
126
|
+
js = await http.post_json("/api/v1/pricing/preflight", json=body)
|
127
|
+
if not isinstance(js, dict):
|
128
|
+
raise HTTPError(status=500, url="/api/v1/pricing/preflight", message="invalid_preflight_response", body_snippet=str(js)[:200])
|
129
|
+
return js
|
130
|
+
|
131
|
+
async def balance_autumn_normalized(self) -> Dict[str, Any]:
|
132
|
+
async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
|
133
|
+
js = await http.get("/api/v1/balance/autumn-normalized")
|
134
|
+
if not isinstance(js, dict):
|
135
|
+
raise HTTPError(status=500, url="/api/v1/balance/autumn-normalized", message="invalid_balance_response", body_snippet=str(js)[:200])
|
136
|
+
return js
|
137
|
+
|
138
|
+
|
139
|
+
def _infer_content_type(filename: str) -> str:
|
140
|
+
name = filename.lower()
|
141
|
+
if name.endswith(".jsonl"):
|
142
|
+
return "application/jsonl"
|
143
|
+
if name.endswith(".json"):
|
144
|
+
return "application/json"
|
145
|
+
if name.endswith(".txt"):
|
146
|
+
return "text/plain"
|
147
|
+
return "application/octet-stream"
|
148
|
+
|
149
|
+
|