trustmodel 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trustmodel/__init__.py +42 -0
- trustmodel/client.py +259 -0
- trustmodel/endpoints/__init__.py +3 -0
- trustmodel/endpoints/config.py +41 -0
- trustmodel/endpoints/credits.py +37 -0
- trustmodel/endpoints/evaluations.py +254 -0
- trustmodel/endpoints/models.py +84 -0
- trustmodel/exceptions.py +101 -0
- trustmodel/models/__init__.py +28 -0
- trustmodel/models/credits.py +31 -0
- trustmodel/models/evaluation.py +133 -0
- trustmodel/models/models.py +46 -0
- trustmodel/utils/__init__.py +30 -0
- trustmodel/utils/validation.py +72 -0
- trustmodel/utils/version.py +246 -0
- trustmodel-0.2.9.dist-info/METADATA +711 -0
- trustmodel-0.2.9.dist-info/RECORD +19 -0
- trustmodel-0.2.9.dist-info/WHEEL +4 -0
- trustmodel-0.2.9.dist-info/licenses/LICENSE +38 -0
trustmodel/__init__.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TrustModel Python SDK
|
|
3
|
+
|
|
4
|
+
Official Python SDK for the TrustModel AI evaluation platform.
|
|
5
|
+
Provides a simple interface to evaluate AI models and retrieve results.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
10
|
+
except ImportError:
|
|
11
|
+
# Python < 3.8 fallback
|
|
12
|
+
from importlib_metadata import ( # type: ignore[import-not-found, no-redef, assignment, unused-ignore]
|
|
13
|
+
PackageNotFoundError,
|
|
14
|
+
version,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from .client import TrustModelClient
|
|
18
|
+
from .exceptions import (
|
|
19
|
+
APIError,
|
|
20
|
+
AuthenticationError,
|
|
21
|
+
ConnectionValidationError,
|
|
22
|
+
InsufficientCreditsError,
|
|
23
|
+
RateLimitError,
|
|
24
|
+
TrustModelError,
|
|
25
|
+
ValidationError,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Resolve the version from the installed distribution's metadata. When the
# distribution cannot be found (PackageNotFoundError), a fixed fallback string
# is used instead.
try:
    __version__ = version("trustmodel")
except PackageNotFoundError:
    __version__ = "0.1.0"

# Public API of the package: the client plus the exception hierarchy.
__all__ = [
    "TrustModelClient",
    "TrustModelError",
    "AuthenticationError",
    "APIError",
    "RateLimitError",
    "ValidationError",
    "InsufficientCreditsError",
    "ConnectionValidationError",
]
|
trustmodel/client.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main client class for the TrustModel SDK.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
from urllib.parse import urljoin
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
from requests.adapters import HTTPAdapter
|
|
12
|
+
from urllib3.util.retry import Retry
|
|
13
|
+
|
|
14
|
+
from .endpoints.config import ConfigEndpoint
|
|
15
|
+
from .endpoints.credits import CreditsEndpoint
|
|
16
|
+
from .endpoints.evaluations import EvaluationsEndpoint
|
|
17
|
+
from .endpoints.models import ModelsEndpoint
|
|
18
|
+
from .exceptions import (
|
|
19
|
+
APIError,
|
|
20
|
+
AuthenticationError,
|
|
21
|
+
ConnectionValidationError,
|
|
22
|
+
InsufficientCreditsError,
|
|
23
|
+
RateLimitError,
|
|
24
|
+
TrustModelError,
|
|
25
|
+
)
|
|
26
|
+
from .utils.validation import validate_api_key
|
|
27
|
+
from .utils.version import get_user_agent, get_version
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TrustModelClient:
    """
    Main client for interacting with the TrustModel API.

    This client provides access to all TrustModel SDK endpoints for evaluating
    AI models and retrieving results. It can also be used as a context manager,
    in which case the underlying HTTP session is closed on exit.

    Args:
        api_key: Your TrustModel API key (starts with 'tm-')
        base_url: Base URL for the TrustModel API (default: production URL)
        timeout: Request timeout in seconds (default: 60)
        max_retries: Maximum number of retry attempts (default: 3)
        user_agent: Optional value forwarded to ``get_user_agent`` when the
            ``User-Agent`` header is built

    Example:
        >>> import trustmodel
        >>> client = trustmodel.TrustModelClient(api_key="tm-your_key_here")
        >>> models = client.models.list()
        >>> evaluation = client.evaluations.create(
        ...     model_identifier="gpt-4",
        ...     vendor_identifier="openai"
        ... )
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.trustmodel.ai",
        timeout: int = 60,
        max_retries: int = 3,
        user_agent: Optional[str] = None,
    ) -> None:
        # Reject malformed keys before any network resources are created.
        validate_api_key(api_key)

        self._api_key = api_key
        # Stored without a trailing slash; _request() re-adds exactly one.
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout

        self._session = requests.Session()

        # raise_on_status=False: once retries are exhausted on a status from
        # status_forcelist, urllib3 hands back the last response instead of
        # raising. Without it the failure surfaces as a requests RetryError and
        # _handle_response() never gets to raise RateLimitError / APIError.
        # NOTE(review): POST is retried on 429/5xx as well; confirm the evaluate
        # endpoint tolerates a repeated submission.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
            raise_on_status=False,
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)

        # Headers sent with every request made through this session.
        self._session.headers.update(
            {
                "Authorization": f"Bearer {self._api_key}",
                "Content-Type": "application/json",
                "User-Agent": get_user_agent(user_agent),
                "Accept": "application/json",
                "X-SDK-Version": get_version(),
            }
        )

        # Endpoint handlers; each keeps a reference back to this client.
        self.models = ModelsEndpoint(self)
        self.evaluations = EvaluationsEndpoint(self)
        self.credits = CreditsEndpoint(self)
        self.config = ConfigEndpoint(self)

    def _request(
        self,
        method: str,
        endpoint: str,
        data: Optional[dict[str, Any]] = None,
        params: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        """
        Make an HTTP request to the TrustModel API.

        Args:
            method: HTTP method (GET, POST, etc.)
            endpoint: API endpoint (without base URL)
            data: Request body data, sent as JSON
            params: Query parameters

        Returns:
            Response data as dictionary

        Raises:
            TrustModelError: On timeout, connection failure, or any other
                ``requests`` transport error.
            Various TrustModelError subclasses based on the error type
                (raised by ``_handle_response`` for non-2xx responses).
        """
        # Normalise the slashes so exactly one separates base URL and endpoint.
        url = urljoin(f"{self._base_url}/", endpoint.lstrip("/"))

        # Lazy %-formatting: the message is only built when DEBUG is enabled.
        logger.debug("Making %s request to %s", method, url)

        try:
            response = self._session.request(
                method=method,
                url=url,
                json=data,
                params=params,
                timeout=self._timeout,
            )
        except requests.exceptions.Timeout as e:
            raise TrustModelError("Request timed out") from e
        except requests.exceptions.ConnectionError as e:
            raise TrustModelError("Connection error - please check your internet connection") from e
        except requests.exceptions.RequestException as e:
            raise TrustModelError(f"Request failed: {str(e)}") from e

        return self._handle_response(response)

    def _handle_response(self, response: "requests.Response") -> dict[str, Any]:
        """
        Handle the API response and raise appropriate exceptions for errors.

        Args:
            response: The HTTP response object

        Returns:
            Parsed response data for 2xx responses. A body that is not valid
            JSON is represented as ``{"detail": <body text or "Unknown error">}``.

        Raises:
            AuthenticationError: On HTTP 401.
            RateLimitError: On HTTP 429.
            InsufficientCreditsError: On HTTP 402 or error code
                ``insufficient_credits``.
            ConnectionValidationError: On error code
                ``connection_validation_failed``.
            APIError: On any other non-2xx response.
        """
        try:
            response_data = response.json()
        except ValueError:
            # Every JSON decode error requests can raise (stdlib json,
            # simplejson, requests' own JSONDecodeError) derives from ValueError.
            response_data = {"detail": response.text or "Unknown error"}

        # Handle successful responses
        if 200 <= response.status_code < 300:
            return dict(response_data)

        # Error bodies are read with .get() below; coerce a non-object JSON body
        # (list, string, null, ...) into the same shape so the typed error is
        # raised instead of an AttributeError.
        if not isinstance(response_data, dict):
            response_data = {} if response_data is None else {"detail": str(response_data)}

        error_message = response_data.get("detail", f"HTTP {response.status_code}")
        error_code = response_data.get("code")

        # Authentication errors
        if response.status_code == 401:
            raise AuthenticationError(
                "Invalid API key. Please check your API key and try again.", response=response_data
            )

        # Rate limiting
        if response.status_code == 429:
            raise RateLimitError(
                "Rate limit exceeded. Please wait and try again.",
                status_code=response.status_code,
                response=response_data,
                error_code=error_code,
            )

        # Insufficient credits (signalled by status code or by error code)
        if response.status_code == 402 or error_code == "insufficient_credits":
            credits_required = response_data.get("credits_required", 0)
            credits_remaining = response_data.get("credits_remaining", 0)
            raise InsufficientCreditsError(
                error_message,
                credits_required=credits_required,
                credits_remaining=credits_remaining,
                response=response_data,
            )

        # Connection validation failed (BYOK or custom endpoint)
        if error_code == "connection_validation_failed":
            validation_details = response_data.get("validation_details", {})
            raise ConnectionValidationError(
                error_message,
                status_code=response.status_code,
                response=response_data,
                validation_details=validation_details,
            )

        # Other client errors (4xx)
        if 400 <= response.status_code < 500:
            raise APIError(
                error_message,
                status_code=response.status_code,
                response=response_data,
                error_code=error_code,
            )

        # Server errors (5xx)
        if response.status_code >= 500:
            raise APIError(
                f"Server error: {error_message}",
                status_code=response.status_code,
                response=response_data,
                error_code=error_code,
            )

        # Fallback for unexpected status codes (anything below 400 that is not 2xx)
        raise APIError(
            f"Unexpected response: {error_message}",
            status_code=response.status_code,
            response=response_data,
            error_code=error_code,
        )

    def get(self, endpoint: str, params: Optional[dict[str, Any]] = None) -> dict[str, Any]:
        """Make a GET request."""
        return self._request("GET", endpoint, params=params)

    def post(self, endpoint: str, data: Optional[dict[str, Any]] = None) -> dict[str, Any]:
        """Make a POST request."""
        return self._request("POST", endpoint, data=data)

    def put(self, endpoint: str, data: Optional[dict[str, Any]] = None) -> dict[str, Any]:
        """Make a PUT request."""
        return self._request("PUT", endpoint, data=data)

    def delete(self, endpoint: str) -> dict[str, Any]:
        """Make a DELETE request."""
        return self._request("DELETE", endpoint)

    def close(self) -> None:
        """Close the underlying HTTP session."""
        if self._session:
            self._session.close()

    def __enter__(self) -> "TrustModelClient":
        """Support for context manager: return the client itself."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Support for context manager: close the session on exit."""
        self.close()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration endpoints for the TrustModel SDK.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from ..models.evaluation import EvaluationConfig
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from ..client import TrustModelClient
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ConfigEndpoint:
    """
    Interface for configuration endpoints.

    Provides methods to discover available options for evaluations.
    """

    def __init__(self, client: "TrustModelClient") -> None:
        # The owning client performs the actual HTTP calls.
        self._client = client

    def get(self) -> EvaluationConfig:
        """
        Get SDK configuration options.

        Fetches ``/sdk/v1/config/`` and wraps the returned payload in an
        ``EvaluationConfig``. Per the API description this covers application
        types, user personas, categories, and default values.

        Returns:
            Configuration options

        Example:
            >>> config = client.config.get()
            >>> print("Available application types:")
            >>> for app_type in config.application_types:
            ...     print(f"  {app_type['id']}: {app_type['name']}")
            >>> print(f"Credits per category: {config.credits_per_category}")
        """
        payload = self._client.get("/sdk/v1/config/")
        config = EvaluationConfig(**payload)
        return config
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Credits endpoint wrapper for the TrustModel SDK.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from ..models.credits import Credits
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from ..client import TrustModelClient
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CreditsEndpoint:
    """
    Interface for credit-related API endpoints.

    Provides methods to check API key credit balance and usage.
    """

    def __init__(self, client: "TrustModelClient") -> None:
        # The owning client performs the actual HTTP calls.
        self._client = client

    def get_balance(self) -> Credits:
        """
        Get current credit balance for the API key.

        Fetches ``/sdk/v1/credits/`` and wraps the returned payload in a
        ``Credits`` model.

        Returns:
            Credit information including balance, usage, and limits

        Example:
            >>> credits = client.credits.get_balance()
            >>> print(f"Credits remaining: {credits.credits_remaining}")
            >>> print(f"Credits used: {credits.credits_used}")
            >>> print(f"Credit limit: {credits.credit_limit}")
        """
        payload = self._client.get("/sdk/v1/credits/")
        balance = Credits(**payload)
        return balance
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Evaluations endpoint wrapper for the TrustModel SDK.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
6
|
+
|
|
7
|
+
from ..models.evaluation import Evaluation
|
|
8
|
+
from ..utils.validation import validate_categories
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from ..client import TrustModelClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EvaluationsEndpoint:
    """
    Interface for evaluation-related API endpoints.

    Provides methods to create, list, and monitor AI model evaluations.
    """

    def __init__(self, client: "TrustModelClient") -> None:
        # The owning client performs the actual HTTP calls.
        self._client = client

    @staticmethod
    def _build_payload(
        required: dict[str, Any],
        optional: dict[str, Any],
        extras: dict[str, Any],
    ) -> dict[str, Any]:
        """Assemble a request body: all of ``required``, truthy ``optional`` values, non-None ``extras``."""
        payload = dict(required)
        # Named optional arguments are sent only when they carry a value
        # (None, "" and [] are all omitted).
        payload.update({key: value for key, value in optional.items() if value})
        # Pass-through keyword arguments are dropped only when None, so falsy
        # but meaningful values such as 0 or False still reach the API.
        payload.update({key: value for key, value in extras.items() if value is not None})
        return payload

    def _submit(self, payload: dict[str, Any]) -> "Evaluation":
        """POST ``payload`` to the evaluate endpoint and wrap the response in an ``Evaluation``."""
        response = self._client.post("/sdk/v1/evaluate/", data=payload)
        return Evaluation(**response)

    def create(
        self,
        model_identifier: str,
        vendor_identifier: str,
        api_key: Optional[str] = None,
        categories: Optional[list[str]] = None,
        model_config_name: Optional[str] = None,
        **kwargs: Any,
    ) -> "Evaluation":
        """
        Create a new model evaluation.

        This method creates an evaluation for a public model using either TrustModel's
        platform key or your own API key (BYOK).

        Note:
            When using BYOK (providing api_key), the API validates the connection
            to the vendor before creating the evaluation. If validation fails,
            a ConnectionValidationError is raised with details about the failure.

        Args:
            model_identifier: Model identifier (e.g., "gpt-4")
            vendor_identifier: Vendor identifier (e.g., "openai")
            api_key: Your API key for BYOK evaluation (optional). When provided,
                connection is validated before evaluation creation.
            categories: Evaluation categories (optional, defaults to all)
            model_config_name: Custom name for this evaluation (optional)
            **kwargs: Additional evaluation parameters (application_type, user_personas, etc.).
                Entries whose value is None are not sent.

        Returns:
            Created evaluation details

        Raises:
            ConnectionValidationError: If BYOK connection validation fails (invalid API key,
                unreachable endpoint, etc.). Check validation_details for specifics.
            InsufficientCreditsError: If account has insufficient credits
            APIError: For other API errors

        Example:
            >>> # Evaluate with TrustModel platform key
            >>> evaluation = client.evaluations.create(
            ...     model_identifier="gpt-4",
            ...     vendor_identifier="openai",
            ...     categories=["safety", "bias"]
            ... )

            >>> # Evaluate with your own API key (BYOK)
            >>> evaluation = client.evaluations.create(
            ...     model_identifier="gpt-4",
            ...     vendor_identifier="openai",
            ...     api_key="sk-your-openai-key",
            ...     categories=["safety", "bias"]
            ... )
        """
        # Validate locally before any request is made.
        validate_categories(categories)

        request_data = self._build_payload(
            {
                "model_identifier": model_identifier,
                "vendor_identifier": vendor_identifier,
            },
            {
                "api_key": api_key,
                "categories": categories,
                "model_config_name": model_config_name,
            },
            kwargs,
        )
        return self._submit(request_data)

    def create_custom_endpoint(
        self,
        api_endpoint: str,
        api_key: str,
        model_identifier: str,
        vendor_identifier: str = "openai",
        model_name: Optional[str] = None,
        model_config_name: Optional[str] = None,
        **kwargs: Any,
    ) -> "Evaluation":
        """
        Create an evaluation for a custom OpenAI-compatible endpoint.

        Note:
            The API validates the connection to your custom endpoint before creating
            the evaluation. If validation fails, a ConnectionValidationError is raised
            with details about the failure.

        Args:
            api_endpoint: Your OpenAI-compatible API endpoint URL
            api_key: API key for your endpoint
            model_identifier: Model identifier to use in API calls
            vendor_identifier: Vendor identifier for the custom endpoint. Determines
                which validator is used. Defaults to "openai".
                Available options:
                - "openai": Others/OpenAI-compatible (default, works for Ollama, vLLM, etc.)
                - "huggingface": Hugging Face endpoints
                - "azure_ai": Azure AI endpoints
                - "xai": Google Vertex AI endpoints
                - "bedrock": AWS Bedrock endpoints
            model_name: Human-readable model name (optional)
            model_config_name: Custom name for this evaluation (optional)
            **kwargs: Additional evaluation parameters. Entries whose value is
                None are not sent.

        Returns:
            Created evaluation details

        Raises:
            ConnectionValidationError: If endpoint connection validation fails (invalid
                API key, unreachable endpoint, incompatible API, invalid vendor, etc.).
                Check validation_details for specifics.
            InsufficientCreditsError: If account has insufficient credits
            APIError: For other API errors

        Example:
            >>> # OpenAI-compatible endpoint (Ollama, vLLM, etc.)
            >>> evaluation = client.evaluations.create_custom_endpoint(
            ...     api_endpoint="https://api.yourcompany.com/v1",
            ...     api_key="your-api-key",
            ...     model_identifier="custom-model-v1",
            ...     model_name="My Custom Model"
            ... )

            >>> # Azure AI endpoint
            >>> evaluation = client.evaluations.create_custom_endpoint(
            ...     api_endpoint="https://your-resource.openai.azure.com",
            ...     api_key="your-azure-key",
            ...     model_identifier="gpt-4",
            ...     vendor_identifier="azure_ai"
            ... )
        """
        request_data = self._build_payload(
            {
                "api_endpoint": api_endpoint,
                "api_key": api_key,
                "model_identifier": model_identifier,
                "vendor_identifier": vendor_identifier,
                "evaluation_type": "custom",  # Always custom for SDK
            },
            {
                "model_name": model_name,
                "model_config_name": model_config_name,
            },
            kwargs,
        )
        return self._submit(request_data)

    def list(self, status: Optional[str] = None) -> "list[Evaluation]":
        """
        List all evaluations created with this API key.

        Args:
            status: Optional status filter ("processing", "running", "completed", "failed")

        Returns:
            List of evaluations

        Example:
            >>> # Get all evaluations
            >>> evaluations = client.evaluations.list()

            >>> # Get only completed evaluations
            >>> completed = client.evaluations.list(status="completed")
        """
        params: dict[str, Any] = {}
        if status:
            params["status"] = status

        response = self._client.get("/sdk/v1/evaluations/", params=params)

        # Handle different response formats between production and development
        if "results" in response:
            # Production format: {"results": [...]}
            records = response["results"]
        else:
            # Development format or fallback: try "evaluations" key, then empty list
            records = response.get("evaluations", [])

        return [Evaluation(**record) for record in records]

    def get(self, evaluation_id: int) -> "Evaluation":
        """
        Get detailed information about a specific evaluation.

        Args:
            evaluation_id: ID of the evaluation

        Returns:
            Detailed evaluation information including scores if completed

        Example:
            >>> evaluation = client.evaluations.get(123)
            >>> if evaluation.status == "completed":
            ...     print(f"Overall score: {evaluation.overall_score}")
            ...     for score in evaluation.scores:
            ...         print(f"{score.category}: {score.score}")
        """
        response = self._client.get(f"/sdk/v1/evaluations/{evaluation_id}/")
        return Evaluation(**response)

    def get_status(self, evaluation_id: int) -> dict[str, Any]:
        """
        Get quick status information for an evaluation.

        Args:
            evaluation_id: ID of the evaluation

        Returns:
            Status information dictionary, returned exactly as received from
            the client (no model wrapping)

        Example:
            >>> status = client.evaluations.get_status(123)
            >>> print(f"Status: {status['status']}")
            >>> print(f"Progress: {status['completion_percentage']}%")
        """
        return self._client.get(f"/sdk/v1/evaluations/{evaluation_id}/status/")
|