trismik 0.9.12__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
trismik/client_async.py DELETED
@@ -1,405 +0,0 @@
1
- """
2
- Trismik async client for interacting with the Trismik API.
3
-
4
- This module provides an asynchronous client for interacting with the Trismik
5
- API. It uses httpx for making HTTP requests.
6
- """
7
-
8
- from typing import List, Optional
9
-
10
- import httpx
11
-
12
- from trismik._mapper import TrismikResponseMapper
13
- from trismik._utils import TrismikUtils
14
- from trismik.exceptions import (
15
- TrismikApiError,
16
- TrismikPayloadTooLargeError,
17
- TrismikValidationError,
18
- )
19
- from trismik.settings import client_settings, environment_settings
20
- from trismik.types import (
21
- TrismikClassicEvalRequest,
22
- TrismikClassicEvalResponse,
23
- TrismikDataset,
24
- TrismikMeResponse,
25
- TrismikProject,
26
- TrismikReplayRequest,
27
- TrismikReplayResponse,
28
- TrismikRunMetadata,
29
- TrismikRunResponse,
30
- TrismikRunSummary,
31
- )
32
-
33
-
34
- class TrismikAsyncClient:
35
- """
36
- Asynchronous client for the Trismik API.
37
-
38
- This class provides an asynchronous interface to interact with the Trismik
39
- API, handling authentication, dataset runs, and responses.
40
- """
41
-
42
- def __init__(
43
- self,
44
- service_url: Optional[str] = None,
45
- api_key: Optional[str] = None,
46
- http_client: Optional[httpx.AsyncClient] = None,
47
- ) -> None:
48
- """
49
- Initialize the Trismik async client.
50
-
51
- Args:
52
- service_url (Optional[str]): URL of the Trismik service.
53
- api_key (Optional[str]): API key for the Trismik service.
54
- http_client (Optional[httpx.AsyncClient]): HTTP client to use for
55
- requests.
56
-
57
- Raises:
58
- TrismikError: If service_url or api_key are not provided and not
59
- found in environment.
60
- TrismikApiError: If API request fails.
61
- """
62
- self._service_url = TrismikUtils.option(
63
- service_url,
64
- client_settings["endpoint"],
65
- environment_settings["trismik_service_url"],
66
- )
67
- self._api_key = TrismikUtils.required_option(
68
- api_key, "api_key", environment_settings["trismik_api_key"]
69
- )
70
-
71
- # Set default headers with API key
72
- default_headers = {"x-api-key": self._api_key}
73
-
74
- self._http_client = http_client or httpx.AsyncClient(
75
- base_url=self._service_url, headers=default_headers, timeout=30.0
76
- )
77
-
78
- def _handle_http_error(self, e: httpx.HTTPStatusError) -> Exception:
79
- """
80
- Handle HTTP errors and return appropriate Trismik exceptions.
81
-
82
- Args:
83
- e (httpx.HTTPStatusError): The HTTP status error to handle.
84
-
85
- Returns:
86
- Exception: The appropriate Trismik exception to raise.
87
- """
88
- if e.response.status_code == 413:
89
- # Handle payload too large error specifically
90
- try:
91
- backend_message = e.response.json().get(
92
- "detail", "Payload too large."
93
- )
94
- except Exception:
95
- backend_message = "Payload too large."
96
- return TrismikPayloadTooLargeError(backend_message)
97
- elif e.response.status_code == 422:
98
- # Handle validation error specifically
99
- try:
100
- backend_message = e.response.json().get(
101
- "detail", "Validation failed."
102
- )
103
- except Exception:
104
- backend_message = "Validation failed."
105
- return TrismikValidationError(backend_message)
106
- else:
107
- return TrismikApiError(TrismikUtils.get_error_message(e.response))
108
-
109
- async def list_datasets(self) -> List[TrismikDataset]:
110
- """
111
- Get a list of available datasets.
112
-
113
- Returns:
114
- List[TrismikDataset]: List of available datasets.
115
-
116
- Raises:
117
- TrismikApiError: If API request fails.
118
- """
119
- try:
120
- url = "/datasets"
121
- response = await self._http_client.get(url)
122
- response.raise_for_status()
123
- json = response.json()
124
- return TrismikResponseMapper.to_datasets(json)
125
- except httpx.HTTPStatusError as e:
126
- raise TrismikApiError(
127
- TrismikUtils.get_error_message(e.response)
128
- ) from e
129
- except httpx.HTTPError as e:
130
- raise TrismikApiError(str(e)) from e
131
-
132
- async def start_run(
133
- self,
134
- dataset_id: str,
135
- project_id: str,
136
- experiment: str,
137
- metadata: Optional[TrismikRunMetadata] = None,
138
- ) -> TrismikRunResponse:
139
- """
140
- Start a new run for a dataset and get the first item.
141
-
142
- Args:
143
- dataset_id (str): ID of the dataset.
144
- project_id (str): ID of the project.
145
- experiment (str): Name of the experiment.
146
- metadata (Optional[TrismikRunMetadata]): Run metadata.
147
-
148
- Returns:
149
- TrismikRunResponse: Run response.
150
-
151
- Raises:
152
- TrismikPayloadTooLargeError: If the request payload exceeds the
153
- server's size limit.
154
- TrismikApiError: If API request fails.
155
- """
156
- try:
157
- url = "/runs/start"
158
- body = {
159
- "datasetId": dataset_id,
160
- "projectId": project_id,
161
- "experiment": experiment,
162
- "metadata": metadata.toDict() if metadata else {},
163
- }
164
- response = await self._http_client.post(url, json=body)
165
- response.raise_for_status()
166
- json = response.json()
167
- return TrismikResponseMapper.to_run_response(json)
168
- except httpx.HTTPStatusError as e:
169
- raise self._handle_http_error(e) from e
170
- except httpx.HTTPError as e:
171
- raise TrismikApiError(str(e)) from e
172
-
173
- async def continue_run(
174
- self, run_id: str, item_choice_id: str
175
- ) -> TrismikRunResponse:
176
- """
177
- Continue a run: respond to the current item and get the next one.
178
-
179
- Args:
180
- run_id (str): ID of the run.
181
- item_choice_id (str): ID of the chosen item response.
182
-
183
- Returns:
184
- TrismikRunResponse: Run response.
185
-
186
- Raises:
187
- TrismikApiError: If API request fails.
188
- """
189
- try:
190
- url = "/runs/continue"
191
- body = {"itemChoiceId": item_choice_id, "runId": run_id}
192
- response = await self._http_client.post(url, json=body)
193
- response.raise_for_status()
194
- json = response.json()
195
- return TrismikResponseMapper.to_run_response(json)
196
- except httpx.HTTPStatusError as e:
197
- raise TrismikApiError(
198
- TrismikUtils.get_error_message(e.response)
199
- ) from e
200
- except httpx.HTTPError as e:
201
- raise TrismikApiError(str(e)) from e
202
-
203
- async def run_summary(self, run_id: str) -> TrismikRunSummary:
204
- """
205
- Get run summary including responses, dataset, and state.
206
-
207
- Args:
208
- run_id (str): ID of the run.
209
-
210
- Returns:
211
- TrismikRunSummary: Complete run summary with responses,
212
- dataset, state, and metadata.
213
-
214
- Raises:
215
- TrismikApiError: If API request fails.
216
- """
217
- try:
218
- url = f"/runs/adaptive/{run_id}"
219
- response = await self._http_client.get(url)
220
- response.raise_for_status()
221
- json = response.json()
222
- return TrismikResponseMapper.to_run_summary(json)
223
- except httpx.HTTPStatusError as e:
224
- raise TrismikApiError(
225
- TrismikUtils.get_error_message(e.response)
226
- ) from e
227
- except httpx.HTTPError as e:
228
- raise TrismikApiError(str(e)) from e
229
-
230
- async def submit_replay(
231
- self,
232
- run_id: str,
233
- replay_request: TrismikReplayRequest,
234
- metadata: Optional[TrismikRunMetadata] = None,
235
- ) -> TrismikReplayResponse:
236
- """
237
- Submit a replay of a run with specific responses.
238
-
239
- Args:
240
- run_id (str): ID of the run to replay.
241
- replay_request (TrismikReplayRequest): Request containing responses
242
- to submit.
243
- metadata (Optional[TrismikRunMetadata]): Run metadata.
244
-
245
- Returns:
246
- TrismikReplayResponse: Response from the replay endpoint.
247
-
248
- Raises:
249
- TrismikPayloadTooLargeError: If the request payload exceeds the
250
- server's size limit.
251
- TrismikValidationError: If the request fails validation (e.g.,
252
- duplicate item IDs, unknown item IDs).
253
- TrismikApiError: If API request fails.
254
- """
255
- try:
256
- url = f"runs/{run_id}/replay"
257
-
258
- # Convert TrismikReplayRequestItem objects to dictionaries
259
- responses_dict = [
260
- {"itemId": item.itemId, "itemChoiceId": item.itemChoiceId}
261
- for item in replay_request.responses
262
- ]
263
-
264
- body = {
265
- "responses": responses_dict,
266
- "metadata": metadata.toDict() if metadata else {},
267
- }
268
- response = await self._http_client.post(url, json=body)
269
- response.raise_for_status()
270
- json = response.json()
271
- return TrismikResponseMapper.to_replay_response(json)
272
- except httpx.HTTPStatusError as e:
273
- raise self._handle_http_error(e) from e
274
- except httpx.HTTPError as e:
275
- raise TrismikApiError(str(e)) from e
276
-
277
- async def me(self) -> TrismikMeResponse:
278
- """
279
- Get current user information.
280
-
281
- Returns:
282
- TrismikMeResponse: User information including validity and payload.
283
-
284
- Raises:
285
- TrismikApiError: If API request fails.
286
- """
287
- try:
288
- url = "../admin/api-keys/me"
289
- response = await self._http_client.get(url)
290
- response.raise_for_status()
291
- json = response.json()
292
- return TrismikResponseMapper.to_me_response(json)
293
- except httpx.HTTPStatusError as e:
294
- raise TrismikApiError(
295
- TrismikUtils.get_error_message(e.response)
296
- ) from e
297
- except httpx.HTTPError as e:
298
- raise TrismikApiError(str(e)) from e
299
-
300
- async def submit_classic_eval(
301
- self, classic_eval_request: TrismikClassicEvalRequest
302
- ) -> TrismikClassicEvalResponse:
303
- """
304
- Submit a classic evaluation run with pre-computed results.
305
-
306
- Args:
307
- classic_eval_request (TrismikClassicEvalRequest): Request containing
308
- project info, dataset, model outputs, and metrics.
309
-
310
- Returns:
311
- TrismikClassicEvalResponse: Response from the classic evaluation
312
- endpoint.
313
-
314
- Raises:
315
- TrismikPayloadTooLargeError: If the request payload exceeds the
316
- server's size limit.
317
- TrismikValidationError: If the request fails validation.
318
- TrismikApiError: If API request fails.
319
- """
320
- try:
321
- url = "/runs/classic"
322
-
323
- # Convert request object to dictionary
324
- items_dict = [
325
- {
326
- "datasetItemId": item.datasetItemId,
327
- "modelInput": item.modelInput,
328
- "modelOutput": item.modelOutput,
329
- "goldOutput": item.goldOutput,
330
- "metrics": item.metrics,
331
- }
332
- for item in classic_eval_request.items
333
- ]
334
-
335
- metrics_dict = [
336
- {
337
- "metricId": metric.metricId,
338
- "valueType": TrismikUtils.metric_value_to_type(
339
- metric.value
340
- ),
341
- "value": metric.value,
342
- }
343
- for metric in classic_eval_request.metrics
344
- ]
345
-
346
- body = {
347
- "projectId": classic_eval_request.projectId,
348
- "experimentName": classic_eval_request.experimentName,
349
- "datasetId": classic_eval_request.datasetId,
350
- "modelName": classic_eval_request.modelName,
351
- "hyperparameters": classic_eval_request.hyperparameters,
352
- "items": items_dict,
353
- "metrics": metrics_dict,
354
- }
355
-
356
- response = await self._http_client.post(url, json=body)
357
- response.raise_for_status()
358
- json = response.json()
359
- return TrismikResponseMapper.to_classic_eval_response(json)
360
- except httpx.HTTPStatusError as e:
361
- raise self._handle_http_error(e) from e
362
- except httpx.HTTPError as e:
363
- raise TrismikApiError(str(e)) from e
364
-
365
- async def create_project(
366
- self,
367
- name: str,
368
- team_id: Optional[str] = None,
369
- description: Optional[str] = None,
370
- ) -> TrismikProject:
371
- """
372
- Create a new project.
373
-
374
- Args:
375
- name (str): Name of the project.
376
- team_id (Optional[str]): ID of the team to create the
377
- project in.
378
- description (Optional[str]): Optional description of the project.
379
-
380
- Returns:
381
- TrismikProject: Created project information.
382
-
383
- Raises:
384
- TrismikValidationError: If the request fails validation.
385
- TrismikApiError: If API request fails.
386
- """
387
- try:
388
- url = "../admin/public/projects"
389
-
390
- body = {
391
- "name": name,
392
- }
393
- if team_id is not None:
394
- body["teamId"] = team_id
395
- if description is not None:
396
- body["description"] = description
397
-
398
- response = await self._http_client.post(url, json=body)
399
- response.raise_for_status()
400
- json = response.json()
401
- return TrismikResponseMapper.to_project(json)
402
- except httpx.HTTPStatusError as e:
403
- raise self._handle_http_error(e) from e
404
- except httpx.HTTPError as e:
405
- raise TrismikApiError(str(e)) from e
@@ -1,177 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: trismik
3
- Version: 0.9.12
4
- Summary:
5
- License-File: LICENSE
6
- Author: Bartosz Kielczewski
7
- Author-email: bk352@cam.ac.uk
8
- Requires-Python: >=3.9
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Programming Language :: Python :: 3.9
11
- Classifier: Programming Language :: Python :: 3.10
12
- Classifier: Programming Language :: Python :: 3.11
13
- Classifier: Programming Language :: Python :: 3.12
14
- Classifier: Programming Language :: Python :: 3.13
15
- Classifier: Programming Language :: Python :: 3.14
16
- Provides-Extra: examples
17
- Requires-Dist: accelerate (>=1.7.0,<2.0.0) ; extra == "examples"
18
- Requires-Dist: httpx (>=0.27.2,<1.0.0)
19
- Requires-Dist: nest-asyncio (>=1.6.0,<2.0.0)
20
- Requires-Dist: notebook (>=7.4.4,<8.0.0) ; extra == "examples"
21
- Requires-Dist: openai (>=1.81.0,<2.0.0) ; extra == "examples"
22
- Requires-Dist: torch (>=2.7.0,<3.0.0) ; extra == "examples"
23
- Requires-Dist: torchaudio (>=2.7.0,<3.0.0) ; extra == "examples"
24
- Requires-Dist: torchvision (>=0.22.0,<1.0.0) ; extra == "examples"
25
- Requires-Dist: tqdm (>=4.67.1,<5.0.0)
26
- Requires-Dist: transformers (>=4.51.3,<5.0.0) ; extra == "examples"
27
- Description-Content-Type: text/markdown
28
-
29
- <h1 align="center"> Trismik SDK</h1>
30
-
31
- <p align="center">
32
- <img alt="PyPI - Version" src="https://img.shields.io/pypi/v/trismik">
33
- <img alt="Python Version" src="https://img.shields.io/badge/python-3.9%2B-blue">
34
- <img alt="License" src="https://img.shields.io/badge/license-MIT-green">
35
- </p>
36
-
37
- ## Table of Contents
38
- - [Overview](#overview)
39
- - [Quick Start](#quick-start)
40
- - [Installation](#installation)
41
- - [API Key Setup](#api-key-setup)
42
- - [Basic Usage](#basic-usage)
43
- - [Interpreting Results](#interpreting-results)
44
- - [Theta (θ)](#theta-θ)
45
- - [Other Metrics](#other-metrics)
46
- - [Contributing](#contributing)
47
- - [License](#license)
48
-
49
- ## Overview
50
-
51
- [**Trismik**](https://trismik.com) is a Cambridge, UK-based startup offering adversarial testing for LLMs. The APIs we provide through this library allow you to call our adaptive test engine and evaluate LLMs up to 95% faster (and cheaper!) than traditional evaluation techniques.
52
-
53
- Our **adaptive testing** algorithm allows you to estimate the precision of a model by looking at only a small portion of a dataset. Through this library, we provide access to a number of open-source datasets across several dimensions (reasoning, toxicity, tool use...) to speed up model testing in several scenarios, such as foundation model training, supervised fine-tuning, prompt engineering, and so on.
54
-
55
- ## Quick Start
56
-
57
- ### Installation
58
-
59
- To use our API, you need to get an API key first. Please register on [dashboard.trismik.com](https://dashboard.trismik.com) and obtain an API key.
60
-
61
- Trismik is available via [pypi](https://pypi.org/project/trismik/). To install Trismik, run the following in your terminal (in a virtualenv, if you use one):
62
-
63
- ```bash
64
- pip install trismik
65
- ```
66
-
67
- ### API Key Setup
68
-
69
- You can provide your API key in one of the following ways:
70
-
71
- 1. **Environment Variable**:
72
- ```bash
73
- export TRISMIK_API_KEY="your-api-key"
74
- ```
75
-
76
- 2. **`.env` File**:
77
- ```bash
78
- # .env
79
- TRISMIK_API_KEY=your-api-key
80
- ```
81
- Then load it with `python-dotenv`:
82
- ```python
83
- from dotenv import load_dotenv
84
- load_dotenv()
85
- ```
86
-
87
- 3. **Direct Initialization**:
88
- ```python
89
- client = TrismikAsyncClient(api_key="YOUR_API_KEY")
90
- ```
91
-
92
- ### Basic Usage
93
-
94
- Running a test is straightforward:
95
-
96
- 1. Implement a method that wraps model inference over a dataset item
97
- 2. Create an `AdaptiveTest` instance
98
- 3. Run the test!
99
-
100
- Here's a basic example:
101
-
102
- ```python
103
- def model_inference(item: TrismikItem) -> Any:
104
- model_output = ... # call your model here
105
- return model_output
106
-
107
-
108
- # Initialize the test runner
109
- runner = AdaptiveTest(model_inference)
110
-
111
- # Run the test
112
- results = await runner.run_async(
113
- "MMLUPro2025", # or any dataset we support
114
- with_responses=True,
115
- run_metadata=sample_metadata,
116
- )
117
-
118
- # Print the test output
119
- for result in results:
120
- print(f"{result.trait} ({result.name}): {result.value}")
121
- ```
122
-
123
- ### Examples
124
-
125
- You can find more examples in the `examples` folder:
126
- - [`example_transformers.py`](examples/example_transformers.py) - Example using Hugging Face Transformers models
127
- - [`example_openai.py`](examples/example_openai.py) - Example using OpenAI models
128
- - [`example_adaptive_test.py`](examples/example_adaptive_test.py) - Example of adaptive testing configuration
129
-
130
- To run the examples, you will need to clone this repo, navigate to the
131
- source folder, and then run:
132
-
133
- ```bash
134
- poetry install --with examples
135
- poetry run python examples/example_adaptive_test.py # or any other example
136
- ```
137
-
138
- ## Interpreting Results
139
-
140
- ### Theta (θ)
141
-
142
- Our adversarial test returns several values; however, you will be interested mainly in `theta`. Theta ($\theta$) is our metric; it measures the ability of the model on a certain dataset, and it can be used as a proxy to approximate the original metric used on that dataset. For example, on an accuracy-based dataset, a high theta correlates with a high accuracy, and low theta correlates with low accuracy.
143
-
144
- To interpret a theta score, consider that $\theta=0$ corresponds to a 50% chance for a model to get an answer right - in other words, to an accuracy of 50%.
145
- A negative theta means that the model will give more bad answers than good ones, while a positive theta means that the model will give more good answers than bad ones.
146
- While theta is unbounded in our implementation (i.e. $-\infty < \theta < \infty$), in practice $\theta$ will take values between -3 and 3 in most cases.
147
-
148
- Compared to classical benchmark testing, the estimated accuracy from adaptive testing uses fewer but more informative items while avoiding noise from overly easy or difficult questions. This makes it a more efficient and stable measure, especially on very large datasets.
149
-
150
- ### Other Metrics
151
-
152
- - **Standard Deviation (`std`)**:
153
- - A measure of the uncertainty or error in the theta estimate
154
- - A smaller `std` indicates a more precise estimate
155
- - You should see a `std` around or below 0.25
156
-
157
- - **Correct Responses (`responsesCorrect`)**:
158
- - The number of correct answers delivered by the model
159
-
160
- - **Important note**: A higher number of correct answers does not necessarily
161
- correlate with a high theta. Our algorithm navigates the dataset to find a
162
- balance of “hard” and “easy” items for your model, so by the end of the test,
163
- it encounters a representative mix of inputs it can and cannot handle. In
164
- practice, expect `responsesCorrect` to be roughly half of `responsesTotal`.
165
-
166
- - **Total Responses (`responsesTotal`)**:
167
- - The number of items processed before reaching a stable theta.
168
- - Expected range: 60 ≤ `responsesTotal` ≤ 80
169
-
170
- ## Contributing
171
-
172
- See `CONTRIBUTING.md`.
173
-
174
- ## License
175
-
176
- This library is licensed under the MIT license. See `LICENSE` file.
177
-
@@ -1,12 +0,0 @@
1
- trismik/__init__.py,sha256=20SwXrda9YsgykaoPohwz6foj2FkraniPA-GTQS9m00,197
2
- trismik/_mapper.py,sha256=BFryJLwYYKDlX_vhEPKarDbdX8FkSYfHFtVQOnAK67o,10994
3
- trismik/_utils.py,sha256=4dVRTWapyOQn8suGhzxD0-5Vn5m8_Uuc7gB434n0SdM,3848
4
- trismik/adaptive_test.py,sha256=vG8OfL02IeQfuPpj3SdotxZRWKuMZGZiuPBhOU-PBpw,21594
5
- trismik/client_async.py,sha256=VXMxdIFHmIUP0zcHO1Kq7-uHWXYSqWbUqASPm8MSQjU,13909
6
- trismik/exceptions.py,sha256=2wb4_K7GdDf00s3xUaiSfw6718ZV3Eaa4M2lYbiEZl4,1945
7
- trismik/settings.py,sha256=ErXj3f9Uw1AgewytjxmIY7TmM5jB7RE_LJMp3T7szBY,419
8
- trismik/types.py,sha256=zo6QcXpo7ZfdEntvJDzftBPq_8_g0fS-XDfNTtKVg8k,6555
9
- trismik-0.9.12.dist-info/METADATA,sha256=sHnT177cyW8nQQ_Av9oXjrnMWFHR3KetBdUJ3i9cdmM,6815
10
- trismik-0.9.12.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
11
- trismik-0.9.12.dist-info/licenses/LICENSE,sha256=tgetRhapGLh7ZxfknW6Mm-WobfziPd64nAK52X5XKaw,1077
12
- trismik-0.9.12.dist-info/RECORD,,