vellum-ai 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vellum/core/client_wrapper.py +1 -1
- vellum/lib/test_suites/resources.py +115 -37
- vellum/lib/utils/env.py +4 -2
- vellum/lib/utils/exceptions.py +1 -1
- vellum/lib/utils/paginator.py +2 -1
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/METADATA +1 -1
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/RECORD +9 -9
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/LICENSE +0 -0
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/WHEEL +0 -0
vellum/core/client_wrapper.py
CHANGED
@@ -2,8 +2,10 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
import time
|
5
|
-
from
|
5
|
+
from functools import cached_property
|
6
|
+
from typing import Callable, Generator, List, Any
|
6
7
|
|
8
|
+
from vellum import TestSuiteRunRead, TestSuiteRunMetricOutput_Number
|
7
9
|
from vellum.client import Vellum
|
8
10
|
from vellum.lib.test_suites.constants import (
|
9
11
|
DEFAULT_MAX_POLLING_DURATION_MS,
|
@@ -11,6 +13,7 @@ from vellum.lib.test_suites.constants import (
|
|
11
13
|
)
|
12
14
|
from vellum.lib.test_suites.exceptions import TestSuiteRunResultsException
|
13
15
|
from vellum.lib.utils.env import get_api_key
|
16
|
+
from vellum.lib.utils.paginator import PaginatedResults, get_all_results
|
14
17
|
from vellum.types import (
|
15
18
|
ExternalTestCaseExecutionRequest,
|
16
19
|
NamedTestCaseVariableValueRequest,
|
@@ -22,9 +25,6 @@ from vellum.types import (
|
|
22
25
|
TestSuiteRunState,
|
23
26
|
)
|
24
27
|
|
25
|
-
from vellum.lib.utils.paginator import PaginatedResults, get_all_results
|
26
|
-
|
27
|
-
|
28
28
|
logger = logging.getLogger(__name__)
|
29
29
|
|
30
30
|
|
@@ -116,44 +116,141 @@ class VellumTestSuiteRunResults:
|
|
116
116
|
|
117
117
|
def __init__(
|
118
118
|
self,
|
119
|
-
|
119
|
+
test_suite_run: TestSuiteRunRead,
|
120
120
|
*,
|
121
121
|
client: Vellum | None = None,
|
122
122
|
polling_interval: int = DEFAULT_POLLING_INTERVAL_MS,
|
123
123
|
max_polling_duration: int = DEFAULT_MAX_POLLING_DURATION_MS,
|
124
124
|
) -> None:
|
125
|
-
self.
|
125
|
+
self._test_suite_run = test_suite_run
|
126
126
|
self._client = client or Vellum(
|
127
127
|
api_key=get_api_key(),
|
128
128
|
)
|
129
|
-
self._state = "QUEUED"
|
130
129
|
self._executions: Generator[VellumTestSuiteRunExecution, None, None] | None = (
|
131
130
|
None
|
132
131
|
)
|
133
132
|
self._polling_interval = polling_interval
|
134
133
|
self._max_polling_duration = max_polling_duration
|
135
134
|
|
135
|
+
@property
|
136
|
+
def state(self) -> TestSuiteRunState:
|
137
|
+
return self._test_suite_run.state
|
138
|
+
|
139
|
+
@cached_property
|
140
|
+
def all_executions(self) -> list[VellumTestSuiteRunExecution]:
|
141
|
+
return list(self._get_test_suite_run_executions())
|
142
|
+
|
136
143
|
def get_metric_outputs(
|
137
144
|
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
138
|
-
) ->
|
145
|
+
) -> List[TestSuiteRunMetricOutput]:
|
139
146
|
"""Retrieve a metric's output across all executions by providing the info needed to uniquely identify it."""
|
140
147
|
|
141
|
-
|
142
|
-
|
143
|
-
for execution in executions:
|
144
|
-
yield execution.get_metric_output(
|
148
|
+
return [
|
149
|
+
execution.get_metric_output(
|
145
150
|
metric_identifier=metric_identifier, output_identifier=output_identifier
|
146
151
|
)
|
152
|
+
for execution in self.all_executions
|
153
|
+
]
|
154
|
+
|
155
|
+
def get_count_metric_outputs(
|
156
|
+
self,
|
157
|
+
metric_identifier: str | None = None,
|
158
|
+
output_identifier: str | None = None,
|
159
|
+
*,
|
160
|
+
predicate: Callable[[TestSuiteRunMetricOutput], bool] | None = None,
|
161
|
+
) -> int:
|
162
|
+
"""Returns the count of all metric outputs that match the given criteria."""
|
163
|
+
|
164
|
+
metric_outputs = self.get_metric_outputs(
|
165
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
166
|
+
)
|
167
|
+
|
168
|
+
if predicate is None:
|
169
|
+
return len(metric_outputs)
|
170
|
+
|
171
|
+
return len([output for output in metric_outputs if predicate(output)])
|
172
|
+
|
173
|
+
def get_numeric_metric_output_values(
|
174
|
+
self,
|
175
|
+
metric_identifier: str | None = None,
|
176
|
+
output_identifier: str | None = None,
|
177
|
+
) -> List[float]:
|
178
|
+
"""Returns the values of a numeric metric output that match the given criteria."""
|
179
|
+
|
180
|
+
metric_outputs: list[TestSuiteRunMetricOutput_Number] = []
|
181
|
+
|
182
|
+
for output in self.get_metric_outputs(
|
183
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
184
|
+
):
|
185
|
+
if output.type != "NUMBER":
|
186
|
+
raise TestSuiteRunResultsException(
|
187
|
+
f"Expected a numeric metric output, but got a {output.type} output instead."
|
188
|
+
)
|
189
|
+
|
190
|
+
metric_outputs.append(output)
|
191
|
+
|
192
|
+
return [output.value for output in metric_outputs]
|
193
|
+
|
194
|
+
def get_mean_metric_output(
|
195
|
+
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
196
|
+
) -> float:
|
197
|
+
"""Returns the mean of all metric outputs that match the given criteria."""
|
198
|
+
output_values = self.get_numeric_metric_output_values(
|
199
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
200
|
+
)
|
201
|
+
return sum(output_values) / len(output_values)
|
202
|
+
|
203
|
+
def get_min_metric_output(
|
204
|
+
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
205
|
+
) -> float:
|
206
|
+
"""Returns the min value across all metric outputs that match the given criteria."""
|
207
|
+
output_values = self.get_numeric_metric_output_values(
|
208
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
209
|
+
)
|
210
|
+
return min(output_values)
|
211
|
+
|
212
|
+
def get_max_metric_output(
|
213
|
+
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
214
|
+
) -> float:
|
215
|
+
"""Returns the max value across all metric outputs that match the given criteria."""
|
216
|
+
output_values = self.get_numeric_metric_output_values(
|
217
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
218
|
+
)
|
219
|
+
return max(output_values)
|
220
|
+
|
221
|
+
def wait_until_complete(self) -> None:
|
222
|
+
"""Wait until the Test Suite Run is no longer in a QUEUED or RUNNING state."""
|
223
|
+
|
224
|
+
start_time = time.time_ns()
|
225
|
+
while True:
|
226
|
+
logger.debug("Polling for latest test suite run state...")
|
227
|
+
self._refresh_test_suite_run()
|
228
|
+
if self.state not in {"QUEUED", "RUNNING"}:
|
229
|
+
break
|
230
|
+
|
231
|
+
current_time = time.time_ns()
|
232
|
+
if ((current_time - start_time) / 1e6) > self._max_polling_duration:
|
233
|
+
raise TestSuiteRunResultsException(
|
234
|
+
"Test suite run timed out polling for executions"
|
235
|
+
)
|
236
|
+
|
237
|
+
time.sleep(self._polling_interval / 1000.0)
|
238
|
+
|
239
|
+
if self.state == "FAILED":
|
240
|
+
raise TestSuiteRunResultsException("Test suite run failed")
|
147
241
|
|
148
|
-
|
149
|
-
|
150
|
-
|
242
|
+
if self.state == "CANCELLED":
|
243
|
+
raise TestSuiteRunResultsException("Test suite run was cancelled")
|
244
|
+
|
245
|
+
def _refresh_test_suite_run(self):
|
246
|
+
test_suite_run = self._client.test_suite_runs.retrieve(self._test_suite_run.id)
|
247
|
+
self._test_suite_run = test_suite_run
|
151
248
|
|
152
249
|
def _list_paginated_executions(
|
153
250
|
self, offset: int | None, limit: int | None
|
154
251
|
) -> PaginatedResults[TestSuiteRunExecution]:
|
155
252
|
response = self._client.test_suite_runs.list_executions(
|
156
|
-
self.
|
253
|
+
self._test_suite_run.id,
|
157
254
|
offset=offset,
|
158
255
|
limit=limit,
|
159
256
|
expand=[
|
@@ -175,26 +272,7 @@ class VellumTestSuiteRunResults:
|
|
175
272
|
if self._executions is not None:
|
176
273
|
return self._executions
|
177
274
|
|
178
|
-
|
179
|
-
while True:
|
180
|
-
logger.debug("Polling for latest test suite run state...")
|
181
|
-
self._refresh_test_suite_run_state()
|
182
|
-
if self._state not in {"QUEUED", "RUNNING"}:
|
183
|
-
break
|
184
|
-
|
185
|
-
current_time = time.time_ns()
|
186
|
-
if ((current_time - start_time) / 1e6) > self._max_polling_duration:
|
187
|
-
raise TestSuiteRunResultsException(
|
188
|
-
"Test suite run timed out polling for executions"
|
189
|
-
)
|
190
|
-
|
191
|
-
time.sleep(self._polling_interval / 1000.0)
|
192
|
-
|
193
|
-
if self._state == "FAILED":
|
194
|
-
raise TestSuiteRunResultsException("Test suite run failed")
|
195
|
-
|
196
|
-
if self._state == "CANCELLED":
|
197
|
-
raise TestSuiteRunResultsException("Test suite run was cancelled")
|
275
|
+
self.wait_until_complete()
|
198
276
|
|
199
277
|
raw_api_executions = get_all_results(self._list_paginated_executions)
|
200
278
|
self._executions = self._wrap_api_executions(raw_api_executions)
|
@@ -250,4 +328,4 @@ class VellumTestSuite:
|
|
250
328
|
),
|
251
329
|
),
|
252
330
|
)
|
253
|
-
return VellumTestSuiteRunResults(test_suite_run
|
331
|
+
return VellumTestSuiteRunResults(test_suite_run, client=self.client)
|
vellum/lib/utils/env.py
CHANGED
@@ -6,6 +6,8 @@ from .exceptions import VellumClientException
|
|
6
6
|
def get_api_key() -> str:
|
7
7
|
api_key = os.environ.get("VELLUM_API_KEY")
|
8
8
|
if api_key is None:
|
9
|
-
raise VellumClientException(
|
10
|
-
|
9
|
+
raise VellumClientException(
|
10
|
+
"`VELLUM_API_KEY` environment variable is required to be set."
|
11
|
+
)
|
12
|
+
|
11
13
|
return api_key
|
vellum/lib/utils/exceptions.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
class VellumClientException(Exception):
|
2
|
-
pass
|
2
|
+
pass
|
vellum/lib/utils/paginator.py
CHANGED
@@ -12,7 +12,8 @@ class PaginatedResults(Generic[Result]):
|
|
12
12
|
|
13
13
|
|
14
14
|
def get_all_results(
|
15
|
-
paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]],
|
15
|
+
paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]],
|
16
|
+
page_size: Union[int, None] = None,
|
16
17
|
) -> Generator[Result, None, None]:
|
17
18
|
offset = 0
|
18
19
|
count = 0
|
@@ -2,7 +2,7 @@ vellum/__init__.py,sha256=RpP5FLUDUph2qBCQ-TlpodjRFc1PDGDZCk-JBGj0UBM,35591
|
|
2
2
|
vellum/client.py,sha256=7JaU104s0u_WhB8QAqIZcMv9IyvU-a0nKVZhTPKiEpw,97089
|
3
3
|
vellum/core/__init__.py,sha256=1pNSKkwyQvMl_F0wohBqmoQAITptg3zlvCwsoSSzy7c,853
|
4
4
|
vellum/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
|
5
|
-
vellum/core/client_wrapper.py,sha256=
|
5
|
+
vellum/core/client_wrapper.py,sha256=PSXJj42WsaIoL7ae07LoF7941fD683wvW7fzz7T2V1o,1697
|
6
6
|
vellum/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
7
7
|
vellum/core/file.py,sha256=sy1RUGZ3aJYuw998bZytxxo6QdgKmlnlgBaMvwEKCGg,1480
|
8
8
|
vellum/core/http_client.py,sha256=5ok6hqgZDJhg57EHvMnr0BBaHdG50QxFPKaCZ9aVWTc,5059
|
@@ -20,11 +20,11 @@ vellum/lib/__init__.py,sha256=KTSY0V59WEOr5uNyAei1dDfaAatyXw_Aca5kNjo5mY0,79
|
|
20
20
|
vellum/lib/test_suites/__init__.py,sha256=hNsLoHSykqXDJP-MwFvu2lExImxo9KEyEJjt_fdAzpE,77
|
21
21
|
vellum/lib/test_suites/constants.py,sha256=Vteml4_csZsMgo_q3-71E3JRCAoN6308TXLu5nfLhmU,116
|
22
22
|
vellum/lib/test_suites/exceptions.py,sha256=6Xacoyv43fJvVf6Dt6Io5a-f9vF12Tx51jzsQRNSqhY,56
|
23
|
-
vellum/lib/test_suites/resources.py,sha256=
|
23
|
+
vellum/lib/test_suites/resources.py,sha256=hokRS0_wT6IdA_6HkWbrh7iFzFxCtiy8JXbUiGtlwRk,12323
|
24
24
|
vellum/lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
|
-
vellum/lib/utils/env.py,sha256=
|
26
|
-
vellum/lib/utils/exceptions.py,sha256=
|
27
|
-
vellum/lib/utils/paginator.py,sha256=
|
25
|
+
vellum/lib/utils/env.py,sha256=ySl859lYBfls8hmlaU_RFdquHa_A_7SzaC6KEdFqh1Y,298
|
26
|
+
vellum/lib/utils/exceptions.py,sha256=dXMAkzqbHV_AP5FjjbegPlfUE0zQDlpA3qOsoOJUxfg,49
|
27
|
+
vellum/lib/utils/paginator.py,sha256=yDvgehocYBDclLt5SewZH4hCIyq0yLHdBzkyPCoYPjs,698
|
28
28
|
vellum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
29
29
|
vellum/resources/__init__.py,sha256=pqoVsVVIrUG-v6yt4AMtc7F5O-K7wKlvqhQeht9-Ax4,730
|
30
30
|
vellum/resources/deployments/__init__.py,sha256=AE0TcFwLrLBljM0ZDX-pPw4Kqt-1f5JDpIok2HS80QI,157
|
@@ -376,7 +376,7 @@ vellum/types/workflow_result_event_output_data_search_results.py,sha256=gazaUrC5
|
|
376
376
|
vellum/types/workflow_result_event_output_data_string.py,sha256=aVWIIGbLj4TJJhTTj6WzhbYXQkcZatKuhhNy8UYwXbw,1482
|
377
377
|
vellum/types/workflow_stream_event.py,sha256=KA6Bkk_XA6AIPWR-1vKnwF1A8l_Bm5y0arQCWWWRpsk,911
|
378
378
|
vellum/version.py,sha256=neLt8HBHHUtDF9M5fsyUzHT-pKooEPvceaLDqqIGb0s,77
|
379
|
-
vellum_ai-0.5.
|
380
|
-
vellum_ai-0.5.
|
381
|
-
vellum_ai-0.5.
|
382
|
-
vellum_ai-0.5.
|
379
|
+
vellum_ai-0.5.1.dist-info/LICENSE,sha256=CcaljEIoOBaU-wItPH4PmM_mDCGpyuUY0Er1BGu5Ti8,1073
|
380
|
+
vellum_ai-0.5.1.dist-info/METADATA,sha256=TtLXbIJmAEV0EkDcuRYX2-SsLPvhP8L6GSE-WpEJqV4,3549
|
381
|
+
vellum_ai-0.5.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
382
|
+
vellum_ai-0.5.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|