vellum-ai 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- vellum/core/client_wrapper.py +1 -1
- vellum/lib/test_suites/resources.py +115 -37
- vellum/lib/utils/env.py +4 -2
- vellum/lib/utils/exceptions.py +1 -1
- vellum/lib/utils/paginator.py +2 -1
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/METADATA +1 -1
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/RECORD +9 -9
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/LICENSE +0 -0
- {vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/WHEEL +0 -0
vellum/core/client_wrapper.py
CHANGED
(+1 -1; hunk not captured in this extract)
vellum/lib/test_suites/resources.py
CHANGED
@@ -2,8 +2,10 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
import time
|
5
|
-
from
|
5
|
+
from functools import cached_property
|
6
|
+
from typing import Callable, Generator, List, Any
|
6
7
|
|
8
|
+
from vellum import TestSuiteRunRead, TestSuiteRunMetricOutput_Number
|
7
9
|
from vellum.client import Vellum
|
8
10
|
from vellum.lib.test_suites.constants import (
|
9
11
|
DEFAULT_MAX_POLLING_DURATION_MS,
|
@@ -11,6 +13,7 @@ from vellum.lib.test_suites.constants import (
|
|
11
13
|
)
|
12
14
|
from vellum.lib.test_suites.exceptions import TestSuiteRunResultsException
|
13
15
|
from vellum.lib.utils.env import get_api_key
|
16
|
+
from vellum.lib.utils.paginator import PaginatedResults, get_all_results
|
14
17
|
from vellum.types import (
|
15
18
|
ExternalTestCaseExecutionRequest,
|
16
19
|
NamedTestCaseVariableValueRequest,
|
@@ -22,9 +25,6 @@ from vellum.types import (
|
|
22
25
|
TestSuiteRunState,
|
23
26
|
)
|
24
27
|
|
25
|
-
from vellum.lib.utils.paginator import PaginatedResults, get_all_results
|
26
|
-
|
27
|
-
|
28
28
|
logger = logging.getLogger(__name__)
|
29
29
|
|
30
30
|
|
@@ -116,44 +116,141 @@ class VellumTestSuiteRunResults:
|
|
116
116
|
|
117
117
|
def __init__(
|
118
118
|
self,
|
119
|
-
|
119
|
+
test_suite_run: TestSuiteRunRead,
|
120
120
|
*,
|
121
121
|
client: Vellum | None = None,
|
122
122
|
polling_interval: int = DEFAULT_POLLING_INTERVAL_MS,
|
123
123
|
max_polling_duration: int = DEFAULT_MAX_POLLING_DURATION_MS,
|
124
124
|
) -> None:
|
125
|
-
self.
|
125
|
+
self._test_suite_run = test_suite_run
|
126
126
|
self._client = client or Vellum(
|
127
127
|
api_key=get_api_key(),
|
128
128
|
)
|
129
|
-
self._state = "QUEUED"
|
130
129
|
self._executions: Generator[VellumTestSuiteRunExecution, None, None] | None = (
|
131
130
|
None
|
132
131
|
)
|
133
132
|
self._polling_interval = polling_interval
|
134
133
|
self._max_polling_duration = max_polling_duration
|
135
134
|
|
135
|
+
@property
|
136
|
+
def state(self) -> TestSuiteRunState:
|
137
|
+
return self._test_suite_run.state
|
138
|
+
|
139
|
+
@cached_property
|
140
|
+
def all_executions(self) -> list[VellumTestSuiteRunExecution]:
|
141
|
+
return list(self._get_test_suite_run_executions())
|
142
|
+
|
136
143
|
def get_metric_outputs(
|
137
144
|
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
138
|
-
) ->
|
145
|
+
) -> List[TestSuiteRunMetricOutput]:
|
139
146
|
"""Retrieve a metric's output across all executions by providing the info needed to uniquely identify it."""
|
140
147
|
|
141
|
-
|
142
|
-
|
143
|
-
for execution in executions:
|
144
|
-
yield execution.get_metric_output(
|
148
|
+
return [
|
149
|
+
execution.get_metric_output(
|
145
150
|
metric_identifier=metric_identifier, output_identifier=output_identifier
|
146
151
|
)
|
152
|
+
for execution in self.all_executions
|
153
|
+
]
|
154
|
+
|
155
|
+
def get_count_metric_outputs(
|
156
|
+
self,
|
157
|
+
metric_identifier: str | None = None,
|
158
|
+
output_identifier: str | None = None,
|
159
|
+
*,
|
160
|
+
predicate: Callable[[TestSuiteRunMetricOutput], bool] | None = None,
|
161
|
+
) -> int:
|
162
|
+
"""Returns the count of all metric outputs that match the given criteria."""
|
163
|
+
|
164
|
+
metric_outputs = self.get_metric_outputs(
|
165
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
166
|
+
)
|
167
|
+
|
168
|
+
if predicate is None:
|
169
|
+
return len(metric_outputs)
|
170
|
+
|
171
|
+
return len([output for output in metric_outputs if predicate(output)])
|
172
|
+
|
173
|
+
def get_numeric_metric_output_values(
|
174
|
+
self,
|
175
|
+
metric_identifier: str | None = None,
|
176
|
+
output_identifier: str | None = None,
|
177
|
+
) -> List[float]:
|
178
|
+
"""Returns the values of a numeric metric output that match the given criteria."""
|
179
|
+
|
180
|
+
metric_outputs: list[TestSuiteRunMetricOutput_Number] = []
|
181
|
+
|
182
|
+
for output in self.get_metric_outputs(
|
183
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
184
|
+
):
|
185
|
+
if output.type != "NUMBER":
|
186
|
+
raise TestSuiteRunResultsException(
|
187
|
+
f"Expected a numeric metric output, but got a {output.type} output instead."
|
188
|
+
)
|
189
|
+
|
190
|
+
metric_outputs.append(output)
|
191
|
+
|
192
|
+
return [output.value for output in metric_outputs]
|
193
|
+
|
194
|
+
def get_mean_metric_output(
|
195
|
+
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
196
|
+
) -> float:
|
197
|
+
"""Returns the mean of all metric outputs that match the given criteria."""
|
198
|
+
output_values = self.get_numeric_metric_output_values(
|
199
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
200
|
+
)
|
201
|
+
return sum(output_values) / len(output_values)
|
202
|
+
|
203
|
+
def get_min_metric_output(
|
204
|
+
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
205
|
+
) -> float:
|
206
|
+
"""Returns the min value across= all metric outputs that match the given criteria."""
|
207
|
+
output_values = self.get_numeric_metric_output_values(
|
208
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
209
|
+
)
|
210
|
+
return min(output_values)
|
211
|
+
|
212
|
+
def get_max_metric_output(
|
213
|
+
self, metric_identifier: str | None = None, output_identifier: str | None = None
|
214
|
+
) -> float:
|
215
|
+
"""Returns the max value across all metric outputs that match the given criteria."""
|
216
|
+
output_values = self.get_numeric_metric_output_values(
|
217
|
+
metric_identifier=metric_identifier, output_identifier=output_identifier
|
218
|
+
)
|
219
|
+
return max(output_values)
|
220
|
+
|
221
|
+
def wait_until_complete(self) -> None:
|
222
|
+
"""Wait until the Test Suite Run is no longer in a QUEUED or RUNNING state."""
|
223
|
+
|
224
|
+
start_time = time.time_ns()
|
225
|
+
while True:
|
226
|
+
logger.debug("Polling for latest test suite run state...")
|
227
|
+
self._refresh_test_suite_run()
|
228
|
+
if self.state not in {"QUEUED", "RUNNING"}:
|
229
|
+
break
|
230
|
+
|
231
|
+
current_time = time.time_ns()
|
232
|
+
if ((current_time - start_time) / 1e6) > self._max_polling_duration:
|
233
|
+
raise TestSuiteRunResultsException(
|
234
|
+
"Test suite run timed out polling for executions"
|
235
|
+
)
|
236
|
+
|
237
|
+
time.sleep(self._polling_interval / 1000.0)
|
238
|
+
|
239
|
+
if self.state == "FAILED":
|
240
|
+
raise TestSuiteRunResultsException("Test suite run failed")
|
147
241
|
|
148
|
-
|
149
|
-
|
150
|
-
|
242
|
+
if self.state == "CANCELLED":
|
243
|
+
raise TestSuiteRunResultsException("Test suite run was cancelled")
|
244
|
+
|
245
|
+
def _refresh_test_suite_run(self):
|
246
|
+
test_suite_run = self._client.test_suite_runs.retrieve(self._test_suite_run.id)
|
247
|
+
self._test_suite_run = test_suite_run
|
151
248
|
|
152
249
|
def _list_paginated_executions(
|
153
250
|
self, offset: int | None, limit: int | None
|
154
251
|
) -> PaginatedResults[TestSuiteRunExecution]:
|
155
252
|
response = self._client.test_suite_runs.list_executions(
|
156
|
-
self.
|
253
|
+
self._test_suite_run.id,
|
157
254
|
offset=offset,
|
158
255
|
limit=limit,
|
159
256
|
expand=[
|
@@ -175,26 +272,7 @@ class VellumTestSuiteRunResults:
|
|
175
272
|
if self._executions is not None:
|
176
273
|
return self._executions
|
177
274
|
|
178
|
-
|
179
|
-
while True:
|
180
|
-
logger.debug("Polling for latest test suite run state...")
|
181
|
-
self._refresh_test_suite_run_state()
|
182
|
-
if self._state not in {"QUEUED", "RUNNING"}:
|
183
|
-
break
|
184
|
-
|
185
|
-
current_time = time.time_ns()
|
186
|
-
if ((current_time - start_time) / 1e6) > self._max_polling_duration:
|
187
|
-
raise TestSuiteRunResultsException(
|
188
|
-
"Test suite run timed out polling for executions"
|
189
|
-
)
|
190
|
-
|
191
|
-
time.sleep(self._polling_interval / 1000.0)
|
192
|
-
|
193
|
-
if self._state == "FAILED":
|
194
|
-
raise TestSuiteRunResultsException("Test suite run failed")
|
195
|
-
|
196
|
-
if self._state == "CANCELLED":
|
197
|
-
raise TestSuiteRunResultsException("Test suite run was cancelled")
|
275
|
+
self.wait_until_complete()
|
198
276
|
|
199
277
|
raw_api_executions = get_all_results(self._list_paginated_executions)
|
200
278
|
self._executions = self._wrap_api_executions(raw_api_executions)
|
@@ -250,4 +328,4 @@ class VellumTestSuite:
|
|
250
328
|
),
|
251
329
|
),
|
252
330
|
)
|
253
|
-
return VellumTestSuiteRunResults(test_suite_run
|
331
|
+
return VellumTestSuiteRunResults(test_suite_run, client=self.client)
|
vellum/lib/utils/env.py
CHANGED
@@ -6,6 +6,8 @@ from .exceptions import VellumClientException
|
|
6
6
|
def get_api_key() -> str:
|
7
7
|
api_key = os.environ.get("VELLUM_API_KEY")
|
8
8
|
if api_key is None:
|
9
|
-
raise VellumClientException(
|
10
|
-
|
9
|
+
raise VellumClientException(
|
10
|
+
"`VELLUM_API_KEY` environment variable is required to be set."
|
11
|
+
)
|
12
|
+
|
11
13
|
return api_key
|
vellum/lib/utils/exceptions.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
class VellumClientException(Exception):
|
2
|
-
pass
|
2
|
+
pass
|
vellum/lib/utils/paginator.py
CHANGED
@@ -12,7 +12,8 @@ class PaginatedResults(Generic[Result]):
|
|
12
12
|
|
13
13
|
|
14
14
|
def get_all_results(
|
15
|
-
paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]],
|
15
|
+
paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]],
|
16
|
+
page_size: Union[int, None] = None,
|
16
17
|
) -> Generator[Result, None, None]:
|
17
18
|
offset = 0
|
18
19
|
count = 0
|
{vellum_ai-0.5.0.dist-info → vellum_ai-0.5.1.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ vellum/__init__.py,sha256=RpP5FLUDUph2qBCQ-TlpodjRFc1PDGDZCk-JBGj0UBM,35591
|
|
2
2
|
vellum/client.py,sha256=7JaU104s0u_WhB8QAqIZcMv9IyvU-a0nKVZhTPKiEpw,97089
|
3
3
|
vellum/core/__init__.py,sha256=1pNSKkwyQvMl_F0wohBqmoQAITptg3zlvCwsoSSzy7c,853
|
4
4
|
vellum/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
|
5
|
-
vellum/core/client_wrapper.py,sha256=
|
5
|
+
vellum/core/client_wrapper.py,sha256=PSXJj42WsaIoL7ae07LoF7941fD683wvW7fzz7T2V1o,1697
|
6
6
|
vellum/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
7
7
|
vellum/core/file.py,sha256=sy1RUGZ3aJYuw998bZytxxo6QdgKmlnlgBaMvwEKCGg,1480
|
8
8
|
vellum/core/http_client.py,sha256=5ok6hqgZDJhg57EHvMnr0BBaHdG50QxFPKaCZ9aVWTc,5059
|
@@ -20,11 +20,11 @@ vellum/lib/__init__.py,sha256=KTSY0V59WEOr5uNyAei1dDfaAatyXw_Aca5kNjo5mY0,79
|
|
20
20
|
vellum/lib/test_suites/__init__.py,sha256=hNsLoHSykqXDJP-MwFvu2lExImxo9KEyEJjt_fdAzpE,77
|
21
21
|
vellum/lib/test_suites/constants.py,sha256=Vteml4_csZsMgo_q3-71E3JRCAoN6308TXLu5nfLhmU,116
|
22
22
|
vellum/lib/test_suites/exceptions.py,sha256=6Xacoyv43fJvVf6Dt6Io5a-f9vF12Tx51jzsQRNSqhY,56
|
23
|
-
vellum/lib/test_suites/resources.py,sha256=
|
23
|
+
vellum/lib/test_suites/resources.py,sha256=hokRS0_wT6IdA_6HkWbrh7iFzFxCtiy8JXbUiGtlwRk,12323
|
24
24
|
vellum/lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
|
-
vellum/lib/utils/env.py,sha256=
|
26
|
-
vellum/lib/utils/exceptions.py,sha256=
|
27
|
-
vellum/lib/utils/paginator.py,sha256=
|
25
|
+
vellum/lib/utils/env.py,sha256=ySl859lYBfls8hmlaU_RFdquHa_A_7SzaC6KEdFqh1Y,298
|
26
|
+
vellum/lib/utils/exceptions.py,sha256=dXMAkzqbHV_AP5FjjbegPlfUE0zQDlpA3qOsoOJUxfg,49
|
27
|
+
vellum/lib/utils/paginator.py,sha256=yDvgehocYBDclLt5SewZH4hCIyq0yLHdBzkyPCoYPjs,698
|
28
28
|
vellum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
29
29
|
vellum/resources/__init__.py,sha256=pqoVsVVIrUG-v6yt4AMtc7F5O-K7wKlvqhQeht9-Ax4,730
|
30
30
|
vellum/resources/deployments/__init__.py,sha256=AE0TcFwLrLBljM0ZDX-pPw4Kqt-1f5JDpIok2HS80QI,157
|
@@ -376,7 +376,7 @@ vellum/types/workflow_result_event_output_data_search_results.py,sha256=gazaUrC5
|
|
376
376
|
vellum/types/workflow_result_event_output_data_string.py,sha256=aVWIIGbLj4TJJhTTj6WzhbYXQkcZatKuhhNy8UYwXbw,1482
|
377
377
|
vellum/types/workflow_stream_event.py,sha256=KA6Bkk_XA6AIPWR-1vKnwF1A8l_Bm5y0arQCWWWRpsk,911
|
378
378
|
vellum/version.py,sha256=neLt8HBHHUtDF9M5fsyUzHT-pKooEPvceaLDqqIGb0s,77
|
379
|
-
vellum_ai-0.5.
|
380
|
-
vellum_ai-0.5.
|
381
|
-
vellum_ai-0.5.
|
382
|
-
vellum_ai-0.5.
|
379
|
+
vellum_ai-0.5.1.dist-info/LICENSE,sha256=CcaljEIoOBaU-wItPH4PmM_mDCGpyuUY0Er1BGu5Ti8,1073
|
380
|
+
vellum_ai-0.5.1.dist-info/METADATA,sha256=TtLXbIJmAEV0EkDcuRYX2-SsLPvhP8L6GSE-WpEJqV4,3549
|
381
|
+
vellum_ai-0.5.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
382
|
+
vellum_ai-0.5.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|