vellum-ai 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ class BaseClientWrapper:
18
18
  headers: typing.Dict[str, str] = {
19
19
  "X-Fern-Language": "Python",
20
20
  "X-Fern-SDK-Name": "vellum-ai",
21
- "X-Fern-SDK-Version": "0.5.0",
21
+ "X-Fern-SDK-Version": "0.5.1",
22
22
  }
23
23
  headers["X_API_KEY"] = self.api_key
24
24
  return headers
@@ -2,8 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import time
5
- from typing import Callable, Generator, List
5
+ from functools import cached_property
6
+ from typing import Callable, Generator, List, Any
6
7
 
8
+ from vellum import TestSuiteRunRead, TestSuiteRunMetricOutput_Number
7
9
  from vellum.client import Vellum
8
10
  from vellum.lib.test_suites.constants import (
9
11
  DEFAULT_MAX_POLLING_DURATION_MS,
@@ -11,6 +13,7 @@ from vellum.lib.test_suites.constants import (
11
13
  )
12
14
  from vellum.lib.test_suites.exceptions import TestSuiteRunResultsException
13
15
  from vellum.lib.utils.env import get_api_key
16
+ from vellum.lib.utils.paginator import PaginatedResults, get_all_results
14
17
  from vellum.types import (
15
18
  ExternalTestCaseExecutionRequest,
16
19
  NamedTestCaseVariableValueRequest,
@@ -22,9 +25,6 @@ from vellum.types import (
22
25
  TestSuiteRunState,
23
26
  )
24
27
 
25
- from vellum.lib.utils.paginator import PaginatedResults, get_all_results
26
-
27
-
28
28
  logger = logging.getLogger(__name__)
29
29
 
30
30
 
@@ -116,44 +116,141 @@ class VellumTestSuiteRunResults:
116
116
 
117
117
  def __init__(
118
118
  self,
119
- test_suite_run_id: str,
119
+ test_suite_run: TestSuiteRunRead,
120
120
  *,
121
121
  client: Vellum | None = None,
122
122
  polling_interval: int = DEFAULT_POLLING_INTERVAL_MS,
123
123
  max_polling_duration: int = DEFAULT_MAX_POLLING_DURATION_MS,
124
124
  ) -> None:
125
- self._test_suite_run_id = test_suite_run_id
125
+ self._test_suite_run = test_suite_run
126
126
  self._client = client or Vellum(
127
127
  api_key=get_api_key(),
128
128
  )
129
- self._state = "QUEUED"
130
129
  self._executions: Generator[VellumTestSuiteRunExecution, None, None] | None = (
131
130
  None
132
131
  )
133
132
  self._polling_interval = polling_interval
134
133
  self._max_polling_duration = max_polling_duration
135
134
 
135
+ @property
136
+ def state(self) -> TestSuiteRunState:
137
+ return self._test_suite_run.state
138
+
139
+ @cached_property
140
+ def all_executions(self) -> list[VellumTestSuiteRunExecution]:
141
+ return list(self._get_test_suite_run_executions())
142
+
136
143
  def get_metric_outputs(
137
144
  self, metric_identifier: str | None = None, output_identifier: str | None = None
138
- ) -> Generator[TestSuiteRunMetricOutput, None, None]:
145
+ ) -> List[TestSuiteRunMetricOutput]:
139
146
  """Retrieve a metric's output across all executions by providing the info needed to uniquely identify it."""
140
147
 
141
- executions = self._get_test_suite_run_executions()
142
-
143
- for execution in executions:
144
- yield execution.get_metric_output(
148
+ return [
149
+ execution.get_metric_output(
145
150
  metric_identifier=metric_identifier, output_identifier=output_identifier
146
151
  )
152
+ for execution in self.all_executions
153
+ ]
154
+
155
+ def get_count_metric_outputs(
156
+ self,
157
+ metric_identifier: str | None = None,
158
+ output_identifier: str | None = None,
159
+ *,
160
+ predicate: Callable[[TestSuiteRunMetricOutput], bool] | None = None,
161
+ ) -> int:
162
+ """Returns the count of all metric outputs that match the given criteria."""
163
+
164
+ metric_outputs = self.get_metric_outputs(
165
+ metric_identifier=metric_identifier, output_identifier=output_identifier
166
+ )
167
+
168
+ if predicate is None:
169
+ return len(metric_outputs)
170
+
171
+ return len([output for output in metric_outputs if predicate(output)])
172
+
173
+ def get_numeric_metric_output_values(
174
+ self,
175
+ metric_identifier: str | None = None,
176
+ output_identifier: str | None = None,
177
+ ) -> List[float]:
178
+ """Returns the values of a numeric metric output that match the given criteria."""
179
+
180
+ metric_outputs: list[TestSuiteRunMetricOutput_Number] = []
181
+
182
+ for output in self.get_metric_outputs(
183
+ metric_identifier=metric_identifier, output_identifier=output_identifier
184
+ ):
185
+ if output.type != "NUMBER":
186
+ raise TestSuiteRunResultsException(
187
+ f"Expected a numeric metric output, but got a {output.type} output instead."
188
+ )
189
+
190
+ metric_outputs.append(output)
191
+
192
+ return [output.value for output in metric_outputs]
193
+
194
+ def get_mean_metric_output(
195
+ self, metric_identifier: str | None = None, output_identifier: str | None = None
196
+ ) -> float:
197
+ """Returns the mean of all metric outputs that match the given criteria."""
198
+ output_values = self.get_numeric_metric_output_values(
199
+ metric_identifier=metric_identifier, output_identifier=output_identifier
200
+ )
201
+ return sum(output_values) / len(output_values)
202
+
203
+ def get_min_metric_output(
204
+ self, metric_identifier: str | None = None, output_identifier: str | None = None
205
+ ) -> float:
206
+ """Returns the min value across all metric outputs that match the given criteria."""
207
+ output_values = self.get_numeric_metric_output_values(
208
+ metric_identifier=metric_identifier, output_identifier=output_identifier
209
+ )
210
+ return min(output_values)
211
+
212
+ def get_max_metric_output(
213
+ self, metric_identifier: str | None = None, output_identifier: str | None = None
214
+ ) -> float:
215
+ """Returns the max value across all metric outputs that match the given criteria."""
216
+ output_values = self.get_numeric_metric_output_values(
217
+ metric_identifier=metric_identifier, output_identifier=output_identifier
218
+ )
219
+ return max(output_values)
220
+
221
+ def wait_until_complete(self) -> None:
222
+ """Wait until the Test Suite Run is no longer in a QUEUED or RUNNING state."""
223
+
224
+ start_time = time.time_ns()
225
+ while True:
226
+ logger.debug("Polling for latest test suite run state...")
227
+ self._refresh_test_suite_run()
228
+ if self.state not in {"QUEUED", "RUNNING"}:
229
+ break
230
+
231
+ current_time = time.time_ns()
232
+ if ((current_time - start_time) / 1e6) > self._max_polling_duration:
233
+ raise TestSuiteRunResultsException(
234
+ "Test suite run timed out polling for executions"
235
+ )
236
+
237
+ time.sleep(self._polling_interval / 1000.0)
238
+
239
+ if self.state == "FAILED":
240
+ raise TestSuiteRunResultsException("Test suite run failed")
147
241
 
148
- def _refresh_test_suite_run_state(self):
149
- test_suite_run = self._client.test_suite_runs.retrieve(self._test_suite_run_id)
150
- self._state = test_suite_run.state
242
+ if self.state == "CANCELLED":
243
+ raise TestSuiteRunResultsException("Test suite run was cancelled")
244
+
245
+ def _refresh_test_suite_run(self):
246
+ test_suite_run = self._client.test_suite_runs.retrieve(self._test_suite_run.id)
247
+ self._test_suite_run = test_suite_run
151
248
 
152
249
  def _list_paginated_executions(
153
250
  self, offset: int | None, limit: int | None
154
251
  ) -> PaginatedResults[TestSuiteRunExecution]:
155
252
  response = self._client.test_suite_runs.list_executions(
156
- self._test_suite_run_id,
253
+ self._test_suite_run.id,
157
254
  offset=offset,
158
255
  limit=limit,
159
256
  expand=[
@@ -175,26 +272,7 @@ class VellumTestSuiteRunResults:
175
272
  if self._executions is not None:
176
273
  return self._executions
177
274
 
178
- start_time = time.time_ns()
179
- while True:
180
- logger.debug("Polling for latest test suite run state...")
181
- self._refresh_test_suite_run_state()
182
- if self._state not in {"QUEUED", "RUNNING"}:
183
- break
184
-
185
- current_time = time.time_ns()
186
- if ((current_time - start_time) / 1e6) > self._max_polling_duration:
187
- raise TestSuiteRunResultsException(
188
- "Test suite run timed out polling for executions"
189
- )
190
-
191
- time.sleep(self._polling_interval / 1000.0)
192
-
193
- if self._state == "FAILED":
194
- raise TestSuiteRunResultsException("Test suite run failed")
195
-
196
- if self._state == "CANCELLED":
197
- raise TestSuiteRunResultsException("Test suite run was cancelled")
275
+ self.wait_until_complete()
198
276
 
199
277
  raw_api_executions = get_all_results(self._list_paginated_executions)
200
278
  self._executions = self._wrap_api_executions(raw_api_executions)
@@ -250,4 +328,4 @@ class VellumTestSuite:
250
328
  ),
251
329
  ),
252
330
  )
253
- return VellumTestSuiteRunResults(test_suite_run.id, client=self.client)
331
+ return VellumTestSuiteRunResults(test_suite_run, client=self.client)
vellum/lib/utils/env.py CHANGED
@@ -6,6 +6,8 @@ from .exceptions import VellumClientException
6
6
  def get_api_key() -> str:
7
7
  api_key = os.environ.get("VELLUM_API_KEY")
8
8
  if api_key is None:
9
- raise VellumClientException("`VELLUM_API_KEY` environment variable id required to be set.")
10
-
9
+ raise VellumClientException(
10
+ "`VELLUM_API_KEY` environment variable is required to be set."
11
+ )
12
+
11
13
  return api_key
@@ -1,2 +1,2 @@
1
1
  class VellumClientException(Exception):
2
- pass
2
+ pass
@@ -12,7 +12,8 @@ class PaginatedResults(Generic[Result]):
12
12
 
13
13
 
14
14
  def get_all_results(
15
- paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]], page_size: Union[int, None] = None
15
+ paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]],
16
+ page_size: Union[int, None] = None,
16
17
  ) -> Generator[Result, None, None]:
17
18
  offset = 0
18
19
  count = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vellum-ai
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Programming Language :: Python :: 3
@@ -2,7 +2,7 @@ vellum/__init__.py,sha256=RpP5FLUDUph2qBCQ-TlpodjRFc1PDGDZCk-JBGj0UBM,35591
2
2
  vellum/client.py,sha256=7JaU104s0u_WhB8QAqIZcMv9IyvU-a0nKVZhTPKiEpw,97089
3
3
  vellum/core/__init__.py,sha256=1pNSKkwyQvMl_F0wohBqmoQAITptg3zlvCwsoSSzy7c,853
4
4
  vellum/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
5
- vellum/core/client_wrapper.py,sha256=YUdARR7B9QqhNV9JOZYVoNyzoORUHfpBag8gvwu-BfA,1697
5
+ vellum/core/client_wrapper.py,sha256=PSXJj42WsaIoL7ae07LoF7941fD683wvW7fzz7T2V1o,1697
6
6
  vellum/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
7
7
  vellum/core/file.py,sha256=sy1RUGZ3aJYuw998bZytxxo6QdgKmlnlgBaMvwEKCGg,1480
8
8
  vellum/core/http_client.py,sha256=5ok6hqgZDJhg57EHvMnr0BBaHdG50QxFPKaCZ9aVWTc,5059
@@ -20,11 +20,11 @@ vellum/lib/__init__.py,sha256=KTSY0V59WEOr5uNyAei1dDfaAatyXw_Aca5kNjo5mY0,79
20
20
  vellum/lib/test_suites/__init__.py,sha256=hNsLoHSykqXDJP-MwFvu2lExImxo9KEyEJjt_fdAzpE,77
21
21
  vellum/lib/test_suites/constants.py,sha256=Vteml4_csZsMgo_q3-71E3JRCAoN6308TXLu5nfLhmU,116
22
22
  vellum/lib/test_suites/exceptions.py,sha256=6Xacoyv43fJvVf6Dt6Io5a-f9vF12Tx51jzsQRNSqhY,56
23
- vellum/lib/test_suites/resources.py,sha256=rjgPFktL37zNyB0WWErLqjDR1OzmBfjf6Ry6pb97r2A,9197
23
+ vellum/lib/test_suites/resources.py,sha256=hokRS0_wT6IdA_6HkWbrh7iFzFxCtiy8JXbUiGtlwRk,12323
24
24
  vellum/lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- vellum/lib/utils/env.py,sha256=__k8PagSUxW09x2ZMmwFrM_mwy-ky68aqml-e6jaYys,280
26
- vellum/lib/utils/exceptions.py,sha256=h9s9PnHqrTX5ohmZyCXovpWoTB7f3tAd5z_5nP0drCM,48
27
- vellum/lib/utils/paginator.py,sha256=mQwHZEkZHmCNBCctp8zkyEXmcfZtuU5gOL5gC3vNgUA,693
25
+ vellum/lib/utils/env.py,sha256=ySl859lYBfls8hmlaU_RFdquHa_A_7SzaC6KEdFqh1Y,298
26
+ vellum/lib/utils/exceptions.py,sha256=dXMAkzqbHV_AP5FjjbegPlfUE0zQDlpA3qOsoOJUxfg,49
27
+ vellum/lib/utils/paginator.py,sha256=yDvgehocYBDclLt5SewZH4hCIyq0yLHdBzkyPCoYPjs,698
28
28
  vellum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  vellum/resources/__init__.py,sha256=pqoVsVVIrUG-v6yt4AMtc7F5O-K7wKlvqhQeht9-Ax4,730
30
30
  vellum/resources/deployments/__init__.py,sha256=AE0TcFwLrLBljM0ZDX-pPw4Kqt-1f5JDpIok2HS80QI,157
@@ -376,7 +376,7 @@ vellum/types/workflow_result_event_output_data_search_results.py,sha256=gazaUrC5
376
376
  vellum/types/workflow_result_event_output_data_string.py,sha256=aVWIIGbLj4TJJhTTj6WzhbYXQkcZatKuhhNy8UYwXbw,1482
377
377
  vellum/types/workflow_stream_event.py,sha256=KA6Bkk_XA6AIPWR-1vKnwF1A8l_Bm5y0arQCWWWRpsk,911
378
378
  vellum/version.py,sha256=neLt8HBHHUtDF9M5fsyUzHT-pKooEPvceaLDqqIGb0s,77
379
- vellum_ai-0.5.0.dist-info/LICENSE,sha256=CcaljEIoOBaU-wItPH4PmM_mDCGpyuUY0Er1BGu5Ti8,1073
380
- vellum_ai-0.5.0.dist-info/METADATA,sha256=NMcjVbiC1qiFwXV12zgJEaDVbp_Ty7SK85_L1BGTc68,3549
381
- vellum_ai-0.5.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
382
- vellum_ai-0.5.0.dist-info/RECORD,,
379
+ vellum_ai-0.5.1.dist-info/LICENSE,sha256=CcaljEIoOBaU-wItPH4PmM_mDCGpyuUY0Er1BGu5Ti8,1073
380
+ vellum_ai-0.5.1.dist-info/METADATA,sha256=TtLXbIJmAEV0EkDcuRYX2-SsLPvhP8L6GSE-WpEJqV4,3549
381
+ vellum_ai-0.5.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
382
+ vellum_ai-0.5.1.dist-info/RECORD,,