vellum-ai 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -18,7 +18,7 @@ class BaseClientWrapper:
18
18
  headers: typing.Dict[str, str] = {
19
19
  "X-Fern-Language": "Python",
20
20
  "X-Fern-SDK-Name": "vellum-ai",
21
- "X-Fern-SDK-Version": "0.5.0",
21
+ "X-Fern-SDK-Version": "0.5.1",
22
22
  }
23
23
  headers["X_API_KEY"] = self.api_key
24
24
  return headers
@@ -2,8 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import time
5
- from typing import Callable, Generator, List
5
+ from functools import cached_property
6
+ from typing import Callable, Generator, List, Any
6
7
 
8
+ from vellum import TestSuiteRunRead, TestSuiteRunMetricOutput_Number
7
9
  from vellum.client import Vellum
8
10
  from vellum.lib.test_suites.constants import (
9
11
  DEFAULT_MAX_POLLING_DURATION_MS,
@@ -11,6 +13,7 @@ from vellum.lib.test_suites.constants import (
11
13
  )
12
14
  from vellum.lib.test_suites.exceptions import TestSuiteRunResultsException
13
15
  from vellum.lib.utils.env import get_api_key
16
+ from vellum.lib.utils.paginator import PaginatedResults, get_all_results
14
17
  from vellum.types import (
15
18
  ExternalTestCaseExecutionRequest,
16
19
  NamedTestCaseVariableValueRequest,
@@ -22,9 +25,6 @@ from vellum.types import (
22
25
  TestSuiteRunState,
23
26
  )
24
27
 
25
- from vellum.lib.utils.paginator import PaginatedResults, get_all_results
26
-
27
-
28
28
  logger = logging.getLogger(__name__)
29
29
 
30
30
 
@@ -116,44 +116,141 @@ class VellumTestSuiteRunResults:
116
116
 
117
117
  def __init__(
118
118
  self,
119
- test_suite_run_id: str,
119
+ test_suite_run: TestSuiteRunRead,
120
120
  *,
121
121
  client: Vellum | None = None,
122
122
  polling_interval: int = DEFAULT_POLLING_INTERVAL_MS,
123
123
  max_polling_duration: int = DEFAULT_MAX_POLLING_DURATION_MS,
124
124
  ) -> None:
125
- self._test_suite_run_id = test_suite_run_id
125
+ self._test_suite_run = test_suite_run
126
126
  self._client = client or Vellum(
127
127
  api_key=get_api_key(),
128
128
  )
129
- self._state = "QUEUED"
130
129
  self._executions: Generator[VellumTestSuiteRunExecution, None, None] | None = (
131
130
  None
132
131
  )
133
132
  self._polling_interval = polling_interval
134
133
  self._max_polling_duration = max_polling_duration
135
134
 
135
+ @property
136
+ def state(self) -> TestSuiteRunState:
137
+ return self._test_suite_run.state
138
+
139
+ @cached_property
140
+ def all_executions(self) -> list[VellumTestSuiteRunExecution]:
141
+ return list(self._get_test_suite_run_executions())
142
+
136
143
  def get_metric_outputs(
137
144
  self, metric_identifier: str | None = None, output_identifier: str | None = None
138
- ) -> Generator[TestSuiteRunMetricOutput, None, None]:
145
+ ) -> List[TestSuiteRunMetricOutput]:
139
146
  """Retrieve a metric's output across all executions by providing the info needed to uniquely identify it."""
140
147
 
141
- executions = self._get_test_suite_run_executions()
142
-
143
- for execution in executions:
144
- yield execution.get_metric_output(
148
+ return [
149
+ execution.get_metric_output(
145
150
  metric_identifier=metric_identifier, output_identifier=output_identifier
146
151
  )
152
+ for execution in self.all_executions
153
+ ]
154
+
155
+ def get_count_metric_outputs(
156
+ self,
157
+ metric_identifier: str | None = None,
158
+ output_identifier: str | None = None,
159
+ *,
160
+ predicate: Callable[[TestSuiteRunMetricOutput], bool] | None = None,
161
+ ) -> int:
162
+ """Returns the count of all metric outputs that match the given criteria."""
163
+
164
+ metric_outputs = self.get_metric_outputs(
165
+ metric_identifier=metric_identifier, output_identifier=output_identifier
166
+ )
167
+
168
+ if predicate is None:
169
+ return len(metric_outputs)
170
+
171
+ return len([output for output in metric_outputs if predicate(output)])
172
+
173
+ def get_numeric_metric_output_values(
174
+ self,
175
+ metric_identifier: str | None = None,
176
+ output_identifier: str | None = None,
177
+ ) -> List[float]:
178
+ """Returns the values of a numeric metric output that match the given criteria."""
179
+
180
+ metric_outputs: list[TestSuiteRunMetricOutput_Number] = []
181
+
182
+ for output in self.get_metric_outputs(
183
+ metric_identifier=metric_identifier, output_identifier=output_identifier
184
+ ):
185
+ if output.type != "NUMBER":
186
+ raise TestSuiteRunResultsException(
187
+ f"Expected a numeric metric output, but got a {output.type} output instead."
188
+ )
189
+
190
+ metric_outputs.append(output)
191
+
192
+ return [output.value for output in metric_outputs]
193
+
194
+ def get_mean_metric_output(
195
+ self, metric_identifier: str | None = None, output_identifier: str | None = None
196
+ ) -> float:
197
+ """Returns the mean of all metric outputs that match the given criteria."""
198
+ output_values = self.get_numeric_metric_output_values(
199
+ metric_identifier=metric_identifier, output_identifier=output_identifier
200
+ )
201
+ return sum(output_values) / len(output_values)
202
+
203
+ def get_min_metric_output(
204
+ self, metric_identifier: str | None = None, output_identifier: str | None = None
205
+ ) -> float:
206
+ """Returns the min value across= all metric outputs that match the given criteria."""
207
+ output_values = self.get_numeric_metric_output_values(
208
+ metric_identifier=metric_identifier, output_identifier=output_identifier
209
+ )
210
+ return min(output_values)
211
+
212
+ def get_max_metric_output(
213
+ self, metric_identifier: str | None = None, output_identifier: str | None = None
214
+ ) -> float:
215
+ """Returns the max value across all metric outputs that match the given criteria."""
216
+ output_values = self.get_numeric_metric_output_values(
217
+ metric_identifier=metric_identifier, output_identifier=output_identifier
218
+ )
219
+ return max(output_values)
220
+
221
+ def wait_until_complete(self) -> None:
222
+ """Wait until the Test Suite Run is no longer in a QUEUED or RUNNING state."""
223
+
224
+ start_time = time.time_ns()
225
+ while True:
226
+ logger.debug("Polling for latest test suite run state...")
227
+ self._refresh_test_suite_run()
228
+ if self.state not in {"QUEUED", "RUNNING"}:
229
+ break
230
+
231
+ current_time = time.time_ns()
232
+ if ((current_time - start_time) / 1e6) > self._max_polling_duration:
233
+ raise TestSuiteRunResultsException(
234
+ "Test suite run timed out polling for executions"
235
+ )
236
+
237
+ time.sleep(self._polling_interval / 1000.0)
238
+
239
+ if self.state == "FAILED":
240
+ raise TestSuiteRunResultsException("Test suite run failed")
147
241
 
148
- def _refresh_test_suite_run_state(self):
149
- test_suite_run = self._client.test_suite_runs.retrieve(self._test_suite_run_id)
150
- self._state = test_suite_run.state
242
+ if self.state == "CANCELLED":
243
+ raise TestSuiteRunResultsException("Test suite run was cancelled")
244
+
245
+ def _refresh_test_suite_run(self):
246
+ test_suite_run = self._client.test_suite_runs.retrieve(self._test_suite_run.id)
247
+ self._test_suite_run = test_suite_run
151
248
 
152
249
  def _list_paginated_executions(
153
250
  self, offset: int | None, limit: int | None
154
251
  ) -> PaginatedResults[TestSuiteRunExecution]:
155
252
  response = self._client.test_suite_runs.list_executions(
156
- self._test_suite_run_id,
253
+ self._test_suite_run.id,
157
254
  offset=offset,
158
255
  limit=limit,
159
256
  expand=[
@@ -175,26 +272,7 @@ class VellumTestSuiteRunResults:
175
272
  if self._executions is not None:
176
273
  return self._executions
177
274
 
178
- start_time = time.time_ns()
179
- while True:
180
- logger.debug("Polling for latest test suite run state...")
181
- self._refresh_test_suite_run_state()
182
- if self._state not in {"QUEUED", "RUNNING"}:
183
- break
184
-
185
- current_time = time.time_ns()
186
- if ((current_time - start_time) / 1e6) > self._max_polling_duration:
187
- raise TestSuiteRunResultsException(
188
- "Test suite run timed out polling for executions"
189
- )
190
-
191
- time.sleep(self._polling_interval / 1000.0)
192
-
193
- if self._state == "FAILED":
194
- raise TestSuiteRunResultsException("Test suite run failed")
195
-
196
- if self._state == "CANCELLED":
197
- raise TestSuiteRunResultsException("Test suite run was cancelled")
275
+ self.wait_until_complete()
198
276
 
199
277
  raw_api_executions = get_all_results(self._list_paginated_executions)
200
278
  self._executions = self._wrap_api_executions(raw_api_executions)
@@ -250,4 +328,4 @@ class VellumTestSuite:
250
328
  ),
251
329
  ),
252
330
  )
253
- return VellumTestSuiteRunResults(test_suite_run.id, client=self.client)
331
+ return VellumTestSuiteRunResults(test_suite_run, client=self.client)
vellum/lib/utils/env.py CHANGED
@@ -6,6 +6,8 @@ from .exceptions import VellumClientException
6
6
  def get_api_key() -> str:
7
7
  api_key = os.environ.get("VELLUM_API_KEY")
8
8
  if api_key is None:
9
- raise VellumClientException("`VELLUM_API_KEY` environment variable id required to be set.")
10
-
9
+ raise VellumClientException(
10
+ "`VELLUM_API_KEY` environment variable is required to be set."
11
+ )
12
+
11
13
  return api_key
@@ -1,2 +1,2 @@
1
1
  class VellumClientException(Exception):
2
- pass
2
+ pass
@@ -12,7 +12,8 @@ class PaginatedResults(Generic[Result]):
12
12
 
13
13
 
14
14
  def get_all_results(
15
- paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]], page_size: Union[int, None] = None
15
+ paginated_api: Callable[[int, Union[int, None]], PaginatedResults[Result]],
16
+ page_size: Union[int, None] = None,
16
17
  ) -> Generator[Result, None, None]:
17
18
  offset = 0
18
19
  count = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vellum-ai
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Programming Language :: Python :: 3
@@ -2,7 +2,7 @@ vellum/__init__.py,sha256=RpP5FLUDUph2qBCQ-TlpodjRFc1PDGDZCk-JBGj0UBM,35591
2
2
  vellum/client.py,sha256=7JaU104s0u_WhB8QAqIZcMv9IyvU-a0nKVZhTPKiEpw,97089
3
3
  vellum/core/__init__.py,sha256=1pNSKkwyQvMl_F0wohBqmoQAITptg3zlvCwsoSSzy7c,853
4
4
  vellum/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
5
- vellum/core/client_wrapper.py,sha256=YUdARR7B9QqhNV9JOZYVoNyzoORUHfpBag8gvwu-BfA,1697
5
+ vellum/core/client_wrapper.py,sha256=PSXJj42WsaIoL7ae07LoF7941fD683wvW7fzz7T2V1o,1697
6
6
  vellum/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
7
7
  vellum/core/file.py,sha256=sy1RUGZ3aJYuw998bZytxxo6QdgKmlnlgBaMvwEKCGg,1480
8
8
  vellum/core/http_client.py,sha256=5ok6hqgZDJhg57EHvMnr0BBaHdG50QxFPKaCZ9aVWTc,5059
@@ -20,11 +20,11 @@ vellum/lib/__init__.py,sha256=KTSY0V59WEOr5uNyAei1dDfaAatyXw_Aca5kNjo5mY0,79
20
20
  vellum/lib/test_suites/__init__.py,sha256=hNsLoHSykqXDJP-MwFvu2lExImxo9KEyEJjt_fdAzpE,77
21
21
  vellum/lib/test_suites/constants.py,sha256=Vteml4_csZsMgo_q3-71E3JRCAoN6308TXLu5nfLhmU,116
22
22
  vellum/lib/test_suites/exceptions.py,sha256=6Xacoyv43fJvVf6Dt6Io5a-f9vF12Tx51jzsQRNSqhY,56
23
- vellum/lib/test_suites/resources.py,sha256=rjgPFktL37zNyB0WWErLqjDR1OzmBfjf6Ry6pb97r2A,9197
23
+ vellum/lib/test_suites/resources.py,sha256=hokRS0_wT6IdA_6HkWbrh7iFzFxCtiy8JXbUiGtlwRk,12323
24
24
  vellum/lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- vellum/lib/utils/env.py,sha256=__k8PagSUxW09x2ZMmwFrM_mwy-ky68aqml-e6jaYys,280
26
- vellum/lib/utils/exceptions.py,sha256=h9s9PnHqrTX5ohmZyCXovpWoTB7f3tAd5z_5nP0drCM,48
27
- vellum/lib/utils/paginator.py,sha256=mQwHZEkZHmCNBCctp8zkyEXmcfZtuU5gOL5gC3vNgUA,693
25
+ vellum/lib/utils/env.py,sha256=ySl859lYBfls8hmlaU_RFdquHa_A_7SzaC6KEdFqh1Y,298
26
+ vellum/lib/utils/exceptions.py,sha256=dXMAkzqbHV_AP5FjjbegPlfUE0zQDlpA3qOsoOJUxfg,49
27
+ vellum/lib/utils/paginator.py,sha256=yDvgehocYBDclLt5SewZH4hCIyq0yLHdBzkyPCoYPjs,698
28
28
  vellum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  vellum/resources/__init__.py,sha256=pqoVsVVIrUG-v6yt4AMtc7F5O-K7wKlvqhQeht9-Ax4,730
30
30
  vellum/resources/deployments/__init__.py,sha256=AE0TcFwLrLBljM0ZDX-pPw4Kqt-1f5JDpIok2HS80QI,157
@@ -376,7 +376,7 @@ vellum/types/workflow_result_event_output_data_search_results.py,sha256=gazaUrC5
376
376
  vellum/types/workflow_result_event_output_data_string.py,sha256=aVWIIGbLj4TJJhTTj6WzhbYXQkcZatKuhhNy8UYwXbw,1482
377
377
  vellum/types/workflow_stream_event.py,sha256=KA6Bkk_XA6AIPWR-1vKnwF1A8l_Bm5y0arQCWWWRpsk,911
378
378
  vellum/version.py,sha256=neLt8HBHHUtDF9M5fsyUzHT-pKooEPvceaLDqqIGb0s,77
379
- vellum_ai-0.5.0.dist-info/LICENSE,sha256=CcaljEIoOBaU-wItPH4PmM_mDCGpyuUY0Er1BGu5Ti8,1073
380
- vellum_ai-0.5.0.dist-info/METADATA,sha256=NMcjVbiC1qiFwXV12zgJEaDVbp_Ty7SK85_L1BGTc68,3549
381
- vellum_ai-0.5.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
382
- vellum_ai-0.5.0.dist-info/RECORD,,
379
+ vellum_ai-0.5.1.dist-info/LICENSE,sha256=CcaljEIoOBaU-wItPH4PmM_mDCGpyuUY0Er1BGu5Ti8,1073
380
+ vellum_ai-0.5.1.dist-info/METADATA,sha256=TtLXbIJmAEV0EkDcuRYX2-SsLPvhP8L6GSE-WpEJqV4,3549
381
+ vellum_ai-0.5.1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
382
+ vellum_ai-0.5.1.dist-info/RECORD,,