vellum-ai 0.0.20__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (84)
  1. vellum/__init__.py +38 -20
  2. vellum/client.py +278 -101
  3. vellum/core/__init__.py +11 -2
  4. vellum/core/client_wrapper.py +27 -0
  5. vellum/core/remove_none_from_dict.py +11 -0
  6. vellum/resources/deployments/client.py +35 -15
  7. vellum/resources/document_indexes/client.py +64 -16
  8. vellum/resources/documents/client.py +110 -35
  9. vellum/resources/model_versions/client.py +67 -25
  10. vellum/resources/registered_prompts/client.py +80 -16
  11. vellum/resources/sandboxes/client.py +90 -25
  12. vellum/resources/test_suites/client.py +90 -25
  13. vellum/types/__init__.py +24 -4
  14. vellum/types/deployment_read.py +2 -6
  15. vellum/types/document.py +3 -7
  16. vellum/types/document_document_to_document_index.py +2 -2
  17. vellum/types/document_index_read.py +3 -7
  18. vellum/types/enriched_normalized_completion.py +5 -9
  19. vellum/types/evaluation_params.py +1 -3
  20. vellum/types/evaluation_params_request.py +1 -3
  21. vellum/types/execute_workflow_stream_error_response.py +24 -0
  22. vellum/types/generate_error_response.py +1 -1
  23. vellum/types/generate_request.py +3 -7
  24. vellum/types/generate_result.py +2 -6
  25. vellum/types/generate_result_data.py +1 -1
  26. vellum/types/generate_result_error.py +1 -1
  27. vellum/types/model_version_build_config.py +2 -6
  28. vellum/types/model_version_compile_prompt_response.py +1 -1
  29. vellum/types/model_version_compiled_prompt.py +2 -4
  30. vellum/types/model_version_exec_config.py +3 -3
  31. vellum/types/model_version_read.py +7 -10
  32. vellum/types/model_version_sandbox_snapshot.py +3 -5
  33. vellum/types/prompt_template_block_properties.py +1 -0
  34. vellum/types/prompt_template_block_properties_request.py +3 -2
  35. vellum/types/prompt_template_block_request.py +1 -1
  36. vellum/types/prompt_template_input_variable.py +1 -1
  37. vellum/types/prompt_template_input_variable_request.py +1 -1
  38. vellum/types/provider_enum.py +5 -0
  39. vellum/types/register_prompt_error_response.py +1 -1
  40. vellum/types/register_prompt_prompt.py +2 -2
  41. vellum/types/register_prompt_prompt_info_request.py +1 -1
  42. vellum/types/register_prompt_response.py +5 -7
  43. vellum/types/registered_prompt_deployment.py +3 -3
  44. vellum/types/registered_prompt_model_version.py +2 -2
  45. vellum/types/registered_prompt_sandbox.py +2 -2
  46. vellum/types/registered_prompt_sandbox_snapshot.py +1 -1
  47. vellum/types/sandbox_scenario.py +2 -2
  48. vellum/types/scenario_input_request.py +1 -1
  49. vellum/types/search_error_response.py +1 -1
  50. vellum/types/search_filters_request.py +1 -1
  51. vellum/types/search_request_options_request.py +4 -6
  52. vellum/types/search_response.py +1 -1
  53. vellum/types/search_result.py +3 -3
  54. vellum/types/search_result_merging_request.py +1 -1
  55. vellum/types/search_weights_request.py +2 -2
  56. vellum/types/slim_document.py +5 -9
  57. vellum/types/submit_completion_actual_request.py +5 -15
  58. vellum/types/terminal_node_chat_history_result.py +1 -1
  59. vellum/types/terminal_node_json_result.py +1 -1
  60. vellum/types/terminal_node_result_output.py +2 -4
  61. vellum/types/terminal_node_string_result.py +1 -1
  62. vellum/types/test_suite_test_case.py +4 -8
  63. vellum/types/upload_document_response.py +1 -1
  64. vellum/types/workflow_event_error.py +26 -0
  65. vellum/types/workflow_execution_event_error_code.py +31 -0
  66. vellum/types/workflow_node_result_data.py +7 -11
  67. vellum/types/workflow_node_result_event.py +4 -3
  68. vellum/types/{workflow_node_result_event_state_enum.py → workflow_node_result_event_state.py} +5 -5
  69. vellum/types/workflow_request_chat_history_input_request.py +1 -3
  70. vellum/types/workflow_request_input_request.py +2 -6
  71. vellum/types/workflow_request_json_input_request.py +1 -3
  72. vellum/types/workflow_request_string_input_request.py +1 -3
  73. vellum/types/workflow_result_event.py +6 -3
  74. vellum/types/workflow_result_event_output_data.py +40 -0
  75. vellum/types/workflow_result_event_output_data_chat_history.py +32 -0
  76. vellum/types/workflow_result_event_output_data_json.py +31 -0
  77. vellum/types/workflow_result_event_output_data_string.py +33 -0
  78. vellum/types/workflow_stream_event.py +1 -4
  79. {vellum_ai-0.0.20.dist-info → vellum_ai-0.0.25.dist-info}/METADATA +1 -1
  80. vellum_ai-0.0.25.dist-info/RECORD +149 -0
  81. vellum/core/remove_none_from_headers.py +0 -11
  82. vellum/types/workflow_result_event_state_enum.py +0 -31
  83. vellum_ai-0.0.20.dist-info/RECORD +0 -142
  84. {vellum_ai-0.0.20.dist-info → vellum_ai-0.0.25.dist-info}/WHEEL +0 -0
vellum/client.py CHANGED
@@ -9,8 +9,8 @@ import httpx
 import pydantic
 
 from .core.api_error import ApiError
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from .core.jsonable_encoder import jsonable_encoder
-from .core.remove_none_from_headers import remove_none_from_headers
 from .environment import VellumEnvironment
 from .errors.bad_request_error import BadRequestError
 from .errors.forbidden_error import ForbiddenError
@@ -39,16 +39,22 @@ OMIT = typing.cast(typing.Any, ...)
 
 
 class Vellum:
-    def __init__(self, *, environment: VellumEnvironment = VellumEnvironment.PRODUCTION, api_key: str):
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.api_key = api_key
-        self.deployments = DeploymentsClient(environment=self._environment, api_key=self.api_key)
-        self.document_indexes = DocumentIndexesClient(environment=self._environment, api_key=self.api_key)
-        self.documents = DocumentsClient(environment=self._environment, api_key=self.api_key)
-        self.model_versions = ModelVersionsClient(environment=self._environment, api_key=self.api_key)
-        self.registered_prompts = RegisteredPromptsClient(environment=self._environment, api_key=self.api_key)
-        self.sandboxes = SandboxesClient(environment=self._environment, api_key=self.api_key)
-        self.test_suites = TestSuitesClient(environment=self._environment, api_key=self.api_key)
+        self._client_wrapper = SyncClientWrapper(api_key=api_key, httpx_client=httpx.Client(timeout=timeout))
+        self.deployments = DeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = DocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = DocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = ModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = RegisteredPromptsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.sandboxes = SandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = TestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
 
     def execute_workflow_stream(
         self,
@@ -59,6 +65,22 @@ class Vellum:
         inputs: typing.List[WorkflowRequestInputRequest],
         external_id: typing.Optional[str] = OMIT,
     ) -> typing.Iterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+            - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+            - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+            - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+            - inputs: typing.List[WorkflowRequestInputRequest].
+
+            - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
         _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
         if workflow_deployment_id is not OMIT:
             _request["workflow_deployment_id"] = workflow_deployment_id
@@ -68,20 +90,25 @@ class Vellum:
             _request["release_tag"] = release_tag
         if external_id is not OMIT:
             _request["external_id"] = external_id
-        with httpx.stream(
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.iter_text():
+                for _text in _response.iter_lines():
                     if len(_text) == 0:
                         continue
                     yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
                 return
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
             try:
+                _response.read()
                 _response_json = _response.json()
             except JSONDecodeError:
                 raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -95,6 +122,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -102,11 +145,11 @@
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -133,6 +176,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.Iterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -140,15 +199,15 @@
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        with httpx.stream(
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.iter_text():
+                for _text in _response.iter_lines():
                     if len(_text) == 0:
                         continue
                     yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
@@ -162,6 +221,7 @@ class Vellum:
             if _response.status_code == 500:
                 raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
             try:
+                _response.read()
                 _response_json = _response.json()
             except JSONDecodeError:
                 raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -175,6 +235,22 @@ class Vellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+            - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+            - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+            - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
        if index_id is not OMIT:
            _request["index_id"] = index_id
@@ -182,11 +258,11 @@
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -210,16 +286,30 @@
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -238,16 +328,24 @@
 
 
 class AsyncVellum:
-    def __init__(self, *, environment: VellumEnvironment = VellumEnvironment.PRODUCTION, api_key: str):
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.api_key = api_key
-        self.deployments = AsyncDeploymentsClient(environment=self._environment, api_key=self.api_key)
-        self.document_indexes = AsyncDocumentIndexesClient(environment=self._environment, api_key=self.api_key)
-        self.documents = AsyncDocumentsClient(environment=self._environment, api_key=self.api_key)
-        self.model_versions = AsyncModelVersionsClient(environment=self._environment, api_key=self.api_key)
-        self.registered_prompts = AsyncRegisteredPromptsClient(environment=self._environment, api_key=self.api_key)
-        self.sandboxes = AsyncSandboxesClient(environment=self._environment, api_key=self.api_key)
-        self.test_suites = AsyncTestSuitesClient(environment=self._environment, api_key=self.api_key)
+        self._client_wrapper = AsyncClientWrapper(api_key=api_key, httpx_client=httpx.AsyncClient(timeout=timeout))
+        self.deployments = AsyncDeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = AsyncDocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = AsyncDocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = AsyncModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = AsyncRegisteredPromptsClient(
+            environment=environment, client_wrapper=self._client_wrapper
+        )
+        self.sandboxes = AsyncSandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = AsyncTestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
 
     async def execute_workflow_stream(
         self,
@@ -258,6 +356,22 @@ class AsyncVellum:
         inputs: typing.List[WorkflowRequestInputRequest],
         external_id: typing.Optional[str] = OMIT,
     ) -> typing.AsyncIterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+            - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+            - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+            - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+            - inputs: typing.List[WorkflowRequestInputRequest].
+
+            - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
         _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
         if workflow_deployment_id is not OMIT:
             _request["workflow_deployment_id"] = workflow_deployment_id
@@ -267,25 +381,29 @@ class AsyncVellum:
             _request["release_tag"] = release_tag
         if external_id is not OMIT:
             _request["external_id"] = external_id
-        async with httpx.AsyncClient() as _client:
-            async with _client.stream(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            ) as _response:
-                if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_text():
-                        if len(_text) == 0:
-                            continue
-                        yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
-                    return
-                try:
-                    _response_json = _response.json()
-                except JSONDecodeError:
-                    raise ApiError(status_code=_response.status_code, body=_response.text)
-                raise ApiError(status_code=_response.status_code, body=_response_json)
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def generate(
         self,
@@ -295,6 +413,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -302,14 +436,13 @@
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(GenerateResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -334,6 +467,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.AsyncIterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -341,33 +490,33 @@
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            async with _client.stream(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            ) as _response:
-                if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_text():
-                        if len(_text) == 0:
-                            continue
-                        yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
-                    return
-                if _response.status_code == 400:
-                    raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                if _response.status_code == 403:
-                    raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
-                if _response.status_code == 404:
-                    raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                if _response.status_code == 500:
-                    raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                try:
-                    _response_json = _response.json()
-                except JSONDecodeError:
-                    raise ApiError(status_code=_response.status_code, body=_response.text)
-                raise ApiError(status_code=_response.status_code, body=_response_json)
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 400:
+                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 403:
+                raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def search(
         self,
@@ -377,6 +526,22 @@ class AsyncVellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+            - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+            - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+            - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -384,14 +549,13 @@
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(SearchResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -413,19 +577,32 @@ class AsyncVellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
            return
        if _response.status_code == 400:
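Taken together, the client.py changes mean both `Vellum` and `AsyncVellum` now accept an optional `timeout`, reuse a single httpx client across all resource clients, and parse streaming responses line by line (`iter_lines`/`aiter_lines`) as newline-delimited JSON. A minimal usage sketch of the new surface; the environment variable, deployment name, and empty inputs list are placeholder assumptions, not values from this diff:

    import os

    from vellum.client import Vellum

    # timeout is the optional constructor argument introduced in this release.
    client = Vellum(api_key=os.environ["VELLUM_API_KEY"], timeout=30.0)

    # Streaming methods now yield one parsed event per NDJSON line.
    for event in client.execute_workflow_stream(
        workflow_deployment_name="example-workflow",  # hypothetical deployment name
        inputs=[],  # populate with WorkflowRequestInputRequest values
    ):
        print(event)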
vellum/core/__init__.py CHANGED
@@ -1,8 +1,17 @@
 # This file was auto-generated by Fern from our API Definition.
 
 from .api_error import ApiError
+from .client_wrapper import AsyncClientWrapper, BaseClientWrapper, SyncClientWrapper
 from .datetime_utils import serialize_datetime
 from .jsonable_encoder import jsonable_encoder
-from .remove_none_from_headers import remove_none_from_headers
+from .remove_none_from_dict import remove_none_from_dict
 
-__all__ = ["ApiError", "jsonable_encoder", "remove_none_from_headers", "serialize_datetime"]
+__all__ = [
+    "ApiError",
+    "AsyncClientWrapper",
+    "BaseClientWrapper",
+    "SyncClientWrapper",
+    "jsonable_encoder",
+    "remove_none_from_dict",
+    "serialize_datetime",
+]
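Because `vellum/core/__init__.py` re-exports the new symbols, they are importable from `vellum.core` directly, e.g.:

    from vellum.core import SyncClientWrapper, remove_none_from_dict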
vellum/core/client_wrapper.py ADDED
@@ -0,0 +1,27 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import httpx
+
+
+class BaseClientWrapper:
+    def __init__(self, *, api_key: str):
+        self.api_key = api_key
+
+    def get_headers(self) -> typing.Dict[str, str]:
+        headers: typing.Dict[str, str] = {}
+        headers["X_API_KEY"] = self.api_key
+        return headers
+
+
+class SyncClientWrapper(BaseClientWrapper):
+    def __init__(self, *, api_key: str, httpx_client: httpx.Client):
+        super().__init__(api_key=api_key)
+        self.httpx_client = httpx_client
+
+
+class AsyncClientWrapper(BaseClientWrapper):
+    def __init__(self, *, api_key: str, httpx_client: httpx.AsyncClient):
+        super().__init__(api_key=api_key)
+        self.httpx_client = httpx_client
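The wrapper centralizes auth headers and owns the shared httpx client, so every resource client reuses one connection pool instead of opening a fresh `httpx.AsyncClient` per call. A small sketch of the behavior; the key value is a placeholder:

    import httpx

    from vellum.core.client_wrapper import SyncClientWrapper

    # One shared httpx.Client gives connection pooling across all resource clients.
    wrapper = SyncClientWrapper(api_key="example-key", httpx_client=httpx.Client(timeout=30.0))
    assert wrapper.get_headers() == {"X_API_KEY": "example-key"}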
vellum/core/remove_none_from_dict.py ADDED
@@ -0,0 +1,11 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from typing import Any, Dict, Optional
+
+
+def remove_none_from_dict(original: Dict[str, Optional[Any]]) -> Dict[str, Any]:
+    new: Dict[str, Any] = {}
+    for key, value in original.items():
+        if value is not None:
+            new[key] = value
+    return new
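This helper generalizes the removed `remove_none_from_headers`: it filters `None` values out of any dict, not just headers. For example:

    from vellum.core.remove_none_from_dict import remove_none_from_dict

    assert remove_none_from_dict({"limit": 10, "offset": None}) == {"limit": 10}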