vellum-ai 0.0.21__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. vellum/__init__.py +19 -19
  2. vellum/client.py +270 -101
  3. vellum/core/__init__.py +11 -2
  4. vellum/core/client_wrapper.py +27 -0
  5. vellum/core/remove_none_from_dict.py +11 -0
  6. vellum/resources/deployments/client.py +35 -15
  7. vellum/resources/document_indexes/client.py +64 -16
  8. vellum/resources/documents/client.py +110 -35
  9. vellum/resources/model_versions/client.py +67 -25
  10. vellum/resources/registered_prompts/client.py +80 -16
  11. vellum/resources/sandboxes/client.py +90 -25
  12. vellum/resources/test_suites/client.py +90 -25
  13. vellum/types/deployment_read.py +2 -6
  14. vellum/types/document.py +3 -7
  15. vellum/types/document_document_to_document_index.py +2 -2
  16. vellum/types/document_index_read.py +3 -7
  17. vellum/types/enriched_normalized_completion.py +5 -9
  18. vellum/types/evaluation_params.py +1 -3
  19. vellum/types/evaluation_params_request.py +1 -3
  20. vellum/types/generate_error_response.py +1 -1
  21. vellum/types/generate_request.py +3 -7
  22. vellum/types/generate_result.py +2 -6
  23. vellum/types/generate_result_data.py +1 -1
  24. vellum/types/generate_result_error.py +1 -1
  25. vellum/types/model_version_build_config.py +2 -6
  26. vellum/types/model_version_compile_prompt_response.py +1 -1
  27. vellum/types/model_version_compiled_prompt.py +2 -4
  28. vellum/types/model_version_exec_config.py +3 -3
  29. vellum/types/model_version_read.py +6 -10
  30. vellum/types/model_version_sandbox_snapshot.py +3 -5
  31. vellum/types/prompt_template_block_properties_request.py +2 -2
  32. vellum/types/prompt_template_block_request.py +1 -1
  33. vellum/types/prompt_template_input_variable.py +1 -1
  34. vellum/types/prompt_template_input_variable_request.py +1 -1
  35. vellum/types/register_prompt_error_response.py +1 -1
  36. vellum/types/register_prompt_prompt.py +2 -2
  37. vellum/types/register_prompt_prompt_info_request.py +1 -1
  38. vellum/types/register_prompt_response.py +5 -7
  39. vellum/types/registered_prompt_deployment.py +3 -3
  40. vellum/types/registered_prompt_model_version.py +2 -2
  41. vellum/types/registered_prompt_sandbox.py +2 -2
  42. vellum/types/registered_prompt_sandbox_snapshot.py +1 -1
  43. vellum/types/sandbox_scenario.py +2 -2
  44. vellum/types/scenario_input_request.py +1 -1
  45. vellum/types/search_error_response.py +1 -1
  46. vellum/types/search_filters_request.py +1 -1
  47. vellum/types/search_request_options_request.py +4 -6
  48. vellum/types/search_response.py +1 -1
  49. vellum/types/search_result.py +3 -3
  50. vellum/types/search_result_merging_request.py +1 -1
  51. vellum/types/search_weights_request.py +2 -2
  52. vellum/types/slim_document.py +5 -9
  53. vellum/types/submit_completion_actual_request.py +5 -15
  54. vellum/types/terminal_node_chat_history_result.py +1 -1
  55. vellum/types/terminal_node_json_result.py +1 -1
  56. vellum/types/terminal_node_result_output.py +2 -4
  57. vellum/types/terminal_node_string_result.py +1 -1
  58. vellum/types/test_suite_test_case.py +4 -8
  59. vellum/types/upload_document_response.py +1 -1
  60. vellum/types/workflow_node_result_data.py +7 -11
  61. vellum/types/workflow_request_chat_history_input_request.py +1 -3
  62. vellum/types/workflow_request_input_request.py +2 -6
  63. vellum/types/workflow_request_json_input_request.py +1 -3
  64. vellum/types/workflow_request_string_input_request.py +1 -3
  65. vellum/types/workflow_result_event_output_data.py +2 -8
  66. vellum/types/workflow_result_event_output_data_chat_history.py +3 -0
  67. vellum/types/workflow_result_event_output_data_json.py +3 -0
  68. vellum/types/workflow_result_event_output_data_string.py +6 -1
  69. vellum/types/workflow_stream_event.py +1 -4
  70. {vellum_ai-0.0.21.dist-info → vellum_ai-0.0.22.dist-info}/METADATA +1 -1
  71. {vellum_ai-0.0.21.dist-info → vellum_ai-0.0.22.dist-info}/RECORD +72 -71
  72. vellum/core/remove_none_from_headers.py +0 -11
  73. {vellum_ai-0.0.21.dist-info → vellum_ai-0.0.22.dist-info}/WHEEL +0 -0
vellum/__init__.py CHANGED
@@ -1,16 +1,5 @@
 # This file was auto-generated by Fern from our API Definition.
 
-from .environment import VellumEnvironment
-from .errors import BadRequestError, ConflictError, ForbiddenError, InternalServerError, NotFoundError
-from .resources import (
-    deployments,
-    document_indexes,
-    documents,
-    model_versions,
-    registered_prompts,
-    sandboxes,
-    test_suites,
-)
 from .types import (
     BlockTypeEnum,
     ChatMessage,
@@ -46,8 +35,8 @@ from .types import (
     LogprobsEnum,
     ModelTypeEnum,
     ModelVersionBuildConfig,
-    ModelVersionCompiledPrompt,
     ModelVersionCompilePromptResponse,
+    ModelVersionCompiledPrompt,
     ModelVersionExecConfig,
     ModelVersionExecConfigParameters,
     ModelVersionRead,
@@ -69,15 +58,15 @@ from .types import (
     PromptTemplateInputVariable,
     PromptTemplateInputVariableRequest,
     ProviderEnum,
-    RegisteredPromptDeployment,
-    RegisteredPromptModelVersion,
-    RegisteredPromptSandbox,
-    RegisteredPromptSandboxSnapshot,
     RegisterPromptErrorResponse,
     RegisterPromptModelParametersRequest,
     RegisterPromptPrompt,
     RegisterPromptPromptInfoRequest,
     RegisterPromptResponse,
+    RegisteredPromptDeployment,
+    RegisteredPromptModelVersion,
+    RegisteredPromptSandbox,
+    RegisteredPromptSandboxSnapshot,
     SandboxMetricInputParams,
     SandboxMetricInputParamsRequest,
     SandboxNodeResult,
@@ -133,16 +122,27 @@ from .types import (
     WorkflowRequestStringInputRequest,
     WorkflowResultEvent,
     WorkflowResultEventOutputData,
-    WorkflowResultEventOutputData_ChatHistory,
-    WorkflowResultEventOutputData_Json,
-    WorkflowResultEventOutputData_String,
     WorkflowResultEventOutputDataChatHistory,
     WorkflowResultEventOutputDataJson,
     WorkflowResultEventOutputDataString,
+    WorkflowResultEventOutputData_ChatHistory,
+    WorkflowResultEventOutputData_Json,
+    WorkflowResultEventOutputData_String,
     WorkflowStreamEvent,
     WorkflowStreamEvent_Node,
     WorkflowStreamEvent_Workflow,
 )
+from .errors import BadRequestError, ConflictError, ForbiddenError, InternalServerError, NotFoundError
+from .resources import (
+    deployments,
+    document_indexes,
+    documents,
+    model_versions,
+    registered_prompts,
+    sandboxes,
+    test_suites,
+)
+from .environment import VellumEnvironment
 
 __all__ = [
     "BadRequestError",
vellum/client.py CHANGED
@@ -9,8 +9,8 @@ import httpx
 import pydantic
 
 from .core.api_error import ApiError
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from .core.jsonable_encoder import jsonable_encoder
-from .core.remove_none_from_headers import remove_none_from_headers
 from .environment import VellumEnvironment
 from .errors.bad_request_error import BadRequestError
 from .errors.forbidden_error import ForbiddenError
@@ -39,16 +39,22 @@ OMIT = typing.cast(typing.Any, ...)
 
 
 class Vellum:
-    def __init__(self, *, environment: VellumEnvironment = VellumEnvironment.PRODUCTION, api_key: str):
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.api_key = api_key
-        self.deployments = DeploymentsClient(environment=self._environment, api_key=self.api_key)
-        self.document_indexes = DocumentIndexesClient(environment=self._environment, api_key=self.api_key)
-        self.documents = DocumentsClient(environment=self._environment, api_key=self.api_key)
-        self.model_versions = ModelVersionsClient(environment=self._environment, api_key=self.api_key)
-        self.registered_prompts = RegisteredPromptsClient(environment=self._environment, api_key=self.api_key)
-        self.sandboxes = SandboxesClient(environment=self._environment, api_key=self.api_key)
-        self.test_suites = TestSuitesClient(environment=self._environment, api_key=self.api_key)
+        self._client_wrapper = SyncClientWrapper(api_key=api_key, httpx_client=httpx.Client(timeout=timeout))
+        self.deployments = DeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = DocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = DocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = ModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = RegisteredPromptsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.sandboxes = SandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = TestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
 
     def execute_workflow_stream(
         self,
@@ -59,6 +65,22 @@ class Vellum:
         inputs: typing.List[WorkflowRequestInputRequest],
         external_id: typing.Optional[str] = OMIT,
     ) -> typing.Iterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+            - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+            - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+            - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+            - inputs: typing.List[WorkflowRequestInputRequest].
+
+            - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
         _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
         if workflow_deployment_id is not OMIT:
             _request["workflow_deployment_id"] = workflow_deployment_id
@@ -68,20 +90,21 @@ class Vellum:
             _request["release_tag"] = release_tag
         if external_id is not OMIT:
             _request["external_id"] = external_id
-        with httpx.stream(
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.iter_text():
+                for _text in _response.iter_lines():
                     if len(_text) == 0:
                         continue
                     yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
                 return
             try:
+                _response.read()
                 _response_json = _response.json()
             except JSONDecodeError:
                 raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -95,6 +118,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -102,11 +141,11 @@ class Vellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -133,6 +172,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.Iterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -140,15 +195,15 @@ class Vellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        with httpx.stream(
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.iter_text():
+                for _text in _response.iter_lines():
                     if len(_text) == 0:
                         continue
                     yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
@@ -162,6 +217,7 @@ class Vellum:
             if _response.status_code == 500:
                 raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
             try:
+                _response.read()
                 _response_json = _response.json()
             except JSONDecodeError:
                 raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -175,6 +231,22 @@ class Vellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+            - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+            - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+            - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -182,11 +254,11 @@ class Vellum:
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -210,16 +282,30 @@ class Vellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -238,16 +324,24 @@
 
 
 class AsyncVellum:
-    def __init__(self, *, environment: VellumEnvironment = VellumEnvironment.PRODUCTION, api_key: str):
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.api_key = api_key
-        self.deployments = AsyncDeploymentsClient(environment=self._environment, api_key=self.api_key)
-        self.document_indexes = AsyncDocumentIndexesClient(environment=self._environment, api_key=self.api_key)
-        self.documents = AsyncDocumentsClient(environment=self._environment, api_key=self.api_key)
-        self.model_versions = AsyncModelVersionsClient(environment=self._environment, api_key=self.api_key)
-        self.registered_prompts = AsyncRegisteredPromptsClient(environment=self._environment, api_key=self.api_key)
-        self.sandboxes = AsyncSandboxesClient(environment=self._environment, api_key=self.api_key)
-        self.test_suites = AsyncTestSuitesClient(environment=self._environment, api_key=self.api_key)
+        self._client_wrapper = AsyncClientWrapper(api_key=api_key, httpx_client=httpx.AsyncClient(timeout=timeout))
+        self.deployments = AsyncDeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = AsyncDocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = AsyncDocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = AsyncModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = AsyncRegisteredPromptsClient(
+            environment=environment, client_wrapper=self._client_wrapper
+        )
+        self.sandboxes = AsyncSandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = AsyncTestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
 
     async def execute_workflow_stream(
         self,
@@ -258,6 +352,22 @@ class AsyncVellum:
         inputs: typing.List[WorkflowRequestInputRequest],
         external_id: typing.Optional[str] = OMIT,
     ) -> typing.AsyncIterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+            - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+            - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+            - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+            - inputs: typing.List[WorkflowRequestInputRequest].
+
+            - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
         _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
         if workflow_deployment_id is not OMIT:
             _request["workflow_deployment_id"] = workflow_deployment_id
@@ -267,25 +377,25 @@ class AsyncVellum:
             _request["release_tag"] = release_tag
         if external_id is not OMIT:
             _request["external_id"] = external_id
-        async with httpx.AsyncClient() as _client:
-            async with _client.stream(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            ) as _response:
-                if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_text():
-                        if len(_text) == 0:
-                            continue
-                        yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
-                    return
-                try:
-                    _response_json = _response.json()
-                except JSONDecodeError:
-                    raise ApiError(status_code=_response.status_code, body=_response.text)
-                raise ApiError(status_code=_response.status_code, body=_response_json)
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
+                return
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def generate(
         self,
@@ -295,6 +405,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -302,14 +428,13 @@ class AsyncVellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(GenerateResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -334,6 +459,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.AsyncIterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -341,33 +482,33 @@ class AsyncVellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            async with _client.stream(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            ) as _response:
-                if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_text():
-                        if len(_text) == 0:
-                            continue
-                        yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
-                    return
-                if _response.status_code == 400:
-                    raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                if _response.status_code == 403:
-                    raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
-                if _response.status_code == 404:
-                    raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                if _response.status_code == 500:
-                    raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                try:
-                    _response_json = _response.json()
-                except JSONDecodeError:
-                    raise ApiError(status_code=_response.status_code, body=_response.text)
-                raise ApiError(status_code=_response.status_code, body=_response_json)
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 400:
+                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 403:
+                raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def search(
         self,
@@ -377,6 +518,22 @@ class AsyncVellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+            - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+            - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+            - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -384,14 +541,13 @@ class AsyncVellum:
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(SearchResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -413,19 +569,32 @@ class AsyncVellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return
         if _response.status_code == 400:
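Taken together, the `client.py` changes are: (1) a new optional `timeout` constructor argument, applied to one shared `httpx` client owned by the new client wrapper instead of ad-hoc module-level `httpx` calls; (2) auth headers built via `get_headers()` rather than `remove_none_from_headers({"X_API_KEY": ...})`; (3) streaming endpoints now split responses with `iter_lines()`/`aiter_lines()` instead of `iter_text()`/`aiter_text()`, the correct framing for a newline-delimited JSON stream, since an arbitrary text chunk can end mid-object and make `json.loads` fail; and (4) stream error paths read the body (`_response.read()`/`await _response.aread()`) before calling `_response.json()`, which httpx requires for streamed responses. A short usage sketch against the new surface follows; the API key and deployment name are placeholders, and the `GenerateRequest(input_values=...)` shape is assumed from this SDK generation, not shown in this diff:

```python
from vellum import Vellum
from vellum.types import GenerateRequest

# timeout is new in 0.0.22 and is applied to the shared httpx.Client held
# by the SyncClientWrapper, so it covers every resource client too.
client = Vellum(api_key="YOUR_API_KEY", timeout=30.0)

request = GenerateRequest(input_values={"question": "What is Vellum?"})  # assumed field

# Non-streaming call: public signature unchanged from 0.0.21.
response = client.generate(deployment_name="my-deployment", requests=[request])

# Streaming call: each yielded event now corresponds to exactly one
# newline-delimited JSON line (iter_lines), not an arbitrary text chunk.
for event in client.generate_stream(deployment_name="my-deployment", requests=[request]):
    print(event)
```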
vellum/core/__init__.py CHANGED
@@ -1,8 +1,17 @@
 # This file was auto-generated by Fern from our API Definition.
 
 from .api_error import ApiError
+from .client_wrapper import AsyncClientWrapper, BaseClientWrapper, SyncClientWrapper
 from .datetime_utils import serialize_datetime
 from .jsonable_encoder import jsonable_encoder
-from .remove_none_from_headers import remove_none_from_headers
+from .remove_none_from_dict import remove_none_from_dict
 
-__all__ = ["ApiError", "jsonable_encoder", "remove_none_from_headers", "serialize_datetime"]
+__all__ = [
+    "ApiError",
+    "AsyncClientWrapper",
+    "BaseClientWrapper",
+    "SyncClientWrapper",
+    "jsonable_encoder",
+    "remove_none_from_dict",
+    "serialize_datetime",
+]
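The new `vellum/core/client_wrapper.py` itself is collapsed in this view (a new 27-line file). From how it is consumed in `client.py` — `SyncClientWrapper(api_key=..., httpx_client=httpx.Client(timeout=...))`, the `.httpx_client` attribute, and `.get_headers()` — it plausibly reduces to the sketch below. The exact class layout and any extra headers are assumptions; only the `X_API_KEY` header is confirmed, since it appears in the 0.0.21 code the wrapper replaces:

```python
import typing

import httpx


class BaseClientWrapper:
    """Holds credentials and builds the auth headers shared by all resource clients."""

    def __init__(self, *, api_key: str):
        self._api_key = api_key

    def get_headers(self) -> typing.Dict[str, str]:
        # 0.0.21 sent {"X_API_KEY": api_key} per request; the wrapper centralizes that.
        return {"X_API_KEY": self._api_key}


class SyncClientWrapper(BaseClientWrapper):
    """Adds a shared httpx.Client so connections and the timeout are reused."""

    def __init__(self, *, api_key: str, httpx_client: httpx.Client):
        super().__init__(api_key=api_key)
        self.httpx_client = httpx_client


class AsyncClientWrapper(BaseClientWrapper):
    """Async counterpart holding a shared httpx.AsyncClient."""

    def __init__(self, *, api_key: str, httpx_client: httpx.AsyncClient):
        super().__init__(api_key=api_key)
        self.httpx_client = httpx_client
```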