vellum-ai 0.0.21__py3-none-any.whl → 0.0.22__py3-none-any.whl

Files changed (73)
  1. vellum/__init__.py +19 -19
  2. vellum/client.py +270 -101
  3. vellum/core/__init__.py +11 -2
  4. vellum/core/client_wrapper.py +27 -0
  5. vellum/core/remove_none_from_dict.py +11 -0
  6. vellum/resources/deployments/client.py +35 -15
  7. vellum/resources/document_indexes/client.py +64 -16
  8. vellum/resources/documents/client.py +110 -35
  9. vellum/resources/model_versions/client.py +67 -25
  10. vellum/resources/registered_prompts/client.py +80 -16
  11. vellum/resources/sandboxes/client.py +90 -25
  12. vellum/resources/test_suites/client.py +90 -25
  13. vellum/types/deployment_read.py +2 -6
  14. vellum/types/document.py +3 -7
  15. vellum/types/document_document_to_document_index.py +2 -2
  16. vellum/types/document_index_read.py +3 -7
  17. vellum/types/enriched_normalized_completion.py +5 -9
  18. vellum/types/evaluation_params.py +1 -3
  19. vellum/types/evaluation_params_request.py +1 -3
  20. vellum/types/generate_error_response.py +1 -1
  21. vellum/types/generate_request.py +3 -7
  22. vellum/types/generate_result.py +2 -6
  23. vellum/types/generate_result_data.py +1 -1
  24. vellum/types/generate_result_error.py +1 -1
  25. vellum/types/model_version_build_config.py +2 -6
  26. vellum/types/model_version_compile_prompt_response.py +1 -1
  27. vellum/types/model_version_compiled_prompt.py +2 -4
  28. vellum/types/model_version_exec_config.py +3 -3
  29. vellum/types/model_version_read.py +6 -10
  30. vellum/types/model_version_sandbox_snapshot.py +3 -5
  31. vellum/types/prompt_template_block_properties_request.py +2 -2
  32. vellum/types/prompt_template_block_request.py +1 -1
  33. vellum/types/prompt_template_input_variable.py +1 -1
  34. vellum/types/prompt_template_input_variable_request.py +1 -1
  35. vellum/types/register_prompt_error_response.py +1 -1
  36. vellum/types/register_prompt_prompt.py +2 -2
  37. vellum/types/register_prompt_prompt_info_request.py +1 -1
  38. vellum/types/register_prompt_response.py +5 -7
  39. vellum/types/registered_prompt_deployment.py +3 -3
  40. vellum/types/registered_prompt_model_version.py +2 -2
  41. vellum/types/registered_prompt_sandbox.py +2 -2
  42. vellum/types/registered_prompt_sandbox_snapshot.py +1 -1
  43. vellum/types/sandbox_scenario.py +2 -2
  44. vellum/types/scenario_input_request.py +1 -1
  45. vellum/types/search_error_response.py +1 -1
  46. vellum/types/search_filters_request.py +1 -1
  47. vellum/types/search_request_options_request.py +4 -6
  48. vellum/types/search_response.py +1 -1
  49. vellum/types/search_result.py +3 -3
  50. vellum/types/search_result_merging_request.py +1 -1
  51. vellum/types/search_weights_request.py +2 -2
  52. vellum/types/slim_document.py +5 -9
  53. vellum/types/submit_completion_actual_request.py +5 -15
  54. vellum/types/terminal_node_chat_history_result.py +1 -1
  55. vellum/types/terminal_node_json_result.py +1 -1
  56. vellum/types/terminal_node_result_output.py +2 -4
  57. vellum/types/terminal_node_string_result.py +1 -1
  58. vellum/types/test_suite_test_case.py +4 -8
  59. vellum/types/upload_document_response.py +1 -1
  60. vellum/types/workflow_node_result_data.py +7 -11
  61. vellum/types/workflow_request_chat_history_input_request.py +1 -3
  62. vellum/types/workflow_request_input_request.py +2 -6
  63. vellum/types/workflow_request_json_input_request.py +1 -3
  64. vellum/types/workflow_request_string_input_request.py +1 -3
  65. vellum/types/workflow_result_event_output_data.py +2 -8
  66. vellum/types/workflow_result_event_output_data_chat_history.py +3 -0
  67. vellum/types/workflow_result_event_output_data_json.py +3 -0
  68. vellum/types/workflow_result_event_output_data_string.py +6 -1
  69. vellum/types/workflow_stream_event.py +1 -4
  70. {vellum_ai-0.0.21.dist-info → vellum_ai-0.0.22.dist-info}/METADATA +1 -1
  71. {vellum_ai-0.0.21.dist-info → vellum_ai-0.0.22.dist-info}/RECORD +72 -71
  72. vellum/core/remove_none_from_headers.py +0 -11
  73. {vellum_ai-0.0.21.dist-info → vellum_ai-0.0.22.dist-info}/WHEEL +0 -0
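
The dominant theme of this release is visible in the file list above: a new `vellum/core/client_wrapper.py` replaces the deleted `vellum/core/remove_none_from_headers.py`, and every resource client is rewired to share a single HTTP client wrapper instead of receiving a raw `api_key`. Along the way, the top-level constructors gain a `timeout` parameter, the public methods pick up docstrings, and the streaming endpoints switch from `iter_text()` to `iter_lines()` so that each yielded chunk is a complete JSON line.
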
vellum/__init__.py CHANGED
@@ -1,16 +1,5 @@
 # This file was auto-generated by Fern from our API Definition.
 
-from .environment import VellumEnvironment
-from .errors import BadRequestError, ConflictError, ForbiddenError, InternalServerError, NotFoundError
-from .resources import (
-    deployments,
-    document_indexes,
-    documents,
-    model_versions,
-    registered_prompts,
-    sandboxes,
-    test_suites,
-)
 from .types import (
     BlockTypeEnum,
     ChatMessage,
@@ -46,8 +35,8 @@ from .types import (
     LogprobsEnum,
     ModelTypeEnum,
     ModelVersionBuildConfig,
-    ModelVersionCompiledPrompt,
     ModelVersionCompilePromptResponse,
+    ModelVersionCompiledPrompt,
     ModelVersionExecConfig,
     ModelVersionExecConfigParameters,
     ModelVersionRead,
@@ -69,15 +58,15 @@ from .types import (
     PromptTemplateInputVariable,
     PromptTemplateInputVariableRequest,
     ProviderEnum,
-    RegisteredPromptDeployment,
-    RegisteredPromptModelVersion,
-    RegisteredPromptSandbox,
-    RegisteredPromptSandboxSnapshot,
     RegisterPromptErrorResponse,
     RegisterPromptModelParametersRequest,
     RegisterPromptPrompt,
     RegisterPromptPromptInfoRequest,
     RegisterPromptResponse,
+    RegisteredPromptDeployment,
+    RegisteredPromptModelVersion,
+    RegisteredPromptSandbox,
+    RegisteredPromptSandboxSnapshot,
     SandboxMetricInputParams,
     SandboxMetricInputParamsRequest,
     SandboxNodeResult,
@@ -133,16 +122,27 @@ from .types import (
     WorkflowRequestStringInputRequest,
     WorkflowResultEvent,
     WorkflowResultEventOutputData,
-    WorkflowResultEventOutputData_ChatHistory,
-    WorkflowResultEventOutputData_Json,
-    WorkflowResultEventOutputData_String,
     WorkflowResultEventOutputDataChatHistory,
     WorkflowResultEventOutputDataJson,
     WorkflowResultEventOutputDataString,
+    WorkflowResultEventOutputData_ChatHistory,
+    WorkflowResultEventOutputData_Json,
+    WorkflowResultEventOutputData_String,
     WorkflowStreamEvent,
     WorkflowStreamEvent_Node,
     WorkflowStreamEvent_Workflow,
 )
+from .errors import BadRequestError, ConflictError, ForbiddenError, InternalServerError, NotFoundError
+from .resources import (
+    deployments,
+    document_indexes,
+    documents,
+    model_versions,
+    registered_prompts,
+    sandboxes,
+    test_suites,
+)
+from .environment import VellumEnvironment
 
 __all__ = [
     "BadRequestError",
vellum/client.py CHANGED
@@ -9,8 +9,8 @@ import httpx
 import pydantic
 
 from .core.api_error import ApiError
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from .core.jsonable_encoder import jsonable_encoder
-from .core.remove_none_from_headers import remove_none_from_headers
 from .environment import VellumEnvironment
 from .errors.bad_request_error import BadRequestError
 from .errors.forbidden_error import ForbiddenError
@@ -39,16 +39,22 @@ OMIT = typing.cast(typing.Any, ...)
 
 
 class Vellum:
-    def __init__(self, *, environment: VellumEnvironment = VellumEnvironment.PRODUCTION, api_key: str):
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.api_key = api_key
-        self.deployments = DeploymentsClient(environment=self._environment, api_key=self.api_key)
-        self.document_indexes = DocumentIndexesClient(environment=self._environment, api_key=self.api_key)
-        self.documents = DocumentsClient(environment=self._environment, api_key=self.api_key)
-        self.model_versions = ModelVersionsClient(environment=self._environment, api_key=self.api_key)
-        self.registered_prompts = RegisteredPromptsClient(environment=self._environment, api_key=self.api_key)
-        self.sandboxes = SandboxesClient(environment=self._environment, api_key=self.api_key)
-        self.test_suites = TestSuitesClient(environment=self._environment, api_key=self.api_key)
+        self._client_wrapper = SyncClientWrapper(api_key=api_key, httpx_client=httpx.Client(timeout=timeout))
+        self.deployments = DeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = DocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = DocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = ModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = RegisteredPromptsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.sandboxes = SandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = TestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
 
     def execute_workflow_stream(
         self,
@@ -59,6 +65,22 @@ class Vellum:
         inputs: typing.List[WorkflowRequestInputRequest],
         external_id: typing.Optional[str] = OMIT,
     ) -> typing.Iterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+            - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+            - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+            - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+            - inputs: typing.List[WorkflowRequestInputRequest].
+
+            - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
         _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
         if workflow_deployment_id is not OMIT:
             _request["workflow_deployment_id"] = workflow_deployment_id
@@ -68,20 +90,21 @@ class Vellum:
             _request["release_tag"] = release_tag
         if external_id is not OMIT:
             _request["external_id"] = external_id
-        with httpx.stream(
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.iter_text():
+                for _text in _response.iter_lines():
                    if len(_text) == 0:
                        continue
                    yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
                return
            try:
+                _response.read()
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -95,6 +118,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -102,11 +141,11 @@ class Vellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -133,6 +172,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.Iterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -140,15 +195,15 @@ class Vellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        with httpx.stream(
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.iter_text():
+                for _text in _response.iter_lines():
                     if len(_text) == 0:
                         continue
                     yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
@@ -162,6 +217,7 @@ class Vellum:
             if _response.status_code == 500:
                 raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
             try:
+                _response.read()
                 _response_json = _response.json()
             except JSONDecodeError:
                 raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -175,6 +231,22 @@ class Vellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+            - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+            - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+            - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -182,11 +254,11 @@ class Vellum:
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -210,16 +282,30 @@ class Vellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        _response = httpx.request(
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
             json=jsonable_encoder(_request),
-            headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -238,16 +324,24 @@ class Vellum:
 
 
 class AsyncVellum:
-    def __init__(self, *, environment: VellumEnvironment = VellumEnvironment.PRODUCTION, api_key: str):
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.api_key = api_key
-        self.deployments = AsyncDeploymentsClient(environment=self._environment, api_key=self.api_key)
-        self.document_indexes = AsyncDocumentIndexesClient(environment=self._environment, api_key=self.api_key)
-        self.documents = AsyncDocumentsClient(environment=self._environment, api_key=self.api_key)
-        self.model_versions = AsyncModelVersionsClient(environment=self._environment, api_key=self.api_key)
-        self.registered_prompts = AsyncRegisteredPromptsClient(environment=self._environment, api_key=self.api_key)
-        self.sandboxes = AsyncSandboxesClient(environment=self._environment, api_key=self.api_key)
-        self.test_suites = AsyncTestSuitesClient(environment=self._environment, api_key=self.api_key)
+        self._client_wrapper = AsyncClientWrapper(api_key=api_key, httpx_client=httpx.AsyncClient(timeout=timeout))
+        self.deployments = AsyncDeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = AsyncDocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = AsyncDocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = AsyncModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = AsyncRegisteredPromptsClient(
+            environment=environment, client_wrapper=self._client_wrapper
+        )
+        self.sandboxes = AsyncSandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = AsyncTestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
 
     async def execute_workflow_stream(
         self,
@@ -258,6 +352,22 @@ class AsyncVellum:
         inputs: typing.List[WorkflowRequestInputRequest],
         external_id: typing.Optional[str] = OMIT,
     ) -> typing.AsyncIterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+            - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+            - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+            - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+            - inputs: typing.List[WorkflowRequestInputRequest].
+
+            - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
         _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
         if workflow_deployment_id is not OMIT:
             _request["workflow_deployment_id"] = workflow_deployment_id
@@ -267,25 +377,25 @@ class AsyncVellum:
             _request["release_tag"] = release_tag
         if external_id is not OMIT:
             _request["external_id"] = external_id
-        async with httpx.AsyncClient() as _client:
-            async with _client.stream(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            ) as _response:
-                if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_text():
-                        if len(_text) == 0:
-                            continue
-                        yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
-                    return
-                try:
-                    _response_json = _response.json()
-                except JSONDecodeError:
-                    raise ApiError(status_code=_response.status_code, body=_response.text)
-                raise ApiError(status_code=_response.status_code, body=_response_json)
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
+                return
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def generate(
         self,
@@ -295,6 +405,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -302,14 +428,13 @@ class AsyncVellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(GenerateResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -334,6 +459,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.AsyncIterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+            - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -341,33 +482,33 @@ class AsyncVellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            async with _client.stream(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            ) as _response:
-                if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_text():
-                        if len(_text) == 0:
-                            continue
-                        yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
-                    return
-                if _response.status_code == 400:
-                    raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                if _response.status_code == 403:
-                    raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
-                if _response.status_code == 404:
-                    raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                if _response.status_code == 500:
-                    raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
-                try:
-                    _response_json = _response.json()
-                except JSONDecodeError:
-                    raise ApiError(status_code=_response.status_code, body=_response.text)
-                raise ApiError(status_code=_response.status_code, body=_response_json)
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 400:
+                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 403:
+                raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def search(
         self,
@@ -377,6 +518,22 @@ class AsyncVellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+            - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+            - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+            - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -384,14 +541,13 @@ class AsyncVellum:
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(SearchResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -413,19 +569,32 @@ class AsyncVellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+            - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+            - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+            - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        async with httpx.AsyncClient() as _client:
-            _response = await _client.request(
-                "POST",
-                urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
-                json=jsonable_encoder(_request),
-                headers=remove_none_from_headers({"X_API_KEY": self.api_key}),
-                timeout=None,
-            )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
        if 200 <= _response.status_code < 300:
            return
        if _response.status_code == 400:
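
Taken together, the `client.py` changes mean every call now reuses one long-lived `httpx.Client` (or `httpx.AsyncClient`) owned by the client wrapper, rather than constructing a throwaway client per request, and the streaming error paths now call `_response.read()` / `await _response.aread()` before `.json()`, which httpx requires before the body of a streamed response can be parsed. A minimal usage sketch of the new surface (the environment variable and deployment name below are illustrative assumptions, not values from this diff):

import os

from vellum.client import Vellum

# `timeout` is new in 0.0.22 and is forwarded to the underlying httpx.Client.
client = Vellum(
    api_key=os.environ["VELLUM_API_KEY"],  # assumed env var name, for illustration only
    timeout=30.0,
)

# Streaming endpoints now iterate with iter_lines(), so each yielded chunk is one
# complete JSON line that parses into a WorkflowStreamEvent.
for event in client.execute_workflow_stream(
    workflow_deployment_name="my-workflow",  # hypothetical deployment name
    inputs=[],
):
    print(event)
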
vellum/core/__init__.py CHANGED
@@ -1,8 +1,17 @@
 # This file was auto-generated by Fern from our API Definition.
 
 from .api_error import ApiError
+from .client_wrapper import AsyncClientWrapper, BaseClientWrapper, SyncClientWrapper
 from .datetime_utils import serialize_datetime
 from .jsonable_encoder import jsonable_encoder
-from .remove_none_from_headers import remove_none_from_headers
+from .remove_none_from_dict import remove_none_from_dict
 
-__all__ = ["ApiError", "jsonable_encoder", "remove_none_from_headers", "serialize_datetime"]
+__all__ = [
+    "ApiError",
+    "AsyncClientWrapper",
+    "BaseClientWrapper",
+    "SyncClientWrapper",
+    "jsonable_encoder",
+    "remove_none_from_dict",
+    "serialize_datetime",
+]
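
Neither new module's body appears in this diff (the file list shows only `vellum/core/client_wrapper.py +27` and `vellum/core/remove_none_from_dict.py +11`), so the following is a hedged sketch of what they plausibly contain, inferred from how they are used above: the `api_key` and `httpx_client` constructor keywords, the `get_headers()` call, and the `X_API_KEY` header the 0.0.21 code sent.

# Hypothetical reconstruction -- the actual Fern-generated modules may differ.
import typing

import httpx


def remove_none_from_dict(original: typing.Dict[str, typing.Optional[typing.Any]]) -> typing.Dict[str, typing.Any]:
    # Drop None-valued entries so they are never serialized into headers or query params.
    return {key: value for key, value in original.items() if value is not None}


class BaseClientWrapper:
    def __init__(self, *, api_key: str):
        self._api_key = api_key

    def get_headers(self) -> typing.Dict[str, str]:
        # 0.0.21 sent the key as an X_API_KEY header; assumed unchanged here.
        return {"X_API_KEY": self._api_key}


class SyncClientWrapper(BaseClientWrapper):
    def __init__(self, *, api_key: str, httpx_client: httpx.Client):
        super().__init__(api_key=api_key)
        self.httpx_client = httpx_client


class AsyncClientWrapper(BaseClientWrapper):
    def __init__(self, *, api_key: str, httpx_client: httpx.AsyncClient):
        super().__init__(api_key=api_key)
        self.httpx_client = httpx_client

Centralizing the header construction in one wrapper is what lets the resource clients drop their per-instance `api_key` plumbing, and sharing one `httpx` client across them restores connection pooling that the per-call clients in 0.0.21 gave up.
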