vellum-ai 0.0.18__py3-none-any.whl → 0.0.25__py3-none-any.whl
- vellum/__init__.py +119 -16
- vellum/client.py +341 -77
- vellum/core/__init__.py +11 -2
- vellum/core/client_wrapper.py +27 -0
- vellum/core/remove_none_from_dict.py +11 -0
- vellum/errors/__init__.py +2 -1
- vellum/errors/forbidden_error.py +9 -0
- vellum/resources/deployments/client.py +35 -15
- vellum/resources/document_indexes/client.py +64 -16
- vellum/resources/documents/client.py +110 -35
- vellum/resources/model_versions/client.py +67 -25
- vellum/resources/registered_prompts/client.py +80 -16
- vellum/resources/sandboxes/client.py +90 -25
- vellum/resources/test_suites/client.py +90 -25
- vellum/types/__init__.py +108 -0
- vellum/types/conditional_node_result.py +25 -0
- vellum/types/conditional_node_result_data.py +24 -0
- vellum/types/deployment_node_result.py +25 -0
- vellum/types/deployment_node_result_data.py +26 -0
- vellum/types/deployment_read.py +2 -6
- vellum/types/document.py +3 -7
- vellum/types/document_document_to_document_index.py +2 -2
- vellum/types/document_index_read.py +3 -7
- vellum/types/enriched_normalized_completion.py +5 -9
- vellum/types/evaluation_params.py +1 -3
- vellum/types/evaluation_params_request.py +1 -3
- vellum/types/execute_workflow_stream_error_response.py +24 -0
- vellum/types/generate_error_response.py +1 -1
- vellum/types/generate_request.py +3 -7
- vellum/types/generate_result.py +2 -6
- vellum/types/generate_result_data.py +1 -1
- vellum/types/generate_result_error.py +1 -1
- vellum/types/model_version_build_config.py +2 -6
- vellum/types/model_version_compile_prompt_response.py +1 -1
- vellum/types/model_version_compiled_prompt.py +2 -4
- vellum/types/model_version_exec_config.py +3 -3
- vellum/types/model_version_read.py +7 -10
- vellum/types/model_version_sandbox_snapshot.py +3 -5
- vellum/types/prompt_node_result.py +25 -0
- vellum/types/prompt_node_result_data.py +26 -0
- vellum/types/prompt_template_block_properties.py +1 -0
- vellum/types/prompt_template_block_properties_request.py +3 -2
- vellum/types/prompt_template_block_request.py +1 -1
- vellum/types/prompt_template_input_variable.py +1 -1
- vellum/types/prompt_template_input_variable_request.py +1 -1
- vellum/types/provider_enum.py +5 -0
- vellum/types/register_prompt_error_response.py +1 -1
- vellum/types/register_prompt_prompt.py +2 -2
- vellum/types/register_prompt_prompt_info_request.py +1 -1
- vellum/types/register_prompt_response.py +5 -7
- vellum/types/registered_prompt_deployment.py +3 -3
- vellum/types/registered_prompt_model_version.py +2 -2
- vellum/types/registered_prompt_sandbox.py +2 -2
- vellum/types/registered_prompt_sandbox_snapshot.py +1 -1
- vellum/types/sandbox_node_result.py +25 -0
- vellum/types/sandbox_node_result_data.py +26 -0
- vellum/types/sandbox_scenario.py +2 -2
- vellum/types/scenario_input_request.py +1 -1
- vellum/types/search_error_response.py +1 -1
- vellum/types/search_filters_request.py +1 -1
- vellum/types/search_node_result.py +25 -0
- vellum/types/search_node_result_data.py +27 -0
- vellum/types/search_request_options_request.py +4 -6
- vellum/types/search_response.py +1 -1
- vellum/types/search_result.py +3 -3
- vellum/types/search_result_merging_request.py +1 -1
- vellum/types/search_weights_request.py +2 -2
- vellum/types/slim_document.py +5 -9
- vellum/types/submit_completion_actual_request.py +5 -15
- vellum/types/terminal_node_chat_history_result.py +26 -0
- vellum/types/terminal_node_json_result.py +25 -0
- vellum/types/terminal_node_result.py +25 -0
- vellum/types/terminal_node_result_data.py +25 -0
- vellum/types/terminal_node_result_output.py +40 -0
- vellum/types/terminal_node_string_result.py +25 -0
- vellum/types/test_suite_test_case.py +4 -8
- vellum/types/upload_document_response.py +1 -1
- vellum/types/workflow_event_error.py +26 -0
- vellum/types/workflow_execution_event_error_code.py +31 -0
- vellum/types/workflow_execution_node_result_event.py +27 -0
- vellum/types/workflow_execution_workflow_result_event.py +27 -0
- vellum/types/workflow_node_result_data.py +72 -0
- vellum/types/workflow_node_result_event.py +33 -0
- vellum/types/workflow_node_result_event_state.py +36 -0
- vellum/types/workflow_request_chat_history_input_request.py +28 -0
- vellum/types/workflow_request_input_request.py +40 -0
- vellum/types/workflow_request_json_input_request.py +27 -0
- vellum/types/workflow_request_string_input_request.py +27 -0
- vellum/types/workflow_result_event.py +31 -0
- vellum/types/workflow_result_event_output_data.py +40 -0
- vellum/types/workflow_result_event_output_data_chat_history.py +32 -0
- vellum/types/workflow_result_event_output_data_json.py +31 -0
- vellum/types/workflow_result_event_output_data_string.py +33 -0
- vellum/types/workflow_stream_event.py +29 -0
- {vellum_ai-0.0.18.dist-info → vellum_ai-0.0.25.dist-info}/METADATA +1 -1
- vellum_ai-0.0.25.dist-info/RECORD +149 -0
- vellum/core/remove_none_from_headers.py +0 -11
- vellum_ai-0.0.18.dist-info/RECORD +0 -113
- {vellum_ai-0.0.18.dist-info → vellum_ai-0.0.25.dist-info}/WHEEL +0 -0
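Two changes dominate this release: the hand-rolled header helper (`remove_none_from_headers`) is replaced by shared `SyncClientWrapper`/`AsyncClientWrapper` objects that own the `httpx` client, and a streaming `execute_workflow_stream` method lands on both `Vellum` and `AsyncVellum`. A minimal construction sketch against the new 0.0.25 signature shown in the diff below (the API key is a placeholder):

```python
from vellum.client import Vellum

# `timeout` is new in this range and is forwarded to the underlying
# httpx.Client; the key is attached to every request by the client
# wrapper as an X_API_KEY header.
client = Vellum(
    api_key="YOUR_API_KEY",
    timeout=30.0,
)
```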
vellum/client.py
CHANGED
@@ -9,10 +9,11 @@ import httpx
 import pydantic
 
 from .core.api_error import ApiError
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from .core.jsonable_encoder import jsonable_encoder
-from .core.remove_none_from_headers import remove_none_from_headers
 from .environment import VellumEnvironment
 from .errors.bad_request_error import BadRequestError
+from .errors.forbidden_error import ForbiddenError
 from .errors.internal_server_error import InternalServerError
 from .errors.not_found_error import NotFoundError
 from .resources.deployments.client import AsyncDeploymentsClient, DeploymentsClient
@@ -22,6 +23,7 @@ from .resources.model_versions.client import AsyncModelVersionsClient, ModelVers
 from .resources.registered_prompts.client import AsyncRegisteredPromptsClient, RegisteredPromptsClient
 from .resources.sandboxes.client import AsyncSandboxesClient, SandboxesClient
 from .resources.test_suites.client import AsyncTestSuitesClient, TestSuitesClient
+from .types.generate_error_response import GenerateErrorResponse
 from .types.generate_options_request import GenerateOptionsRequest
 from .types.generate_request import GenerateRequest
 from .types.generate_response import GenerateResponse
@@ -29,22 +31,88 @@ from .types.generate_stream_response import GenerateStreamResponse
 from .types.search_request_options_request import SearchRequestOptionsRequest
 from .types.search_response import SearchResponse
 from .types.submit_completion_actual_request import SubmitCompletionActualRequest
+from .types.workflow_request_input_request import WorkflowRequestInputRequest
+from .types.workflow_stream_event import WorkflowStreamEvent
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
 
 
 class Vellum:
-    def __init__(
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.
-        self.deployments = DeploymentsClient(environment=
-        self.document_indexes = DocumentIndexesClient(environment=
-        self.documents = DocumentsClient(environment=
-        self.model_versions = ModelVersionsClient(environment=
-        self.registered_prompts = RegisteredPromptsClient(environment=
-        self.sandboxes = SandboxesClient(environment=
-        self.test_suites = TestSuitesClient(environment=
+        self._client_wrapper = SyncClientWrapper(api_key=api_key, httpx_client=httpx.Client(timeout=timeout))
+        self.deployments = DeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = DocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = DocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = ModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = RegisteredPromptsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.sandboxes = SandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = TestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
+
+    def execute_workflow_stream(
+        self,
+        *,
+        workflow_deployment_id: typing.Optional[str] = OMIT,
+        workflow_deployment_name: typing.Optional[str] = OMIT,
+        release_tag: typing.Optional[str] = OMIT,
+        inputs: typing.List[WorkflowRequestInputRequest],
+        external_id: typing.Optional[str] = OMIT,
+    ) -> typing.Iterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+        - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+        - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+        - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+        - inputs: typing.List[WorkflowRequestInputRequest].
+
+        - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
+        _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
+        if workflow_deployment_id is not OMIT:
+            _request["workflow_deployment_id"] = workflow_deployment_id
+        if workflow_deployment_name is not OMIT:
+            _request["workflow_deployment_name"] = workflow_deployment_name
+        if release_tag is not OMIT:
+            _request["release_tag"] = release_tag
+        if external_id is not OMIT:
+            _request["external_id"] = external_id
+        with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                for _text in _response.iter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                _response.read()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     def generate(
         self,
@@ -54,6 +122,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+        - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+        - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+        - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -61,17 +145,19 @@ class Vellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        _response =
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
             json=jsonable_encoder(_request),
-            headers=
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(GenerateResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
             raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+        if _response.status_code == 403:
+            raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
         if _response.status_code == 404:
             raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
         if _response.status_code == 500:
@@ -90,6 +176,22 @@ class Vellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.Iterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+        - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+        - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+        - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -97,26 +199,29 @@ class Vellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        with
+        with self._client_wrapper.httpx_client.stream(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
             json=jsonable_encoder(_request),
-            headers=
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         ) as _response:
             if 200 <= _response.status_code < 300:
-                for _text in _response.
+                for _text in _response.iter_lines():
                     if len(_text) == 0:
                         continue
                     yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
                 return
             if _response.status_code == 400:
                 raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 403:
+                raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
             if _response.status_code == 404:
                 raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
             if _response.status_code == 500:
                 raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
             try:
+                _response.read()
                 _response_json = _response.json()
             except JSONDecodeError:
                 raise ApiError(status_code=_response.status_code, body=_response.text)
@@ -130,6 +235,22 @@ class Vellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+        - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+        - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+        - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
         _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -137,11 +258,11 @@ class Vellum:
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-        _response =
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
             json=jsonable_encoder(_request),
-            headers=
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -165,16 +286,30 @@ class Vellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+        - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+        - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-        _response =
+        _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
             json=jsonable_encoder(_request),
-            headers=
+            headers=self._client_wrapper.get_headers(),
             timeout=None,
         )
         if 200 <= _response.status_code < 300:
@@ -193,16 +328,82 @@ class Vellum:
 
 
 class AsyncVellum:
-    def __init__(
+    def __init__(
+        self,
+        *,
+        environment: VellumEnvironment = VellumEnvironment.PRODUCTION,
+        api_key: str,
+        timeout: typing.Optional[float] = None,
+    ):
         self._environment = environment
-        self.
-        self.deployments = AsyncDeploymentsClient(environment=
-        self.document_indexes = AsyncDocumentIndexesClient(environment=
-        self.documents = AsyncDocumentsClient(environment=
-        self.model_versions = AsyncModelVersionsClient(environment=
-        self.registered_prompts = AsyncRegisteredPromptsClient(
-
-
+        self._client_wrapper = AsyncClientWrapper(api_key=api_key, httpx_client=httpx.AsyncClient(timeout=timeout))
+        self.deployments = AsyncDeploymentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.document_indexes = AsyncDocumentIndexesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.documents = AsyncDocumentsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.model_versions = AsyncModelVersionsClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.registered_prompts = AsyncRegisteredPromptsClient(
+            environment=environment, client_wrapper=self._client_wrapper
+        )
+        self.sandboxes = AsyncSandboxesClient(environment=environment, client_wrapper=self._client_wrapper)
+        self.test_suites = AsyncTestSuitesClient(environment=environment, client_wrapper=self._client_wrapper)
+
+    async def execute_workflow_stream(
+        self,
+        *,
+        workflow_deployment_id: typing.Optional[str] = OMIT,
+        workflow_deployment_name: typing.Optional[str] = OMIT,
+        release_tag: typing.Optional[str] = OMIT,
+        inputs: typing.List[WorkflowRequestInputRequest],
+        external_id: typing.Optional[str] = OMIT,
+    ) -> typing.AsyncIterator[WorkflowStreamEvent]:
+        """
+        <strong style="background-color:#ffc107; color:white; padding:4px; border-radius:4px">Unstable</strong>
+
+        Executes a deployed Workflow and streams back its results.
+
+        Parameters:
+        - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+
+        - workflow_deployment_name: typing.Optional[str]. The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+
+        - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+
+        - inputs: typing.List[WorkflowRequestInputRequest].
+
+        - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes.
+        """
+        _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
+        if workflow_deployment_id is not OMIT:
+            _request["workflow_deployment_id"] = workflow_deployment_id
+        if workflow_deployment_name is not OMIT:
+            _request["workflow_deployment_name"] = workflow_deployment_name
+        if release_tag is not OMIT:
+            _request["release_tag"] = release_tag
+        if external_id is not OMIT:
+            _request["external_id"] = external_id
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/execute-workflow-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(WorkflowStreamEvent, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def generate(
         self,
@@ -212,6 +413,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> GenerateResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a completion using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+        - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+        - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+        - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -219,18 +436,19 @@ class AsyncVellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-
-
-
-
-
-
-
-        )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(GenerateResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
             raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+        if _response.status_code == 403:
+            raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
         if _response.status_code == 404:
             raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
         if _response.status_code == 500:
@@ -249,6 +467,22 @@ class AsyncVellum:
         requests: typing.List[GenerateRequest],
         options: typing.Optional[GenerateOptionsRequest] = OMIT,
     ) -> typing.AsyncIterator[GenerateStreamResponse]:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Generate a stream of completions using a previously defined deployment.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+        - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+        - requests: typing.List[GenerateRequest]. The generation requests to make. Supplying multiple will perform a bulk request to the LLM provided when possible.
+
+        - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+        """
         _request: typing.Dict[str, typing.Any] = {"requests": requests}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
@@ -256,31 +490,33 @@ class AsyncVellum:
             _request["deployment_name"] = deployment_name
         if options is not OMIT:
             _request["options"] = options
-        async with
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        async with self._client_wrapper.httpx_client.stream(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/generate-stream"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        ) as _response:
+            if 200 <= _response.status_code < 300:
+                async for _text in _response.aiter_lines():
+                    if len(_text) == 0:
+                        continue
+                    yield pydantic.parse_obj_as(GenerateStreamResponse, json.loads(_text))  # type: ignore
+                return
+            if _response.status_code == 400:
+                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 403:
+                raise ForbiddenError(pydantic.parse_obj_as(GenerateErrorResponse, _response.json()))  # type: ignore
+            if _response.status_code == 404:
+                raise NotFoundError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            if _response.status_code == 500:
+                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
+            try:
+                await _response.aread()
+                _response_json = _response.json()
+            except JSONDecodeError:
+                raise ApiError(status_code=_response.status_code, body=_response.text)
+            raise ApiError(status_code=_response.status_code, body=_response_json)
 
     async def search(
         self,
@@ -290,6 +526,22 @@ class AsyncVellum:
         query: str,
         options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
     ) -> SearchResponse:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Perform a search against a document index.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - index_id: typing.Optional[str]. The ID of the index to search against. Must provide either this or index_name.
+
+        - index_name: typing.Optional[str]. The name of the index to search against. Must provide either this or index_id.
+
+        - query: str. The query to search for. <span style="white-space: nowrap">`non-empty`</span>
+
+        - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+        """
        _request: typing.Dict[str, typing.Any] = {"query": query}
         if index_id is not OMIT:
             _request["index_id"] = index_id
@@ -297,14 +549,13 @@ class AsyncVellum:
             _request["index_name"] = index_name
         if options is not OMIT:
             _request["options"] = options
-
-
-
-
-
-
-
-        )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/search"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return pydantic.parse_obj_as(SearchResponse, _response.json())  # type: ignore
         if _response.status_code == 400:
@@ -326,19 +577,32 @@ class AsyncVellum:
         deployment_name: typing.Optional[str] = OMIT,
         actuals: typing.List[SubmitCompletionActualRequest],
     ) -> None:
+        """
+        <strong style="background-color:#4caf50; color:white; padding:4px; border-radius:4px">Stable</strong>
+
+        Used to submit feedback regarding the quality of previously generated completions.
+
+        **Note:** Uses a base url of `https://predict.vellum.ai`.
+
+        Parameters:
+        - deployment_id: typing.Optional[str]. The ID of the deployment. Must provide either this or deployment_name.
+
+        - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.
+
+        - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+        """
         _request: typing.Dict[str, typing.Any] = {"actuals": actuals}
         if deployment_id is not OMIT:
             _request["deployment_id"] = deployment_id
         if deployment_name is not OMIT:
             _request["deployment_name"] = deployment_name
-
-
-
-
-
-
-
-        )
+        _response = await self._client_wrapper.httpx_client.request(
+            "POST",
+            urllib.parse.urljoin(f"{self._environment.predict}/", "v1/submit-completion-actuals"),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=None,
+        )
         if 200 <= _response.status_code < 300:
             return
         if _response.status_code == 400:
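For readers skimming the hunks above: `execute_workflow_stream` builds the request body from the non-`OMIT` arguments, POSTs to `v1/execute-workflow-stream`, and yields one parsed `WorkflowStreamEvent` per non-empty response line. A usage sketch; the deployment name is hypothetical, and the input dict's shape is an assumption standing in for the typed models in `vellum/types` (see `workflow_request_string_input_request.py` in the file list):

```python
from vellum.client import Vellum

client = Vellum(api_key="YOUR_API_KEY")

# Each yielded item is a WorkflowStreamEvent parsed from one line of the
# streamed response body (non-2xx statuses raise instead of yielding).
for event in client.execute_workflow_stream(
    workflow_deployment_name="my-workflow",  # hypothetical deployment
    inputs=[{"name": "query", "type": "STRING", "value": "hello"}],  # assumed input shape
):
    print(event)
```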
vellum/core/__init__.py
CHANGED
@@ -1,8 +1,17 @@
 # This file was auto-generated by Fern from our API Definition.
 
 from .api_error import ApiError
+from .client_wrapper import AsyncClientWrapper, BaseClientWrapper, SyncClientWrapper
 from .datetime_utils import serialize_datetime
 from .jsonable_encoder import jsonable_encoder
-from .
+from .remove_none_from_dict import remove_none_from_dict
 
-__all__ = [
+__all__ = [
+    "ApiError",
+    "AsyncClientWrapper",
+    "BaseClientWrapper",
+    "SyncClientWrapper",
+    "jsonable_encoder",
+    "remove_none_from_dict",
+    "serialize_datetime",
+]
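Because `vellum.core` now re-exports the wrapper classes and the new helper via `__all__`, downstream code can import them directly:

```python
# These names mirror the __all__ list added above.
from vellum.core import AsyncClientWrapper, SyncClientWrapper, remove_none_from_dict
```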
vellum/core/client_wrapper.py
ADDED
@@ -0,0 +1,27 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import httpx
+
+
+class BaseClientWrapper:
+    def __init__(self, *, api_key: str):
+        self.api_key = api_key
+
+    def get_headers(self) -> typing.Dict[str, str]:
+        headers: typing.Dict[str, str] = {}
+        headers["X_API_KEY"] = self.api_key
+        return headers
+
+
+class SyncClientWrapper(BaseClientWrapper):
+    def __init__(self, *, api_key: str, httpx_client: httpx.Client):
+        super().__init__(api_key=api_key)
+        self.httpx_client = httpx_client
+
+
+class AsyncClientWrapper(BaseClientWrapper):
+    def __init__(self, *, api_key: str, httpx_client: httpx.AsyncClient):
+        super().__init__(api_key=api_key)
+        self.httpx_client = httpx_client
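A quick sketch of the wrapper in isolation. Note the underscore-separated header name `X_API_KEY`, exactly as written in `get_headers` above, rather than the more conventional `X-API-Key`:

```python
import httpx

from vellum.core.client_wrapper import SyncClientWrapper

wrapper = SyncClientWrapper(api_key="YOUR_API_KEY", httpx_client=httpx.Client(timeout=None))
print(wrapper.get_headers())  # {'X_API_KEY': 'YOUR_API_KEY'}
```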
vellum/core/remove_none_from_dict.py
ADDED
@@ -0,0 +1,11 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from typing import Any, Dict, Optional
+
+
+def remove_none_from_dict(original: Dict[str, Optional[Any]]) -> Dict[str, Any]:
+    new: Dict[str, Any] = {}
+    for key, value in original.items():
+        if value is not None:
+            new[key] = value
+    return new
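This helper replaces the deleted `remove_none_from_headers` module and drops only keys whose value is `None`, so falsy-but-meaningful values survive:

```python
from vellum.core.remove_none_from_dict import remove_none_from_dict

# None is removed; 0, "" and False are kept.
assert remove_none_from_dict({"a": 1, "b": None, "c": ""}) == {"a": 1, "c": ""}
```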