PyPI - vellum-ai - Versions diffs - 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl - Mend

vellum-ai 1.1.2 (py3-none-any.whl) → 1.1.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (693) hide show

vellum/client/raw_client.py CHANGED Viewed

@@ -1,43 +1,48 @@
 # This file was auto-generated by Fern from our API Definition.
+import contextlib
+import json
 import typing
-from .core.client_wrapper import SyncClientWrapper
-from .types.method_enum import MethodEnum
-from .types.execute_api_request_body import ExecuteApiRequestBody
-from .types.execute_api_request_headers_value import ExecuteApiRequestHeadersValue
-from .types.execute_api_request_bearer_token import ExecuteApiRequestBearerToken
-from .core.request_options import RequestOptions
-from .core.http_response import HttpResponse
-from .types.execute_api_response import ExecuteApiResponse
-from .core.serialization import convert_and_respect_annotation_metadata
-from .core.pydantic_utilities import parse_obj_as
 from json.decoder import JSONDecodeError
 from .core.api_error import ApiError
+from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from .core.http_response import AsyncHttpResponse, HttpResponse
+from .core.pydantic_utilities import parse_obj_as
+from .core.request_options import RequestOptions
+from .core.serialization import convert_and_respect_annotation_metadata
+from .errors.bad_request_error import BadRequestError
+from .errors.forbidden_error import ForbiddenError
+from .errors.internal_server_error import InternalServerError
+from .errors.not_found_error import NotFoundError
+from .types.code_execution_package import CodeExecutionPackage
 from .types.code_execution_runtime import CodeExecutionRuntime
 from .types.code_executor_input import CodeExecutorInput
-from .types.code_execution_package import CodeExecutionPackage
-from .types.vellum_variable_type import VellumVariableType
 from .types.code_executor_response import CodeExecutorResponse
-from .errors.bad_request_error import BadRequestError
-from .types.prompt_deployment_input_request import PromptDeploymentInputRequest
-from .types.prompt_deployment_expand_meta_request import PromptDeploymentExpandMetaRequest
-from .types.raw_prompt_execution_overrides_request import RawPromptExecutionOverridesRequest
+from .types.execute_api_request_bearer_token import ExecuteApiRequestBearerToken
+from .types.execute_api_request_body import ExecuteApiRequestBody
+from .types.execute_api_request_headers_value import ExecuteApiRequestHeadersValue
+from .types.execute_api_response import ExecuteApiResponse
+from .types.execute_prompt_event import ExecutePromptEvent
 from .types.execute_prompt_response import ExecutePromptResponse
-from .errors.forbidden_error import ForbiddenError
-from .errors.not_found_error import NotFoundError
-from .errors.internal_server_error import InternalServerError
-from .types.workflow_request_input_request import WorkflowRequestInputRequest
-from .types.workflow_expand_meta_request import WorkflowExpandMetaRequest
 from .types.execute_workflow_response import ExecuteWorkflowResponse
-from .types.generate_request import GenerateRequest
 from .types.generate_options_request import GenerateOptionsRequest
+from .types.generate_request import GenerateRequest
 from .types.generate_response import GenerateResponse
+from .types.generate_stream_response import GenerateStreamResponse
+from .types.method_enum import MethodEnum
+from .types.prompt_deployment_expand_meta_request import PromptDeploymentExpandMetaRequest
+from .types.prompt_deployment_input_request import PromptDeploymentInputRequest
+from .types.raw_prompt_execution_overrides_request import RawPromptExecutionOverridesRequest
 from .types.search_request_options_request import SearchRequestOptionsRequest
 from .types.search_response import SearchResponse
 from .types.submit_completion_actual_request import SubmitCompletionActualRequest
 from .types.submit_workflow_execution_actual_request import SubmitWorkflowExecutionActualRequest
-from .core.client_wrapper import AsyncClientWrapper
-from .core.http_response import AsyncHttpResponse
+from .types.vellum_variable_type import VellumVariableType
+from .types.workflow_execution_event_type import WorkflowExecutionEventType
+from .types.workflow_expand_meta_request import WorkflowExpandMetaRequest
+from .types.workflow_request_input_request import WorkflowRequestInputRequest
+from .types.workflow_stream_event import WorkflowStreamEvent
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -113,8 +118,8 @@ class RawVellum:
                 return HttpResponse(response=_response, data=_data)
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     def execute_code(
         self,
@@ -180,18 +185,19 @@ class RawVellum:
                 return HttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     def execute_prompt(
         self,
@@ -290,48 +296,214 @@ class RawVellum:
                 return HttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 403:
                 raise ForbiddenError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+    @contextlib.contextmanager  # review: turns this generator function into a context manager; its single yield is the last line
+    def execute_prompt_stream(
+        self,
+        *,
+        inputs: typing.Sequence[PromptDeploymentInputRequest],
+        prompt_deployment_id: typing.Optional[str] = OMIT,
+        prompt_deployment_name: typing.Optional[str] = OMIT,
+        release_tag: typing.Optional[str] = OMIT,
+        external_id: typing.Optional[str] = OMIT,
+        expand_meta: typing.Optional[PromptDeploymentExpandMetaRequest] = OMIT,
+        raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
+        expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.Iterator[HttpResponse[typing.Iterator[ExecutePromptEvent]]]:
+        """
+        Executes a deployed Prompt and streams back the results.
+        Parameters
+        ----------
+        inputs : typing.Sequence[PromptDeploymentInputRequest]
+            A list consisting of the Prompt Deployment's input variables and their values.
+        prompt_deployment_id : typing.Optional[str]
+            The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
+        prompt_deployment_name : typing.Optional[str]
+            The unique name of the Prompt Deployment. Must provide either this or prompt_deployment_id.
+        release_tag : typing.Optional[str]
+            Optionally specify a release tag if you want to pin to a specific release of the Prompt Deployment
+        external_id : typing.Optional[str]
+            Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
+        expand_meta : typing.Optional[PromptDeploymentExpandMetaRequest]
+            An optionally specified configuration used to opt in to including additional metadata about this prompt execution in the API response. Corresponding values will be returned under the `meta` key of the API response.
+        raw_overrides : typing.Optional[RawPromptExecutionOverridesRequest]
+            Overrides for the raw API request sent to the model host. Combined with `expand_raw`, it can be used to access new features from models.
+        expand_raw : typing.Optional[typing.Sequence[str]]
+            A list of keys whose values you'd like to directly return from the JSON response of the model provider. Useful if you need lower-level info returned by model providers that Vellum would otherwise omit. Corresponding key/value pairs will be returned under the `raw` key of the API response.
+        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+            Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Yields
+        ------
+        typing.Iterator[HttpResponse[typing.Iterator[ExecutePromptEvent]]]
+        """
+        with self._client_wrapper.httpx_client.stream(  # review: _response is bound only inside this with-block; presumably closed on exit - confirm in httpx_client.stream
+            "v1/execute-prompt-stream",
+            base_url=self._client_wrapper.get_environment().predict,
+            method="POST",
+            json={
+                "inputs": convert_and_respect_annotation_metadata(
+                    object_=inputs, annotation=typing.Sequence[PromptDeploymentInputRequest], direction="write"
+                ),
+                "prompt_deployment_id": prompt_deployment_id,
+                "prompt_deployment_name": prompt_deployment_name,
+                "release_tag": release_tag,
+                "external_id": external_id,
+                "expand_meta": convert_and_respect_annotation_metadata(
+                    object_=expand_meta,
+                    annotation=typing.Optional[PromptDeploymentExpandMetaRequest],
+                    direction="write",
+                ),
+                "raw_overrides": convert_and_respect_annotation_metadata(
+                    object_=raw_overrides,
+                    annotation=typing.Optional[RawPromptExecutionOverridesRequest],
+                    direction="write",
+                ),
+                "expand_raw": expand_raw,
+                "metadata": metadata,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            def _stream() -> HttpResponse[typing.Iterator[ExecutePromptEvent]]:
+                try:
+                    if 200 <= _response.status_code < 300:  # review: success path returns without reading the body; events are pulled lazily by _iter()
+                        def _iter():
+                            for _text in _response.iter_lines():
+                                try:
+                                    if len(_text) == 0:  # review: blank lines are skipped
+                                        continue
+                                    yield typing.cast(
+                                        ExecutePromptEvent,
+                                        parse_obj_as(
+                                            type_=ExecutePromptEvent,  # type: ignore
+                                            object_=json.loads(_text),  # review: each non-empty line is decoded as one JSON document
+                                        ),
+                                    )
+                                except Exception:  # review: any decode/validation failure for a line is swallowed; that event is dropped and iteration continues
+                                    pass
+                            return
+                        return HttpResponse(response=_response, data=_iter())  # review: data is a generator over _response.iter_lines(); presumably must be consumed before the enclosing with-block exits - confirm
+                    _response.read()  # review: non-2xx only; presumably loads the body so .json()/.text below work - confirm
+                    if _response.status_code == 400:
+                        raise BadRequestError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 403:
+                        raise ForbiddenError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 404:
+                        raise NotFoundError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 500:
+                        raise InternalServerError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    _response_json = _response.json()
+                except JSONDecodeError:  # review: error body was not valid JSON; the raw text is reported instead
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)  # review: non-2xx status other than 400/403/404/500
+            yield _stream()  # review: _stream() is evaluated before the yield, so error statuses raise on entering the caller's with-statement
     def execute_workflow(
         self,
@@ -414,38 +586,181 @@ class RawVellum:
                 return HttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+    @contextlib.contextmanager  # review: turns this generator function into a context manager; its single yield is the last line
+    def execute_workflow_stream(
+        self,
+        *,
+        inputs: typing.Sequence[WorkflowRequestInputRequest],
+        expand_meta: typing.Optional[WorkflowExpandMetaRequest] = OMIT,
+        workflow_deployment_id: typing.Optional[str] = OMIT,
+        workflow_deployment_name: typing.Optional[str] = OMIT,
+        release_tag: typing.Optional[str] = OMIT,
+        external_id: typing.Optional[str] = OMIT,
+        event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.Iterator[HttpResponse[typing.Iterator[WorkflowStreamEvent]]]:
+        """
+        Executes a deployed Workflow and streams back its results.
+        Parameters
+        ----------
+        inputs : typing.Sequence[WorkflowRequestInputRequest]
+            The list of inputs defined in the Workflow's Deployment with their corresponding values.
+        expand_meta : typing.Optional[WorkflowExpandMetaRequest]
+            An optionally specified configuration used to opt in to including additional metadata about this workflow execution in the API response. Corresponding values will be returned under the `execution_meta` key within NODE events in the response stream.
+        workflow_deployment_id : typing.Optional[str]
+            The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+        workflow_deployment_name : typing.Optional[str]
+            The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+        release_tag : typing.Optional[str]
+            Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+        external_id : typing.Optional[str]
+            Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
+        event_types : typing.Optional[typing.Sequence[WorkflowExecutionEventType]]
+            Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
+        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+            Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Yields
+        ------
+        typing.Iterator[HttpResponse[typing.Iterator[WorkflowStreamEvent]]]
+        """
+        with self._client_wrapper.httpx_client.stream(  # review: _response is bound only inside this with-block; presumably closed on exit - confirm in httpx_client.stream
+            "v1/execute-workflow-stream",
+            base_url=self._client_wrapper.get_environment().predict,
+            method="POST",
+            json={
+                "inputs": convert_and_respect_annotation_metadata(
+                    object_=inputs, annotation=typing.Sequence[WorkflowRequestInputRequest], direction="write"
+                ),
+                "expand_meta": convert_and_respect_annotation_metadata(
+                    object_=expand_meta, annotation=typing.Optional[WorkflowExpandMetaRequest], direction="write"
+                ),
+                "workflow_deployment_id": workflow_deployment_id,
+                "workflow_deployment_name": workflow_deployment_name,
+                "release_tag": release_tag,
+                "external_id": external_id,
+                "event_types": event_types,
+                "metadata": metadata,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            def _stream() -> HttpResponse[typing.Iterator[WorkflowStreamEvent]]:
+                try:
+                    if 200 <= _response.status_code < 300:  # review: success path returns without reading the body; events are pulled lazily by _iter()
+                        def _iter():
+                            for _text in _response.iter_lines():
+                                try:
+                                    if len(_text) == 0:  # review: blank lines are skipped
+                                        continue
+                                    yield typing.cast(
+                                        WorkflowStreamEvent,
+                                        parse_obj_as(
+                                            type_=WorkflowStreamEvent,  # type: ignore
+                                            object_=json.loads(_text),  # review: each non-empty line is decoded as one JSON document
+                                        ),
+                                    )
+                                except Exception:  # review: any decode/validation failure for a line is swallowed; that event is dropped and iteration continues
+                                    pass
+                            return
+                        return HttpResponse(response=_response, data=_iter())  # review: data is a generator over _response.iter_lines(); presumably must be consumed before the enclosing with-block exits - confirm
+                    _response.read()  # review: non-2xx only; presumably loads the body so .json()/.text below work - confirm
+                    if _response.status_code == 400:
+                        raise BadRequestError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 404:
+                        raise NotFoundError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 500:
+                        raise InternalServerError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    _response_json = _response.json()
+                except JSONDecodeError:  # review: error body was not valid JSON; the raw text is reported instead
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)  # review: non-2xx status other than 400/404/500
+            yield _stream()  # review: _stream() is evaluated before the yield, so error statuses raise on entering the caller's with-statement
     def generate(
         self,
@@ -516,48 +831,186 @@ class RawVellum:
                 return HttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 403:
                 raise ForbiddenError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+    @contextlib.contextmanager
+    def generate_stream(
+        self,
+        *,
+        requests: typing.Sequence[GenerateRequest],
+        deployment_id: typing.Optional[str] = OMIT,
+        deployment_name: typing.Optional[str] = OMIT,
+        options: typing.Optional[GenerateOptionsRequest] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamResponse]]]:
+        """
+        Generate a stream of completions using a previously defined deployment.
+        Important: This endpoint is DEPRECATED and has been superseded by
+        [execute-prompt-stream](/api-reference/api-reference/execute-prompt-stream).
+        Parameters
+        ----------
+        requests : typing.Sequence[GenerateRequest]
+            The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
+        deployment_id : typing.Optional[str]
+            The ID of the deployment. Must provide either this or deployment_name.
+        deployment_name : typing.Optional[str]
+            The name of the deployment. Must provide either this or deployment_id.
+        options : typing.Optional[GenerateOptionsRequest]
+            Additional configuration that can be used to control what's included in the response.
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Yields
+        ------
+        typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamResponse]]]
+        """
+        with self._client_wrapper.httpx_client.stream(
+            "v1/generate-stream",
+            base_url=self._client_wrapper.get_environment().predict,
+            method="POST",
+            json={
+                "deployment_id": deployment_id,
+                "deployment_name": deployment_name,
+                "requests": convert_and_respect_annotation_metadata(
+                    object_=requests, annotation=typing.Sequence[GenerateRequest], direction="write"
+                ),
+                "options": convert_and_respect_annotation_metadata(
+                    object_=options, annotation=typing.Optional[GenerateOptionsRequest], direction="write"
+                ),
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            def _stream() -> HttpResponse[typing.Iterator[GenerateStreamResponse]]:
+                try:
+                    if 200 <= _response.status_code < 300:
+                        def _iter():
+                            for _text in _response.iter_lines():
+                                try:
+                                    if len(_text) == 0:
+                                        continue
+                                    yield typing.cast(
+                                        GenerateStreamResponse,
+                                        parse_obj_as(
+                                            type_=GenerateStreamResponse,  # type: ignore
+                                            object_=json.loads(_text),
+                                        ),
+                                    )
+                                except Exception:
+                                    pass
+                            return
+                        return HttpResponse(response=_response, data=_iter())
+                    _response.read()
+                    if _response.status_code == 400:
+                        raise BadRequestError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 403:
+                        raise ForbiddenError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 404:
+                        raise NotFoundError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 500:
+                        raise InternalServerError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    _response_json = _response.json()
+                except JSONDecodeError:
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+            yield _stream()
     def search(
         self,
@@ -628,38 +1081,41 @@ class RawVellum:
                 return HttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     def submit_completion_actuals(
         self,
@@ -712,38 +1168,41 @@ class RawVellum:
                 return HttpResponse(response=_response, data=None)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     def submit_workflow_execution_actuals(
         self,
@@ -798,8 +1257,8 @@ class RawVellum:
                 return HttpResponse(response=_response, data=None)
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
 class AsyncRawVellum:
@@ -872,8 +1331,8 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=_data)
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     async def execute_code(
         self,
@@ -939,18 +1398,19 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     async def execute_prompt(
         self,
@@ -1049,48 +1509,214 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 403:
                 raise ForbiddenError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+    @contextlib.asynccontextmanager
+    async def execute_prompt_stream(
+        self,
+        *,
+        inputs: typing.Sequence[PromptDeploymentInputRequest],
+        prompt_deployment_id: typing.Optional[str] = OMIT,
+        prompt_deployment_name: typing.Optional[str] = OMIT,
+        release_tag: typing.Optional[str] = OMIT,
+        external_id: typing.Optional[str] = OMIT,
+        expand_meta: typing.Optional[PromptDeploymentExpandMetaRequest] = OMIT,
+        raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
+        expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]]:
+        """
+        Executes a deployed Prompt and streams back the results.
+        Parameters
+        ----------
+        inputs : typing.Sequence[PromptDeploymentInputRequest]
+            A list consisting of the Prompt Deployment's input variables and their values.
+        prompt_deployment_id : typing.Optional[str]
+            The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
+        prompt_deployment_name : typing.Optional[str]
+            The unique name of the Prompt Deployment. Must provide either this or prompt_deployment_id.
+        release_tag : typing.Optional[str]
+            Optionally specify a release tag if you want to pin to a specific release of the Prompt Deployment
+        external_id : typing.Optional[str]
+            Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
+        expand_meta : typing.Optional[PromptDeploymentExpandMetaRequest]
+            An optionally specified configuration used to opt in to including additional metadata about this prompt execution in the API response. Corresponding values will be returned under the `meta` key of the API response.
+        raw_overrides : typing.Optional[RawPromptExecutionOverridesRequest]
+            Overrides for the raw API request sent to the model host. Combined with `expand_raw`, it can be used to access new features from models.
+        expand_raw : typing.Optional[typing.Sequence[str]]
+            A list of keys whose values you'd like to directly return from the JSON response of the model provider. Useful if you need lower-level info returned by model providers that Vellum would otherwise omit. Corresponding key/value pairs will be returned under the `raw` key of the API response.
+        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+            Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Yields
+        ------
+        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]]
+        """
+        # Usage shape: because of @contextlib.asynccontextmanager, callers enter this
+        # method with `async with`. The single `yield` at the bottom sits inside the
+        # `async with ... as _response` block, so the HTTP stream stays open only
+        # while the caller's `async with` body is running.
+        # Non-2xx responses raise BadRequestError (400), ForbiddenError (403),
+        # NotFoundError (404), InternalServerError (500), or ApiError (anything else).
+        # POST the request body to the "predict" environment base URL.
+        # NOTE(review): `omit=OMIT` presumably tells the wrapper to drop fields that
+        # were left at the OMIT sentinel; confirm against the httpx_client wrapper.
+        async with self._client_wrapper.httpx_client.stream(
+            "v1/execute-prompt-stream",
+            base_url=self._client_wrapper.get_environment().predict,
+            method="POST",
+            json={
+                "inputs": convert_and_respect_annotation_metadata(
+                    object_=inputs, annotation=typing.Sequence[PromptDeploymentInputRequest], direction="write"
+                ),
+                "prompt_deployment_id": prompt_deployment_id,
+                "prompt_deployment_name": prompt_deployment_name,
+                "release_tag": release_tag,
+                "external_id": external_id,
+                "expand_meta": convert_and_respect_annotation_metadata(
+                    object_=expand_meta,
+                    annotation=typing.Optional[PromptDeploymentExpandMetaRequest],
+                    direction="write",
+                ),
+                "raw_overrides": convert_and_respect_annotation_metadata(
+                    object_=raw_overrides,
+                    annotation=typing.Optional[RawPromptExecutionOverridesRequest],
+                    direction="write",
+                ),
+                "expand_raw": expand_raw,
+                "metadata": metadata,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            # Builds the value handed to the caller: a response wrapper around a lazy
+            # event iterator on 2xx, otherwise a raised error.
+            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]:
+                try:
+                    if 200 <= _response.status_code < 300:
+                        # Lazy async generator: each non-empty line of the response is
+                        # decoded with json.loads and parsed as an ExecutePromptEvent.
+                        async def _iter():
+                            async for _text in _response.aiter_lines():
+                                try:
+                                    if len(_text) == 0:
+                                        continue
+                                    yield typing.cast(
+                                        ExecutePromptEvent,
+                                        parse_obj_as(
+                                            type_=ExecutePromptEvent,  # type: ignore
+                                            object_=json.loads(_text),
+                                        ),
+                                    )
+                                # NOTE(review): any Exception raised while decoding or
+                                # parsing a line is swallowed, so a malformed line is
+                                # dropped silently. The `yield` is also inside this
+                                # `try`, so an Exception thrown into the generator at
+                                # the yield point would be swallowed as well.
+                                except Exception:
+                                    pass
+                            return
+                        return AsyncHttpResponse(response=_response, data=_iter())
+                    # Error path: load the full body before calling .json() / .text
+                    # (presumably required because the response was opened as a stream).
+                    await _response.aread()
+                    if _response.status_code == 400:
+                        raise BadRequestError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 403:
+                        raise ForbiddenError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 404:
+                        raise NotFoundError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 500:
+                        raise InternalServerError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    # Any other status: parse the body here so the generic ApiError
+                    # raised after the try block can carry it.
+                    _response_json = _response.json()
+                # Also reached when a .json() call in one of the typed-error branches
+                # above fails to decode: the raw text is reported instead.
+                except JSONDecodeError:
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+            # Status handling runs before control reaches the caller, so errors are
+            # raised on entering the context manager.
+            yield await _stream()
     async def execute_workflow(
         self,
@@ -1173,38 +1799,181 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+    @contextlib.asynccontextmanager
+    async def execute_workflow_stream(
+        self,
+        *,
+        inputs: typing.Sequence[WorkflowRequestInputRequest],
+        expand_meta: typing.Optional[WorkflowExpandMetaRequest] = OMIT,
+        workflow_deployment_id: typing.Optional[str] = OMIT,
+        workflow_deployment_name: typing.Optional[str] = OMIT,
+        release_tag: typing.Optional[str] = OMIT,
+        external_id: typing.Optional[str] = OMIT,
+        event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]]:
+        """
+        Executes a deployed Workflow and streams back its results.
+        Parameters
+        ----------
+        inputs : typing.Sequence[WorkflowRequestInputRequest]
+            The list of inputs defined in the Workflow's Deployment with their corresponding values.
+        expand_meta : typing.Optional[WorkflowExpandMetaRequest]
+            An optionally specified configuration used to opt in to including additional metadata about this workflow execution in the API response. Corresponding values will be returned under the `execution_meta` key within NODE events in the response stream.
+        workflow_deployment_id : typing.Optional[str]
+            The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
+        workflow_deployment_name : typing.Optional[str]
+            The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
+        release_tag : typing.Optional[str]
+            Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
+        external_id : typing.Optional[str]
+            Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
+        event_types : typing.Optional[typing.Sequence[WorkflowExecutionEventType]]
+            Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
+        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+            Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Yields
+        ------
+        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]]
+        """
+        # Usage shape: because of @contextlib.asynccontextmanager, callers enter this
+        # method with `async with`. The single `yield` at the bottom sits inside the
+        # `async with ... as _response` block, so the HTTP stream stays open only
+        # while the caller's `async with` body is running.
+        # Non-2xx responses raise BadRequestError (400), NotFoundError (404),
+        # InternalServerError (500), or ApiError (anything else, including 403).
+        # POST the request body to the "predict" environment base URL.
+        # NOTE(review): `omit=OMIT` presumably tells the wrapper to drop fields that
+        # were left at the OMIT sentinel; confirm against the httpx_client wrapper.
+        async with self._client_wrapper.httpx_client.stream(
+            "v1/execute-workflow-stream",
+            base_url=self._client_wrapper.get_environment().predict,
+            method="POST",
+            json={
+                "inputs": convert_and_respect_annotation_metadata(
+                    object_=inputs, annotation=typing.Sequence[WorkflowRequestInputRequest], direction="write"
+                ),
+                "expand_meta": convert_and_respect_annotation_metadata(
+                    object_=expand_meta, annotation=typing.Optional[WorkflowExpandMetaRequest], direction="write"
+                ),
+                "workflow_deployment_id": workflow_deployment_id,
+                "workflow_deployment_name": workflow_deployment_name,
+                "release_tag": release_tag,
+                "external_id": external_id,
+                "event_types": event_types,
+                "metadata": metadata,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            # Builds the value handed to the caller: a response wrapper around a lazy
+            # event iterator on 2xx, otherwise a raised error.
+            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]:
+                try:
+                    if 200 <= _response.status_code < 300:
+                        # Lazy async generator: each non-empty line of the response is
+                        # decoded with json.loads and parsed as a WorkflowStreamEvent.
+                        async def _iter():
+                            async for _text in _response.aiter_lines():
+                                try:
+                                    if len(_text) == 0:
+                                        continue
+                                    yield typing.cast(
+                                        WorkflowStreamEvent,
+                                        parse_obj_as(
+                                            type_=WorkflowStreamEvent,  # type: ignore
+                                            object_=json.loads(_text),
+                                        ),
+                                    )
+                                # NOTE(review): any Exception raised while decoding or
+                                # parsing a line is swallowed, so a malformed line is
+                                # dropped silently. The `yield` is also inside this
+                                # `try`, so an Exception thrown into the generator at
+                                # the yield point would be swallowed as well.
+                                except Exception:
+                                    pass
+                            return
+                        return AsyncHttpResponse(response=_response, data=_iter())
+                    # Error path: load the full body before calling .json() / .text
+                    # (presumably required because the response was opened as a stream).
+                    await _response.aread()
+                    if _response.status_code == 400:
+                        raise BadRequestError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 404:
+                        raise NotFoundError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 500:
+                        raise InternalServerError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    # Any other status: parse the body here so the generic ApiError
+                    # raised after the try block can carry it.
+                    _response_json = _response.json()
+                # Also reached when a .json() call in one of the typed-error branches
+                # above fails to decode: the raw text is reported instead.
+                except JSONDecodeError:
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+            # Status handling runs before control reaches the caller, so errors are
+            # raised on entering the context manager.
+            yield await _stream()
     async def generate(
         self,
@@ -1275,48 +2044,186 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 403:
                 raise ForbiddenError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+    @contextlib.asynccontextmanager
+    async def generate_stream(
+        self,
+        *,
+        requests: typing.Sequence[GenerateRequest],
+        deployment_id: typing.Optional[str] = OMIT,
+        deployment_name: typing.Optional[str] = OMIT,
+        options: typing.Optional[GenerateOptionsRequest] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]]:
+        """
+        Generate a stream of completions using a previously defined deployment.
+        Important: This endpoint is DEPRECATED and has been superseded by
+        [execute-prompt-stream](/api-reference/api-reference/execute-prompt-stream).
+        Parameters
+        ----------
+        requests : typing.Sequence[GenerateRequest]
+            The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
+        deployment_id : typing.Optional[str]
+            The ID of the deployment. Must provide either this or deployment_name.
+        deployment_name : typing.Optional[str]
+            The name of the deployment. Must provide either this or deployment_id.
+        options : typing.Optional[GenerateOptionsRequest]
+            Additional configuration that can be used to control what's included in the response.
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Yields
+        ------
+        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]]
+        """
+        async with self._client_wrapper.httpx_client.stream(
+            "v1/generate-stream",
+            base_url=self._client_wrapper.get_environment().predict,
+            method="POST",
+            json={
+                "deployment_id": deployment_id,
+                "deployment_name": deployment_name,
+                "requests": convert_and_respect_annotation_metadata(
+                    object_=requests, annotation=typing.Sequence[GenerateRequest], direction="write"
+                ),
+                "options": convert_and_respect_annotation_metadata(
+                    object_=options, annotation=typing.Optional[GenerateOptionsRequest], direction="write"
+                ),
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        ) as _response:
+            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]:
+                try:
+                    if 200 <= _response.status_code < 300:
+                        async def _iter():
+                            async for _text in _response.aiter_lines():
+                                try:
+                                    if len(_text) == 0:
+                                        continue
+                                    yield typing.cast(
+                                        GenerateStreamResponse,
+                                        parse_obj_as(
+                                            type_=GenerateStreamResponse,  # type: ignore
+                                            object_=json.loads(_text),
+                                        ),
+                                    )
+                                except Exception:
+                                    pass
+                            return
+                        return AsyncHttpResponse(response=_response, data=_iter())
+                    await _response.aread()
+                    if _response.status_code == 400:
+                        raise BadRequestError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 403:
+                        raise ForbiddenError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 404:
+                        raise NotFoundError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    if _response.status_code == 500:
+                        raise InternalServerError(
+                            headers=dict(_response.headers),
+                            body=typing.cast(
+                                typing.Optional[typing.Any],
+                                parse_obj_as(
+                                    type_=typing.Optional[typing.Any],  # type: ignore
+                                    object_=_response.json(),
+                                ),
+                            ),
+                        )
+                    _response_json = _response.json()
+                except JSONDecodeError:
+                    raise ApiError(
+                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text
+                    )
+                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+            yield await _stream()
     async def search(
         self,
@@ -1387,38 +2294,41 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=_data)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     async def submit_completion_actuals(
         self,
@@ -1471,38 +2381,41 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=None)
             if _response.status_code == 400:
                 raise BadRequestError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 404:
                 raise NotFoundError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             if _response.status_code == 500:
                 raise InternalServerError(
-                    typing.cast(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
                         typing.Optional[typing.Any],
                         parse_obj_as(
                             type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
-                    )
+                    ),
                 )
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
     async def submit_workflow_execution_actuals(
         self,
@@ -1557,5 +2470,5 @@ class AsyncRawVellum:
                 return AsyncHttpResponse(response=_response, data=None)
             _response_json = _response.json()
         except JSONDecodeError:
-            raise ApiError(status_code=_response.status_code, body=_response.text)
-        raise ApiError(status_code=_response.status_code, body=_response_json)
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

vellum-ai 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl

vellum-ai 1.1.2py3-none-any.whl → 1.1.4py3-none-any.whl