vellum-ai 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. vellum/__init__.py +16 -0
  2. vellum/client/README.md +55 -0
  3. vellum/client/__init__.py +66 -507
  4. vellum/client/core/client_wrapper.py +2 -2
  5. vellum/client/core/pydantic_utilities.py +10 -3
  6. vellum/client/raw_client.py +844 -0
  7. vellum/client/reference.md +692 -19
  8. vellum/client/resources/ad_hoc/client.py +23 -180
  9. vellum/client/resources/ad_hoc/raw_client.py +276 -0
  10. vellum/client/resources/container_images/client.py +10 -36
  11. vellum/client/resources/deployments/client.py +16 -62
  12. vellum/client/resources/document_indexes/client.py +16 -72
  13. vellum/client/resources/documents/client.py +8 -30
  14. vellum/client/resources/folder_entities/client.py +4 -8
  15. vellum/client/resources/metric_definitions/client.py +4 -14
  16. vellum/client/resources/ml_models/client.py +2 -8
  17. vellum/client/resources/organizations/client.py +2 -6
  18. vellum/client/resources/prompts/client.py +2 -10
  19. vellum/client/resources/sandboxes/client.py +4 -20
  20. vellum/client/resources/test_suite_runs/client.py +4 -18
  21. vellum/client/resources/test_suites/client.py +11 -86
  22. vellum/client/resources/test_suites/raw_client.py +136 -0
  23. vellum/client/resources/workflow_deployments/client.py +20 -78
  24. vellum/client/resources/workflow_executions/client.py +2 -6
  25. vellum/client/resources/workflow_sandboxes/client.py +2 -10
  26. vellum/client/resources/workflows/client.py +7 -6
  27. vellum/client/resources/workflows/raw_client.py +58 -47
  28. vellum/client/resources/workspace_secrets/client.py +4 -20
  29. vellum/client/resources/workspaces/client.py +2 -6
  30. vellum/client/types/__init__.py +16 -0
  31. vellum/client/types/array_chat_message_content_item.py +4 -2
  32. vellum/client/types/array_chat_message_content_item_request.py +4 -2
  33. vellum/client/types/chat_message_content.py +4 -2
  34. vellum/client/types/chat_message_content_request.py +4 -2
  35. vellum/client/types/node_execution_span.py +2 -0
  36. vellum/client/types/prompt_block.py +4 -2
  37. vellum/client/types/vellum_value.py +4 -2
  38. vellum/client/types/vellum_value_request.py +4 -2
  39. vellum/client/types/vellum_variable_type.py +2 -1
  40. vellum/client/types/vellum_video.py +24 -0
  41. vellum/client/types/vellum_video_request.py +24 -0
  42. vellum/client/types/video_chat_message_content.py +25 -0
  43. vellum/client/types/video_chat_message_content_request.py +25 -0
  44. vellum/client/types/video_prompt_block.py +29 -0
  45. vellum/client/types/video_vellum_value.py +25 -0
  46. vellum/client/types/video_vellum_value_request.py +25 -0
  47. vellum/client/types/workflow_execution_span.py +2 -0
  48. vellum/client/types/workflow_execution_usage_calculation_fulfilled_body.py +22 -0
  49. vellum/prompts/blocks/compilation.py +22 -10
  50. vellum/types/vellum_video.py +3 -0
  51. vellum/types/vellum_video_request.py +3 -0
  52. vellum/types/video_chat_message_content.py +3 -0
  53. vellum/types/video_chat_message_content_request.py +3 -0
  54. vellum/types/video_prompt_block.py +3 -0
  55. vellum/types/video_vellum_value.py +3 -0
  56. vellum/types/video_vellum_value_request.py +3 -0
  57. vellum/types/workflow_execution_usage_calculation_fulfilled_body.py +3 -0
  58. vellum/workflows/events/workflow.py +11 -0
  59. vellum/workflows/graph/graph.py +103 -1
  60. vellum/workflows/graph/tests/test_graph.py +99 -0
  61. vellum/workflows/nodes/bases/base.py +9 -1
  62. vellum/workflows/nodes/displayable/bases/utils.py +4 -2
  63. vellum/workflows/nodes/displayable/tool_calling_node/node.py +19 -18
  64. vellum/workflows/nodes/displayable/tool_calling_node/tests/test_node.py +17 -7
  65. vellum/workflows/nodes/displayable/tool_calling_node/tests/test_utils.py +7 -7
  66. vellum/workflows/nodes/displayable/tool_calling_node/utils.py +47 -80
  67. vellum/workflows/references/environment_variable.py +10 -0
  68. vellum/workflows/runner/runner.py +18 -2
  69. vellum/workflows/state/context.py +101 -12
  70. vellum/workflows/types/definition.py +11 -1
  71. vellum/workflows/types/tests/test_definition.py +19 -0
  72. vellum/workflows/utils/vellum_variables.py +9 -5
  73. vellum/workflows/workflows/base.py +12 -5
  74. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/METADATA +1 -1
  75. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/RECORD +85 -69
  76. vellum_ee/workflows/display/nodes/vellum/code_execution_node.py +1 -1
  77. vellum_ee/workflows/display/nodes/vellum/tests/test_code_execution_node.py +55 -1
  78. vellum_ee/workflows/display/nodes/vellum/tests/test_tool_calling_node.py +15 -52
  79. vellum_ee/workflows/display/tests/workflow_serialization/test_basic_tool_calling_node_mcp_serialization.py +15 -49
  80. vellum_ee/workflows/display/types.py +14 -1
  81. vellum_ee/workflows/display/utils/expressions.py +13 -4
  82. vellum_ee/workflows/display/workflows/base_workflow_display.py +6 -19
  83. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/LICENSE +0 -0
  84. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/WHEEL +0 -0
  85. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/entry_points.txt +0 -0
@@ -26,12 +26,18 @@ from .types.execute_prompt_response import ExecutePromptResponse
26
26
  from .errors.forbidden_error import ForbiddenError
27
27
  from .errors.not_found_error import NotFoundError
28
28
  from .errors.internal_server_error import InternalServerError
29
+ from .types.execute_prompt_event import ExecutePromptEvent
30
+ import json
31
+ import contextlib
29
32
  from .types.workflow_request_input_request import WorkflowRequestInputRequest
30
33
  from .types.workflow_expand_meta_request import WorkflowExpandMetaRequest
31
34
  from .types.execute_workflow_response import ExecuteWorkflowResponse
35
+ from .types.workflow_execution_event_type import WorkflowExecutionEventType
36
+ from .types.workflow_stream_event import WorkflowStreamEvent
32
37
  from .types.generate_request import GenerateRequest
33
38
  from .types.generate_options_request import GenerateOptionsRequest
34
39
  from .types.generate_response import GenerateResponse
40
+ from .types.generate_stream_response import GenerateStreamResponse
35
41
  from .types.search_request_options_request import SearchRequestOptionsRequest
36
42
  from .types.search_response import SearchResponse
37
43
  from .types.submit_completion_actual_request import SubmitCompletionActualRequest
@@ -333,6 +339,162 @@ class RawVellum:
333
339
  raise ApiError(status_code=_response.status_code, body=_response.text)
334
340
  raise ApiError(status_code=_response.status_code, body=_response_json)
335
341
 
342
+ @contextlib.contextmanager
343
+ def execute_prompt_stream(
344
+ self,
345
+ *,
346
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
347
+ prompt_deployment_id: typing.Optional[str] = OMIT,
348
+ prompt_deployment_name: typing.Optional[str] = OMIT,
349
+ release_tag: typing.Optional[str] = OMIT,
350
+ external_id: typing.Optional[str] = OMIT,
351
+ expand_meta: typing.Optional[PromptDeploymentExpandMetaRequest] = OMIT,
352
+ raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
353
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
354
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
355
+ request_options: typing.Optional[RequestOptions] = None,
356
+ ) -> typing.Iterator[HttpResponse[typing.Iterator[ExecutePromptEvent]]]:
357
+ """
358
+ Executes a deployed Prompt and streams back the results.
359
+
360
+ Parameters
361
+ ----------
362
+ inputs : typing.Sequence[PromptDeploymentInputRequest]
363
+ A list consisting of the Prompt Deployment's input variables and their values.
364
+
365
+ prompt_deployment_id : typing.Optional[str]
366
+ The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
367
+
368
+ prompt_deployment_name : typing.Optional[str]
369
+ The unique name of the Prompt Deployment. Must provide either this or prompt_deployment_id.
370
+
371
+ release_tag : typing.Optional[str]
372
+ Optionally specify a release tag if you want to pin to a specific release of the Prompt Deployment
373
+
374
+ external_id : typing.Optional[str]
375
+ Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
376
+
377
+ expand_meta : typing.Optional[PromptDeploymentExpandMetaRequest]
378
+ An optionally specified configuration used to opt in to including additional metadata about this prompt execution in the API response. Corresponding values will be returned under the `meta` key of the API response.
379
+
380
+ raw_overrides : typing.Optional[RawPromptExecutionOverridesRequest]
381
+ Overrides for the raw API request sent to the model host. Combined with `expand_raw`, it can be used to access new features from models.
382
+
383
+ expand_raw : typing.Optional[typing.Sequence[str]]
384
+ A list of keys whose values you'd like to directly return from the JSON response of the model provider. Useful if you need lower-level info returned by model providers that Vellum would otherwise omit. Corresponding key/value pairs will be returned under the `raw` key of the API response.
385
+
386
+ metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
387
+ Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
388
+
389
+ request_options : typing.Optional[RequestOptions]
390
+ Request-specific configuration.
391
+
392
+ Yields
393
+ ------
394
+ typing.Iterator[HttpResponse[typing.Iterator[ExecutePromptEvent]]]
395
+
396
+ """
397
+ with self._client_wrapper.httpx_client.stream(
398
+ "v1/execute-prompt-stream",
399
+ base_url=self._client_wrapper.get_environment().predict,
400
+ method="POST",
401
+ json={
402
+ "inputs": convert_and_respect_annotation_metadata(
403
+ object_=inputs, annotation=typing.Sequence[PromptDeploymentInputRequest], direction="write"
404
+ ),
405
+ "prompt_deployment_id": prompt_deployment_id,
406
+ "prompt_deployment_name": prompt_deployment_name,
407
+ "release_tag": release_tag,
408
+ "external_id": external_id,
409
+ "expand_meta": convert_and_respect_annotation_metadata(
410
+ object_=expand_meta,
411
+ annotation=typing.Optional[PromptDeploymentExpandMetaRequest],
412
+ direction="write",
413
+ ),
414
+ "raw_overrides": convert_and_respect_annotation_metadata(
415
+ object_=raw_overrides,
416
+ annotation=typing.Optional[RawPromptExecutionOverridesRequest],
417
+ direction="write",
418
+ ),
419
+ "expand_raw": expand_raw,
420
+ "metadata": metadata,
421
+ },
422
+ headers={
423
+ "content-type": "application/json",
424
+ },
425
+ request_options=request_options,
426
+ omit=OMIT,
427
+ ) as _response:
428
+
429
+ def stream() -> HttpResponse[typing.Iterator[ExecutePromptEvent]]:
430
+ try:
431
+ if 200 <= _response.status_code < 300:
432
+
433
+ def _iter():
434
+ for _text in _response.iter_lines():
435
+ try:
436
+ if len(_text) == 0:
437
+ continue
438
+ yield typing.cast(
439
+ ExecutePromptEvent,
440
+ parse_obj_as(
441
+ type_=ExecutePromptEvent, # type: ignore
442
+ object_=json.loads(_text),
443
+ ),
444
+ )
445
+ except Exception:
446
+ pass
447
+ return
448
+
449
+ return HttpResponse(response=_response, data=_iter())
450
+ _response.read()
451
+ if _response.status_code == 400:
452
+ raise BadRequestError(
453
+ typing.cast(
454
+ typing.Optional[typing.Any],
455
+ parse_obj_as(
456
+ type_=typing.Optional[typing.Any], # type: ignore
457
+ object_=_response.json(),
458
+ ),
459
+ )
460
+ )
461
+ if _response.status_code == 403:
462
+ raise ForbiddenError(
463
+ typing.cast(
464
+ typing.Optional[typing.Any],
465
+ parse_obj_as(
466
+ type_=typing.Optional[typing.Any], # type: ignore
467
+ object_=_response.json(),
468
+ ),
469
+ )
470
+ )
471
+ if _response.status_code == 404:
472
+ raise NotFoundError(
473
+ typing.cast(
474
+ typing.Optional[typing.Any],
475
+ parse_obj_as(
476
+ type_=typing.Optional[typing.Any], # type: ignore
477
+ object_=_response.json(),
478
+ ),
479
+ )
480
+ )
481
+ if _response.status_code == 500:
482
+ raise InternalServerError(
483
+ typing.cast(
484
+ typing.Optional[typing.Any],
485
+ parse_obj_as(
486
+ type_=typing.Optional[typing.Any], # type: ignore
487
+ object_=_response.json(),
488
+ ),
489
+ )
490
+ )
491
+ _response_json = _response.json()
492
+ except JSONDecodeError:
493
+ raise ApiError(status_code=_response.status_code, body=_response.text)
494
+ raise ApiError(status_code=_response.status_code, body=_response_json)
495
+
496
+ yield stream()
497
+
336
498
  def execute_workflow(
337
499
  self,
338
500
  *,
@@ -447,6 +609,141 @@ class RawVellum:
447
609
  raise ApiError(status_code=_response.status_code, body=_response.text)
448
610
  raise ApiError(status_code=_response.status_code, body=_response_json)
449
611
 
612
+ @contextlib.contextmanager
613
+ def execute_workflow_stream(
614
+ self,
615
+ *,
616
+ inputs: typing.Sequence[WorkflowRequestInputRequest],
617
+ expand_meta: typing.Optional[WorkflowExpandMetaRequest] = OMIT,
618
+ workflow_deployment_id: typing.Optional[str] = OMIT,
619
+ workflow_deployment_name: typing.Optional[str] = OMIT,
620
+ release_tag: typing.Optional[str] = OMIT,
621
+ external_id: typing.Optional[str] = OMIT,
622
+ event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
623
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
624
+ request_options: typing.Optional[RequestOptions] = None,
625
+ ) -> typing.Iterator[HttpResponse[typing.Iterator[WorkflowStreamEvent]]]:
626
+ """
627
+ Executes a deployed Workflow and streams back its results.
628
+
629
+ Parameters
630
+ ----------
631
+ inputs : typing.Sequence[WorkflowRequestInputRequest]
632
+ The list of inputs defined in the Workflow's Deployment with their corresponding values.
633
+
634
+ expand_meta : typing.Optional[WorkflowExpandMetaRequest]
635
+ An optionally specified configuration used to opt in to including additional metadata about this workflow execution in the API response. Corresponding values will be returned under the `execution_meta` key within NODE events in the response stream.
636
+
637
+ workflow_deployment_id : typing.Optional[str]
638
+ The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
639
+
640
+ workflow_deployment_name : typing.Optional[str]
641
+ The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
642
+
643
+ release_tag : typing.Optional[str]
644
+ Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
645
+
646
+ external_id : typing.Optional[str]
647
+ Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
648
+
649
+ event_types : typing.Optional[typing.Sequence[WorkflowExecutionEventType]]
650
+ Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
651
+
652
+ metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
653
+ Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
654
+
655
+ request_options : typing.Optional[RequestOptions]
656
+ Request-specific configuration.
657
+
658
+ Yields
659
+ ------
660
+ typing.Iterator[HttpResponse[typing.Iterator[WorkflowStreamEvent]]]
661
+
662
+ """
663
+ with self._client_wrapper.httpx_client.stream(
664
+ "v1/execute-workflow-stream",
665
+ base_url=self._client_wrapper.get_environment().predict,
666
+ method="POST",
667
+ json={
668
+ "inputs": convert_and_respect_annotation_metadata(
669
+ object_=inputs, annotation=typing.Sequence[WorkflowRequestInputRequest], direction="write"
670
+ ),
671
+ "expand_meta": convert_and_respect_annotation_metadata(
672
+ object_=expand_meta, annotation=typing.Optional[WorkflowExpandMetaRequest], direction="write"
673
+ ),
674
+ "workflow_deployment_id": workflow_deployment_id,
675
+ "workflow_deployment_name": workflow_deployment_name,
676
+ "release_tag": release_tag,
677
+ "external_id": external_id,
678
+ "event_types": event_types,
679
+ "metadata": metadata,
680
+ },
681
+ headers={
682
+ "content-type": "application/json",
683
+ },
684
+ request_options=request_options,
685
+ omit=OMIT,
686
+ ) as _response:
687
+
688
+ def stream() -> HttpResponse[typing.Iterator[WorkflowStreamEvent]]:
689
+ try:
690
+ if 200 <= _response.status_code < 300:
691
+
692
+ def _iter():
693
+ for _text in _response.iter_lines():
694
+ try:
695
+ if len(_text) == 0:
696
+ continue
697
+ yield typing.cast(
698
+ WorkflowStreamEvent,
699
+ parse_obj_as(
700
+ type_=WorkflowStreamEvent, # type: ignore
701
+ object_=json.loads(_text),
702
+ ),
703
+ )
704
+ except Exception:
705
+ pass
706
+ return
707
+
708
+ return HttpResponse(response=_response, data=_iter())
709
+ _response.read()
710
+ if _response.status_code == 400:
711
+ raise BadRequestError(
712
+ typing.cast(
713
+ typing.Optional[typing.Any],
714
+ parse_obj_as(
715
+ type_=typing.Optional[typing.Any], # type: ignore
716
+ object_=_response.json(),
717
+ ),
718
+ )
719
+ )
720
+ if _response.status_code == 404:
721
+ raise NotFoundError(
722
+ typing.cast(
723
+ typing.Optional[typing.Any],
724
+ parse_obj_as(
725
+ type_=typing.Optional[typing.Any], # type: ignore
726
+ object_=_response.json(),
727
+ ),
728
+ )
729
+ )
730
+ if _response.status_code == 500:
731
+ raise InternalServerError(
732
+ typing.cast(
733
+ typing.Optional[typing.Any],
734
+ parse_obj_as(
735
+ type_=typing.Optional[typing.Any], # type: ignore
736
+ object_=_response.json(),
737
+ ),
738
+ )
739
+ )
740
+ _response_json = _response.json()
741
+ except JSONDecodeError:
742
+ raise ApiError(status_code=_response.status_code, body=_response.text)
743
+ raise ApiError(status_code=_response.status_code, body=_response_json)
744
+
745
+ yield stream()
746
+
450
747
  def generate(
451
748
  self,
452
749
  *,
@@ -559,6 +856,134 @@ class RawVellum:
559
856
  raise ApiError(status_code=_response.status_code, body=_response.text)
560
857
  raise ApiError(status_code=_response.status_code, body=_response_json)
561
858
 
859
+ @contextlib.contextmanager
860
+ def generate_stream(
861
+ self,
862
+ *,
863
+ requests: typing.Sequence[GenerateRequest],
864
+ deployment_id: typing.Optional[str] = OMIT,
865
+ deployment_name: typing.Optional[str] = OMIT,
866
+ options: typing.Optional[GenerateOptionsRequest] = OMIT,
867
+ request_options: typing.Optional[RequestOptions] = None,
868
+ ) -> typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamResponse]]]:
869
+ """
870
+ Generate a stream of completions using a previously defined deployment.
871
+
872
+ Important: This endpoint is DEPRECATED and has been superseded by
873
+ [execute-prompt-stream](/api-reference/api-reference/execute-prompt-stream).
874
+
875
+ Parameters
876
+ ----------
877
+ requests : typing.Sequence[GenerateRequest]
878
+ The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
879
+
880
+ deployment_id : typing.Optional[str]
881
+ The ID of the deployment. Must provide either this or deployment_name.
882
+
883
+ deployment_name : typing.Optional[str]
884
+ The name of the deployment. Must provide either this or deployment_id.
885
+
886
+ options : typing.Optional[GenerateOptionsRequest]
887
+ Additional configuration that can be used to control what's included in the response.
888
+
889
+ request_options : typing.Optional[RequestOptions]
890
+ Request-specific configuration.
891
+
892
+ Yields
893
+ ------
894
+ typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamResponse]]]
895
+
896
+ """
897
+ with self._client_wrapper.httpx_client.stream(
898
+ "v1/generate-stream",
899
+ base_url=self._client_wrapper.get_environment().predict,
900
+ method="POST",
901
+ json={
902
+ "deployment_id": deployment_id,
903
+ "deployment_name": deployment_name,
904
+ "requests": convert_and_respect_annotation_metadata(
905
+ object_=requests, annotation=typing.Sequence[GenerateRequest], direction="write"
906
+ ),
907
+ "options": convert_and_respect_annotation_metadata(
908
+ object_=options, annotation=typing.Optional[GenerateOptionsRequest], direction="write"
909
+ ),
910
+ },
911
+ headers={
912
+ "content-type": "application/json",
913
+ },
914
+ request_options=request_options,
915
+ omit=OMIT,
916
+ ) as _response:
917
+
918
+ def stream() -> HttpResponse[typing.Iterator[GenerateStreamResponse]]:
919
+ try:
920
+ if 200 <= _response.status_code < 300:
921
+
922
+ def _iter():
923
+ for _text in _response.iter_lines():
924
+ try:
925
+ if len(_text) == 0:
926
+ continue
927
+ yield typing.cast(
928
+ GenerateStreamResponse,
929
+ parse_obj_as(
930
+ type_=GenerateStreamResponse, # type: ignore
931
+ object_=json.loads(_text),
932
+ ),
933
+ )
934
+ except Exception:
935
+ pass
936
+ return
937
+
938
+ return HttpResponse(response=_response, data=_iter())
939
+ _response.read()
940
+ if _response.status_code == 400:
941
+ raise BadRequestError(
942
+ typing.cast(
943
+ typing.Optional[typing.Any],
944
+ parse_obj_as(
945
+ type_=typing.Optional[typing.Any], # type: ignore
946
+ object_=_response.json(),
947
+ ),
948
+ )
949
+ )
950
+ if _response.status_code == 403:
951
+ raise ForbiddenError(
952
+ typing.cast(
953
+ typing.Optional[typing.Any],
954
+ parse_obj_as(
955
+ type_=typing.Optional[typing.Any], # type: ignore
956
+ object_=_response.json(),
957
+ ),
958
+ )
959
+ )
960
+ if _response.status_code == 404:
961
+ raise NotFoundError(
962
+ typing.cast(
963
+ typing.Optional[typing.Any],
964
+ parse_obj_as(
965
+ type_=typing.Optional[typing.Any], # type: ignore
966
+ object_=_response.json(),
967
+ ),
968
+ )
969
+ )
970
+ if _response.status_code == 500:
971
+ raise InternalServerError(
972
+ typing.cast(
973
+ typing.Optional[typing.Any],
974
+ parse_obj_as(
975
+ type_=typing.Optional[typing.Any], # type: ignore
976
+ object_=_response.json(),
977
+ ),
978
+ )
979
+ )
980
+ _response_json = _response.json()
981
+ except JSONDecodeError:
982
+ raise ApiError(status_code=_response.status_code, body=_response.text)
983
+ raise ApiError(status_code=_response.status_code, body=_response_json)
984
+
985
+ yield stream()
986
+
562
987
  def search(
563
988
  self,
564
989
  *,
@@ -1092,6 +1517,162 @@ class AsyncRawVellum:
1092
1517
  raise ApiError(status_code=_response.status_code, body=_response.text)
1093
1518
  raise ApiError(status_code=_response.status_code, body=_response_json)
1094
1519
 
1520
+ @contextlib.asynccontextmanager
1521
+ async def execute_prompt_stream(
1522
+ self,
1523
+ *,
1524
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
1525
+ prompt_deployment_id: typing.Optional[str] = OMIT,
1526
+ prompt_deployment_name: typing.Optional[str] = OMIT,
1527
+ release_tag: typing.Optional[str] = OMIT,
1528
+ external_id: typing.Optional[str] = OMIT,
1529
+ expand_meta: typing.Optional[PromptDeploymentExpandMetaRequest] = OMIT,
1530
+ raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
1531
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
1532
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
1533
+ request_options: typing.Optional[RequestOptions] = None,
1534
+ ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]]:
1535
+ """
1536
+ Executes a deployed Prompt and streams back the results.
1537
+
1538
+ Parameters
1539
+ ----------
1540
+ inputs : typing.Sequence[PromptDeploymentInputRequest]
1541
+ A list consisting of the Prompt Deployment's input variables and their values.
1542
+
1543
+ prompt_deployment_id : typing.Optional[str]
1544
+ The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
1545
+
1546
+ prompt_deployment_name : typing.Optional[str]
1547
+ The unique name of the Prompt Deployment. Must provide either this or prompt_deployment_id.
1548
+
1549
+ release_tag : typing.Optional[str]
1550
+ Optionally specify a release tag if you want to pin to a specific release of the Prompt Deployment
1551
+
1552
+ external_id : typing.Optional[str]
1553
+ Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
1554
+
1555
+ expand_meta : typing.Optional[PromptDeploymentExpandMetaRequest]
1556
+ An optionally specified configuration used to opt in to including additional metadata about this prompt execution in the API response. Corresponding values will be returned under the `meta` key of the API response.
1557
+
1558
+ raw_overrides : typing.Optional[RawPromptExecutionOverridesRequest]
1559
+ Overrides for the raw API request sent to the model host. Combined with `expand_raw`, it can be used to access new features from models.
1560
+
1561
+ expand_raw : typing.Optional[typing.Sequence[str]]
1562
+ A list of keys whose values you'd like to directly return from the JSON response of the model provider. Useful if you need lower-level info returned by model providers that Vellum would otherwise omit. Corresponding key/value pairs will be returned under the `raw` key of the API response.
1563
+
1564
+ metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
1565
+ Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
1566
+
1567
+ request_options : typing.Optional[RequestOptions]
1568
+ Request-specific configuration.
1569
+
1570
+ Yields
1571
+ ------
1572
+ typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]]
1573
+
1574
+ """
1575
+ async with self._client_wrapper.httpx_client.stream(
1576
+ "v1/execute-prompt-stream",
1577
+ base_url=self._client_wrapper.get_environment().predict,
1578
+ method="POST",
1579
+ json={
1580
+ "inputs": convert_and_respect_annotation_metadata(
1581
+ object_=inputs, annotation=typing.Sequence[PromptDeploymentInputRequest], direction="write"
1582
+ ),
1583
+ "prompt_deployment_id": prompt_deployment_id,
1584
+ "prompt_deployment_name": prompt_deployment_name,
1585
+ "release_tag": release_tag,
1586
+ "external_id": external_id,
1587
+ "expand_meta": convert_and_respect_annotation_metadata(
1588
+ object_=expand_meta,
1589
+ annotation=typing.Optional[PromptDeploymentExpandMetaRequest],
1590
+ direction="write",
1591
+ ),
1592
+ "raw_overrides": convert_and_respect_annotation_metadata(
1593
+ object_=raw_overrides,
1594
+ annotation=typing.Optional[RawPromptExecutionOverridesRequest],
1595
+ direction="write",
1596
+ ),
1597
+ "expand_raw": expand_raw,
1598
+ "metadata": metadata,
1599
+ },
1600
+ headers={
1601
+ "content-type": "application/json",
1602
+ },
1603
+ request_options=request_options,
1604
+ omit=OMIT,
1605
+ ) as _response:
1606
+
1607
+ async def stream() -> AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]:
1608
+ try:
1609
+ if 200 <= _response.status_code < 300:
1610
+
1611
+ async def _iter():
1612
+ async for _text in _response.aiter_lines():
1613
+ try:
1614
+ if len(_text) == 0:
1615
+ continue
1616
+ yield typing.cast(
1617
+ ExecutePromptEvent,
1618
+ parse_obj_as(
1619
+ type_=ExecutePromptEvent, # type: ignore
1620
+ object_=json.loads(_text),
1621
+ ),
1622
+ )
1623
+ except Exception:
1624
+ pass
1625
+ return
1626
+
1627
+ return AsyncHttpResponse(response=_response, data=_iter())
1628
+ await _response.aread()
1629
+ if _response.status_code == 400:
1630
+ raise BadRequestError(
1631
+ typing.cast(
1632
+ typing.Optional[typing.Any],
1633
+ parse_obj_as(
1634
+ type_=typing.Optional[typing.Any], # type: ignore
1635
+ object_=_response.json(),
1636
+ ),
1637
+ )
1638
+ )
1639
+ if _response.status_code == 403:
1640
+ raise ForbiddenError(
1641
+ typing.cast(
1642
+ typing.Optional[typing.Any],
1643
+ parse_obj_as(
1644
+ type_=typing.Optional[typing.Any], # type: ignore
1645
+ object_=_response.json(),
1646
+ ),
1647
+ )
1648
+ )
1649
+ if _response.status_code == 404:
1650
+ raise NotFoundError(
1651
+ typing.cast(
1652
+ typing.Optional[typing.Any],
1653
+ parse_obj_as(
1654
+ type_=typing.Optional[typing.Any], # type: ignore
1655
+ object_=_response.json(),
1656
+ ),
1657
+ )
1658
+ )
1659
+ if _response.status_code == 500:
1660
+ raise InternalServerError(
1661
+ typing.cast(
1662
+ typing.Optional[typing.Any],
1663
+ parse_obj_as(
1664
+ type_=typing.Optional[typing.Any], # type: ignore
1665
+ object_=_response.json(),
1666
+ ),
1667
+ )
1668
+ )
1669
+ _response_json = _response.json()
1670
+ except JSONDecodeError:
1671
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1672
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1673
+
1674
+ yield await stream()
1675
+
1095
1676
  async def execute_workflow(
1096
1677
  self,
1097
1678
  *,
@@ -1206,6 +1787,141 @@ class AsyncRawVellum:
1206
1787
  raise ApiError(status_code=_response.status_code, body=_response.text)
1207
1788
  raise ApiError(status_code=_response.status_code, body=_response_json)
1208
1789
 
1790
    @contextlib.asynccontextmanager
    async def execute_workflow_stream(
        self,
        *,
        inputs: typing.Sequence[WorkflowRequestInputRequest],
        expand_meta: typing.Optional[WorkflowExpandMetaRequest] = OMIT,
        workflow_deployment_id: typing.Optional[str] = OMIT,
        workflow_deployment_name: typing.Optional[str] = OMIT,
        release_tag: typing.Optional[str] = OMIT,
        external_id: typing.Optional[str] = OMIT,
        event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]]:
        """
        Executes a deployed Workflow and streams back its results.

        Parameters
        ----------
        inputs : typing.Sequence[WorkflowRequestInputRequest]
            The list of inputs defined in the Workflow's Deployment with their corresponding values.

        expand_meta : typing.Optional[WorkflowExpandMetaRequest]
            An optionally specified configuration used to opt in to including additional metadata about this workflow execution in the API response. Corresponding values will be returned under the `execution_meta` key within NODE events in the response stream.

        workflow_deployment_id : typing.Optional[str]
            The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.

        workflow_deployment_name : typing.Optional[str]
            The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.

        release_tag : typing.Optional[str]
            Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment

        external_id : typing.Optional[str]
            Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.

        event_types : typing.Optional[typing.Sequence[WorkflowExecutionEventType]]
            Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.

        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
            Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]]

        """
        # The HTTP connection must stay open while the caller consumes the
        # event iterator, so this method is itself an async context manager
        # that wraps the underlying httpx streaming context.
        async with self._client_wrapper.httpx_client.stream(
            "v1/execute-workflow-stream",
            base_url=self._client_wrapper.get_environment().predict,
            method="POST",
            json={
                "inputs": convert_and_respect_annotation_metadata(
                    object_=inputs, annotation=typing.Sequence[WorkflowRequestInputRequest], direction="write"
                ),
                "expand_meta": convert_and_respect_annotation_metadata(
                    object_=expand_meta, annotation=typing.Optional[WorkflowExpandMetaRequest], direction="write"
                ),
                "workflow_deployment_id": workflow_deployment_id,
                "workflow_deployment_name": workflow_deployment_name,
                "release_tag": release_tag,
                "external_id": external_id,
                "event_types": event_types,
                "metadata": metadata,
            },
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        ) as _response:

            async def stream() -> AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]:
                try:
                    if 200 <= _response.status_code < 300:

                        # Lazily parse each non-empty response line as a
                        # WorkflowStreamEvent; lines that fail to parse are
                        # skipped (best-effort handling of the line stream).
                        async def _iter():
                            async for _text in _response.aiter_lines():
                                try:
                                    if len(_text) == 0:
                                        continue
                                    yield typing.cast(
                                        WorkflowStreamEvent,
                                        parse_obj_as(
                                            type_=WorkflowStreamEvent,  # type: ignore
                                            object_=json.loads(_text),
                                        ),
                                    )
                                except Exception:
                                    pass
                            return

                        return AsyncHttpResponse(response=_response, data=_iter())
                    # Non-2xx: read the full error body before calling
                    # _response.json() on a streamed response.
                    await _response.aread()
                    if _response.status_code == 400:
                        raise BadRequestError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 404:
                        raise NotFoundError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 500:
                        raise InternalServerError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    # Unrecognized status code: fall through to a generic
                    # ApiError carrying the parsed JSON body.
                    _response_json = _response.json()
                except JSONDecodeError:
                    # Error body was not valid JSON; surface the raw text.
                    raise ApiError(status_code=_response.status_code, body=_response.text)
                raise ApiError(status_code=_response.status_code, body=_response_json)

            yield await stream()
+
1209
1925
  async def generate(
1210
1926
  self,
1211
1927
  *,
@@ -1318,6 +2034,134 @@ class AsyncRawVellum:
1318
2034
  raise ApiError(status_code=_response.status_code, body=_response.text)
1319
2035
  raise ApiError(status_code=_response.status_code, body=_response_json)
1320
2036
 
2037
    @contextlib.asynccontextmanager
    async def generate_stream(
        self,
        *,
        requests: typing.Sequence[GenerateRequest],
        deployment_id: typing.Optional[str] = OMIT,
        deployment_name: typing.Optional[str] = OMIT,
        options: typing.Optional[GenerateOptionsRequest] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]]:
        """
        Generate a stream of completions using a previously defined deployment.

        Important: This endpoint is DEPRECATED and has been superseded by
        [execute-prompt-stream](/api-reference/api-reference/execute-prompt-stream).

        Parameters
        ----------
        requests : typing.Sequence[GenerateRequest]
            The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.

        deployment_id : typing.Optional[str]
            The ID of the deployment. Must provide either this or deployment_name.

        deployment_name : typing.Optional[str]
            The name of the deployment. Must provide either this or deployment_id.

        options : typing.Optional[GenerateOptionsRequest]
            Additional configuration that can be used to control what's included in the response.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]]

        """
        # Async context manager so the streamed HTTP connection remains open
        # while the caller iterates the yielded response stream.
        async with self._client_wrapper.httpx_client.stream(
            "v1/generate-stream",
            base_url=self._client_wrapper.get_environment().predict,
            method="POST",
            json={
                "deployment_id": deployment_id,
                "deployment_name": deployment_name,
                "requests": convert_and_respect_annotation_metadata(
                    object_=requests, annotation=typing.Sequence[GenerateRequest], direction="write"
                ),
                "options": convert_and_respect_annotation_metadata(
                    object_=options, annotation=typing.Optional[GenerateOptionsRequest], direction="write"
                ),
            },
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        ) as _response:

            async def stream() -> AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]:
                try:
                    if 200 <= _response.status_code < 300:

                        # Lazily parse each non-empty response line as a
                        # GenerateStreamResponse; unparseable lines are
                        # skipped (best-effort handling of the line stream).
                        async def _iter():
                            async for _text in _response.aiter_lines():
                                try:
                                    if len(_text) == 0:
                                        continue
                                    yield typing.cast(
                                        GenerateStreamResponse,
                                        parse_obj_as(
                                            type_=GenerateStreamResponse,  # type: ignore
                                            object_=json.loads(_text),
                                        ),
                                    )
                                except Exception:
                                    pass
                            return

                        return AsyncHttpResponse(response=_response, data=_iter())
                    # Non-2xx: read the full error body before calling
                    # _response.json() on a streamed response.
                    await _response.aread()
                    if _response.status_code == 400:
                        raise BadRequestError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 403:
                        raise ForbiddenError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 404:
                        raise NotFoundError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 500:
                        raise InternalServerError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    # Unrecognized status code: fall through to a generic
                    # ApiError carrying the parsed JSON body.
                    _response_json = _response.json()
                except JSONDecodeError:
                    # Error body was not valid JSON; surface the raw text.
                    raise ApiError(status_code=_response.status_code, body=_response.text)
                raise ApiError(status_code=_response.status_code, body=_response_json)

            yield await stream()
1321
2165
  async def search(
1322
2166
  self,
1323
2167
  *,