vellum-ai 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. vellum/__init__.py +16 -0
  2. vellum/client/README.md +55 -0
  3. vellum/client/__init__.py +66 -507
  4. vellum/client/core/client_wrapper.py +2 -2
  5. vellum/client/core/pydantic_utilities.py +10 -3
  6. vellum/client/raw_client.py +844 -0
  7. vellum/client/reference.md +692 -19
  8. vellum/client/resources/ad_hoc/client.py +23 -180
  9. vellum/client/resources/ad_hoc/raw_client.py +276 -0
  10. vellum/client/resources/container_images/client.py +10 -36
  11. vellum/client/resources/deployments/client.py +16 -62
  12. vellum/client/resources/document_indexes/client.py +16 -72
  13. vellum/client/resources/documents/client.py +8 -30
  14. vellum/client/resources/folder_entities/client.py +4 -8
  15. vellum/client/resources/metric_definitions/client.py +4 -14
  16. vellum/client/resources/ml_models/client.py +2 -8
  17. vellum/client/resources/organizations/client.py +2 -6
  18. vellum/client/resources/prompts/client.py +2 -10
  19. vellum/client/resources/sandboxes/client.py +4 -20
  20. vellum/client/resources/test_suite_runs/client.py +4 -18
  21. vellum/client/resources/test_suites/client.py +11 -86
  22. vellum/client/resources/test_suites/raw_client.py +136 -0
  23. vellum/client/resources/workflow_deployments/client.py +20 -78
  24. vellum/client/resources/workflow_executions/client.py +2 -6
  25. vellum/client/resources/workflow_sandboxes/client.py +2 -10
  26. vellum/client/resources/workflows/client.py +7 -6
  27. vellum/client/resources/workflows/raw_client.py +58 -47
  28. vellum/client/resources/workspace_secrets/client.py +4 -20
  29. vellum/client/resources/workspaces/client.py +2 -6
  30. vellum/client/types/__init__.py +16 -0
  31. vellum/client/types/array_chat_message_content_item.py +4 -2
  32. vellum/client/types/array_chat_message_content_item_request.py +4 -2
  33. vellum/client/types/chat_message_content.py +4 -2
  34. vellum/client/types/chat_message_content_request.py +4 -2
  35. vellum/client/types/node_execution_span.py +2 -0
  36. vellum/client/types/prompt_block.py +4 -2
  37. vellum/client/types/vellum_value.py +4 -2
  38. vellum/client/types/vellum_value_request.py +4 -2
  39. vellum/client/types/vellum_variable_type.py +2 -1
  40. vellum/client/types/vellum_video.py +24 -0
  41. vellum/client/types/vellum_video_request.py +24 -0
  42. vellum/client/types/video_chat_message_content.py +25 -0
  43. vellum/client/types/video_chat_message_content_request.py +25 -0
  44. vellum/client/types/video_prompt_block.py +29 -0
  45. vellum/client/types/video_vellum_value.py +25 -0
  46. vellum/client/types/video_vellum_value_request.py +25 -0
  47. vellum/client/types/workflow_execution_span.py +2 -0
  48. vellum/client/types/workflow_execution_usage_calculation_fulfilled_body.py +22 -0
  49. vellum/prompts/blocks/compilation.py +22 -10
  50. vellum/types/vellum_video.py +3 -0
  51. vellum/types/vellum_video_request.py +3 -0
  52. vellum/types/video_chat_message_content.py +3 -0
  53. vellum/types/video_chat_message_content_request.py +3 -0
  54. vellum/types/video_prompt_block.py +3 -0
  55. vellum/types/video_vellum_value.py +3 -0
  56. vellum/types/video_vellum_value_request.py +3 -0
  57. vellum/types/workflow_execution_usage_calculation_fulfilled_body.py +3 -0
  58. vellum/workflows/events/workflow.py +11 -0
  59. vellum/workflows/graph/graph.py +103 -1
  60. vellum/workflows/graph/tests/test_graph.py +99 -0
  61. vellum/workflows/nodes/bases/base.py +9 -1
  62. vellum/workflows/nodes/displayable/bases/utils.py +4 -2
  63. vellum/workflows/nodes/displayable/tool_calling_node/node.py +19 -18
  64. vellum/workflows/nodes/displayable/tool_calling_node/tests/test_node.py +17 -7
  65. vellum/workflows/nodes/displayable/tool_calling_node/tests/test_utils.py +7 -7
  66. vellum/workflows/nodes/displayable/tool_calling_node/utils.py +47 -80
  67. vellum/workflows/references/environment_variable.py +10 -0
  68. vellum/workflows/runner/runner.py +18 -2
  69. vellum/workflows/state/context.py +101 -12
  70. vellum/workflows/types/definition.py +11 -1
  71. vellum/workflows/types/tests/test_definition.py +19 -0
  72. vellum/workflows/utils/vellum_variables.py +9 -5
  73. vellum/workflows/workflows/base.py +12 -5
  74. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/METADATA +1 -1
  75. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/RECORD +85 -69
  76. vellum_ee/workflows/display/nodes/vellum/code_execution_node.py +1 -1
  77. vellum_ee/workflows/display/nodes/vellum/tests/test_code_execution_node.py +55 -1
  78. vellum_ee/workflows/display/nodes/vellum/tests/test_tool_calling_node.py +15 -52
  79. vellum_ee/workflows/display/tests/workflow_serialization/test_basic_tool_calling_node_mcp_serialization.py +15 -49
  80. vellum_ee/workflows/display/types.py +14 -1
  81. vellum_ee/workflows/display/utils/expressions.py +13 -4
  82. vellum_ee/workflows/display/workflows/base_workflow_display.py +6 -19
  83. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/LICENSE +0 -0
  84. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/WHEEL +0 -0
  85. {vellum_ai-1.1.1.dist-info → vellum_ai-1.1.3.dist-info}/entry_points.txt +0 -0
@@ -26,12 +26,18 @@ from .types.execute_prompt_response import ExecutePromptResponse
26
26
  from .errors.forbidden_error import ForbiddenError
27
27
  from .errors.not_found_error import NotFoundError
28
28
  from .errors.internal_server_error import InternalServerError
29
+ from .types.execute_prompt_event import ExecutePromptEvent
30
+ import json
31
+ import contextlib
29
32
  from .types.workflow_request_input_request import WorkflowRequestInputRequest
30
33
  from .types.workflow_expand_meta_request import WorkflowExpandMetaRequest
31
34
  from .types.execute_workflow_response import ExecuteWorkflowResponse
35
+ from .types.workflow_execution_event_type import WorkflowExecutionEventType
36
+ from .types.workflow_stream_event import WorkflowStreamEvent
32
37
  from .types.generate_request import GenerateRequest
33
38
  from .types.generate_options_request import GenerateOptionsRequest
34
39
  from .types.generate_response import GenerateResponse
40
+ from .types.generate_stream_response import GenerateStreamResponse
35
41
  from .types.search_request_options_request import SearchRequestOptionsRequest
36
42
  from .types.search_response import SearchResponse
37
43
  from .types.submit_completion_actual_request import SubmitCompletionActualRequest
@@ -333,6 +339,162 @@ class RawVellum:
333
339
  raise ApiError(status_code=_response.status_code, body=_response.text)
334
340
  raise ApiError(status_code=_response.status_code, body=_response_json)
335
341
 
342
+ @contextlib.contextmanager
343
+ def execute_prompt_stream(
344
+ self,
345
+ *,
346
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
347
+ prompt_deployment_id: typing.Optional[str] = OMIT,
348
+ prompt_deployment_name: typing.Optional[str] = OMIT,
349
+ release_tag: typing.Optional[str] = OMIT,
350
+ external_id: typing.Optional[str] = OMIT,
351
+ expand_meta: typing.Optional[PromptDeploymentExpandMetaRequest] = OMIT,
352
+ raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
353
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
354
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
355
+ request_options: typing.Optional[RequestOptions] = None,
356
+ ) -> typing.Iterator[HttpResponse[typing.Iterator[ExecutePromptEvent]]]:
357
+ """
358
+ Executes a deployed Prompt and streams back the results.
359
+
360
+ Parameters
361
+ ----------
362
+ inputs : typing.Sequence[PromptDeploymentInputRequest]
363
+ A list consisting of the Prompt Deployment's input variables and their values.
364
+
365
+ prompt_deployment_id : typing.Optional[str]
366
+ The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
367
+
368
+ prompt_deployment_name : typing.Optional[str]
369
+ The unique name of the Prompt Deployment. Must provide either this or prompt_deployment_id.
370
+
371
+ release_tag : typing.Optional[str]
372
+ Optionally specify a release tag if you want to pin to a specific release of the Prompt Deployment
373
+
374
+ external_id : typing.Optional[str]
375
+ Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
376
+
377
+ expand_meta : typing.Optional[PromptDeploymentExpandMetaRequest]
378
+ An optionally specified configuration used to opt in to including additional metadata about this prompt execution in the API response. Corresponding values will be returned under the `meta` key of the API response.
379
+
380
+ raw_overrides : typing.Optional[RawPromptExecutionOverridesRequest]
381
+ Overrides for the raw API request sent to the model host. Combined with `expand_raw`, it can be used to access new features from models.
382
+
383
+ expand_raw : typing.Optional[typing.Sequence[str]]
384
+ A list of keys whose values you'd like to directly return from the JSON response of the model provider. Useful if you need lower-level info returned by model providers that Vellum would otherwise omit. Corresponding key/value pairs will be returned under the `raw` key of the API response.
385
+
386
+ metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
387
+ Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
388
+
389
+ request_options : typing.Optional[RequestOptions]
390
+ Request-specific configuration.
391
+
392
+ Yields
393
+ ------
394
+ typing.Iterator[HttpResponse[typing.Iterator[ExecutePromptEvent]]]
395
+
396
+ """
397
+ with self._client_wrapper.httpx_client.stream(
398
+ "v1/execute-prompt-stream",
399
+ base_url=self._client_wrapper.get_environment().predict,
400
+ method="POST",
401
+ json={
402
+ "inputs": convert_and_respect_annotation_metadata(
403
+ object_=inputs, annotation=typing.Sequence[PromptDeploymentInputRequest], direction="write"
404
+ ),
405
+ "prompt_deployment_id": prompt_deployment_id,
406
+ "prompt_deployment_name": prompt_deployment_name,
407
+ "release_tag": release_tag,
408
+ "external_id": external_id,
409
+ "expand_meta": convert_and_respect_annotation_metadata(
410
+ object_=expand_meta,
411
+ annotation=typing.Optional[PromptDeploymentExpandMetaRequest],
412
+ direction="write",
413
+ ),
414
+ "raw_overrides": convert_and_respect_annotation_metadata(
415
+ object_=raw_overrides,
416
+ annotation=typing.Optional[RawPromptExecutionOverridesRequest],
417
+ direction="write",
418
+ ),
419
+ "expand_raw": expand_raw,
420
+ "metadata": metadata,
421
+ },
422
+ headers={
423
+ "content-type": "application/json",
424
+ },
425
+ request_options=request_options,
426
+ omit=OMIT,
427
+ ) as _response:
428
+
429
+ def stream() -> HttpResponse[typing.Iterator[ExecutePromptEvent]]:
430
+ try:
431
+ if 200 <= _response.status_code < 300:
432
+
433
+ def _iter():
434
+ for _text in _response.iter_lines():
435
+ try:
436
+ if len(_text) == 0:
437
+ continue
438
+ yield typing.cast(
439
+ ExecutePromptEvent,
440
+ parse_obj_as(
441
+ type_=ExecutePromptEvent, # type: ignore
442
+ object_=json.loads(_text),
443
+ ),
444
+ )
445
+ except Exception:
446
+ pass
447
+ return
448
+
449
+ return HttpResponse(response=_response, data=_iter())
450
+ _response.read()
451
+ if _response.status_code == 400:
452
+ raise BadRequestError(
453
+ typing.cast(
454
+ typing.Optional[typing.Any],
455
+ parse_obj_as(
456
+ type_=typing.Optional[typing.Any], # type: ignore
457
+ object_=_response.json(),
458
+ ),
459
+ )
460
+ )
461
+ if _response.status_code == 403:
462
+ raise ForbiddenError(
463
+ typing.cast(
464
+ typing.Optional[typing.Any],
465
+ parse_obj_as(
466
+ type_=typing.Optional[typing.Any], # type: ignore
467
+ object_=_response.json(),
468
+ ),
469
+ )
470
+ )
471
+ if _response.status_code == 404:
472
+ raise NotFoundError(
473
+ typing.cast(
474
+ typing.Optional[typing.Any],
475
+ parse_obj_as(
476
+ type_=typing.Optional[typing.Any], # type: ignore
477
+ object_=_response.json(),
478
+ ),
479
+ )
480
+ )
481
+ if _response.status_code == 500:
482
+ raise InternalServerError(
483
+ typing.cast(
484
+ typing.Optional[typing.Any],
485
+ parse_obj_as(
486
+ type_=typing.Optional[typing.Any], # type: ignore
487
+ object_=_response.json(),
488
+ ),
489
+ )
490
+ )
491
+ _response_json = _response.json()
492
+ except JSONDecodeError:
493
+ raise ApiError(status_code=_response.status_code, body=_response.text)
494
+ raise ApiError(status_code=_response.status_code, body=_response_json)
495
+
496
+ yield stream()
497
+
336
498
  def execute_workflow(
337
499
  self,
338
500
  *,
@@ -447,6 +609,141 @@ class RawVellum:
447
609
  raise ApiError(status_code=_response.status_code, body=_response.text)
448
610
  raise ApiError(status_code=_response.status_code, body=_response_json)
449
611
 
612
+ @contextlib.contextmanager
613
+ def execute_workflow_stream(
614
+ self,
615
+ *,
616
+ inputs: typing.Sequence[WorkflowRequestInputRequest],
617
+ expand_meta: typing.Optional[WorkflowExpandMetaRequest] = OMIT,
618
+ workflow_deployment_id: typing.Optional[str] = OMIT,
619
+ workflow_deployment_name: typing.Optional[str] = OMIT,
620
+ release_tag: typing.Optional[str] = OMIT,
621
+ external_id: typing.Optional[str] = OMIT,
622
+ event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
623
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
624
+ request_options: typing.Optional[RequestOptions] = None,
625
+ ) -> typing.Iterator[HttpResponse[typing.Iterator[WorkflowStreamEvent]]]:
626
+ """
627
+ Executes a deployed Workflow and streams back its results.
628
+
629
+ Parameters
630
+ ----------
631
+ inputs : typing.Sequence[WorkflowRequestInputRequest]
632
+ The list of inputs defined in the Workflow's Deployment with their corresponding values.
633
+
634
+ expand_meta : typing.Optional[WorkflowExpandMetaRequest]
635
+ An optionally specified configuration used to opt in to including additional metadata about this workflow execution in the API response. Corresponding values will be returned under the `execution_meta` key within NODE events in the response stream.
636
+
637
+ workflow_deployment_id : typing.Optional[str]
638
+ The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.
639
+
640
+ workflow_deployment_name : typing.Optional[str]
641
+ The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.
642
+
643
+ release_tag : typing.Optional[str]
644
+ Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment
645
+
646
+ external_id : typing.Optional[str]
647
+ Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
648
+
649
+ event_types : typing.Optional[typing.Sequence[WorkflowExecutionEventType]]
650
+ Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
651
+
652
+ metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
653
+ Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
654
+
655
+ request_options : typing.Optional[RequestOptions]
656
+ Request-specific configuration.
657
+
658
+ Yields
659
+ ------
660
+ typing.Iterator[HttpResponse[typing.Iterator[WorkflowStreamEvent]]]
661
+
662
+ """
663
+ with self._client_wrapper.httpx_client.stream(
664
+ "v1/execute-workflow-stream",
665
+ base_url=self._client_wrapper.get_environment().predict,
666
+ method="POST",
667
+ json={
668
+ "inputs": convert_and_respect_annotation_metadata(
669
+ object_=inputs, annotation=typing.Sequence[WorkflowRequestInputRequest], direction="write"
670
+ ),
671
+ "expand_meta": convert_and_respect_annotation_metadata(
672
+ object_=expand_meta, annotation=typing.Optional[WorkflowExpandMetaRequest], direction="write"
673
+ ),
674
+ "workflow_deployment_id": workflow_deployment_id,
675
+ "workflow_deployment_name": workflow_deployment_name,
676
+ "release_tag": release_tag,
677
+ "external_id": external_id,
678
+ "event_types": event_types,
679
+ "metadata": metadata,
680
+ },
681
+ headers={
682
+ "content-type": "application/json",
683
+ },
684
+ request_options=request_options,
685
+ omit=OMIT,
686
+ ) as _response:
687
+
688
+ def stream() -> HttpResponse[typing.Iterator[WorkflowStreamEvent]]:
689
+ try:
690
+ if 200 <= _response.status_code < 300:
691
+
692
+ def _iter():
693
+ for _text in _response.iter_lines():
694
+ try:
695
+ if len(_text) == 0:
696
+ continue
697
+ yield typing.cast(
698
+ WorkflowStreamEvent,
699
+ parse_obj_as(
700
+ type_=WorkflowStreamEvent, # type: ignore
701
+ object_=json.loads(_text),
702
+ ),
703
+ )
704
+ except Exception:
705
+ pass
706
+ return
707
+
708
+ return HttpResponse(response=_response, data=_iter())
709
+ _response.read()
710
+ if _response.status_code == 400:
711
+ raise BadRequestError(
712
+ typing.cast(
713
+ typing.Optional[typing.Any],
714
+ parse_obj_as(
715
+ type_=typing.Optional[typing.Any], # type: ignore
716
+ object_=_response.json(),
717
+ ),
718
+ )
719
+ )
720
+ if _response.status_code == 404:
721
+ raise NotFoundError(
722
+ typing.cast(
723
+ typing.Optional[typing.Any],
724
+ parse_obj_as(
725
+ type_=typing.Optional[typing.Any], # type: ignore
726
+ object_=_response.json(),
727
+ ),
728
+ )
729
+ )
730
+ if _response.status_code == 500:
731
+ raise InternalServerError(
732
+ typing.cast(
733
+ typing.Optional[typing.Any],
734
+ parse_obj_as(
735
+ type_=typing.Optional[typing.Any], # type: ignore
736
+ object_=_response.json(),
737
+ ),
738
+ )
739
+ )
740
+ _response_json = _response.json()
741
+ except JSONDecodeError:
742
+ raise ApiError(status_code=_response.status_code, body=_response.text)
743
+ raise ApiError(status_code=_response.status_code, body=_response_json)
744
+
745
+ yield stream()
746
+
450
747
  def generate(
451
748
  self,
452
749
  *,
@@ -559,6 +856,134 @@ class RawVellum:
559
856
  raise ApiError(status_code=_response.status_code, body=_response.text)
560
857
  raise ApiError(status_code=_response.status_code, body=_response_json)
561
858
 
859
+ @contextlib.contextmanager
860
+ def generate_stream(
861
+ self,
862
+ *,
863
+ requests: typing.Sequence[GenerateRequest],
864
+ deployment_id: typing.Optional[str] = OMIT,
865
+ deployment_name: typing.Optional[str] = OMIT,
866
+ options: typing.Optional[GenerateOptionsRequest] = OMIT,
867
+ request_options: typing.Optional[RequestOptions] = None,
868
+ ) -> typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamResponse]]]:
869
+ """
870
+ Generate a stream of completions using a previously defined deployment.
871
+
872
+ Important: This endpoint is DEPRECATED and has been superseded by
873
+ [execute-prompt-stream](/api-reference/api-reference/execute-prompt-stream).
874
+
875
+ Parameters
876
+ ----------
877
+ requests : typing.Sequence[GenerateRequest]
878
+ The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
879
+
880
+ deployment_id : typing.Optional[str]
881
+ The ID of the deployment. Must provide either this or deployment_name.
882
+
883
+ deployment_name : typing.Optional[str]
884
+ The name of the deployment. Must provide either this or deployment_id.
885
+
886
+ options : typing.Optional[GenerateOptionsRequest]
887
+ Additional configuration that can be used to control what's included in the response.
888
+
889
+ request_options : typing.Optional[RequestOptions]
890
+ Request-specific configuration.
891
+
892
+ Yields
893
+ ------
894
+ typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamResponse]]]
895
+
896
+ """
897
+ with self._client_wrapper.httpx_client.stream(
898
+ "v1/generate-stream",
899
+ base_url=self._client_wrapper.get_environment().predict,
900
+ method="POST",
901
+ json={
902
+ "deployment_id": deployment_id,
903
+ "deployment_name": deployment_name,
904
+ "requests": convert_and_respect_annotation_metadata(
905
+ object_=requests, annotation=typing.Sequence[GenerateRequest], direction="write"
906
+ ),
907
+ "options": convert_and_respect_annotation_metadata(
908
+ object_=options, annotation=typing.Optional[GenerateOptionsRequest], direction="write"
909
+ ),
910
+ },
911
+ headers={
912
+ "content-type": "application/json",
913
+ },
914
+ request_options=request_options,
915
+ omit=OMIT,
916
+ ) as _response:
917
+
918
+ def stream() -> HttpResponse[typing.Iterator[GenerateStreamResponse]]:
919
+ try:
920
+ if 200 <= _response.status_code < 300:
921
+
922
+ def _iter():
923
+ for _text in _response.iter_lines():
924
+ try:
925
+ if len(_text) == 0:
926
+ continue
927
+ yield typing.cast(
928
+ GenerateStreamResponse,
929
+ parse_obj_as(
930
+ type_=GenerateStreamResponse, # type: ignore
931
+ object_=json.loads(_text),
932
+ ),
933
+ )
934
+ except Exception:
935
+ pass
936
+ return
937
+
938
+ return HttpResponse(response=_response, data=_iter())
939
+ _response.read()
940
+ if _response.status_code == 400:
941
+ raise BadRequestError(
942
+ typing.cast(
943
+ typing.Optional[typing.Any],
944
+ parse_obj_as(
945
+ type_=typing.Optional[typing.Any], # type: ignore
946
+ object_=_response.json(),
947
+ ),
948
+ )
949
+ )
950
+ if _response.status_code == 403:
951
+ raise ForbiddenError(
952
+ typing.cast(
953
+ typing.Optional[typing.Any],
954
+ parse_obj_as(
955
+ type_=typing.Optional[typing.Any], # type: ignore
956
+ object_=_response.json(),
957
+ ),
958
+ )
959
+ )
960
+ if _response.status_code == 404:
961
+ raise NotFoundError(
962
+ typing.cast(
963
+ typing.Optional[typing.Any],
964
+ parse_obj_as(
965
+ type_=typing.Optional[typing.Any], # type: ignore
966
+ object_=_response.json(),
967
+ ),
968
+ )
969
+ )
970
+ if _response.status_code == 500:
971
+ raise InternalServerError(
972
+ typing.cast(
973
+ typing.Optional[typing.Any],
974
+ parse_obj_as(
975
+ type_=typing.Optional[typing.Any], # type: ignore
976
+ object_=_response.json(),
977
+ ),
978
+ )
979
+ )
980
+ _response_json = _response.json()
981
+ except JSONDecodeError:
982
+ raise ApiError(status_code=_response.status_code, body=_response.text)
983
+ raise ApiError(status_code=_response.status_code, body=_response_json)
984
+
985
+ yield stream()
986
+
562
987
  def search(
563
988
  self,
564
989
  *,
@@ -1092,6 +1517,162 @@ class AsyncRawVellum:
1092
1517
  raise ApiError(status_code=_response.status_code, body=_response.text)
1093
1518
  raise ApiError(status_code=_response.status_code, body=_response_json)
1094
1519
 
1520
+ @contextlib.asynccontextmanager
1521
+ async def execute_prompt_stream(
1522
+ self,
1523
+ *,
1524
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
1525
+ prompt_deployment_id: typing.Optional[str] = OMIT,
1526
+ prompt_deployment_name: typing.Optional[str] = OMIT,
1527
+ release_tag: typing.Optional[str] = OMIT,
1528
+ external_id: typing.Optional[str] = OMIT,
1529
+ expand_meta: typing.Optional[PromptDeploymentExpandMetaRequest] = OMIT,
1530
+ raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
1531
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
1532
+ metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
1533
+ request_options: typing.Optional[RequestOptions] = None,
1534
+ ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]]:
1535
+ """
1536
+ Executes a deployed Prompt and streams back the results.
1537
+
1538
+ Parameters
1539
+ ----------
1540
+ inputs : typing.Sequence[PromptDeploymentInputRequest]
1541
+ A list consisting of the Prompt Deployment's input variables and their values.
1542
+
1543
+ prompt_deployment_id : typing.Optional[str]
1544
+ The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
1545
+
1546
+ prompt_deployment_name : typing.Optional[str]
1547
+ The unique name of the Prompt Deployment. Must provide either this or prompt_deployment_id.
1548
+
1549
+ release_tag : typing.Optional[str]
1550
+ Optionally specify a release tag if you want to pin to a specific release of the Prompt Deployment
1551
+
1552
+ external_id : typing.Optional[str]
1553
+ Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.
1554
+
1555
+ expand_meta : typing.Optional[PromptDeploymentExpandMetaRequest]
1556
+ An optionally specified configuration used to opt in to including additional metadata about this prompt execution in the API response. Corresponding values will be returned under the `meta` key of the API response.
1557
+
1558
+ raw_overrides : typing.Optional[RawPromptExecutionOverridesRequest]
1559
+ Overrides for the raw API request sent to the model host. Combined with `expand_raw`, it can be used to access new features from models.
1560
+
1561
+ expand_raw : typing.Optional[typing.Sequence[str]]
1562
+ A list of keys whose values you'd like to directly return from the JSON response of the model provider. Useful if you need lower-level info returned by model providers that Vellum would otherwise omit. Corresponding key/value pairs will be returned under the `raw` key of the API response.
1563
+
1564
+ metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
1565
+ Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.
1566
+
1567
+ request_options : typing.Optional[RequestOptions]
1568
+ Request-specific configuration.
1569
+
1570
+ Yields
1571
+ ------
1572
+ typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]]
1573
+
1574
+ """
1575
+ async with self._client_wrapper.httpx_client.stream(
1576
+ "v1/execute-prompt-stream",
1577
+ base_url=self._client_wrapper.get_environment().predict,
1578
+ method="POST",
1579
+ json={
1580
+ "inputs": convert_and_respect_annotation_metadata(
1581
+ object_=inputs, annotation=typing.Sequence[PromptDeploymentInputRequest], direction="write"
1582
+ ),
1583
+ "prompt_deployment_id": prompt_deployment_id,
1584
+ "prompt_deployment_name": prompt_deployment_name,
1585
+ "release_tag": release_tag,
1586
+ "external_id": external_id,
1587
+ "expand_meta": convert_and_respect_annotation_metadata(
1588
+ object_=expand_meta,
1589
+ annotation=typing.Optional[PromptDeploymentExpandMetaRequest],
1590
+ direction="write",
1591
+ ),
1592
+ "raw_overrides": convert_and_respect_annotation_metadata(
1593
+ object_=raw_overrides,
1594
+ annotation=typing.Optional[RawPromptExecutionOverridesRequest],
1595
+ direction="write",
1596
+ ),
1597
+ "expand_raw": expand_raw,
1598
+ "metadata": metadata,
1599
+ },
1600
+ headers={
1601
+ "content-type": "application/json",
1602
+ },
1603
+ request_options=request_options,
1604
+ omit=OMIT,
1605
+ ) as _response:
1606
+
1607
+ async def stream() -> AsyncHttpResponse[typing.AsyncIterator[ExecutePromptEvent]]:
1608
+ try:
1609
+ if 200 <= _response.status_code < 300:
1610
+
1611
+ async def _iter():
1612
+ async for _text in _response.aiter_lines():
1613
+ try:
1614
+ if len(_text) == 0:
1615
+ continue
1616
+ yield typing.cast(
1617
+ ExecutePromptEvent,
1618
+ parse_obj_as(
1619
+ type_=ExecutePromptEvent, # type: ignore
1620
+ object_=json.loads(_text),
1621
+ ),
1622
+ )
1623
+ except Exception:
1624
+ pass
1625
+ return
1626
+
1627
+ return AsyncHttpResponse(response=_response, data=_iter())
1628
+ await _response.aread()
1629
+ if _response.status_code == 400:
1630
+ raise BadRequestError(
1631
+ typing.cast(
1632
+ typing.Optional[typing.Any],
1633
+ parse_obj_as(
1634
+ type_=typing.Optional[typing.Any], # type: ignore
1635
+ object_=_response.json(),
1636
+ ),
1637
+ )
1638
+ )
1639
+ if _response.status_code == 403:
1640
+ raise ForbiddenError(
1641
+ typing.cast(
1642
+ typing.Optional[typing.Any],
1643
+ parse_obj_as(
1644
+ type_=typing.Optional[typing.Any], # type: ignore
1645
+ object_=_response.json(),
1646
+ ),
1647
+ )
1648
+ )
1649
+ if _response.status_code == 404:
1650
+ raise NotFoundError(
1651
+ typing.cast(
1652
+ typing.Optional[typing.Any],
1653
+ parse_obj_as(
1654
+ type_=typing.Optional[typing.Any], # type: ignore
1655
+ object_=_response.json(),
1656
+ ),
1657
+ )
1658
+ )
1659
+ if _response.status_code == 500:
1660
+ raise InternalServerError(
1661
+ typing.cast(
1662
+ typing.Optional[typing.Any],
1663
+ parse_obj_as(
1664
+ type_=typing.Optional[typing.Any], # type: ignore
1665
+ object_=_response.json(),
1666
+ ),
1667
+ )
1668
+ )
1669
+ _response_json = _response.json()
1670
+ except JSONDecodeError:
1671
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1672
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1673
+
1674
+ yield await stream()
1675
+
1095
1676
  async def execute_workflow(
1096
1677
  self,
1097
1678
  *,
@@ -1206,6 +1787,141 @@ class AsyncRawVellum:
1206
1787
  raise ApiError(status_code=_response.status_code, body=_response.text)
1207
1788
  raise ApiError(status_code=_response.status_code, body=_response_json)
1208
1789
 
1790
    @contextlib.asynccontextmanager
    async def execute_workflow_stream(
        self,
        *,
        inputs: typing.Sequence[WorkflowRequestInputRequest],
        expand_meta: typing.Optional[WorkflowExpandMetaRequest] = OMIT,
        workflow_deployment_id: typing.Optional[str] = OMIT,
        workflow_deployment_name: typing.Optional[str] = OMIT,
        release_tag: typing.Optional[str] = OMIT,
        external_id: typing.Optional[str] = OMIT,
        event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]]:
        """
        Executes a deployed Workflow and streams back its results.

        Parameters
        ----------
        inputs : typing.Sequence[WorkflowRequestInputRequest]
            The list of inputs defined in the Workflow's Deployment with their corresponding values.

        expand_meta : typing.Optional[WorkflowExpandMetaRequest]
            An optionally specified configuration used to opt in to including additional metadata about this workflow execution in the API response. Corresponding values will be returned under the `execution_meta` key within NODE events in the response stream.

        workflow_deployment_id : typing.Optional[str]
            The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.

        workflow_deployment_name : typing.Optional[str]
            The name of the Workflow Deployment. Must provide either this or workflow_deployment_id.

        release_tag : typing.Optional[str]
            Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment

        external_id : typing.Optional[str]
            Optionally include a unique identifier for tracking purposes. Must be unique within a given Workspace.

        event_types : typing.Optional[typing.Sequence[WorkflowExecutionEventType]]
            Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.

        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
            Arbitrary JSON metadata associated with this request. Can be used to capture additional monitoring data such as user id, session id, etc. for future analysis.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]]

        """
        # The HTTP connection must stay open while the caller consumes the
        # event iterator, so this method is itself an async context manager
        # that wraps the underlying httpx streaming context.
        async with self._client_wrapper.httpx_client.stream(
            "v1/execute-workflow-stream",
            base_url=self._client_wrapper.get_environment().predict,
            method="POST",
            json={
                "inputs": convert_and_respect_annotation_metadata(
                    object_=inputs, annotation=typing.Sequence[WorkflowRequestInputRequest], direction="write"
                ),
                "expand_meta": convert_and_respect_annotation_metadata(
                    object_=expand_meta, annotation=typing.Optional[WorkflowExpandMetaRequest], direction="write"
                ),
                "workflow_deployment_id": workflow_deployment_id,
                "workflow_deployment_name": workflow_deployment_name,
                "release_tag": release_tag,
                "external_id": external_id,
                "event_types": event_types,
                "metadata": metadata,
            },
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        ) as _response:

            async def stream() -> AsyncHttpResponse[typing.AsyncIterator[WorkflowStreamEvent]]:
                try:
                    if 200 <= _response.status_code < 300:

                        # Lazily parse each non-empty response line as a
                        # WorkflowStreamEvent; lines that fail to parse are
                        # skipped (best-effort handling of the line stream).
                        async def _iter():
                            async for _text in _response.aiter_lines():
                                try:
                                    if len(_text) == 0:
                                        continue
                                    yield typing.cast(
                                        WorkflowStreamEvent,
                                        parse_obj_as(
                                            type_=WorkflowStreamEvent,  # type: ignore
                                            object_=json.loads(_text),
                                        ),
                                    )
                                except Exception:
                                    pass
                            return

                        return AsyncHttpResponse(response=_response, data=_iter())
                    # Non-2xx: read the full error body before calling
                    # _response.json() on a streamed response.
                    await _response.aread()
                    if _response.status_code == 400:
                        raise BadRequestError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 404:
                        raise NotFoundError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 500:
                        raise InternalServerError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    # Unrecognized status code: fall through to a generic
                    # ApiError carrying the parsed JSON body.
                    _response_json = _response.json()
                except JSONDecodeError:
                    # Error body was not valid JSON; surface the raw text.
                    raise ApiError(status_code=_response.status_code, body=_response.text)
                raise ApiError(status_code=_response.status_code, body=_response_json)

            yield await stream()
+
1209
1925
  async def generate(
1210
1926
  self,
1211
1927
  *,
@@ -1318,6 +2034,134 @@ class AsyncRawVellum:
1318
2034
  raise ApiError(status_code=_response.status_code, body=_response.text)
1319
2035
  raise ApiError(status_code=_response.status_code, body=_response_json)
1320
2036
 
2037
    @contextlib.asynccontextmanager
    async def generate_stream(
        self,
        *,
        requests: typing.Sequence[GenerateRequest],
        deployment_id: typing.Optional[str] = OMIT,
        deployment_name: typing.Optional[str] = OMIT,
        options: typing.Optional[GenerateOptionsRequest] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]]:
        """
        Generate a stream of completions using a previously defined deployment.

        Important: This endpoint is DEPRECATED and has been superseded by
        [execute-prompt-stream](/api-reference/api-reference/execute-prompt-stream).

        Parameters
        ----------
        requests : typing.Sequence[GenerateRequest]
            The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.

        deployment_id : typing.Optional[str]
            The ID of the deployment. Must provide either this or deployment_name.

        deployment_name : typing.Optional[str]
            The name of the deployment. Must provide either this or deployment_id.

        options : typing.Optional[GenerateOptionsRequest]
            Additional configuration that can be used to control what's included in the response.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]]

        """
        # Async context manager so the streamed HTTP connection remains open
        # while the caller iterates the yielded response stream.
        async with self._client_wrapper.httpx_client.stream(
            "v1/generate-stream",
            base_url=self._client_wrapper.get_environment().predict,
            method="POST",
            json={
                "deployment_id": deployment_id,
                "deployment_name": deployment_name,
                "requests": convert_and_respect_annotation_metadata(
                    object_=requests, annotation=typing.Sequence[GenerateRequest], direction="write"
                ),
                "options": convert_and_respect_annotation_metadata(
                    object_=options, annotation=typing.Optional[GenerateOptionsRequest], direction="write"
                ),
            },
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        ) as _response:

            async def stream() -> AsyncHttpResponse[typing.AsyncIterator[GenerateStreamResponse]]:
                try:
                    if 200 <= _response.status_code < 300:

                        # Lazily parse each non-empty response line as a
                        # GenerateStreamResponse; unparseable lines are
                        # skipped (best-effort handling of the line stream).
                        async def _iter():
                            async for _text in _response.aiter_lines():
                                try:
                                    if len(_text) == 0:
                                        continue
                                    yield typing.cast(
                                        GenerateStreamResponse,
                                        parse_obj_as(
                                            type_=GenerateStreamResponse,  # type: ignore
                                            object_=json.loads(_text),
                                        ),
                                    )
                                except Exception:
                                    pass
                            return

                        return AsyncHttpResponse(response=_response, data=_iter())
                    # Non-2xx: read the full error body before calling
                    # _response.json() on a streamed response.
                    await _response.aread()
                    if _response.status_code == 400:
                        raise BadRequestError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 403:
                        raise ForbiddenError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 404:
                        raise NotFoundError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    if _response.status_code == 500:
                        raise InternalServerError(
                            typing.cast(
                                typing.Optional[typing.Any],
                                parse_obj_as(
                                    type_=typing.Optional[typing.Any],  # type: ignore
                                    object_=_response.json(),
                                ),
                            )
                        )
                    # Unrecognized status code: fall through to a generic
                    # ApiError carrying the parsed JSON body.
                    _response_json = _response.json()
                except JSONDecodeError:
                    # Error body was not valid JSON; surface the raw text.
                    raise ApiError(status_code=_response.status_code, body=_response.text)
                raise ApiError(status_code=_response.status_code, body=_response_json)

            yield await stream()
1321
2165
  async def search(
1322
2166
  self,
1323
2167
  *,