vellum-ai 0.3.14__py3-none-any.whl → 0.3.15__py3-none-any.whl

Files changed (318)
  1. vellum/__init__.py +22 -0
  2. vellum/client.py +846 -100
  3. vellum/core/__init__.py +8 -0
  4. vellum/core/client_wrapper.py +27 -8
  5. vellum/core/file.py +38 -0
  6. vellum/core/http_client.py +130 -0
  7. vellum/core/jsonable_encoder.py +2 -2
  8. vellum/core/request_options.py +32 -0
  9. vellum/resources/deployments/client.py +161 -24
  10. vellum/resources/document_indexes/client.py +337 -80
  11. vellum/resources/documents/client.py +278 -58
  12. vellum/resources/folder_entities/client.py +60 -10
  13. vellum/resources/model_versions/client.py +48 -8
  14. vellum/resources/registered_prompts/client.py +52 -6
  15. vellum/resources/sandboxes/client.py +108 -26
  16. vellum/resources/test_suite_runs/client.py +221 -38
  17. vellum/resources/test_suites/client.py +110 -24
  18. vellum/resources/workflow_deployments/client.py +110 -14
  19. vellum/types/__init__.py +20 -0
  20. vellum/types/api_node_result.py +1 -0
  21. vellum/types/api_node_result_data.py +4 -2
  22. vellum/types/array_chat_message_content.py +1 -0
  23. vellum/types/array_chat_message_content_item.py +6 -5
  24. vellum/types/array_chat_message_content_item_request.py +6 -5
  25. vellum/types/array_chat_message_content_request.py +1 -0
  26. vellum/types/array_enum.py +2 -2
  27. vellum/types/array_variable_value_item.py +16 -10
  28. vellum/types/chat_history_enum.py +2 -2
  29. vellum/types/chat_history_input_request.py +6 -1
  30. vellum/types/chat_history_variable_value.py +2 -1
  31. vellum/types/chat_message.py +7 -5
  32. vellum/types/chat_message_content.py +8 -6
  33. vellum/types/chat_message_content_request.py +8 -6
  34. vellum/types/chat_message_request.py +7 -5
  35. vellum/types/code_execution_node_array_result.py +31 -0
  36. vellum/types/code_execution_node_chat_history_result.py +2 -1
  37. vellum/types/code_execution_node_error_result.py +2 -1
  38. vellum/types/code_execution_node_function_call_result.py +31 -0
  39. vellum/types/code_execution_node_json_result.py +2 -1
  40. vellum/types/code_execution_node_number_result.py +2 -1
  41. vellum/types/code_execution_node_result.py +1 -0
  42. vellum/types/code_execution_node_result_data.py +2 -1
  43. vellum/types/code_execution_node_result_output.py +36 -8
  44. vellum/types/code_execution_node_search_results_result.py +2 -1
  45. vellum/types/code_execution_node_string_result.py +2 -1
  46. vellum/types/conditional_node_result.py +1 -0
  47. vellum/types/conditional_node_result_data.py +2 -1
  48. vellum/types/deployment_provider_payload_response.py +1 -0
  49. vellum/types/deployment_read.py +32 -17
  50. vellum/types/document_document_to_document_index.py +21 -15
  51. vellum/types/document_index_read.py +32 -19
  52. vellum/types/document_read.py +34 -24
  53. vellum/types/document_status.py +2 -2
  54. vellum/types/enriched_normalized_completion.py +36 -19
  55. vellum/types/error_enum.py +2 -2
  56. vellum/types/error_variable_value.py +2 -1
  57. vellum/types/execute_prompt_api_error_response.py +5 -1
  58. vellum/types/execute_prompt_event.py +8 -6
  59. vellum/types/execute_prompt_response.py +4 -4
  60. vellum/types/execute_workflow_error_response.py +5 -1
  61. vellum/types/execute_workflow_response.py +3 -2
  62. vellum/types/execute_workflow_stream_error_response.py +5 -1
  63. vellum/types/execute_workflow_workflow_result_event.py +4 -4
  64. vellum/types/execution_array_vellum_value.py +7 -2
  65. vellum/types/execution_chat_history_vellum_value.py +7 -2
  66. vellum/types/execution_error_vellum_value.py +7 -2
  67. vellum/types/execution_function_call_vellum_value.py +7 -2
  68. vellum/types/execution_json_vellum_value.py +7 -2
  69. vellum/types/execution_number_vellum_value.py +7 -2
  70. vellum/types/execution_search_results_vellum_value.py +7 -2
  71. vellum/types/execution_string_vellum_value.py +7 -2
  72. vellum/types/execution_vellum_value.py +16 -10
  73. vellum/types/fulfilled_enum.py +2 -2
  74. vellum/types/fulfilled_execute_prompt_event.py +2 -1
  75. vellum/types/fulfilled_execute_prompt_response.py +12 -5
  76. vellum/types/fulfilled_execute_workflow_workflow_result_event.py +1 -0
  77. vellum/types/fulfilled_function_call.py +2 -1
  78. vellum/types/fulfilled_prompt_execution_meta.py +3 -2
  79. vellum/types/fulfilled_workflow_node_result_event.py +6 -5
  80. vellum/types/function_call.py +4 -4
  81. vellum/types/function_call_chat_message_content.py +1 -0
  82. vellum/types/function_call_chat_message_content_request.py +1 -0
  83. vellum/types/function_call_chat_message_content_value.py +2 -1
  84. vellum/types/function_call_chat_message_content_value_request.py +2 -1
  85. vellum/types/function_call_enum.py +2 -2
  86. vellum/types/function_call_variable_value.py +1 -0
  87. vellum/types/generate_error_response.py +5 -1
  88. vellum/types/generate_options_request.py +8 -5
  89. vellum/types/generate_request.py +15 -9
  90. vellum/types/generate_result.py +10 -6
  91. vellum/types/generate_result_data.py +5 -3
  92. vellum/types/generate_result_error.py +5 -1
  93. vellum/types/generate_stream_response.py +1 -0
  94. vellum/types/generate_stream_result.py +3 -2
  95. vellum/types/generate_stream_result_data.py +1 -0
  96. vellum/types/image_chat_message_content.py +1 -0
  97. vellum/types/image_chat_message_content_request.py +1 -0
  98. vellum/types/image_enum.py +2 -2
  99. vellum/types/image_variable_value.py +2 -1
  100. vellum/types/initiated_enum.py +2 -2
  101. vellum/types/initiated_execute_prompt_event.py +2 -1
  102. vellum/types/initiated_prompt_execution_meta.py +5 -4
  103. vellum/types/initiated_workflow_node_result_event.py +5 -4
  104. vellum/types/json_enum.py +2 -2
  105. vellum/types/json_input_request.py +6 -1
  106. vellum/types/json_variable_value.py +2 -1
  107. vellum/types/logprobs_enum.py +2 -2
  108. vellum/types/metadata_filter_config_request.py +7 -6
  109. vellum/types/metadata_filter_rule_request.py +7 -6
  110. vellum/types/model_version_build_config.py +12 -7
  111. vellum/types/model_version_exec_config.py +18 -11
  112. vellum/types/model_version_exec_config_parameters.py +7 -6
  113. vellum/types/model_version_read.py +51 -32
  114. vellum/types/model_version_sandbox_snapshot.py +17 -4
  115. vellum/types/named_test_case_chat_history_variable_value_request.py +2 -1
  116. vellum/types/named_test_case_error_variable_value_request.py +2 -1
  117. vellum/types/named_test_case_json_variable_value_request.py +2 -1
  118. vellum/types/named_test_case_number_variable_value_request.py +2 -1
  119. vellum/types/named_test_case_search_results_variable_value_request.py +2 -1
  120. vellum/types/named_test_case_string_variable_value_request.py +2 -1
  121. vellum/types/named_test_case_variable_value_request.py +12 -8
  122. vellum/types/node_input_compiled_array_value.py +2 -1
  123. vellum/types/node_input_compiled_chat_history_value.py +2 -1
  124. vellum/types/node_input_compiled_error_value.py +2 -1
  125. vellum/types/node_input_compiled_function_call.py +32 -0
  126. vellum/types/node_input_compiled_json_value.py +2 -1
  127. vellum/types/node_input_compiled_number_value.py +2 -1
  128. vellum/types/node_input_compiled_search_results_value.py +2 -1
  129. vellum/types/node_input_compiled_string_value.py +2 -1
  130. vellum/types/node_input_variable_compiled_value.py +26 -9
  131. vellum/types/node_output_compiled_array_value.py +2 -1
  132. vellum/types/node_output_compiled_chat_history_value.py +2 -1
  133. vellum/types/node_output_compiled_error_value.py +2 -1
  134. vellum/types/node_output_compiled_function_value.py +2 -1
  135. vellum/types/node_output_compiled_json_value.py +2 -1
  136. vellum/types/node_output_compiled_number_value.py +2 -1
  137. vellum/types/node_output_compiled_search_results_value.py +2 -1
  138. vellum/types/node_output_compiled_string_value.py +2 -1
  139. vellum/types/node_output_compiled_value.py +16 -10
  140. vellum/types/normalized_log_probs.py +2 -1
  141. vellum/types/normalized_token_log_probs.py +3 -2
  142. vellum/types/number_enum.py +2 -2
  143. vellum/types/number_variable_value.py +2 -1
  144. vellum/types/paginated_document_index_read_list.py +5 -4
  145. vellum/types/paginated_slim_deployment_read_list.py +5 -4
  146. vellum/types/paginated_slim_document_list.py +5 -4
  147. vellum/types/paginated_slim_workflow_deployment_list.py +5 -4
  148. vellum/types/paginated_test_suite_run_execution_list.py +5 -4
  149. vellum/types/prompt_deployment_expand_meta_request_request.py +25 -15
  150. vellum/types/prompt_deployment_input_request.py +6 -5
  151. vellum/types/prompt_execution_meta.py +6 -5
  152. vellum/types/prompt_node_result.py +1 -0
  153. vellum/types/prompt_node_result_data.py +4 -3
  154. vellum/types/prompt_output.py +8 -6
  155. vellum/types/prompt_template_block.py +1 -0
  156. vellum/types/prompt_template_block_data.py +1 -0
  157. vellum/types/prompt_template_block_data_request.py +1 -0
  158. vellum/types/prompt_template_block_properties.py +11 -10
  159. vellum/types/prompt_template_block_properties_request.py +11 -10
  160. vellum/types/prompt_template_block_request.py +1 -0
  161. vellum/types/raw_prompt_execution_overrides_request.py +11 -5
  162. vellum/types/register_prompt_error_response.py +5 -1
  163. vellum/types/register_prompt_model_parameters_request.py +5 -4
  164. vellum/types/register_prompt_prompt.py +10 -2
  165. vellum/types/register_prompt_prompt_info_request.py +5 -3
  166. vellum/types/register_prompt_response.py +30 -10
  167. vellum/types/registered_prompt_deployment.py +15 -3
  168. vellum/types/registered_prompt_input_variable_request.py +3 -2
  169. vellum/types/registered_prompt_model_version.py +10 -2
  170. vellum/types/registered_prompt_sandbox.py +10 -2
  171. vellum/types/registered_prompt_sandbox_snapshot.py +5 -1
  172. vellum/types/rejected_enum.py +2 -2
  173. vellum/types/rejected_execute_prompt_event.py +2 -1
  174. vellum/types/rejected_execute_prompt_response.py +12 -5
  175. vellum/types/rejected_execute_workflow_workflow_result_event.py +1 -0
  176. vellum/types/rejected_function_call.py +2 -1
  177. vellum/types/rejected_prompt_execution_meta.py +3 -2
  178. vellum/types/rejected_workflow_node_result_event.py +4 -3
  179. vellum/types/sandbox_scenario.py +11 -3
  180. vellum/types/scenario_input.py +4 -3
  181. vellum/types/scenario_input_request.py +4 -3
  182. vellum/types/search_error_response.py +5 -1
  183. vellum/types/search_filters_request.py +10 -6
  184. vellum/types/search_node_result.py +1 -0
  185. vellum/types/search_node_result_data.py +7 -4
  186. vellum/types/search_request_options_request.py +20 -8
  187. vellum/types/search_response.py +5 -3
  188. vellum/types/search_result.py +15 -5
  189. vellum/types/search_result_document.py +20 -8
  190. vellum/types/search_result_document_request.py +15 -7
  191. vellum/types/search_result_merging_request.py +5 -1
  192. vellum/types/search_result_request.py +15 -5
  193. vellum/types/search_results_enum.py +2 -2
  194. vellum/types/search_results_variable_value.py +2 -1
  195. vellum/types/search_weights_request.py +10 -4
  196. vellum/types/slim_deployment_read.py +28 -14
  197. vellum/types/slim_document.py +56 -35
  198. vellum/types/slim_workflow_deployment.py +37 -24
  199. vellum/types/streaming_enum.py +2 -2
  200. vellum/types/streaming_execute_prompt_event.py +6 -4
  201. vellum/types/streaming_prompt_execution_meta.py +2 -1
  202. vellum/types/streaming_workflow_node_result_event.py +6 -5
  203. vellum/types/string_chat_message_content.py +1 -0
  204. vellum/types/string_chat_message_content_request.py +1 -0
  205. vellum/types/string_enum.py +2 -2
  206. vellum/types/string_input_request.py +6 -1
  207. vellum/types/string_variable_value.py +2 -1
  208. vellum/types/submit_completion_actual_request.py +25 -13
  209. vellum/types/submit_completion_actuals_error_response.py +1 -0
  210. vellum/types/submit_workflow_execution_actual_request.py +6 -5
  211. vellum/types/subworkflow_enum.py +2 -2
  212. vellum/types/subworkflow_node_result.py +1 -0
  213. vellum/types/templating_node_array_result.py +31 -0
  214. vellum/types/templating_node_chat_history_result.py +2 -1
  215. vellum/types/templating_node_error_result.py +2 -1
  216. vellum/types/templating_node_function_call_result.py +31 -0
  217. vellum/types/templating_node_json_result.py +2 -1
  218. vellum/types/templating_node_number_result.py +2 -1
  219. vellum/types/templating_node_result.py +1 -0
  220. vellum/types/templating_node_result_data.py +1 -0
  221. vellum/types/templating_node_result_output.py +36 -8
  222. vellum/types/templating_node_search_results_result.py +2 -1
  223. vellum/types/templating_node_string_result.py +2 -1
  224. vellum/types/terminal_node_array_result.py +8 -3
  225. vellum/types/terminal_node_chat_history_result.py +8 -3
  226. vellum/types/terminal_node_error_result.py +8 -3
  227. vellum/types/terminal_node_function_call_result.py +8 -3
  228. vellum/types/terminal_node_json_result.py +8 -3
  229. vellum/types/terminal_node_number_result.py +8 -3
  230. vellum/types/terminal_node_result.py +1 -0
  231. vellum/types/terminal_node_result_data.py +1 -0
  232. vellum/types/terminal_node_result_output.py +16 -10
  233. vellum/types/terminal_node_search_results_result.py +8 -3
  234. vellum/types/terminal_node_string_result.py +8 -3
  235. vellum/types/test_case_chat_history_variable_value.py +2 -1
  236. vellum/types/test_case_error_variable_value.py +2 -1
  237. vellum/types/test_case_json_variable_value.py +2 -1
  238. vellum/types/test_case_number_variable_value.py +2 -1
  239. vellum/types/test_case_search_results_variable_value.py +2 -1
  240. vellum/types/test_case_string_variable_value.py +2 -1
  241. vellum/types/test_case_variable_value.py +12 -8
  242. vellum/types/test_suite_run_deployment_release_tag_exec_config.py +5 -3
  243. vellum/types/test_suite_run_deployment_release_tag_exec_config_data.py +10 -4
  244. vellum/types/test_suite_run_deployment_release_tag_exec_config_data_request.py +10 -4
  245. vellum/types/test_suite_run_deployment_release_tag_exec_config_request.py +5 -3
  246. vellum/types/test_suite_run_deployment_release_tag_exec_config_type_enum.py +2 -2
  247. vellum/types/test_suite_run_exec_config.py +4 -4
  248. vellum/types/test_suite_run_exec_config_request.py +4 -4
  249. vellum/types/test_suite_run_execution.py +1 -0
  250. vellum/types/test_suite_run_execution_chat_history_output.py +2 -1
  251. vellum/types/test_suite_run_execution_error_output.py +2 -1
  252. vellum/types/test_suite_run_execution_json_output.py +2 -1
  253. vellum/types/test_suite_run_execution_metric_result.py +1 -0
  254. vellum/types/test_suite_run_execution_number_output.py +2 -1
  255. vellum/types/test_suite_run_execution_output.py +12 -8
  256. vellum/types/test_suite_run_execution_search_results_output.py +2 -1
  257. vellum/types/test_suite_run_execution_string_output.py +2 -1
  258. vellum/types/test_suite_run_metric_error_output.py +1 -0
  259. vellum/types/test_suite_run_metric_error_output_type_enum.py +2 -2
  260. vellum/types/test_suite_run_metric_number_output.py +1 -0
  261. vellum/types/test_suite_run_metric_number_output_type_enum.py +2 -2
  262. vellum/types/test_suite_run_metric_output.py +4 -4
  263. vellum/types/test_suite_run_read.py +16 -14
  264. vellum/types/test_suite_run_test_suite.py +1 -0
  265. vellum/types/test_suite_run_workflow_release_tag_exec_config.py +5 -3
  266. vellum/types/test_suite_run_workflow_release_tag_exec_config_data.py +10 -6
  267. vellum/types/test_suite_run_workflow_release_tag_exec_config_data_request.py +10 -6
  268. vellum/types/test_suite_run_workflow_release_tag_exec_config_request.py +5 -3
  269. vellum/types/test_suite_run_workflow_release_tag_exec_config_type_enum.py +2 -2
  270. vellum/types/test_suite_test_case.py +3 -2
  271. vellum/types/upload_document_error_response.py +1 -0
  272. vellum/types/upload_document_response.py +5 -1
  273. vellum/types/vellum_error.py +1 -0
  274. vellum/types/vellum_error_request.py +1 -0
  275. vellum/types/vellum_image.py +2 -1
  276. vellum/types/vellum_image_request.py +2 -1
  277. vellum/types/vellum_variable.py +1 -0
  278. vellum/types/workflow_deployment_read.py +37 -24
  279. vellum/types/workflow_event_error.py +1 -0
  280. vellum/types/workflow_execution_actual_chat_history_request.py +25 -15
  281. vellum/types/workflow_execution_actual_json_request.py +25 -15
  282. vellum/types/workflow_execution_actual_string_request.py +25 -15
  283. vellum/types/workflow_execution_node_result_event.py +3 -2
  284. vellum/types/workflow_execution_workflow_result_event.py +3 -2
  285. vellum/types/workflow_node_result_data.py +16 -10
  286. vellum/types/workflow_node_result_event.py +8 -6
  287. vellum/types/workflow_output.py +18 -11
  288. vellum/types/workflow_output_array.py +7 -2
  289. vellum/types/workflow_output_chat_history.py +7 -2
  290. vellum/types/workflow_output_error.py +7 -2
  291. vellum/types/workflow_output_function_call.py +7 -2
  292. vellum/types/workflow_output_image.py +7 -2
  293. vellum/types/workflow_output_json.py +7 -2
  294. vellum/types/workflow_output_number.py +7 -2
  295. vellum/types/workflow_output_search_results.py +7 -2
  296. vellum/types/workflow_output_string.py +7 -2
  297. vellum/types/workflow_request_chat_history_input_request.py +6 -1
  298. vellum/types/workflow_request_input_request.py +8 -6
  299. vellum/types/workflow_request_json_input_request.py +6 -1
  300. vellum/types/workflow_request_number_input_request.py +6 -1
  301. vellum/types/workflow_request_string_input_request.py +6 -1
  302. vellum/types/workflow_result_event.py +5 -4
  303. vellum/types/workflow_result_event_output_data.py +16 -10
  304. vellum/types/workflow_result_event_output_data_array.py +8 -5
  305. vellum/types/workflow_result_event_output_data_chat_history.py +8 -5
  306. vellum/types/workflow_result_event_output_data_error.py +8 -5
  307. vellum/types/workflow_result_event_output_data_function_call.py +8 -5
  308. vellum/types/workflow_result_event_output_data_json.py +8 -5
  309. vellum/types/workflow_result_event_output_data_number.py +8 -5
  310. vellum/types/workflow_result_event_output_data_search_results.py +8 -5
  311. vellum/types/workflow_result_event_output_data_string.py +11 -7
  312. vellum/types/workflow_stream_event.py +4 -4
  313. vellum/version.py +4 -0
  314. {vellum_ai-0.3.14.dist-info → vellum_ai-0.3.15.dist-info}/METADATA +4 -4
  315. vellum_ai-0.3.15.dist-info/RECORD +365 -0
  316. vellum_ai-0.3.14.dist-info/RECORD +0 -356
  317. {vellum_ai-0.3.14.dist-info → vellum_ai-0.3.15.dist-info}/LICENSE +0 -0
  318. {vellum_ai-0.3.14.dist-info → vellum_ai-0.3.15.dist-info}/WHEEL +0 -0
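The headline change in this release is a new per-request `request_options` parameter threaded through every client method, visible throughout the `vellum/client.py` diff below. Its keys (`timeout_in_seconds`, `max_retries`, `additional_headers`, `additional_query_parameters`, `additional_body_parameters`) are taken from the diff; a minimal sketch of how it might be used, with an illustrative deployment name and input that are not from the diff:

    from vellum import PromptDeploymentInputRequest_String
    from vellum.client import Vellum
    from vellum.core.request_options import RequestOptions

    client = Vellum(api_key="YOUR_API_KEY")

    # Sketch: per-call overrides introduced in 0.3.15. RequestOptions keys
    # are confirmed by the .get() calls in the diff below.
    response = client.execute_prompt(
        inputs=[PromptDeploymentInputRequest_String(name="question", value="Hello")],
        prompt_deployment_name="my-deployment",  # illustrative name
        request_options=RequestOptions(
            timeout_in_seconds=30,  # overrides the client-level timeout for this call
            max_retries=2,  # forwarded to the wrapped HTTP client
            additional_headers={"X-Trace-Id": "abc123"},  # merged into default headers
        ),
    )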
vellum/client.py CHANGED
@@ -10,6 +10,8 @@ import httpx
  from .core.api_error import ApiError
  from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
  from .core.jsonable_encoder import jsonable_encoder
+ from .core.remove_none_from_dict import remove_none_from_dict
+ from .core.request_options import RequestOptions
  from .environment import VellumEnvironment
  from .errors.bad_request_error import BadRequestError
  from .errors.forbidden_error import ForbiddenError
@@ -53,6 +55,27 @@ OMIT = typing.cast(typing.Any, ...)


  class Vellum:
+ """
+ Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propogate to these functions.
+
+ Parameters:
+ - environment: VellumEnvironment. The environment to use for requests from the client. from .environment import VellumEnvironment
+
+ Defaults to VellumEnvironment.PRODUCTION
+
+ - api_key: str.
+
+ - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests by default the timeout is 60 seconds, unless a custom httpx client is used, in which case a default is not set.
+
+ - httpx_client: typing.Optional[httpx.Client]. The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
+ ---
+ from vellum.client import Vellum
+
+ client = Vellum(
+ api_key="YOUR_API_KEY",
+ )
+ """
+
  def __init__(
  self,
  *,
@@ -61,10 +84,12 @@ class Vellum:
  timeout: typing.Optional[float] = None,
  httpx_client: typing.Optional[httpx.Client] = None,
  ):
+ _defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None
  self._client_wrapper = SyncClientWrapper(
  environment=environment,
  api_key=api_key,
- httpx_client=httpx.Client(timeout=timeout) if httpx_client is None else httpx_client,
+ httpx_client=httpx.Client(timeout=_defaulted_timeout) if httpx_client is None else httpx_client,
+ timeout=_defaulted_timeout,
  )
  self.deployments = DeploymentsClient(client_wrapper=self._client_wrapper)
  self.document_indexes = DocumentIndexesClient(client_wrapper=self._client_wrapper)
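The `__init__` hunk above changes how the client-level timeout is resolved: it is now computed once and handed to both the bundled `httpx.Client` and the client wrapper, so `request_options` can fall back to it per call. A sketch of the resulting behavior, assuming the 60-second default named in the new docstring:

    import httpx

    from vellum.client import Vellum

    # No explicit timeout and no custom client: the 60s default applies.
    client_default = Vellum(api_key="YOUR_API_KEY")

    # Explicit timeout: used for the bundled httpx.Client and kept as the
    # wrapper-level default that request_options can override per call.
    client_slow = Vellum(api_key="YOUR_API_KEY", timeout=120.0)

    # Custom httpx client: the SDK no longer imposes its own default;
    # whatever the httpx.Client was configured with wins.
    client_custom = Vellum(
        api_key="YOUR_API_KEY",
        httpx_client=httpx.Client(timeout=httpx.Timeout(10.0)),
    )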
@@ -80,21 +105,22 @@ class Vellum:
  def execute_prompt(
  self,
  *,
- inputs: typing.List[PromptDeploymentInputRequest],
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
  prompt_deployment_id: typing.Optional[str] = OMIT,
  prompt_deployment_name: typing.Optional[str] = OMIT,
  release_tag: typing.Optional[str] = OMIT,
  external_id: typing.Optional[str] = OMIT,
  expand_meta: typing.Optional[PromptDeploymentExpandMetaRequestRequest] = OMIT,
  raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
- expand_raw: typing.Optional[typing.List[str]] = OMIT,
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
  metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> ExecutePromptResponse:
  """
  Executes a deployed Prompt and returns the result.

  Parameters:
- - inputs: typing.List[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.
+ - inputs: typing.Sequence[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.

  - prompt_deployment_id: typing.Optional[str]. The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.

@@ -108,9 +134,48 @@ class Vellum:

  - raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest].

- - expand_raw: typing.Optional[typing.List[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.
+ - expand_raw: typing.Optional[typing.Sequence[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.

  - metadata: typing.Optional[typing.Dict[str, typing.Any]].
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+ ---
+ from vellum import (
+ PromptDeploymentExpandMetaRequestRequest,
+ PromptDeploymentInputRequest_String,
+ RawPromptExecutionOverridesRequest,
+ )
+ from vellum.client import Vellum
+
+ client = Vellum(
+ api_key="YOUR_API_KEY",
+ )
+ client.execute_prompt(
+ inputs=[
+ PromptDeploymentInputRequest_String(
+ name="string",
+ value="string",
+ )
+ ],
+ prompt_deployment_id="string",
+ prompt_deployment_name="string",
+ release_tag="string",
+ external_id="string",
+ expand_meta=PromptDeploymentExpandMetaRequestRequest(
+ model_name=True,
+ latency=True,
+ deployment_release_tag=True,
+ prompt_version_id=True,
+ finish_reason=True,
+ ),
+ raw_overrides=RawPromptExecutionOverridesRequest(
+ body={"string": {"key": "value"}},
+ headers={"string": {"key": "value"}},
+ url="string",
+ ),
+ expand_raw=["string"],
+ metadata={"string": {"key": "value"}},
+ )
  """
  _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
  if prompt_deployment_id is not OMIT:
@@ -132,9 +197,28 @@ class Vellum:
  _response = self._client_wrapper.httpx_client.request(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-prompt"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  )
  if 200 <= _response.status_code < 300:
  return pydantic.parse_obj_as(ExecutePromptResponse, _response.json()) # type: ignore
@@ -155,21 +239,22 @@ class Vellum:
  def execute_prompt_stream(
  self,
  *,
- inputs: typing.List[PromptDeploymentInputRequest],
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
  prompt_deployment_id: typing.Optional[str] = OMIT,
  prompt_deployment_name: typing.Optional[str] = OMIT,
  release_tag: typing.Optional[str] = OMIT,
  external_id: typing.Optional[str] = OMIT,
  expand_meta: typing.Optional[PromptDeploymentExpandMetaRequestRequest] = OMIT,
  raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
- expand_raw: typing.Optional[typing.List[str]] = OMIT,
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
  metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.Iterator[ExecutePromptEvent]:
  """
  Executes a deployed Prompt and streams back the results.

  Parameters:
- - inputs: typing.List[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.
+ - inputs: typing.Sequence[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.

  - prompt_deployment_id: typing.Optional[str]. The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.

@@ -183,9 +268,48 @@ class Vellum:

  - raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest].

- - expand_raw: typing.Optional[typing.List[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.
+ - expand_raw: typing.Optional[typing.Sequence[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.

  - metadata: typing.Optional[typing.Dict[str, typing.Any]].
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+ ---
+ from vellum import (
+ PromptDeploymentExpandMetaRequestRequest,
+ PromptDeploymentInputRequest_String,
+ RawPromptExecutionOverridesRequest,
+ )
+ from vellum.client import Vellum
+
+ client = Vellum(
+ api_key="YOUR_API_KEY",
+ )
+ client.execute_prompt_stream(
+ inputs=[
+ PromptDeploymentInputRequest_String(
+ name="string",
+ value="string",
+ )
+ ],
+ prompt_deployment_id="string",
+ prompt_deployment_name="string",
+ release_tag="string",
+ external_id="string",
+ expand_meta=PromptDeploymentExpandMetaRequestRequest(
+ model_name=True,
+ latency=True,
+ deployment_release_tag=True,
+ prompt_version_id=True,
+ finish_reason=True,
+ ),
+ raw_overrides=RawPromptExecutionOverridesRequest(
+ body={"string": {"key": "value"}},
+ headers={"string": {"key": "value"}},
+ url="string",
+ ),
+ expand_raw=["string"],
+ metadata={"string": {"key": "value"}},
+ )
  """
  _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
  if prompt_deployment_id is not OMIT:
@@ -207,9 +331,28 @@ class Vellum:
  with self._client_wrapper.httpx_client.stream(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-prompt-stream"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  ) as _response:
  if 200 <= _response.status_code < 300:
  for _text in _response.iter_lines():
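The docstring example above builds the call but never consumes the result; `execute_prompt_stream` returns `typing.Iterator[ExecutePromptEvent]`. A sketch of a consumer, noting that the `state` discriminator values ("INITIATED" / "STREAMING" / "FULFILLED" / "REJECTED") are inferred from the event types in the file list rather than spelled out in this diff:

    from vellum import PromptDeploymentInputRequest_String
    from vellum.client import Vellum

    client = Vellum(api_key="YOUR_API_KEY")

    # Iterate the events yielded by the generator above.
    for event in client.execute_prompt_stream(
        inputs=[PromptDeploymentInputRequest_String(name="question", value="Hello")],
        prompt_deployment_name="my-deployment",  # illustrative name
    ):
        if event.state == "STREAMING":
            print(event.output)  # incremental PromptOutput chunks
        elif event.state == "REJECTED":
            print("error:", event.error)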
@@ -235,17 +378,18 @@ class Vellum:
  def execute_workflow(
  self,
  *,
- inputs: typing.List[WorkflowRequestInputRequest],
+ inputs: typing.Sequence[WorkflowRequestInputRequest],
  workflow_deployment_id: typing.Optional[str] = OMIT,
  workflow_deployment_name: typing.Optional[str] = OMIT,
  release_tag: typing.Optional[str] = OMIT,
  external_id: typing.Optional[str] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> ExecuteWorkflowResponse:
  """
  Executes a deployed Workflow and returns its outputs.

  Parameters:
- - inputs: typing.List[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.
+ - inputs: typing.Sequence[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.

  - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.

@@ -254,6 +398,27 @@ class Vellum:
  - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment

  - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes. Must be unique for a given workflow deployment.
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+ ---
+ from vellum import WorkflowRequestInputRequest_String
+ from vellum.client import Vellum
+
+ client = Vellum(
+ api_key="YOUR_API_KEY",
+ )
+ client.execute_workflow(
+ inputs=[
+ WorkflowRequestInputRequest_String(
+ name="string",
+ value="string",
+ )
+ ],
+ workflow_deployment_id="string",
+ workflow_deployment_name="string",
+ release_tag="string",
+ external_id="string",
+ )
  """
  _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
  if workflow_deployment_id is not OMIT:
@@ -267,9 +432,28 @@ class Vellum:
  _response = self._client_wrapper.httpx_client.request(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-workflow"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  )
  if 200 <= _response.status_code < 300:
  return pydantic.parse_obj_as(ExecuteWorkflowResponse, _response.json()) # type: ignore
@@ -288,18 +472,19 @@ class Vellum:
  def execute_workflow_stream(
  self,
  *,
- inputs: typing.List[WorkflowRequestInputRequest],
+ inputs: typing.Sequence[WorkflowRequestInputRequest],
  workflow_deployment_id: typing.Optional[str] = OMIT,
  workflow_deployment_name: typing.Optional[str] = OMIT,
  release_tag: typing.Optional[str] = OMIT,
  external_id: typing.Optional[str] = OMIT,
- event_types: typing.Optional[typing.List[WorkflowExecutionEventType]] = OMIT,
+ event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.Iterator[WorkflowStreamEvent]:
  """
  Executes a deployed Workflow and streams back its results.

  Parameters:
- - inputs: typing.List[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.
+ - inputs: typing.Sequence[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.

  - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.

@@ -309,7 +494,32 @@ class Vellum:

  - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes. Must be unique for a given workflow deployment.

- - event_types: typing.Optional[typing.List[WorkflowExecutionEventType]]. Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
+ - event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]]. Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+ ---
+ from vellum import (
+ WorkflowExecutionEventType,
+ WorkflowRequestInputRequest_String,
+ )
+ from vellum.client import Vellum
+
+ client = Vellum(
+ api_key="YOUR_API_KEY",
+ )
+ client.execute_workflow_stream(
+ inputs=[
+ WorkflowRequestInputRequest_String(
+ name="string",
+ value="string",
+ )
+ ],
+ workflow_deployment_id="string",
+ workflow_deployment_name="string",
+ release_tag="string",
+ external_id="string",
+ event_types=[WorkflowExecutionEventType.NODE],
+ )
  """
  _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
  if workflow_deployment_id is not OMIT:
@@ -325,9 +535,28 @@ class Vellum:
  with self._client_wrapper.httpx_client.stream(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-workflow-stream"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  ) as _response:
  if 200 <= _response.status_code < 300:
  for _text in _response.iter_lines():
@@ -353,8 +582,9 @@ class Vellum:
  *,
  deployment_id: typing.Optional[str] = OMIT,
  deployment_name: typing.Optional[str] = OMIT,
- requests: typing.List[GenerateRequest],
+ requests: typing.Sequence[GenerateRequest],
  options: typing.Optional[GenerateOptionsRequest] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> GenerateResponse:
  """
  Generate a completion using a previously defined deployment.
@@ -366,9 +596,11 @@ class Vellum:

  - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.

- - requests: typing.List[GenerateRequest]. The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
+ - requests: typing.Sequence[GenerateRequest]. The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.

  - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
  ---
  from vellum import GenerateRequest
  from vellum.client import Vellum
@@ -394,9 +626,28 @@ class Vellum:
  _response = self._client_wrapper.httpx_client.request(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/generate"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  )
  if 200 <= _response.status_code < 300:
  return pydantic.parse_obj_as(GenerateResponse, _response.json()) # type: ignore
@@ -419,8 +670,9 @@ class Vellum:
  *,
  deployment_id: typing.Optional[str] = OMIT,
  deployment_name: typing.Optional[str] = OMIT,
- requests: typing.List[GenerateRequest],
+ requests: typing.Sequence[GenerateRequest],
  options: typing.Optional[GenerateOptionsRequest] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.Iterator[GenerateStreamResponse]:
  """
  Generate a stream of completions using a previously defined deployment.
@@ -432,9 +684,46 @@ class Vellum:

  - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.

- - requests: typing.List[GenerateRequest]. The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
+ - requests: typing.Sequence[GenerateRequest]. The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.

  - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+ ---
+ from vellum import (
+ ChatMessageContentRequest_String,
+ ChatMessageRequest,
+ ChatMessageRole,
+ GenerateOptionsRequest,
+ GenerateRequest,
+ LogprobsEnum,
+ )
+ from vellum.client import Vellum
+
+ client = Vellum(
+ api_key="YOUR_API_KEY",
+ )
+ client.generate_stream(
+ deployment_id="string",
+ deployment_name="string",
+ requests=[
+ GenerateRequest(
+ input_values={"string": {"key": "value"}},
+ chat_history=[
+ ChatMessageRequest(
+ text="string",
+ role=ChatMessageRole.SYSTEM,
+ content=ChatMessageContentRequest_String(),
+ source="string",
+ )
+ ],
+ external_ids=["string"],
+ )
+ ],
+ options=GenerateOptionsRequest(
+ logprobs=LogprobsEnum.ALL,
+ ),
+ )
  """
  _request: typing.Dict[str, typing.Any] = {"requests": requests}
  if deployment_id is not OMIT:
@@ -446,9 +735,28 @@ class Vellum:
  with self._client_wrapper.httpx_client.stream(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/generate-stream"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  ) as _response:
  if 200 <= _response.status_code < 300:
  for _text in _response.iter_lines():
@@ -478,6 +786,7 @@ class Vellum:
  index_name: typing.Optional[str] = OMIT,
  query: str,
  options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> SearchResponse:
  """
  Perform a search against a document index.
@@ -492,6 +801,8 @@ class Vellum:
  - query: str. The query to search for.

  - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
  ---
  from vellum.client import Vellum

@@ -512,9 +823,28 @@ class Vellum:
  _response = self._client_wrapper.httpx_client.request(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/search"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  )
  if 200 <= _response.status_code < 300:
  return pydantic.parse_obj_as(SearchResponse, _response.json()) # type: ignore
@@ -535,7 +865,8 @@ class Vellum:
  *,
  deployment_id: typing.Optional[str] = OMIT,
  deployment_name: typing.Optional[str] = OMIT,
- actuals: typing.List[SubmitCompletionActualRequest],
+ actuals: typing.Sequence[SubmitCompletionActualRequest],
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> None:
  """
  Used to submit feedback regarding the quality of previously generated completions.
@@ -547,7 +878,9 @@ class Vellum:

  - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.

- - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+ - actuals: typing.Sequence[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
  ---
  from vellum import SubmitCompletionActualRequest
  from vellum.client import Vellum
@@ -567,9 +900,28 @@ class Vellum:
  _response = self._client_wrapper.httpx_client.request(
  "POST",
  urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/submit-completion-actuals"),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  )
  if 200 <= _response.status_code < 300:
  return
@@ -588,9 +940,10 @@ class Vellum:
  def submit_workflow_execution_actuals(
  self,
  *,
- actuals: typing.List[SubmitWorkflowExecutionActualRequest],
+ actuals: typing.Sequence[SubmitWorkflowExecutionActualRequest],
  execution_id: typing.Optional[str] = OMIT,
  external_id: typing.Optional[str] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
  ) -> None:
  """
  Used to submit feedback regarding the quality of previous workflow execution and its outputs.
@@ -598,11 +951,13 @@ class Vellum:
  **Note:** Uses a base url of `https://predict.vellum.ai`.

  Parameters:
- - actuals: typing.List[SubmitWorkflowExecutionActualRequest]. Feedback regarding the quality of an output on a previously executed workflow.
+ - actuals: typing.Sequence[SubmitWorkflowExecutionActualRequest]. Feedback regarding the quality of an output on a previously executed workflow.

  - execution_id: typing.Optional[str]. The Vellum-generated ID of a previously executed workflow. Must provide either this or external_id.

  - external_id: typing.Optional[str]. The external ID that was originally provided by when executing the workflow, if applicable, that you'd now like to submit actuals for. Must provide either this or execution_id.
+
+ - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
  ---
  from vellum.client import Vellum

@@ -623,9 +978,28 @@ class Vellum:
  urllib.parse.urljoin(
  f"{self._client_wrapper.get_environment().predict}/", "v1/submit-workflow-execution-actuals"
  ),
- json=jsonable_encoder(_request),
- headers=self._client_wrapper.get_headers(),
- timeout=None,
+ params=jsonable_encoder(
+ request_options.get("additional_query_parameters") if request_options is not None else None
+ ),
+ json=jsonable_encoder(_request)
+ if request_options is None or request_options.get("additional_body_parameters") is None
+ else {
+ **jsonable_encoder(_request),
+ **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+ },
+ headers=jsonable_encoder(
+ remove_none_from_dict(
+ {
+ **self._client_wrapper.get_headers(),
+ **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+ }
+ )
+ ),
+ timeout=request_options.get("timeout_in_seconds")
+ if request_options is not None and request_options.get("timeout_in_seconds") is not None
+ else self._client_wrapper.get_timeout(),
+ retries=0,
+ max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
  )
  if 200 <= _response.status_code < 300:
  return
@@ -637,6 +1011,27 @@ class Vellum:


  class AsyncVellum:
+ """
+ Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propogate to these functions.
+
+ Parameters:
+ - environment: VellumEnvironment. The environment to use for requests from the client. from .environment import VellumEnvironment
+
+ Defaults to VellumEnvironment.PRODUCTION
+
+ - api_key: str.
+
+ - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests by default the timeout is 60 seconds, unless a custom httpx client is used, in which case a default is not set.
+
+ - httpx_client: typing.Optional[httpx.AsyncClient]. The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
+ ---
+ from vellum.client import AsyncVellum
+
+ client = AsyncVellum(
+ api_key="YOUR_API_KEY",
+ )
+ """
+
  def __init__(
  self,
  *,
@@ -645,10 +1040,12 @@ class AsyncVellum:
  timeout: typing.Optional[float] = None,
  httpx_client: typing.Optional[httpx.AsyncClient] = None,
  ):
+ _defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None
  self._client_wrapper = AsyncClientWrapper(
  environment=environment,
  api_key=api_key,
- httpx_client=httpx.AsyncClient(timeout=timeout) if httpx_client is None else httpx_client,
+ httpx_client=httpx.AsyncClient(timeout=_defaulted_timeout) if httpx_client is None else httpx_client,
+ timeout=_defaulted_timeout,
  )
  self.deployments = AsyncDeploymentsClient(client_wrapper=self._client_wrapper)
  self.document_indexes = AsyncDocumentIndexesClient(client_wrapper=self._client_wrapper)
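`AsyncVellum` mirrors the sync client method-for-method; its docstring examples below use bare `await`, which only works inside a coroutine. A runnable wrapper might look like this sketch (deployment name and input are illustrative):

    import asyncio

    from vellum import PromptDeploymentInputRequest_String
    from vellum.client import AsyncVellum


    async def main() -> None:
        client = AsyncVellum(api_key="YOUR_API_KEY")
        # Same signature as the sync client, awaited inside a coroutine.
        result = await client.execute_prompt(
            inputs=[PromptDeploymentInputRequest_String(name="question", value="Hello")],
            prompt_deployment_name="my-deployment",  # illustrative name
        )
        print(result)


    asyncio.run(main())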
@@ -664,21 +1061,22 @@ class AsyncVellum:
664
1061
  async def execute_prompt(
665
1062
  self,
666
1063
  *,
667
- inputs: typing.List[PromptDeploymentInputRequest],
1064
+ inputs: typing.Sequence[PromptDeploymentInputRequest],
668
1065
  prompt_deployment_id: typing.Optional[str] = OMIT,
669
1066
  prompt_deployment_name: typing.Optional[str] = OMIT,
670
1067
  release_tag: typing.Optional[str] = OMIT,
671
1068
  external_id: typing.Optional[str] = OMIT,
672
1069
  expand_meta: typing.Optional[PromptDeploymentExpandMetaRequestRequest] = OMIT,
673
1070
  raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
674
- expand_raw: typing.Optional[typing.List[str]] = OMIT,
1071
+ expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
675
1072
  metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
1073
+ request_options: typing.Optional[RequestOptions] = None,
676
1074
  ) -> ExecutePromptResponse:
677
1075
  """
678
1076
  Executes a deployed Prompt and returns the result.
679
1077
 
680
1078
  Parameters:
681
- - inputs: typing.List[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.
1079
+ - inputs: typing.Sequence[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.
682
1080
 
683
1081
  - prompt_deployment_id: typing.Optional[str]. The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.
684
1082
 
@@ -692,9 +1090,48 @@ class AsyncVellum:

              - raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest].

-             - expand_raw: typing.Optional[typing.List[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.
+             - expand_raw: typing.Optional[typing.Sequence[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.

              - metadata: typing.Optional[typing.Dict[str, typing.Any]].
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+         ---
+         from vellum import (
+             PromptDeploymentExpandMetaRequestRequest,
+             PromptDeploymentInputRequest_String,
+             RawPromptExecutionOverridesRequest,
+         )
+         from vellum.client import AsyncVellum
+
+         client = AsyncVellum(
+             api_key="YOUR_API_KEY",
+         )
+         await client.execute_prompt(
+             inputs=[
+                 PromptDeploymentInputRequest_String(
+                     name="string",
+                     value="string",
+                 )
+             ],
+             prompt_deployment_id="string",
+             prompt_deployment_name="string",
+             release_tag="string",
+             external_id="string",
+             expand_meta=PromptDeploymentExpandMetaRequestRequest(
+                 model_name=True,
+                 latency=True,
+                 deployment_release_tag=True,
+                 prompt_version_id=True,
+                 finish_reason=True,
+             ),
+             raw_overrides=RawPromptExecutionOverridesRequest(
+                 body={"string": {"key": "value"}},
+                 headers={"string": {"key": "value"}},
+                 url="string",
+             ),
+             expand_raw=["string"],
+             metadata={"string": {"key": "value"}},
+         )
          """
          _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
          if prompt_deployment_id is not OMIT:
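Since every method now accepts `request_options`, a single call can override the client-level timeout or attach extra headers. A hedged usage sketch, assuming `RequestOptions` is importable from `vellum.core` (per the `vellum/core/request_options.py` file added in this release); the deployment name and header are illustrative:

    import asyncio

    from vellum import PromptDeploymentInputRequest_String
    from vellum.client import AsyncVellum
    from vellum.core import RequestOptions

    async def main() -> None:
        client = AsyncVellum(api_key="YOUR_API_KEY")
        result = await client.execute_prompt(
            inputs=[PromptDeploymentInputRequest_String(name="query", value="hello")],
            prompt_deployment_name="my-deployment",  # hypothetical deployment name
            request_options=RequestOptions(
                timeout_in_seconds=30,  # overrides the client-wide timeout for this call only
                additional_headers={"X-Trace-Id": "abc-123"},  # merged into the request headers
            ),
        )
        print(result)

    asyncio.run(main())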
@@ -716,9 +1153,28 @@ class AsyncVellum:
          _response = await self._client_wrapper.httpx_client.request(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-prompt"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          )
          if 200 <= _response.status_code < 300:
              return pydantic.parse_obj_as(ExecutePromptResponse, _response.json())  # type: ignore
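The `json=` expression above overlays `additional_body_parameters` on the encoded request, dropping `None` values first. A standalone illustration of that merge rule (pure Python, not SDK code):

    import typing

    def merge_body(
        request: typing.Dict[str, typing.Any],
        additional: typing.Optional[typing.Dict[str, typing.Any]],
    ) -> typing.Dict[str, typing.Any]:
        if additional is None:
            return request
        # mirror remove_none_from_dict: keys with None values are dropped before merging
        cleaned = {k: v for k, v in additional.items() if v is not None}
        return {**request, **cleaned}

    assert merge_body({"inputs": []}, None) == {"inputs": []}
    assert merge_body({"inputs": []}, {"extra": 1, "skip": None}) == {"inputs": [], "extra": 1}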
@@ -739,21 +1195,22 @@ class AsyncVellum:
      async def execute_prompt_stream(
          self,
          *,
-         inputs: typing.List[PromptDeploymentInputRequest],
+         inputs: typing.Sequence[PromptDeploymentInputRequest],
          prompt_deployment_id: typing.Optional[str] = OMIT,
          prompt_deployment_name: typing.Optional[str] = OMIT,
          release_tag: typing.Optional[str] = OMIT,
          external_id: typing.Optional[str] = OMIT,
          expand_meta: typing.Optional[PromptDeploymentExpandMetaRequestRequest] = OMIT,
          raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest] = OMIT,
-         expand_raw: typing.Optional[typing.List[str]] = OMIT,
+         expand_raw: typing.Optional[typing.Sequence[str]] = OMIT,
          metadata: typing.Optional[typing.Dict[str, typing.Any]] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> typing.AsyncIterator[ExecutePromptEvent]:
          """
          Executes a deployed Prompt and streams back the results.

          Parameters:
-             - inputs: typing.List[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.
+             - inputs: typing.Sequence[PromptDeploymentInputRequest]. The list of inputs defined in the Prompt's deployment with their corresponding values.

              - prompt_deployment_id: typing.Optional[str]. The ID of the Prompt Deployment. Must provide either this or prompt_deployment_name.

@@ -767,9 +1224,48 @@ class AsyncVellum:

              - raw_overrides: typing.Optional[RawPromptExecutionOverridesRequest].

-             - expand_raw: typing.Optional[typing.List[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.
+             - expand_raw: typing.Optional[typing.Sequence[str]]. Returns the raw API response data sent from the model host. Combined with `raw_overrides`, it can be used to access new features from models.

              - metadata: typing.Optional[typing.Dict[str, typing.Any]].
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+         ---
+         from vellum import (
+             PromptDeploymentExpandMetaRequestRequest,
+             PromptDeploymentInputRequest_String,
+             RawPromptExecutionOverridesRequest,
+         )
+         from vellum.client import AsyncVellum
+
+         client = AsyncVellum(
+             api_key="YOUR_API_KEY",
+         )
+         await client.execute_prompt_stream(
+             inputs=[
+                 PromptDeploymentInputRequest_String(
+                     name="string",
+                     value="string",
+                 )
+             ],
+             prompt_deployment_id="string",
+             prompt_deployment_name="string",
+             release_tag="string",
+             external_id="string",
+             expand_meta=PromptDeploymentExpandMetaRequestRequest(
+                 model_name=True,
+                 latency=True,
+                 deployment_release_tag=True,
+                 prompt_version_id=True,
+                 finish_reason=True,
+             ),
+             raw_overrides=RawPromptExecutionOverridesRequest(
+                 body={"string": {"key": "value"}},
+                 headers={"string": {"key": "value"}},
+                 url="string",
+             ),
+             expand_raw=["string"],
+             metadata={"string": {"key": "value"}},
+         )
          """
          _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
          if prompt_deployment_id is not OMIT:
@@ -791,9 +1287,28 @@ class AsyncVellum:
          async with self._client_wrapper.httpx_client.stream(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-prompt-stream"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          ) as _response:
              if 200 <= _response.status_code < 300:
                  async for _text in _response.aiter_lines():
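For callers, the method is an async generator. A hedged consumption sketch (the deployment name is illustrative; the `state` discriminant on each event follows the `ExecutePromptEvent` union in this package):

    import asyncio

    from vellum import PromptDeploymentInputRequest_String
    from vellum.client import AsyncVellum

    async def main() -> None:
        client = AsyncVellum(api_key="YOUR_API_KEY")
        async for event in client.execute_prompt_stream(
            inputs=[PromptDeploymentInputRequest_String(name="query", value="hello")],
            prompt_deployment_name="my-deployment",  # hypothetical
        ):
            print(event.state)  # e.g. INITIATED, STREAMING, FULFILLED, REJECTED

    asyncio.run(main())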
@@ -819,17 +1334,18 @@ class AsyncVellum:
      async def execute_workflow(
          self,
          *,
-         inputs: typing.List[WorkflowRequestInputRequest],
+         inputs: typing.Sequence[WorkflowRequestInputRequest],
          workflow_deployment_id: typing.Optional[str] = OMIT,
          workflow_deployment_name: typing.Optional[str] = OMIT,
          release_tag: typing.Optional[str] = OMIT,
          external_id: typing.Optional[str] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> ExecuteWorkflowResponse:
          """
          Executes a deployed Workflow and returns its outputs.

          Parameters:
-             - inputs: typing.List[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.
+             - inputs: typing.Sequence[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.

              - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.

@@ -838,6 +1354,27 @@ class AsyncVellum:
              - release_tag: typing.Optional[str]. Optionally specify a release tag if you want to pin to a specific release of the Workflow Deployment.

              - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes. Must be unique for a given workflow deployment.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+         ---
+         from vellum import WorkflowRequestInputRequest_String
+         from vellum.client import AsyncVellum
+
+         client = AsyncVellum(
+             api_key="YOUR_API_KEY",
+         )
+         await client.execute_workflow(
+             inputs=[
+                 WorkflowRequestInputRequest_String(
+                     name="string",
+                     value="string",
+                 )
+             ],
+             workflow_deployment_id="string",
+             workflow_deployment_name="string",
+             release_tag="string",
+             external_id="string",
+         )
          """
          _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
          if workflow_deployment_id is not OMIT:
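The `_request` construction above relies on exactly one of `workflow_deployment_id` / `workflow_deployment_name` being present, as the docstring states. A tiny hypothetical guard expressing that rule (not part of the SDK):

    import typing

    def check_deployment_ref(
        deployment_id: typing.Optional[str], deployment_name: typing.Optional[str]
    ) -> None:
        # XOR: supplying both identifiers, or neither, is an error
        if (deployment_id is None) == (deployment_name is None):
            raise ValueError("Provide exactly one of workflow_deployment_id or workflow_deployment_name")

    check_deployment_ref("dep-123", None)  # ok
    check_deployment_ref(None, "my-flow")  # ok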
@@ -851,9 +1388,28 @@ class AsyncVellum:
          _response = await self._client_wrapper.httpx_client.request(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-workflow"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          )
          if 200 <= _response.status_code < 300:
              return pydantic.parse_obj_as(ExecuteWorkflowResponse, _response.json())  # type: ignore
@@ -872,18 +1428,19 @@ class AsyncVellum:
      async def execute_workflow_stream(
          self,
          *,
-         inputs: typing.List[WorkflowRequestInputRequest],
+         inputs: typing.Sequence[WorkflowRequestInputRequest],
          workflow_deployment_id: typing.Optional[str] = OMIT,
          workflow_deployment_name: typing.Optional[str] = OMIT,
          release_tag: typing.Optional[str] = OMIT,
          external_id: typing.Optional[str] = OMIT,
-         event_types: typing.Optional[typing.List[WorkflowExecutionEventType]] = OMIT,
+         event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> typing.AsyncIterator[WorkflowStreamEvent]:
          """
          Executes a deployed Workflow and streams back its results.

          Parameters:
-             - inputs: typing.List[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.
+             - inputs: typing.Sequence[WorkflowRequestInputRequest]. The list of inputs defined in the Workflow's Deployment with their corresponding values.

              - workflow_deployment_id: typing.Optional[str]. The ID of the Workflow Deployment. Must provide either this or workflow_deployment_name.

@@ -893,7 +1450,32 @@ class AsyncVellum:

              - external_id: typing.Optional[str]. Optionally include a unique identifier for tracking purposes. Must be unique for a given workflow deployment.

-             - event_types: typing.Optional[typing.List[WorkflowExecutionEventType]]. Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
+             - event_types: typing.Optional[typing.Sequence[WorkflowExecutionEventType]]. Optionally specify which events you want to receive. Defaults to only WORKFLOW events. Note that the schema of non-WORKFLOW events is unstable and should be used with caution.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+         ---
+         from vellum import (
+             WorkflowExecutionEventType,
+             WorkflowRequestInputRequest_String,
+         )
+         from vellum.client import AsyncVellum
+
+         client = AsyncVellum(
+             api_key="YOUR_API_KEY",
+         )
+         await client.execute_workflow_stream(
+             inputs=[
+                 WorkflowRequestInputRequest_String(
+                     name="string",
+                     value="string",
+                 )
+             ],
+             workflow_deployment_id="string",
+             workflow_deployment_name="string",
+             release_tag="string",
+             external_id="string",
+             event_types=[WorkflowExecutionEventType.NODE],
+         )
          """
          _request: typing.Dict[str, typing.Any] = {"inputs": inputs}
          if workflow_deployment_id is not OMIT:
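Per the docstring above, only WORKFLOW events arrive unless `event_types` opts into more. A hedged sketch requesting NODE events as well (input and deployment names are illustrative; the `type` field follows the `WorkflowStreamEvent` union):

    import asyncio

    from vellum import WorkflowExecutionEventType, WorkflowRequestInputRequest_String
    from vellum.client import AsyncVellum

    async def main() -> None:
        client = AsyncVellum(api_key="YOUR_API_KEY")
        async for event in client.execute_workflow_stream(
            inputs=[WorkflowRequestInputRequest_String(name="query", value="hello")],
            workflow_deployment_name="my-workflow",  # hypothetical
            event_types=[WorkflowExecutionEventType.WORKFLOW, WorkflowExecutionEventType.NODE],
        ):
            print(event.type)  # WORKFLOW or NODE

    asyncio.run(main())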
@@ -909,9 +1491,28 @@ class AsyncVellum:
          async with self._client_wrapper.httpx_client.stream(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/execute-workflow-stream"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          ) as _response:
              if 200 <= _response.status_code < 300:
                  async for _text in _response.aiter_lines():
@@ -937,8 +1538,9 @@ class AsyncVellum:
          *,
          deployment_id: typing.Optional[str] = OMIT,
          deployment_name: typing.Optional[str] = OMIT,
-         requests: typing.List[GenerateRequest],
+         requests: typing.Sequence[GenerateRequest],
          options: typing.Optional[GenerateOptionsRequest] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> GenerateResponse:
          """
          Generate a completion using a previously defined deployment.
@@ -950,9 +1552,11 @@ class AsyncVellum:

              - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.

-             - requests: typing.List[GenerateRequest]. The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
+             - requests: typing.Sequence[GenerateRequest]. The generation request to make. Bulk requests are no longer supported; this field must be an array of length 1.

              - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
          ---
          from vellum import GenerateRequest
          from vellum.client import AsyncVellum
@@ -978,9 +1582,28 @@ class AsyncVellum:
          _response = await self._client_wrapper.httpx_client.request(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/generate"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          )
          if 200 <= _response.status_code < 300:
              return pydantic.parse_obj_as(GenerateResponse, _response.json())  # type: ignore
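The docstring above is explicit that `requests` must contain exactly one entry even though it is typed as a sequence. A hypothetical guard making that contract concrete (not SDK code):

    import typing

    from vellum import GenerateRequest

    def check_requests(requests: typing.Sequence[GenerateRequest]) -> None:
        # Bulk generation was removed; the API rejects anything but a single request.
        if len(requests) != 1:
            raise ValueError("Bulk requests are no longer supported; pass exactly one GenerateRequest")

    check_requests([GenerateRequest(input_values={"query": "hello"})])  # ok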
@@ -1003,8 +1626,9 @@ class AsyncVellum:
          *,
          deployment_id: typing.Optional[str] = OMIT,
          deployment_name: typing.Optional[str] = OMIT,
-         requests: typing.List[GenerateRequest],
+         requests: typing.Sequence[GenerateRequest],
          options: typing.Optional[GenerateOptionsRequest] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> typing.AsyncIterator[GenerateStreamResponse]:
          """
          Generate a stream of completions using a previously defined deployment.
@@ -1016,9 +1640,46 @@ class AsyncVellum:

              - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.

-             - requests: typing.List[GenerateRequest]. The generation request to make. Bulk requests are no longer supported, this field must be an array of length 1.
+             - requests: typing.Sequence[GenerateRequest]. The generation request to make. Bulk requests are no longer supported; this field must be an array of length 1.

              - options: typing.Optional[GenerateOptionsRequest]. Additional configuration that can be used to control what's included in the response.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
+         ---
+         from vellum import (
+             ChatMessageContentRequest_String,
+             ChatMessageRequest,
+             ChatMessageRole,
+             GenerateOptionsRequest,
+             GenerateRequest,
+             LogprobsEnum,
+         )
+         from vellum.client import AsyncVellum
+
+         client = AsyncVellum(
+             api_key="YOUR_API_KEY",
+         )
+         await client.generate_stream(
+             deployment_id="string",
+             deployment_name="string",
+             requests=[
+                 GenerateRequest(
+                     input_values={"string": {"key": "value"}},
+                     chat_history=[
+                         ChatMessageRequest(
+                             text="string",
+                             role=ChatMessageRole.SYSTEM,
+                             content=ChatMessageContentRequest_String(value="string"),
+                             source="string",
+                         )
+                     ],
+                     external_ids=["string"],
+                 )
+             ],
+             options=GenerateOptionsRequest(
+                 logprobs=LogprobsEnum.ALL,
+             ),
+         )
          """
          _request: typing.Dict[str, typing.Any] = {"requests": requests}
          if deployment_id is not OMIT:
@@ -1030,9 +1691,28 @@ class AsyncVellum:
          async with self._client_wrapper.httpx_client.stream(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/generate-stream"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          ) as _response:
              if 200 <= _response.status_code < 300:
                  async for _text in _response.aiter_lines():
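All four streaming endpoints above share the same transport shape: each non-empty line of the response body is one JSON event, which the SDK then validates with `pydantic.parse_obj_as`. A standalone sketch of that newline-delimited decode loop (pure Python, illustrative):

    import json
    import typing

    def iter_events(lines: typing.Iterable[str]) -> typing.Iterator[typing.Dict[str, typing.Any]]:
        for line in lines:
            if not line.strip():
                continue  # tolerate keep-alive blank lines
            yield json.loads(line)

    events = list(iter_events(['{"type": "WORKFLOW"}', "", '{"type": "NODE"}']))
    assert [e["type"] for e in events] == ["WORKFLOW", "NODE"]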
@@ -1062,6 +1742,7 @@ class AsyncVellum:
          index_name: typing.Optional[str] = OMIT,
          query: str,
          options: typing.Optional[SearchRequestOptionsRequest] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> SearchResponse:
          """
          Perform a search against a document index.
@@ -1076,6 +1757,8 @@ class AsyncVellum:
              - query: str. The query to search for.

              - options: typing.Optional[SearchRequestOptionsRequest]. Configuration options for the search.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
          ---
          from vellum.client import AsyncVellum

@@ -1096,9 +1779,28 @@ class AsyncVellum:
          _response = await self._client_wrapper.httpx_client.request(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/search"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          )
          if 200 <= _response.status_code < 300:
              return pydantic.parse_obj_as(SearchResponse, _response.json())  # type: ignore
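Because the wrapped httpx client is invoked with `retries=0, max_retries=...`, retries stay off unless a call opts in. A hedged sketch enabling them for a single search (index name and query are illustrative; assumes `RequestOptions` from `vellum.core` as above):

    import asyncio

    from vellum.client import AsyncVellum
    from vellum.core import RequestOptions

    async def main() -> None:
        client = AsyncVellum(api_key="YOUR_API_KEY")
        response = await client.search(
            index_name="my-index",  # hypothetical
            query="What is covered under my policy?",
            request_options=RequestOptions(max_retries=3),  # retried by the wrapped httpx client
        )
        print(len(response.results))

    asyncio.run(main())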
@@ -1119,7 +1821,8 @@ class AsyncVellum:
          *,
          deployment_id: typing.Optional[str] = OMIT,
          deployment_name: typing.Optional[str] = OMIT,
-         actuals: typing.List[SubmitCompletionActualRequest],
+         actuals: typing.Sequence[SubmitCompletionActualRequest],
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> None:
          """
          Used to submit feedback regarding the quality of previously generated completions.
@@ -1131,7 +1834,9 @@ class AsyncVellum:

              - deployment_name: typing.Optional[str]. The name of the deployment. Must provide either this or deployment_id.

-             - actuals: typing.List[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions
+             - actuals: typing.Sequence[SubmitCompletionActualRequest]. Feedback regarding the quality of previously generated completions.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
          ---
          from vellum import SubmitCompletionActualRequest
          from vellum.client import AsyncVellum
@@ -1151,9 +1856,28 @@ class AsyncVellum:
          _response = await self._client_wrapper.httpx_client.request(
              "POST",
              urllib.parse.urljoin(f"{self._client_wrapper.get_environment().predict}/", "v1/submit-completion-actuals"),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          )
          if 200 <= _response.status_code < 300:
              return
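A hedged usage sketch for the feedback endpoint above; the `external_id` and `quality` fields on `SubmitCompletionActualRequest` follow Vellum's public actuals API, and the values are illustrative:

    import asyncio

    from vellum import SubmitCompletionActualRequest
    from vellum.client import AsyncVellum

    async def main() -> None:
        client = AsyncVellum(api_key="YOUR_API_KEY")
        await client.submit_completion_actuals(
            deployment_name="my-deployment",  # hypothetical
            actuals=[
                SubmitCompletionActualRequest(
                    external_id="run-42",  # ties feedback back to the original generation
                    quality=1.0,           # 1.0 = good, 0.0 = bad
                )
            ],
        )

    asyncio.run(main())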
@@ -1172,9 +1896,10 @@ class AsyncVellum:
      async def submit_workflow_execution_actuals(
          self,
          *,
-         actuals: typing.List[SubmitWorkflowExecutionActualRequest],
+         actuals: typing.Sequence[SubmitWorkflowExecutionActualRequest],
          execution_id: typing.Optional[str] = OMIT,
          external_id: typing.Optional[str] = OMIT,
+         request_options: typing.Optional[RequestOptions] = None,
      ) -> None:
          """
          Used to submit feedback regarding the quality of a previous workflow execution and its outputs.
@@ -1182,11 +1907,13 @@ class AsyncVellum:
          **Note:** Uses a base url of `https://predict.vellum.ai`.

          Parameters:
-             - actuals: typing.List[SubmitWorkflowExecutionActualRequest]. Feedback regarding the quality of an output on a previously executed workflow.
+             - actuals: typing.Sequence[SubmitWorkflowExecutionActualRequest]. Feedback regarding the quality of an output on a previously executed workflow.

              - execution_id: typing.Optional[str]. The Vellum-generated ID of a previously executed workflow. Must provide either this or external_id.

              - external_id: typing.Optional[str]. The external ID that was originally provided when executing the workflow, if applicable, that you'd now like to submit actuals for. Must provide either this or execution_id.
+
+             - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
          ---
          from vellum.client import AsyncVellum

@@ -1207,9 +1934,28 @@ class AsyncVellum:
              urllib.parse.urljoin(
                  f"{self._client_wrapper.get_environment().predict}/", "v1/submit-workflow-execution-actuals"
              ),
-             json=jsonable_encoder(_request),
-             headers=self._client_wrapper.get_headers(),
-             timeout=None,
+             params=jsonable_encoder(
+                 request_options.get("additional_query_parameters") if request_options is not None else None
+             ),
+             json=jsonable_encoder(_request)
+             if request_options is None or request_options.get("additional_body_parameters") is None
+             else {
+                 **jsonable_encoder(_request),
+                 **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
+             },
+             headers=jsonable_encoder(
+                 remove_none_from_dict(
+                     {
+                         **self._client_wrapper.get_headers(),
+                         **(request_options.get("additional_headers", {}) if request_options is not None else {}),
+                     }
+                 )
+             ),
+             timeout=request_options.get("timeout_in_seconds")
+             if request_options is not None and request_options.get("timeout_in_seconds") is not None
+             else self._client_wrapper.get_timeout(),
+             retries=0,
+             max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
          )
          if 200 <= _response.status_code < 300:
              return
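A closing note on the `retries=0, max_retries=N` pair that appears in every call above: attempt counting starts at zero and the retry budget comes from `request_options`, with backoff handled inside the new `vellum/core/http_client.py`. A standalone sketch of the kind of jittered exponential backoff such wrappers typically use (illustrative only; the SDK's exact constants and retry conditions may differ):

    import random

    def backoff_delay(attempt: int, base: float = 0.5, cap: float = 10.0) -> float:
        # full-jitter exponential backoff: sleep a random amount up to base * 2**attempt, capped
        return random.uniform(0, min(cap, base * (2 ** attempt)))

    for attempt in range(4):
        print(f"retry {attempt}: up to {min(10.0, 0.5 * 2 ** attempt):.2f}s")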