together 1.2.11__py3-none-any.whl → 2.0.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. together/__init__.py +101 -63
  2. together/_base_client.py +1995 -0
  3. together/_client.py +1033 -0
  4. together/_compat.py +219 -0
  5. together/_constants.py +14 -0
  6. together/_exceptions.py +108 -0
  7. together/_files.py +123 -0
  8. together/_models.py +857 -0
  9. together/_qs.py +150 -0
  10. together/_resource.py +43 -0
  11. together/_response.py +830 -0
  12. together/_streaming.py +370 -0
  13. together/_types.py +260 -0
  14. together/_utils/__init__.py +64 -0
  15. together/_utils/_compat.py +45 -0
  16. together/_utils/_datetime_parse.py +136 -0
  17. together/_utils/_logs.py +25 -0
  18. together/_utils/_proxy.py +65 -0
  19. together/_utils/_reflection.py +42 -0
  20. together/_utils/_resources_proxy.py +24 -0
  21. together/_utils/_streams.py +12 -0
  22. together/_utils/_sync.py +58 -0
  23. together/_utils/_transform.py +457 -0
  24. together/_utils/_typing.py +156 -0
  25. together/_utils/_utils.py +421 -0
  26. together/_version.py +4 -0
  27. together/lib/.keep +4 -0
  28. together/lib/__init__.py +23 -0
  29. together/lib/cli/api/endpoints.py +467 -0
  30. together/lib/cli/api/evals.py +588 -0
  31. together/{cli → lib/cli}/api/files.py +20 -17
  32. together/lib/cli/api/fine_tuning.py +566 -0
  33. together/lib/cli/api/models.py +140 -0
  34. together/lib/cli/api/utils.py +50 -0
  35. together/{cli → lib/cli}/cli.py +17 -23
  36. together/lib/constants.py +61 -0
  37. together/lib/resources/__init__.py +11 -0
  38. together/lib/resources/files.py +999 -0
  39. together/lib/resources/fine_tuning.py +280 -0
  40. together/lib/resources/models.py +35 -0
  41. together/lib/types/__init__.py +13 -0
  42. together/lib/types/error.py +9 -0
  43. together/lib/types/fine_tuning.py +455 -0
  44. together/{utils → lib/utils}/__init__.py +7 -10
  45. together/{utils → lib/utils}/_log.py +18 -13
  46. together/lib/utils/files.py +628 -0
  47. together/lib/utils/serializer.py +10 -0
  48. together/{utils → lib/utils}/tools.py +17 -2
  49. together/resources/__init__.py +225 -24
  50. together/resources/audio/__init__.py +75 -0
  51. together/resources/audio/audio.py +198 -0
  52. together/resources/audio/speech.py +605 -0
  53. together/resources/audio/transcriptions.py +282 -0
  54. together/resources/audio/translations.py +256 -0
  55. together/resources/audio/voices.py +135 -0
  56. together/resources/batches.py +417 -0
  57. together/resources/chat/__init__.py +30 -21
  58. together/resources/chat/chat.py +102 -0
  59. together/resources/chat/completions.py +1063 -257
  60. together/resources/code_interpreter/__init__.py +33 -0
  61. together/resources/code_interpreter/code_interpreter.py +258 -0
  62. together/resources/code_interpreter/sessions.py +135 -0
  63. together/resources/completions.py +890 -225
  64. together/resources/embeddings.py +172 -68
  65. together/resources/endpoints.py +711 -0
  66. together/resources/evals.py +452 -0
  67. together/resources/files.py +397 -120
  68. together/resources/fine_tuning.py +1033 -0
  69. together/resources/hardware.py +181 -0
  70. together/resources/images.py +256 -108
  71. together/resources/jobs.py +214 -0
  72. together/resources/models.py +251 -44
  73. together/resources/rerank.py +190 -92
  74. together/resources/videos.py +374 -0
  75. together/types/__init__.py +66 -73
  76. together/types/audio/__init__.py +10 -0
  77. together/types/audio/speech_create_params.py +75 -0
  78. together/types/audio/transcription_create_params.py +54 -0
  79. together/types/audio/transcription_create_response.py +111 -0
  80. together/types/audio/translation_create_params.py +40 -0
  81. together/types/audio/translation_create_response.py +70 -0
  82. together/types/audio/voice_list_response.py +23 -0
  83. together/types/audio_speech_stream_chunk.py +16 -0
  84. together/types/autoscaling.py +13 -0
  85. together/types/autoscaling_param.py +15 -0
  86. together/types/batch_create_params.py +24 -0
  87. together/types/batch_create_response.py +14 -0
  88. together/types/batch_job.py +45 -0
  89. together/types/batch_list_response.py +10 -0
  90. together/types/chat/__init__.py +18 -0
  91. together/types/chat/chat_completion.py +60 -0
  92. together/types/chat/chat_completion_chunk.py +61 -0
  93. together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
  94. together/types/chat/chat_completion_structured_message_text_param.py +13 -0
  95. together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
  96. together/types/chat/chat_completion_usage.py +13 -0
  97. together/types/chat/chat_completion_warning.py +9 -0
  98. together/types/chat/completion_create_params.py +329 -0
  99. together/types/code_interpreter/__init__.py +5 -0
  100. together/types/code_interpreter/session_list_response.py +31 -0
  101. together/types/code_interpreter_execute_params.py +45 -0
  102. together/types/completion.py +42 -0
  103. together/types/completion_chunk.py +66 -0
  104. together/types/completion_create_params.py +138 -0
  105. together/types/dedicated_endpoint.py +44 -0
  106. together/types/embedding.py +24 -0
  107. together/types/embedding_create_params.py +31 -0
  108. together/types/endpoint_create_params.py +43 -0
  109. together/types/endpoint_list_avzones_response.py +11 -0
  110. together/types/endpoint_list_params.py +18 -0
  111. together/types/endpoint_list_response.py +41 -0
  112. together/types/endpoint_update_params.py +27 -0
  113. together/types/eval_create_params.py +263 -0
  114. together/types/eval_create_response.py +16 -0
  115. together/types/eval_list_params.py +21 -0
  116. together/types/eval_list_response.py +10 -0
  117. together/types/eval_status_response.py +100 -0
  118. together/types/evaluation_job.py +139 -0
  119. together/types/execute_response.py +108 -0
  120. together/types/file_delete_response.py +13 -0
  121. together/types/file_list.py +12 -0
  122. together/types/file_purpose.py +9 -0
  123. together/types/file_response.py +31 -0
  124. together/types/file_type.py +7 -0
  125. together/types/fine_tuning_cancel_response.py +194 -0
  126. together/types/fine_tuning_content_params.py +24 -0
  127. together/types/fine_tuning_delete_params.py +11 -0
  128. together/types/fine_tuning_delete_response.py +12 -0
  129. together/types/fine_tuning_list_checkpoints_response.py +21 -0
  130. together/types/fine_tuning_list_events_response.py +12 -0
  131. together/types/fine_tuning_list_response.py +199 -0
  132. together/types/finetune_event.py +41 -0
  133. together/types/finetune_event_type.py +33 -0
  134. together/types/finetune_response.py +177 -0
  135. together/types/hardware_list_params.py +16 -0
  136. together/types/hardware_list_response.py +58 -0
  137. together/types/image_data_b64.py +15 -0
  138. together/types/image_data_url.py +15 -0
  139. together/types/image_file.py +23 -0
  140. together/types/image_generate_params.py +85 -0
  141. together/types/job_list_response.py +47 -0
  142. together/types/job_retrieve_response.py +43 -0
  143. together/types/log_probs.py +18 -0
  144. together/types/model_list_response.py +10 -0
  145. together/types/model_object.py +42 -0
  146. together/types/model_upload_params.py +36 -0
  147. together/types/model_upload_response.py +23 -0
  148. together/types/rerank_create_params.py +36 -0
  149. together/types/rerank_create_response.py +36 -0
  150. together/types/tool_choice.py +23 -0
  151. together/types/tool_choice_param.py +23 -0
  152. together/types/tools_param.py +23 -0
  153. together/types/training_method_dpo.py +22 -0
  154. together/types/training_method_sft.py +18 -0
  155. together/types/video_create_params.py +86 -0
  156. together/types/video_job.py +57 -0
  157. together-2.0.0a8.dist-info/METADATA +680 -0
  158. together-2.0.0a8.dist-info/RECORD +164 -0
  159. {together-1.2.11.dist-info → together-2.0.0a8.dist-info}/WHEEL +1 -1
  160. together-2.0.0a8.dist-info/entry_points.txt +2 -0
  161. {together-1.2.11.dist-info → together-2.0.0a8.dist-info/licenses}/LICENSE +1 -1
  162. together/abstract/api_requestor.py +0 -723
  163. together/cli/api/chat.py +0 -276
  164. together/cli/api/completions.py +0 -119
  165. together/cli/api/finetune.py +0 -272
  166. together/cli/api/images.py +0 -82
  167. together/cli/api/models.py +0 -42
  168. together/client.py +0 -157
  169. together/constants.py +0 -31
  170. together/error.py +0 -191
  171. together/filemanager.py +0 -388
  172. together/legacy/__init__.py +0 -0
  173. together/legacy/base.py +0 -27
  174. together/legacy/complete.py +0 -93
  175. together/legacy/embeddings.py +0 -27
  176. together/legacy/files.py +0 -146
  177. together/legacy/finetune.py +0 -177
  178. together/legacy/images.py +0 -27
  179. together/legacy/models.py +0 -44
  180. together/resources/finetune.py +0 -489
  181. together/together_response.py +0 -50
  182. together/types/abstract.py +0 -26
  183. together/types/chat_completions.py +0 -171
  184. together/types/common.py +0 -65
  185. together/types/completions.py +0 -104
  186. together/types/embeddings.py +0 -35
  187. together/types/error.py +0 -16
  188. together/types/files.py +0 -89
  189. together/types/finetune.py +0 -265
  190. together/types/images.py +0 -42
  191. together/types/models.py +0 -44
  192. together/types/rerank.py +0 -43
  193. together/utils/api_helpers.py +0 -84
  194. together/utils/files.py +0 -204
  195. together/version.py +0 -6
  196. together-1.2.11.dist-info/METADATA +0 -408
  197. together-1.2.11.dist-info/RECORD +0 -58
  198. together-1.2.11.dist-info/entry_points.txt +0 -3
  199. /together/{abstract → lib/cli}/__init__.py +0 -0
  200. /together/{cli → lib/cli/api}/__init__.py +0 -0
  201. /together/{cli/api/__init__.py → py.typed} +0 -0
@@ -0,0 +1,711 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+ from typing_extensions import Literal
7
+
8
+ import httpx
9
+
10
+ from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
11
+ from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
12
+ from .._utils import maybe_transform, async_maybe_transform
13
+ from .._compat import cached_property
14
+ from .._resource import SyncAPIResource, AsyncAPIResource
15
+ from .._response import (
16
+ to_raw_response_wrapper,
17
+ to_streamed_response_wrapper,
18
+ async_to_raw_response_wrapper,
19
+ async_to_streamed_response_wrapper,
20
+ )
21
+ from .._base_client import make_request_options
22
+ from ..types.autoscaling_param import AutoscalingParam
23
+ from ..types.dedicated_endpoint import DedicatedEndpoint
24
+ from ..types.endpoint_list_response import EndpointListResponse
25
+ from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
26
+
27
+ __all__ = ["EndpointsResource", "AsyncEndpointsResource"]
28
+
29
+
30
class EndpointsResource(SyncAPIResource):
    """Synchronous accessors for the `/endpoints` routes and the availability-zone listing."""

    @cached_property
    def with_raw_response(self) -> EndpointsResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to get the raw response
        object back rather than the parsed content.

        For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
        """
        return EndpointsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
        """
        return EndpointsResourceWithStreamingResponse(self)

    def create(
        self,
        *,
        autoscaling: AutoscalingParam,
        hardware: str,
        model: str,
        availability_zone: str | Omit = omit,
        disable_prompt_cache: bool | Omit = omit,
        disable_speculative_decoding: bool | Omit = omit,
        display_name: str | Omit = omit,
        inactive_timeout: Optional[int] | Omit = omit,
        state: Literal["STARTED", "STOPPED"] | Omit = omit,
        # Escape hatches for API parameters that are not exposed as keyword arguments.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> DedicatedEndpoint:
        """Create a dedicated endpoint for serving a model (POST `/endpoints`).

        The endpoint starts automatically once created. Any supported model can be
        deployed on a hardware configuration that satisfies the model's requirements.

        Args:
          autoscaling: Automatic-scaling configuration for the endpoint

          hardware: Hardware configuration the endpoint should run on

          model: Model to deploy on the endpoint

          availability_zone: Availability zone to create the endpoint in (e.g., us-central-4b)

          disable_prompt_cache: Whether the prompt cache should be disabled for this endpoint

          disable_speculative_decoding: Whether speculative decoding should be disabled for this endpoint

          display_name: Human-readable name for the endpoint

          inactive_timeout: Minutes of inactivity after which the endpoint is stopped
              automatically. Pass null, omit it, or pass 0 to disable the automatic
              timeout.

          state: Desired state of the endpoint

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        payload = maybe_transform(
            {
                "autoscaling": autoscaling,
                "hardware": hardware,
                "model": model,
                "availability_zone": availability_zone,
                "disable_prompt_cache": disable_prompt_cache,
                "disable_speculative_decoding": disable_speculative_decoding,
                "display_name": display_name,
                "inactive_timeout": inactive_timeout,
                "state": state,
            },
            endpoint_create_params.EndpointCreateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/endpoints",
            body=payload,
            options=request_options,
            cast_to=DedicatedEndpoint,
        )

    def retrieve(
        self,
        endpoint_id: str,
        *,
        # Escape hatches for API parameters that are not exposed as keyword arguments.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> DedicatedEndpoint:
        """Fetch one endpoint's details (GET `/endpoints/{endpoint_id}`).

        The result covers the endpoint's current state, configuration, and scaling
        settings.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `endpoint_id` is empty.
        """
        # An empty id would collapse the path to the collection route, so reject it up front.
        if not endpoint_id:
            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")

        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(
            f"/endpoints/{endpoint_id}",
            options=request_options,
            cast_to=DedicatedEndpoint,
        )

    def update(
        self,
        endpoint_id: str,
        *,
        autoscaling: AutoscalingParam | Omit = omit,
        display_name: str | Omit = omit,
        inactive_timeout: Optional[int] | Omit = omit,
        state: Literal["STARTED", "STOPPED"] | Omit = omit,
        # Escape hatches for API parameters that are not exposed as keyword arguments.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> DedicatedEndpoint:
        """Change an existing endpoint's configuration (PATCH `/endpoints/{endpoint_id}`).

        The display name and autoscaling settings can be modified, and the endpoint
        can be started or stopped by changing its state.

        Args:
          autoscaling: Replacement autoscaling configuration for the endpoint

          display_name: Human-readable name for the endpoint

          inactive_timeout: Minutes of inactivity after which the endpoint is stopped
              automatically. Pass 0 to disable the automatic timeout.

          state: Desired state of the endpoint

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `endpoint_id` is empty.
        """
        if not endpoint_id:
            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")

        payload = maybe_transform(
            {
                "autoscaling": autoscaling,
                "display_name": display_name,
                "inactive_timeout": inactive_timeout,
                "state": state,
            },
            endpoint_update_params.EndpointUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._patch(
            f"/endpoints/{endpoint_id}",
            body=payload,
            options=request_options,
            cast_to=DedicatedEndpoint,
        )

    def list(
        self,
        *,
        mine: bool | Omit = omit,
        type: Literal["dedicated", "serverless"] | Omit = omit,
        usage_type: Literal["on-demand", "reserved"] | Omit = omit,
        # Escape hatches for API parameters that are not exposed as keyword arguments.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EndpointListResponse:
        """List the endpoints associated with your account (GET `/endpoints`).

        Results can be filtered by type (dedicated or serverless).

        Args:
          mine: If true, return only endpoints owned by the caller

          type: Filter endpoints by type

          usage_type: Filter endpoints by usage type

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # The filters travel as query-string parameters rather than as a request body.
        query_params = maybe_transform(
            {
                "mine": mine,
                "type": type,
                "usage_type": usage_type,
            },
            endpoint_list_params.EndpointListParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=query_params,
        )
        return self._get(
            "/endpoints",
            options=request_options,
            cast_to=EndpointListResponse,
        )

    def delete(
        self,
        endpoint_id: str,
        *,
        # Escape hatches for API parameters that are not exposed as keyword arguments.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """Permanently delete an endpoint (DELETE `/endpoints/{endpoint_id}`).

        This action cannot be undone.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `endpoint_id` is empty.
        """
        if not endpoint_id:
            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")

        # Default to accepting any content type; a caller-supplied "Accept" header still overrides it.
        headers_with_accept = {"Accept": "*/*", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=headers_with_accept,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._delete(
            f"/endpoints/{endpoint_id}",
            options=request_options,
            cast_to=NoneType,
        )

    def list_avzones(
        self,
        *,
        # Escape hatches for API parameters that are not exposed as keyword arguments.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EndpointListAvzonesResponse:
        """List all available availability zones (GET `/clusters/availability-zones`)."""
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(
            "/clusters/availability-zones",
            options=request_options,
            cast_to=EndpointListAvzonesResponse,
        )
322
+
323
+
324
+ class AsyncEndpointsResource(AsyncAPIResource):
325
+ @cached_property
326
+ def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
327
+ """
328
+ This property can be used as a prefix for any HTTP method call to return
329
+ the raw response object instead of the parsed content.
330
+
331
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
332
+ """
333
+ return AsyncEndpointsResourceWithRawResponse(self)
334
+
335
+ @cached_property
336
+ def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
337
+ """
338
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
339
+
340
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
341
+ """
342
+ return AsyncEndpointsResourceWithStreamingResponse(self)
343
+
344
+ async def create(
345
+ self,
346
+ *,
347
+ autoscaling: AutoscalingParam,
348
+ hardware: str,
349
+ model: str,
350
+ availability_zone: str | Omit = omit,
351
+ disable_prompt_cache: bool | Omit = omit,
352
+ disable_speculative_decoding: bool | Omit = omit,
353
+ display_name: str | Omit = omit,
354
+ inactive_timeout: Optional[int] | Omit = omit,
355
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
356
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
357
+ # The extra values given here take precedence over values defined on the client or passed to this method.
358
+ extra_headers: Headers | None = None,
359
+ extra_query: Query | None = None,
360
+ extra_body: Body | None = None,
361
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
362
+ ) -> DedicatedEndpoint:
363
+ """Creates a new dedicated endpoint for serving models.
364
+
365
+ The endpoint will
366
+ automatically start after creation. You can deploy any supported model on
367
+ hardware configurations that meet the model's requirements.
368
+
369
+ Args:
370
+ autoscaling: Configuration for automatic scaling of the endpoint
371
+
372
+ hardware: The hardware configuration to use for this endpoint
373
+
374
+ model: The model to deploy on this endpoint
375
+
376
+ availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
377
+
378
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
379
+
380
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
381
+
382
+ display_name: A human-readable name for the endpoint
383
+
384
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
385
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
386
+ timeout.
387
+
388
+ state: The desired state of the endpoint
389
+
390
+ extra_headers: Send extra headers
391
+
392
+ extra_query: Add additional query parameters to the request
393
+
394
+ extra_body: Add additional JSON properties to the request
395
+
396
+ timeout: Override the client-level default timeout for this request, in seconds
397
+ """
398
+ return await self._post(
399
+ "/endpoints",
400
+ body=await async_maybe_transform(
401
+ {
402
+ "autoscaling": autoscaling,
403
+ "hardware": hardware,
404
+ "model": model,
405
+ "availability_zone": availability_zone,
406
+ "disable_prompt_cache": disable_prompt_cache,
407
+ "disable_speculative_decoding": disable_speculative_decoding,
408
+ "display_name": display_name,
409
+ "inactive_timeout": inactive_timeout,
410
+ "state": state,
411
+ },
412
+ endpoint_create_params.EndpointCreateParams,
413
+ ),
414
+ options=make_request_options(
415
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
416
+ ),
417
+ cast_to=DedicatedEndpoint,
418
+ )
419
+
420
+ async def retrieve(
421
+ self,
422
+ endpoint_id: str,
423
+ *,
424
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
425
+ # The extra values given here take precedence over values defined on the client or passed to this method.
426
+ extra_headers: Headers | None = None,
427
+ extra_query: Query | None = None,
428
+ extra_body: Body | None = None,
429
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
430
+ ) -> DedicatedEndpoint:
431
+ """
432
+ Retrieves details about a specific endpoint, including its current state,
433
+ configuration, and scaling settings.
434
+
435
+ Args:
436
+ extra_headers: Send extra headers
437
+
438
+ extra_query: Add additional query parameters to the request
439
+
440
+ extra_body: Add additional JSON properties to the request
441
+
442
+ timeout: Override the client-level default timeout for this request, in seconds
443
+ """
444
+ if not endpoint_id:
445
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
446
+ return await self._get(
447
+ f"/endpoints/{endpoint_id}",
448
+ options=make_request_options(
449
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
450
+ ),
451
+ cast_to=DedicatedEndpoint,
452
+ )
453
+
454
+ async def update(
455
+ self,
456
+ endpoint_id: str,
457
+ *,
458
+ autoscaling: AutoscalingParam | Omit = omit,
459
+ display_name: str | Omit = omit,
460
+ inactive_timeout: Optional[int] | Omit = omit,
461
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
462
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
463
+ # The extra values given here take precedence over values defined on the client or passed to this method.
464
+ extra_headers: Headers | None = None,
465
+ extra_query: Query | None = None,
466
+ extra_body: Body | None = None,
467
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
468
+ ) -> DedicatedEndpoint:
469
+ """Updates an existing endpoint's configuration.
470
+
471
+ You can modify the display name,
472
+ autoscaling settings, or change the endpoint's state (start/stop).
473
+
474
+ Args:
475
+ autoscaling: New autoscaling configuration for the endpoint
476
+
477
+ display_name: A human-readable name for the endpoint
478
+
479
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
480
+ automatically stopped. Set to 0 to disable automatic timeout.
481
+
482
+ state: The desired state of the endpoint
483
+
484
+ extra_headers: Send extra headers
485
+
486
+ extra_query: Add additional query parameters to the request
487
+
488
+ extra_body: Add additional JSON properties to the request
489
+
490
+ timeout: Override the client-level default timeout for this request, in seconds
491
+ """
492
+ if not endpoint_id:
493
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
494
+ return await self._patch(
495
+ f"/endpoints/{endpoint_id}",
496
+ body=await async_maybe_transform(
497
+ {
498
+ "autoscaling": autoscaling,
499
+ "display_name": display_name,
500
+ "inactive_timeout": inactive_timeout,
501
+ "state": state,
502
+ },
503
+ endpoint_update_params.EndpointUpdateParams,
504
+ ),
505
+ options=make_request_options(
506
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
507
+ ),
508
+ cast_to=DedicatedEndpoint,
509
+ )
510
+
511
+ async def list(
512
+ self,
513
+ *,
514
+ mine: bool | Omit = omit,
515
+ type: Literal["dedicated", "serverless"] | Omit = omit,
516
+ usage_type: Literal["on-demand", "reserved"] | Omit = omit,
517
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
518
+ # The extra values given here take precedence over values defined on the client or passed to this method.
519
+ extra_headers: Headers | None = None,
520
+ extra_query: Query | None = None,
521
+ extra_body: Body | None = None,
522
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
523
+ ) -> EndpointListResponse:
524
+ """Returns a list of all endpoints associated with your account.
525
+
526
+ You can filter the
527
+ results by type (dedicated or serverless).
528
+
529
+ Args:
530
+ mine: If true, return only endpoints owned by the caller
531
+
532
+ type: Filter endpoints by type
533
+
534
+ usage_type: Filter endpoints by usage type
535
+
536
+ extra_headers: Send extra headers
537
+
538
+ extra_query: Add additional query parameters to the request
539
+
540
+ extra_body: Add additional JSON properties to the request
541
+
542
+ timeout: Override the client-level default timeout for this request, in seconds
543
+ """
544
+ return await self._get(
545
+ "/endpoints",
546
+ options=make_request_options(
547
+ extra_headers=extra_headers,
548
+ extra_query=extra_query,
549
+ extra_body=extra_body,
550
+ timeout=timeout,
551
+ query=await async_maybe_transform(
552
+ {
553
+ "mine": mine,
554
+ "type": type,
555
+ "usage_type": usage_type,
556
+ },
557
+ endpoint_list_params.EndpointListParams,
558
+ ),
559
+ ),
560
+ cast_to=EndpointListResponse,
561
+ )
562
+
563
async def delete(
    self,
    endpoint_id: str,
    *,
    # The arguments below let callers pass request parameters that are not exposed as kwargs.
    # Values supplied here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> None:
    """Permanently deletes an endpoint.

    This action cannot be undone. The request is a DELETE issued against
    `/endpoints/{endpoint_id}` with an `Accept: */*` header by default.

    Args:
      endpoint_id: Identifier of the endpoint to delete; must be non-empty

      extra_headers: Send extra headers (these override the default `Accept` header)

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `endpoint_id` is empty.
    """
    if not endpoint_id:
        raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")

    # Caller-supplied headers are unpacked last so they win over the default Accept value.
    request_headers = {"Accept": "*/*", **(extra_headers or {})}
    request_options = make_request_options(
        extra_headers=request_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._delete(
        f"/endpoints/{endpoint_id}",
        options=request_options,
        cast_to=NoneType,
    )
597
+
598
async def list_avzones(
    self,
    *,
    # The arguments below let callers pass request parameters that are not exposed as kwargs.
    # Values supplied here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> EndpointListAvzonesResponse:
    """List all available availability zones.

    Issues a GET against `/clusters/availability-zones` and asks the client
    to cast the result to `EndpointListAvzonesResponse`.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get(
        "/clusters/availability-zones",
        options=request_options,
        cast_to=EndpointListAvzonesResponse,
    )
616
+
617
+
618
class EndpointsResourceWithRawResponse:
    """Mirror of `EndpointsResource` whose methods go through `to_raw_response_wrapper`.

    NOTE(review): the wrapper is defined outside this file section; presumably it
    makes each call return the raw HTTP response instead of the parsed model —
    confirm against `_response.py`.
    """

    def __init__(self, endpoints: EndpointsResource) -> None:
        # Keep a handle on the underlying resource that the wrapped methods delegate to.
        self._endpoints = endpoints

        # One wrapped attribute per public method of the resource.
        self.create = to_raw_response_wrapper(endpoints.create)
        self.retrieve = to_raw_response_wrapper(endpoints.retrieve)
        self.update = to_raw_response_wrapper(endpoints.update)
        self.list = to_raw_response_wrapper(endpoints.list)
        self.delete = to_raw_response_wrapper(endpoints.delete)
        self.list_avzones = to_raw_response_wrapper(endpoints.list_avzones)
640
+
641
+
642
class AsyncEndpointsResourceWithRawResponse:
    """Mirror of `AsyncEndpointsResource` whose methods go through `async_to_raw_response_wrapper`.

    NOTE(review): the wrapper is defined outside this file section; presumably it
    makes each awaited call return the raw HTTP response instead of the parsed
    model — confirm against `_response.py`.
    """

    def __init__(self, endpoints: AsyncEndpointsResource) -> None:
        # Keep a handle on the underlying resource that the wrapped methods delegate to.
        self._endpoints = endpoints

        # One wrapped attribute per public method of the resource.
        self.create = async_to_raw_response_wrapper(endpoints.create)
        self.retrieve = async_to_raw_response_wrapper(endpoints.retrieve)
        self.update = async_to_raw_response_wrapper(endpoints.update)
        self.list = async_to_raw_response_wrapper(endpoints.list)
        self.delete = async_to_raw_response_wrapper(endpoints.delete)
        self.list_avzones = async_to_raw_response_wrapper(endpoints.list_avzones)
664
+
665
+
666
class EndpointsResourceWithStreamingResponse:
    """Mirror of `EndpointsResource` whose methods go through `to_streamed_response_wrapper`.

    NOTE(review): the wrapper is defined outside this file section; presumably it
    defers reading the response body so callers can stream it — confirm against
    `_response.py`.
    """

    def __init__(self, endpoints: EndpointsResource) -> None:
        # Keep a handle on the underlying resource that the wrapped methods delegate to.
        self._endpoints = endpoints

        # One wrapped attribute per public method of the resource.
        self.create = to_streamed_response_wrapper(endpoints.create)
        self.retrieve = to_streamed_response_wrapper(endpoints.retrieve)
        self.update = to_streamed_response_wrapper(endpoints.update)
        self.list = to_streamed_response_wrapper(endpoints.list)
        self.delete = to_streamed_response_wrapper(endpoints.delete)
        self.list_avzones = to_streamed_response_wrapper(endpoints.list_avzones)
688
+
689
+
690
class AsyncEndpointsResourceWithStreamingResponse:
    """Mirror of `AsyncEndpointsResource` whose methods go through `async_to_streamed_response_wrapper`.

    NOTE(review): the wrapper is defined outside this file section; presumably it
    defers reading the response body so callers can stream it asynchronously —
    confirm against `_response.py`.
    """

    def __init__(self, endpoints: AsyncEndpointsResource) -> None:
        # Keep a handle on the underlying resource that the wrapped methods delegate to.
        self._endpoints = endpoints

        # One wrapped attribute per public method of the resource.
        self.create = async_to_streamed_response_wrapper(endpoints.create)
        self.retrieve = async_to_streamed_response_wrapper(endpoints.retrieve)
        self.update = async_to_streamed_response_wrapper(endpoints.update)
        self.list = async_to_streamed_response_wrapper(endpoints.list)
        self.delete = async_to_streamed_response_wrapper(endpoints.delete)
        self.list_avzones = async_to_streamed_response_wrapper(endpoints.list_avzones)