together 1.5.35__py3-none-any.whl → 2.0.0a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. together/__init__.py +101 -114
  2. together/_base_client.py +1995 -0
  3. together/_client.py +1033 -0
  4. together/_compat.py +219 -0
  5. together/_constants.py +14 -0
  6. together/_exceptions.py +108 -0
  7. together/_files.py +123 -0
  8. together/_models.py +857 -0
  9. together/_qs.py +150 -0
  10. together/_resource.py +43 -0
  11. together/_response.py +830 -0
  12. together/_streaming.py +370 -0
  13. together/_types.py +260 -0
  14. together/_utils/__init__.py +64 -0
  15. together/_utils/_compat.py +45 -0
  16. together/_utils/_datetime_parse.py +136 -0
  17. together/_utils/_logs.py +25 -0
  18. together/_utils/_proxy.py +65 -0
  19. together/_utils/_reflection.py +42 -0
  20. together/_utils/_resources_proxy.py +24 -0
  21. together/_utils/_streams.py +12 -0
  22. together/_utils/_sync.py +58 -0
  23. together/_utils/_transform.py +457 -0
  24. together/_utils/_typing.py +156 -0
  25. together/_utils/_utils.py +421 -0
  26. together/_version.py +4 -0
  27. together/lib/.keep +4 -0
  28. together/lib/__init__.py +23 -0
  29. together/{cli → lib/cli}/api/endpoints.py +66 -84
  30. together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
  31. together/{cli → lib/cli}/api/files.py +20 -17
  32. together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
  33. together/{cli → lib/cli}/api/models.py +34 -27
  34. together/lib/cli/api/utils.py +50 -0
  35. together/{cli → lib/cli}/cli.py +16 -26
  36. together/{constants.py → lib/constants.py} +11 -24
  37. together/lib/resources/__init__.py +11 -0
  38. together/lib/resources/files.py +999 -0
  39. together/lib/resources/fine_tuning.py +280 -0
  40. together/lib/resources/models.py +35 -0
  41. together/lib/types/__init__.py +13 -0
  42. together/lib/types/error.py +9 -0
  43. together/lib/types/fine_tuning.py +397 -0
  44. together/{utils → lib/utils}/__init__.py +6 -14
  45. together/{utils → lib/utils}/_log.py +11 -16
  46. together/{utils → lib/utils}/files.py +90 -288
  47. together/lib/utils/serializer.py +10 -0
  48. together/{utils → lib/utils}/tools.py +19 -55
  49. together/resources/__init__.py +225 -39
  50. together/resources/audio/__init__.py +72 -48
  51. together/resources/audio/audio.py +198 -0
  52. together/resources/audio/speech.py +574 -128
  53. together/resources/audio/transcriptions.py +247 -261
  54. together/resources/audio/translations.py +221 -241
  55. together/resources/audio/voices.py +111 -41
  56. together/resources/batches.py +417 -0
  57. together/resources/chat/__init__.py +30 -21
  58. together/resources/chat/chat.py +102 -0
  59. together/resources/chat/completions.py +1063 -263
  60. together/resources/code_interpreter/__init__.py +33 -0
  61. together/resources/code_interpreter/code_interpreter.py +258 -0
  62. together/resources/code_interpreter/sessions.py +135 -0
  63. together/resources/completions.py +884 -225
  64. together/resources/embeddings.py +172 -68
  65. together/resources/endpoints.py +589 -490
  66. together/resources/evals.py +452 -0
  67. together/resources/files.py +397 -129
  68. together/resources/fine_tuning.py +1033 -0
  69. together/resources/hardware.py +181 -0
  70. together/resources/images.py +258 -104
  71. together/resources/jobs.py +214 -0
  72. together/resources/models.py +223 -193
  73. together/resources/rerank.py +190 -92
  74. together/resources/videos.py +286 -214
  75. together/types/__init__.py +66 -167
  76. together/types/audio/__init__.py +10 -0
  77. together/types/audio/speech_create_params.py +75 -0
  78. together/types/audio/transcription_create_params.py +54 -0
  79. together/types/audio/transcription_create_response.py +111 -0
  80. together/types/audio/translation_create_params.py +40 -0
  81. together/types/audio/translation_create_response.py +70 -0
  82. together/types/audio/voice_list_response.py +23 -0
  83. together/types/audio_speech_stream_chunk.py +16 -0
  84. together/types/autoscaling.py +13 -0
  85. together/types/autoscaling_param.py +15 -0
  86. together/types/batch_create_params.py +24 -0
  87. together/types/batch_create_response.py +14 -0
  88. together/types/batch_job.py +45 -0
  89. together/types/batch_list_response.py +10 -0
  90. together/types/chat/__init__.py +18 -0
  91. together/types/chat/chat_completion.py +60 -0
  92. together/types/chat/chat_completion_chunk.py +61 -0
  93. together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
  94. together/types/chat/chat_completion_structured_message_text_param.py +13 -0
  95. together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
  96. together/types/chat/chat_completion_usage.py +13 -0
  97. together/types/chat/chat_completion_warning.py +9 -0
  98. together/types/chat/completion_create_params.py +329 -0
  99. together/types/code_interpreter/__init__.py +5 -0
  100. together/types/code_interpreter/session_list_response.py +31 -0
  101. together/types/code_interpreter_execute_params.py +45 -0
  102. together/types/completion.py +42 -0
  103. together/types/completion_chunk.py +66 -0
  104. together/types/completion_create_params.py +138 -0
  105. together/types/dedicated_endpoint.py +44 -0
  106. together/types/embedding.py +24 -0
  107. together/types/embedding_create_params.py +31 -0
  108. together/types/endpoint_create_params.py +43 -0
  109. together/types/endpoint_list_avzones_response.py +11 -0
  110. together/types/endpoint_list_params.py +18 -0
  111. together/types/endpoint_list_response.py +41 -0
  112. together/types/endpoint_update_params.py +27 -0
  113. together/types/eval_create_params.py +263 -0
  114. together/types/eval_create_response.py +16 -0
  115. together/types/eval_list_params.py +21 -0
  116. together/types/eval_list_response.py +10 -0
  117. together/types/eval_status_response.py +100 -0
  118. together/types/evaluation_job.py +139 -0
  119. together/types/execute_response.py +108 -0
  120. together/types/file_delete_response.py +13 -0
  121. together/types/file_list.py +12 -0
  122. together/types/file_purpose.py +9 -0
  123. together/types/file_response.py +31 -0
  124. together/types/file_type.py +7 -0
  125. together/types/fine_tuning_cancel_response.py +194 -0
  126. together/types/fine_tuning_content_params.py +24 -0
  127. together/types/fine_tuning_delete_params.py +11 -0
  128. together/types/fine_tuning_delete_response.py +12 -0
  129. together/types/fine_tuning_list_checkpoints_response.py +21 -0
  130. together/types/fine_tuning_list_events_response.py +12 -0
  131. together/types/fine_tuning_list_response.py +199 -0
  132. together/types/finetune_event.py +41 -0
  133. together/types/finetune_event_type.py +33 -0
  134. together/types/finetune_response.py +177 -0
  135. together/types/hardware_list_params.py +16 -0
  136. together/types/hardware_list_response.py +58 -0
  137. together/types/image_data_b64.py +15 -0
  138. together/types/image_data_url.py +15 -0
  139. together/types/image_file.py +23 -0
  140. together/types/image_generate_params.py +85 -0
  141. together/types/job_list_response.py +47 -0
  142. together/types/job_retrieve_response.py +43 -0
  143. together/types/log_probs.py +18 -0
  144. together/types/model_list_response.py +10 -0
  145. together/types/model_object.py +42 -0
  146. together/types/model_upload_params.py +36 -0
  147. together/types/model_upload_response.py +23 -0
  148. together/types/rerank_create_params.py +36 -0
  149. together/types/rerank_create_response.py +36 -0
  150. together/types/tool_choice.py +23 -0
  151. together/types/tool_choice_param.py +23 -0
  152. together/types/tools_param.py +23 -0
  153. together/types/training_method_dpo.py +22 -0
  154. together/types/training_method_sft.py +18 -0
  155. together/types/video_create_params.py +86 -0
  156. together/types/video_create_response.py +10 -0
  157. together/types/video_job.py +57 -0
  158. together-2.0.0a6.dist-info/METADATA +729 -0
  159. together-2.0.0a6.dist-info/RECORD +165 -0
  160. {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
  161. together-2.0.0a6.dist-info/entry_points.txt +2 -0
  162. {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
  163. together/abstract/api_requestor.py +0 -770
  164. together/cli/api/chat.py +0 -298
  165. together/cli/api/completions.py +0 -119
  166. together/cli/api/images.py +0 -93
  167. together/cli/api/utils.py +0 -139
  168. together/client.py +0 -186
  169. together/error.py +0 -194
  170. together/filemanager.py +0 -635
  171. together/legacy/__init__.py +0 -0
  172. together/legacy/base.py +0 -27
  173. together/legacy/complete.py +0 -93
  174. together/legacy/embeddings.py +0 -27
  175. together/legacy/files.py +0 -146
  176. together/legacy/finetune.py +0 -177
  177. together/legacy/images.py +0 -27
  178. together/legacy/models.py +0 -44
  179. together/resources/batch.py +0 -165
  180. together/resources/code_interpreter.py +0 -82
  181. together/resources/evaluation.py +0 -808
  182. together/resources/finetune.py +0 -1388
  183. together/together_response.py +0 -50
  184. together/types/abstract.py +0 -26
  185. together/types/audio_speech.py +0 -311
  186. together/types/batch.py +0 -54
  187. together/types/chat_completions.py +0 -210
  188. together/types/code_interpreter.py +0 -57
  189. together/types/common.py +0 -67
  190. together/types/completions.py +0 -107
  191. together/types/embeddings.py +0 -35
  192. together/types/endpoints.py +0 -123
  193. together/types/error.py +0 -16
  194. together/types/evaluation.py +0 -93
  195. together/types/files.py +0 -93
  196. together/types/finetune.py +0 -465
  197. together/types/images.py +0 -42
  198. together/types/models.py +0 -96
  199. together/types/rerank.py +0 -43
  200. together/types/videos.py +0 -69
  201. together/utils/api_helpers.py +0 -124
  202. together/version.py +0 -6
  203. together-1.5.35.dist-info/METADATA +0 -583
  204. together-1.5.35.dist-info/RECORD +0 -77
  205. together-1.5.35.dist-info/entry_points.txt +0 -3
  206. /together/{abstract → lib/cli}/__init__.py +0 -0
  207. /together/{cli → lib/cli/api}/__init__.py +0 -0
  208. /together/{cli/api/__init__.py → py.typed} +0 -0
@@ -1,612 +1,711 @@
1
- from __future__ import annotations
2
-
3
- import warnings
4
- from typing import Dict, List, Literal, Optional, Union
5
-
6
- from together.abstract import api_requestor
7
- from together.together_response import TogetherResponse
8
- from together.types import TogetherClient, TogetherRequest
9
- from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
10
2
 
3
+ from __future__ import annotations
11
4
 
12
- class Endpoints:
13
- def __init__(self, client: TogetherClient) -> None:
14
- self._client = client
15
-
16
- def list(
17
- self,
18
- type: Optional[Literal["dedicated", "serverless"]] = None,
19
- usage_type: Optional[Literal["on-demand", "reserved"]] = None,
20
- mine: Optional[bool] = None,
21
- ) -> List[ListEndpoint]:
5
+ from typing import Optional
6
+ from typing_extensions import Literal
7
+
8
+ import httpx
9
+
10
+ from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
11
+ from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
12
+ from .._utils import maybe_transform, async_maybe_transform
13
+ from .._compat import cached_property
14
+ from .._resource import SyncAPIResource, AsyncAPIResource
15
+ from .._response import (
16
+ to_raw_response_wrapper,
17
+ to_streamed_response_wrapper,
18
+ async_to_raw_response_wrapper,
19
+ async_to_streamed_response_wrapper,
20
+ )
21
+ from .._base_client import make_request_options
22
+ from ..types.autoscaling_param import AutoscalingParam
23
+ from ..types.dedicated_endpoint import DedicatedEndpoint
24
+ from ..types.endpoint_list_response import EndpointListResponse
25
+ from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
26
+
27
+ __all__ = ["EndpointsResource", "AsyncEndpointsResource"]
28
+
29
+
30
+ class EndpointsResource(SyncAPIResource):
31
+ @cached_property
32
+ def with_raw_response(self) -> EndpointsResourceWithRawResponse:
22
33
  """
23
- List all endpoints, can be filtered by endpoint type and ownership.
24
-
25
- Args:
26
- type (str, optional): Filter endpoints by endpoint type ("dedicated" or "serverless"). Defaults to None.
27
- usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
28
- mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
34
+ This property can be used as a prefix for any HTTP method call to return
35
+ the raw response object instead of the parsed content.
29
36
 
30
- Returns:
31
- List[ListEndpoint]: List of endpoint objects
37
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
32
38
  """
33
- requestor = api_requestor.APIRequestor(
34
- client=self._client,
35
- )
36
-
37
- params: Dict[
38
- str,
39
- Union[
40
- Literal["dedicated", "serverless"],
41
- Literal["on-demand", "reserved"],
42
- bool,
43
- ],
44
- ] = {}
45
- if type is not None:
46
- params["type"] = type
47
- if usage_type is not None:
48
- params["usage_type"] = usage_type
49
- if mine is not None:
50
- params["mine"] = mine
51
-
52
- response, _, _ = requestor.request(
53
- options=TogetherRequest(
54
- method="GET",
55
- url="endpoints",
56
- params=params,
57
- ),
58
- stream=False,
59
- )
39
+ return EndpointsResourceWithRawResponse(self)
60
40
 
61
- response.data = response.data["data"]
62
-
63
- assert isinstance(response, TogetherResponse)
64
- assert isinstance(response.data, list)
41
+ @cached_property
42
+ def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
43
+ """
44
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
65
45
 
66
- return [ListEndpoint(**endpoint) for endpoint in response.data]
46
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
47
+ """
48
+ return EndpointsResourceWithStreamingResponse(self)
67
49
 
68
50
  def create(
69
51
  self,
70
52
  *,
71
- model: str,
53
+ autoscaling: AutoscalingParam,
72
54
  hardware: str,
73
- min_replicas: int,
74
- max_replicas: int,
75
- display_name: Optional[str] = None,
76
- disable_prompt_cache: bool = True,
77
- disable_speculative_decoding: bool = True,
78
- state: Literal["STARTED", "STOPPED"] = "STARTED",
79
- inactive_timeout: Optional[int] = None,
80
- availability_zone: Optional[str] = None,
55
+ model: str,
56
+ availability_zone: str | Omit = omit,
57
+ disable_prompt_cache: bool | Omit = omit,
58
+ disable_speculative_decoding: bool | Omit = omit,
59
+ display_name: str | Omit = omit,
60
+ inactive_timeout: Optional[int] | Omit = omit,
61
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
62
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
63
+ # The extra values given here take precedence over values defined on the client or passed to this method.
64
+ extra_headers: Headers | None = None,
65
+ extra_query: Query | None = None,
66
+ extra_body: Body | None = None,
67
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
81
68
  ) -> DedicatedEndpoint:
82
- """
83
- Create a new dedicated endpoint.
69
+ """Creates a new dedicated endpoint for serving models.
70
+
71
+ The endpoint will
72
+ automatically start after creation. You can deploy any supported model on
73
+ hardware configurations that meet the model's requirements.
84
74
 
85
75
  Args:
86
- model (str): The model to deploy on this endpoint
87
- hardware (str): The hardware configuration to use for this endpoint
88
- min_replicas (int): The minimum number of replicas to maintain
89
- max_replicas (int): The maximum number of replicas to scale up to
90
- display_name (str, optional): A human-readable name for the endpoint
91
- disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
92
- disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
93
- state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
94
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
95
- availability_zone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).
96
-
97
- Returns:
98
- DedicatedEndpoint: Object containing endpoint information
99
- """
100
- if disable_prompt_cache:
101
- warnings.warn(
102
- "The 'disable_prompt_cache' parameter (CLI flag: '--no-prompt-cache') is deprecated and will be removed in a future version.",
103
- stacklevel=2,
104
- )
105
-
106
- requestor = api_requestor.APIRequestor(
107
- client=self._client,
108
- )
109
-
110
- data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
111
- "model": model,
112
- "hardware": hardware,
113
- "autoscaling": {
114
- "min_replicas": min_replicas,
115
- "max_replicas": max_replicas,
116
- },
117
- "disable_prompt_cache": disable_prompt_cache,
118
- "disable_speculative_decoding": disable_speculative_decoding,
119
- "state": state,
120
- }
121
-
122
- if display_name is not None:
123
- data["display_name"] = display_name
124
-
125
- if inactive_timeout is not None:
126
- data["inactive_timeout"] = inactive_timeout
127
-
128
- if availability_zone is not None:
129
- data["availability_zone"] = availability_zone
130
-
131
- response, _, _ = requestor.request(
132
- options=TogetherRequest(
133
- method="POST",
134
- url="endpoints",
135
- params=data,
136
- ),
137
- stream=False,
138
- )
76
+ autoscaling: Configuration for automatic scaling of the endpoint
139
77
 
140
- assert isinstance(response, TogetherResponse)
78
+ hardware: The hardware configuration to use for this endpoint
141
79
 
142
- return DedicatedEndpoint(**response.data)
80
+ model: The model to deploy on this endpoint
143
81
 
144
- def get(self, endpoint_id: str) -> DedicatedEndpoint:
145
- """
146
- Get details of a specific endpoint.
82
+ availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
147
83
 
148
- Args:
149
- endpoint_id (str): ID of the endpoint to retrieve
84
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
150
85
 
151
- Returns:
152
- DedicatedEndpoint: Object containing endpoint information
153
- """
154
- requestor = api_requestor.APIRequestor(
155
- client=self._client,
156
- )
86
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
157
87
 
158
- response, _, _ = requestor.request(
159
- options=TogetherRequest(
160
- method="GET",
161
- url=f"endpoints/{endpoint_id}",
162
- ),
163
- stream=False,
164
- )
88
+ display_name: A human-readable name for the endpoint
165
89
 
166
- assert isinstance(response, TogetherResponse)
90
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
91
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
92
+ timeout.
167
93
 
168
- return DedicatedEndpoint(**response.data)
94
+ state: The desired state of the endpoint
169
95
 
170
- def delete(self, endpoint_id: str) -> None:
171
- """
172
- Delete a specific endpoint.
96
+ extra_headers: Send extra headers
173
97
 
174
- Args:
175
- endpoint_id (str): ID of the endpoint to delete
176
- """
177
- requestor = api_requestor.APIRequestor(
178
- client=self._client,
179
- )
98
+ extra_query: Add additional query parameters to the request
180
99
 
181
- requestor.request(
182
- options=TogetherRequest(
183
- method="DELETE",
184
- url=f"endpoints/{endpoint_id}",
100
+ extra_body: Add additional JSON properties to the request
101
+
102
+ timeout: Override the client-level default timeout for this request, in seconds
103
+ """
104
+ return self._post(
105
+ "/endpoints",
106
+ body=maybe_transform(
107
+ {
108
+ "autoscaling": autoscaling,
109
+ "hardware": hardware,
110
+ "model": model,
111
+ "availability_zone": availability_zone,
112
+ "disable_prompt_cache": disable_prompt_cache,
113
+ "disable_speculative_decoding": disable_speculative_decoding,
114
+ "display_name": display_name,
115
+ "inactive_timeout": inactive_timeout,
116
+ "state": state,
117
+ },
118
+ endpoint_create_params.EndpointCreateParams,
185
119
  ),
186
- stream=False,
120
+ options=make_request_options(
121
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
122
+ ),
123
+ cast_to=DedicatedEndpoint,
187
124
  )
188
125
 
189
- def update(
126
+ def retrieve(
190
127
  self,
191
128
  endpoint_id: str,
192
129
  *,
193
- min_replicas: Optional[int] = None,
194
- max_replicas: Optional[int] = None,
195
- state: Optional[Literal["STARTED", "STOPPED"]] = None,
196
- display_name: Optional[str] = None,
197
- inactive_timeout: Optional[int] = None,
130
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
131
+ # The extra values given here take precedence over values defined on the client or passed to this method.
132
+ extra_headers: Headers | None = None,
133
+ extra_query: Query | None = None,
134
+ extra_body: Body | None = None,
135
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
198
136
  ) -> DedicatedEndpoint:
199
137
  """
200
- Update an endpoint's configuration.
138
+ Retrieves details about a specific endpoint, including its current state,
139
+ configuration, and scaling settings.
201
140
 
202
141
  Args:
203
- endpoint_id (str): ID of the endpoint to update
204
- min_replicas (int, optional): The minimum number of replicas to maintain
205
- max_replicas (int, optional): The maximum number of replicas to scale up to
206
- state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
207
- display_name (str, optional): A human-readable name for the endpoint
208
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
209
-
210
- Returns:
211
- DedicatedEndpoint: Object containing endpoint information
142
+ extra_headers: Send extra headers
143
+
144
+ extra_query: Add additional query parameters to the request
145
+
146
+ extra_body: Add additional JSON properties to the request
147
+
148
+ timeout: Override the client-level default timeout for this request, in seconds
212
149
  """
213
- requestor = api_requestor.APIRequestor(
214
- client=self._client,
215
- )
216
-
217
- data: Dict[str, Union[str, Dict[str, int], int]] = {}
218
-
219
- if min_replicas is not None or max_replicas is not None:
220
- current_min = min_replicas
221
- current_max = max_replicas
222
- if current_min is None or current_max is None:
223
- # Get current values if only one is specified
224
- current = self.get(endpoint_id=endpoint_id)
225
- current_min = current_min or current.autoscaling.min_replicas
226
- current_max = current_max or current.autoscaling.max_replicas
227
- data["autoscaling"] = {
228
- "min_replicas": current_min,
229
- "max_replicas": current_max,
230
- }
231
-
232
- if state is not None:
233
- data["state"] = state
234
-
235
- if display_name is not None:
236
- data["display_name"] = display_name
237
-
238
- if inactive_timeout is not None:
239
- data["inactive_timeout"] = inactive_timeout
240
-
241
- response, _, _ = requestor.request(
242
- options=TogetherRequest(
243
- method="PATCH",
244
- url=f"endpoints/{endpoint_id}",
245
- params=data,
150
+ if not endpoint_id:
151
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
152
+ return self._get(
153
+ f"/endpoints/{endpoint_id}",
154
+ options=make_request_options(
155
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
246
156
  ),
247
- stream=False,
157
+ cast_to=DedicatedEndpoint,
248
158
  )
249
159
 
250
- assert isinstance(response, TogetherResponse)
251
-
252
- return DedicatedEndpoint(**response.data)
160
+ def update(
161
+ self,
162
+ endpoint_id: str,
163
+ *,
164
+ autoscaling: AutoscalingParam | Omit = omit,
165
+ display_name: str | Omit = omit,
166
+ inactive_timeout: Optional[int] | Omit = omit,
167
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
168
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
169
+ # The extra values given here take precedence over values defined on the client or passed to this method.
170
+ extra_headers: Headers | None = None,
171
+ extra_query: Query | None = None,
172
+ extra_body: Body | None = None,
173
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
174
+ ) -> DedicatedEndpoint:
175
+ """Updates an existing endpoint's configuration.
253
176
 
254
- def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]:
255
- """
256
- List available hardware configurations.
177
+ You can modify the display name,
178
+ autoscaling settings, or change the endpoint's state (start/stop).
257
179
 
258
180
  Args:
259
- model (str, optional): Filter hardware configurations by model compatibility. When provided,
260
- the response includes availability status for each compatible configuration.
181
+ autoscaling: New autoscaling configuration for the endpoint
261
182
 
262
- Returns:
263
- List[HardwareWithStatus]: List of hardware configurations with their status
264
- """
265
- requestor = api_requestor.APIRequestor(
266
- client=self._client,
267
- )
183
+ display_name: A human-readable name for the endpoint
268
184
 
269
- params = {}
270
- if model is not None:
271
- params["model"] = model
185
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
186
+ automatically stopped. Set to 0 to disable automatic timeout.
272
187
 
273
- response, _, _ = requestor.request(
274
- options=TogetherRequest(
275
- method="GET",
276
- url="hardware",
277
- params=params,
278
- ),
279
- stream=False,
280
- )
188
+ state: The desired state of the endpoint
281
189
 
282
- assert isinstance(response, TogetherResponse)
283
- assert isinstance(response.data, dict)
284
- assert isinstance(response.data["data"], list)
190
+ extra_headers: Send extra headers
285
191
 
286
- return [HardwareWithStatus(**item) for item in response.data["data"]]
192
+ extra_query: Add additional query parameters to the request
287
193
 
288
- def list_avzones(self) -> List[str]:
289
- """
290
- List all available availability zones.
194
+ extra_body: Add additional JSON properties to the request
291
195
 
292
- Returns:
293
- List[str]: List of unique availability zones
196
+ timeout: Override the client-level default timeout for this request, in seconds
294
197
  """
295
- requestor = api_requestor.APIRequestor(
296
- client=self._client,
297
- )
298
-
299
- response, _, _ = requestor.request(
300
- options=TogetherRequest(
301
- method="GET",
302
- url="clusters/availability-zones",
198
+ if not endpoint_id:
199
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
200
+ return self._patch(
201
+ f"/endpoints/{endpoint_id}",
202
+ body=maybe_transform(
203
+ {
204
+ "autoscaling": autoscaling,
205
+ "display_name": display_name,
206
+ "inactive_timeout": inactive_timeout,
207
+ "state": state,
208
+ },
209
+ endpoint_update_params.EndpointUpdateParams,
210
+ ),
211
+ options=make_request_options(
212
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
303
213
  ),
304
- stream=False,
214
+ cast_to=DedicatedEndpoint,
305
215
  )
306
216
 
307
- assert isinstance(response, TogetherResponse)
308
- assert isinstance(response.data, dict)
309
- assert isinstance(response.data["avzones"], list)
217
+ def list(
218
+ self,
219
+ *,
220
+ mine: bool | Omit = omit,
221
+ type: Literal["dedicated", "serverless"] | Omit = omit,
222
+ usage_type: Literal["on-demand", "reserved"] | Omit = omit,
223
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
224
+ # The extra values given here take precedence over values defined on the client or passed to this method.
225
+ extra_headers: Headers | None = None,
226
+ extra_query: Query | None = None,
227
+ extra_body: Body | None = None,
228
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
229
+ ) -> EndpointListResponse:
230
+ """Returns a list of all endpoints associated with your account.
231
+
232
+ You can filter the
233
+ results by type (dedicated or serverless).
234
+
235
+ Args:
236
+ mine: If true, return only endpoints owned by the caller
237
+
238
+ type: Filter endpoints by type
310
239
 
311
- return response.data["avzones"]
240
+ usage_type: Filter endpoints by usage type
312
241
 
242
+ extra_headers: Send extra headers
313
243
 
314
- class AsyncEndpoints:
315
- def __init__(self, client: TogetherClient) -> None:
316
- self._client = client
244
+ extra_query: Add additional query parameters to the request
317
245
 
318
- async def list(
319
- self,
320
- type: Optional[Literal["dedicated", "serverless"]] = None,
321
- usage_type: Optional[Literal["on-demand", "reserved"]] = None,
322
- mine: Optional[bool] = None,
323
- ) -> List[ListEndpoint]:
246
+ extra_body: Add additional JSON properties to the request
247
+
248
+ timeout: Override the client-level default timeout for this request, in seconds
324
249
  """
325
- List all endpoints, can be filtered by type and ownership.
250
+ return self._get(
251
+ "/endpoints",
252
+ options=make_request_options(
253
+ extra_headers=extra_headers,
254
+ extra_query=extra_query,
255
+ extra_body=extra_body,
256
+ timeout=timeout,
257
+ query=maybe_transform(
258
+ {
259
+ "mine": mine,
260
+ "type": type,
261
+ "usage_type": usage_type,
262
+ },
263
+ endpoint_list_params.EndpointListParams,
264
+ ),
265
+ ),
266
+ cast_to=EndpointListResponse,
267
+ )
268
+
269
+ def delete(
270
+ self,
271
+ endpoint_id: str,
272
+ *,
273
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
274
+ # The extra values given here take precedence over values defined on the client or passed to this method.
275
+ extra_headers: Headers | None = None,
276
+ extra_query: Query | None = None,
277
+ extra_body: Body | None = None,
278
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
279
+ ) -> None:
280
+ """Permanently deletes an endpoint.
281
+
282
+ This action cannot be undone.
326
283
 
327
284
  Args:
328
- type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
329
- usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
330
- mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
285
+ extra_headers: Send extra headers
331
286
 
332
- Returns:
333
- List[ListEndpoint]: List of endpoint objects
287
+ extra_query: Add additional query parameters to the request
288
+
289
+ extra_body: Add additional JSON properties to the request
290
+
291
+ timeout: Override the client-level default timeout for this request, in seconds
334
292
  """
335
- requestor = api_requestor.APIRequestor(
336
- client=self._client,
337
- )
338
-
339
- params: Dict[
340
- str,
341
- Union[
342
- Literal["dedicated", "serverless"],
343
- Literal["on-demand", "reserved"],
344
- bool,
345
- ],
346
- ] = {}
347
- if type is not None:
348
- params["type"] = type
349
- if usage_type is not None:
350
- params["usage_type"] = usage_type
351
- if mine is not None:
352
- params["mine"] = mine
353
-
354
- response, _, _ = await requestor.arequest(
355
- options=TogetherRequest(
356
- method="GET",
357
- url="endpoints",
358
- params=params,
293
+ if not endpoint_id:
294
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
295
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
296
+ return self._delete(
297
+ f"/endpoints/{endpoint_id}",
298
+ options=make_request_options(
299
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
359
300
  ),
360
- stream=False,
301
+ cast_to=NoneType,
361
302
  )
362
303
 
363
- assert isinstance(response, TogetherResponse)
364
- assert isinstance(response.data, list)
304
+ def list_avzones(
305
+ self,
306
+ *,
307
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
308
+ # The extra values given here take precedence over values defined on the client or passed to this method.
309
+ extra_headers: Headers | None = None,
310
+ extra_query: Query | None = None,
311
+ extra_body: Body | None = None,
312
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
313
+ ) -> EndpointListAvzonesResponse:
314
+ """List all available availability zones."""
315
+ return self._get(
316
+ "/clusters/availability-zones",
317
+ options=make_request_options(
318
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
319
+ ),
320
+ cast_to=EndpointListAvzonesResponse,
321
+ )
322
+
323
+
324
+ class AsyncEndpointsResource(AsyncAPIResource):
325
+ @cached_property
326
+ def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
327
+ """
328
+ This property can be used as a prefix for any HTTP method call to return
329
+ the raw response object instead of the parsed content.
330
+
331
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
332
+ """
333
+ return AsyncEndpointsResourceWithRawResponse(self)
365
334
 
366
- return [ListEndpoint(**endpoint) for endpoint in response.data]
335
+ @cached_property
336
+ def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
337
+ """
338
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
339
+
340
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
341
+ """
342
+ return AsyncEndpointsResourceWithStreamingResponse(self)
367
343
 
368
344
  async def create(
369
345
  self,
370
346
  *,
371
- model: str,
347
+ autoscaling: AutoscalingParam,
372
348
  hardware: str,
373
- min_replicas: int,
374
- max_replicas: int,
375
- display_name: Optional[str] = None,
376
- disable_prompt_cache: bool = True,
377
- disable_speculative_decoding: bool = True,
378
- state: Literal["STARTED", "STOPPED"] = "STARTED",
379
- inactive_timeout: Optional[int] = None,
380
- availability_zone: Optional[str] = None,
349
+ model: str,
350
+ availability_zone: str | Omit = omit,
351
+ disable_prompt_cache: bool | Omit = omit,
352
+ disable_speculative_decoding: bool | Omit = omit,
353
+ display_name: str | Omit = omit,
354
+ inactive_timeout: Optional[int] | Omit = omit,
355
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
356
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
357
+ # The extra values given here take precedence over values defined on the client or passed to this method.
358
+ extra_headers: Headers | None = None,
359
+ extra_query: Query | None = None,
360
+ extra_body: Body | None = None,
361
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
381
362
  ) -> DedicatedEndpoint:
382
- """
383
- Create a new dedicated endpoint.
363
+ """Creates a new dedicated endpoint for serving models.
364
+
365
+ The endpoint will
366
+ automatically start after creation. You can deploy any supported model on
367
+ hardware configurations that meet the model's requirements.
384
368
 
385
369
  Args:
386
- model (str): The model to deploy on this endpoint
387
- hardware (str): The hardware configuration to use for this endpoint
388
- min_replicas (int): The minimum number of replicas to maintain
389
- max_replicas (int): The maximum number of replicas to scale up to
390
- display_name (str, optional): A human-readable name for the endpoint
391
- disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
392
- disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
393
- state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
394
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
395
-
396
- Returns:
397
- DedicatedEndpoint: Object containing endpoint information
398
- """
399
- if disable_prompt_cache:
400
- warnings.warn(
401
- "The 'disable_prompt_cache' parameter (CLI flag: '--no-prompt-cache') is deprecated and will be removed in a future version.",
402
- stacklevel=2,
403
- )
404
-
405
- requestor = api_requestor.APIRequestor(
406
- client=self._client,
407
- )
408
-
409
- data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
410
- "model": model,
411
- "hardware": hardware,
412
- "autoscaling": {
413
- "min_replicas": min_replicas,
414
- "max_replicas": max_replicas,
415
- },
416
- "disable_prompt_cache": disable_prompt_cache,
417
- "disable_speculative_decoding": disable_speculative_decoding,
418
- "state": state,
419
- }
420
-
421
- if display_name is not None:
422
- data["display_name"] = display_name
423
-
424
- if inactive_timeout is not None:
425
- data["inactive_timeout"] = inactive_timeout
426
-
427
- if availability_zone is not None:
428
- data["availability_zone"] = availability_zone
429
-
430
- response, _, _ = await requestor.arequest(
431
- options=TogetherRequest(
432
- method="POST",
433
- url="endpoints",
434
- params=data,
435
- ),
436
- stream=False,
437
- )
370
+ autoscaling: Configuration for automatic scaling of the endpoint
438
371
 
439
- assert isinstance(response, TogetherResponse)
372
+ hardware: The hardware configuration to use for this endpoint
440
373
 
441
- return DedicatedEndpoint(**response.data)
374
+ model: The model to deploy on this endpoint
442
375
 
443
- async def get(self, endpoint_id: str) -> DedicatedEndpoint:
444
- """
445
- Get details of a specific endpoint.
376
+ availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
446
377
 
447
- Args:
448
- endpoint_id (str): ID of the endpoint to retrieve
378
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
449
379
 
450
- Returns:
451
- DedicatedEndpoint: Object containing endpoint information
452
- """
453
- requestor = api_requestor.APIRequestor(
454
- client=self._client,
455
- )
380
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
456
381
 
457
- response, _, _ = await requestor.arequest(
458
- options=TogetherRequest(
459
- method="GET",
460
- url=f"endpoints/{endpoint_id}",
461
- ),
462
- stream=False,
463
- )
382
+ display_name: A human-readable name for the endpoint
464
383
 
465
- assert isinstance(response, TogetherResponse)
384
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
385
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
386
+ timeout.
466
387
 
467
- return DedicatedEndpoint(**response.data)
388
+ state: The desired state of the endpoint
468
389
 
469
- async def delete(self, endpoint_id: str) -> None:
470
- """
471
- Delete a specific endpoint.
390
+ extra_headers: Send extra headers
472
391
 
473
- Args:
474
- endpoint_id (str): ID of the endpoint to delete
392
+ extra_query: Add additional query parameters to the request
393
+
394
+ extra_body: Add additional JSON properties to the request
395
+
396
+ timeout: Override the client-level default timeout for this request, in seconds
475
397
  """
476
- requestor = api_requestor.APIRequestor(
477
- client=self._client,
398
+ return await self._post(
399
+ "/endpoints",
400
+ body=await async_maybe_transform(
401
+ {
402
+ "autoscaling": autoscaling,
403
+ "hardware": hardware,
404
+ "model": model,
405
+ "availability_zone": availability_zone,
406
+ "disable_prompt_cache": disable_prompt_cache,
407
+ "disable_speculative_decoding": disable_speculative_decoding,
408
+ "display_name": display_name,
409
+ "inactive_timeout": inactive_timeout,
410
+ "state": state,
411
+ },
412
+ endpoint_create_params.EndpointCreateParams,
413
+ ),
414
+ options=make_request_options(
415
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
416
+ ),
417
+ cast_to=DedicatedEndpoint,
478
418
  )
479
419
 
480
- await requestor.arequest(
481
- options=TogetherRequest(
482
- method="DELETE",
483
- url=f"endpoints/{endpoint_id}",
420
+ async def retrieve(
421
+ self,
422
+ endpoint_id: str,
423
+ *,
424
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
425
+ # The extra values given here take precedence over values defined on the client or passed to this method.
426
+ extra_headers: Headers | None = None,
427
+ extra_query: Query | None = None,
428
+ extra_body: Body | None = None,
429
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
430
+ ) -> DedicatedEndpoint:
431
+ """
432
+ Retrieves details about a specific endpoint, including its current state,
433
+ configuration, and scaling settings.
434
+
435
+ Args:
436
+ extra_headers: Send extra headers
437
+
438
+ extra_query: Add additional query parameters to the request
439
+
440
+ extra_body: Add additional JSON properties to the request
441
+
442
+ timeout: Override the client-level default timeout for this request, in seconds
443
+ """
444
+ if not endpoint_id:
445
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
446
+ return await self._get(
447
+ f"/endpoints/{endpoint_id}",
448
+ options=make_request_options(
449
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
484
450
  ),
485
- stream=False,
451
+ cast_to=DedicatedEndpoint,
486
452
  )
487
453
 
488
454
  async def update(
489
455
  self,
490
456
  endpoint_id: str,
491
457
  *,
492
- min_replicas: Optional[int] = None,
493
- max_replicas: Optional[int] = None,
494
- state: Optional[Literal["STARTED", "STOPPED"]] = None,
495
- display_name: Optional[str] = None,
496
- inactive_timeout: Optional[int] = None,
458
+ autoscaling: AutoscalingParam | Omit = omit,
459
+ display_name: str | Omit = omit,
460
+ inactive_timeout: Optional[int] | Omit = omit,
461
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
462
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
463
+ # The extra values given here take precedence over values defined on the client or passed to this method.
464
+ extra_headers: Headers | None = None,
465
+ extra_query: Query | None = None,
466
+ extra_body: Body | None = None,
467
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
497
468
  ) -> DedicatedEndpoint:
498
- """
499
- Update an endpoint's configuration.
469
+ """Updates an existing endpoint's configuration.
470
+
471
+ You can modify the display name,
472
+ autoscaling settings, or change the endpoint's state (start/stop).
500
473
 
501
474
  Args:
502
- endpoint_id (str): ID of the endpoint to update
503
- min_replicas (int, optional): The minimum number of replicas to maintain
504
- max_replicas (int, optional): The maximum number of replicas to scale up to
505
- state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
506
- display_name (str, optional): A human-readable name for the endpoint
507
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
508
-
509
- Returns:
510
- DedicatedEndpoint: Object containing endpoint information
475
+ autoscaling: New autoscaling configuration for the endpoint
476
+
477
+ display_name: A human-readable name for the endpoint
478
+
479
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
480
+ automatically stopped. Set to 0 to disable automatic timeout.
481
+
482
+ state: The desired state of the endpoint
483
+
484
+ extra_headers: Send extra headers
485
+
486
+ extra_query: Add additional query parameters to the request
487
+
488
+ extra_body: Add additional JSON properties to the request
489
+
490
+ timeout: Override the client-level default timeout for this request, in seconds
511
491
  """
512
- requestor = api_requestor.APIRequestor(
513
- client=self._client,
514
- )
515
-
516
- data: Dict[str, Union[str, Dict[str, int], int]] = {}
517
-
518
- if min_replicas is not None or max_replicas is not None:
519
- current_min = min_replicas
520
- current_max = max_replicas
521
- if current_min is None or current_max is None:
522
- # Get current values if only one is specified
523
- current = await self.get(endpoint_id=endpoint_id)
524
- current_min = current_min or current.autoscaling.min_replicas
525
- current_max = current_max or current.autoscaling.max_replicas
526
- data["autoscaling"] = {
527
- "min_replicas": current_min,
528
- "max_replicas": current_max,
529
- }
530
-
531
- if state is not None:
532
- data["state"] = state
533
-
534
- if display_name is not None:
535
- data["display_name"] = display_name
536
-
537
- if inactive_timeout is not None:
538
- data["inactive_timeout"] = inactive_timeout
539
-
540
- response, _, _ = await requestor.arequest(
541
- options=TogetherRequest(
542
- method="PATCH",
543
- url=f"endpoints/{endpoint_id}",
544
- params=data,
492
+ if not endpoint_id:
493
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
494
+ return await self._patch(
495
+ f"/endpoints/{endpoint_id}",
496
+ body=await async_maybe_transform(
497
+ {
498
+ "autoscaling": autoscaling,
499
+ "display_name": display_name,
500
+ "inactive_timeout": inactive_timeout,
501
+ "state": state,
502
+ },
503
+ endpoint_update_params.EndpointUpdateParams,
504
+ ),
505
+ options=make_request_options(
506
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
545
507
  ),
546
- stream=False,
508
+ cast_to=DedicatedEndpoint,
547
509
  )
548
510
 
549
- assert isinstance(response, TogetherResponse)
511
+ async def list(
512
+ self,
513
+ *,
514
+ mine: bool | Omit = omit,
515
+ type: Literal["dedicated", "serverless"] | Omit = omit,
516
+ usage_type: Literal["on-demand", "reserved"] | Omit = omit,
517
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
518
+ # The extra values given here take precedence over values defined on the client or passed to this method.
519
+ extra_headers: Headers | None = None,
520
+ extra_query: Query | None = None,
521
+ extra_body: Body | None = None,
522
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
523
+ ) -> EndpointListResponse:
524
+ """Returns a list of all endpoints associated with your account.
525
+
526
+ You can filter the
527
+ results by type (dedicated or serverless).
528
+
529
+ Args:
530
+ mine: If true, return only endpoints owned by the caller
531
+
532
+ type: Filter endpoints by type
533
+
534
+ usage_type: Filter endpoints by usage type
535
+
536
+ extra_headers: Send extra headers
537
+
538
+ extra_query: Add additional query parameters to the request
550
539
 
551
- return DedicatedEndpoint(**response.data)
540
+ extra_body: Add additional JSON properties to the request
552
541
 
553
- async def list_hardware(
554
- self, model: Optional[str] = None
555
- ) -> List[HardwareWithStatus]:
542
+ timeout: Override the client-level default timeout for this request, in seconds
556
543
  """
557
- List available hardware configurations.
544
+ return await self._get(
545
+ "/endpoints",
546
+ options=make_request_options(
547
+ extra_headers=extra_headers,
548
+ extra_query=extra_query,
549
+ extra_body=extra_body,
550
+ timeout=timeout,
551
+ query=await async_maybe_transform(
552
+ {
553
+ "mine": mine,
554
+ "type": type,
555
+ "usage_type": usage_type,
556
+ },
557
+ endpoint_list_params.EndpointListParams,
558
+ ),
559
+ ),
560
+ cast_to=EndpointListResponse,
561
+ )
562
+
563
+ async def delete(
564
+ self,
565
+ endpoint_id: str,
566
+ *,
567
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
568
+ # The extra values given here take precedence over values defined on the client or passed to this method.
569
+ extra_headers: Headers | None = None,
570
+ extra_query: Query | None = None,
571
+ extra_body: Body | None = None,
572
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
573
+ ) -> None:
574
+ """Permanently deletes an endpoint.
575
+
576
+ This action cannot be undone.
558
577
 
559
578
  Args:
560
- model (str, optional): Filter hardware configurations by model compatibility. When provided,
561
- the response includes availability status for each compatible configuration.
579
+ extra_headers: Send extra headers
580
+
581
+ extra_query: Add additional query parameters to the request
582
+
583
+ extra_body: Add additional JSON properties to the request
562
584
 
563
- Returns:
564
- List[HardwareWithStatus]: List of hardware configurations with their status
585
+ timeout: Override the client-level default timeout for this request, in seconds
565
586
  """
566
- requestor = api_requestor.APIRequestor(
567
- client=self._client,
587
+ if not endpoint_id:
588
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
589
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
590
+ return await self._delete(
591
+ f"/endpoints/{endpoint_id}",
592
+ options=make_request_options(
593
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
594
+ ),
595
+ cast_to=NoneType,
568
596
  )
569
597
 
570
- params = {}
571
- if model is not None:
572
- params["model"] = model
573
-
574
- response, _, _ = await requestor.arequest(
575
- options=TogetherRequest(
576
- method="GET",
577
- url="hardware",
578
- params=params,
598
+ async def list_avzones(
599
+ self,
600
+ *,
601
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
602
+ # The extra values given here take precedence over values defined on the client or passed to this method.
603
+ extra_headers: Headers | None = None,
604
+ extra_query: Query | None = None,
605
+ extra_body: Body | None = None,
606
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
607
+ ) -> EndpointListAvzonesResponse:
608
+ """List all available availability zones."""
609
+ return await self._get(
610
+ "/clusters/availability-zones",
611
+ options=make_request_options(
612
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
579
613
  ),
580
- stream=False,
614
+ cast_to=EndpointListAvzonesResponse,
581
615
  )
582
616
 
583
- assert isinstance(response, TogetherResponse)
584
- assert isinstance(response.data, dict)
585
- assert isinstance(response.data["data"], list)
586
617
 
587
- return [HardwareWithStatus(**item) for item in response.data["data"]]
618
+ class EndpointsResourceWithRawResponse:
619
+ def __init__(self, endpoints: EndpointsResource) -> None:
620
+ self._endpoints = endpoints
588
621
 
589
- async def list_avzones(self) -> List[str]:
590
- """
591
- List all availability zones.
622
+ self.create = to_raw_response_wrapper(
623
+ endpoints.create,
624
+ )
625
+ self.retrieve = to_raw_response_wrapper(
626
+ endpoints.retrieve,
627
+ )
628
+ self.update = to_raw_response_wrapper(
629
+ endpoints.update,
630
+ )
631
+ self.list = to_raw_response_wrapper(
632
+ endpoints.list,
633
+ )
634
+ self.delete = to_raw_response_wrapper(
635
+ endpoints.delete,
636
+ )
637
+ self.list_avzones = to_raw_response_wrapper(
638
+ endpoints.list_avzones,
639
+ )
592
640
 
593
- Returns:
594
- List[str]: List of unique availability zones
595
- """
596
- requestor = api_requestor.APIRequestor(
597
- client=self._client,
641
+
642
+ class AsyncEndpointsResourceWithRawResponse:
643
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
644
+ self._endpoints = endpoints
645
+
646
+ self.create = async_to_raw_response_wrapper(
647
+ endpoints.create,
648
+ )
649
+ self.retrieve = async_to_raw_response_wrapper(
650
+ endpoints.retrieve,
651
+ )
652
+ self.update = async_to_raw_response_wrapper(
653
+ endpoints.update,
654
+ )
655
+ self.list = async_to_raw_response_wrapper(
656
+ endpoints.list,
657
+ )
658
+ self.delete = async_to_raw_response_wrapper(
659
+ endpoints.delete,
660
+ )
661
+ self.list_avzones = async_to_raw_response_wrapper(
662
+ endpoints.list_avzones,
598
663
  )
599
664
 
600
- response, _, _ = await requestor.arequest(
601
- options=TogetherRequest(
602
- method="GET",
603
- url="clusters/availability-zones",
604
- ),
605
- stream=False,
665
+
666
+ class EndpointsResourceWithStreamingResponse:
667
+ def __init__(self, endpoints: EndpointsResource) -> None:
668
+ self._endpoints = endpoints
669
+
670
+ self.create = to_streamed_response_wrapper(
671
+ endpoints.create,
672
+ )
673
+ self.retrieve = to_streamed_response_wrapper(
674
+ endpoints.retrieve,
675
+ )
676
+ self.update = to_streamed_response_wrapper(
677
+ endpoints.update,
678
+ )
679
+ self.list = to_streamed_response_wrapper(
680
+ endpoints.list,
681
+ )
682
+ self.delete = to_streamed_response_wrapper(
683
+ endpoints.delete,
684
+ )
685
+ self.list_avzones = to_streamed_response_wrapper(
686
+ endpoints.list_avzones,
606
687
  )
607
688
 
608
- assert isinstance(response, TogetherResponse)
609
- assert isinstance(response.data, dict)
610
- assert isinstance(response.data["avzones"], list)
611
689
 
612
- return response.data["avzones"]
690
+ class AsyncEndpointsResourceWithStreamingResponse:
691
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
692
+ self._endpoints = endpoints
693
+
694
+ self.create = async_to_streamed_response_wrapper(
695
+ endpoints.create,
696
+ )
697
+ self.retrieve = async_to_streamed_response_wrapper(
698
+ endpoints.retrieve,
699
+ )
700
+ self.update = async_to_streamed_response_wrapper(
701
+ endpoints.update,
702
+ )
703
+ self.list = async_to_streamed_response_wrapper(
704
+ endpoints.list,
705
+ )
706
+ self.delete = async_to_streamed_response_wrapper(
707
+ endpoints.delete,
708
+ )
709
+ self.list_avzones = async_to_streamed_response_wrapper(
710
+ endpoints.list_avzones,
711
+ )