together 1.5.34__py3-none-any.whl → 2.0.0a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. together/__init__.py +101 -114
  2. together/_base_client.py +1995 -0
  3. together/_client.py +1033 -0
  4. together/_compat.py +219 -0
  5. together/_constants.py +14 -0
  6. together/_exceptions.py +108 -0
  7. together/_files.py +123 -0
  8. together/_models.py +857 -0
  9. together/_qs.py +150 -0
  10. together/_resource.py +43 -0
  11. together/_response.py +830 -0
  12. together/_streaming.py +370 -0
  13. together/_types.py +260 -0
  14. together/_utils/__init__.py +64 -0
  15. together/_utils/_compat.py +45 -0
  16. together/_utils/_datetime_parse.py +136 -0
  17. together/_utils/_logs.py +25 -0
  18. together/_utils/_proxy.py +65 -0
  19. together/_utils/_reflection.py +42 -0
  20. together/_utils/_resources_proxy.py +24 -0
  21. together/_utils/_streams.py +12 -0
  22. together/_utils/_sync.py +58 -0
  23. together/_utils/_transform.py +457 -0
  24. together/_utils/_typing.py +156 -0
  25. together/_utils/_utils.py +421 -0
  26. together/_version.py +4 -0
  27. together/lib/.keep +4 -0
  28. together/lib/__init__.py +23 -0
  29. together/{cli → lib/cli}/api/endpoints.py +65 -81
  30. together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
  31. together/{cli → lib/cli}/api/files.py +20 -17
  32. together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
  33. together/{cli → lib/cli}/api/models.py +34 -27
  34. together/lib/cli/api/utils.py +50 -0
  35. together/{cli → lib/cli}/cli.py +16 -26
  36. together/{constants.py → lib/constants.py} +11 -24
  37. together/lib/resources/__init__.py +11 -0
  38. together/lib/resources/files.py +999 -0
  39. together/lib/resources/fine_tuning.py +280 -0
  40. together/lib/resources/models.py +35 -0
  41. together/lib/types/__init__.py +13 -0
  42. together/lib/types/error.py +9 -0
  43. together/lib/types/fine_tuning.py +397 -0
  44. together/{utils → lib/utils}/__init__.py +6 -14
  45. together/{utils → lib/utils}/_log.py +11 -16
  46. together/{utils → lib/utils}/files.py +90 -288
  47. together/lib/utils/serializer.py +10 -0
  48. together/{utils → lib/utils}/tools.py +19 -55
  49. together/resources/__init__.py +225 -39
  50. together/resources/audio/__init__.py +72 -48
  51. together/resources/audio/audio.py +198 -0
  52. together/resources/audio/speech.py +574 -128
  53. together/resources/audio/transcriptions.py +247 -261
  54. together/resources/audio/translations.py +221 -241
  55. together/resources/audio/voices.py +111 -41
  56. together/resources/batches.py +417 -0
  57. together/resources/chat/__init__.py +30 -21
  58. together/resources/chat/chat.py +102 -0
  59. together/resources/chat/completions.py +1063 -263
  60. together/resources/code_interpreter/__init__.py +33 -0
  61. together/resources/code_interpreter/code_interpreter.py +258 -0
  62. together/resources/code_interpreter/sessions.py +135 -0
  63. together/resources/completions.py +884 -225
  64. together/resources/embeddings.py +172 -68
  65. together/resources/endpoints.py +589 -477
  66. together/resources/evals.py +452 -0
  67. together/resources/files.py +397 -129
  68. together/resources/fine_tuning.py +1033 -0
  69. together/resources/hardware.py +181 -0
  70. together/resources/images.py +258 -104
  71. together/resources/jobs.py +214 -0
  72. together/resources/models.py +223 -193
  73. together/resources/rerank.py +190 -92
  74. together/resources/videos.py +286 -214
  75. together/types/__init__.py +66 -167
  76. together/types/audio/__init__.py +10 -0
  77. together/types/audio/speech_create_params.py +75 -0
  78. together/types/audio/transcription_create_params.py +54 -0
  79. together/types/audio/transcription_create_response.py +111 -0
  80. together/types/audio/translation_create_params.py +40 -0
  81. together/types/audio/translation_create_response.py +70 -0
  82. together/types/audio/voice_list_response.py +23 -0
  83. together/types/audio_speech_stream_chunk.py +16 -0
  84. together/types/autoscaling.py +13 -0
  85. together/types/autoscaling_param.py +15 -0
  86. together/types/batch_create_params.py +24 -0
  87. together/types/batch_create_response.py +14 -0
  88. together/types/batch_job.py +45 -0
  89. together/types/batch_list_response.py +10 -0
  90. together/types/chat/__init__.py +18 -0
  91. together/types/chat/chat_completion.py +60 -0
  92. together/types/chat/chat_completion_chunk.py +61 -0
  93. together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
  94. together/types/chat/chat_completion_structured_message_text_param.py +13 -0
  95. together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
  96. together/types/chat/chat_completion_usage.py +13 -0
  97. together/types/chat/chat_completion_warning.py +9 -0
  98. together/types/chat/completion_create_params.py +329 -0
  99. together/types/code_interpreter/__init__.py +5 -0
  100. together/types/code_interpreter/session_list_response.py +31 -0
  101. together/types/code_interpreter_execute_params.py +45 -0
  102. together/types/completion.py +42 -0
  103. together/types/completion_chunk.py +66 -0
  104. together/types/completion_create_params.py +138 -0
  105. together/types/dedicated_endpoint.py +44 -0
  106. together/types/embedding.py +24 -0
  107. together/types/embedding_create_params.py +31 -0
  108. together/types/endpoint_create_params.py +43 -0
  109. together/types/endpoint_list_avzones_response.py +11 -0
  110. together/types/endpoint_list_params.py +18 -0
  111. together/types/endpoint_list_response.py +41 -0
  112. together/types/endpoint_update_params.py +27 -0
  113. together/types/eval_create_params.py +263 -0
  114. together/types/eval_create_response.py +16 -0
  115. together/types/eval_list_params.py +21 -0
  116. together/types/eval_list_response.py +10 -0
  117. together/types/eval_status_response.py +100 -0
  118. together/types/evaluation_job.py +139 -0
  119. together/types/execute_response.py +108 -0
  120. together/types/file_delete_response.py +13 -0
  121. together/types/file_list.py +12 -0
  122. together/types/file_purpose.py +9 -0
  123. together/types/file_response.py +31 -0
  124. together/types/file_type.py +7 -0
  125. together/types/fine_tuning_cancel_response.py +194 -0
  126. together/types/fine_tuning_content_params.py +24 -0
  127. together/types/fine_tuning_delete_params.py +11 -0
  128. together/types/fine_tuning_delete_response.py +12 -0
  129. together/types/fine_tuning_list_checkpoints_response.py +21 -0
  130. together/types/fine_tuning_list_events_response.py +12 -0
  131. together/types/fine_tuning_list_response.py +199 -0
  132. together/types/finetune_event.py +41 -0
  133. together/types/finetune_event_type.py +33 -0
  134. together/types/finetune_response.py +177 -0
  135. together/types/hardware_list_params.py +16 -0
  136. together/types/hardware_list_response.py +58 -0
  137. together/types/image_data_b64.py +15 -0
  138. together/types/image_data_url.py +15 -0
  139. together/types/image_file.py +23 -0
  140. together/types/image_generate_params.py +85 -0
  141. together/types/job_list_response.py +47 -0
  142. together/types/job_retrieve_response.py +43 -0
  143. together/types/log_probs.py +18 -0
  144. together/types/model_list_response.py +10 -0
  145. together/types/model_object.py +42 -0
  146. together/types/model_upload_params.py +36 -0
  147. together/types/model_upload_response.py +23 -0
  148. together/types/rerank_create_params.py +36 -0
  149. together/types/rerank_create_response.py +36 -0
  150. together/types/tool_choice.py +23 -0
  151. together/types/tool_choice_param.py +23 -0
  152. together/types/tools_param.py +23 -0
  153. together/types/training_method_dpo.py +22 -0
  154. together/types/training_method_sft.py +18 -0
  155. together/types/video_create_params.py +86 -0
  156. together/types/video_create_response.py +10 -0
  157. together/types/video_job.py +57 -0
  158. together-2.0.0a6.dist-info/METADATA +729 -0
  159. together-2.0.0a6.dist-info/RECORD +165 -0
  160. {together-1.5.34.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
  161. together-2.0.0a6.dist-info/entry_points.txt +2 -0
  162. {together-1.5.34.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
  163. together/abstract/api_requestor.py +0 -770
  164. together/cli/api/chat.py +0 -298
  165. together/cli/api/completions.py +0 -119
  166. together/cli/api/images.py +0 -93
  167. together/cli/api/utils.py +0 -139
  168. together/client.py +0 -186
  169. together/error.py +0 -194
  170. together/filemanager.py +0 -635
  171. together/legacy/__init__.py +0 -0
  172. together/legacy/base.py +0 -27
  173. together/legacy/complete.py +0 -93
  174. together/legacy/embeddings.py +0 -27
  175. together/legacy/files.py +0 -146
  176. together/legacy/finetune.py +0 -177
  177. together/legacy/images.py +0 -27
  178. together/legacy/models.py +0 -44
  179. together/resources/batch.py +0 -165
  180. together/resources/code_interpreter.py +0 -82
  181. together/resources/evaluation.py +0 -808
  182. together/resources/finetune.py +0 -1388
  183. together/together_response.py +0 -50
  184. together/types/abstract.py +0 -26
  185. together/types/audio_speech.py +0 -311
  186. together/types/batch.py +0 -54
  187. together/types/chat_completions.py +0 -210
  188. together/types/code_interpreter.py +0 -57
  189. together/types/common.py +0 -67
  190. together/types/completions.py +0 -107
  191. together/types/embeddings.py +0 -35
  192. together/types/endpoints.py +0 -123
  193. together/types/error.py +0 -16
  194. together/types/evaluation.py +0 -93
  195. together/types/files.py +0 -93
  196. together/types/finetune.py +0 -464
  197. together/types/images.py +0 -42
  198. together/types/models.py +0 -96
  199. together/types/rerank.py +0 -43
  200. together/types/videos.py +0 -69
  201. together/utils/api_helpers.py +0 -124
  202. together/version.py +0 -6
  203. together-1.5.34.dist-info/METADATA +0 -583
  204. together-1.5.34.dist-info/RECORD +0 -77
  205. together-1.5.34.dist-info/entry_points.txt +0 -3
  206. /together/{abstract → lib/cli}/__init__.py +0 -0
  207. /together/{cli → lib/cli/api}/__init__.py +0 -0
  208. /together/{cli/api/__init__.py → py.typed} +0 -0
@@ -1,599 +1,711 @@
1
- from __future__ import annotations
2
-
3
- from typing import Dict, List, Literal, Optional, Union
4
-
5
- from together.abstract import api_requestor
6
- from together.together_response import TogetherResponse
7
- from together.types import TogetherClient, TogetherRequest
8
- from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
9
2
 
3
+ from __future__ import annotations
10
4
 
11
- class Endpoints:
12
- def __init__(self, client: TogetherClient) -> None:
13
- self._client = client
14
-
15
- def list(
16
- self,
17
- type: Optional[Literal["dedicated", "serverless"]] = None,
18
- usage_type: Optional[Literal["on-demand", "reserved"]] = None,
19
- mine: Optional[bool] = None,
20
- ) -> List[ListEndpoint]:
5
+ from typing import Optional
6
+ from typing_extensions import Literal
7
+
8
+ import httpx
9
+
10
+ from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
11
+ from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
12
+ from .._utils import maybe_transform, async_maybe_transform
13
+ from .._compat import cached_property
14
+ from .._resource import SyncAPIResource, AsyncAPIResource
15
+ from .._response import (
16
+ to_raw_response_wrapper,
17
+ to_streamed_response_wrapper,
18
+ async_to_raw_response_wrapper,
19
+ async_to_streamed_response_wrapper,
20
+ )
21
+ from .._base_client import make_request_options
22
+ from ..types.autoscaling_param import AutoscalingParam
23
+ from ..types.dedicated_endpoint import DedicatedEndpoint
24
+ from ..types.endpoint_list_response import EndpointListResponse
25
+ from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
26
+
27
+ __all__ = ["EndpointsResource", "AsyncEndpointsResource"]
28
+
29
+
30
+ class EndpointsResource(SyncAPIResource):
31
+ @cached_property
32
+ def with_raw_response(self) -> EndpointsResourceWithRawResponse:
21
33
  """
22
- List all endpoints, can be filtered by endpoint type and ownership.
23
-
24
- Args:
25
- type (str, optional): Filter endpoints by endpoint type ("dedicated" or "serverless"). Defaults to None.
26
- usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
27
- mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
34
+ This property can be used as a prefix for any HTTP method call to return
35
+ the raw response object instead of the parsed content.
28
36
 
29
- Returns:
30
- List[ListEndpoint]: List of endpoint objects
37
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
31
38
  """
32
- requestor = api_requestor.APIRequestor(
33
- client=self._client,
34
- )
35
-
36
- params: Dict[
37
- str,
38
- Union[
39
- Literal["dedicated", "serverless"],
40
- Literal["on-demand", "reserved"],
41
- bool,
42
- ],
43
- ] = {}
44
- if type is not None:
45
- params["type"] = type
46
- if usage_type is not None:
47
- params["usage_type"] = usage_type
48
- if mine is not None:
49
- params["mine"] = mine
50
-
51
- response, _, _ = requestor.request(
52
- options=TogetherRequest(
53
- method="GET",
54
- url="endpoints",
55
- params=params,
56
- ),
57
- stream=False,
58
- )
39
+ return EndpointsResourceWithRawResponse(self)
59
40
 
60
- response.data = response.data["data"]
61
-
62
- assert isinstance(response, TogetherResponse)
63
- assert isinstance(response.data, list)
41
+ @cached_property
42
+ def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
43
+ """
44
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
64
45
 
65
- return [ListEndpoint(**endpoint) for endpoint in response.data]
46
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
47
+ """
48
+ return EndpointsResourceWithStreamingResponse(self)
66
49
 
67
50
  def create(
68
51
  self,
69
52
  *,
70
- model: str,
53
+ autoscaling: AutoscalingParam,
71
54
  hardware: str,
72
- min_replicas: int,
73
- max_replicas: int,
74
- display_name: Optional[str] = None,
75
- disable_prompt_cache: bool = True,
76
- disable_speculative_decoding: bool = True,
77
- state: Literal["STARTED", "STOPPED"] = "STARTED",
78
- inactive_timeout: Optional[int] = None,
79
- availability_zone: Optional[str] = None,
55
+ model: str,
56
+ availability_zone: str | Omit = omit,
57
+ disable_prompt_cache: bool | Omit = omit,
58
+ disable_speculative_decoding: bool | Omit = omit,
59
+ display_name: str | Omit = omit,
60
+ inactive_timeout: Optional[int] | Omit = omit,
61
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
62
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
63
+ # The extra values given here take precedence over values defined on the client or passed to this method.
64
+ extra_headers: Headers | None = None,
65
+ extra_query: Query | None = None,
66
+ extra_body: Body | None = None,
67
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
80
68
  ) -> DedicatedEndpoint:
81
- """
82
- Create a new dedicated endpoint.
69
+ """Creates a new dedicated endpoint for serving models.
70
+
71
+ The endpoint will
72
+ automatically start after creation. You can deploy any supported model on
73
+ hardware configurations that meet the model's requirements.
83
74
 
84
75
  Args:
85
- model (str): The model to deploy on this endpoint
86
- hardware (str): The hardware configuration to use for this endpoint
87
- min_replicas (int): The minimum number of replicas to maintain
88
- max_replicas (int): The maximum number of replicas to scale up to
89
- display_name (str, optional): A human-readable name for the endpoint
90
- disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
91
- disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
92
- state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
93
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
94
- availability_zone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).
95
-
96
- Returns:
97
- DedicatedEndpoint: Object containing endpoint information
98
- """
99
- requestor = api_requestor.APIRequestor(
100
- client=self._client,
101
- )
102
-
103
- data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
104
- "model": model,
105
- "hardware": hardware,
106
- "autoscaling": {
107
- "min_replicas": min_replicas,
108
- "max_replicas": max_replicas,
109
- },
110
- "disable_prompt_cache": disable_prompt_cache,
111
- "disable_speculative_decoding": disable_speculative_decoding,
112
- "state": state,
113
- }
114
-
115
- if display_name is not None:
116
- data["display_name"] = display_name
117
-
118
- if inactive_timeout is not None:
119
- data["inactive_timeout"] = inactive_timeout
120
-
121
- if availability_zone is not None:
122
- data["availability_zone"] = availability_zone
123
-
124
- response, _, _ = requestor.request(
125
- options=TogetherRequest(
126
- method="POST",
127
- url="endpoints",
128
- params=data,
129
- ),
130
- stream=False,
131
- )
76
+ autoscaling: Configuration for automatic scaling of the endpoint
132
77
 
133
- assert isinstance(response, TogetherResponse)
78
+ hardware: The hardware configuration to use for this endpoint
134
79
 
135
- return DedicatedEndpoint(**response.data)
80
+ model: The model to deploy on this endpoint
136
81
 
137
- def get(self, endpoint_id: str) -> DedicatedEndpoint:
138
- """
139
- Get details of a specific endpoint.
82
+ availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
140
83
 
141
- Args:
142
- endpoint_id (str): ID of the endpoint to retrieve
84
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
143
85
 
144
- Returns:
145
- DedicatedEndpoint: Object containing endpoint information
146
- """
147
- requestor = api_requestor.APIRequestor(
148
- client=self._client,
149
- )
86
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
150
87
 
151
- response, _, _ = requestor.request(
152
- options=TogetherRequest(
153
- method="GET",
154
- url=f"endpoints/{endpoint_id}",
155
- ),
156
- stream=False,
157
- )
88
+ display_name: A human-readable name for the endpoint
158
89
 
159
- assert isinstance(response, TogetherResponse)
90
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
91
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
92
+ timeout.
160
93
 
161
- return DedicatedEndpoint(**response.data)
94
+ state: The desired state of the endpoint
162
95
 
163
- def delete(self, endpoint_id: str) -> None:
164
- """
165
- Delete a specific endpoint.
96
+ extra_headers: Send extra headers
166
97
 
167
- Args:
168
- endpoint_id (str): ID of the endpoint to delete
169
- """
170
- requestor = api_requestor.APIRequestor(
171
- client=self._client,
172
- )
98
+ extra_query: Add additional query parameters to the request
173
99
 
174
- requestor.request(
175
- options=TogetherRequest(
176
- method="DELETE",
177
- url=f"endpoints/{endpoint_id}",
100
+ extra_body: Add additional JSON properties to the request
101
+
102
+ timeout: Override the client-level default timeout for this request, in seconds
103
+ """
104
+ return self._post(
105
+ "/endpoints",
106
+ body=maybe_transform(
107
+ {
108
+ "autoscaling": autoscaling,
109
+ "hardware": hardware,
110
+ "model": model,
111
+ "availability_zone": availability_zone,
112
+ "disable_prompt_cache": disable_prompt_cache,
113
+ "disable_speculative_decoding": disable_speculative_decoding,
114
+ "display_name": display_name,
115
+ "inactive_timeout": inactive_timeout,
116
+ "state": state,
117
+ },
118
+ endpoint_create_params.EndpointCreateParams,
178
119
  ),
179
- stream=False,
120
+ options=make_request_options(
121
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
122
+ ),
123
+ cast_to=DedicatedEndpoint,
180
124
  )
181
125
 
182
- def update(
126
+ def retrieve(
183
127
  self,
184
128
  endpoint_id: str,
185
129
  *,
186
- min_replicas: Optional[int] = None,
187
- max_replicas: Optional[int] = None,
188
- state: Optional[Literal["STARTED", "STOPPED"]] = None,
189
- display_name: Optional[str] = None,
190
- inactive_timeout: Optional[int] = None,
130
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
131
+ # The extra values given here take precedence over values defined on the client or passed to this method.
132
+ extra_headers: Headers | None = None,
133
+ extra_query: Query | None = None,
134
+ extra_body: Body | None = None,
135
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
191
136
  ) -> DedicatedEndpoint:
192
137
  """
193
- Update an endpoint's configuration.
138
+ Retrieves details about a specific endpoint, including its current state,
139
+ configuration, and scaling settings.
194
140
 
195
141
  Args:
196
- endpoint_id (str): ID of the endpoint to update
197
- min_replicas (int, optional): The minimum number of replicas to maintain
198
- max_replicas (int, optional): The maximum number of replicas to scale up to
199
- state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
200
- display_name (str, optional): A human-readable name for the endpoint
201
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
202
-
203
- Returns:
204
- DedicatedEndpoint: Object containing endpoint information
142
+ extra_headers: Send extra headers
143
+
144
+ extra_query: Add additional query parameters to the request
145
+
146
+ extra_body: Add additional JSON properties to the request
147
+
148
+ timeout: Override the client-level default timeout for this request, in seconds
205
149
  """
206
- requestor = api_requestor.APIRequestor(
207
- client=self._client,
208
- )
209
-
210
- data: Dict[str, Union[str, Dict[str, int], int]] = {}
211
-
212
- if min_replicas is not None or max_replicas is not None:
213
- current_min = min_replicas
214
- current_max = max_replicas
215
- if current_min is None or current_max is None:
216
- # Get current values if only one is specified
217
- current = self.get(endpoint_id=endpoint_id)
218
- current_min = current_min or current.autoscaling.min_replicas
219
- current_max = current_max or current.autoscaling.max_replicas
220
- data["autoscaling"] = {
221
- "min_replicas": current_min,
222
- "max_replicas": current_max,
223
- }
224
-
225
- if state is not None:
226
- data["state"] = state
227
-
228
- if display_name is not None:
229
- data["display_name"] = display_name
230
-
231
- if inactive_timeout is not None:
232
- data["inactive_timeout"] = inactive_timeout
233
-
234
- response, _, _ = requestor.request(
235
- options=TogetherRequest(
236
- method="PATCH",
237
- url=f"endpoints/{endpoint_id}",
238
- params=data,
150
+ if not endpoint_id:
151
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
152
+ return self._get(
153
+ f"/endpoints/{endpoint_id}",
154
+ options=make_request_options(
155
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
239
156
  ),
240
- stream=False,
157
+ cast_to=DedicatedEndpoint,
241
158
  )
242
159
 
243
- assert isinstance(response, TogetherResponse)
244
-
245
- return DedicatedEndpoint(**response.data)
160
+ def update(
161
+ self,
162
+ endpoint_id: str,
163
+ *,
164
+ autoscaling: AutoscalingParam | Omit = omit,
165
+ display_name: str | Omit = omit,
166
+ inactive_timeout: Optional[int] | Omit = omit,
167
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
168
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
169
+ # The extra values given here take precedence over values defined on the client or passed to this method.
170
+ extra_headers: Headers | None = None,
171
+ extra_query: Query | None = None,
172
+ extra_body: Body | None = None,
173
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
174
+ ) -> DedicatedEndpoint:
175
+ """Updates an existing endpoint's configuration.
246
176
 
247
- def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]:
248
- """
249
- List available hardware configurations.
177
+ You can modify the display name,
178
+ autoscaling settings, or change the endpoint's state (start/stop).
250
179
 
251
180
  Args:
252
- model (str, optional): Filter hardware configurations by model compatibility. When provided,
253
- the response includes availability status for each compatible configuration.
181
+ autoscaling: New autoscaling configuration for the endpoint
254
182
 
255
- Returns:
256
- List[HardwareWithStatus]: List of hardware configurations with their status
257
- """
258
- requestor = api_requestor.APIRequestor(
259
- client=self._client,
260
- )
183
+ display_name: A human-readable name for the endpoint
261
184
 
262
- params = {}
263
- if model is not None:
264
- params["model"] = model
185
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
186
+ automatically stopped. Set to 0 to disable automatic timeout.
265
187
 
266
- response, _, _ = requestor.request(
267
- options=TogetherRequest(
268
- method="GET",
269
- url="hardware",
270
- params=params,
271
- ),
272
- stream=False,
273
- )
188
+ state: The desired state of the endpoint
274
189
 
275
- assert isinstance(response, TogetherResponse)
276
- assert isinstance(response.data, dict)
277
- assert isinstance(response.data["data"], list)
190
+ extra_headers: Send extra headers
278
191
 
279
- return [HardwareWithStatus(**item) for item in response.data["data"]]
192
+ extra_query: Add additional query parameters to the request
280
193
 
281
- def list_avzones(self) -> List[str]:
282
- """
283
- List all available availability zones.
194
+ extra_body: Add additional JSON properties to the request
284
195
 
285
- Returns:
286
- List[str]: List of unique availability zones
196
+ timeout: Override the client-level default timeout for this request, in seconds
287
197
  """
288
- requestor = api_requestor.APIRequestor(
289
- client=self._client,
290
- )
291
-
292
- response, _, _ = requestor.request(
293
- options=TogetherRequest(
294
- method="GET",
295
- url="clusters/availability-zones",
198
+ if not endpoint_id:
199
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
200
+ return self._patch(
201
+ f"/endpoints/{endpoint_id}",
202
+ body=maybe_transform(
203
+ {
204
+ "autoscaling": autoscaling,
205
+ "display_name": display_name,
206
+ "inactive_timeout": inactive_timeout,
207
+ "state": state,
208
+ },
209
+ endpoint_update_params.EndpointUpdateParams,
210
+ ),
211
+ options=make_request_options(
212
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
296
213
  ),
297
- stream=False,
214
+ cast_to=DedicatedEndpoint,
298
215
  )
299
216
 
300
- assert isinstance(response, TogetherResponse)
301
- assert isinstance(response.data, dict)
302
- assert isinstance(response.data["avzones"], list)
217
+ def list(
218
+ self,
219
+ *,
220
+ mine: bool | Omit = omit,
221
+ type: Literal["dedicated", "serverless"] | Omit = omit,
222
+ usage_type: Literal["on-demand", "reserved"] | Omit = omit,
223
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
224
+ # The extra values given here take precedence over values defined on the client or passed to this method.
225
+ extra_headers: Headers | None = None,
226
+ extra_query: Query | None = None,
227
+ extra_body: Body | None = None,
228
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
229
+ ) -> EndpointListResponse:
230
+ """Returns a list of all endpoints associated with your account.
231
+
232
+ You can filter the
233
+ results by type (dedicated or serverless).
234
+
235
+ Args:
236
+ mine: If true, return only endpoints owned by the caller
237
+
238
+ type: Filter endpoints by type
303
239
 
304
- return response.data["avzones"]
240
+ usage_type: Filter endpoints by usage type
305
241
 
242
+ extra_headers: Send extra headers
306
243
 
307
- class AsyncEndpoints:
308
- def __init__(self, client: TogetherClient) -> None:
309
- self._client = client
244
+ extra_query: Add additional query parameters to the request
310
245
 
311
- async def list(
312
- self,
313
- type: Optional[Literal["dedicated", "serverless"]] = None,
314
- usage_type: Optional[Literal["on-demand", "reserved"]] = None,
315
- mine: Optional[bool] = None,
316
- ) -> List[ListEndpoint]:
246
+ extra_body: Add additional JSON properties to the request
247
+
248
+ timeout: Override the client-level default timeout for this request, in seconds
317
249
  """
318
- List all endpoints, can be filtered by type and ownership.
250
+ return self._get(
251
+ "/endpoints",
252
+ options=make_request_options(
253
+ extra_headers=extra_headers,
254
+ extra_query=extra_query,
255
+ extra_body=extra_body,
256
+ timeout=timeout,
257
+ query=maybe_transform(
258
+ {
259
+ "mine": mine,
260
+ "type": type,
261
+ "usage_type": usage_type,
262
+ },
263
+ endpoint_list_params.EndpointListParams,
264
+ ),
265
+ ),
266
+ cast_to=EndpointListResponse,
267
+ )
268
+
269
+ def delete(
270
+ self,
271
+ endpoint_id: str,
272
+ *,
273
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
274
+ # The extra values given here take precedence over values defined on the client or passed to this method.
275
+ extra_headers: Headers | None = None,
276
+ extra_query: Query | None = None,
277
+ extra_body: Body | None = None,
278
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
279
+ ) -> None:
280
+ """Permanently deletes an endpoint.
281
+
282
+ This action cannot be undone.
319
283
 
320
284
  Args:
321
- type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
322
- usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
323
- mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
285
+ extra_headers: Send extra headers
324
286
 
325
- Returns:
326
- List[ListEndpoint]: List of endpoint objects
287
+ extra_query: Add additional query parameters to the request
288
+
289
+ extra_body: Add additional JSON properties to the request
290
+
291
+ timeout: Override the client-level default timeout for this request, in seconds
327
292
  """
328
- requestor = api_requestor.APIRequestor(
329
- client=self._client,
330
- )
331
-
332
- params: Dict[
333
- str,
334
- Union[
335
- Literal["dedicated", "serverless"],
336
- Literal["on-demand", "reserved"],
337
- bool,
338
- ],
339
- ] = {}
340
- if type is not None:
341
- params["type"] = type
342
- if usage_type is not None:
343
- params["usage_type"] = usage_type
344
- if mine is not None:
345
- params["mine"] = mine
346
-
347
- response, _, _ = await requestor.arequest(
348
- options=TogetherRequest(
349
- method="GET",
350
- url="endpoints",
351
- params=params,
293
+ if not endpoint_id:
294
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
295
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
296
+ return self._delete(
297
+ f"/endpoints/{endpoint_id}",
298
+ options=make_request_options(
299
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
352
300
  ),
353
- stream=False,
301
+ cast_to=NoneType,
354
302
  )
355
303
 
356
- assert isinstance(response, TogetherResponse)
357
- assert isinstance(response.data, list)
304
+ def list_avzones(
305
+ self,
306
+ *,
307
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
308
+ # The extra values given here take precedence over values defined on the client or passed to this method.
309
+ extra_headers: Headers | None = None,
310
+ extra_query: Query | None = None,
311
+ extra_body: Body | None = None,
312
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
313
+ ) -> EndpointListAvzonesResponse:
314
+ """List all available availability zones."""
315
+ return self._get(
316
+ "/clusters/availability-zones",
317
+ options=make_request_options(
318
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
319
+ ),
320
+ cast_to=EndpointListAvzonesResponse,
321
+ )
322
+
323
+
324
+ class AsyncEndpointsResource(AsyncAPIResource):
325
+ @cached_property
326
+ def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
327
+ """
328
+ This property can be used as a prefix for any HTTP method call to return
329
+ the raw response object instead of the parsed content.
330
+
331
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
332
+ """
333
+ return AsyncEndpointsResourceWithRawResponse(self)
358
334
 
359
- return [ListEndpoint(**endpoint) for endpoint in response.data]
335
+ @cached_property
336
+ def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
337
+ """
338
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
339
+
340
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
341
+ """
342
+ return AsyncEndpointsResourceWithStreamingResponse(self)
360
343
 
361
344
  async def create(
362
345
  self,
363
346
  *,
364
- model: str,
347
+ autoscaling: AutoscalingParam,
365
348
  hardware: str,
366
- min_replicas: int,
367
- max_replicas: int,
368
- display_name: Optional[str] = None,
369
- disable_prompt_cache: bool = True,
370
- disable_speculative_decoding: bool = True,
371
- state: Literal["STARTED", "STOPPED"] = "STARTED",
372
- inactive_timeout: Optional[int] = None,
373
- availability_zone: Optional[str] = None,
349
+ model: str,
350
+ availability_zone: str | Omit = omit,
351
+ disable_prompt_cache: bool | Omit = omit,
352
+ disable_speculative_decoding: bool | Omit = omit,
353
+ display_name: str | Omit = omit,
354
+ inactive_timeout: Optional[int] | Omit = omit,
355
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
356
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
357
+ # The extra values given here take precedence over values defined on the client or passed to this method.
358
+ extra_headers: Headers | None = None,
359
+ extra_query: Query | None = None,
360
+ extra_body: Body | None = None,
361
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
374
362
  ) -> DedicatedEndpoint:
375
- """
376
- Create a new dedicated endpoint.
363
+ """Creates a new dedicated endpoint for serving models.
364
+
365
+ The endpoint will
366
+ automatically start after creation. You can deploy any supported model on
367
+ hardware configurations that meet the model's requirements.
377
368
 
378
369
  Args:
379
- model (str): The model to deploy on this endpoint
380
- hardware (str): The hardware configuration to use for this endpoint
381
- min_replicas (int): The minimum number of replicas to maintain
382
- max_replicas (int): The maximum number of replicas to scale up to
383
- display_name (str, optional): A human-readable name for the endpoint
384
- disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
385
- disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
386
- state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
387
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
388
-
389
- Returns:
390
- DedicatedEndpoint: Object containing endpoint information
391
- """
392
- requestor = api_requestor.APIRequestor(
393
- client=self._client,
394
- )
395
-
396
- data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
397
- "model": model,
398
- "hardware": hardware,
399
- "autoscaling": {
400
- "min_replicas": min_replicas,
401
- "max_replicas": max_replicas,
402
- },
403
- "disable_prompt_cache": disable_prompt_cache,
404
- "disable_speculative_decoding": disable_speculative_decoding,
405
- "state": state,
406
- }
407
-
408
- if display_name is not None:
409
- data["display_name"] = display_name
410
-
411
- if inactive_timeout is not None:
412
- data["inactive_timeout"] = inactive_timeout
413
-
414
- if availability_zone is not None:
415
- data["availability_zone"] = availability_zone
416
-
417
- response, _, _ = await requestor.arequest(
418
- options=TogetherRequest(
419
- method="POST",
420
- url="endpoints",
421
- params=data,
422
- ),
423
- stream=False,
424
- )
370
+ autoscaling: Configuration for automatic scaling of the endpoint
425
371
 
426
- assert isinstance(response, TogetherResponse)
372
+ hardware: The hardware configuration to use for this endpoint
427
373
 
428
- return DedicatedEndpoint(**response.data)
374
+ model: The model to deploy on this endpoint
429
375
 
430
- async def get(self, endpoint_id: str) -> DedicatedEndpoint:
431
- """
432
- Get details of a specific endpoint.
376
+ availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
433
377
 
434
- Args:
435
- endpoint_id (str): ID of the endpoint to retrieve
378
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
436
379
 
437
- Returns:
438
- DedicatedEndpoint: Object containing endpoint information
439
- """
440
- requestor = api_requestor.APIRequestor(
441
- client=self._client,
442
- )
380
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
443
381
 
444
- response, _, _ = await requestor.arequest(
445
- options=TogetherRequest(
446
- method="GET",
447
- url=f"endpoints/{endpoint_id}",
448
- ),
449
- stream=False,
450
- )
382
+ display_name: A human-readable name for the endpoint
451
383
 
452
- assert isinstance(response, TogetherResponse)
384
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
385
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
386
+ timeout.
453
387
 
454
- return DedicatedEndpoint(**response.data)
388
+ state: The desired state of the endpoint
455
389
 
456
- async def delete(self, endpoint_id: str) -> None:
457
- """
458
- Delete a specific endpoint.
390
+ extra_headers: Send extra headers
459
391
 
460
- Args:
461
- endpoint_id (str): ID of the endpoint to delete
392
+ extra_query: Add additional query parameters to the request
393
+
394
+ extra_body: Add additional JSON properties to the request
395
+
396
+ timeout: Override the client-level default timeout for this request, in seconds
462
397
  """
463
- requestor = api_requestor.APIRequestor(
464
- client=self._client,
398
+ return await self._post(
399
+ "/endpoints",
400
+ body=await async_maybe_transform(
401
+ {
402
+ "autoscaling": autoscaling,
403
+ "hardware": hardware,
404
+ "model": model,
405
+ "availability_zone": availability_zone,
406
+ "disable_prompt_cache": disable_prompt_cache,
407
+ "disable_speculative_decoding": disable_speculative_decoding,
408
+ "display_name": display_name,
409
+ "inactive_timeout": inactive_timeout,
410
+ "state": state,
411
+ },
412
+ endpoint_create_params.EndpointCreateParams,
413
+ ),
414
+ options=make_request_options(
415
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
416
+ ),
417
+ cast_to=DedicatedEndpoint,
465
418
  )
466
419
 
467
- await requestor.arequest(
468
- options=TogetherRequest(
469
- method="DELETE",
470
- url=f"endpoints/{endpoint_id}",
420
+ async def retrieve(
421
+ self,
422
+ endpoint_id: str,
423
+ *,
424
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
425
+ # The extra values given here take precedence over values defined on the client or passed to this method.
426
+ extra_headers: Headers | None = None,
427
+ extra_query: Query | None = None,
428
+ extra_body: Body | None = None,
429
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
430
+ ) -> DedicatedEndpoint:
431
+ """
432
+ Retrieves details about a specific endpoint, including its current state,
433
+ configuration, and scaling settings.
434
+
435
+ Args:
436
+ extra_headers: Send extra headers
437
+
438
+ extra_query: Add additional query parameters to the request
439
+
440
+ extra_body: Add additional JSON properties to the request
441
+
442
+ timeout: Override the client-level default timeout for this request, in seconds
443
+ """
444
+ if not endpoint_id:
445
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
446
+ return await self._get(
447
+ f"/endpoints/{endpoint_id}",
448
+ options=make_request_options(
449
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
471
450
  ),
472
- stream=False,
451
+ cast_to=DedicatedEndpoint,
473
452
  )
474
453
 
475
454
  async def update(
476
455
  self,
477
456
  endpoint_id: str,
478
457
  *,
479
- min_replicas: Optional[int] = None,
480
- max_replicas: Optional[int] = None,
481
- state: Optional[Literal["STARTED", "STOPPED"]] = None,
482
- display_name: Optional[str] = None,
483
- inactive_timeout: Optional[int] = None,
458
+ autoscaling: AutoscalingParam | Omit = omit,
459
+ display_name: str | Omit = omit,
460
+ inactive_timeout: Optional[int] | Omit = omit,
461
+ state: Literal["STARTED", "STOPPED"] | Omit = omit,
462
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
463
+ # The extra values given here take precedence over values defined on the client or passed to this method.
464
+ extra_headers: Headers | None = None,
465
+ extra_query: Query | None = None,
466
+ extra_body: Body | None = None,
467
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
484
468
  ) -> DedicatedEndpoint:
485
- """
486
- Update an endpoint's configuration.
469
+ """Updates an existing endpoint's configuration.
470
+
471
+ You can modify the display name,
472
+ autoscaling settings, or change the endpoint's state (start/stop).
487
473
 
488
474
  Args:
489
- endpoint_id (str): ID of the endpoint to update
490
- min_replicas (int, optional): The minimum number of replicas to maintain
491
- max_replicas (int, optional): The maximum number of replicas to scale up to
492
- state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
493
- display_name (str, optional): A human-readable name for the endpoint
494
- inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
495
-
496
- Returns:
497
- DedicatedEndpoint: Object containing endpoint information
475
+ autoscaling: New autoscaling configuration for the endpoint
476
+
477
+ display_name: A human-readable name for the endpoint
478
+
479
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
480
+ automatically stopped. Set to 0 to disable automatic timeout.
481
+
482
+ state: The desired state of the endpoint
483
+
484
+ extra_headers: Send extra headers
485
+
486
+ extra_query: Add additional query parameters to the request
487
+
488
+ extra_body: Add additional JSON properties to the request
489
+
490
+ timeout: Override the client-level default timeout for this request, in seconds
498
491
  """
499
- requestor = api_requestor.APIRequestor(
500
- client=self._client,
501
- )
502
-
503
- data: Dict[str, Union[str, Dict[str, int], int]] = {}
504
-
505
- if min_replicas is not None or max_replicas is not None:
506
- current_min = min_replicas
507
- current_max = max_replicas
508
- if current_min is None or current_max is None:
509
- # Get current values if only one is specified
510
- current = await self.get(endpoint_id=endpoint_id)
511
- current_min = current_min or current.autoscaling.min_replicas
512
- current_max = current_max or current.autoscaling.max_replicas
513
- data["autoscaling"] = {
514
- "min_replicas": current_min,
515
- "max_replicas": current_max,
516
- }
517
-
518
- if state is not None:
519
- data["state"] = state
520
-
521
- if display_name is not None:
522
- data["display_name"] = display_name
523
-
524
- if inactive_timeout is not None:
525
- data["inactive_timeout"] = inactive_timeout
526
-
527
- response, _, _ = await requestor.arequest(
528
- options=TogetherRequest(
529
- method="PATCH",
530
- url=f"endpoints/{endpoint_id}",
531
- params=data,
492
+ if not endpoint_id:
493
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
494
+ return await self._patch(
495
+ f"/endpoints/{endpoint_id}",
496
+ body=await async_maybe_transform(
497
+ {
498
+ "autoscaling": autoscaling,
499
+ "display_name": display_name,
500
+ "inactive_timeout": inactive_timeout,
501
+ "state": state,
502
+ },
503
+ endpoint_update_params.EndpointUpdateParams,
504
+ ),
505
+ options=make_request_options(
506
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
532
507
  ),
533
- stream=False,
508
+ cast_to=DedicatedEndpoint,
534
509
  )
535
510
 
536
- assert isinstance(response, TogetherResponse)
511
+ async def list(
512
+ self,
513
+ *,
514
+ mine: bool | Omit = omit,
515
+ type: Literal["dedicated", "serverless"] | Omit = omit,
516
+ usage_type: Literal["on-demand", "reserved"] | Omit = omit,
517
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
518
+ # The extra values given here take precedence over values defined on the client or passed to this method.
519
+ extra_headers: Headers | None = None,
520
+ extra_query: Query | None = None,
521
+ extra_body: Body | None = None,
522
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
523
+ ) -> EndpointListResponse:
524
+ """Returns a list of all endpoints associated with your account.
525
+
526
+ You can filter the
527
+ results by type (dedicated or serverless).
528
+
529
+ Args:
530
+ mine: If true, return only endpoints owned by the caller
531
+
532
+ type: Filter endpoints by type
533
+
534
+ usage_type: Filter endpoints by usage type
535
+
536
+ extra_headers: Send extra headers
537
+
538
+ extra_query: Add additional query parameters to the request
537
539
 
538
- return DedicatedEndpoint(**response.data)
540
+ extra_body: Add additional JSON properties to the request
539
541
 
540
- async def list_hardware(
541
- self, model: Optional[str] = None
542
- ) -> List[HardwareWithStatus]:
542
+ timeout: Override the client-level default timeout for this request, in seconds
543
543
  """
544
- List available hardware configurations.
544
+ return await self._get(
545
+ "/endpoints",
546
+ options=make_request_options(
547
+ extra_headers=extra_headers,
548
+ extra_query=extra_query,
549
+ extra_body=extra_body,
550
+ timeout=timeout,
551
+ query=await async_maybe_transform(
552
+ {
553
+ "mine": mine,
554
+ "type": type,
555
+ "usage_type": usage_type,
556
+ },
557
+ endpoint_list_params.EndpointListParams,
558
+ ),
559
+ ),
560
+ cast_to=EndpointListResponse,
561
+ )
562
+
563
+ async def delete(
564
+ self,
565
+ endpoint_id: str,
566
+ *,
567
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
568
+ # The extra values given here take precedence over values defined on the client or passed to this method.
569
+ extra_headers: Headers | None = None,
570
+ extra_query: Query | None = None,
571
+ extra_body: Body | None = None,
572
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
573
+ ) -> None:
574
+ """Permanently deletes an endpoint.
575
+
576
+ This action cannot be undone.
545
577
 
546
578
  Args:
547
- model (str, optional): Filter hardware configurations by model compatibility. When provided,
548
- the response includes availability status for each compatible configuration.
579
+ extra_headers: Send extra headers
580
+
581
+ extra_query: Add additional query parameters to the request
582
+
583
+ extra_body: Add additional JSON properties to the request
549
584
 
550
- Returns:
551
- List[HardwareWithStatus]: List of hardware configurations with their status
585
+ timeout: Override the client-level default timeout for this request, in seconds
552
586
  """
553
- requestor = api_requestor.APIRequestor(
554
- client=self._client,
587
+ if not endpoint_id:
588
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
589
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
590
+ return await self._delete(
591
+ f"/endpoints/{endpoint_id}",
592
+ options=make_request_options(
593
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
594
+ ),
595
+ cast_to=NoneType,
555
596
  )
556
597
 
557
- params = {}
558
- if model is not None:
559
- params["model"] = model
560
-
561
- response, _, _ = await requestor.arequest(
562
- options=TogetherRequest(
563
- method="GET",
564
- url="hardware",
565
- params=params,
598
+ async def list_avzones(
599
+ self,
600
+ *,
601
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
602
+ # The extra values given here take precedence over values defined on the client or passed to this method.
603
+ extra_headers: Headers | None = None,
604
+ extra_query: Query | None = None,
605
+ extra_body: Body | None = None,
606
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
607
+ ) -> EndpointListAvzonesResponse:
608
+ """List all available availability zones."""
609
+ return await self._get(
610
+ "/clusters/availability-zones",
611
+ options=make_request_options(
612
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
566
613
  ),
567
- stream=False,
614
+ cast_to=EndpointListAvzonesResponse,
568
615
  )
569
616
 
570
- assert isinstance(response, TogetherResponse)
571
- assert isinstance(response.data, dict)
572
- assert isinstance(response.data["data"], list)
573
617
 
574
- return [HardwareWithStatus(**item) for item in response.data["data"]]
618
+ class EndpointsResourceWithRawResponse:
619
+ def __init__(self, endpoints: EndpointsResource) -> None:
620
+ self._endpoints = endpoints
575
621
 
576
- async def list_avzones(self) -> List[str]:
577
- """
578
- List all availability zones.
622
+ self.create = to_raw_response_wrapper(
623
+ endpoints.create,
624
+ )
625
+ self.retrieve = to_raw_response_wrapper(
626
+ endpoints.retrieve,
627
+ )
628
+ self.update = to_raw_response_wrapper(
629
+ endpoints.update,
630
+ )
631
+ self.list = to_raw_response_wrapper(
632
+ endpoints.list,
633
+ )
634
+ self.delete = to_raw_response_wrapper(
635
+ endpoints.delete,
636
+ )
637
+ self.list_avzones = to_raw_response_wrapper(
638
+ endpoints.list_avzones,
639
+ )
579
640
 
580
- Returns:
581
- List[str]: List of unique availability zones
582
- """
583
- requestor = api_requestor.APIRequestor(
584
- client=self._client,
641
+
642
+ class AsyncEndpointsResourceWithRawResponse:
643
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
644
+ self._endpoints = endpoints
645
+
646
+ self.create = async_to_raw_response_wrapper(
647
+ endpoints.create,
648
+ )
649
+ self.retrieve = async_to_raw_response_wrapper(
650
+ endpoints.retrieve,
651
+ )
652
+ self.update = async_to_raw_response_wrapper(
653
+ endpoints.update,
654
+ )
655
+ self.list = async_to_raw_response_wrapper(
656
+ endpoints.list,
657
+ )
658
+ self.delete = async_to_raw_response_wrapper(
659
+ endpoints.delete,
660
+ )
661
+ self.list_avzones = async_to_raw_response_wrapper(
662
+ endpoints.list_avzones,
585
663
  )
586
664
 
587
- response, _, _ = await requestor.arequest(
588
- options=TogetherRequest(
589
- method="GET",
590
- url="clusters/availability-zones",
591
- ),
592
- stream=False,
665
+
666
+ class EndpointsResourceWithStreamingResponse:
667
+ def __init__(self, endpoints: EndpointsResource) -> None:
668
+ self._endpoints = endpoints
669
+
670
+ self.create = to_streamed_response_wrapper(
671
+ endpoints.create,
672
+ )
673
+ self.retrieve = to_streamed_response_wrapper(
674
+ endpoints.retrieve,
675
+ )
676
+ self.update = to_streamed_response_wrapper(
677
+ endpoints.update,
678
+ )
679
+ self.list = to_streamed_response_wrapper(
680
+ endpoints.list,
681
+ )
682
+ self.delete = to_streamed_response_wrapper(
683
+ endpoints.delete,
684
+ )
685
+ self.list_avzones = to_streamed_response_wrapper(
686
+ endpoints.list_avzones,
593
687
  )
594
688
 
595
- assert isinstance(response, TogetherResponse)
596
- assert isinstance(response.data, dict)
597
- assert isinstance(response.data["avzones"], list)
598
689
 
599
- return response.data["avzones"]
690
+ class AsyncEndpointsResourceWithStreamingResponse:
691
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
692
+ self._endpoints = endpoints
693
+
694
+ self.create = async_to_streamed_response_wrapper(
695
+ endpoints.create,
696
+ )
697
+ self.retrieve = async_to_streamed_response_wrapper(
698
+ endpoints.retrieve,
699
+ )
700
+ self.update = async_to_streamed_response_wrapper(
701
+ endpoints.update,
702
+ )
703
+ self.list = async_to_streamed_response_wrapper(
704
+ endpoints.list,
705
+ )
706
+ self.delete = async_to_streamed_response_wrapper(
707
+ endpoints.delete,
708
+ )
709
+ self.list_avzones = async_to_streamed_response_wrapper(
710
+ endpoints.list_avzones,
711
+ )