together 1.5.35__py3-none-any.whl → 2.0.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/__init__.py +101 -114
- together/_base_client.py +1995 -0
- together/_client.py +1033 -0
- together/_compat.py +219 -0
- together/_constants.py +14 -0
- together/_exceptions.py +108 -0
- together/_files.py +123 -0
- together/_models.py +857 -0
- together/_qs.py +150 -0
- together/_resource.py +43 -0
- together/_response.py +830 -0
- together/_streaming.py +370 -0
- together/_types.py +260 -0
- together/_utils/__init__.py +64 -0
- together/_utils/_compat.py +45 -0
- together/_utils/_datetime_parse.py +136 -0
- together/_utils/_logs.py +25 -0
- together/_utils/_proxy.py +65 -0
- together/_utils/_reflection.py +42 -0
- together/_utils/_resources_proxy.py +24 -0
- together/_utils/_streams.py +12 -0
- together/_utils/_sync.py +58 -0
- together/_utils/_transform.py +457 -0
- together/_utils/_typing.py +156 -0
- together/_utils/_utils.py +421 -0
- together/_version.py +4 -0
- together/lib/.keep +4 -0
- together/lib/__init__.py +23 -0
- together/{cli → lib/cli}/api/endpoints.py +66 -84
- together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
- together/{cli → lib/cli}/api/files.py +20 -17
- together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
- together/{cli → lib/cli}/api/models.py +34 -27
- together/lib/cli/api/utils.py +50 -0
- together/{cli → lib/cli}/cli.py +16 -26
- together/{constants.py → lib/constants.py} +11 -24
- together/lib/resources/__init__.py +11 -0
- together/lib/resources/files.py +999 -0
- together/lib/resources/fine_tuning.py +280 -0
- together/lib/resources/models.py +35 -0
- together/lib/types/__init__.py +13 -0
- together/lib/types/error.py +9 -0
- together/lib/types/fine_tuning.py +397 -0
- together/{utils → lib/utils}/__init__.py +6 -14
- together/{utils → lib/utils}/_log.py +11 -16
- together/{utils → lib/utils}/files.py +90 -288
- together/lib/utils/serializer.py +10 -0
- together/{utils → lib/utils}/tools.py +19 -55
- together/resources/__init__.py +225 -39
- together/resources/audio/__init__.py +72 -48
- together/resources/audio/audio.py +198 -0
- together/resources/audio/speech.py +574 -128
- together/resources/audio/transcriptions.py +247 -261
- together/resources/audio/translations.py +221 -241
- together/resources/audio/voices.py +111 -41
- together/resources/batches.py +417 -0
- together/resources/chat/__init__.py +30 -21
- together/resources/chat/chat.py +102 -0
- together/resources/chat/completions.py +1063 -263
- together/resources/code_interpreter/__init__.py +33 -0
- together/resources/code_interpreter/code_interpreter.py +258 -0
- together/resources/code_interpreter/sessions.py +135 -0
- together/resources/completions.py +884 -225
- together/resources/embeddings.py +172 -68
- together/resources/endpoints.py +589 -490
- together/resources/evals.py +452 -0
- together/resources/files.py +397 -129
- together/resources/fine_tuning.py +1033 -0
- together/resources/hardware.py +181 -0
- together/resources/images.py +258 -104
- together/resources/jobs.py +214 -0
- together/resources/models.py +223 -193
- together/resources/rerank.py +190 -92
- together/resources/videos.py +286 -214
- together/types/__init__.py +66 -167
- together/types/audio/__init__.py +10 -0
- together/types/audio/speech_create_params.py +75 -0
- together/types/audio/transcription_create_params.py +54 -0
- together/types/audio/transcription_create_response.py +111 -0
- together/types/audio/translation_create_params.py +40 -0
- together/types/audio/translation_create_response.py +70 -0
- together/types/audio/voice_list_response.py +23 -0
- together/types/audio_speech_stream_chunk.py +16 -0
- together/types/autoscaling.py +13 -0
- together/types/autoscaling_param.py +15 -0
- together/types/batch_create_params.py +24 -0
- together/types/batch_create_response.py +14 -0
- together/types/batch_job.py +45 -0
- together/types/batch_list_response.py +10 -0
- together/types/chat/__init__.py +18 -0
- together/types/chat/chat_completion.py +60 -0
- together/types/chat/chat_completion_chunk.py +61 -0
- together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
- together/types/chat/chat_completion_structured_message_text_param.py +13 -0
- together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
- together/types/chat/chat_completion_usage.py +13 -0
- together/types/chat/chat_completion_warning.py +9 -0
- together/types/chat/completion_create_params.py +329 -0
- together/types/code_interpreter/__init__.py +5 -0
- together/types/code_interpreter/session_list_response.py +31 -0
- together/types/code_interpreter_execute_params.py +45 -0
- together/types/completion.py +42 -0
- together/types/completion_chunk.py +66 -0
- together/types/completion_create_params.py +138 -0
- together/types/dedicated_endpoint.py +44 -0
- together/types/embedding.py +24 -0
- together/types/embedding_create_params.py +31 -0
- together/types/endpoint_create_params.py +43 -0
- together/types/endpoint_list_avzones_response.py +11 -0
- together/types/endpoint_list_params.py +18 -0
- together/types/endpoint_list_response.py +41 -0
- together/types/endpoint_update_params.py +27 -0
- together/types/eval_create_params.py +263 -0
- together/types/eval_create_response.py +16 -0
- together/types/eval_list_params.py +21 -0
- together/types/eval_list_response.py +10 -0
- together/types/eval_status_response.py +100 -0
- together/types/evaluation_job.py +139 -0
- together/types/execute_response.py +108 -0
- together/types/file_delete_response.py +13 -0
- together/types/file_list.py +12 -0
- together/types/file_purpose.py +9 -0
- together/types/file_response.py +31 -0
- together/types/file_type.py +7 -0
- together/types/fine_tuning_cancel_response.py +194 -0
- together/types/fine_tuning_content_params.py +24 -0
- together/types/fine_tuning_delete_params.py +11 -0
- together/types/fine_tuning_delete_response.py +12 -0
- together/types/fine_tuning_list_checkpoints_response.py +21 -0
- together/types/fine_tuning_list_events_response.py +12 -0
- together/types/fine_tuning_list_response.py +199 -0
- together/types/finetune_event.py +41 -0
- together/types/finetune_event_type.py +33 -0
- together/types/finetune_response.py +177 -0
- together/types/hardware_list_params.py +16 -0
- together/types/hardware_list_response.py +58 -0
- together/types/image_data_b64.py +15 -0
- together/types/image_data_url.py +15 -0
- together/types/image_file.py +23 -0
- together/types/image_generate_params.py +85 -0
- together/types/job_list_response.py +47 -0
- together/types/job_retrieve_response.py +43 -0
- together/types/log_probs.py +18 -0
- together/types/model_list_response.py +10 -0
- together/types/model_object.py +42 -0
- together/types/model_upload_params.py +36 -0
- together/types/model_upload_response.py +23 -0
- together/types/rerank_create_params.py +36 -0
- together/types/rerank_create_response.py +36 -0
- together/types/tool_choice.py +23 -0
- together/types/tool_choice_param.py +23 -0
- together/types/tools_param.py +23 -0
- together/types/training_method_dpo.py +22 -0
- together/types/training_method_sft.py +18 -0
- together/types/video_create_params.py +86 -0
- together/types/video_create_response.py +10 -0
- together/types/video_job.py +57 -0
- together-2.0.0a6.dist-info/METADATA +729 -0
- together-2.0.0a6.dist-info/RECORD +165 -0
- {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
- together-2.0.0a6.dist-info/entry_points.txt +2 -0
- {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
- together/abstract/api_requestor.py +0 -770
- together/cli/api/chat.py +0 -298
- together/cli/api/completions.py +0 -119
- together/cli/api/images.py +0 -93
- together/cli/api/utils.py +0 -139
- together/client.py +0 -186
- together/error.py +0 -194
- together/filemanager.py +0 -635
- together/legacy/__init__.py +0 -0
- together/legacy/base.py +0 -27
- together/legacy/complete.py +0 -93
- together/legacy/embeddings.py +0 -27
- together/legacy/files.py +0 -146
- together/legacy/finetune.py +0 -177
- together/legacy/images.py +0 -27
- together/legacy/models.py +0 -44
- together/resources/batch.py +0 -165
- together/resources/code_interpreter.py +0 -82
- together/resources/evaluation.py +0 -808
- together/resources/finetune.py +0 -1388
- together/together_response.py +0 -50
- together/types/abstract.py +0 -26
- together/types/audio_speech.py +0 -311
- together/types/batch.py +0 -54
- together/types/chat_completions.py +0 -210
- together/types/code_interpreter.py +0 -57
- together/types/common.py +0 -67
- together/types/completions.py +0 -107
- together/types/embeddings.py +0 -35
- together/types/endpoints.py +0 -123
- together/types/error.py +0 -16
- together/types/evaluation.py +0 -93
- together/types/files.py +0 -93
- together/types/finetune.py +0 -465
- together/types/images.py +0 -42
- together/types/models.py +0 -96
- together/types/rerank.py +0 -43
- together/types/videos.py +0 -69
- together/utils/api_helpers.py +0 -124
- together/version.py +0 -6
- together-1.5.35.dist-info/METADATA +0 -583
- together-1.5.35.dist-info/RECORD +0 -77
- together-1.5.35.dist-info/entry_points.txt +0 -3
- /together/{abstract → lib/cli}/__init__.py +0 -0
- /together/{cli → lib/cli/api}/__init__.py +0 -0
- /together/{cli/api/__init__.py → py.typed} +0 -0
together/resources/endpoints.py
CHANGED
|
@@ -1,612 +1,711 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
import warnings
|
|
4
|
-
from typing import Dict, List, Literal, Optional, Union
|
|
5
|
-
|
|
6
|
-
from together.abstract import api_requestor
|
|
7
|
-
from together.together_response import TogetherResponse
|
|
8
|
-
from together.types import TogetherClient, TogetherRequest
|
|
9
|
-
from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
10
2
|
|
|
3
|
+
from __future__ import annotations
|
|
11
4
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from typing_extensions import Literal
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
|
|
11
|
+
from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
|
|
12
|
+
from .._utils import maybe_transform, async_maybe_transform
|
|
13
|
+
from .._compat import cached_property
|
|
14
|
+
from .._resource import SyncAPIResource, AsyncAPIResource
|
|
15
|
+
from .._response import (
|
|
16
|
+
to_raw_response_wrapper,
|
|
17
|
+
to_streamed_response_wrapper,
|
|
18
|
+
async_to_raw_response_wrapper,
|
|
19
|
+
async_to_streamed_response_wrapper,
|
|
20
|
+
)
|
|
21
|
+
from .._base_client import make_request_options
|
|
22
|
+
from ..types.autoscaling_param import AutoscalingParam
|
|
23
|
+
from ..types.dedicated_endpoint import DedicatedEndpoint
|
|
24
|
+
from ..types.endpoint_list_response import EndpointListResponse
|
|
25
|
+
from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
|
|
26
|
+
|
|
27
|
+
__all__ = ["EndpointsResource", "AsyncEndpointsResource"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EndpointsResource(SyncAPIResource):
|
|
31
|
+
@cached_property
|
|
32
|
+
def with_raw_response(self) -> EndpointsResourceWithRawResponse:
|
|
22
33
|
"""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
Args:
|
|
26
|
-
type (str, optional): Filter endpoints by endpoint type ("dedicated" or "serverless"). Defaults to None.
|
|
27
|
-
usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
|
|
28
|
-
mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
|
|
34
|
+
This property can be used as a prefix for any HTTP method call to return
|
|
35
|
+
the raw response object instead of the parsed content.
|
|
29
36
|
|
|
30
|
-
|
|
31
|
-
List[ListEndpoint]: List of endpoint objects
|
|
37
|
+
For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
|
|
32
38
|
"""
|
|
33
|
-
|
|
34
|
-
client=self._client,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
params: Dict[
|
|
38
|
-
str,
|
|
39
|
-
Union[
|
|
40
|
-
Literal["dedicated", "serverless"],
|
|
41
|
-
Literal["on-demand", "reserved"],
|
|
42
|
-
bool,
|
|
43
|
-
],
|
|
44
|
-
] = {}
|
|
45
|
-
if type is not None:
|
|
46
|
-
params["type"] = type
|
|
47
|
-
if usage_type is not None:
|
|
48
|
-
params["usage_type"] = usage_type
|
|
49
|
-
if mine is not None:
|
|
50
|
-
params["mine"] = mine
|
|
51
|
-
|
|
52
|
-
response, _, _ = requestor.request(
|
|
53
|
-
options=TogetherRequest(
|
|
54
|
-
method="GET",
|
|
55
|
-
url="endpoints",
|
|
56
|
-
params=params,
|
|
57
|
-
),
|
|
58
|
-
stream=False,
|
|
59
|
-
)
|
|
39
|
+
return EndpointsResourceWithRawResponse(self)
|
|
60
40
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
41
|
+
@cached_property
|
|
42
|
+
def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
|
|
43
|
+
"""
|
|
44
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
65
45
|
|
|
66
|
-
|
|
46
|
+
For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
|
|
47
|
+
"""
|
|
48
|
+
return EndpointsResourceWithStreamingResponse(self)
|
|
67
49
|
|
|
68
50
|
def create(
|
|
69
51
|
self,
|
|
70
52
|
*,
|
|
71
|
-
|
|
53
|
+
autoscaling: AutoscalingParam,
|
|
72
54
|
hardware: str,
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
55
|
+
model: str,
|
|
56
|
+
availability_zone: str | Omit = omit,
|
|
57
|
+
disable_prompt_cache: bool | Omit = omit,
|
|
58
|
+
disable_speculative_decoding: bool | Omit = omit,
|
|
59
|
+
display_name: str | Omit = omit,
|
|
60
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
61
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
62
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
63
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
64
|
+
extra_headers: Headers | None = None,
|
|
65
|
+
extra_query: Query | None = None,
|
|
66
|
+
extra_body: Body | None = None,
|
|
67
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
81
68
|
) -> DedicatedEndpoint:
|
|
82
|
-
"""
|
|
83
|
-
|
|
69
|
+
"""Creates a new dedicated endpoint for serving models.
|
|
70
|
+
|
|
71
|
+
The endpoint will
|
|
72
|
+
automatically start after creation. You can deploy any supported model on
|
|
73
|
+
hardware configurations that meet the model's requirements.
|
|
84
74
|
|
|
85
75
|
Args:
|
|
86
|
-
|
|
87
|
-
hardware (str): The hardware configuration to use for this endpoint
|
|
88
|
-
min_replicas (int): The minimum number of replicas to maintain
|
|
89
|
-
max_replicas (int): The maximum number of replicas to scale up to
|
|
90
|
-
display_name (str, optional): A human-readable name for the endpoint
|
|
91
|
-
disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
|
|
92
|
-
disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
|
|
93
|
-
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
|
|
94
|
-
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
|
|
95
|
-
availability_zone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).
|
|
96
|
-
|
|
97
|
-
Returns:
|
|
98
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
99
|
-
"""
|
|
100
|
-
if disable_prompt_cache:
|
|
101
|
-
warnings.warn(
|
|
102
|
-
"The 'disable_prompt_cache' parameter (CLI flag: '--no-prompt-cache') is deprecated and will be removed in a future version.",
|
|
103
|
-
stacklevel=2,
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
requestor = api_requestor.APIRequestor(
|
|
107
|
-
client=self._client,
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
|
|
111
|
-
"model": model,
|
|
112
|
-
"hardware": hardware,
|
|
113
|
-
"autoscaling": {
|
|
114
|
-
"min_replicas": min_replicas,
|
|
115
|
-
"max_replicas": max_replicas,
|
|
116
|
-
},
|
|
117
|
-
"disable_prompt_cache": disable_prompt_cache,
|
|
118
|
-
"disable_speculative_decoding": disable_speculative_decoding,
|
|
119
|
-
"state": state,
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
if display_name is not None:
|
|
123
|
-
data["display_name"] = display_name
|
|
124
|
-
|
|
125
|
-
if inactive_timeout is not None:
|
|
126
|
-
data["inactive_timeout"] = inactive_timeout
|
|
127
|
-
|
|
128
|
-
if availability_zone is not None:
|
|
129
|
-
data["availability_zone"] = availability_zone
|
|
130
|
-
|
|
131
|
-
response, _, _ = requestor.request(
|
|
132
|
-
options=TogetherRequest(
|
|
133
|
-
method="POST",
|
|
134
|
-
url="endpoints",
|
|
135
|
-
params=data,
|
|
136
|
-
),
|
|
137
|
-
stream=False,
|
|
138
|
-
)
|
|
76
|
+
autoscaling: Configuration for automatic scaling of the endpoint
|
|
139
77
|
|
|
140
|
-
|
|
78
|
+
hardware: The hardware configuration to use for this endpoint
|
|
141
79
|
|
|
142
|
-
|
|
80
|
+
model: The model to deploy on this endpoint
|
|
143
81
|
|
|
144
|
-
|
|
145
|
-
"""
|
|
146
|
-
Get details of a specific endpoint.
|
|
82
|
+
availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
|
|
147
83
|
|
|
148
|
-
|
|
149
|
-
endpoint_id (str): ID of the endpoint to retrieve
|
|
84
|
+
disable_prompt_cache: Whether to disable the prompt cache for this endpoint
|
|
150
85
|
|
|
151
|
-
|
|
152
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
153
|
-
"""
|
|
154
|
-
requestor = api_requestor.APIRequestor(
|
|
155
|
-
client=self._client,
|
|
156
|
-
)
|
|
86
|
+
disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
|
|
157
87
|
|
|
158
|
-
|
|
159
|
-
options=TogetherRequest(
|
|
160
|
-
method="GET",
|
|
161
|
-
url=f"endpoints/{endpoint_id}",
|
|
162
|
-
),
|
|
163
|
-
stream=False,
|
|
164
|
-
)
|
|
88
|
+
display_name: A human-readable name for the endpoint
|
|
165
89
|
|
|
166
|
-
|
|
90
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
91
|
+
automatically stopped. Set to null, omit or set to 0 to disable automatic
|
|
92
|
+
timeout.
|
|
167
93
|
|
|
168
|
-
|
|
94
|
+
state: The desired state of the endpoint
|
|
169
95
|
|
|
170
|
-
|
|
171
|
-
"""
|
|
172
|
-
Delete a specific endpoint.
|
|
96
|
+
extra_headers: Send extra headers
|
|
173
97
|
|
|
174
|
-
|
|
175
|
-
endpoint_id (str): ID of the endpoint to delete
|
|
176
|
-
"""
|
|
177
|
-
requestor = api_requestor.APIRequestor(
|
|
178
|
-
client=self._client,
|
|
179
|
-
)
|
|
98
|
+
extra_query: Add additional query parameters to the request
|
|
180
99
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
100
|
+
extra_body: Add additional JSON properties to the request
|
|
101
|
+
|
|
102
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
103
|
+
"""
|
|
104
|
+
return self._post(
|
|
105
|
+
"/endpoints",
|
|
106
|
+
body=maybe_transform(
|
|
107
|
+
{
|
|
108
|
+
"autoscaling": autoscaling,
|
|
109
|
+
"hardware": hardware,
|
|
110
|
+
"model": model,
|
|
111
|
+
"availability_zone": availability_zone,
|
|
112
|
+
"disable_prompt_cache": disable_prompt_cache,
|
|
113
|
+
"disable_speculative_decoding": disable_speculative_decoding,
|
|
114
|
+
"display_name": display_name,
|
|
115
|
+
"inactive_timeout": inactive_timeout,
|
|
116
|
+
"state": state,
|
|
117
|
+
},
|
|
118
|
+
endpoint_create_params.EndpointCreateParams,
|
|
185
119
|
),
|
|
186
|
-
|
|
120
|
+
options=make_request_options(
|
|
121
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
122
|
+
),
|
|
123
|
+
cast_to=DedicatedEndpoint,
|
|
187
124
|
)
|
|
188
125
|
|
|
189
|
-
def
|
|
126
|
+
def retrieve(
|
|
190
127
|
self,
|
|
191
128
|
endpoint_id: str,
|
|
192
129
|
*,
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
130
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
131
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
132
|
+
extra_headers: Headers | None = None,
|
|
133
|
+
extra_query: Query | None = None,
|
|
134
|
+
extra_body: Body | None = None,
|
|
135
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
198
136
|
) -> DedicatedEndpoint:
|
|
199
137
|
"""
|
|
200
|
-
|
|
138
|
+
Retrieves details about a specific endpoint, including its current state,
|
|
139
|
+
configuration, and scaling settings.
|
|
201
140
|
|
|
202
141
|
Args:
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
Returns:
|
|
211
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
142
|
+
extra_headers: Send extra headers
|
|
143
|
+
|
|
144
|
+
extra_query: Add additional query parameters to the request
|
|
145
|
+
|
|
146
|
+
extra_body: Add additional JSON properties to the request
|
|
147
|
+
|
|
148
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
212
149
|
"""
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
if min_replicas is not None or max_replicas is not None:
|
|
220
|
-
current_min = min_replicas
|
|
221
|
-
current_max = max_replicas
|
|
222
|
-
if current_min is None or current_max is None:
|
|
223
|
-
# Get current values if only one is specified
|
|
224
|
-
current = self.get(endpoint_id=endpoint_id)
|
|
225
|
-
current_min = current_min or current.autoscaling.min_replicas
|
|
226
|
-
current_max = current_max or current.autoscaling.max_replicas
|
|
227
|
-
data["autoscaling"] = {
|
|
228
|
-
"min_replicas": current_min,
|
|
229
|
-
"max_replicas": current_max,
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
if state is not None:
|
|
233
|
-
data["state"] = state
|
|
234
|
-
|
|
235
|
-
if display_name is not None:
|
|
236
|
-
data["display_name"] = display_name
|
|
237
|
-
|
|
238
|
-
if inactive_timeout is not None:
|
|
239
|
-
data["inactive_timeout"] = inactive_timeout
|
|
240
|
-
|
|
241
|
-
response, _, _ = requestor.request(
|
|
242
|
-
options=TogetherRequest(
|
|
243
|
-
method="PATCH",
|
|
244
|
-
url=f"endpoints/{endpoint_id}",
|
|
245
|
-
params=data,
|
|
150
|
+
if not endpoint_id:
|
|
151
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
152
|
+
return self._get(
|
|
153
|
+
f"/endpoints/{endpoint_id}",
|
|
154
|
+
options=make_request_options(
|
|
155
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
246
156
|
),
|
|
247
|
-
|
|
157
|
+
cast_to=DedicatedEndpoint,
|
|
248
158
|
)
|
|
249
159
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
160
|
+
def update(
|
|
161
|
+
self,
|
|
162
|
+
endpoint_id: str,
|
|
163
|
+
*,
|
|
164
|
+
autoscaling: AutoscalingParam | Omit = omit,
|
|
165
|
+
display_name: str | Omit = omit,
|
|
166
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
167
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
168
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
169
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
170
|
+
extra_headers: Headers | None = None,
|
|
171
|
+
extra_query: Query | None = None,
|
|
172
|
+
extra_body: Body | None = None,
|
|
173
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
174
|
+
) -> DedicatedEndpoint:
|
|
175
|
+
"""Updates an existing endpoint's configuration.
|
|
253
176
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
List available hardware configurations.
|
|
177
|
+
You can modify the display name,
|
|
178
|
+
autoscaling settings, or change the endpoint's state (start/stop).
|
|
257
179
|
|
|
258
180
|
Args:
|
|
259
|
-
|
|
260
|
-
the response includes availability status for each compatible configuration.
|
|
181
|
+
autoscaling: New autoscaling configuration for the endpoint
|
|
261
182
|
|
|
262
|
-
|
|
263
|
-
List[HardwareWithStatus]: List of hardware configurations with their status
|
|
264
|
-
"""
|
|
265
|
-
requestor = api_requestor.APIRequestor(
|
|
266
|
-
client=self._client,
|
|
267
|
-
)
|
|
183
|
+
display_name: A human-readable name for the endpoint
|
|
268
184
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
params["model"] = model
|
|
185
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
186
|
+
automatically stopped. Set to 0 to disable automatic timeout.
|
|
272
187
|
|
|
273
|
-
|
|
274
|
-
options=TogetherRequest(
|
|
275
|
-
method="GET",
|
|
276
|
-
url="hardware",
|
|
277
|
-
params=params,
|
|
278
|
-
),
|
|
279
|
-
stream=False,
|
|
280
|
-
)
|
|
188
|
+
state: The desired state of the endpoint
|
|
281
189
|
|
|
282
|
-
|
|
283
|
-
assert isinstance(response.data, dict)
|
|
284
|
-
assert isinstance(response.data["data"], list)
|
|
190
|
+
extra_headers: Send extra headers
|
|
285
191
|
|
|
286
|
-
|
|
192
|
+
extra_query: Add additional query parameters to the request
|
|
287
193
|
|
|
288
|
-
|
|
289
|
-
"""
|
|
290
|
-
List all available availability zones.
|
|
194
|
+
extra_body: Add additional JSON properties to the request
|
|
291
195
|
|
|
292
|
-
|
|
293
|
-
List[str]: List of unique availability zones
|
|
196
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
294
197
|
"""
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
198
|
+
if not endpoint_id:
|
|
199
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
200
|
+
return self._patch(
|
|
201
|
+
f"/endpoints/{endpoint_id}",
|
|
202
|
+
body=maybe_transform(
|
|
203
|
+
{
|
|
204
|
+
"autoscaling": autoscaling,
|
|
205
|
+
"display_name": display_name,
|
|
206
|
+
"inactive_timeout": inactive_timeout,
|
|
207
|
+
"state": state,
|
|
208
|
+
},
|
|
209
|
+
endpoint_update_params.EndpointUpdateParams,
|
|
210
|
+
),
|
|
211
|
+
options=make_request_options(
|
|
212
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
303
213
|
),
|
|
304
|
-
|
|
214
|
+
cast_to=DedicatedEndpoint,
|
|
305
215
|
)
|
|
306
216
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
217
|
+
def list(
|
|
218
|
+
self,
|
|
219
|
+
*,
|
|
220
|
+
mine: bool | Omit = omit,
|
|
221
|
+
type: Literal["dedicated", "serverless"] | Omit = omit,
|
|
222
|
+
usage_type: Literal["on-demand", "reserved"] | Omit = omit,
|
|
223
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
224
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
225
|
+
extra_headers: Headers | None = None,
|
|
226
|
+
extra_query: Query | None = None,
|
|
227
|
+
extra_body: Body | None = None,
|
|
228
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
229
|
+
) -> EndpointListResponse:
|
|
230
|
+
"""Returns a list of all endpoints associated with your account.
|
|
231
|
+
|
|
232
|
+
You can filter the
|
|
233
|
+
results by type (dedicated or serverless).
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
mine: If true, return only endpoints owned by the caller
|
|
237
|
+
|
|
238
|
+
type: Filter endpoints by type
|
|
310
239
|
|
|
311
|
-
|
|
240
|
+
usage_type: Filter endpoints by usage type
|
|
312
241
|
|
|
242
|
+
extra_headers: Send extra headers
|
|
313
243
|
|
|
314
|
-
|
|
315
|
-
def __init__(self, client: TogetherClient) -> None:
|
|
316
|
-
self._client = client
|
|
244
|
+
extra_query: Add additional query parameters to the request
|
|
317
245
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
usage_type: Optional[Literal["on-demand", "reserved"]] = None,
|
|
322
|
-
mine: Optional[bool] = None,
|
|
323
|
-
) -> List[ListEndpoint]:
|
|
246
|
+
extra_body: Add additional JSON properties to the request
|
|
247
|
+
|
|
248
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
324
249
|
"""
|
|
325
|
-
|
|
250
|
+
return self._get(
|
|
251
|
+
"/endpoints",
|
|
252
|
+
options=make_request_options(
|
|
253
|
+
extra_headers=extra_headers,
|
|
254
|
+
extra_query=extra_query,
|
|
255
|
+
extra_body=extra_body,
|
|
256
|
+
timeout=timeout,
|
|
257
|
+
query=maybe_transform(
|
|
258
|
+
{
|
|
259
|
+
"mine": mine,
|
|
260
|
+
"type": type,
|
|
261
|
+
"usage_type": usage_type,
|
|
262
|
+
},
|
|
263
|
+
endpoint_list_params.EndpointListParams,
|
|
264
|
+
),
|
|
265
|
+
),
|
|
266
|
+
cast_to=EndpointListResponse,
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
def delete(
|
|
270
|
+
self,
|
|
271
|
+
endpoint_id: str,
|
|
272
|
+
*,
|
|
273
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
274
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
275
|
+
extra_headers: Headers | None = None,
|
|
276
|
+
extra_query: Query | None = None,
|
|
277
|
+
extra_body: Body | None = None,
|
|
278
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
279
|
+
) -> None:
|
|
280
|
+
"""Permanently deletes an endpoint.
|
|
281
|
+
|
|
282
|
+
This action cannot be undone.
|
|
326
283
|
|
|
327
284
|
Args:
|
|
328
|
-
|
|
329
|
-
usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
|
|
330
|
-
mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
|
|
285
|
+
extra_headers: Send extra headers
|
|
331
286
|
|
|
332
|
-
|
|
333
|
-
|
|
287
|
+
extra_query: Add additional query parameters to the request
|
|
288
|
+
|
|
289
|
+
extra_body: Add additional JSON properties to the request
|
|
290
|
+
|
|
291
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
334
292
|
"""
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
)
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
Literal["dedicated", "serverless"],
|
|
343
|
-
Literal["on-demand", "reserved"],
|
|
344
|
-
bool,
|
|
345
|
-
],
|
|
346
|
-
] = {}
|
|
347
|
-
if type is not None:
|
|
348
|
-
params["type"] = type
|
|
349
|
-
if usage_type is not None:
|
|
350
|
-
params["usage_type"] = usage_type
|
|
351
|
-
if mine is not None:
|
|
352
|
-
params["mine"] = mine
|
|
353
|
-
|
|
354
|
-
response, _, _ = await requestor.arequest(
|
|
355
|
-
options=TogetherRequest(
|
|
356
|
-
method="GET",
|
|
357
|
-
url="endpoints",
|
|
358
|
-
params=params,
|
|
293
|
+
if not endpoint_id:
|
|
294
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
295
|
+
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
|
296
|
+
return self._delete(
|
|
297
|
+
f"/endpoints/{endpoint_id}",
|
|
298
|
+
options=make_request_options(
|
|
299
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
359
300
|
),
|
|
360
|
-
|
|
301
|
+
cast_to=NoneType,
|
|
361
302
|
)
|
|
362
303
|
|
|
363
|
-
|
|
364
|
-
|
|
304
|
+
def list_avzones(
|
|
305
|
+
self,
|
|
306
|
+
*,
|
|
307
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
308
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
309
|
+
extra_headers: Headers | None = None,
|
|
310
|
+
extra_query: Query | None = None,
|
|
311
|
+
extra_body: Body | None = None,
|
|
312
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
313
|
+
) -> EndpointListAvzonesResponse:
|
|
314
|
+
"""List all available availability zones."""
|
|
315
|
+
return self._get(
|
|
316
|
+
"/clusters/availability-zones",
|
|
317
|
+
options=make_request_options(
|
|
318
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
319
|
+
),
|
|
320
|
+
cast_to=EndpointListAvzonesResponse,
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class AsyncEndpointsResource(AsyncAPIResource):
|
|
325
|
+
@cached_property
|
|
326
|
+
def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
|
|
327
|
+
"""
|
|
328
|
+
This property can be used as a prefix for any HTTP method call to return
|
|
329
|
+
the raw response object instead of the parsed content.
|
|
330
|
+
|
|
331
|
+
For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
|
|
332
|
+
"""
|
|
333
|
+
return AsyncEndpointsResourceWithRawResponse(self)
|
|
365
334
|
|
|
366
|
-
|
|
335
|
+
@cached_property
|
|
336
|
+
def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
|
|
337
|
+
"""
|
|
338
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
339
|
+
|
|
340
|
+
For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
|
|
341
|
+
"""
|
|
342
|
+
return AsyncEndpointsResourceWithStreamingResponse(self)
|
|
367
343
|
|
|
368
344
|
async def create(
|
|
369
345
|
self,
|
|
370
346
|
*,
|
|
371
|
-
|
|
347
|
+
autoscaling: AutoscalingParam,
|
|
372
348
|
hardware: str,
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
349
|
+
model: str,
|
|
350
|
+
availability_zone: str | Omit = omit,
|
|
351
|
+
disable_prompt_cache: bool | Omit = omit,
|
|
352
|
+
disable_speculative_decoding: bool | Omit = omit,
|
|
353
|
+
display_name: str | Omit = omit,
|
|
354
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
355
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
356
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
357
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
358
|
+
extra_headers: Headers | None = None,
|
|
359
|
+
extra_query: Query | None = None,
|
|
360
|
+
extra_body: Body | None = None,
|
|
361
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
381
362
|
) -> DedicatedEndpoint:
|
|
382
|
-
"""
|
|
383
|
-
|
|
363
|
+
"""Creates a new dedicated endpoint for serving models.
|
|
364
|
+
|
|
365
|
+
The endpoint will
|
|
366
|
+
automatically start after creation. You can deploy any supported model on
|
|
367
|
+
hardware configurations that meet the model's requirements.
|
|
384
368
|
|
|
385
369
|
Args:
|
|
386
|
-
|
|
387
|
-
hardware (str): The hardware configuration to use for this endpoint
|
|
388
|
-
min_replicas (int): The minimum number of replicas to maintain
|
|
389
|
-
max_replicas (int): The maximum number of replicas to scale up to
|
|
390
|
-
display_name (str, optional): A human-readable name for the endpoint
|
|
391
|
-
disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
|
|
392
|
-
disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
|
|
393
|
-
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
|
|
394
|
-
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
|
|
395
|
-
|
|
396
|
-
Returns:
|
|
397
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
398
|
-
"""
|
|
399
|
-
if disable_prompt_cache:
|
|
400
|
-
warnings.warn(
|
|
401
|
-
"The 'disable_prompt_cache' parameter (CLI flag: '--no-prompt-cache') is deprecated and will be removed in a future version.",
|
|
402
|
-
stacklevel=2,
|
|
403
|
-
)
|
|
404
|
-
|
|
405
|
-
requestor = api_requestor.APIRequestor(
|
|
406
|
-
client=self._client,
|
|
407
|
-
)
|
|
408
|
-
|
|
409
|
-
data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
|
|
410
|
-
"model": model,
|
|
411
|
-
"hardware": hardware,
|
|
412
|
-
"autoscaling": {
|
|
413
|
-
"min_replicas": min_replicas,
|
|
414
|
-
"max_replicas": max_replicas,
|
|
415
|
-
},
|
|
416
|
-
"disable_prompt_cache": disable_prompt_cache,
|
|
417
|
-
"disable_speculative_decoding": disable_speculative_decoding,
|
|
418
|
-
"state": state,
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
if display_name is not None:
|
|
422
|
-
data["display_name"] = display_name
|
|
423
|
-
|
|
424
|
-
if inactive_timeout is not None:
|
|
425
|
-
data["inactive_timeout"] = inactive_timeout
|
|
426
|
-
|
|
427
|
-
if availability_zone is not None:
|
|
428
|
-
data["availability_zone"] = availability_zone
|
|
429
|
-
|
|
430
|
-
response, _, _ = await requestor.arequest(
|
|
431
|
-
options=TogetherRequest(
|
|
432
|
-
method="POST",
|
|
433
|
-
url="endpoints",
|
|
434
|
-
params=data,
|
|
435
|
-
),
|
|
436
|
-
stream=False,
|
|
437
|
-
)
|
|
370
|
+
autoscaling: Configuration for automatic scaling of the endpoint
|
|
438
371
|
|
|
439
|
-
|
|
372
|
+
hardware: The hardware configuration to use for this endpoint
|
|
440
373
|
|
|
441
|
-
|
|
374
|
+
model: The model to deploy on this endpoint
|
|
442
375
|
|
|
443
|
-
|
|
444
|
-
"""
|
|
445
|
-
Get details of a specific endpoint.
|
|
376
|
+
availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
|
|
446
377
|
|
|
447
|
-
|
|
448
|
-
endpoint_id (str): ID of the endpoint to retrieve
|
|
378
|
+
disable_prompt_cache: Whether to disable the prompt cache for this endpoint
|
|
449
379
|
|
|
450
|
-
|
|
451
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
452
|
-
"""
|
|
453
|
-
requestor = api_requestor.APIRequestor(
|
|
454
|
-
client=self._client,
|
|
455
|
-
)
|
|
380
|
+
disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
|
|
456
381
|
|
|
457
|
-
|
|
458
|
-
options=TogetherRequest(
|
|
459
|
-
method="GET",
|
|
460
|
-
url=f"endpoints/{endpoint_id}",
|
|
461
|
-
),
|
|
462
|
-
stream=False,
|
|
463
|
-
)
|
|
382
|
+
display_name: A human-readable name for the endpoint
|
|
464
383
|
|
|
465
|
-
|
|
384
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
385
|
+
automatically stopped. Set to null, omit or set to 0 to disable automatic
|
|
386
|
+
timeout.
|
|
466
387
|
|
|
467
|
-
|
|
388
|
+
state: The desired state of the endpoint
|
|
468
389
|
|
|
469
|
-
|
|
470
|
-
"""
|
|
471
|
-
Delete a specific endpoint.
|
|
390
|
+
extra_headers: Send extra headers
|
|
472
391
|
|
|
473
|
-
|
|
474
|
-
|
|
392
|
+
extra_query: Add additional query parameters to the request
|
|
393
|
+
|
|
394
|
+
extra_body: Add additional JSON properties to the request
|
|
395
|
+
|
|
396
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
475
397
|
"""
|
|
476
|
-
|
|
477
|
-
|
|
398
|
+
return await self._post(
|
|
399
|
+
"/endpoints",
|
|
400
|
+
body=await async_maybe_transform(
|
|
401
|
+
{
|
|
402
|
+
"autoscaling": autoscaling,
|
|
403
|
+
"hardware": hardware,
|
|
404
|
+
"model": model,
|
|
405
|
+
"availability_zone": availability_zone,
|
|
406
|
+
"disable_prompt_cache": disable_prompt_cache,
|
|
407
|
+
"disable_speculative_decoding": disable_speculative_decoding,
|
|
408
|
+
"display_name": display_name,
|
|
409
|
+
"inactive_timeout": inactive_timeout,
|
|
410
|
+
"state": state,
|
|
411
|
+
},
|
|
412
|
+
endpoint_create_params.EndpointCreateParams,
|
|
413
|
+
),
|
|
414
|
+
options=make_request_options(
|
|
415
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
416
|
+
),
|
|
417
|
+
cast_to=DedicatedEndpoint,
|
|
478
418
|
)
|
|
479
419
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
420
|
+
async def retrieve(
|
|
421
|
+
self,
|
|
422
|
+
endpoint_id: str,
|
|
423
|
+
*,
|
|
424
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
425
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
426
|
+
extra_headers: Headers | None = None,
|
|
427
|
+
extra_query: Query | None = None,
|
|
428
|
+
extra_body: Body | None = None,
|
|
429
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
430
|
+
) -> DedicatedEndpoint:
|
|
431
|
+
"""
|
|
432
|
+
Retrieves details about a specific endpoint, including its current state,
|
|
433
|
+
configuration, and scaling settings.
|
|
434
|
+
|
|
435
|
+
Args:
|
|
436
|
+
extra_headers: Send extra headers
|
|
437
|
+
|
|
438
|
+
extra_query: Add additional query parameters to the request
|
|
439
|
+
|
|
440
|
+
extra_body: Add additional JSON properties to the request
|
|
441
|
+
|
|
442
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
443
|
+
"""
|
|
444
|
+
if not endpoint_id:
|
|
445
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
446
|
+
return await self._get(
|
|
447
|
+
f"/endpoints/{endpoint_id}",
|
|
448
|
+
options=make_request_options(
|
|
449
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
484
450
|
),
|
|
485
|
-
|
|
451
|
+
cast_to=DedicatedEndpoint,
|
|
486
452
|
)
|
|
487
453
|
|
|
488
454
|
async def update(
|
|
489
455
|
self,
|
|
490
456
|
endpoint_id: str,
|
|
491
457
|
*,
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
458
|
+
autoscaling: AutoscalingParam | Omit = omit,
|
|
459
|
+
display_name: str | Omit = omit,
|
|
460
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
461
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
462
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
463
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
464
|
+
extra_headers: Headers | None = None,
|
|
465
|
+
extra_query: Query | None = None,
|
|
466
|
+
extra_body: Body | None = None,
|
|
467
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
497
468
|
) -> DedicatedEndpoint:
|
|
498
|
-
"""
|
|
499
|
-
|
|
469
|
+
"""Updates an existing endpoint's configuration.
|
|
470
|
+
|
|
471
|
+
You can modify the display name,
|
|
472
|
+
autoscaling settings, or change the endpoint's state (start/stop).
|
|
500
473
|
|
|
501
474
|
Args:
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
475
|
+
autoscaling: New autoscaling configuration for the endpoint
|
|
476
|
+
|
|
477
|
+
display_name: A human-readable name for the endpoint
|
|
478
|
+
|
|
479
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
480
|
+
automatically stopped. Set to 0 to disable automatic timeout.
|
|
481
|
+
|
|
482
|
+
state: The desired state of the endpoint
|
|
483
|
+
|
|
484
|
+
extra_headers: Send extra headers
|
|
485
|
+
|
|
486
|
+
extra_query: Add additional query parameters to the request
|
|
487
|
+
|
|
488
|
+
extra_body: Add additional JSON properties to the request
|
|
489
|
+
|
|
490
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
511
491
|
"""
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
"min_replicas": current_min,
|
|
528
|
-
"max_replicas": current_max,
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
if state is not None:
|
|
532
|
-
data["state"] = state
|
|
533
|
-
|
|
534
|
-
if display_name is not None:
|
|
535
|
-
data["display_name"] = display_name
|
|
536
|
-
|
|
537
|
-
if inactive_timeout is not None:
|
|
538
|
-
data["inactive_timeout"] = inactive_timeout
|
|
539
|
-
|
|
540
|
-
response, _, _ = await requestor.arequest(
|
|
541
|
-
options=TogetherRequest(
|
|
542
|
-
method="PATCH",
|
|
543
|
-
url=f"endpoints/{endpoint_id}",
|
|
544
|
-
params=data,
|
|
492
|
+
if not endpoint_id:
|
|
493
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
494
|
+
return await self._patch(
|
|
495
|
+
f"/endpoints/{endpoint_id}",
|
|
496
|
+
body=await async_maybe_transform(
|
|
497
|
+
{
|
|
498
|
+
"autoscaling": autoscaling,
|
|
499
|
+
"display_name": display_name,
|
|
500
|
+
"inactive_timeout": inactive_timeout,
|
|
501
|
+
"state": state,
|
|
502
|
+
},
|
|
503
|
+
endpoint_update_params.EndpointUpdateParams,
|
|
504
|
+
),
|
|
505
|
+
options=make_request_options(
|
|
506
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
545
507
|
),
|
|
546
|
-
|
|
508
|
+
cast_to=DedicatedEndpoint,
|
|
547
509
|
)
|
|
548
510
|
|
|
549
|
-
|
|
511
|
+
async def list(
|
|
512
|
+
self,
|
|
513
|
+
*,
|
|
514
|
+
mine: bool | Omit = omit,
|
|
515
|
+
type: Literal["dedicated", "serverless"] | Omit = omit,
|
|
516
|
+
usage_type: Literal["on-demand", "reserved"] | Omit = omit,
|
|
517
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
518
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
519
|
+
extra_headers: Headers | None = None,
|
|
520
|
+
extra_query: Query | None = None,
|
|
521
|
+
extra_body: Body | None = None,
|
|
522
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
523
|
+
) -> EndpointListResponse:
|
|
524
|
+
"""Returns a list of all endpoints associated with your account.
|
|
525
|
+
|
|
526
|
+
You can filter the
|
|
527
|
+
results by type (dedicated or serverless).
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
mine: If true, return only endpoints owned by the caller
|
|
531
|
+
|
|
532
|
+
type: Filter endpoints by type
|
|
533
|
+
|
|
534
|
+
usage_type: Filter endpoints by usage type
|
|
535
|
+
|
|
536
|
+
extra_headers: Send extra headers
|
|
537
|
+
|
|
538
|
+
extra_query: Add additional query parameters to the request
|
|
550
539
|
|
|
551
|
-
|
|
540
|
+
extra_body: Add additional JSON properties to the request
|
|
552
541
|
|
|
553
|
-
|
|
554
|
-
self, model: Optional[str] = None
|
|
555
|
-
) -> List[HardwareWithStatus]:
|
|
542
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
556
543
|
"""
|
|
557
|
-
|
|
544
|
+
return await self._get(
|
|
545
|
+
"/endpoints",
|
|
546
|
+
options=make_request_options(
|
|
547
|
+
extra_headers=extra_headers,
|
|
548
|
+
extra_query=extra_query,
|
|
549
|
+
extra_body=extra_body,
|
|
550
|
+
timeout=timeout,
|
|
551
|
+
query=await async_maybe_transform(
|
|
552
|
+
{
|
|
553
|
+
"mine": mine,
|
|
554
|
+
"type": type,
|
|
555
|
+
"usage_type": usage_type,
|
|
556
|
+
},
|
|
557
|
+
endpoint_list_params.EndpointListParams,
|
|
558
|
+
),
|
|
559
|
+
),
|
|
560
|
+
cast_to=EndpointListResponse,
|
|
561
|
+
)
|
|
562
|
+
|
|
563
|
+
async def delete(
|
|
564
|
+
self,
|
|
565
|
+
endpoint_id: str,
|
|
566
|
+
*,
|
|
567
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
568
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
569
|
+
extra_headers: Headers | None = None,
|
|
570
|
+
extra_query: Query | None = None,
|
|
571
|
+
extra_body: Body | None = None,
|
|
572
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
573
|
+
) -> None:
|
|
574
|
+
"""Permanently deletes an endpoint.
|
|
575
|
+
|
|
576
|
+
This action cannot be undone.
|
|
558
577
|
|
|
559
578
|
Args:
|
|
560
|
-
|
|
561
|
-
|
|
579
|
+
extra_headers: Send extra headers
|
|
580
|
+
|
|
581
|
+
extra_query: Add additional query parameters to the request
|
|
582
|
+
|
|
583
|
+
extra_body: Add additional JSON properties to the request
|
|
562
584
|
|
|
563
|
-
|
|
564
|
-
List[HardwareWithStatus]: List of hardware configurations with their status
|
|
585
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
565
586
|
"""
|
|
566
|
-
|
|
567
|
-
|
|
587
|
+
if not endpoint_id:
|
|
588
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
589
|
+
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
|
590
|
+
return await self._delete(
|
|
591
|
+
f"/endpoints/{endpoint_id}",
|
|
592
|
+
options=make_request_options(
|
|
593
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
594
|
+
),
|
|
595
|
+
cast_to=NoneType,
|
|
568
596
|
)
|
|
569
597
|
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
598
|
+
async def list_avzones(
|
|
599
|
+
self,
|
|
600
|
+
*,
|
|
601
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
602
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
603
|
+
extra_headers: Headers | None = None,
|
|
604
|
+
extra_query: Query | None = None,
|
|
605
|
+
extra_body: Body | None = None,
|
|
606
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
607
|
+
) -> EndpointListAvzonesResponse:
|
|
608
|
+
"""List all available availability zones."""
|
|
609
|
+
return await self._get(
|
|
610
|
+
"/clusters/availability-zones",
|
|
611
|
+
options=make_request_options(
|
|
612
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
579
613
|
),
|
|
580
|
-
|
|
614
|
+
cast_to=EndpointListAvzonesResponse,
|
|
581
615
|
)
|
|
582
616
|
|
|
583
|
-
assert isinstance(response, TogetherResponse)
|
|
584
|
-
assert isinstance(response.data, dict)
|
|
585
|
-
assert isinstance(response.data["data"], list)
|
|
586
617
|
|
|
587
|
-
|
|
618
|
+
class EndpointsResourceWithRawResponse:
|
|
619
|
+
def __init__(self, endpoints: EndpointsResource) -> None:
|
|
620
|
+
self._endpoints = endpoints
|
|
588
621
|
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
622
|
+
self.create = to_raw_response_wrapper(
|
|
623
|
+
endpoints.create,
|
|
624
|
+
)
|
|
625
|
+
self.retrieve = to_raw_response_wrapper(
|
|
626
|
+
endpoints.retrieve,
|
|
627
|
+
)
|
|
628
|
+
self.update = to_raw_response_wrapper(
|
|
629
|
+
endpoints.update,
|
|
630
|
+
)
|
|
631
|
+
self.list = to_raw_response_wrapper(
|
|
632
|
+
endpoints.list,
|
|
633
|
+
)
|
|
634
|
+
self.delete = to_raw_response_wrapper(
|
|
635
|
+
endpoints.delete,
|
|
636
|
+
)
|
|
637
|
+
self.list_avzones = to_raw_response_wrapper(
|
|
638
|
+
endpoints.list_avzones,
|
|
639
|
+
)
|
|
592
640
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
641
|
+
|
|
642
|
+
class AsyncEndpointsResourceWithRawResponse:
|
|
643
|
+
def __init__(self, endpoints: AsyncEndpointsResource) -> None:
|
|
644
|
+
self._endpoints = endpoints
|
|
645
|
+
|
|
646
|
+
self.create = async_to_raw_response_wrapper(
|
|
647
|
+
endpoints.create,
|
|
648
|
+
)
|
|
649
|
+
self.retrieve = async_to_raw_response_wrapper(
|
|
650
|
+
endpoints.retrieve,
|
|
651
|
+
)
|
|
652
|
+
self.update = async_to_raw_response_wrapper(
|
|
653
|
+
endpoints.update,
|
|
654
|
+
)
|
|
655
|
+
self.list = async_to_raw_response_wrapper(
|
|
656
|
+
endpoints.list,
|
|
657
|
+
)
|
|
658
|
+
self.delete = async_to_raw_response_wrapper(
|
|
659
|
+
endpoints.delete,
|
|
660
|
+
)
|
|
661
|
+
self.list_avzones = async_to_raw_response_wrapper(
|
|
662
|
+
endpoints.list_avzones,
|
|
598
663
|
)
|
|
599
664
|
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
665
|
+
|
|
666
|
+
class EndpointsResourceWithStreamingResponse:
|
|
667
|
+
def __init__(self, endpoints: EndpointsResource) -> None:
|
|
668
|
+
self._endpoints = endpoints
|
|
669
|
+
|
|
670
|
+
self.create = to_streamed_response_wrapper(
|
|
671
|
+
endpoints.create,
|
|
672
|
+
)
|
|
673
|
+
self.retrieve = to_streamed_response_wrapper(
|
|
674
|
+
endpoints.retrieve,
|
|
675
|
+
)
|
|
676
|
+
self.update = to_streamed_response_wrapper(
|
|
677
|
+
endpoints.update,
|
|
678
|
+
)
|
|
679
|
+
self.list = to_streamed_response_wrapper(
|
|
680
|
+
endpoints.list,
|
|
681
|
+
)
|
|
682
|
+
self.delete = to_streamed_response_wrapper(
|
|
683
|
+
endpoints.delete,
|
|
684
|
+
)
|
|
685
|
+
self.list_avzones = to_streamed_response_wrapper(
|
|
686
|
+
endpoints.list_avzones,
|
|
606
687
|
)
|
|
607
688
|
|
|
608
|
-
assert isinstance(response, TogetherResponse)
|
|
609
|
-
assert isinstance(response.data, dict)
|
|
610
|
-
assert isinstance(response.data["avzones"], list)
|
|
611
689
|
|
|
612
|
-
|
|
690
|
+
class AsyncEndpointsResourceWithStreamingResponse:
|
|
691
|
+
def __init__(self, endpoints: AsyncEndpointsResource) -> None:
|
|
692
|
+
self._endpoints = endpoints
|
|
693
|
+
|
|
694
|
+
self.create = async_to_streamed_response_wrapper(
|
|
695
|
+
endpoints.create,
|
|
696
|
+
)
|
|
697
|
+
self.retrieve = async_to_streamed_response_wrapper(
|
|
698
|
+
endpoints.retrieve,
|
|
699
|
+
)
|
|
700
|
+
self.update = async_to_streamed_response_wrapper(
|
|
701
|
+
endpoints.update,
|
|
702
|
+
)
|
|
703
|
+
self.list = async_to_streamed_response_wrapper(
|
|
704
|
+
endpoints.list,
|
|
705
|
+
)
|
|
706
|
+
self.delete = async_to_streamed_response_wrapper(
|
|
707
|
+
endpoints.delete,
|
|
708
|
+
)
|
|
709
|
+
self.list_avzones = async_to_streamed_response_wrapper(
|
|
710
|
+
endpoints.list_avzones,
|
|
711
|
+
)
|