together 1.5.34__py3-none-any.whl → 2.0.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/__init__.py +101 -114
- together/_base_client.py +1995 -0
- together/_client.py +1033 -0
- together/_compat.py +219 -0
- together/_constants.py +14 -0
- together/_exceptions.py +108 -0
- together/_files.py +123 -0
- together/_models.py +857 -0
- together/_qs.py +150 -0
- together/_resource.py +43 -0
- together/_response.py +830 -0
- together/_streaming.py +370 -0
- together/_types.py +260 -0
- together/_utils/__init__.py +64 -0
- together/_utils/_compat.py +45 -0
- together/_utils/_datetime_parse.py +136 -0
- together/_utils/_logs.py +25 -0
- together/_utils/_proxy.py +65 -0
- together/_utils/_reflection.py +42 -0
- together/_utils/_resources_proxy.py +24 -0
- together/_utils/_streams.py +12 -0
- together/_utils/_sync.py +58 -0
- together/_utils/_transform.py +457 -0
- together/_utils/_typing.py +156 -0
- together/_utils/_utils.py +421 -0
- together/_version.py +4 -0
- together/lib/.keep +4 -0
- together/lib/__init__.py +23 -0
- together/{cli → lib/cli}/api/endpoints.py +65 -81
- together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
- together/{cli → lib/cli}/api/files.py +20 -17
- together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
- together/{cli → lib/cli}/api/models.py +34 -27
- together/lib/cli/api/utils.py +50 -0
- together/{cli → lib/cli}/cli.py +16 -26
- together/{constants.py → lib/constants.py} +11 -24
- together/lib/resources/__init__.py +11 -0
- together/lib/resources/files.py +999 -0
- together/lib/resources/fine_tuning.py +280 -0
- together/lib/resources/models.py +35 -0
- together/lib/types/__init__.py +13 -0
- together/lib/types/error.py +9 -0
- together/lib/types/fine_tuning.py +397 -0
- together/{utils → lib/utils}/__init__.py +6 -14
- together/{utils → lib/utils}/_log.py +11 -16
- together/{utils → lib/utils}/files.py +90 -288
- together/lib/utils/serializer.py +10 -0
- together/{utils → lib/utils}/tools.py +19 -55
- together/resources/__init__.py +225 -39
- together/resources/audio/__init__.py +72 -48
- together/resources/audio/audio.py +198 -0
- together/resources/audio/speech.py +574 -128
- together/resources/audio/transcriptions.py +247 -261
- together/resources/audio/translations.py +221 -241
- together/resources/audio/voices.py +111 -41
- together/resources/batches.py +417 -0
- together/resources/chat/__init__.py +30 -21
- together/resources/chat/chat.py +102 -0
- together/resources/chat/completions.py +1063 -263
- together/resources/code_interpreter/__init__.py +33 -0
- together/resources/code_interpreter/code_interpreter.py +258 -0
- together/resources/code_interpreter/sessions.py +135 -0
- together/resources/completions.py +884 -225
- together/resources/embeddings.py +172 -68
- together/resources/endpoints.py +589 -477
- together/resources/evals.py +452 -0
- together/resources/files.py +397 -129
- together/resources/fine_tuning.py +1033 -0
- together/resources/hardware.py +181 -0
- together/resources/images.py +258 -104
- together/resources/jobs.py +214 -0
- together/resources/models.py +223 -193
- together/resources/rerank.py +190 -92
- together/resources/videos.py +286 -214
- together/types/__init__.py +66 -167
- together/types/audio/__init__.py +10 -0
- together/types/audio/speech_create_params.py +75 -0
- together/types/audio/transcription_create_params.py +54 -0
- together/types/audio/transcription_create_response.py +111 -0
- together/types/audio/translation_create_params.py +40 -0
- together/types/audio/translation_create_response.py +70 -0
- together/types/audio/voice_list_response.py +23 -0
- together/types/audio_speech_stream_chunk.py +16 -0
- together/types/autoscaling.py +13 -0
- together/types/autoscaling_param.py +15 -0
- together/types/batch_create_params.py +24 -0
- together/types/batch_create_response.py +14 -0
- together/types/batch_job.py +45 -0
- together/types/batch_list_response.py +10 -0
- together/types/chat/__init__.py +18 -0
- together/types/chat/chat_completion.py +60 -0
- together/types/chat/chat_completion_chunk.py +61 -0
- together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
- together/types/chat/chat_completion_structured_message_text_param.py +13 -0
- together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
- together/types/chat/chat_completion_usage.py +13 -0
- together/types/chat/chat_completion_warning.py +9 -0
- together/types/chat/completion_create_params.py +329 -0
- together/types/code_interpreter/__init__.py +5 -0
- together/types/code_interpreter/session_list_response.py +31 -0
- together/types/code_interpreter_execute_params.py +45 -0
- together/types/completion.py +42 -0
- together/types/completion_chunk.py +66 -0
- together/types/completion_create_params.py +138 -0
- together/types/dedicated_endpoint.py +44 -0
- together/types/embedding.py +24 -0
- together/types/embedding_create_params.py +31 -0
- together/types/endpoint_create_params.py +43 -0
- together/types/endpoint_list_avzones_response.py +11 -0
- together/types/endpoint_list_params.py +18 -0
- together/types/endpoint_list_response.py +41 -0
- together/types/endpoint_update_params.py +27 -0
- together/types/eval_create_params.py +263 -0
- together/types/eval_create_response.py +16 -0
- together/types/eval_list_params.py +21 -0
- together/types/eval_list_response.py +10 -0
- together/types/eval_status_response.py +100 -0
- together/types/evaluation_job.py +139 -0
- together/types/execute_response.py +108 -0
- together/types/file_delete_response.py +13 -0
- together/types/file_list.py +12 -0
- together/types/file_purpose.py +9 -0
- together/types/file_response.py +31 -0
- together/types/file_type.py +7 -0
- together/types/fine_tuning_cancel_response.py +194 -0
- together/types/fine_tuning_content_params.py +24 -0
- together/types/fine_tuning_delete_params.py +11 -0
- together/types/fine_tuning_delete_response.py +12 -0
- together/types/fine_tuning_list_checkpoints_response.py +21 -0
- together/types/fine_tuning_list_events_response.py +12 -0
- together/types/fine_tuning_list_response.py +199 -0
- together/types/finetune_event.py +41 -0
- together/types/finetune_event_type.py +33 -0
- together/types/finetune_response.py +177 -0
- together/types/hardware_list_params.py +16 -0
- together/types/hardware_list_response.py +58 -0
- together/types/image_data_b64.py +15 -0
- together/types/image_data_url.py +15 -0
- together/types/image_file.py +23 -0
- together/types/image_generate_params.py +85 -0
- together/types/job_list_response.py +47 -0
- together/types/job_retrieve_response.py +43 -0
- together/types/log_probs.py +18 -0
- together/types/model_list_response.py +10 -0
- together/types/model_object.py +42 -0
- together/types/model_upload_params.py +36 -0
- together/types/model_upload_response.py +23 -0
- together/types/rerank_create_params.py +36 -0
- together/types/rerank_create_response.py +36 -0
- together/types/tool_choice.py +23 -0
- together/types/tool_choice_param.py +23 -0
- together/types/tools_param.py +23 -0
- together/types/training_method_dpo.py +22 -0
- together/types/training_method_sft.py +18 -0
- together/types/video_create_params.py +86 -0
- together/types/video_create_response.py +10 -0
- together/types/video_job.py +57 -0
- together-2.0.0a6.dist-info/METADATA +729 -0
- together-2.0.0a6.dist-info/RECORD +165 -0
- {together-1.5.34.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
- together-2.0.0a6.dist-info/entry_points.txt +2 -0
- {together-1.5.34.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
- together/abstract/api_requestor.py +0 -770
- together/cli/api/chat.py +0 -298
- together/cli/api/completions.py +0 -119
- together/cli/api/images.py +0 -93
- together/cli/api/utils.py +0 -139
- together/client.py +0 -186
- together/error.py +0 -194
- together/filemanager.py +0 -635
- together/legacy/__init__.py +0 -0
- together/legacy/base.py +0 -27
- together/legacy/complete.py +0 -93
- together/legacy/embeddings.py +0 -27
- together/legacy/files.py +0 -146
- together/legacy/finetune.py +0 -177
- together/legacy/images.py +0 -27
- together/legacy/models.py +0 -44
- together/resources/batch.py +0 -165
- together/resources/code_interpreter.py +0 -82
- together/resources/evaluation.py +0 -808
- together/resources/finetune.py +0 -1388
- together/together_response.py +0 -50
- together/types/abstract.py +0 -26
- together/types/audio_speech.py +0 -311
- together/types/batch.py +0 -54
- together/types/chat_completions.py +0 -210
- together/types/code_interpreter.py +0 -57
- together/types/common.py +0 -67
- together/types/completions.py +0 -107
- together/types/embeddings.py +0 -35
- together/types/endpoints.py +0 -123
- together/types/error.py +0 -16
- together/types/evaluation.py +0 -93
- together/types/files.py +0 -93
- together/types/finetune.py +0 -464
- together/types/images.py +0 -42
- together/types/models.py +0 -96
- together/types/rerank.py +0 -43
- together/types/videos.py +0 -69
- together/utils/api_helpers.py +0 -124
- together/version.py +0 -6
- together-1.5.34.dist-info/METADATA +0 -583
- together-1.5.34.dist-info/RECORD +0 -77
- together-1.5.34.dist-info/entry_points.txt +0 -3
- /together/{abstract → lib/cli}/__init__.py +0 -0
- /together/{cli → lib/cli/api}/__init__.py +0 -0
- /together/{cli/api/__init__.py → py.typed} +0 -0
together/resources/endpoints.py
CHANGED
|
@@ -1,599 +1,711 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
from typing import Dict, List, Literal, Optional, Union
|
|
4
|
-
|
|
5
|
-
from together.abstract import api_requestor
|
|
6
|
-
from together.together_response import TogetherResponse
|
|
7
|
-
from together.types import TogetherClient, TogetherRequest
|
|
8
|
-
from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
9
2
|
|
|
3
|
+
from __future__ import annotations
|
|
10
4
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from typing_extensions import Literal
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
|
|
11
|
+
from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
|
|
12
|
+
from .._utils import maybe_transform, async_maybe_transform
|
|
13
|
+
from .._compat import cached_property
|
|
14
|
+
from .._resource import SyncAPIResource, AsyncAPIResource
|
|
15
|
+
from .._response import (
|
|
16
|
+
to_raw_response_wrapper,
|
|
17
|
+
to_streamed_response_wrapper,
|
|
18
|
+
async_to_raw_response_wrapper,
|
|
19
|
+
async_to_streamed_response_wrapper,
|
|
20
|
+
)
|
|
21
|
+
from .._base_client import make_request_options
|
|
22
|
+
from ..types.autoscaling_param import AutoscalingParam
|
|
23
|
+
from ..types.dedicated_endpoint import DedicatedEndpoint
|
|
24
|
+
from ..types.endpoint_list_response import EndpointListResponse
|
|
25
|
+
from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
|
|
26
|
+
|
|
27
|
+
__all__ = ["EndpointsResource", "AsyncEndpointsResource"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EndpointsResource(SyncAPIResource):
|
|
31
|
+
@cached_property
|
|
32
|
+
def with_raw_response(self) -> EndpointsResourceWithRawResponse:
|
|
21
33
|
"""
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
Args:
|
|
25
|
-
type (str, optional): Filter endpoints by endpoint type ("dedicated" or "serverless"). Defaults to None.
|
|
26
|
-
usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
|
|
27
|
-
mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
|
|
34
|
+
This property can be used as a prefix for any HTTP method call to return
|
|
35
|
+
the raw response object instead of the parsed content.
|
|
28
36
|
|
|
29
|
-
|
|
30
|
-
List[ListEndpoint]: List of endpoint objects
|
|
37
|
+
For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
|
|
31
38
|
"""
|
|
32
|
-
|
|
33
|
-
client=self._client,
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
params: Dict[
|
|
37
|
-
str,
|
|
38
|
-
Union[
|
|
39
|
-
Literal["dedicated", "serverless"],
|
|
40
|
-
Literal["on-demand", "reserved"],
|
|
41
|
-
bool,
|
|
42
|
-
],
|
|
43
|
-
] = {}
|
|
44
|
-
if type is not None:
|
|
45
|
-
params["type"] = type
|
|
46
|
-
if usage_type is not None:
|
|
47
|
-
params["usage_type"] = usage_type
|
|
48
|
-
if mine is not None:
|
|
49
|
-
params["mine"] = mine
|
|
50
|
-
|
|
51
|
-
response, _, _ = requestor.request(
|
|
52
|
-
options=TogetherRequest(
|
|
53
|
-
method="GET",
|
|
54
|
-
url="endpoints",
|
|
55
|
-
params=params,
|
|
56
|
-
),
|
|
57
|
-
stream=False,
|
|
58
|
-
)
|
|
39
|
+
return EndpointsResourceWithRawResponse(self)
|
|
59
40
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
41
|
+
@cached_property
|
|
42
|
+
def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
|
|
43
|
+
"""
|
|
44
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
64
45
|
|
|
65
|
-
|
|
46
|
+
For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
|
|
47
|
+
"""
|
|
48
|
+
return EndpointsResourceWithStreamingResponse(self)
|
|
66
49
|
|
|
67
50
|
def create(
|
|
68
51
|
self,
|
|
69
52
|
*,
|
|
70
|
-
|
|
53
|
+
autoscaling: AutoscalingParam,
|
|
71
54
|
hardware: str,
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
55
|
+
model: str,
|
|
56
|
+
availability_zone: str | Omit = omit,
|
|
57
|
+
disable_prompt_cache: bool | Omit = omit,
|
|
58
|
+
disable_speculative_decoding: bool | Omit = omit,
|
|
59
|
+
display_name: str | Omit = omit,
|
|
60
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
61
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
62
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
63
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
64
|
+
extra_headers: Headers | None = None,
|
|
65
|
+
extra_query: Query | None = None,
|
|
66
|
+
extra_body: Body | None = None,
|
|
67
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
80
68
|
) -> DedicatedEndpoint:
|
|
81
|
-
"""
|
|
82
|
-
|
|
69
|
+
"""Creates a new dedicated endpoint for serving models.
|
|
70
|
+
|
|
71
|
+
The endpoint will
|
|
72
|
+
automatically start after creation. You can deploy any supported model on
|
|
73
|
+
hardware configurations that meet the model's requirements.
|
|
83
74
|
|
|
84
75
|
Args:
|
|
85
|
-
|
|
86
|
-
hardware (str): The hardware configuration to use for this endpoint
|
|
87
|
-
min_replicas (int): The minimum number of replicas to maintain
|
|
88
|
-
max_replicas (int): The maximum number of replicas to scale up to
|
|
89
|
-
display_name (str, optional): A human-readable name for the endpoint
|
|
90
|
-
disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
|
|
91
|
-
disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
|
|
92
|
-
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
|
|
93
|
-
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
|
|
94
|
-
availability_zone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).
|
|
95
|
-
|
|
96
|
-
Returns:
|
|
97
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
98
|
-
"""
|
|
99
|
-
requestor = api_requestor.APIRequestor(
|
|
100
|
-
client=self._client,
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
|
|
104
|
-
"model": model,
|
|
105
|
-
"hardware": hardware,
|
|
106
|
-
"autoscaling": {
|
|
107
|
-
"min_replicas": min_replicas,
|
|
108
|
-
"max_replicas": max_replicas,
|
|
109
|
-
},
|
|
110
|
-
"disable_prompt_cache": disable_prompt_cache,
|
|
111
|
-
"disable_speculative_decoding": disable_speculative_decoding,
|
|
112
|
-
"state": state,
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
if display_name is not None:
|
|
116
|
-
data["display_name"] = display_name
|
|
117
|
-
|
|
118
|
-
if inactive_timeout is not None:
|
|
119
|
-
data["inactive_timeout"] = inactive_timeout
|
|
120
|
-
|
|
121
|
-
if availability_zone is not None:
|
|
122
|
-
data["availability_zone"] = availability_zone
|
|
123
|
-
|
|
124
|
-
response, _, _ = requestor.request(
|
|
125
|
-
options=TogetherRequest(
|
|
126
|
-
method="POST",
|
|
127
|
-
url="endpoints",
|
|
128
|
-
params=data,
|
|
129
|
-
),
|
|
130
|
-
stream=False,
|
|
131
|
-
)
|
|
76
|
+
autoscaling: Configuration for automatic scaling of the endpoint
|
|
132
77
|
|
|
133
|
-
|
|
78
|
+
hardware: The hardware configuration to use for this endpoint
|
|
134
79
|
|
|
135
|
-
|
|
80
|
+
model: The model to deploy on this endpoint
|
|
136
81
|
|
|
137
|
-
|
|
138
|
-
"""
|
|
139
|
-
Get details of a specific endpoint.
|
|
82
|
+
availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
|
|
140
83
|
|
|
141
|
-
|
|
142
|
-
endpoint_id (str): ID of the endpoint to retrieve
|
|
84
|
+
disable_prompt_cache: Whether to disable the prompt cache for this endpoint
|
|
143
85
|
|
|
144
|
-
|
|
145
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
146
|
-
"""
|
|
147
|
-
requestor = api_requestor.APIRequestor(
|
|
148
|
-
client=self._client,
|
|
149
|
-
)
|
|
86
|
+
disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
|
|
150
87
|
|
|
151
|
-
|
|
152
|
-
options=TogetherRequest(
|
|
153
|
-
method="GET",
|
|
154
|
-
url=f"endpoints/{endpoint_id}",
|
|
155
|
-
),
|
|
156
|
-
stream=False,
|
|
157
|
-
)
|
|
88
|
+
display_name: A human-readable name for the endpoint
|
|
158
89
|
|
|
159
|
-
|
|
90
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
91
|
+
automatically stopped. Set to null, omit or set to 0 to disable automatic
|
|
92
|
+
timeout.
|
|
160
93
|
|
|
161
|
-
|
|
94
|
+
state: The desired state of the endpoint
|
|
162
95
|
|
|
163
|
-
|
|
164
|
-
"""
|
|
165
|
-
Delete a specific endpoint.
|
|
96
|
+
extra_headers: Send extra headers
|
|
166
97
|
|
|
167
|
-
|
|
168
|
-
endpoint_id (str): ID of the endpoint to delete
|
|
169
|
-
"""
|
|
170
|
-
requestor = api_requestor.APIRequestor(
|
|
171
|
-
client=self._client,
|
|
172
|
-
)
|
|
98
|
+
extra_query: Add additional query parameters to the request
|
|
173
99
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
100
|
+
extra_body: Add additional JSON properties to the request
|
|
101
|
+
|
|
102
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
103
|
+
"""
|
|
104
|
+
return self._post(
|
|
105
|
+
"/endpoints",
|
|
106
|
+
body=maybe_transform(
|
|
107
|
+
{
|
|
108
|
+
"autoscaling": autoscaling,
|
|
109
|
+
"hardware": hardware,
|
|
110
|
+
"model": model,
|
|
111
|
+
"availability_zone": availability_zone,
|
|
112
|
+
"disable_prompt_cache": disable_prompt_cache,
|
|
113
|
+
"disable_speculative_decoding": disable_speculative_decoding,
|
|
114
|
+
"display_name": display_name,
|
|
115
|
+
"inactive_timeout": inactive_timeout,
|
|
116
|
+
"state": state,
|
|
117
|
+
},
|
|
118
|
+
endpoint_create_params.EndpointCreateParams,
|
|
178
119
|
),
|
|
179
|
-
|
|
120
|
+
options=make_request_options(
|
|
121
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
122
|
+
),
|
|
123
|
+
cast_to=DedicatedEndpoint,
|
|
180
124
|
)
|
|
181
125
|
|
|
182
|
-
def
|
|
126
|
+
def retrieve(
|
|
183
127
|
self,
|
|
184
128
|
endpoint_id: str,
|
|
185
129
|
*,
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
130
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
131
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
132
|
+
extra_headers: Headers | None = None,
|
|
133
|
+
extra_query: Query | None = None,
|
|
134
|
+
extra_body: Body | None = None,
|
|
135
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
191
136
|
) -> DedicatedEndpoint:
|
|
192
137
|
"""
|
|
193
|
-
|
|
138
|
+
Retrieves details about a specific endpoint, including its current state,
|
|
139
|
+
configuration, and scaling settings.
|
|
194
140
|
|
|
195
141
|
Args:
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
Returns:
|
|
204
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
142
|
+
extra_headers: Send extra headers
|
|
143
|
+
|
|
144
|
+
extra_query: Add additional query parameters to the request
|
|
145
|
+
|
|
146
|
+
extra_body: Add additional JSON properties to the request
|
|
147
|
+
|
|
148
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
205
149
|
"""
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
if min_replicas is not None or max_replicas is not None:
|
|
213
|
-
current_min = min_replicas
|
|
214
|
-
current_max = max_replicas
|
|
215
|
-
if current_min is None or current_max is None:
|
|
216
|
-
# Get current values if only one is specified
|
|
217
|
-
current = self.get(endpoint_id=endpoint_id)
|
|
218
|
-
current_min = current_min or current.autoscaling.min_replicas
|
|
219
|
-
current_max = current_max or current.autoscaling.max_replicas
|
|
220
|
-
data["autoscaling"] = {
|
|
221
|
-
"min_replicas": current_min,
|
|
222
|
-
"max_replicas": current_max,
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
if state is not None:
|
|
226
|
-
data["state"] = state
|
|
227
|
-
|
|
228
|
-
if display_name is not None:
|
|
229
|
-
data["display_name"] = display_name
|
|
230
|
-
|
|
231
|
-
if inactive_timeout is not None:
|
|
232
|
-
data["inactive_timeout"] = inactive_timeout
|
|
233
|
-
|
|
234
|
-
response, _, _ = requestor.request(
|
|
235
|
-
options=TogetherRequest(
|
|
236
|
-
method="PATCH",
|
|
237
|
-
url=f"endpoints/{endpoint_id}",
|
|
238
|
-
params=data,
|
|
150
|
+
if not endpoint_id:
|
|
151
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
152
|
+
return self._get(
|
|
153
|
+
f"/endpoints/{endpoint_id}",
|
|
154
|
+
options=make_request_options(
|
|
155
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
239
156
|
),
|
|
240
|
-
|
|
157
|
+
cast_to=DedicatedEndpoint,
|
|
241
158
|
)
|
|
242
159
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
160
|
+
def update(
|
|
161
|
+
self,
|
|
162
|
+
endpoint_id: str,
|
|
163
|
+
*,
|
|
164
|
+
autoscaling: AutoscalingParam | Omit = omit,
|
|
165
|
+
display_name: str | Omit = omit,
|
|
166
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
167
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
168
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
169
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
170
|
+
extra_headers: Headers | None = None,
|
|
171
|
+
extra_query: Query | None = None,
|
|
172
|
+
extra_body: Body | None = None,
|
|
173
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
174
|
+
) -> DedicatedEndpoint:
|
|
175
|
+
"""Updates an existing endpoint's configuration.
|
|
246
176
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
List available hardware configurations.
|
|
177
|
+
You can modify the display name,
|
|
178
|
+
autoscaling settings, or change the endpoint's state (start/stop).
|
|
250
179
|
|
|
251
180
|
Args:
|
|
252
|
-
|
|
253
|
-
the response includes availability status for each compatible configuration.
|
|
181
|
+
autoscaling: New autoscaling configuration for the endpoint
|
|
254
182
|
|
|
255
|
-
|
|
256
|
-
List[HardwareWithStatus]: List of hardware configurations with their status
|
|
257
|
-
"""
|
|
258
|
-
requestor = api_requestor.APIRequestor(
|
|
259
|
-
client=self._client,
|
|
260
|
-
)
|
|
183
|
+
display_name: A human-readable name for the endpoint
|
|
261
184
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
params["model"] = model
|
|
185
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
186
|
+
automatically stopped. Set to 0 to disable automatic timeout.
|
|
265
187
|
|
|
266
|
-
|
|
267
|
-
options=TogetherRequest(
|
|
268
|
-
method="GET",
|
|
269
|
-
url="hardware",
|
|
270
|
-
params=params,
|
|
271
|
-
),
|
|
272
|
-
stream=False,
|
|
273
|
-
)
|
|
188
|
+
state: The desired state of the endpoint
|
|
274
189
|
|
|
275
|
-
|
|
276
|
-
assert isinstance(response.data, dict)
|
|
277
|
-
assert isinstance(response.data["data"], list)
|
|
190
|
+
extra_headers: Send extra headers
|
|
278
191
|
|
|
279
|
-
|
|
192
|
+
extra_query: Add additional query parameters to the request
|
|
280
193
|
|
|
281
|
-
|
|
282
|
-
"""
|
|
283
|
-
List all available availability zones.
|
|
194
|
+
extra_body: Add additional JSON properties to the request
|
|
284
195
|
|
|
285
|
-
|
|
286
|
-
List[str]: List of unique availability zones
|
|
196
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
287
197
|
"""
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
198
|
+
if not endpoint_id:
|
|
199
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
200
|
+
return self._patch(
|
|
201
|
+
f"/endpoints/{endpoint_id}",
|
|
202
|
+
body=maybe_transform(
|
|
203
|
+
{
|
|
204
|
+
"autoscaling": autoscaling,
|
|
205
|
+
"display_name": display_name,
|
|
206
|
+
"inactive_timeout": inactive_timeout,
|
|
207
|
+
"state": state,
|
|
208
|
+
},
|
|
209
|
+
endpoint_update_params.EndpointUpdateParams,
|
|
210
|
+
),
|
|
211
|
+
options=make_request_options(
|
|
212
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
296
213
|
),
|
|
297
|
-
|
|
214
|
+
cast_to=DedicatedEndpoint,
|
|
298
215
|
)
|
|
299
216
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
217
|
+
def list(
|
|
218
|
+
self,
|
|
219
|
+
*,
|
|
220
|
+
mine: bool | Omit = omit,
|
|
221
|
+
type: Literal["dedicated", "serverless"] | Omit = omit,
|
|
222
|
+
usage_type: Literal["on-demand", "reserved"] | Omit = omit,
|
|
223
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
224
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
225
|
+
extra_headers: Headers | None = None,
|
|
226
|
+
extra_query: Query | None = None,
|
|
227
|
+
extra_body: Body | None = None,
|
|
228
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
229
|
+
) -> EndpointListResponse:
|
|
230
|
+
"""Returns a list of all endpoints associated with your account.
|
|
231
|
+
|
|
232
|
+
You can filter the
|
|
233
|
+
results by type (dedicated or serverless).
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
mine: If true, return only endpoints owned by the caller
|
|
237
|
+
|
|
238
|
+
type: Filter endpoints by type
|
|
303
239
|
|
|
304
|
-
|
|
240
|
+
usage_type: Filter endpoints by usage type
|
|
305
241
|
|
|
242
|
+
extra_headers: Send extra headers
|
|
306
243
|
|
|
307
|
-
|
|
308
|
-
def __init__(self, client: TogetherClient) -> None:
|
|
309
|
-
self._client = client
|
|
244
|
+
extra_query: Add additional query parameters to the request
|
|
310
245
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
usage_type: Optional[Literal["on-demand", "reserved"]] = None,
|
|
315
|
-
mine: Optional[bool] = None,
|
|
316
|
-
) -> List[ListEndpoint]:
|
|
246
|
+
extra_body: Add additional JSON properties to the request
|
|
247
|
+
|
|
248
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
317
249
|
"""
|
|
318
|
-
|
|
250
|
+
return self._get(
|
|
251
|
+
"/endpoints",
|
|
252
|
+
options=make_request_options(
|
|
253
|
+
extra_headers=extra_headers,
|
|
254
|
+
extra_query=extra_query,
|
|
255
|
+
extra_body=extra_body,
|
|
256
|
+
timeout=timeout,
|
|
257
|
+
query=maybe_transform(
|
|
258
|
+
{
|
|
259
|
+
"mine": mine,
|
|
260
|
+
"type": type,
|
|
261
|
+
"usage_type": usage_type,
|
|
262
|
+
},
|
|
263
|
+
endpoint_list_params.EndpointListParams,
|
|
264
|
+
),
|
|
265
|
+
),
|
|
266
|
+
cast_to=EndpointListResponse,
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
def delete(
|
|
270
|
+
self,
|
|
271
|
+
endpoint_id: str,
|
|
272
|
+
*,
|
|
273
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
274
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
275
|
+
extra_headers: Headers | None = None,
|
|
276
|
+
extra_query: Query | None = None,
|
|
277
|
+
extra_body: Body | None = None,
|
|
278
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
279
|
+
) -> None:
|
|
280
|
+
"""Permanently deletes an endpoint.
|
|
281
|
+
|
|
282
|
+
This action cannot be undone.
|
|
319
283
|
|
|
320
284
|
Args:
|
|
321
|
-
|
|
322
|
-
usage_type (str, optional): Filter endpoints by usage type ("on-demand" or "reserved"). Defaults to None.
|
|
323
|
-
mine (bool, optional): If True, return only endpoints owned by the caller. Defaults to None.
|
|
285
|
+
extra_headers: Send extra headers
|
|
324
286
|
|
|
325
|
-
|
|
326
|
-
|
|
287
|
+
extra_query: Add additional query parameters to the request
|
|
288
|
+
|
|
289
|
+
extra_body: Add additional JSON properties to the request
|
|
290
|
+
|
|
291
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
327
292
|
"""
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
)
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
Literal["dedicated", "serverless"],
|
|
336
|
-
Literal["on-demand", "reserved"],
|
|
337
|
-
bool,
|
|
338
|
-
],
|
|
339
|
-
] = {}
|
|
340
|
-
if type is not None:
|
|
341
|
-
params["type"] = type
|
|
342
|
-
if usage_type is not None:
|
|
343
|
-
params["usage_type"] = usage_type
|
|
344
|
-
if mine is not None:
|
|
345
|
-
params["mine"] = mine
|
|
346
|
-
|
|
347
|
-
response, _, _ = await requestor.arequest(
|
|
348
|
-
options=TogetherRequest(
|
|
349
|
-
method="GET",
|
|
350
|
-
url="endpoints",
|
|
351
|
-
params=params,
|
|
293
|
+
if not endpoint_id:
|
|
294
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
295
|
+
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
|
296
|
+
return self._delete(
|
|
297
|
+
f"/endpoints/{endpoint_id}",
|
|
298
|
+
options=make_request_options(
|
|
299
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
352
300
|
),
|
|
353
|
-
|
|
301
|
+
cast_to=NoneType,
|
|
354
302
|
)
|
|
355
303
|
|
|
356
|
-
|
|
357
|
-
|
|
304
|
+
def list_avzones(
|
|
305
|
+
self,
|
|
306
|
+
*,
|
|
307
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
308
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
309
|
+
extra_headers: Headers | None = None,
|
|
310
|
+
extra_query: Query | None = None,
|
|
311
|
+
extra_body: Body | None = None,
|
|
312
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
313
|
+
) -> EndpointListAvzonesResponse:
|
|
314
|
+
"""List all available availability zones."""
|
|
315
|
+
return self._get(
|
|
316
|
+
"/clusters/availability-zones",
|
|
317
|
+
options=make_request_options(
|
|
318
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
319
|
+
),
|
|
320
|
+
cast_to=EndpointListAvzonesResponse,
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class AsyncEndpointsResource(AsyncAPIResource):
|
|
325
|
+
@cached_property
|
|
326
|
+
def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
|
|
327
|
+
"""
|
|
328
|
+
This property can be used as a prefix for any HTTP method call to return
|
|
329
|
+
the raw response object instead of the parsed content.
|
|
330
|
+
|
|
331
|
+
For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
|
|
332
|
+
"""
|
|
333
|
+
return AsyncEndpointsResourceWithRawResponse(self)
|
|
358
334
|
|
|
359
|
-
|
|
335
|
+
@cached_property
|
|
336
|
+
def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
|
|
337
|
+
"""
|
|
338
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
339
|
+
|
|
340
|
+
For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
|
|
341
|
+
"""
|
|
342
|
+
return AsyncEndpointsResourceWithStreamingResponse(self)
|
|
360
343
|
|
|
361
344
|
async def create(
|
|
362
345
|
self,
|
|
363
346
|
*,
|
|
364
|
-
|
|
347
|
+
autoscaling: AutoscalingParam,
|
|
365
348
|
hardware: str,
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
349
|
+
model: str,
|
|
350
|
+
availability_zone: str | Omit = omit,
|
|
351
|
+
disable_prompt_cache: bool | Omit = omit,
|
|
352
|
+
disable_speculative_decoding: bool | Omit = omit,
|
|
353
|
+
display_name: str | Omit = omit,
|
|
354
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
355
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
356
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
357
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
358
|
+
extra_headers: Headers | None = None,
|
|
359
|
+
extra_query: Query | None = None,
|
|
360
|
+
extra_body: Body | None = None,
|
|
361
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
374
362
|
) -> DedicatedEndpoint:
|
|
375
|
-
"""
|
|
376
|
-
|
|
363
|
+
"""Creates a new dedicated endpoint for serving models.
|
|
364
|
+
|
|
365
|
+
The endpoint will
|
|
366
|
+
automatically start after creation. You can deploy any supported model on
|
|
367
|
+
hardware configurations that meet the model's requirements.
|
|
377
368
|
|
|
378
369
|
Args:
|
|
379
|
-
|
|
380
|
-
hardware (str): The hardware configuration to use for this endpoint
|
|
381
|
-
min_replicas (int): The minimum number of replicas to maintain
|
|
382
|
-
max_replicas (int): The maximum number of replicas to scale up to
|
|
383
|
-
display_name (str, optional): A human-readable name for the endpoint
|
|
384
|
-
disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
|
|
385
|
-
disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
|
|
386
|
-
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
|
|
387
|
-
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
|
|
388
|
-
|
|
389
|
-
Returns:
|
|
390
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
391
|
-
"""
|
|
392
|
-
requestor = api_requestor.APIRequestor(
|
|
393
|
-
client=self._client,
|
|
394
|
-
)
|
|
395
|
-
|
|
396
|
-
data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
|
|
397
|
-
"model": model,
|
|
398
|
-
"hardware": hardware,
|
|
399
|
-
"autoscaling": {
|
|
400
|
-
"min_replicas": min_replicas,
|
|
401
|
-
"max_replicas": max_replicas,
|
|
402
|
-
},
|
|
403
|
-
"disable_prompt_cache": disable_prompt_cache,
|
|
404
|
-
"disable_speculative_decoding": disable_speculative_decoding,
|
|
405
|
-
"state": state,
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
if display_name is not None:
|
|
409
|
-
data["display_name"] = display_name
|
|
410
|
-
|
|
411
|
-
if inactive_timeout is not None:
|
|
412
|
-
data["inactive_timeout"] = inactive_timeout
|
|
413
|
-
|
|
414
|
-
if availability_zone is not None:
|
|
415
|
-
data["availability_zone"] = availability_zone
|
|
416
|
-
|
|
417
|
-
response, _, _ = await requestor.arequest(
|
|
418
|
-
options=TogetherRequest(
|
|
419
|
-
method="POST",
|
|
420
|
-
url="endpoints",
|
|
421
|
-
params=data,
|
|
422
|
-
),
|
|
423
|
-
stream=False,
|
|
424
|
-
)
|
|
370
|
+
autoscaling: Configuration for automatic scaling of the endpoint
|
|
425
371
|
|
|
426
|
-
|
|
372
|
+
hardware: The hardware configuration to use for this endpoint
|
|
427
373
|
|
|
428
|
-
|
|
374
|
+
model: The model to deploy on this endpoint
|
|
429
375
|
|
|
430
|
-
|
|
431
|
-
"""
|
|
432
|
-
Get details of a specific endpoint.
|
|
376
|
+
availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
|
|
433
377
|
|
|
434
|
-
|
|
435
|
-
endpoint_id (str): ID of the endpoint to retrieve
|
|
378
|
+
disable_prompt_cache: Whether to disable the prompt cache for this endpoint
|
|
436
379
|
|
|
437
|
-
|
|
438
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
439
|
-
"""
|
|
440
|
-
requestor = api_requestor.APIRequestor(
|
|
441
|
-
client=self._client,
|
|
442
|
-
)
|
|
380
|
+
disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
|
|
443
381
|
|
|
444
|
-
|
|
445
|
-
options=TogetherRequest(
|
|
446
|
-
method="GET",
|
|
447
|
-
url=f"endpoints/{endpoint_id}",
|
|
448
|
-
),
|
|
449
|
-
stream=False,
|
|
450
|
-
)
|
|
382
|
+
display_name: A human-readable name for the endpoint
|
|
451
383
|
|
|
452
|
-
|
|
384
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
385
|
+
automatically stopped. Set to null, omit or set to 0 to disable automatic
|
|
386
|
+
timeout.
|
|
453
387
|
|
|
454
|
-
|
|
388
|
+
state: The desired state of the endpoint
|
|
455
389
|
|
|
456
|
-
|
|
457
|
-
"""
|
|
458
|
-
Delete a specific endpoint.
|
|
390
|
+
extra_headers: Send extra headers
|
|
459
391
|
|
|
460
|
-
|
|
461
|
-
|
|
392
|
+
extra_query: Add additional query parameters to the request
|
|
393
|
+
|
|
394
|
+
extra_body: Add additional JSON properties to the request
|
|
395
|
+
|
|
396
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
462
397
|
"""
|
|
463
|
-
|
|
464
|
-
|
|
398
|
+
return await self._post(
|
|
399
|
+
"/endpoints",
|
|
400
|
+
body=await async_maybe_transform(
|
|
401
|
+
{
|
|
402
|
+
"autoscaling": autoscaling,
|
|
403
|
+
"hardware": hardware,
|
|
404
|
+
"model": model,
|
|
405
|
+
"availability_zone": availability_zone,
|
|
406
|
+
"disable_prompt_cache": disable_prompt_cache,
|
|
407
|
+
"disable_speculative_decoding": disable_speculative_decoding,
|
|
408
|
+
"display_name": display_name,
|
|
409
|
+
"inactive_timeout": inactive_timeout,
|
|
410
|
+
"state": state,
|
|
411
|
+
},
|
|
412
|
+
endpoint_create_params.EndpointCreateParams,
|
|
413
|
+
),
|
|
414
|
+
options=make_request_options(
|
|
415
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
416
|
+
),
|
|
417
|
+
cast_to=DedicatedEndpoint,
|
|
465
418
|
)
|
|
466
419
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
420
|
+
async def retrieve(
|
|
421
|
+
self,
|
|
422
|
+
endpoint_id: str,
|
|
423
|
+
*,
|
|
424
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
425
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
426
|
+
extra_headers: Headers | None = None,
|
|
427
|
+
extra_query: Query | None = None,
|
|
428
|
+
extra_body: Body | None = None,
|
|
429
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
430
|
+
) -> DedicatedEndpoint:
|
|
431
|
+
"""
|
|
432
|
+
Retrieves details about a specific endpoint, including its current state,
|
|
433
|
+
configuration, and scaling settings.
|
|
434
|
+
|
|
435
|
+
Args:
|
|
436
|
+
extra_headers: Send extra headers
|
|
437
|
+
|
|
438
|
+
extra_query: Add additional query parameters to the request
|
|
439
|
+
|
|
440
|
+
extra_body: Add additional JSON properties to the request
|
|
441
|
+
|
|
442
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
443
|
+
"""
|
|
444
|
+
if not endpoint_id:
|
|
445
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
446
|
+
return await self._get(
|
|
447
|
+
f"/endpoints/{endpoint_id}",
|
|
448
|
+
options=make_request_options(
|
|
449
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
471
450
|
),
|
|
472
|
-
|
|
451
|
+
cast_to=DedicatedEndpoint,
|
|
473
452
|
)
|
|
474
453
|
|
|
475
454
|
async def update(
|
|
476
455
|
self,
|
|
477
456
|
endpoint_id: str,
|
|
478
457
|
*,
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
458
|
+
autoscaling: AutoscalingParam | Omit = omit,
|
|
459
|
+
display_name: str | Omit = omit,
|
|
460
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
461
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
462
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
463
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
464
|
+
extra_headers: Headers | None = None,
|
|
465
|
+
extra_query: Query | None = None,
|
|
466
|
+
extra_body: Body | None = None,
|
|
467
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
484
468
|
) -> DedicatedEndpoint:
|
|
485
|
-
"""
|
|
486
|
-
|
|
469
|
+
"""Updates an existing endpoint's configuration.
|
|
470
|
+
|
|
471
|
+
You can modify the display name,
|
|
472
|
+
autoscaling settings, or change the endpoint's state (start/stop).
|
|
487
473
|
|
|
488
474
|
Args:
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
475
|
+
autoscaling: New autoscaling configuration for the endpoint
|
|
476
|
+
|
|
477
|
+
display_name: A human-readable name for the endpoint
|
|
478
|
+
|
|
479
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
480
|
+
automatically stopped. Set to 0 to disable automatic timeout.
|
|
481
|
+
|
|
482
|
+
state: The desired state of the endpoint
|
|
483
|
+
|
|
484
|
+
extra_headers: Send extra headers
|
|
485
|
+
|
|
486
|
+
extra_query: Add additional query parameters to the request
|
|
487
|
+
|
|
488
|
+
extra_body: Add additional JSON properties to the request
|
|
489
|
+
|
|
490
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
498
491
|
"""
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
"min_replicas": current_min,
|
|
515
|
-
"max_replicas": current_max,
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
if state is not None:
|
|
519
|
-
data["state"] = state
|
|
520
|
-
|
|
521
|
-
if display_name is not None:
|
|
522
|
-
data["display_name"] = display_name
|
|
523
|
-
|
|
524
|
-
if inactive_timeout is not None:
|
|
525
|
-
data["inactive_timeout"] = inactive_timeout
|
|
526
|
-
|
|
527
|
-
response, _, _ = await requestor.arequest(
|
|
528
|
-
options=TogetherRequest(
|
|
529
|
-
method="PATCH",
|
|
530
|
-
url=f"endpoints/{endpoint_id}",
|
|
531
|
-
params=data,
|
|
492
|
+
if not endpoint_id:
|
|
493
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
494
|
+
return await self._patch(
|
|
495
|
+
f"/endpoints/{endpoint_id}",
|
|
496
|
+
body=await async_maybe_transform(
|
|
497
|
+
{
|
|
498
|
+
"autoscaling": autoscaling,
|
|
499
|
+
"display_name": display_name,
|
|
500
|
+
"inactive_timeout": inactive_timeout,
|
|
501
|
+
"state": state,
|
|
502
|
+
},
|
|
503
|
+
endpoint_update_params.EndpointUpdateParams,
|
|
504
|
+
),
|
|
505
|
+
options=make_request_options(
|
|
506
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
532
507
|
),
|
|
533
|
-
|
|
508
|
+
cast_to=DedicatedEndpoint,
|
|
534
509
|
)
|
|
535
510
|
|
|
536
|
-
|
|
511
|
+
async def list(
|
|
512
|
+
self,
|
|
513
|
+
*,
|
|
514
|
+
mine: bool | Omit = omit,
|
|
515
|
+
type: Literal["dedicated", "serverless"] | Omit = omit,
|
|
516
|
+
usage_type: Literal["on-demand", "reserved"] | Omit = omit,
|
|
517
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
518
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
519
|
+
extra_headers: Headers | None = None,
|
|
520
|
+
extra_query: Query | None = None,
|
|
521
|
+
extra_body: Body | None = None,
|
|
522
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
523
|
+
) -> EndpointListResponse:
|
|
524
|
+
"""Returns a list of all endpoints associated with your account.
|
|
525
|
+
|
|
526
|
+
You can filter the
|
|
527
|
+
results by type (dedicated or serverless).
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
mine: If true, return only endpoints owned by the caller
|
|
531
|
+
|
|
532
|
+
type: Filter endpoints by type
|
|
533
|
+
|
|
534
|
+
usage_type: Filter endpoints by usage type
|
|
535
|
+
|
|
536
|
+
extra_headers: Send extra headers
|
|
537
|
+
|
|
538
|
+
extra_query: Add additional query parameters to the request
|
|
537
539
|
|
|
538
|
-
|
|
540
|
+
extra_body: Add additional JSON properties to the request
|
|
539
541
|
|
|
540
|
-
|
|
541
|
-
self, model: Optional[str] = None
|
|
542
|
-
) -> List[HardwareWithStatus]:
|
|
542
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
543
543
|
"""
|
|
544
|
-
|
|
544
|
+
return await self._get(
|
|
545
|
+
"/endpoints",
|
|
546
|
+
options=make_request_options(
|
|
547
|
+
extra_headers=extra_headers,
|
|
548
|
+
extra_query=extra_query,
|
|
549
|
+
extra_body=extra_body,
|
|
550
|
+
timeout=timeout,
|
|
551
|
+
query=await async_maybe_transform(
|
|
552
|
+
{
|
|
553
|
+
"mine": mine,
|
|
554
|
+
"type": type,
|
|
555
|
+
"usage_type": usage_type,
|
|
556
|
+
},
|
|
557
|
+
endpoint_list_params.EndpointListParams,
|
|
558
|
+
),
|
|
559
|
+
),
|
|
560
|
+
cast_to=EndpointListResponse,
|
|
561
|
+
)
|
|
562
|
+
|
|
563
|
+
async def delete(
|
|
564
|
+
self,
|
|
565
|
+
endpoint_id: str,
|
|
566
|
+
*,
|
|
567
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
568
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
569
|
+
extra_headers: Headers | None = None,
|
|
570
|
+
extra_query: Query | None = None,
|
|
571
|
+
extra_body: Body | None = None,
|
|
572
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
573
|
+
) -> None:
|
|
574
|
+
"""Permanently deletes an endpoint.
|
|
575
|
+
|
|
576
|
+
This action cannot be undone.
|
|
545
577
|
|
|
546
578
|
Args:
|
|
547
|
-
|
|
548
|
-
|
|
579
|
+
extra_headers: Send extra headers
|
|
580
|
+
|
|
581
|
+
extra_query: Add additional query parameters to the request
|
|
582
|
+
|
|
583
|
+
extra_body: Add additional JSON properties to the request
|
|
549
584
|
|
|
550
|
-
|
|
551
|
-
List[HardwareWithStatus]: List of hardware configurations with their status
|
|
585
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
552
586
|
"""
|
|
553
|
-
|
|
554
|
-
|
|
587
|
+
if not endpoint_id:
|
|
588
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
589
|
+
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
|
590
|
+
return await self._delete(
|
|
591
|
+
f"/endpoints/{endpoint_id}",
|
|
592
|
+
options=make_request_options(
|
|
593
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
594
|
+
),
|
|
595
|
+
cast_to=NoneType,
|
|
555
596
|
)
|
|
556
597
|
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
598
|
+
async def list_avzones(
|
|
599
|
+
self,
|
|
600
|
+
*,
|
|
601
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
602
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
603
|
+
extra_headers: Headers | None = None,
|
|
604
|
+
extra_query: Query | None = None,
|
|
605
|
+
extra_body: Body | None = None,
|
|
606
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
607
|
+
) -> EndpointListAvzonesResponse:
|
|
608
|
+
"""List all available availability zones."""
|
|
609
|
+
return await self._get(
|
|
610
|
+
"/clusters/availability-zones",
|
|
611
|
+
options=make_request_options(
|
|
612
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
566
613
|
),
|
|
567
|
-
|
|
614
|
+
cast_to=EndpointListAvzonesResponse,
|
|
568
615
|
)
|
|
569
616
|
|
|
570
|
-
assert isinstance(response, TogetherResponse)
|
|
571
|
-
assert isinstance(response.data, dict)
|
|
572
|
-
assert isinstance(response.data["data"], list)
|
|
573
617
|
|
|
574
|
-
|
|
618
|
+
class EndpointsResourceWithRawResponse:
|
|
619
|
+
def __init__(self, endpoints: EndpointsResource) -> None:
|
|
620
|
+
self._endpoints = endpoints
|
|
575
621
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
622
|
+
self.create = to_raw_response_wrapper(
|
|
623
|
+
endpoints.create,
|
|
624
|
+
)
|
|
625
|
+
self.retrieve = to_raw_response_wrapper(
|
|
626
|
+
endpoints.retrieve,
|
|
627
|
+
)
|
|
628
|
+
self.update = to_raw_response_wrapper(
|
|
629
|
+
endpoints.update,
|
|
630
|
+
)
|
|
631
|
+
self.list = to_raw_response_wrapper(
|
|
632
|
+
endpoints.list,
|
|
633
|
+
)
|
|
634
|
+
self.delete = to_raw_response_wrapper(
|
|
635
|
+
endpoints.delete,
|
|
636
|
+
)
|
|
637
|
+
self.list_avzones = to_raw_response_wrapper(
|
|
638
|
+
endpoints.list_avzones,
|
|
639
|
+
)
|
|
579
640
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
641
|
+
|
|
642
|
+
class AsyncEndpointsResourceWithRawResponse:
|
|
643
|
+
def __init__(self, endpoints: AsyncEndpointsResource) -> None:
|
|
644
|
+
self._endpoints = endpoints
|
|
645
|
+
|
|
646
|
+
self.create = async_to_raw_response_wrapper(
|
|
647
|
+
endpoints.create,
|
|
648
|
+
)
|
|
649
|
+
self.retrieve = async_to_raw_response_wrapper(
|
|
650
|
+
endpoints.retrieve,
|
|
651
|
+
)
|
|
652
|
+
self.update = async_to_raw_response_wrapper(
|
|
653
|
+
endpoints.update,
|
|
654
|
+
)
|
|
655
|
+
self.list = async_to_raw_response_wrapper(
|
|
656
|
+
endpoints.list,
|
|
657
|
+
)
|
|
658
|
+
self.delete = async_to_raw_response_wrapper(
|
|
659
|
+
endpoints.delete,
|
|
660
|
+
)
|
|
661
|
+
self.list_avzones = async_to_raw_response_wrapper(
|
|
662
|
+
endpoints.list_avzones,
|
|
585
663
|
)
|
|
586
664
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
665
|
+
|
|
666
|
+
class EndpointsResourceWithStreamingResponse:
|
|
667
|
+
def __init__(self, endpoints: EndpointsResource) -> None:
|
|
668
|
+
self._endpoints = endpoints
|
|
669
|
+
|
|
670
|
+
self.create = to_streamed_response_wrapper(
|
|
671
|
+
endpoints.create,
|
|
672
|
+
)
|
|
673
|
+
self.retrieve = to_streamed_response_wrapper(
|
|
674
|
+
endpoints.retrieve,
|
|
675
|
+
)
|
|
676
|
+
self.update = to_streamed_response_wrapper(
|
|
677
|
+
endpoints.update,
|
|
678
|
+
)
|
|
679
|
+
self.list = to_streamed_response_wrapper(
|
|
680
|
+
endpoints.list,
|
|
681
|
+
)
|
|
682
|
+
self.delete = to_streamed_response_wrapper(
|
|
683
|
+
endpoints.delete,
|
|
684
|
+
)
|
|
685
|
+
self.list_avzones = to_streamed_response_wrapper(
|
|
686
|
+
endpoints.list_avzones,
|
|
593
687
|
)
|
|
594
688
|
|
|
595
|
-
assert isinstance(response, TogetherResponse)
|
|
596
|
-
assert isinstance(response.data, dict)
|
|
597
|
-
assert isinstance(response.data["avzones"], list)
|
|
598
689
|
|
|
599
|
-
|
|
690
|
+
class AsyncEndpointsResourceWithStreamingResponse:
|
|
691
|
+
def __init__(self, endpoints: AsyncEndpointsResource) -> None:
|
|
692
|
+
self._endpoints = endpoints
|
|
693
|
+
|
|
694
|
+
self.create = async_to_streamed_response_wrapper(
|
|
695
|
+
endpoints.create,
|
|
696
|
+
)
|
|
697
|
+
self.retrieve = async_to_streamed_response_wrapper(
|
|
698
|
+
endpoints.retrieve,
|
|
699
|
+
)
|
|
700
|
+
self.update = async_to_streamed_response_wrapper(
|
|
701
|
+
endpoints.update,
|
|
702
|
+
)
|
|
703
|
+
self.list = async_to_streamed_response_wrapper(
|
|
704
|
+
endpoints.list,
|
|
705
|
+
)
|
|
706
|
+
self.delete = async_to_streamed_response_wrapper(
|
|
707
|
+
endpoints.delete,
|
|
708
|
+
)
|
|
709
|
+
self.list_avzones = async_to_streamed_response_wrapper(
|
|
710
|
+
endpoints.list_avzones,
|
|
711
|
+
)
|