together 1.5.17__py3-none-any.whl → 2.0.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/__init__.py +101 -63
- together/_base_client.py +1995 -0
- together/_client.py +1033 -0
- together/_compat.py +219 -0
- together/_constants.py +14 -0
- together/_exceptions.py +108 -0
- together/_files.py +123 -0
- together/_models.py +857 -0
- together/_qs.py +150 -0
- together/_resource.py +43 -0
- together/_response.py +830 -0
- together/_streaming.py +370 -0
- together/_types.py +260 -0
- together/_utils/__init__.py +64 -0
- together/_utils/_compat.py +45 -0
- together/_utils/_datetime_parse.py +136 -0
- together/_utils/_logs.py +25 -0
- together/_utils/_proxy.py +65 -0
- together/_utils/_reflection.py +42 -0
- together/_utils/_resources_proxy.py +24 -0
- together/_utils/_streams.py +12 -0
- together/_utils/_sync.py +58 -0
- together/_utils/_transform.py +457 -0
- together/_utils/_typing.py +156 -0
- together/_utils/_utils.py +421 -0
- together/_version.py +4 -0
- together/lib/.keep +4 -0
- together/lib/__init__.py +23 -0
- together/{cli → lib/cli}/api/endpoints.py +108 -75
- together/lib/cli/api/evals.py +588 -0
- together/{cli → lib/cli}/api/files.py +20 -17
- together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +161 -120
- together/lib/cli/api/models.py +140 -0
- together/{cli → lib/cli}/api/utils.py +6 -7
- together/{cli → lib/cli}/cli.py +16 -24
- together/{constants.py → lib/constants.py} +17 -12
- together/lib/resources/__init__.py +11 -0
- together/lib/resources/files.py +999 -0
- together/lib/resources/fine_tuning.py +280 -0
- together/lib/resources/models.py +35 -0
- together/lib/types/__init__.py +13 -0
- together/lib/types/error.py +9 -0
- together/lib/types/fine_tuning.py +455 -0
- together/{utils → lib/utils}/__init__.py +6 -14
- together/{utils → lib/utils}/_log.py +11 -16
- together/lib/utils/files.py +628 -0
- together/lib/utils/serializer.py +10 -0
- together/{utils → lib/utils}/tools.py +19 -55
- together/resources/__init__.py +225 -33
- together/resources/audio/__init__.py +72 -21
- together/resources/audio/audio.py +198 -0
- together/resources/audio/speech.py +574 -122
- together/resources/audio/transcriptions.py +282 -0
- together/resources/audio/translations.py +256 -0
- together/resources/audio/voices.py +135 -0
- together/resources/batches.py +417 -0
- together/resources/chat/__init__.py +30 -21
- together/resources/chat/chat.py +102 -0
- together/resources/chat/completions.py +1063 -263
- together/resources/code_interpreter/__init__.py +33 -0
- together/resources/code_interpreter/code_interpreter.py +258 -0
- together/resources/code_interpreter/sessions.py +135 -0
- together/resources/completions.py +884 -225
- together/resources/embeddings.py +172 -68
- together/resources/endpoints.py +598 -395
- together/resources/evals.py +452 -0
- together/resources/files.py +398 -121
- together/resources/fine_tuning.py +1033 -0
- together/resources/hardware.py +181 -0
- together/resources/images.py +256 -108
- together/resources/jobs.py +214 -0
- together/resources/models.py +238 -90
- together/resources/rerank.py +190 -92
- together/resources/videos.py +374 -0
- together/types/__init__.py +65 -109
- together/types/audio/__init__.py +10 -0
- together/types/audio/speech_create_params.py +75 -0
- together/types/audio/transcription_create_params.py +54 -0
- together/types/audio/transcription_create_response.py +111 -0
- together/types/audio/translation_create_params.py +40 -0
- together/types/audio/translation_create_response.py +70 -0
- together/types/audio/voice_list_response.py +23 -0
- together/types/audio_speech_stream_chunk.py +16 -0
- together/types/autoscaling.py +13 -0
- together/types/autoscaling_param.py +15 -0
- together/types/batch_create_params.py +24 -0
- together/types/batch_create_response.py +14 -0
- together/types/batch_job.py +45 -0
- together/types/batch_list_response.py +10 -0
- together/types/chat/__init__.py +18 -0
- together/types/chat/chat_completion.py +60 -0
- together/types/chat/chat_completion_chunk.py +61 -0
- together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
- together/types/chat/chat_completion_structured_message_text_param.py +13 -0
- together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
- together/types/chat/chat_completion_usage.py +13 -0
- together/types/chat/chat_completion_warning.py +9 -0
- together/types/chat/completion_create_params.py +329 -0
- together/types/code_interpreter/__init__.py +5 -0
- together/types/code_interpreter/session_list_response.py +31 -0
- together/types/code_interpreter_execute_params.py +45 -0
- together/types/completion.py +42 -0
- together/types/completion_chunk.py +66 -0
- together/types/completion_create_params.py +138 -0
- together/types/dedicated_endpoint.py +44 -0
- together/types/embedding.py +24 -0
- together/types/embedding_create_params.py +31 -0
- together/types/endpoint_create_params.py +43 -0
- together/types/endpoint_list_avzones_response.py +11 -0
- together/types/endpoint_list_params.py +18 -0
- together/types/endpoint_list_response.py +41 -0
- together/types/endpoint_update_params.py +27 -0
- together/types/eval_create_params.py +263 -0
- together/types/eval_create_response.py +16 -0
- together/types/eval_list_params.py +21 -0
- together/types/eval_list_response.py +10 -0
- together/types/eval_status_response.py +100 -0
- together/types/evaluation_job.py +139 -0
- together/types/execute_response.py +108 -0
- together/types/file_delete_response.py +13 -0
- together/types/file_list.py +12 -0
- together/types/file_purpose.py +9 -0
- together/types/file_response.py +31 -0
- together/types/file_type.py +7 -0
- together/types/fine_tuning_cancel_response.py +194 -0
- together/types/fine_tuning_content_params.py +24 -0
- together/types/fine_tuning_delete_params.py +11 -0
- together/types/fine_tuning_delete_response.py +12 -0
- together/types/fine_tuning_list_checkpoints_response.py +21 -0
- together/types/fine_tuning_list_events_response.py +12 -0
- together/types/fine_tuning_list_response.py +199 -0
- together/types/finetune_event.py +41 -0
- together/types/finetune_event_type.py +33 -0
- together/types/finetune_response.py +177 -0
- together/types/hardware_list_params.py +16 -0
- together/types/hardware_list_response.py +58 -0
- together/types/image_data_b64.py +15 -0
- together/types/image_data_url.py +15 -0
- together/types/image_file.py +23 -0
- together/types/image_generate_params.py +85 -0
- together/types/job_list_response.py +47 -0
- together/types/job_retrieve_response.py +43 -0
- together/types/log_probs.py +18 -0
- together/types/model_list_response.py +10 -0
- together/types/model_object.py +42 -0
- together/types/model_upload_params.py +36 -0
- together/types/model_upload_response.py +23 -0
- together/types/rerank_create_params.py +36 -0
- together/types/rerank_create_response.py +36 -0
- together/types/tool_choice.py +23 -0
- together/types/tool_choice_param.py +23 -0
- together/types/tools_param.py +23 -0
- together/types/training_method_dpo.py +22 -0
- together/types/training_method_sft.py +18 -0
- together/types/video_create_params.py +86 -0
- together/types/video_job.py +57 -0
- together-2.0.0a8.dist-info/METADATA +680 -0
- together-2.0.0a8.dist-info/RECORD +164 -0
- {together-1.5.17.dist-info → together-2.0.0a8.dist-info}/WHEEL +1 -1
- together-2.0.0a8.dist-info/entry_points.txt +2 -0
- {together-1.5.17.dist-info → together-2.0.0a8.dist-info/licenses}/LICENSE +1 -1
- together/abstract/api_requestor.py +0 -729
- together/cli/api/chat.py +0 -276
- together/cli/api/completions.py +0 -119
- together/cli/api/images.py +0 -93
- together/cli/api/models.py +0 -55
- together/client.py +0 -176
- together/error.py +0 -194
- together/filemanager.py +0 -389
- together/legacy/__init__.py +0 -0
- together/legacy/base.py +0 -27
- together/legacy/complete.py +0 -93
- together/legacy/embeddings.py +0 -27
- together/legacy/files.py +0 -146
- together/legacy/finetune.py +0 -177
- together/legacy/images.py +0 -27
- together/legacy/models.py +0 -44
- together/resources/batch.py +0 -136
- together/resources/code_interpreter.py +0 -82
- together/resources/finetune.py +0 -1064
- together/together_response.py +0 -50
- together/types/abstract.py +0 -26
- together/types/audio_speech.py +0 -110
- together/types/batch.py +0 -53
- together/types/chat_completions.py +0 -197
- together/types/code_interpreter.py +0 -57
- together/types/common.py +0 -66
- together/types/completions.py +0 -107
- together/types/embeddings.py +0 -35
- together/types/endpoints.py +0 -123
- together/types/error.py +0 -16
- together/types/files.py +0 -90
- together/types/finetune.py +0 -398
- together/types/images.py +0 -44
- together/types/models.py +0 -45
- together/types/rerank.py +0 -43
- together/utils/api_helpers.py +0 -124
- together/utils/files.py +0 -425
- together/version.py +0 -6
- together-1.5.17.dist-info/METADATA +0 -525
- together-1.5.17.dist-info/RECORD +0 -69
- together-1.5.17.dist-info/entry_points.txt +0 -3
- /together/{abstract → lib/cli}/__init__.py +0 -0
- /together/{cli → lib/cli/api}/__init__.py +0 -0
- /together/{cli/api/__init__.py → py.typed} +0 -0
together/resources/endpoints.py
CHANGED
|
@@ -1,508 +1,711 @@
|
|
|
1
|
-
from
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from typing_extensions import Literal
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
|
|
11
|
+
from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
|
|
12
|
+
from .._utils import maybe_transform, async_maybe_transform
|
|
13
|
+
from .._compat import cached_property
|
|
14
|
+
from .._resource import SyncAPIResource, AsyncAPIResource
|
|
15
|
+
from .._response import (
|
|
16
|
+
to_raw_response_wrapper,
|
|
17
|
+
to_streamed_response_wrapper,
|
|
18
|
+
async_to_raw_response_wrapper,
|
|
19
|
+
async_to_streamed_response_wrapper,
|
|
20
|
+
)
|
|
21
|
+
from .._base_client import make_request_options
|
|
22
|
+
from ..types.autoscaling_param import AutoscalingParam
|
|
23
|
+
from ..types.dedicated_endpoint import DedicatedEndpoint
|
|
24
|
+
from ..types.endpoint_list_response import EndpointListResponse
|
|
25
|
+
from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
|
|
26
|
+
|
|
27
|
+
__all__ = ["EndpointsResource", "AsyncEndpointsResource"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EndpointsResource(SyncAPIResource):
|
|
31
|
+
@cached_property
|
|
32
|
+
def with_raw_response(self) -> EndpointsResourceWithRawResponse:
|
|
33
|
+
"""
|
|
34
|
+
This property can be used as a prefix for any HTTP method call to return
|
|
35
|
+
the raw response object instead of the parsed content.
|
|
9
36
|
|
|
37
|
+
For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
|
|
38
|
+
"""
|
|
39
|
+
return EndpointsResourceWithRawResponse(self)
|
|
10
40
|
|
|
11
|
-
|
|
12
|
-
def
|
|
13
|
-
|
|
41
|
+
@cached_property
|
|
42
|
+
def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
|
|
43
|
+
"""
|
|
44
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
14
45
|
|
|
15
|
-
|
|
16
|
-
self, type: Optional[Literal["dedicated", "serverless"]] = None
|
|
17
|
-
) -> List[ListEndpoint]:
|
|
46
|
+
For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
|
|
18
47
|
"""
|
|
19
|
-
|
|
48
|
+
return EndpointsResourceWithStreamingResponse(self)
|
|
49
|
+
|
|
50
|
+
def create(
|
|
51
|
+
self,
|
|
52
|
+
*,
|
|
53
|
+
autoscaling: AutoscalingParam,
|
|
54
|
+
hardware: str,
|
|
55
|
+
model: str,
|
|
56
|
+
availability_zone: str | Omit = omit,
|
|
57
|
+
disable_prompt_cache: bool | Omit = omit,
|
|
58
|
+
disable_speculative_decoding: bool | Omit = omit,
|
|
59
|
+
display_name: str | Omit = omit,
|
|
60
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
61
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
62
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
63
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
64
|
+
extra_headers: Headers | None = None,
|
|
65
|
+
extra_query: Query | None = None,
|
|
66
|
+
extra_body: Body | None = None,
|
|
67
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
68
|
+
) -> DedicatedEndpoint:
|
|
69
|
+
"""Creates a new dedicated endpoint for serving models.
|
|
70
|
+
|
|
71
|
+
The endpoint will
|
|
72
|
+
automatically start after creation. You can deploy any supported model on
|
|
73
|
+
hardware configurations that meet the model's requirements.
|
|
20
74
|
|
|
21
75
|
Args:
|
|
22
|
-
|
|
76
|
+
autoscaling: Configuration for automatic scaling of the endpoint
|
|
23
77
|
|
|
24
|
-
|
|
25
|
-
List[ListEndpoint]: List of endpoint objects
|
|
26
|
-
"""
|
|
27
|
-
requestor = api_requestor.APIRequestor(
|
|
28
|
-
client=self._client,
|
|
29
|
-
)
|
|
78
|
+
hardware: The hardware configuration to use for this endpoint
|
|
30
79
|
|
|
31
|
-
|
|
32
|
-
if type is not None:
|
|
33
|
-
params["type"] = type
|
|
80
|
+
model: The model to deploy on this endpoint
|
|
34
81
|
|
|
35
|
-
|
|
36
|
-
options=TogetherRequest(
|
|
37
|
-
method="GET",
|
|
38
|
-
url="endpoints",
|
|
39
|
-
params=params,
|
|
40
|
-
),
|
|
41
|
-
stream=False,
|
|
42
|
-
)
|
|
82
|
+
availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
|
|
43
83
|
|
|
44
|
-
|
|
84
|
+
disable_prompt_cache: Whether to disable the prompt cache for this endpoint
|
|
45
85
|
|
|
46
|
-
|
|
47
|
-
assert isinstance(response.data, list)
|
|
86
|
+
disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
|
|
48
87
|
|
|
49
|
-
|
|
88
|
+
display_name: A human-readable name for the endpoint
|
|
50
89
|
|
|
51
|
-
|
|
90
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
91
|
+
automatically stopped. Set to null, omit or set to 0 to disable automatic
|
|
92
|
+
timeout.
|
|
93
|
+
|
|
94
|
+
state: The desired state of the endpoint
|
|
95
|
+
|
|
96
|
+
extra_headers: Send extra headers
|
|
97
|
+
|
|
98
|
+
extra_query: Add additional query parameters to the request
|
|
99
|
+
|
|
100
|
+
extra_body: Add additional JSON properties to the request
|
|
101
|
+
|
|
102
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
103
|
+
"""
|
|
104
|
+
return self._post(
|
|
105
|
+
"/endpoints",
|
|
106
|
+
body=maybe_transform(
|
|
107
|
+
{
|
|
108
|
+
"autoscaling": autoscaling,
|
|
109
|
+
"hardware": hardware,
|
|
110
|
+
"model": model,
|
|
111
|
+
"availability_zone": availability_zone,
|
|
112
|
+
"disable_prompt_cache": disable_prompt_cache,
|
|
113
|
+
"disable_speculative_decoding": disable_speculative_decoding,
|
|
114
|
+
"display_name": display_name,
|
|
115
|
+
"inactive_timeout": inactive_timeout,
|
|
116
|
+
"state": state,
|
|
117
|
+
},
|
|
118
|
+
endpoint_create_params.EndpointCreateParams,
|
|
119
|
+
),
|
|
120
|
+
options=make_request_options(
|
|
121
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
122
|
+
),
|
|
123
|
+
cast_to=DedicatedEndpoint,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
def retrieve(
|
|
52
127
|
self,
|
|
128
|
+
endpoint_id: str,
|
|
53
129
|
*,
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
disable_speculative_decoding: bool = False,
|
|
61
|
-
state: Literal["STARTED", "STOPPED"] = "STARTED",
|
|
62
|
-
inactive_timeout: Optional[int] = None,
|
|
130
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
131
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
132
|
+
extra_headers: Headers | None = None,
|
|
133
|
+
extra_query: Query | None = None,
|
|
134
|
+
extra_body: Body | None = None,
|
|
135
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
63
136
|
) -> DedicatedEndpoint:
|
|
64
137
|
"""
|
|
65
|
-
|
|
138
|
+
Retrieves details about a specific endpoint, including its current state,
|
|
139
|
+
configuration, and scaling settings.
|
|
66
140
|
|
|
67
141
|
Args:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
|
|
76
|
-
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
|
|
77
|
-
|
|
78
|
-
Returns:
|
|
79
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
142
|
+
extra_headers: Send extra headers
|
|
143
|
+
|
|
144
|
+
extra_query: Add additional query parameters to the request
|
|
145
|
+
|
|
146
|
+
extra_body: Add additional JSON properties to the request
|
|
147
|
+
|
|
148
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
80
149
|
"""
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
"hardware": hardware,
|
|
88
|
-
"autoscaling": {
|
|
89
|
-
"min_replicas": min_replicas,
|
|
90
|
-
"max_replicas": max_replicas,
|
|
91
|
-
},
|
|
92
|
-
"disable_prompt_cache": disable_prompt_cache,
|
|
93
|
-
"disable_speculative_decoding": disable_speculative_decoding,
|
|
94
|
-
"state": state,
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
if display_name is not None:
|
|
98
|
-
data["display_name"] = display_name
|
|
99
|
-
|
|
100
|
-
if inactive_timeout is not None:
|
|
101
|
-
data["inactive_timeout"] = inactive_timeout
|
|
102
|
-
|
|
103
|
-
response, _, _ = requestor.request(
|
|
104
|
-
options=TogetherRequest(
|
|
105
|
-
method="POST",
|
|
106
|
-
url="endpoints",
|
|
107
|
-
params=data,
|
|
150
|
+
if not endpoint_id:
|
|
151
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
152
|
+
return self._get(
|
|
153
|
+
f"/endpoints/{endpoint_id}",
|
|
154
|
+
options=make_request_options(
|
|
155
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
108
156
|
),
|
|
109
|
-
|
|
157
|
+
cast_to=DedicatedEndpoint,
|
|
110
158
|
)
|
|
111
159
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
160
|
+
def update(
|
|
161
|
+
self,
|
|
162
|
+
endpoint_id: str,
|
|
163
|
+
*,
|
|
164
|
+
autoscaling: AutoscalingParam | Omit = omit,
|
|
165
|
+
display_name: str | Omit = omit,
|
|
166
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
167
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
168
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
169
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
170
|
+
extra_headers: Headers | None = None,
|
|
171
|
+
extra_query: Query | None = None,
|
|
172
|
+
extra_body: Body | None = None,
|
|
173
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
174
|
+
) -> DedicatedEndpoint:
|
|
175
|
+
"""Updates an existing endpoint's configuration.
|
|
115
176
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
Get details of a specific endpoint.
|
|
177
|
+
You can modify the display name,
|
|
178
|
+
autoscaling settings, or change the endpoint's state (start/stop).
|
|
119
179
|
|
|
120
180
|
Args:
|
|
121
|
-
|
|
181
|
+
autoscaling: New autoscaling configuration for the endpoint
|
|
122
182
|
|
|
123
|
-
|
|
124
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
125
|
-
"""
|
|
126
|
-
requestor = api_requestor.APIRequestor(
|
|
127
|
-
client=self._client,
|
|
128
|
-
)
|
|
183
|
+
display_name: A human-readable name for the endpoint
|
|
129
184
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
method="GET",
|
|
133
|
-
url=f"endpoints/{endpoint_id}",
|
|
134
|
-
),
|
|
135
|
-
stream=False,
|
|
136
|
-
)
|
|
185
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
186
|
+
automatically stopped. Set to 0 to disable automatic timeout.
|
|
137
187
|
|
|
138
|
-
|
|
188
|
+
state: The desired state of the endpoint
|
|
139
189
|
|
|
140
|
-
|
|
190
|
+
extra_headers: Send extra headers
|
|
141
191
|
|
|
142
|
-
|
|
143
|
-
"""
|
|
144
|
-
Delete a specific endpoint.
|
|
192
|
+
extra_query: Add additional query parameters to the request
|
|
145
193
|
|
|
146
|
-
|
|
147
|
-
endpoint_id (str): ID of the endpoint to delete
|
|
148
|
-
"""
|
|
149
|
-
requestor = api_requestor.APIRequestor(
|
|
150
|
-
client=self._client,
|
|
151
|
-
)
|
|
194
|
+
extra_body: Add additional JSON properties to the request
|
|
152
195
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
196
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
197
|
+
"""
|
|
198
|
+
if not endpoint_id:
|
|
199
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
200
|
+
return self._patch(
|
|
201
|
+
f"/endpoints/{endpoint_id}",
|
|
202
|
+
body=maybe_transform(
|
|
203
|
+
{
|
|
204
|
+
"autoscaling": autoscaling,
|
|
205
|
+
"display_name": display_name,
|
|
206
|
+
"inactive_timeout": inactive_timeout,
|
|
207
|
+
"state": state,
|
|
208
|
+
},
|
|
209
|
+
endpoint_update_params.EndpointUpdateParams,
|
|
157
210
|
),
|
|
158
|
-
|
|
211
|
+
options=make_request_options(
|
|
212
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
213
|
+
),
|
|
214
|
+
cast_to=DedicatedEndpoint,
|
|
159
215
|
)
|
|
160
216
|
|
|
161
|
-
def
|
|
217
|
+
def list(
|
|
162
218
|
self,
|
|
163
|
-
endpoint_id: str,
|
|
164
219
|
*,
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
220
|
+
mine: bool | Omit = omit,
|
|
221
|
+
type: Literal["dedicated", "serverless"] | Omit = omit,
|
|
222
|
+
usage_type: Literal["on-demand", "reserved"] | Omit = omit,
|
|
223
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
224
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
225
|
+
extra_headers: Headers | None = None,
|
|
226
|
+
extra_query: Query | None = None,
|
|
227
|
+
extra_body: Body | None = None,
|
|
228
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
229
|
+
) -> EndpointListResponse:
|
|
230
|
+
"""Returns a list of all endpoints associated with your account.
|
|
231
|
+
|
|
232
|
+
You can filter the
|
|
233
|
+
results by type (dedicated or serverless).
|
|
173
234
|
|
|
174
235
|
Args:
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
236
|
+
mine: If true, return only endpoints owned by the caller
|
|
237
|
+
|
|
238
|
+
type: Filter endpoints by type
|
|
239
|
+
|
|
240
|
+
usage_type: Filter endpoints by usage type
|
|
241
|
+
|
|
242
|
+
extra_headers: Send extra headers
|
|
243
|
+
|
|
244
|
+
extra_query: Add additional query parameters to the request
|
|
245
|
+
|
|
246
|
+
extra_body: Add additional JSON properties to the request
|
|
247
|
+
|
|
248
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
184
249
|
"""
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
"min_replicas": current_min,
|
|
201
|
-
"max_replicas": current_max,
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
if state is not None:
|
|
205
|
-
data["state"] = state
|
|
206
|
-
|
|
207
|
-
if display_name is not None:
|
|
208
|
-
data["display_name"] = display_name
|
|
209
|
-
|
|
210
|
-
if inactive_timeout is not None:
|
|
211
|
-
data["inactive_timeout"] = inactive_timeout
|
|
212
|
-
|
|
213
|
-
response, _, _ = requestor.request(
|
|
214
|
-
options=TogetherRequest(
|
|
215
|
-
method="PATCH",
|
|
216
|
-
url=f"endpoints/{endpoint_id}",
|
|
217
|
-
params=data,
|
|
250
|
+
return self._get(
|
|
251
|
+
"/endpoints",
|
|
252
|
+
options=make_request_options(
|
|
253
|
+
extra_headers=extra_headers,
|
|
254
|
+
extra_query=extra_query,
|
|
255
|
+
extra_body=extra_body,
|
|
256
|
+
timeout=timeout,
|
|
257
|
+
query=maybe_transform(
|
|
258
|
+
{
|
|
259
|
+
"mine": mine,
|
|
260
|
+
"type": type,
|
|
261
|
+
"usage_type": usage_type,
|
|
262
|
+
},
|
|
263
|
+
endpoint_list_params.EndpointListParams,
|
|
264
|
+
),
|
|
218
265
|
),
|
|
219
|
-
|
|
266
|
+
cast_to=EndpointListResponse,
|
|
220
267
|
)
|
|
221
268
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
269
|
+
def delete(
|
|
270
|
+
self,
|
|
271
|
+
endpoint_id: str,
|
|
272
|
+
*,
|
|
273
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
274
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
275
|
+
extra_headers: Headers | None = None,
|
|
276
|
+
extra_query: Query | None = None,
|
|
277
|
+
extra_body: Body | None = None,
|
|
278
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
279
|
+
) -> None:
|
|
280
|
+
"""Permanently deletes an endpoint.
|
|
225
281
|
|
|
226
|
-
|
|
227
|
-
"""
|
|
228
|
-
List available hardware configurations.
|
|
282
|
+
This action cannot be undone.
|
|
229
283
|
|
|
230
284
|
Args:
|
|
231
|
-
|
|
232
|
-
|
|
285
|
+
extra_headers: Send extra headers
|
|
286
|
+
|
|
287
|
+
extra_query: Add additional query parameters to the request
|
|
288
|
+
|
|
289
|
+
extra_body: Add additional JSON properties to the request
|
|
233
290
|
|
|
234
|
-
|
|
235
|
-
List[HardwareWithStatus]: List of hardware configurations with their status
|
|
291
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
236
292
|
"""
|
|
237
|
-
|
|
238
|
-
|
|
293
|
+
if not endpoint_id:
|
|
294
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
295
|
+
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
|
296
|
+
return self._delete(
|
|
297
|
+
f"/endpoints/{endpoint_id}",
|
|
298
|
+
options=make_request_options(
|
|
299
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
300
|
+
),
|
|
301
|
+
cast_to=NoneType,
|
|
239
302
|
)
|
|
240
303
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
304
|
+
def list_avzones(
|
|
305
|
+
self,
|
|
306
|
+
*,
|
|
307
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
308
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
309
|
+
extra_headers: Headers | None = None,
|
|
310
|
+
extra_query: Query | None = None,
|
|
311
|
+
extra_body: Body | None = None,
|
|
312
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
313
|
+
) -> EndpointListAvzonesResponse:
|
|
314
|
+
"""List all available availability zones."""
|
|
315
|
+
return self._get(
|
|
316
|
+
"/clusters/availability-zones",
|
|
317
|
+
options=make_request_options(
|
|
318
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
250
319
|
),
|
|
251
|
-
|
|
320
|
+
cast_to=EndpointListAvzonesResponse,
|
|
252
321
|
)
|
|
253
322
|
|
|
254
|
-
assert isinstance(response, TogetherResponse)
|
|
255
|
-
assert isinstance(response.data, dict)
|
|
256
|
-
assert isinstance(response.data["data"], list)
|
|
257
323
|
|
|
258
|
-
|
|
324
|
+
class AsyncEndpointsResource(AsyncAPIResource):
|
|
325
|
+
@cached_property
|
|
326
|
+
def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
|
|
327
|
+
"""
|
|
328
|
+
This property can be used as a prefix for any HTTP method call to return
|
|
329
|
+
the raw response object instead of the parsed content.
|
|
259
330
|
|
|
331
|
+
For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
|
|
332
|
+
"""
|
|
333
|
+
return AsyncEndpointsResourceWithRawResponse(self)
|
|
260
334
|
|
|
261
|
-
|
|
262
|
-
def
|
|
263
|
-
|
|
335
|
+
@cached_property
|
|
336
|
+
def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
|
|
337
|
+
"""
|
|
338
|
+
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
264
339
|
|
|
265
|
-
|
|
266
|
-
self, type: Optional[Literal["dedicated", "serverless"]] = None
|
|
267
|
-
) -> List[ListEndpoint]:
|
|
340
|
+
For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
|
|
268
341
|
"""
|
|
269
|
-
|
|
342
|
+
return AsyncEndpointsResourceWithStreamingResponse(self)
|
|
343
|
+
|
|
344
|
+
async def create(
|
|
345
|
+
self,
|
|
346
|
+
*,
|
|
347
|
+
autoscaling: AutoscalingParam,
|
|
348
|
+
hardware: str,
|
|
349
|
+
model: str,
|
|
350
|
+
availability_zone: str | Omit = omit,
|
|
351
|
+
disable_prompt_cache: bool | Omit = omit,
|
|
352
|
+
disable_speculative_decoding: bool | Omit = omit,
|
|
353
|
+
display_name: str | Omit = omit,
|
|
354
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
355
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
356
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
357
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
358
|
+
extra_headers: Headers | None = None,
|
|
359
|
+
extra_query: Query | None = None,
|
|
360
|
+
extra_body: Body | None = None,
|
|
361
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
362
|
+
) -> DedicatedEndpoint:
|
|
363
|
+
"""Creates a new dedicated endpoint for serving models.
|
|
364
|
+
|
|
365
|
+
The endpoint will
|
|
366
|
+
automatically start after creation. You can deploy any supported model on
|
|
367
|
+
hardware configurations that meet the model's requirements.
|
|
270
368
|
|
|
271
369
|
Args:
|
|
272
|
-
|
|
370
|
+
autoscaling: Configuration for automatic scaling of the endpoint
|
|
273
371
|
|
|
274
|
-
|
|
275
|
-
List[ListEndpoint]: List of endpoint objects
|
|
276
|
-
"""
|
|
277
|
-
requestor = api_requestor.APIRequestor(
|
|
278
|
-
client=self._client,
|
|
279
|
-
)
|
|
372
|
+
hardware: The hardware configuration to use for this endpoint
|
|
280
373
|
|
|
281
|
-
|
|
282
|
-
if type is not None:
|
|
283
|
-
params["type"] = type
|
|
374
|
+
model: The model to deploy on this endpoint
|
|
284
375
|
|
|
285
|
-
|
|
286
|
-
options=TogetherRequest(
|
|
287
|
-
method="GET",
|
|
288
|
-
url="endpoints",
|
|
289
|
-
params=params,
|
|
290
|
-
),
|
|
291
|
-
stream=False,
|
|
292
|
-
)
|
|
376
|
+
availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
|
|
293
377
|
|
|
294
|
-
|
|
295
|
-
assert isinstance(response.data, list)
|
|
378
|
+
disable_prompt_cache: Whether to disable the prompt cache for this endpoint
|
|
296
379
|
|
|
297
|
-
|
|
380
|
+
disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
|
|
298
381
|
|
|
299
|
-
|
|
382
|
+
display_name: A human-readable name for the endpoint
|
|
383
|
+
|
|
384
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
385
|
+
automatically stopped. Set to null, omit or set to 0 to disable automatic
|
|
386
|
+
timeout.
|
|
387
|
+
|
|
388
|
+
state: The desired state of the endpoint
|
|
389
|
+
|
|
390
|
+
extra_headers: Send extra headers
|
|
391
|
+
|
|
392
|
+
extra_query: Add additional query parameters to the request
|
|
393
|
+
|
|
394
|
+
extra_body: Add additional JSON properties to the request
|
|
395
|
+
|
|
396
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
397
|
+
"""
|
|
398
|
+
return await self._post(
|
|
399
|
+
"/endpoints",
|
|
400
|
+
body=await async_maybe_transform(
|
|
401
|
+
{
|
|
402
|
+
"autoscaling": autoscaling,
|
|
403
|
+
"hardware": hardware,
|
|
404
|
+
"model": model,
|
|
405
|
+
"availability_zone": availability_zone,
|
|
406
|
+
"disable_prompt_cache": disable_prompt_cache,
|
|
407
|
+
"disable_speculative_decoding": disable_speculative_decoding,
|
|
408
|
+
"display_name": display_name,
|
|
409
|
+
"inactive_timeout": inactive_timeout,
|
|
410
|
+
"state": state,
|
|
411
|
+
},
|
|
412
|
+
endpoint_create_params.EndpointCreateParams,
|
|
413
|
+
),
|
|
414
|
+
options=make_request_options(
|
|
415
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
416
|
+
),
|
|
417
|
+
cast_to=DedicatedEndpoint,
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
async def retrieve(
|
|
300
421
|
self,
|
|
422
|
+
endpoint_id: str,
|
|
301
423
|
*,
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
disable_speculative_decoding: bool = False,
|
|
309
|
-
state: Literal["STARTED", "STOPPED"] = "STARTED",
|
|
310
|
-
inactive_timeout: Optional[int] = None,
|
|
424
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
425
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
426
|
+
extra_headers: Headers | None = None,
|
|
427
|
+
extra_query: Query | None = None,
|
|
428
|
+
extra_body: Body | None = None,
|
|
429
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
311
430
|
) -> DedicatedEndpoint:
|
|
312
431
|
"""
|
|
313
|
-
|
|
432
|
+
Retrieves details about a specific endpoint, including its current state,
|
|
433
|
+
configuration, and scaling settings.
|
|
314
434
|
|
|
315
435
|
Args:
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
|
|
324
|
-
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
|
|
325
|
-
|
|
326
|
-
Returns:
|
|
327
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
436
|
+
extra_headers: Send extra headers
|
|
437
|
+
|
|
438
|
+
extra_query: Add additional query parameters to the request
|
|
439
|
+
|
|
440
|
+
extra_body: Add additional JSON properties to the request
|
|
441
|
+
|
|
442
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
328
443
|
"""
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
"hardware": hardware,
|
|
336
|
-
"autoscaling": {
|
|
337
|
-
"min_replicas": min_replicas,
|
|
338
|
-
"max_replicas": max_replicas,
|
|
339
|
-
},
|
|
340
|
-
"disable_prompt_cache": disable_prompt_cache,
|
|
341
|
-
"disable_speculative_decoding": disable_speculative_decoding,
|
|
342
|
-
"state": state,
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
if display_name is not None:
|
|
346
|
-
data["display_name"] = display_name
|
|
347
|
-
|
|
348
|
-
if inactive_timeout is not None:
|
|
349
|
-
data["inactive_timeout"] = inactive_timeout
|
|
350
|
-
|
|
351
|
-
response, _, _ = await requestor.arequest(
|
|
352
|
-
options=TogetherRequest(
|
|
353
|
-
method="POST",
|
|
354
|
-
url="endpoints",
|
|
355
|
-
params=data,
|
|
444
|
+
if not endpoint_id:
|
|
445
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
446
|
+
return await self._get(
|
|
447
|
+
f"/endpoints/{endpoint_id}",
|
|
448
|
+
options=make_request_options(
|
|
449
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
356
450
|
),
|
|
357
|
-
|
|
451
|
+
cast_to=DedicatedEndpoint,
|
|
358
452
|
)
|
|
359
453
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
454
|
+
async def update(
|
|
455
|
+
self,
|
|
456
|
+
endpoint_id: str,
|
|
457
|
+
*,
|
|
458
|
+
autoscaling: AutoscalingParam | Omit = omit,
|
|
459
|
+
display_name: str | Omit = omit,
|
|
460
|
+
inactive_timeout: Optional[int] | Omit = omit,
|
|
461
|
+
state: Literal["STARTED", "STOPPED"] | Omit = omit,
|
|
462
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
463
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
464
|
+
extra_headers: Headers | None = None,
|
|
465
|
+
extra_query: Query | None = None,
|
|
466
|
+
extra_body: Body | None = None,
|
|
467
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
468
|
+
) -> DedicatedEndpoint:
|
|
469
|
+
"""Updates an existing endpoint's configuration.
|
|
363
470
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
Get details of a specific endpoint.
|
|
471
|
+
You can modify the display name,
|
|
472
|
+
autoscaling settings, or change the endpoint's state (start/stop).
|
|
367
473
|
|
|
368
474
|
Args:
|
|
369
|
-
|
|
475
|
+
autoscaling: New autoscaling configuration for the endpoint
|
|
370
476
|
|
|
371
|
-
|
|
372
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
373
|
-
"""
|
|
374
|
-
requestor = api_requestor.APIRequestor(
|
|
375
|
-
client=self._client,
|
|
376
|
-
)
|
|
477
|
+
display_name: A human-readable name for the endpoint
|
|
377
478
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
method="GET",
|
|
381
|
-
url=f"endpoints/{endpoint_id}",
|
|
382
|
-
),
|
|
383
|
-
stream=False,
|
|
384
|
-
)
|
|
479
|
+
inactive_timeout: The number of minutes of inactivity after which the endpoint will be
|
|
480
|
+
automatically stopped. Set to 0 to disable automatic timeout.
|
|
385
481
|
|
|
386
|
-
|
|
482
|
+
state: The desired state of the endpoint
|
|
387
483
|
|
|
388
|
-
|
|
484
|
+
extra_headers: Send extra headers
|
|
389
485
|
|
|
390
|
-
|
|
391
|
-
"""
|
|
392
|
-
Delete a specific endpoint.
|
|
486
|
+
extra_query: Add additional query parameters to the request
|
|
393
487
|
|
|
394
|
-
|
|
395
|
-
|
|
488
|
+
extra_body: Add additional JSON properties to the request
|
|
489
|
+
|
|
490
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
396
491
|
"""
|
|
397
|
-
|
|
398
|
-
|
|
492
|
+
if not endpoint_id:
|
|
493
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
494
|
+
return await self._patch(
|
|
495
|
+
f"/endpoints/{endpoint_id}",
|
|
496
|
+
body=await async_maybe_transform(
|
|
497
|
+
{
|
|
498
|
+
"autoscaling": autoscaling,
|
|
499
|
+
"display_name": display_name,
|
|
500
|
+
"inactive_timeout": inactive_timeout,
|
|
501
|
+
"state": state,
|
|
502
|
+
},
|
|
503
|
+
endpoint_update_params.EndpointUpdateParams,
|
|
504
|
+
),
|
|
505
|
+
options=make_request_options(
|
|
506
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
507
|
+
),
|
|
508
|
+
cast_to=DedicatedEndpoint,
|
|
399
509
|
)
|
|
400
510
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
511
|
+
async def list(
|
|
512
|
+
self,
|
|
513
|
+
*,
|
|
514
|
+
mine: bool | Omit = omit,
|
|
515
|
+
type: Literal["dedicated", "serverless"] | Omit = omit,
|
|
516
|
+
usage_type: Literal["on-demand", "reserved"] | Omit = omit,
|
|
517
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
518
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
519
|
+
extra_headers: Headers | None = None,
|
|
520
|
+
extra_query: Query | None = None,
|
|
521
|
+
extra_body: Body | None = None,
|
|
522
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
523
|
+
) -> EndpointListResponse:
|
|
524
|
+
"""Returns a list of all endpoints associated with your account.
|
|
525
|
+
|
|
526
|
+
You can filter the
|
|
527
|
+
results by type (dedicated or serverless).
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
mine: If true, return only endpoints owned by the caller
|
|
531
|
+
|
|
532
|
+
type: Filter endpoints by type
|
|
533
|
+
|
|
534
|
+
usage_type: Filter endpoints by usage type
|
|
535
|
+
|
|
536
|
+
extra_headers: Send extra headers
|
|
537
|
+
|
|
538
|
+
extra_query: Add additional query parameters to the request
|
|
539
|
+
|
|
540
|
+
extra_body: Add additional JSON properties to the request
|
|
541
|
+
|
|
542
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
543
|
+
"""
|
|
544
|
+
return await self._get(
|
|
545
|
+
"/endpoints",
|
|
546
|
+
options=make_request_options(
|
|
547
|
+
extra_headers=extra_headers,
|
|
548
|
+
extra_query=extra_query,
|
|
549
|
+
extra_body=extra_body,
|
|
550
|
+
timeout=timeout,
|
|
551
|
+
query=await async_maybe_transform(
|
|
552
|
+
{
|
|
553
|
+
"mine": mine,
|
|
554
|
+
"type": type,
|
|
555
|
+
"usage_type": usage_type,
|
|
556
|
+
},
|
|
557
|
+
endpoint_list_params.EndpointListParams,
|
|
558
|
+
),
|
|
405
559
|
),
|
|
406
|
-
|
|
560
|
+
cast_to=EndpointListResponse,
|
|
407
561
|
)
|
|
408
562
|
|
|
409
|
-
async def
|
|
563
|
+
async def delete(
|
|
410
564
|
self,
|
|
411
565
|
endpoint_id: str,
|
|
412
566
|
*,
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
567
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
568
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
569
|
+
extra_headers: Headers | None = None,
|
|
570
|
+
extra_query: Query | None = None,
|
|
571
|
+
extra_body: Body | None = None,
|
|
572
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
573
|
+
) -> None:
|
|
574
|
+
"""Permanently deletes an endpoint.
|
|
575
|
+
|
|
576
|
+
This action cannot be undone.
|
|
421
577
|
|
|
422
578
|
Args:
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
Returns:
|
|
431
|
-
DedicatedEndpoint: Object containing endpoint information
|
|
579
|
+
extra_headers: Send extra headers
|
|
580
|
+
|
|
581
|
+
extra_query: Add additional query parameters to the request
|
|
582
|
+
|
|
583
|
+
extra_body: Add additional JSON properties to the request
|
|
584
|
+
|
|
585
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
432
586
|
"""
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
)
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
current_min = min_replicas
|
|
441
|
-
current_max = max_replicas
|
|
442
|
-
if current_min is None or current_max is None:
|
|
443
|
-
# Get current values if only one is specified
|
|
444
|
-
current = await self.get(endpoint_id=endpoint_id)
|
|
445
|
-
current_min = current_min or current.autoscaling.min_replicas
|
|
446
|
-
current_max = current_max or current.autoscaling.max_replicas
|
|
447
|
-
data["autoscaling"] = {
|
|
448
|
-
"min_replicas": current_min,
|
|
449
|
-
"max_replicas": current_max,
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
if state is not None:
|
|
453
|
-
data["state"] = state
|
|
454
|
-
|
|
455
|
-
if display_name is not None:
|
|
456
|
-
data["display_name"] = display_name
|
|
457
|
-
|
|
458
|
-
if inactive_timeout is not None:
|
|
459
|
-
data["inactive_timeout"] = inactive_timeout
|
|
460
|
-
|
|
461
|
-
response, _, _ = await requestor.arequest(
|
|
462
|
-
options=TogetherRequest(
|
|
463
|
-
method="PATCH",
|
|
464
|
-
url=f"endpoints/{endpoint_id}",
|
|
465
|
-
params=data,
|
|
587
|
+
if not endpoint_id:
|
|
588
|
+
raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
|
|
589
|
+
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
|
|
590
|
+
return await self._delete(
|
|
591
|
+
f"/endpoints/{endpoint_id}",
|
|
592
|
+
options=make_request_options(
|
|
593
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
466
594
|
),
|
|
467
|
-
|
|
595
|
+
cast_to=NoneType,
|
|
468
596
|
)
|
|
469
597
|
|
|
470
|
-
|
|
598
|
+
async def list_avzones(
|
|
599
|
+
self,
|
|
600
|
+
*,
|
|
601
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
602
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
603
|
+
extra_headers: Headers | None = None,
|
|
604
|
+
extra_query: Query | None = None,
|
|
605
|
+
extra_body: Body | None = None,
|
|
606
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
607
|
+
) -> EndpointListAvzonesResponse:
|
|
608
|
+
"""List all available availability zones."""
|
|
609
|
+
return await self._get(
|
|
610
|
+
"/clusters/availability-zones",
|
|
611
|
+
options=make_request_options(
|
|
612
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
613
|
+
),
|
|
614
|
+
cast_to=EndpointListAvzonesResponse,
|
|
615
|
+
)
|
|
471
616
|
|
|
472
|
-
return DedicatedEndpoint(**response.data)
|
|
473
617
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
"""
|
|
478
|
-
List available hardware configurations.
|
|
618
|
+
class EndpointsResourceWithRawResponse:
|
|
619
|
+
def __init__(self, endpoints: EndpointsResource) -> None:
|
|
620
|
+
self._endpoints = endpoints
|
|
479
621
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
622
|
+
self.create = to_raw_response_wrapper(
|
|
623
|
+
endpoints.create,
|
|
624
|
+
)
|
|
625
|
+
self.retrieve = to_raw_response_wrapper(
|
|
626
|
+
endpoints.retrieve,
|
|
627
|
+
)
|
|
628
|
+
self.update = to_raw_response_wrapper(
|
|
629
|
+
endpoints.update,
|
|
630
|
+
)
|
|
631
|
+
self.list = to_raw_response_wrapper(
|
|
632
|
+
endpoints.list,
|
|
633
|
+
)
|
|
634
|
+
self.delete = to_raw_response_wrapper(
|
|
635
|
+
endpoints.delete,
|
|
636
|
+
)
|
|
637
|
+
self.list_avzones = to_raw_response_wrapper(
|
|
638
|
+
endpoints.list_avzones,
|
|
639
|
+
)
|
|
483
640
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
641
|
+
|
|
642
|
+
class AsyncEndpointsResourceWithRawResponse:
|
|
643
|
+
def __init__(self, endpoints: AsyncEndpointsResource) -> None:
|
|
644
|
+
self._endpoints = endpoints
|
|
645
|
+
|
|
646
|
+
self.create = async_to_raw_response_wrapper(
|
|
647
|
+
endpoints.create,
|
|
648
|
+
)
|
|
649
|
+
self.retrieve = async_to_raw_response_wrapper(
|
|
650
|
+
endpoints.retrieve,
|
|
651
|
+
)
|
|
652
|
+
self.update = async_to_raw_response_wrapper(
|
|
653
|
+
endpoints.update,
|
|
654
|
+
)
|
|
655
|
+
self.list = async_to_raw_response_wrapper(
|
|
656
|
+
endpoints.list,
|
|
657
|
+
)
|
|
658
|
+
self.delete = async_to_raw_response_wrapper(
|
|
659
|
+
endpoints.delete,
|
|
660
|
+
)
|
|
661
|
+
self.list_avzones = async_to_raw_response_wrapper(
|
|
662
|
+
endpoints.list_avzones,
|
|
489
663
|
)
|
|
490
664
|
|
|
491
|
-
params = {}
|
|
492
|
-
if model is not None:
|
|
493
|
-
params["model"] = model
|
|
494
665
|
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
666
|
+
class EndpointsResourceWithStreamingResponse:
|
|
667
|
+
def __init__(self, endpoints: EndpointsResource) -> None:
|
|
668
|
+
self._endpoints = endpoints
|
|
669
|
+
|
|
670
|
+
self.create = to_streamed_response_wrapper(
|
|
671
|
+
endpoints.create,
|
|
672
|
+
)
|
|
673
|
+
self.retrieve = to_streamed_response_wrapper(
|
|
674
|
+
endpoints.retrieve,
|
|
502
675
|
)
|
|
676
|
+
self.update = to_streamed_response_wrapper(
|
|
677
|
+
endpoints.update,
|
|
678
|
+
)
|
|
679
|
+
self.list = to_streamed_response_wrapper(
|
|
680
|
+
endpoints.list,
|
|
681
|
+
)
|
|
682
|
+
self.delete = to_streamed_response_wrapper(
|
|
683
|
+
endpoints.delete,
|
|
684
|
+
)
|
|
685
|
+
self.list_avzones = to_streamed_response_wrapper(
|
|
686
|
+
endpoints.list_avzones,
|
|
687
|
+
)
|
|
688
|
+
|
|
503
689
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
690
|
+
class AsyncEndpointsResourceWithStreamingResponse:
|
|
691
|
+
def __init__(self, endpoints: AsyncEndpointsResource) -> None:
|
|
692
|
+
self._endpoints = endpoints
|
|
507
693
|
|
|
508
|
-
|
|
694
|
+
self.create = async_to_streamed_response_wrapper(
|
|
695
|
+
endpoints.create,
|
|
696
|
+
)
|
|
697
|
+
self.retrieve = async_to_streamed_response_wrapper(
|
|
698
|
+
endpoints.retrieve,
|
|
699
|
+
)
|
|
700
|
+
self.update = async_to_streamed_response_wrapper(
|
|
701
|
+
endpoints.update,
|
|
702
|
+
)
|
|
703
|
+
self.list = async_to_streamed_response_wrapper(
|
|
704
|
+
endpoints.list,
|
|
705
|
+
)
|
|
706
|
+
self.delete = async_to_streamed_response_wrapper(
|
|
707
|
+
endpoints.delete,
|
|
708
|
+
)
|
|
709
|
+
self.list_avzones = async_to_streamed_response_wrapper(
|
|
710
|
+
endpoints.list_avzones,
|
|
711
|
+
)
|