together 1.4.0__py3-none-any.whl → 1.4.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
@@ -0,0 +1,488 @@
+from __future__ import annotations
+
+from typing import Dict, List, Literal, Optional, Union
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import TogetherClient, TogetherRequest
+from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
+
+
+class Endpoints:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def list(
+        self, type: Optional[Literal["dedicated", "serverless"]] = None
+    ) -> List[ListEndpoint]:
+        """
+        List all endpoints, optionally filtered by type.
+
+        Args:
+            type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
+
+        Returns:
+            List[ListEndpoint]: List of endpoint objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if type is not None:
+            params["type"] = type
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="endpoints",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        response.data = response.data["data"]
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        return [ListEndpoint(**endpoint) for endpoint in response.data]
+
+    def create(
+        self,
+        *,
+        model: str,
+        hardware: str,
+        min_replicas: int,
+        max_replicas: int,
+        display_name: Optional[str] = None,
+        disable_prompt_cache: bool = False,
+        disable_speculative_decoding: bool = False,
+        state: Literal["STARTED", "STOPPED"] = "STARTED",
+    ) -> DedicatedEndpoint:
+        """
+        Create a new dedicated endpoint.
+
+        Args:
+            model (str): The model to deploy on this endpoint
+            hardware (str): The hardware configuration to use for this endpoint
+            min_replicas (int): The minimum number of replicas to maintain
+            max_replicas (int): The maximum number of replicas to scale up to
+            display_name (str, optional): A human-readable name for the endpoint
+            disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
+            disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
+            state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+            "model": model,
+            "hardware": hardware,
+            "autoscaling": {
+                "min_replicas": min_replicas,
+                "max_replicas": max_replicas,
+            },
+            "disable_prompt_cache": disable_prompt_cache,
+            "disable_speculative_decoding": disable_speculative_decoding,
+            "state": state,
+        }
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="POST",
+                url="endpoints",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    def get(self, endpoint_id: str) -> DedicatedEndpoint:
+        """
+        Get details of a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to retrieve
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    def delete(self, endpoint_id: str) -> None:
+        """
+        Delete a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to delete
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        requestor.request(
+            options=TogetherRequest(
+                method="DELETE",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+    def update(
+        self,
+        endpoint_id: str,
+        *,
+        min_replicas: Optional[int] = None,
+        max_replicas: Optional[int] = None,
+        state: Optional[Literal["STARTED", "STOPPED"]] = None,
+        display_name: Optional[str] = None,
+    ) -> DedicatedEndpoint:
+        """
+        Update an endpoint's configuration.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to update
+            min_replicas (int, optional): The minimum number of replicas to maintain
+            max_replicas (int, optional): The maximum number of replicas to scale up to
+            state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
+            display_name (str, optional): A human-readable name for the endpoint
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, Dict[str, int]]] = {}
+
+        if min_replicas is not None or max_replicas is not None:
+            current_min = min_replicas
+            current_max = max_replicas
+            if current_min is None or current_max is None:
+                # Get current values if only one is specified
+                current = self.get(endpoint_id=endpoint_id)
+                # Explicit None checks (rather than `or`) preserve an explicit 0
+                current_min = current_min if current_min is not None else current.autoscaling.min_replicas
+                current_max = current_max if current_max is not None else current.autoscaling.max_replicas
+            data["autoscaling"] = {
+                "min_replicas": current_min,
+                "max_replicas": current_max,
+            }
+
+        if state is not None:
+            data["state"] = state
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="PATCH",
+                url=f"endpoints/{endpoint_id}",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]:
+        """
+        List available hardware configurations.
+
+        Args:
+            model (str, optional): Filter hardware configurations by model compatibility. When provided,
+                the response includes availability status for each compatible configuration.
+
+        Returns:
+            List[HardwareWithStatus]: List of hardware configurations with their status
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if model is not None:
+            params["model"] = model
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="hardware",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, dict)
+        assert isinstance(response.data["data"], list)
+
+        return [HardwareWithStatus(**item) for item in response.data["data"]]
+
+
+class AsyncEndpoints:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    async def list(
+        self, type: Optional[Literal["dedicated", "serverless"]] = None
+    ) -> List[ListEndpoint]:
+        """
+        List all endpoints, optionally filtered by type.
+
+        Args:
+            type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
+
+        Returns:
+            List[ListEndpoint]: List of endpoint objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if type is not None:
+            params["type"] = type
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="endpoints",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        # Unwrap the enveloped payload as the synchronous list() does;
+        # without this the list assertion below fails on the raw response
+        response.data = response.data["data"]
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        return [ListEndpoint(**endpoint) for endpoint in response.data]
+
+    async def create(
+        self,
+        *,
+        model: str,
+        hardware: str,
+        min_replicas: int,
+        max_replicas: int,
+        display_name: Optional[str] = None,
+        disable_prompt_cache: bool = False,
+        disable_speculative_decoding: bool = False,
+        state: Literal["STARTED", "STOPPED"] = "STARTED",
+    ) -> DedicatedEndpoint:
+        """
+        Create a new dedicated endpoint.
+
+        Args:
+            model (str): The model to deploy on this endpoint
+            hardware (str): The hardware configuration to use for this endpoint
+            min_replicas (int): The minimum number of replicas to maintain
+            max_replicas (int): The maximum number of replicas to scale up to
+            display_name (str, optional): A human-readable name for the endpoint
+            disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
+            disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
+            state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+            "model": model,
+            "hardware": hardware,
+            "autoscaling": {
+                "min_replicas": min_replicas,
+                "max_replicas": max_replicas,
+            },
+            "disable_prompt_cache": disable_prompt_cache,
+            "disable_speculative_decoding": disable_speculative_decoding,
+            "state": state,
+        }
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="POST",
+                url="endpoints",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    async def get(self, endpoint_id: str) -> DedicatedEndpoint:
+        """
+        Get details of a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to retrieve
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    async def delete(self, endpoint_id: str) -> None:
+        """
+        Delete a specific endpoint.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to delete
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        await requestor.arequest(
+            options=TogetherRequest(
+                method="DELETE",
+                url=f"endpoints/{endpoint_id}",
+            ),
+            stream=False,
+        )
+
+    async def update(
+        self,
+        endpoint_id: str,
+        *,
+        min_replicas: Optional[int] = None,
+        max_replicas: Optional[int] = None,
+        state: Optional[Literal["STARTED", "STOPPED"]] = None,
+        display_name: Optional[str] = None,
+    ) -> DedicatedEndpoint:
+        """
+        Update an endpoint's configuration.
+
+        Args:
+            endpoint_id (str): ID of the endpoint to update
+            min_replicas (int, optional): The minimum number of replicas to maintain
+            max_replicas (int, optional): The maximum number of replicas to scale up to
+            state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
+            display_name (str, optional): A human-readable name for the endpoint
+
+        Returns:
+            DedicatedEndpoint: Object containing endpoint information
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        data: Dict[str, Union[str, Dict[str, int]]] = {}
+
+        if min_replicas is not None or max_replicas is not None:
+            current_min = min_replicas
+            current_max = max_replicas
+            if current_min is None or current_max is None:
+                # Get current values if only one is specified
+                current = await self.get(endpoint_id=endpoint_id)
+                # Explicit None checks (rather than `or`) preserve an explicit 0
+                current_min = current_min if current_min is not None else current.autoscaling.min_replicas
+                current_max = current_max if current_max is not None else current.autoscaling.max_replicas
+            data["autoscaling"] = {
+                "min_replicas": current_min,
+                "max_replicas": current_max,
+            }
+
+        if state is not None:
+            data["state"] = state
+
+        if display_name is not None:
+            data["display_name"] = display_name
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="PATCH",
+                url=f"endpoints/{endpoint_id}",
+                params=data,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+
+        return DedicatedEndpoint(**response.data)
+
+    async def list_hardware(
+        self, model: Optional[str] = None
+    ) -> List[HardwareWithStatus]:
+        """
+        List available hardware configurations.
+
+        Args:
+            model (str, optional): Filter hardware configurations by model compatibility. When provided,
+                the response includes availability status for each compatible configuration.
+
+        Returns:
+            List[HardwareWithStatus]: List of hardware configurations with their status
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        params = {}
+        if model is not None:
+            params["model"] = model
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="hardware",
+                params=params,
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, dict)
+        assert isinstance(response.data["data"], list)
+
+        return [HardwareWithStatus(**item) for item in response.data["data"]]
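
A few usage sketches against the API surface added above. They assume the package's top-level Together client attaches an Endpoints instance at client.endpoints; that wiring happens outside this file, so the attribute path is an assumption. Listing endpoints, optionally filtered by type:

    from together import Together

    client = Together()  # assumes TOGETHER_API_KEY is set in the environment

    all_endpoints = client.endpoints.list()
    dedicated_only = client.endpoints.list(type="dedicated")  # or type="serverless"
    for endpoint in dedicated_only:
        print(endpoint)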
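
Creating a dedicated endpoint uses the keyword-only signature of create(); the model and hardware strings below are placeholders, since valid IDs come from the model and hardware listings:

    endpoint = client.endpoints.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",  # placeholder model ID
        hardware="1x_nvidia_a100_80gb_sxm",  # placeholder hardware ID
        min_replicas=1,
        max_replicas=2,
        display_name="demo-endpoint",  # optional
    )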
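
Fetching and deleting by ID are symmetric, and delete() returns None. The id attribute on the returned object is an assumption here, since the DedicatedEndpoint model is defined elsewhere in the package:

    details = client.endpoints.get(endpoint_id=endpoint.id)
    client.endpoints.delete(endpoint_id=endpoint.id)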
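
update() sends only the fields you pass, with one wrinkle visible in the code above: autoscaling bounds are PATCHed as a complete object, so supplying just one of min_replicas/max_replicas costs an extra get() round-trip to backfill the other bound first:

    # Only max_replicas is supplied; update() first reads the endpoint to
    # recover min_replicas, then PATCHes both bounds together.
    updated = client.endpoints.update(
        endpoint.id,
        max_replicas=4,
    )

Passing both bounds avoids the extra request.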
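
Hardware discovery works with or without a model filter; per the docstring, availability status is only populated when a model is supplied:

    for hw in client.endpoints.list_hardware(model="meta-llama/Meta-Llama-3.1-8B-Instruct"):
        print(hw)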
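
AsyncEndpoints mirrors the synchronous surface method-for-method over arequest(). A minimal sketch, assuming the AsyncTogether client exposes it the same way:

    import asyncio

    from together import AsyncTogether

    async def main() -> None:
        client = AsyncTogether()
        endpoints = await client.endpoints.list(type="dedicated")
        print(endpoints)

    asyncio.run(main())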