together 2.0.0a16__py3-none-any.whl → 2.0.0a18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. together/_base_client.py +5 -2
  2. together/_client.py +1 -39
  3. together/_compat.py +3 -3
  4. together/_utils/_json.py +35 -0
  5. together/_version.py +1 -1
  6. together/lib/cli/api/endpoints/create.py +14 -8
  7. together/lib/cli/api/endpoints/hardware.py +37 -6
  8. together/lib/cli/api/models/list.py +18 -14
  9. together/lib/cli/api/models/upload.py +5 -1
  10. together/resources/__init__.py +0 -14
  11. together/resources/beta/__init__.py +14 -0
  12. together/resources/beta/beta.py +32 -0
  13. together/resources/beta/clusters/clusters.py +12 -12
  14. together/resources/beta/clusters/storage.py +10 -10
  15. together/resources/beta/jig/__init__.py +61 -0
  16. together/resources/beta/jig/jig.py +1024 -0
  17. together/resources/beta/jig/queue.py +482 -0
  18. together/resources/beta/jig/secrets.py +548 -0
  19. together/resources/beta/jig/volumes.py +514 -0
  20. together/resources/chat/completions.py +10 -0
  21. together/resources/endpoints.py +2 -2
  22. together/resources/models/__init__.py +33 -0
  23. together/resources/{models.py → models/models.py} +41 -9
  24. together/resources/models/uploads.py +163 -0
  25. together/types/__init__.py +0 -2
  26. together/types/beta/__init__.py +6 -0
  27. together/types/beta/deployment.py +261 -0
  28. together/types/beta/deployment_logs.py +11 -0
  29. together/types/beta/jig/__init__.py +20 -0
  30. together/types/beta/jig/queue_cancel_params.py +13 -0
  31. together/types/beta/jig/queue_cancel_response.py +11 -0
  32. together/types/beta/jig/queue_metrics_params.py +12 -0
  33. together/types/beta/jig/queue_metrics_response.py +8 -0
  34. together/types/beta/jig/queue_retrieve_params.py +15 -0
  35. together/types/beta/jig/queue_retrieve_response.py +35 -0
  36. together/types/beta/jig/queue_submit_params.py +19 -0
  37. together/types/beta/jig/queue_submit_response.py +25 -0
  38. together/types/beta/jig/secret.py +33 -0
  39. together/types/beta/jig/secret_create_params.py +34 -0
  40. together/types/beta/jig/secret_list_response.py +16 -0
  41. together/types/beta/jig/secret_update_params.py +34 -0
  42. together/types/beta/jig/volume.py +47 -0
  43. together/types/beta/jig/volume_create_params.py +34 -0
  44. together/types/beta/jig/volume_list_response.py +16 -0
  45. together/types/beta/jig/volume_update_params.py +34 -0
  46. together/types/beta/jig_deploy_params.py +150 -0
  47. together/types/beta/jig_list_response.py +16 -0
  48. together/types/beta/jig_retrieve_logs_params.py +15 -0
  49. together/types/beta/jig_update_params.py +141 -0
  50. together/types/chat/completion_create_params.py +11 -0
  51. together/types/endpoint_create_params.py +1 -1
  52. together/types/models/__init__.py +5 -0
  53. together/types/{job_retrieve_response.py → models/upload_status_response.py} +3 -3
  54. {together-2.0.0a16.dist-info → together-2.0.0a18.dist-info}/METADATA +11 -14
  55. {together-2.0.0a16.dist-info → together-2.0.0a18.dist-info}/RECORD +58 -28
  56. together/resources/jobs.py +0 -214
  57. together/types/job_list_response.py +0 -47
  58. {together-2.0.0a16.dist-info → together-2.0.0a18.dist-info}/WHEEL +0 -0
  59. {together-2.0.0a16.dist-info → together-2.0.0a18.dist-info}/entry_points.txt +0 -0
  60. {together-2.0.0a16.dist-info → together-2.0.0a18.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1024 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, Iterable
6
+ from typing_extensions import Literal
7
+
8
+ import httpx
9
+
10
+ from .queue import (
11
+ QueueResource,
12
+ AsyncQueueResource,
13
+ QueueResourceWithRawResponse,
14
+ AsyncQueueResourceWithRawResponse,
15
+ QueueResourceWithStreamingResponse,
16
+ AsyncQueueResourceWithStreamingResponse,
17
+ )
18
+ from .secrets import (
19
+ SecretsResource,
20
+ AsyncSecretsResource,
21
+ SecretsResourceWithRawResponse,
22
+ AsyncSecretsResourceWithRawResponse,
23
+ SecretsResourceWithStreamingResponse,
24
+ AsyncSecretsResourceWithStreamingResponse,
25
+ )
26
+ from .volumes import (
27
+ VolumesResource,
28
+ AsyncVolumesResource,
29
+ VolumesResourceWithRawResponse,
30
+ AsyncVolumesResourceWithRawResponse,
31
+ VolumesResourceWithStreamingResponse,
32
+ AsyncVolumesResourceWithStreamingResponse,
33
+ )
34
+ from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
35
+ from ...._utils import maybe_transform, async_maybe_transform
36
+ from ...._compat import cached_property
37
+ from ...._resource import SyncAPIResource, AsyncAPIResource
38
+ from ...._response import (
39
+ to_raw_response_wrapper,
40
+ to_streamed_response_wrapper,
41
+ async_to_raw_response_wrapper,
42
+ async_to_streamed_response_wrapper,
43
+ )
44
+ from ....types.beta import jig_deploy_params, jig_update_params, jig_retrieve_logs_params
45
+ from ...._base_client import make_request_options
46
+ from ....types.beta.deployment import Deployment
47
+ from ....types.beta.deployment_logs import DeploymentLogs
48
+ from ....types.beta.jig_list_response import JigListResponse
49
+
50
+ __all__ = ["JigResource", "AsyncJigResource"]
51
+
52
+
53
class JigResource(SyncAPIResource):
    """Synchronous access to the `/deployments` endpoints.

    Provides the `queue`, `volumes` and `secrets` sub-resources plus the
    retrieve / update / list / deploy / destroy / retrieve_logs calls.
    """

    @cached_property
    def queue(self) -> QueueResource:
        """Queue sub-resource constructed from this resource's client."""
        client = self._client
        return QueueResource(client)

    @cached_property
    def volumes(self) -> VolumesResource:
        """Volumes sub-resource constructed from this resource's client."""
        client = self._client
        return VolumesResource(client)

    @cached_property
    def secrets(self) -> SecretsResource:
        """Secrets sub-resource constructed from this resource's client."""
        client = self._client
        return SecretsResource(client)

    @cached_property
    def with_raw_response(self) -> JigResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to get back the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
        """
        raw_view = JigResourceWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> JigResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
        """
        streaming_view = JigResourceWithStreamingResponse(self)
        return streaming_view

    def retrieve(
        self,
        id: str,
        *,
        # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
        # Values given here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Deployment:
        """
        Retrieve details of a specific deployment by its ID or name

        Args:
          id: ID or name of the deployment; must be non-empty

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: if `id` is empty
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

        # NOTE(review): `id` is placed in the path as given (no URL-escaping) — confirm
        # callers only pass path-safe identifiers.
        path = f"/deployments/{id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(path, options=request_options, cast_to=Deployment)

    def update(
        self,
        id: str,
        *,
        args: SequenceNotStr[str] | Omit = omit,
        autoscaling: Dict[str, str] | Omit = omit,
        command: SequenceNotStr[str] | Omit = omit,
        cpu: float | Omit = omit,
        description: str | Omit = omit,
        environment_variables: Iterable[jig_update_params.EnvironmentVariable] | Omit = omit,
        gpu_count: int | Omit = omit,
        # NOTE(review): the second literal carries a leading space (" a100-80gb") — looks like
        # a spec typo; confirm against the API before relying on or changing it.
        gpu_type: Literal["h100-80gb", " a100-80gb"] | Omit = omit,
        health_check_path: str | Omit = omit,
        image: str | Omit = omit,
        max_replicas: int | Omit = omit,
        memory: float | Omit = omit,
        min_replicas: int | Omit = omit,
        name: str | Omit = omit,
        port: int | Omit = omit,
        storage: int | Omit = omit,
        termination_grace_period_seconds: int | Omit = omit,
        volumes: Iterable[jig_update_params.Volume] | Omit = omit,
        # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
        # Values given here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Deployment:
        """
        Update an existing deployment configuration

        Args:
          id: Identifier of the deployment to update; must be non-empty

          args: Overrides the container's CMD. Provide as an array of arguments (e.g.,
              ["python", "app.py"])

          autoscaling: Autoscaling configuration as key-value pairs. Example: {"metric":
              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog

          command: Overrides the container's ENTRYPOINT. Provide as an array (e.g.,
              ["/bin/sh", "-c"])

          cpu: Number of CPU cores to allocate per container instance (e.g., 0.1 = 100 milli
              cores)

          description: Optional human-readable description of your deployment

          environment_variables: List of environment variables to set in the container. This will replace all
              existing environment variables

          gpu_count: Number of GPUs to allocate per container instance

          gpu_type: GPU hardware to use (e.g., "h100-80gb")

          health_check_path: HTTP path for health checks (e.g., "/health"). Set to empty string to disable
              health checks

          image: Container image to deploy from registry.together.ai.

          max_replicas: Maximum number of replicas that can be scaled up to.

          memory: Amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB)

          min_replicas: Minimum number of replicas to run

          name: New unique identifier for your deployment. Must contain only alphanumeric
              characters, underscores, or hyphens (1-100 characters)

          port: Container port your application listens on (e.g., 8080 for web servers)

          storage: Amount of ephemeral disk storage to allocate per container instance (e.g., 10
              = 10GiB)

          termination_grace_period_seconds: Time in seconds to wait for graceful shutdown before forcefully terminating
              the replica

          volumes: List of volume mounts to attach to the container. This will replace all
              existing volumes

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: if `id` is empty
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

        path = f"/deployments/{id}"
        fields = {
            "args": args,
            "autoscaling": autoscaling,
            "command": command,
            "cpu": cpu,
            "description": description,
            "environment_variables": environment_variables,
            "gpu_count": gpu_count,
            "gpu_type": gpu_type,
            "health_check_path": health_check_path,
            "image": image,
            "max_replicas": max_replicas,
            "memory": memory,
            "min_replicas": min_replicas,
            "name": name,
            "port": port,
            "storage": storage,
            "termination_grace_period_seconds": termination_grace_period_seconds,
            "volumes": volumes,
        }
        request_body = maybe_transform(fields, jig_update_params.JigUpdateParams)
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._patch(path, body=request_body, options=request_options, cast_to=Deployment)

    def list(
        self,
        *,
        # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
        # Values given here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> JigListResponse:
        """Get a list of all deployments in your project"""
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get("/deployments", options=request_options, cast_to=JigListResponse)

    def deploy(
        self,
        *,
        # NOTE(review): the second literal carries a leading space (" a100-80gb") — looks like
        # a spec typo; confirm against the API before relying on or changing it.
        gpu_type: Literal["h100-80gb", " a100-80gb"],
        image: str,
        name: str,
        args: SequenceNotStr[str] | Omit = omit,
        autoscaling: Dict[str, str] | Omit = omit,
        command: SequenceNotStr[str] | Omit = omit,
        cpu: float | Omit = omit,
        description: str | Omit = omit,
        environment_variables: Iterable[jig_deploy_params.EnvironmentVariable] | Omit = omit,
        gpu_count: int | Omit = omit,
        health_check_path: str | Omit = omit,
        max_replicas: int | Omit = omit,
        memory: float | Omit = omit,
        min_replicas: int | Omit = omit,
        port: int | Omit = omit,
        storage: int | Omit = omit,
        termination_grace_period_seconds: int | Omit = omit,
        volumes: Iterable[jig_deploy_params.Volume] | Omit = omit,
        # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
        # Values given here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Deployment:
        """
        Create a new deployment with specified configuration

        Args:
          gpu_type: GPU hardware to use (e.g., "h100-80gb").

          image: Container image to deploy from registry.together.ai.

          name: Unique identifier for your deployment. Must contain only alphanumeric
              characters, underscores, or hyphens (1-100 characters)

          args: Overrides the container's CMD. Provide as an array of arguments (e.g.,
              ["python", "app.py"])

          autoscaling: Autoscaling configuration as key-value pairs. Example: {"metric":
              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog

          command: Overrides the container's ENTRYPOINT. Provide as an array (e.g.,
              ["/bin/sh", "-c"])

          cpu: Number of CPU cores to allocate per container instance (e.g., 0.1 = 100 milli
              cores)

          description: Optional human-readable description of your deployment

          environment_variables: List of environment variables to set in the container. Each must have a name
              and either a value or value_from_secret

          gpu_count: Number of GPUs to allocate per container instance. Defaults to 0 if not
              specified

          health_check_path: HTTP path for health checks (e.g., "/health"). If set, the platform will check
              this endpoint to determine container health

          max_replicas: Maximum number of container instances that can be scaled up to. If not set,
              will be set to MinReplicas

          memory: Amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB)

          min_replicas: Minimum number of container instances to run. Defaults to 1 if not specified

          port: Container port your application listens on (e.g., 8080 for web servers).
              Required if your application serves traffic

          storage: Amount of ephemeral disk storage to allocate per container instance (e.g., 10
              = 10GiB)

          termination_grace_period_seconds: Time in seconds to wait for graceful shutdown before forcefully terminating
              the replica

          volumes: List of volume mounts to attach to the container. Each mount must reference an
              existing volume by name

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        fields = {
            "gpu_type": gpu_type,
            "image": image,
            "name": name,
            "args": args,
            "autoscaling": autoscaling,
            "command": command,
            "cpu": cpu,
            "description": description,
            "environment_variables": environment_variables,
            "gpu_count": gpu_count,
            "health_check_path": health_check_path,
            "max_replicas": max_replicas,
            "memory": memory,
            "min_replicas": min_replicas,
            "port": port,
            "storage": storage,
            "termination_grace_period_seconds": termination_grace_period_seconds,
            "volumes": volumes,
        }
        request_body = maybe_transform(fields, jig_deploy_params.JigDeployParams)
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post("/deployments", body=request_body, options=request_options, cast_to=Deployment)

    def destroy(
        self,
        id: str,
        *,
        # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
        # Values given here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> object:
        """
        Delete an existing deployment

        Args:
          id: Identifier of the deployment to delete; must be non-empty

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: if `id` is empty
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

        path = f"/deployments/{id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._delete(path, options=request_options, cast_to=object)

    def retrieve_logs(
        self,
        id: str,
        *,
        follow: bool | Omit = omit,
        replica_id: str | Omit = omit,
        # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
        # Values given here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> DeploymentLogs:
        """Retrieve logs from a deployment, optionally filtered by replica ID.

        Use follow=true to stream logs in real-time.

        Args:
          id: Identifier of the deployment whose logs are requested; must be non-empty

          follow: Stream logs in real-time (ndjson format)

          replica_id: Replica ID to filter logs

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: if `id` is empty
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

        path = f"/deployments/{id}/logs"
        # `follow` and `replica_id` are sent as query-string parameters, not in the body.
        query_params = maybe_transform(
            {
                "follow": follow,
                "replica_id": replica_id,
            },
            jig_retrieve_logs_params.JigRetrieveLogsParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=query_params,
        )
        return self._get(path, options=request_options, cast_to=DeploymentLogs)
466
+
467
+
468
+ class AsyncJigResource(AsyncAPIResource):
469
@cached_property
def queue(self) -> AsyncQueueResource:
    """Async queue sub-resource constructed from this resource's client."""
    client = self._client
    return AsyncQueueResource(client)
472
+
473
@cached_property
def volumes(self) -> AsyncVolumesResource:
    """Async volumes sub-resource constructed from this resource's client."""
    client = self._client
    return AsyncVolumesResource(client)
476
+
477
@cached_property
def secrets(self) -> AsyncSecretsResource:
    """Async secrets sub-resource constructed from this resource's client."""
    client = self._client
    return AsyncSecretsResource(client)
480
+
481
@cached_property
def with_raw_response(self) -> AsyncJigResourceWithRawResponse:
    """
    Prefix any HTTP method call with this property to get back the raw
    response object rather than the parsed content.

    For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
    """
    raw_view = AsyncJigResourceWithRawResponse(self)
    return raw_view
490
+
491
@cached_property
def with_streaming_response(self) -> AsyncJigResourceWithStreamingResponse:
    """
    Like `.with_raw_response`, except the response body is not read eagerly.

    For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
    """
    streaming_view = AsyncJigResourceWithStreamingResponse(self)
    return streaming_view
499
+
500
async def retrieve(
    self,
    id: str,
    *,
    # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
    # Values given here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Deployment:
    """
    Retrieve details of a specific deployment by its ID or name

    Args:
      id: ID or name of the deployment; must be non-empty

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: if `id` is empty
    """
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

    # NOTE(review): `id` is placed in the path as given (no URL-escaping) — confirm
    # callers only pass path-safe identifiers.
    path = f"/deployments/{id}"
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get(path, options=request_options, cast_to=Deployment)
532
+
533
async def update(
    self,
    id: str,
    *,
    args: SequenceNotStr[str] | Omit = omit,
    autoscaling: Dict[str, str] | Omit = omit,
    command: SequenceNotStr[str] | Omit = omit,
    cpu: float | Omit = omit,
    description: str | Omit = omit,
    environment_variables: Iterable[jig_update_params.EnvironmentVariable] | Omit = omit,
    gpu_count: int | Omit = omit,
    # NOTE(review): the second literal carries a leading space (" a100-80gb") — looks like
    # a spec typo; confirm against the API before relying on or changing it.
    gpu_type: Literal["h100-80gb", " a100-80gb"] | Omit = omit,
    health_check_path: str | Omit = omit,
    image: str | Omit = omit,
    max_replicas: int | Omit = omit,
    memory: float | Omit = omit,
    min_replicas: int | Omit = omit,
    name: str | Omit = omit,
    port: int | Omit = omit,
    storage: int | Omit = omit,
    termination_grace_period_seconds: int | Omit = omit,
    volumes: Iterable[jig_update_params.Volume] | Omit = omit,
    # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
    # Values given here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Deployment:
    """
    Update an existing deployment configuration

    Args:
      id: Identifier of the deployment to update; must be non-empty

      args: Overrides the container's CMD. Provide as an array of arguments (e.g.,
          ["python", "app.py"])

      autoscaling: Autoscaling configuration as key-value pairs. Example: {"metric":
          "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog

      command: Overrides the container's ENTRYPOINT. Provide as an array (e.g.,
          ["/bin/sh", "-c"])

      cpu: Number of CPU cores to allocate per container instance (e.g., 0.1 = 100 milli
          cores)

      description: Optional human-readable description of your deployment

      environment_variables: List of environment variables to set in the container. This will replace all
          existing environment variables

      gpu_count: Number of GPUs to allocate per container instance

      gpu_type: GPU hardware to use (e.g., "h100-80gb")

      health_check_path: HTTP path for health checks (e.g., "/health"). Set to empty string to disable
          health checks

      image: Container image to deploy from registry.together.ai.

      max_replicas: Maximum number of replicas that can be scaled up to.

      memory: Amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB)

      min_replicas: Minimum number of replicas to run

      name: New unique identifier for your deployment. Must contain only alphanumeric
          characters, underscores, or hyphens (1-100 characters)

      port: Container port your application listens on (e.g., 8080 for web servers)

      storage: Amount of ephemeral disk storage to allocate per container instance (e.g., 10
          = 10GiB)

      termination_grace_period_seconds: Time in seconds to wait for graceful shutdown before forcefully terminating
          the replica

      volumes: List of volume mounts to attach to the container. This will replace all
          existing volumes

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: if `id` is empty
    """
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

    path = f"/deployments/{id}"
    fields = {
        "args": args,
        "autoscaling": autoscaling,
        "command": command,
        "cpu": cpu,
        "description": description,
        "environment_variables": environment_variables,
        "gpu_count": gpu_count,
        "gpu_type": gpu_type,
        "health_check_path": health_check_path,
        "image": image,
        "max_replicas": max_replicas,
        "memory": memory,
        "min_replicas": min_replicas,
        "name": name,
        "port": port,
        "storage": storage,
        "termination_grace_period_seconds": termination_grace_period_seconds,
        "volumes": volumes,
    }
    request_body = await async_maybe_transform(fields, jig_update_params.JigUpdateParams)
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._patch(path, body=request_body, options=request_options, cast_to=Deployment)
655
+
656
async def list(
    self,
    *,
    # The arguments below let you pass extra parameters to the API that are not exposed as kwargs.
    # Values given here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> JigListResponse:
    """Get a list of all deployments in your project"""
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get("/deployments", options=request_options, cast_to=JigListResponse)
674
+
675
+ async def deploy(
676
+ self,
677
+ *,
678
+ gpu_type: Literal["h100-80gb", " a100-80gb"],
679
+ image: str,
680
+ name: str,
681
+ args: SequenceNotStr[str] | Omit = omit,
682
+ autoscaling: Dict[str, str] | Omit = omit,
683
+ command: SequenceNotStr[str] | Omit = omit,
684
+ cpu: float | Omit = omit,
685
+ description: str | Omit = omit,
686
+ environment_variables: Iterable[jig_deploy_params.EnvironmentVariable] | Omit = omit,
687
+ gpu_count: int | Omit = omit,
688
+ health_check_path: str | Omit = omit,
689
+ max_replicas: int | Omit = omit,
690
+ memory: float | Omit = omit,
691
+ min_replicas: int | Omit = omit,
692
+ port: int | Omit = omit,
693
+ storage: int | Omit = omit,
694
+ termination_grace_period_seconds: int | Omit = omit,
695
+ volumes: Iterable[jig_deploy_params.Volume] | Omit = omit,
696
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
697
+ # The extra values given here take precedence over values defined on the client or passed to this method.
698
+ extra_headers: Headers | None = None,
699
+ extra_query: Query | None = None,
700
+ extra_body: Body | None = None,
701
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
702
+ ) -> Deployment:
703
+ """
704
+ Create a new deployment with specified configuration
705
+
706
+ Args:
707
+ gpu_type: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
708
+
709
+ image: Image is the container image to deploy from registry.together.ai.
710
+
711
+ name: Name is the unique identifier for your deployment. Must contain only
712
+ alphanumeric characters, underscores, or hyphens (1-100 characters)
713
+
714
+ args: Args overrides the container's CMD. Provide as an array of arguments (e.g.,
715
+ ["python", "app.py"])
716
+
717
+ autoscaling:
718
+ Autoscaling configuration as key-value pairs. Example: {"metric":
719
+ "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog
720
+
721
+ command: Command overrides the container's ENTRYPOINT. Provide as an array (e.g.,
722
+ ["/bin/sh", "-c"])
723
+
724
+ cpu: CPU is the number of CPU cores to allocate per container instance (e.g., 0.1 =
725
+ 100 milli cores)
726
+
727
+ description: Description is an optional human-readable description of your deployment
728
+
729
+ environment_variables: EnvironmentVariables is a list of environment variables to set in the container.
730
+ Each must have a name and either a value or value_from_secret
731
+
732
+ gpu_count: GPUCount is the number of GPUs to allocate per container instance. Defaults to 0
733
+ if not specified
734
+
735
+ health_check_path: HealthCheckPath is the HTTP path for health checks (e.g., "/health"). If set,
736
+ the platform will check this endpoint to determine container health
737
+
738
+ max_replicas: MaxReplicas is the maximum number of container instances that can be scaled up
739
+ to. If not set, will be set to MinReplicas
740
+
741
+ memory: Memory is the amount of RAM to allocate per container instance in GiB (e.g., 0.5
742
+ = 512MiB)
743
+
744
+ min_replicas: MinReplicas is the minimum number of container instances to run. Defaults to 1
745
+ if not specified
746
+
747
+ port: Port is the container port your application listens on (e.g., 8080 for web
748
+ servers). Required if your application serves traffic
749
+
750
+ storage: Storage is the amount of ephemeral disk storage to allocate per container
751
+ instance (e.g., 10 = 10GiB)
752
+
753
+ termination_grace_period_seconds: TerminationGracePeriodSeconds is the time in seconds to wait for graceful
754
+ shutdown before forcefully terminating the replica
755
+
756
+ volumes: Volumes is a list of volume mounts to attach to the container. Each mount must
757
+ reference an existing volume by name
758
+
759
+ extra_headers: Send extra headers
760
+
761
+ extra_query: Add additional query parameters to the request
762
+
763
+ extra_body: Add additional JSON properties to the request
764
+
765
+ timeout: Override the client-level default timeout for this request, in seconds
766
+ """
767
+ return await self._post(
768
+ "/deployments",
769
+ body=await async_maybe_transform(
770
+ {
771
+ "gpu_type": gpu_type,
772
+ "image": image,
773
+ "name": name,
774
+ "args": args,
775
+ "autoscaling": autoscaling,
776
+ "command": command,
777
+ "cpu": cpu,
778
+ "description": description,
779
+ "environment_variables": environment_variables,
780
+ "gpu_count": gpu_count,
781
+ "health_check_path": health_check_path,
782
+ "max_replicas": max_replicas,
783
+ "memory": memory,
784
+ "min_replicas": min_replicas,
785
+ "port": port,
786
+ "storage": storage,
787
+ "termination_grace_period_seconds": termination_grace_period_seconds,
788
+ "volumes": volumes,
789
+ },
790
+ jig_deploy_params.JigDeployParams,
791
+ ),
792
+ options=make_request_options(
793
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
794
+ ),
795
+ cast_to=Deployment,
796
+ )
797
+
798
async def destroy(
    self,
    id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> object:
    """
    Delete an existing deployment.

    Issues a DELETE request against `/deployments/{id}`.

    Args:
      id: Identifier of the deployment to delete. Must be non-empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `id` is empty.
    """
    # An empty id would produce the path `/deployments/`, so reject it
    # before any request is built.
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._delete(
        f"/deployments/{id}",
        options=request_options,
        cast_to=object,
    )
830
+
831
async def retrieve_logs(
    self,
    id: str,
    *,
    follow: bool | Omit = omit,
    replica_id: str | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> DeploymentLogs:
    """
    Retrieve logs from a deployment, optionally filtered by replica ID.

    Issues a GET request against `/deployments/{id}/logs`, sending `follow`
    and `replica_id` as query parameters. Use follow=true to stream logs in
    real-time.

    Args:
      id: Identifier of the deployment whose logs are requested. Must be non-empty.

      follow: Stream logs in real-time (ndjson format)

      replica_id: Replica ID to filter logs

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `id` is empty.
    """
    # An empty id would produce the path `/deployments//logs`, so reject it
    # before any request is built.
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

    # Shape the query parameters according to the params type for this endpoint.
    log_query = await async_maybe_transform(
        {
            "follow": follow,
            "replica_id": replica_id,
        },
        jig_retrieve_logs_params.JigRetrieveLogsParams,
    )
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=log_query,
    )
    return await self._get(
        f"/deployments/{id}/logs",
        options=request_options,
        cast_to=DeploymentLogs,
    )
881
+
882
+
883
class JigResourceWithRawResponse:
    """View of a `JigResource` whose endpoint methods are passed through
    `to_raw_response_wrapper`.

    The `queue`, `volumes` and `secrets` sub-resources are exposed through
    their own `...WithRawResponse` wrappers, created via `cached_property`.
    """

    def __init__(self, jig: JigResource) -> None:
        self._jig = jig

        # Local alias keeps the per-endpoint assignments on one line each.
        wrap = to_raw_response_wrapper
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> QueueResourceWithRawResponse:
        """Raw-response wrapper around the underlying resource's `queue`."""
        return QueueResourceWithRawResponse(self._jig.queue)

    @cached_property
    def volumes(self) -> VolumesResourceWithRawResponse:
        """Raw-response wrapper around the underlying resource's `volumes`."""
        return VolumesResourceWithRawResponse(self._jig.volumes)

    @cached_property
    def secrets(self) -> SecretsResourceWithRawResponse:
        """Raw-response wrapper around the underlying resource's `secrets`."""
        return SecretsResourceWithRawResponse(self._jig.secrets)
917
+
918
+
919
class AsyncJigResourceWithRawResponse:
    """View of an `AsyncJigResource` whose endpoint methods are passed through
    `async_to_raw_response_wrapper`.

    The `queue`, `volumes` and `secrets` sub-resources are exposed through
    their own `Async...WithRawResponse` wrappers, created via `cached_property`.
    """

    def __init__(self, jig: AsyncJigResource) -> None:
        self._jig = jig

        # Local alias keeps the per-endpoint assignments on one line each.
        wrap = async_to_raw_response_wrapper
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> AsyncQueueResourceWithRawResponse:
        """Raw-response wrapper around the underlying resource's `queue`."""
        return AsyncQueueResourceWithRawResponse(self._jig.queue)

    @cached_property
    def volumes(self) -> AsyncVolumesResourceWithRawResponse:
        """Raw-response wrapper around the underlying resource's `volumes`."""
        return AsyncVolumesResourceWithRawResponse(self._jig.volumes)

    @cached_property
    def secrets(self) -> AsyncSecretsResourceWithRawResponse:
        """Raw-response wrapper around the underlying resource's `secrets`."""
        return AsyncSecretsResourceWithRawResponse(self._jig.secrets)
953
+
954
+
955
class JigResourceWithStreamingResponse:
    """View of a `JigResource` whose endpoint methods are passed through
    `to_streamed_response_wrapper`.

    The `queue`, `volumes` and `secrets` sub-resources are exposed through
    their own `...WithStreamingResponse` wrappers, created via `cached_property`.
    """

    def __init__(self, jig: JigResource) -> None:
        self._jig = jig

        # Local alias keeps the per-endpoint assignments on one line each.
        wrap = to_streamed_response_wrapper
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> QueueResourceWithStreamingResponse:
        """Streaming-response wrapper around the underlying resource's `queue`."""
        return QueueResourceWithStreamingResponse(self._jig.queue)

    @cached_property
    def volumes(self) -> VolumesResourceWithStreamingResponse:
        """Streaming-response wrapper around the underlying resource's `volumes`."""
        return VolumesResourceWithStreamingResponse(self._jig.volumes)

    @cached_property
    def secrets(self) -> SecretsResourceWithStreamingResponse:
        """Streaming-response wrapper around the underlying resource's `secrets`."""
        return SecretsResourceWithStreamingResponse(self._jig.secrets)
989
+
990
+
991
class AsyncJigResourceWithStreamingResponse:
    """View of an `AsyncJigResource` whose endpoint methods are passed through
    `async_to_streamed_response_wrapper`.

    The `queue`, `volumes` and `secrets` sub-resources are exposed through
    their own `Async...WithStreamingResponse` wrappers, created via
    `cached_property`.
    """

    def __init__(self, jig: AsyncJigResource) -> None:
        self._jig = jig

        # Local alias keeps the per-endpoint assignments on one line each.
        wrap = async_to_streamed_response_wrapper
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> AsyncQueueResourceWithStreamingResponse:
        """Streaming-response wrapper around the underlying resource's `queue`."""
        return AsyncQueueResourceWithStreamingResponse(self._jig.queue)

    @cached_property
    def volumes(self) -> AsyncVolumesResourceWithStreamingResponse:
        """Streaming-response wrapper around the underlying resource's `volumes`."""
        return AsyncVolumesResourceWithStreamingResponse(self._jig.volumes)

    @cached_property
    def secrets(self) -> AsyncSecretsResourceWithStreamingResponse:
        """Streaming-response wrapper around the underlying resource's `secrets`."""
        return AsyncSecretsResourceWithStreamingResponse(self._jig.secrets)