together 2.0.0a17__py3-none-any.whl → 2.0.0a19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. together/_base_client.py +5 -2
  2. together/_client.py +1 -77
  3. together/_compat.py +3 -3
  4. together/_utils/_json.py +35 -0
  5. together/_version.py +1 -1
  6. together/lib/cli/api/beta/__init__.py +2 -0
  7. together/lib/cli/api/beta/jig/__init__.py +52 -0
  8. together/lib/cli/api/beta/jig/_config.py +170 -0
  9. together/lib/cli/api/beta/jig/jig.py +664 -0
  10. together/lib/cli/api/beta/jig/secrets.py +138 -0
  11. together/lib/cli/api/beta/jig/volumes.py +509 -0
  12. together/lib/cli/api/endpoints/create.py +7 -3
  13. together/lib/cli/api/endpoints/hardware.py +38 -7
  14. together/lib/cli/api/models/upload.py +5 -1
  15. together/resources/__init__.py +0 -28
  16. together/resources/beta/__init__.py +14 -0
  17. together/resources/beta/beta.py +32 -0
  18. together/resources/beta/clusters/clusters.py +12 -12
  19. together/resources/beta/clusters/storage.py +10 -10
  20. together/resources/beta/jig/__init__.py +61 -0
  21. together/resources/beta/jig/jig.py +1004 -0
  22. together/resources/beta/jig/queue.py +482 -0
  23. together/resources/beta/jig/secrets.py +548 -0
  24. together/resources/beta/jig/volumes.py +514 -0
  25. together/resources/chat/completions.py +10 -0
  26. together/resources/endpoints.py +103 -1
  27. together/resources/models/__init__.py +33 -0
  28. together/resources/{models.py → models/models.py} +41 -9
  29. together/resources/models/uploads.py +163 -0
  30. together/types/__init__.py +2 -4
  31. together/types/beta/__init__.py +6 -0
  32. together/types/beta/deployment.py +261 -0
  33. together/types/beta/deployment_logs.py +11 -0
  34. together/types/beta/jig/__init__.py +20 -0
  35. together/types/beta/jig/queue_cancel_params.py +13 -0
  36. together/types/beta/jig/queue_cancel_response.py +11 -0
  37. together/types/beta/jig/queue_metrics_params.py +12 -0
  38. together/types/beta/jig/queue_metrics_response.py +8 -0
  39. together/types/beta/jig/queue_retrieve_params.py +15 -0
  40. together/types/beta/jig/queue_retrieve_response.py +35 -0
  41. together/types/beta/jig/queue_submit_params.py +19 -0
  42. together/types/beta/jig/queue_submit_response.py +25 -0
  43. together/types/beta/jig/secret.py +33 -0
  44. together/types/beta/jig/secret_create_params.py +34 -0
  45. together/types/beta/jig/secret_list_response.py +16 -0
  46. together/types/beta/jig/secret_update_params.py +34 -0
  47. together/types/beta/jig/volume.py +47 -0
  48. together/types/beta/jig/volume_create_params.py +34 -0
  49. together/types/beta/jig/volume_list_response.py +16 -0
  50. together/types/beta/jig/volume_update_params.py +34 -0
  51. together/types/beta/jig_deploy_params.py +150 -0
  52. together/types/beta/jig_list_response.py +16 -0
  53. together/types/beta/jig_retrieve_logs_params.py +12 -0
  54. together/types/beta/jig_update_params.py +141 -0
  55. together/types/chat/completion_create_params.py +11 -0
  56. together/types/{hardware_list_params.py → endpoint_list_hardware_params.py} +2 -2
  57. together/types/{hardware_list_response.py → endpoint_list_hardware_response.py} +2 -2
  58. together/types/models/__init__.py +5 -0
  59. together/types/{job_retrieve_response.py → models/upload_status_response.py} +3 -3
  60. {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/METADATA +15 -14
  61. {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/RECORD +64 -30
  62. together/resources/hardware.py +0 -181
  63. together/resources/jobs.py +0 -214
  64. together/types/job_list_response.py +0 -47
  65. {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/WHEEL +0 -0
  66. {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/entry_points.txt +0 -0
  67. {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1004 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, Iterable
6
+ from typing_extensions import Literal
7
+
8
+ import httpx
9
+
10
+ from .queue import (
11
+ QueueResource,
12
+ AsyncQueueResource,
13
+ QueueResourceWithRawResponse,
14
+ AsyncQueueResourceWithRawResponse,
15
+ QueueResourceWithStreamingResponse,
16
+ AsyncQueueResourceWithStreamingResponse,
17
+ )
18
+ from .secrets import (
19
+ SecretsResource,
20
+ AsyncSecretsResource,
21
+ SecretsResourceWithRawResponse,
22
+ AsyncSecretsResourceWithRawResponse,
23
+ SecretsResourceWithStreamingResponse,
24
+ AsyncSecretsResourceWithStreamingResponse,
25
+ )
26
+ from .volumes import (
27
+ VolumesResource,
28
+ AsyncVolumesResource,
29
+ VolumesResourceWithRawResponse,
30
+ AsyncVolumesResourceWithRawResponse,
31
+ VolumesResourceWithStreamingResponse,
32
+ AsyncVolumesResourceWithStreamingResponse,
33
+ )
34
+ from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
35
+ from ...._utils import maybe_transform, async_maybe_transform
36
+ from ...._compat import cached_property
37
+ from ...._resource import SyncAPIResource, AsyncAPIResource
38
+ from ...._response import (
39
+ to_raw_response_wrapper,
40
+ to_streamed_response_wrapper,
41
+ async_to_raw_response_wrapper,
42
+ async_to_streamed_response_wrapper,
43
+ )
44
+ from ....types.beta import jig_deploy_params, jig_update_params, jig_retrieve_logs_params
45
+ from ...._base_client import make_request_options
46
+ from ....types.beta.deployment import Deployment
47
+ from ....types.beta.deployment_logs import DeploymentLogs
48
+ from ....types.beta.jig_list_response import JigListResponse
49
+
50
# Names exported by this module: the synchronous and asynchronous resource classes.
__all__ = ["JigResource", "AsyncJigResource"]
51
+
52
+
53
class JigResource(SyncAPIResource):
    """Synchronous resource for managing deployments through the `/deployments` endpoints.

    Also exposes the `queue`, `volumes` and `secrets` sub-resources, each built
    from the same underlying client.
    """

    @cached_property
    def queue(self) -> QueueResource:
        """Queue sub-resource sharing this resource's client."""
        client = self._client
        return QueueResource(client)

    @cached_property
    def volumes(self) -> VolumesResource:
        """Volumes sub-resource sharing this resource's client."""
        client = self._client
        return VolumesResource(client)

    @cached_property
    def secrets(self) -> SecretsResource:
        """Secrets sub-resource sharing this resource's client."""
        client = self._client
        return SecretsResource(client)

    @cached_property
    def with_raw_response(self) -> JigResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
        """
        raw_view = JigResourceWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> JigResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
        """
        streaming_view = JigResourceWithStreamingResponse(self)
        return streaming_view

    def retrieve(
        self,
        id: str,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Deployment:
        """
        Fetch the details of one deployment, addressed by its ID or name.

        Args:
          id: ID or name of the deployment; interpolated into the request path.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `id` is empty.
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(f"/deployments/{id}", options=request_options, cast_to=Deployment)

    def update(
        self,
        id: str,
        *,
        args: SequenceNotStr[str] | Omit = omit,
        autoscaling: Dict[str, str] | Omit = omit,
        command: SequenceNotStr[str] | Omit = omit,
        cpu: float | Omit = omit,
        description: str | Omit = omit,
        environment_variables: Iterable[jig_update_params.EnvironmentVariable] | Omit = omit,
        gpu_count: int | Omit = omit,
        # The second literal used to carry a stray leading space (" a100-80gb"),
        # which disagreed with the value accepted by `deploy`.
        gpu_type: Literal["h100-80gb", "a100-80gb"] | Omit = omit,
        health_check_path: str | Omit = omit,
        image: str | Omit = omit,
        max_replicas: int | Omit = omit,
        memory: float | Omit = omit,
        min_replicas: int | Omit = omit,
        name: str | Omit = omit,
        port: int | Omit = omit,
        storage: int | Omit = omit,
        termination_grace_period_seconds: int | Omit = omit,
        volumes: Iterable[jig_update_params.Volume] | Omit = omit,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Deployment:
        """
        Change the configuration of an existing deployment.

        Args:
          id: Identifier of the deployment; interpolated into the request path.

          args: Overrides the container's CMD. Give an array of arguments (e.g.,
              ["python", "app.py"])

          autoscaling: Autoscaling configuration as key-value pairs. Example: {"metric":
              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog

          command: Overrides the container's ENTRYPOINT. Give an array (e.g.,
              ["/bin/sh", "-c"])

          cpu: Number of CPU cores to allocate per container instance (e.g., 0.1 =
              100 milli cores)

          description: Optional human-readable description of the deployment

          environment_variables: Environment variables to set in the container.
              This will replace all existing environment variables

          gpu_count: Number of GPUs to allocate per container instance

          gpu_type: GPU hardware to use (e.g., "h100-80gb")

          health_check_path: HTTP path for health checks (e.g., "/health"). Set to
              empty string to disable health checks

          image: Container image to deploy from registry.together.ai.

          max_replicas: Maximum number of replicas that can be scaled up to.

          memory: Amount of RAM to allocate per container instance in GiB (e.g., 0.5
              = 512MiB)

          min_replicas: Minimum number of replicas to run

          name: New unique identifier for the deployment. Must contain only
              alphanumeric characters, underscores, or hyphens (1-100 characters)

          port: Container port the application listens on (e.g., 8080 for web
              servers)

          storage: Amount of ephemeral disk storage to allocate per container
              instance (e.g., 10 = 10GiB)

          termination_grace_period_seconds: Time in seconds to wait for graceful
              shutdown before forcefully terminating the replica

          volumes: Volume mounts to attach to the container. This will replace
              all existing volumes

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `id` is empty.
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
        payload = maybe_transform(
            {
                "args": args,
                "autoscaling": autoscaling,
                "command": command,
                "cpu": cpu,
                "description": description,
                "environment_variables": environment_variables,
                "gpu_count": gpu_count,
                "gpu_type": gpu_type,
                "health_check_path": health_check_path,
                "image": image,
                "max_replicas": max_replicas,
                "memory": memory,
                "min_replicas": min_replicas,
                "name": name,
                "port": port,
                "storage": storage,
                "termination_grace_period_seconds": termination_grace_period_seconds,
                "volumes": volumes,
            },
            jig_update_params.JigUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._patch(
            f"/deployments/{id}",
            body=payload,
            options=request_options,
            cast_to=Deployment,
        )

    def list(
        self,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> JigListResponse:
        """
        Get a list of all deployments in your project.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get("/deployments", options=request_options, cast_to=JigListResponse)

    def deploy(
        self,
        *,
        gpu_type: Literal["h100-80gb", "a100-80gb"],
        image: str,
        name: str,
        args: SequenceNotStr[str] | Omit = omit,
        autoscaling: Dict[str, str] | Omit = omit,
        command: SequenceNotStr[str] | Omit = omit,
        cpu: float | Omit = omit,
        description: str | Omit = omit,
        environment_variables: Iterable[jig_deploy_params.EnvironmentVariable] | Omit = omit,
        gpu_count: int | Omit = omit,
        health_check_path: str | Omit = omit,
        max_replicas: int | Omit = omit,
        memory: float | Omit = omit,
        min_replicas: int | Omit = omit,
        port: int | Omit = omit,
        storage: int | Omit = omit,
        termination_grace_period_seconds: int | Omit = omit,
        volumes: Iterable[jig_deploy_params.Volume] | Omit = omit,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Deployment:
        """
        Create a new deployment with the given configuration.

        Args:
          gpu_type: GPU hardware to use (e.g., "h100-80gb").

          image: Container image to deploy from registry.together.ai.

          name: Unique identifier for the deployment. Must contain only
              alphanumeric characters, underscores, or hyphens (1-100 characters)

          args: Overrides the container's CMD. Give an array of arguments (e.g.,
              ["python", "app.py"])

          autoscaling: Autoscaling configuration as key-value pairs. Example: {"metric":
              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog

          command: Overrides the container's ENTRYPOINT. Give an array (e.g.,
              ["/bin/sh", "-c"])

          cpu: Number of CPU cores to allocate per container instance (e.g., 0.1 =
              100 milli cores)

          description: Optional human-readable description of the deployment

          environment_variables: Environment variables to set in the container.
              Each must have a name and either a value or value_from_secret

          gpu_count: Number of GPUs to allocate per container instance. Defaults to 0
              if not specified

          health_check_path: HTTP path for health checks (e.g., "/health"). If set,
              the platform will check this endpoint to determine container health

          max_replicas: Maximum number of container instances that can be scaled up
              to. If not set, will be set to MinReplicas

          memory: Amount of RAM to allocate per container instance in GiB (e.g., 0.5
              = 512MiB)

          min_replicas: Minimum number of container instances to run. Defaults to 1
              if not specified

          port: Container port the application listens on (e.g., 8080 for web
              servers). Required if your application serves traffic

          storage: Amount of ephemeral disk storage to allocate per container
              instance (e.g., 10 = 10GiB)

          termination_grace_period_seconds: Time in seconds to wait for graceful
              shutdown before forcefully terminating the replica

          volumes: Volume mounts to attach to the container. Each mount must
              reference an existing volume by name

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        payload = maybe_transform(
            {
                "gpu_type": gpu_type,
                "image": image,
                "name": name,
                "args": args,
                "autoscaling": autoscaling,
                "command": command,
                "cpu": cpu,
                "description": description,
                "environment_variables": environment_variables,
                "gpu_count": gpu_count,
                "health_check_path": health_check_path,
                "max_replicas": max_replicas,
                "memory": memory,
                "min_replicas": min_replicas,
                "port": port,
                "storage": storage,
                "termination_grace_period_seconds": termination_grace_period_seconds,
                "volumes": volumes,
            },
            jig_deploy_params.JigDeployParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/deployments",
            body=payload,
            options=request_options,
            cast_to=Deployment,
        )

    def destroy(
        self,
        id: str,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> object:
        """
        Delete an existing deployment.

        Args:
          id: Identifier of the deployment; interpolated into the request path.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `id` is empty.
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._delete(f"/deployments/{id}", options=request_options, cast_to=object)

    def retrieve_logs(
        self,
        id: str,
        *,
        replica_id: str | Omit = omit,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here win over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> DeploymentLogs:
        """
        Retrieve logs from a deployment, optionally filtered by replica ID.

        Args:
          id: Identifier of the deployment; interpolated into the request path.

          replica_id: Replica ID to filter logs; sent as a query parameter

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `id` is empty.
        """
        if not id:
            raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
        log_query = maybe_transform({"replica_id": replica_id}, jig_retrieve_logs_params.JigRetrieveLogsParams)
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=log_query,
        )
        return self._get(f"/deployments/{id}/logs", options=request_options, cast_to=DeploymentLogs)
455
+
456
+
457
+ class AsyncJigResource(AsyncAPIResource):
458
@cached_property
def queue(self) -> AsyncQueueResource:
    """Async queue sub-resource sharing this resource's client."""
    client = self._client
    return AsyncQueueResource(client)
461
+
462
@cached_property
def volumes(self) -> AsyncVolumesResource:
    """Async volumes sub-resource sharing this resource's client."""
    client = self._client
    return AsyncVolumesResource(client)
465
+
466
@cached_property
def secrets(self) -> AsyncSecretsResource:
    """Async secrets sub-resource sharing this resource's client."""
    client = self._client
    return AsyncSecretsResource(client)
469
+
470
@cached_property
def with_raw_response(self) -> AsyncJigResourceWithRawResponse:
    """
    Prefix any HTTP method call with this property to receive the raw
    response object rather than the parsed content.

    For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
    """
    raw_view = AsyncJigResourceWithRawResponse(self)
    return raw_view
479
+
480
@cached_property
def with_streaming_response(self) -> AsyncJigResourceWithStreamingResponse:
    """
    Like `.with_raw_response`, except the response body is not read eagerly.

    For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
    """
    streaming_view = AsyncJigResourceWithStreamingResponse(self)
    return streaming_view
488
+
489
async def retrieve(
    self,
    id: str,
    *,
    # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Deployment:
    """
    Fetch the details of one deployment, addressed by its ID or name.

    Args:
      id: ID or name of the deployment; interpolated into the request path.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `id` is empty.
    """
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get(f"/deployments/{id}", options=request_options, cast_to=Deployment)
521
+
522
async def update(
    self,
    id: str,
    *,
    args: SequenceNotStr[str] | Omit = omit,
    autoscaling: Dict[str, str] | Omit = omit,
    command: SequenceNotStr[str] | Omit = omit,
    cpu: float | Omit = omit,
    description: str | Omit = omit,
    environment_variables: Iterable[jig_update_params.EnvironmentVariable] | Omit = omit,
    gpu_count: int | Omit = omit,
    # The second literal used to carry a stray leading space (" a100-80gb"),
    # which disagreed with the value accepted by `deploy`.
    gpu_type: Literal["h100-80gb", "a100-80gb"] | Omit = omit,
    health_check_path: str | Omit = omit,
    image: str | Omit = omit,
    max_replicas: int | Omit = omit,
    memory: float | Omit = omit,
    min_replicas: int | Omit = omit,
    name: str | Omit = omit,
    port: int | Omit = omit,
    storage: int | Omit = omit,
    termination_grace_period_seconds: int | Omit = omit,
    volumes: Iterable[jig_update_params.Volume] | Omit = omit,
    # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Deployment:
    """
    Change the configuration of an existing deployment.

    Args:
      id: Identifier of the deployment; interpolated into the request path.

      args: Overrides the container's CMD. Give an array of arguments (e.g.,
          ["python", "app.py"])

      autoscaling: Autoscaling configuration as key-value pairs. Example: {"metric":
          "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog

      command: Overrides the container's ENTRYPOINT. Give an array (e.g.,
          ["/bin/sh", "-c"])

      cpu: Number of CPU cores to allocate per container instance (e.g., 0.1 =
          100 milli cores)

      description: Optional human-readable description of the deployment

      environment_variables: Environment variables to set in the container.
          This will replace all existing environment variables

      gpu_count: Number of GPUs to allocate per container instance

      gpu_type: GPU hardware to use (e.g., "h100-80gb")

      health_check_path: HTTP path for health checks (e.g., "/health"). Set to
          empty string to disable health checks

      image: Container image to deploy from registry.together.ai.

      max_replicas: Maximum number of replicas that can be scaled up to.

      memory: Amount of RAM to allocate per container instance in GiB (e.g., 0.5
          = 512MiB)

      min_replicas: Minimum number of replicas to run

      name: New unique identifier for the deployment. Must contain only
          alphanumeric characters, underscores, or hyphens (1-100 characters)

      port: Container port the application listens on (e.g., 8080 for web
          servers)

      storage: Amount of ephemeral disk storage to allocate per container
          instance (e.g., 10 = 10GiB)

      termination_grace_period_seconds: Time in seconds to wait for graceful
          shutdown before forcefully terminating the replica

      volumes: Volume mounts to attach to the container. This will replace
          all existing volumes

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `id` is empty.
    """
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
    payload = await async_maybe_transform(
        {
            "args": args,
            "autoscaling": autoscaling,
            "command": command,
            "cpu": cpu,
            "description": description,
            "environment_variables": environment_variables,
            "gpu_count": gpu_count,
            "gpu_type": gpu_type,
            "health_check_path": health_check_path,
            "image": image,
            "max_replicas": max_replicas,
            "memory": memory,
            "min_replicas": min_replicas,
            "name": name,
            "port": port,
            "storage": storage,
            "termination_grace_period_seconds": termination_grace_period_seconds,
            "volumes": volumes,
        },
        jig_update_params.JigUpdateParams,
    )
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._patch(
        f"/deployments/{id}",
        body=payload,
        options=request_options,
        cast_to=Deployment,
    )
644
+
645
async def list(
    self,
    *,
    # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> JigListResponse:
    """
    Get a list of all deployments in your project.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get("/deployments", options=request_options, cast_to=JigListResponse)
663
+
664
+ async def deploy(
665
+ self,
666
+ *,
667
+ gpu_type: Literal["h100-80gb", "a100-80gb"],
668
+ image: str,
669
+ name: str,
670
+ args: SequenceNotStr[str] | Omit = omit,
671
+ autoscaling: Dict[str, str] | Omit = omit,
672
+ command: SequenceNotStr[str] | Omit = omit,
673
+ cpu: float | Omit = omit,
674
+ description: str | Omit = omit,
675
+ environment_variables: Iterable[jig_deploy_params.EnvironmentVariable] | Omit = omit,
676
+ gpu_count: int | Omit = omit,
677
+ health_check_path: str | Omit = omit,
678
+ max_replicas: int | Omit = omit,
679
+ memory: float | Omit = omit,
680
+ min_replicas: int | Omit = omit,
681
+ port: int | Omit = omit,
682
+ storage: int | Omit = omit,
683
+ termination_grace_period_seconds: int | Omit = omit,
684
+ volumes: Iterable[jig_deploy_params.Volume] | Omit = omit,
685
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
686
+ # The extra values given here take precedence over values defined on the client or passed to this method.
687
+ extra_headers: Headers | None = None,
688
+ extra_query: Query | None = None,
689
+ extra_body: Body | None = None,
690
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
691
+ ) -> Deployment:
692
+ """
693
+ Create a new deployment with specified configuration
694
+
695
+ Args:
696
+ gpu_type: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
697
+
698
+ image: Image is the container image to deploy from registry.together.ai.
699
+
700
+ name: Name is the unique identifier for your deployment. Must contain only
701
+ alphanumeric characters, underscores, or hyphens (1-100 characters)
702
+
703
+ args: Args overrides the container's CMD. Provide as an array of arguments (e.g.,
704
+ ["python", "app.py"])
705
+
706
+ autoscaling:
707
+ Autoscaling configuration as key-value pairs. Example: {"metric":
708
+ "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog
709
+
710
+ command: Command overrides the container's ENTRYPOINT. Provide as an array (e.g.,
711
+ ["/bin/sh", "-c"])
712
+
713
+ cpu: CPU is the number of CPU cores to allocate per container instance (e.g., 0.1 =
714
+ 100 milli cores)
715
+
716
+ description: Description is an optional human-readable description of your deployment
717
+
718
+ environment_variables: EnvironmentVariables is a list of environment variables to set in the container.
719
+ Each must have a name and either a value or value_from_secret
720
+
721
+ gpu_count: GPUCount is the number of GPUs to allocate per container instance. Defaults to 0
722
+ if not specified
723
+
724
+ health_check_path: HealthCheckPath is the HTTP path for health checks (e.g., "/health"). If set,
725
+ the platform will check this endpoint to determine container health
726
+
727
+ max_replicas: MaxReplicas is the maximum number of container instances that can be scaled up
728
+ to. If not set, will be set to MinReplicas
729
+
730
+ memory: Memory is the amount of RAM to allocate per container instance in GiB (e.g., 0.5
731
+ = 512MiB)
732
+
733
+ min_replicas: MinReplicas is the minimum number of container instances to run. Defaults to 1
734
+ if not specified
735
+
736
+ port: Port is the container port your application listens on (e.g., 8080 for web
737
+ servers). Required if your application serves traffic
738
+
739
+ storage: Storage is the amount of ephemeral disk storage to allocate per container
740
+ instance (e.g., 10 = 10GiB)
741
+
742
+ termination_grace_period_seconds: TerminationGracePeriodSeconds is the time in seconds to wait for graceful
743
+ shutdown before forcefully terminating the replica
744
+
745
+ volumes: Volumes is a list of volume mounts to attach to the container. Each mount must
746
+ reference an existing volume by name
747
+
748
+ extra_headers: Send extra headers
749
+
750
+ extra_query: Add additional query parameters to the request
751
+
752
+ extra_body: Add additional JSON properties to the request
753
+
754
+ timeout: Override the client-level default timeout for this request, in seconds
755
+ """
756
+ return await self._post(
757
+ "/deployments",
758
+ body=await async_maybe_transform(
759
+ {
760
+ "gpu_type": gpu_type,
761
+ "image": image,
762
+ "name": name,
763
+ "args": args,
764
+ "autoscaling": autoscaling,
765
+ "command": command,
766
+ "cpu": cpu,
767
+ "description": description,
768
+ "environment_variables": environment_variables,
769
+ "gpu_count": gpu_count,
770
+ "health_check_path": health_check_path,
771
+ "max_replicas": max_replicas,
772
+ "memory": memory,
773
+ "min_replicas": min_replicas,
774
+ "port": port,
775
+ "storage": storage,
776
+ "termination_grace_period_seconds": termination_grace_period_seconds,
777
+ "volumes": volumes,
778
+ },
779
+ jig_deploy_params.JigDeployParams,
780
+ ),
781
+ options=make_request_options(
782
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
783
+ ),
784
+ cast_to=Deployment,
785
+ )
786
+
787
async def destroy(
    self,
    id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> object:
    """
    Delete an existing deployment.

    Sends a DELETE request to `/deployments/{id}`.

    Args:
      id: Identifier of the deployment to delete. Must be non-empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `id` is empty (or otherwise falsy).
    """
    # Reject an empty id up front: it would otherwise collapse the path to "/deployments/".
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # NOTE(review): `id` is placed into the URL path as-is (no percent-encoding);
    # confirm callers only pass trusted deployment ids.
    return await self._delete(
        f"/deployments/{id}",
        options=request_options,
        cast_to=object,
    )
819
+
820
async def retrieve_logs(
    self,
    id: str,
    *,
    replica_id: str | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> DeploymentLogs:
    """
    Retrieve logs from a deployment, optionally filtered by replica ID.

    Sends a GET request to `/deployments/{id}/logs`, passing `replica_id` as a
    query parameter.

    Args:
      id: Identifier of the deployment whose logs are requested. Must be non-empty.

      replica_id: Replica ID to filter logs

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `id` is empty (or otherwise falsy).
    """
    # Reject an empty id up front: it would otherwise produce the path "/deployments//logs".
    if not id:
        raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")

    # Shape the query parameters according to the params type for this endpoint.
    log_query = await async_maybe_transform(
        {"replica_id": replica_id},
        jig_retrieve_logs_params.JigRetrieveLogsParams,
    )
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=log_query,
    )
    # NOTE(review): `id` is placed into the URL path as-is (no percent-encoding);
    # confirm callers only pass trusted deployment ids.
    return await self._get(
        f"/deployments/{id}/logs",
        options=request_options,
        cast_to=DeploymentLogs,
    )
861
+
862
+
863
class JigResourceWithRawResponse:
    """View of a `JigResource` whose methods are passed through `to_raw_response_wrapper`.

    The `queue`, `volumes` and `secrets` properties return the matching
    `*WithRawResponse` view of the wrapped resource's sub-resources; each is
    decorated with `cached_property`.
    """

    def __init__(self, jig: JigResource) -> None:
        wrap = to_raw_response_wrapper

        # Keep the underlying resource so the sub-resource properties can reach it.
        self._jig = jig

        # One explicit assignment per endpoint keeps each attribute individually typed.
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> QueueResourceWithRawResponse:
        """Raw-response view over the wrapped resource's `queue`."""
        queue_resource = self._jig.queue
        return QueueResourceWithRawResponse(queue_resource)

    @cached_property
    def volumes(self) -> VolumesResourceWithRawResponse:
        """Raw-response view over the wrapped resource's `volumes`."""
        volumes_resource = self._jig.volumes
        return VolumesResourceWithRawResponse(volumes_resource)

    @cached_property
    def secrets(self) -> SecretsResourceWithRawResponse:
        """Raw-response view over the wrapped resource's `secrets`."""
        secrets_resource = self._jig.secrets
        return SecretsResourceWithRawResponse(secrets_resource)
897
+
898
+
899
class AsyncJigResourceWithRawResponse:
    """View of an `AsyncJigResource` whose methods are passed through `async_to_raw_response_wrapper`.

    The `queue`, `volumes` and `secrets` properties return the matching
    `Async*WithRawResponse` view of the wrapped resource's sub-resources; each
    is decorated with `cached_property`.
    """

    def __init__(self, jig: AsyncJigResource) -> None:
        wrap = async_to_raw_response_wrapper

        # Keep the underlying resource so the sub-resource properties can reach it.
        self._jig = jig

        # One explicit assignment per endpoint keeps each attribute individually typed.
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> AsyncQueueResourceWithRawResponse:
        """Raw-response view over the wrapped resource's `queue`."""
        queue_resource = self._jig.queue
        return AsyncQueueResourceWithRawResponse(queue_resource)

    @cached_property
    def volumes(self) -> AsyncVolumesResourceWithRawResponse:
        """Raw-response view over the wrapped resource's `volumes`."""
        volumes_resource = self._jig.volumes
        return AsyncVolumesResourceWithRawResponse(volumes_resource)

    @cached_property
    def secrets(self) -> AsyncSecretsResourceWithRawResponse:
        """Raw-response view over the wrapped resource's `secrets`."""
        secrets_resource = self._jig.secrets
        return AsyncSecretsResourceWithRawResponse(secrets_resource)
933
+
934
+
935
class JigResourceWithStreamingResponse:
    """View of a `JigResource` whose methods are passed through `to_streamed_response_wrapper`.

    The `queue`, `volumes` and `secrets` properties return the matching
    `*WithStreamingResponse` view of the wrapped resource's sub-resources; each
    is decorated with `cached_property`.
    """

    def __init__(self, jig: JigResource) -> None:
        wrap = to_streamed_response_wrapper

        # Keep the underlying resource so the sub-resource properties can reach it.
        self._jig = jig

        # One explicit assignment per endpoint keeps each attribute individually typed.
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> QueueResourceWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `queue`."""
        queue_resource = self._jig.queue
        return QueueResourceWithStreamingResponse(queue_resource)

    @cached_property
    def volumes(self) -> VolumesResourceWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `volumes`."""
        volumes_resource = self._jig.volumes
        return VolumesResourceWithStreamingResponse(volumes_resource)

    @cached_property
    def secrets(self) -> SecretsResourceWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `secrets`."""
        secrets_resource = self._jig.secrets
        return SecretsResourceWithStreamingResponse(secrets_resource)
969
+
970
+
971
class AsyncJigResourceWithStreamingResponse:
    """View of an `AsyncJigResource` whose methods are passed through `async_to_streamed_response_wrapper`.

    The `queue`, `volumes` and `secrets` properties return the matching
    `Async*WithStreamingResponse` view of the wrapped resource's sub-resources;
    each is decorated with `cached_property`.
    """

    def __init__(self, jig: AsyncJigResource) -> None:
        wrap = async_to_streamed_response_wrapper

        # Keep the underlying resource so the sub-resource properties can reach it.
        self._jig = jig

        # One explicit assignment per endpoint keeps each attribute individually typed.
        self.retrieve = wrap(jig.retrieve)
        self.update = wrap(jig.update)
        self.list = wrap(jig.list)
        self.deploy = wrap(jig.deploy)
        self.destroy = wrap(jig.destroy)
        self.retrieve_logs = wrap(jig.retrieve_logs)

    @cached_property
    def queue(self) -> AsyncQueueResourceWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `queue`."""
        queue_resource = self._jig.queue
        return AsyncQueueResourceWithStreamingResponse(queue_resource)

    @cached_property
    def volumes(self) -> AsyncVolumesResourceWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `volumes`."""
        volumes_resource = self._jig.volumes
        return AsyncVolumesResourceWithStreamingResponse(volumes_resource)

    @cached_property
    def secrets(self) -> AsyncSecretsResourceWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `secrets`."""
        secrets_resource = self._jig.secrets
        return AsyncSecretsResourceWithStreamingResponse(secrets_resource)