together 2.0.0a17__py3-none-any.whl → 2.0.0a19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/_base_client.py +5 -2
- together/_client.py +1 -77
- together/_compat.py +3 -3
- together/_utils/_json.py +35 -0
- together/_version.py +1 -1
- together/lib/cli/api/beta/__init__.py +2 -0
- together/lib/cli/api/beta/jig/__init__.py +52 -0
- together/lib/cli/api/beta/jig/_config.py +170 -0
- together/lib/cli/api/beta/jig/jig.py +664 -0
- together/lib/cli/api/beta/jig/secrets.py +138 -0
- together/lib/cli/api/beta/jig/volumes.py +509 -0
- together/lib/cli/api/endpoints/create.py +7 -3
- together/lib/cli/api/endpoints/hardware.py +38 -7
- together/lib/cli/api/models/upload.py +5 -1
- together/resources/__init__.py +0 -28
- together/resources/beta/__init__.py +14 -0
- together/resources/beta/beta.py +32 -0
- together/resources/beta/clusters/clusters.py +12 -12
- together/resources/beta/clusters/storage.py +10 -10
- together/resources/beta/jig/__init__.py +61 -0
- together/resources/beta/jig/jig.py +1004 -0
- together/resources/beta/jig/queue.py +482 -0
- together/resources/beta/jig/secrets.py +548 -0
- together/resources/beta/jig/volumes.py +514 -0
- together/resources/chat/completions.py +10 -0
- together/resources/endpoints.py +103 -1
- together/resources/models/__init__.py +33 -0
- together/resources/{models.py → models/models.py} +41 -9
- together/resources/models/uploads.py +163 -0
- together/types/__init__.py +2 -4
- together/types/beta/__init__.py +6 -0
- together/types/beta/deployment.py +261 -0
- together/types/beta/deployment_logs.py +11 -0
- together/types/beta/jig/__init__.py +20 -0
- together/types/beta/jig/queue_cancel_params.py +13 -0
- together/types/beta/jig/queue_cancel_response.py +11 -0
- together/types/beta/jig/queue_metrics_params.py +12 -0
- together/types/beta/jig/queue_metrics_response.py +8 -0
- together/types/beta/jig/queue_retrieve_params.py +15 -0
- together/types/beta/jig/queue_retrieve_response.py +35 -0
- together/types/beta/jig/queue_submit_params.py +19 -0
- together/types/beta/jig/queue_submit_response.py +25 -0
- together/types/beta/jig/secret.py +33 -0
- together/types/beta/jig/secret_create_params.py +34 -0
- together/types/beta/jig/secret_list_response.py +16 -0
- together/types/beta/jig/secret_update_params.py +34 -0
- together/types/beta/jig/volume.py +47 -0
- together/types/beta/jig/volume_create_params.py +34 -0
- together/types/beta/jig/volume_list_response.py +16 -0
- together/types/beta/jig/volume_update_params.py +34 -0
- together/types/beta/jig_deploy_params.py +150 -0
- together/types/beta/jig_list_response.py +16 -0
- together/types/beta/jig_retrieve_logs_params.py +12 -0
- together/types/beta/jig_update_params.py +141 -0
- together/types/chat/completion_create_params.py +11 -0
- together/types/{hardware_list_params.py → endpoint_list_hardware_params.py} +2 -2
- together/types/{hardware_list_response.py → endpoint_list_hardware_response.py} +2 -2
- together/types/models/__init__.py +5 -0
- together/types/{job_retrieve_response.py → models/upload_status_response.py} +3 -3
- {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/METADATA +15 -14
- {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/RECORD +64 -30
- together/resources/hardware.py +0 -181
- together/resources/jobs.py +0 -214
- together/types/job_list_response.py +0 -47
- {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/WHEEL +0 -0
- {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/entry_points.txt +0 -0
- {together-2.0.0a17.dist-info → together-2.0.0a19.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
"""Main jig CLI commands (deploy, build, push, etc.)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
import shlex
|
|
8
|
+
import shutil
|
|
9
|
+
import subprocess
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from urllib.parse import urlparse
|
|
13
|
+
|
|
14
|
+
import click
|
|
15
|
+
from rich.pretty import pprint
|
|
16
|
+
|
|
17
|
+
from together import Together
|
|
18
|
+
from together._exceptions import APIStatusError
|
|
19
|
+
from together.lib.cli.api._utils import handle_api_errors
|
|
20
|
+
from together.lib.cli.api.beta.jig._config import (
|
|
21
|
+
DEBUG,
|
|
22
|
+
WARMUP_ENV_NAME,
|
|
23
|
+
WARMUP_DEST,
|
|
24
|
+
State,
|
|
25
|
+
Config,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Managed dockerfile marker - if this is the first line, jig will regenerate the file
|
|
29
|
+
DOCKERFILE_MANAGED_MARKER = "# MANAGED BY JIG - Remove this line to prevent jig from overwriting this file"
|
|
30
|
+
|
|
31
|
+
# --- Helper Functions ---
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _get_api_base_url(client: Together) -> str:
|
|
35
|
+
"""Extract base URL (scheme://host) from client, stripping any path like /v1"""
|
|
36
|
+
parsed = urlparse(str(client.base_url))
|
|
37
|
+
return f"{parsed.scheme}://{parsed.netloc}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _run(cmd: list[str]) -> subprocess.CompletedProcess[str]:
|
|
41
|
+
"""Run process with defaults"""
|
|
42
|
+
return subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _generate_dockerfile(config: Config) -> str:
    """Generate Dockerfile from config.

    Emits a two-stage build: a full `python` builder stage installs deps with
    uv, then a `-slim` final stage copies the installed interpreter libs and
    binaries over. The result starts with DOCKERFILE_MANAGED_MARKER so
    `_dockerfile` can tell jig-managed files from user-managed ones.
    """
    # Optional apt layer; note it is interpolated into BOTH stages below.
    apt = ""
    if config.image.system_packages:
        sys_pkgs = " ".join(config.image.system_packages or [])
        apt = f"""RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \\
    apt-get update && \\
    DEBIAN_FRONTEND=noninteractive \\
    apt-get install -y --no-install-recommends {sys_pkgs} && \\
    apt-get clean && rm -rf /var/lib/apt/lists/*
"""

    # ENV/RUN sections from config; trailing newline keeps template spacing
    # consistent when the sections are non-empty.
    env = "\n".join(f"ENV {k}={v}" for k, v in config.image.environment.items())
    if env:
        env += "\n"

    run = "\n".join(f"RUN {cmd}" for cmd in config.image.run)
    if run:
        run += "\n"

    copy = "\n".join(f"COPY {file} {file}" for file in _get_files_to_copy(config))

    # Check if .git exists in current directory
    if Path(".git").exists():
        # Bind-mount .git so the image layer doesn't retain the repository.
        git_version_cmd = 'RUN --mount=type=bind,source=.git,target=/git git --git-dir /git describe --tags --exact-match > VERSION || echo "0.0.0-dev" > VERSION'
    else:
        git_version_cmd = 'RUN echo "0.0.0-dev" > VERSION'

    return f"""{DOCKERFILE_MANAGED_MARKER}

# Build stage
FROM python:{config.image.python_version} AS builder

{apt}
# Grab UV to install python packages
COPY --from=ghcr.io/astral-sh/uv /uv /usr/local/bin/uv

WORKDIR /app
COPY pyproject.toml .
RUN --mount=type=cache,target=/root/.cache/uv \\
    uv pip install --system --compile-bytecode . && \\
    (python -c "import sprocket" 2>/dev/null || (echo "sprocket not found in pyproject.toml, installing from pypi.together.ai..." && uv pip install --system --extra-index-url https://pypi.together.ai/ sprocket))

# Final stage - slim image
FROM python:{config.image.python_version}-slim

{apt}
COPY --from=builder /usr/local/lib/python{config.image.python_version} /usr/local/lib/python{config.image.python_version}
COPY --from=builder /usr/local/bin /usr/local/bin

# Tini for proper signal handling
COPY --from=krallin/ubuntu-tini:latest /usr/local/bin/tini /tini
ENTRYPOINT ["/tini", "--"]

{env}
{run}
WORKDIR /app
{copy}
ENV DEPLOYMENT_NAME={config.model_name}
# this tag will set the X-Worker-Version header, used for rollout monitoring
{git_version_cmd}

CMD {json.dumps(shlex.split(config.image.cmd))}"""
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _get_files_to_copy(config: Config) -> list[str]:
|
|
111
|
+
"""Get list of files to copy"""
|
|
112
|
+
files = set(config.image.copy)
|
|
113
|
+
if config.image.auto_include_git:
|
|
114
|
+
try:
|
|
115
|
+
if _run(["git", "status", "--porcelain"]).stdout.strip():
|
|
116
|
+
raise RuntimeError("Git repository has uncommitted changes: auto_include_git not allowed.")
|
|
117
|
+
git_files = _run(["git", "ls-files"]).stdout.strip().split("\n")
|
|
118
|
+
files.update(f for f in git_files if f and f != ".")
|
|
119
|
+
except subprocess.CalledProcessError:
|
|
120
|
+
pass
|
|
121
|
+
|
|
122
|
+
if "." in files:
|
|
123
|
+
raise ValueError("Copying '.' is not allowed. Please enumerate specific files.")
|
|
124
|
+
|
|
125
|
+
return sorted(files)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _dockerfile(config: Config) -> bool:
    """Generate the Dockerfile when jig is allowed to manage it.

    Returns True if the Dockerfile is jig-managed (freshly written or already
    up to date), False if an unmanaged user file exists and was left alone.

    Logic:
    - No Dockerfile yet → generate, return True
    - Existing file without our marker → user-managed, return False
    - Marker present and file newer than config → no-op, return True
    - Marker present and config newer → regenerate, return True
    """
    target = Path(config.dockerfile)

    if target.exists():
        with open(target) as handle:
            header = handle.readline().strip()

        if header != DOCKERFILE_MANAGED_MARKER:
            # Never overwrite a file the user owns.
            return False

        # Skip regeneration when the managed file is at least as new as the config.
        if config._path and config._path.exists():
            if target.stat().st_mtime >= config._path.stat().st_mtime:
                return True

    with open(target, "w") as handle:
        handle.write(_generate_dockerfile(config))

    return True
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _get_image(state: State, config: Config, tag: str = "latest") -> str:
|
|
159
|
+
"""Get full image name"""
|
|
160
|
+
return f"{state.registry_base_path}/{config.model_name}:{tag}"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _get_image_with_digest(state: State, config: Config, tag: str = "latest") -> str:
    """Resolve the full image name to its registry digest (for `latest` only).

    Non-latest tags are returned as-is; for `latest`, the local docker image
    metadata is queried for a RepoDigest that matches our registry.
    """
    image_name = _get_image(state, config, tag)
    if tag != "latest":
        # Explicit tags are assumed stable references; no digest lookup needed.
        return image_name

    try:
        inspect_cmd = ["docker", "inspect", "--format={{json .RepoDigests}}", image_name]
        digests_json = _run(inspect_cmd).stdout.strip()
        if digests_json and digests_json != "null":
            # Match only digests from our registry host (first path component).
            registry = image_name.rsplit("/", 2)[0]
            for candidate in json.loads(digests_json):
                if candidate.startswith(registry):
                    return str(candidate)
    except subprocess.CalledProcessError as e:
        detail = e.stderr.strip() if e.stderr else "Docker command failed"
        raise RuntimeError(f"Failed to get digest for {image_name}: {detail}") from e

    raise RuntimeError(f"No registry digest found for {image_name}. Make sure the image was pushed to registry first.")
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _set_secret(client: Together, config: Config, state: State, name: str, value: str, description: str) -> None:
    """Create or update a deployment-scoped secret and record it in local state.

    The secret is namespaced as "<model_name>-<name>"; the local state maps the
    bare name to that scoped id so deploys can wire it into environment variables.
    """
    deployment_secret_name = f"{config.model_name}-{name}"

    try:
        # Probe for an existing secret; update in place when found.
        client.beta.jig.secrets.retrieve(deployment_secret_name)
        client.beta.jig.secrets.update(
            deployment_secret_name,
            name=deployment_secret_name,
            description=description,
            value=value,
        )
        click.echo(f"\N{CHECK MARK} Updated secret: '{name}'")
    except APIStatusError as e:
        if not (hasattr(e, "status_code") and e.status_code == 404):
            raise
        # 404 from retrieve: the secret doesn't exist yet, so create it.
        click.echo("\N{ROCKET} Creating new secret")
        client.beta.jig.secrets.create(
            name=deployment_secret_name,
            value=value,
            description=description,
        )
        click.echo(f"\N{CHECK MARK} Created secret: {name}")

    state.secrets[name] = deployment_secret_name
    state.save()
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _watch_job_status(client: Together, config: Config, request_id: str) -> None:
    """Poll the queue once per second, printing the job whenever its status changes.

    Stops on a terminal status ("done"/"failed"/"finished"/"error") or Ctrl-C.
    """
    previous_status = None
    while True:
        try:
            job = client.beta.jig.queue.retrieve(
                model=config.model_name,
                request_id=request_id,
            )
            # Only print on transitions to avoid flooding the terminal.
            if job.status != previous_status:
                pprint(job.model_dump_json(), indent_guides=False)
                previous_status = job.status

            if job.status in ["done", "failed", "finished", "error"]:
                break

            time.sleep(1)

        except KeyboardInterrupt:
            click.echo(f"\nStopped watching {request_id}")
            break
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _ensure_registry_base_path(client: Together, state: State) -> None:
|
|
236
|
+
"""Ensure registry base path is set in state"""
|
|
237
|
+
if not state.registry_base_path:
|
|
238
|
+
response = client._client.get("/image-repositories/base-path", headers=client.auth_headers)
|
|
239
|
+
response.raise_for_status()
|
|
240
|
+
data = response.json()
|
|
241
|
+
base_path = data["base-path"]
|
|
242
|
+
# Strip protocol prefix - Docker tags don't support URLs
|
|
243
|
+
if base_path.startswith("https://"):
|
|
244
|
+
base_path = base_path[8:]
|
|
245
|
+
elif base_path.startswith("http://"):
|
|
246
|
+
base_path = base_path[7:]
|
|
247
|
+
state.registry_base_path = base_path
|
|
248
|
+
state.save()
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _build_warm_image(base_image: str) -> None:
    """Run a warmup container to generate a cache, then rebuild with cache baked in.

    This runs the container with RUN_AND_EXIT=1 which triggers warmup_inputs in sprocket.
    The cache directory is mounted at /app/torch_cache and the user's code should set the
    appropriate env var (TORCHINDUCTOR_CACHE_DIR, TKCC_OUTPUT_DIR, etc.) to point there.

    Raises RuntimeError if the warmup run fails, produces no cache files, or
    the follow-up cache-image build fails. Requires a local GPU (`--gpus all`).
    """
    import os

    cache_dir = Path(".") / WARMUP_DEST
    # Clean any existing cache
    try:
        shutil.rmtree(cache_dir)
    except FileNotFoundError:
        pass
    cache_dir.mkdir(exist_ok=True)

    click.echo("\N{FIRE} Running warmup to generate compile cache...")

    # Run container with GPU and RUN_AND_EXIT=1
    # Mount current dir as /app so warmup_inputs can reference local weights
    # Mount cache dir for compile artifacts
    warmup_cmd = ["docker", "run", "--rm", "--gpus", "all", "-e", "RUN_AND_EXIT=1"]
    warmup_cmd.extend(["-e", f"{WARMUP_ENV_NAME}=/app/{WARMUP_DEST}"])
    warmup_cmd.extend(["-v", f"{Path.cwd().absolute()}:/app"])
    # if MODEL_PRELOAD_PATH is set, also mount that (e.g. ~/.cache/huggingface)
    if weights_path := os.getenv("MODEL_PRELOAD_PATH"):
        warmup_cmd.extend(["-v", f"{weights_path}:{weights_path}"])
        warmup_cmd.extend(["-e", f"MODEL_PRELOAD_PATH={weights_path}"])
    warmup_cmd.append(base_image)

    click.echo(f"Running: {' '.join(warmup_cmd)}")
    # Output is intentionally not captured so the user sees warmup progress live.
    result = subprocess.run(warmup_cmd)
    if result.returncode != 0:
        raise RuntimeError(f"Warmup failed with code {result.returncode}")

    # Check cache was generated
    cache_files = list(cache_dir.rglob("*"))
    if not cache_files:
        raise RuntimeError("Warmup completed but no cache files were generated")

    click.echo(f"\N{CHECK MARK} Warmup complete, {len(cache_files)} cache files generated")

    # Generate cache dockerfile - copy cache to same location used during warmup
    cache_dockerfile = Path("Dockerfile.cache")
    dockerfile_content = f"""FROM {base_image}
COPY {cache_dir.name} /app/{WARMUP_DEST}
ENV {WARMUP_ENV_NAME}=/app/{WARMUP_DEST}"""
    cache_dockerfile.write_text(dockerfile_content)

    click.echo("\N{PACKAGE} Building final image with cache...")
    # NOTE: re-tags base_image in place, so the cached image replaces the bare one.
    cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image]
    cmd.extend(["-f", str(cache_dockerfile), "."])

    if subprocess.run(cmd).returncode != 0:
        # Clean up the temp dockerfile on both the failure and success paths.
        cache_dockerfile.unlink(missing_ok=True)
        raise RuntimeError("Cache image build failed")
    cache_dockerfile.unlink(missing_ok=True)
    click.echo("\N{CHECK MARK} Final image with cache built")
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# --- CLI Commands ---
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
@click.command()
def init() -> None:
    """Initialize jig configuration by writing a starter pyproject.toml."""
    pyproject = Path("pyproject.toml")
    if pyproject.exists():
        # Never clobber an existing project file.
        click.echo("pyproject.toml already exists")
        return

    content = """[project]
name = "my-model"
version = "0.1.0"
dependencies = ["torch", "transformers", "sprocket"]

[[tool.uv.index]]
name = "together-pypi"
url = "https://pypi.together.ai/"

[tool.uv.sources]
sprocket = { index = "together-pypi" }

[tool.jig.image]
python_version = "3.11"
system_packages = ["git", "libglib2.0-0"]
cmd = "python app.py"

[tool.jig.deploy]
description = "My model deployment"
gpu_type = "h100-80gb"
gpu_count = 1
"""
    pyproject.write_text(content)
    click.echo("\N{CHECK MARK} Created pyproject.toml")
    click.echo(" Edit the configuration and run 'jig deploy'")
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@click.command()
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def dockerfile(config_path: str | None) -> None:
    """Generate Dockerfile from the jig configuration.

    Prints an error (without overwriting) when an unmanaged Dockerfile exists.
    """
    config = Config.find(config_path)
    if _dockerfile(config):
        click.echo("\N{CHECK MARK} Generated Dockerfile")
    else:
        click.echo(
            f"ERROR: {config.dockerfile} exists and is not managed by jig. "
            # Fixed: this fragment had an f-prefix with no placeholders (ruff F541).
            "Remove or rename the file to allow jig to manage dockerfile.",
            err=True,
        )
|
364
|
+
|
|
365
|
+
|
|
366
|
+
@click.command()
@click.pass_context
@click.option("--tag", default="latest", help="Image tag")
@click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache")
@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)")
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def build(ctx: click.Context, tag: str, warmup: bool, docker_args: str | None, config_path: str | None) -> None:
    """Build container image.

    Regenerates the Dockerfile when jig-managed, runs `docker build` for
    linux/amd64, and optionally runs the warmup pass to bake a compile cache.

    Raises RuntimeError when the docker build exits nonzero.
    """
    import os

    client: Together = ctx.obj
    config = Config.find(config_path)
    state = State.load(config._path.parent)
    _ensure_registry_base_path(client, state)

    image = _get_image(state, config, tag)

    if _dockerfile(config):
        click.echo("\N{CHECK MARK} Generated Dockerfile")
    else:
        click.echo(f"\N{INFORMATION SOURCE} Using existing {config.dockerfile} (not managed by jig)")

    click.echo(f"Building {image}")
    cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image, "."]
    if config.dockerfile != "Dockerfile":
        cmd.extend(["-f", config.dockerfile])

    # Add extra docker args from flag or env; shlex preserves quoted arguments.
    # Fixed: the previous function-local `import shlex as shlex_module` was
    # redundant — `shlex` is already imported at module level.
    extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "")
    if extra_args:
        cmd.extend(shlex.split(extra_args))
    if subprocess.run(cmd).returncode != 0:
        raise RuntimeError("Build failed")

    click.echo("\N{CHECK MARK} Built")

    if warmup:
        _build_warm_image(image)
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@click.command()
@click.pass_context
@click.option("--tag", default="latest", help="Image tag")
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def push(ctx: click.Context, tag: str, config_path: str | None) -> None:
    """Push image to registry.

    Logs into the registry with the Together API key (via --password-stdin),
    then pushes the tagged image. Raises RuntimeError on login/push failure.
    """
    client: Together = ctx.obj
    config = Config.find(config_path)
    state = State.load(config._path.parent)
    _ensure_registry_base_path(client, state)

    image = _get_image(state, config, tag)

    # Registry host is the first path component of the base path.
    registry = state.registry_base_path.split("/")[0]
    # Fixed: was a shell=True `echo <key> | docker login ...` pipeline, which
    # exposed the API key to the shell and its quoting rules. Feeding the key
    # via `input=` keeps it off the command line and avoids shell injection.
    login = subprocess.run(
        ["docker", "login", registry, "--username", "user", "--password-stdin"],
        input=client.api_key,
        text=True,
        capture_output=True,
    )
    if login.returncode != 0:
        raise RuntimeError("Registry login failed")

    click.echo(f"Pushing {image}")
    if subprocess.run(["docker", "push", image]).returncode != 0:
        raise RuntimeError("Push failed")
    click.echo("\N{CHECK MARK} Pushed")
|
431
|
+
|
|
432
|
+
|
|
433
|
+
@click.command()
@click.pass_context
@click.option("--tag", default="latest", help="Image tag")
@click.option("--build-only", is_flag=True, help="Build and push only")
@click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache")
@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)")
@click.option("--image", "existing_image", default=None, help="Use existing image (skip build/push)")
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def deploy(
    ctx: click.Context,
    tag: str,
    build_only: bool,
    warmup: bool,
    docker_args: str | None,
    existing_image: str | None,
    config_path: str | None,
) -> Optional[dict[str, Any]]:
    """Deploy model.

    End-to-end flow: build + push the image (unless --image is given),
    assemble the deployment payload from config and local state, then update
    the existing deployment or create a new one on 404. Returns the API
    response as a dict, or None when --build-only is set.
    """
    client: Together = ctx.obj
    config = Config.find(config_path)
    state = State.load(config._path.parent)
    _ensure_registry_base_path(client, state)

    if existing_image:
        deployment_image = existing_image
    else:
        # Invoke build and push
        ctx.invoke(build, tag=tag, warmup=warmup, docker_args=docker_args, config_path=config_path)
        ctx.invoke(push, tag=tag, config_path=config_path)
        # Pin the deployment to the pushed digest so rollouts are immutable.
        deployment_image = _get_image_with_digest(state, config, tag)

    if build_only:
        click.echo("\N{CHECK MARK} Build complete (--build-only)")
        return None

    deploy_data: dict[str, Any] = {
        "name": config.model_name,
        "description": config.deploy.description,
        "image": deployment_image,
        "min_replicas": config.deploy.min_replicas,
        "max_replicas": config.deploy.max_replicas,
        "port": config.deploy.port,
        "gpu_type": config.deploy.gpu_type,
        "gpu_count": config.deploy.gpu_count,
        "cpu": config.deploy.cpu,
        "memory": config.deploy.memory,
        "storage": config.deploy.storage,
        "autoscaling": config.deploy.autoscaling,
        "termination_grace_period_seconds": config.deploy.termination_grace_period_seconds,
    }

    # Optional fields: only sent when configured.
    if config.deploy.health_check_path:
        deploy_data["health_check_path"] = config.deploy.health_check_path
    if config.deploy.command:
        deploy_data["command"] = config.deploy.command

    env_vars = [{"name": k, "value": v} for k, v in config.deploy.environment_variables.items()]
    env_vars.append({"name": "TOGETHER_API_BASE_URL", "value": _get_api_base_url(client)})

    # Ensure the deployment can call the queue API: store the API key as a
    # secret once, then reference all known secrets by id.
    if "TOGETHER_API_KEY" not in state.secrets:
        _set_secret(client, config, state, "TOGETHER_API_KEY", client.api_key, "Auth key for queue API")

    for name, secret_id in state.secrets.items():
        env_vars.append({"name": name, "value_from_secret": secret_id})

    deploy_data["environment_variables"] = env_vars

    volumes: list[dict[str, str]] = []
    for volume_name, mount_path in state.volumes.items():
        volumes.append({"name": volume_name, "mount_path": mount_path})

    deploy_data["volumes"] = volumes

    if DEBUG:
        pprint(deploy_data, indent_guides=False)
    click.echo(f"Deploying model: {config.model_name}")

    try:
        # Update-first: most deploys are re-deploys of an existing model.
        response = client.beta.jig.update(config.model_name, **deploy_data)
        click.echo("\N{CHECK MARK} Updated deployment")
    except APIStatusError as e:
        if hasattr(e, "status_code") and e.status_code == 404:
            click.echo("\N{ROCKET} Creating new deployment")
            response = client.beta.jig.deploy(**deploy_data)
            click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}")
        else:
            raise

    return response.model_dump()
|
523
|
+
|
|
524
|
+
|
|
525
|
+
@click.command()
@click.pass_context
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def status(ctx: click.Context, config_path: str | None) -> None:
    """Get deployment status"""
    client: Together = ctx.obj
    config = Config.find(config_path)
    deployment = client.beta.jig.retrieve(config.model_name)
    # Dump pydantic models to plain dicts for cleaner pretty-printing.
    printable = deployment.model_dump() if hasattr(deployment, "model_dump") else deployment
    pprint(printable, indent_guides=False)
|
535
|
+
|
|
536
|
+
|
|
537
|
+
@click.command()
@click.pass_context
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def endpoint(ctx: click.Context, config_path: str | None) -> None:
    """Get deployment endpoint URL"""
    client: Together = ctx.obj
    config = Config.find(config_path)
    endpoint_url = f"{client.base_url}/deployment-request/{config.model_name}"
    click.echo(endpoint_url)
|
546
|
+
|
|
547
|
+
|
|
548
|
+
@click.command()
@click.pass_context
@click.option("--follow", is_flag=True, help="Follow log output")
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None:
    """Get deployment logs"""
    client: Together = ctx.obj
    config = Config.find(config_path)

    if not follow:
        # One-shot fetch: print whatever lines the API returned.
        response = client.beta.jig.retrieve_logs(config.model_name)
        if hasattr(response, "lines") and response.lines:
            for log_line in response.lines:
                click.echo(log_line)
        else:
            click.echo("No logs available")
        return

    # Stream logs using SDK streaming response
    try:
        with client.beta.jig.with_streaming_response.retrieve_logs(config.model_name) as stream:
            for raw_line in stream.iter_lines():
                if not raw_line:
                    continue
                # Each streamed line is a JSON object carrying a "lines" array.
                for log_line in json.loads(raw_line).get("lines", []):
                    click.echo(log_line)
    except KeyboardInterrupt:
        click.echo("\nStopped following logs")
    except Exception as e:
        # Stream termination surfaces as a generic error; report and exit cleanly.
        click.echo(f"\nConnection ended: {e}")
|
578
|
+
|
|
579
|
+
|
|
580
|
+
@click.command()
@click.pass_context
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def destroy(ctx: click.Context, config_path: str | None) -> None:
    """Destroy deployment"""
    client: Together = ctx.obj
    config = Config.find(config_path)
    # Tear down the deployment identified by the configured model name.
    client.beta.jig.destroy(config.model_name)
    click.echo(f"\N{WASTEBASKET} Destroyed {config.model_name}")
|
590
|
+
|
|
591
|
+
|
|
592
|
+
@click.command()
@click.pass_context
@click.option("--prompt", default=None, help="Job prompt")
@click.option("--payload", default=None, help="Job payload JSON")
@click.option("--watch", is_flag=True, help="Watch job status until completion")
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def submit(
    ctx: click.Context,
    prompt: str | None,
    payload: str | None,
    watch: bool,
    config_path: str | None,
) -> None:
    """Submit a job to the deployment"""
    client: Together = ctx.obj
    config = Config.find(config_path)

    if not (prompt or payload):
        raise click.UsageError("Either --prompt or --payload required")

    # A raw JSON payload wins; otherwise wrap the prompt in a minimal body.
    job_body = json.loads(payload) if payload else {"prompt": prompt}
    response = client.beta.jig.queue.submit(
        model=config.model_name,
        payload=job_body,
        priority=1,
    )

    click.echo("\N{CHECK MARK} Submitted job")
    pprint(response.model_dump_json(), indent_guides=False)

    if watch and response.request_id:
        click.echo(f"\nWatching job {response.request_id}...")
        _watch_job_status(client, config, response.request_id)
|
625
|
+
|
|
626
|
+
|
|
627
|
+
@click.command()
@click.pass_context
@click.option("--request-id", required=True, help="Job request ID")
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None:
    """Get status of a specific job"""
    client: Together = ctx.obj
    config = Config.find(config_path)
    job = client.beta.jig.queue.retrieve(
        model=config.model_name,
        request_id=request_id,
    )
    pprint(job.model_dump_json(), indent_guides=False)
|
642
|
+
|
|
643
|
+
|
|
644
|
+
@click.command()
@click.pass_context
@click.option("--config", "config_path", default=None, help="Configuration file path")
@handle_api_errors("Jig")
def queue_status(ctx: click.Context, config_path: str | None) -> None:
    """Get queue metrics for the deployment"""
    client: Together = ctx.obj
    config = Config.find(config_path)
    metrics = client.beta.jig.queue.metrics(model=config.model_name)
    pprint(metrics, indent_guides=False)
|
655
|
+
|
|
656
|
+
|
|
657
|
+
@click.command("list")
@click.pass_context
@handle_api_errors("Jig")
def list_deployments(ctx: click.Context) -> None:
    """List all deployments"""
    client: Together = ctx.obj
    deployments = client.beta.jig.list()
    # Dump pydantic models to plain dicts for cleaner pretty-printing.
    printable = deployments.model_dump() if hasattr(deployments, "model_dump") else deployments
    pprint(printable, indent_guides=False)