superlinear 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apps/__init__.py +4 -0
- apps/cli/__init__.py +8 -0
- apps/cli/bm25_rag.py +471 -0
- apps/cli/chat_repl.py +1497 -0
- apps/cli/client.py +195 -0
- apps/cli/docs_repl.py +2275 -0
- apps/cli/light_rag.py +729 -0
- apps/cli/local_snapshots.py +139 -0
- apps/cli/locks.py +214 -0
- apps/cli/main.py +457 -0
- apps/cli/output.py +32 -0
- apps/cli/server_cmds.py +516 -0
- apps/cli/session_cmds.py +491 -0
- apps/cli/snapshot_cmds.py +303 -0
- apps/cli/state.py +265 -0
- apps/server/__init__.py +4 -0
- apps/server/app.py +1363 -0
- apps/server/main.py +313 -0
- superlinear/__init__.py +114 -0
- superlinear/_version.py +3 -0
- superlinear/engine/__init__.py +10 -0
- superlinear/engine/adapters/__init__.py +12 -0
- superlinear/engine/adapters/base.py +91 -0
- superlinear/engine/adapters/superlinear.py +1233 -0
- superlinear/engine/chat_engine.py +1173 -0
- superlinear/engine/chat_types.py +130 -0
- superlinear/engine/registry.py +51 -0
- superlinear/engine/repetition.py +203 -0
- superlinear/engine/session_snapshots.py +451 -0
- superlinear/engine/tool_parser.py +83 -0
- superlinear/engine/types.py +42 -0
- superlinear/kernels/__init__.py +2 -0
- superlinear/kernels/common/__init__.py +21 -0
- superlinear/kernels/common/adjustment.py +106 -0
- superlinear/kernels/common/power.py +154 -0
- superlinear/kernels/superlinear/__init__.py +10 -0
- superlinear/kernels/superlinear/attention/__init__.py +78 -0
- superlinear/kernels/superlinear/attention/_prefill.py +940 -0
- superlinear/kernels/superlinear/attention/_sliding_window.py +1167 -0
- superlinear/kernels/superlinear/attention/api.py +433 -0
- superlinear/kernels/superlinear/search/__init__.py +33 -0
- superlinear/kernels/superlinear/search/_reference.py +204 -0
- superlinear/kernels/superlinear/search/_triton.py +488 -0
- superlinear/kernels/superlinear/search/_triton_gqa.py +534 -0
- superlinear/kernels/superlinear/search/api.py +200 -0
- superlinear/kernels/superlinear/span/__init__.py +41 -0
- superlinear/kernels/superlinear/span/_triton_bucketed_gqa.py +1461 -0
- superlinear/kernels/superlinear/span/_triton_forward.py +22 -0
- superlinear/kernels/superlinear/span/_triton_gqa.py +1226 -0
- superlinear/kernels/superlinear/span/_triton_impl.py +928 -0
- superlinear/kernels/superlinear/span/_triton_precomputed_sw.py +460 -0
- superlinear/kernels/superlinear/span/_triton_precomputed_sw_gqa.py +598 -0
- superlinear/kernels/superlinear/span/api.py +296 -0
- superlinear/kernels/superlinear/span/masks.py +187 -0
- superlinear/py.typed +0 -0
- superlinear/runtime.py +71 -0
- superlinear-0.1.0.dist-info/METADATA +469 -0
- superlinear-0.1.0.dist-info/RECORD +62 -0
- superlinear-0.1.0.dist-info/WHEEL +5 -0
- superlinear-0.1.0.dist-info/entry_points.txt +2 -0
- superlinear-0.1.0.dist-info/licenses/LICENSE +202 -0
- superlinear-0.1.0.dist-info/top_level.txt +2 -0
apps/cli/main.py
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""`spl` — Superlinear CLI (HTTP client).
|
|
2
|
+
|
|
3
|
+
This is the CLI entrypoint. Run from source with:
|
|
4
|
+
`python -m apps.cli.main --help`
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import sys
|
|
11
|
+
from typing import Sequence
|
|
12
|
+
|
|
13
|
+
from apps.cli.client import DEFAULT_URL
|
|
14
|
+
from apps.cli.chat_repl import chat_repl, DEFAULT_SYSTEM_PROMPT
|
|
15
|
+
from apps.cli.docs_repl import docs_repl
|
|
16
|
+
from apps.cli.server_cmds import ServerCommandError, server_connect, server_start, server_status, server_stop
|
|
17
|
+
from apps.cli.state import load_state
|
|
18
|
+
from apps.cli.session_cmds import (
|
|
19
|
+
SessionCommandError,
|
|
20
|
+
session_close,
|
|
21
|
+
session_close_all,
|
|
22
|
+
session_history,
|
|
23
|
+
session_info,
|
|
24
|
+
session_ls,
|
|
25
|
+
unified_ls,
|
|
26
|
+
unified_rm,
|
|
27
|
+
)
|
|
28
|
+
from apps.cli.snapshot_cmds import (
|
|
29
|
+
SnapshotCommandError,
|
|
30
|
+
snapshot_load,
|
|
31
|
+
snapshot_ls,
|
|
32
|
+
snapshot_rm,
|
|
33
|
+
snapshot_rm_all,
|
|
34
|
+
snapshot_save,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _add_server_commands(sub: argparse._SubParsersAction) -> None:
    """Register `spl server` with its `start`, `status`, `stop` and `connect` subcommands."""
    server = sub.add_parser("server", help="Manage local inference server")
    # Deliberately not required: a bare `spl server` parses with server_cmd=None.
    server_sub = server.add_subparsers(dest="server_cmd")

    start = server_sub.add_parser("start", help="Start local server")
    start.add_argument("--model", required=True, help="Model path or HF repo id")
    start.add_argument("--host", help="Bind host (default: derived from --url)")
    start.add_argument("--port", type=int, help="Bind port (default: derived from --url)")

    # Optional string settings; each defaults to None when the flag is absent.
    for flag, help_text in (
        ("--attn-implementation", "Attention implementation (maps to apps.server.main --attn-implementation)"),
        ("--decode-kernel", "Decode kernel (maps to apps.server.main --decode-kernel)"),
        ("--device", "Torch device / device_map (maps to apps.server.main --device)"),
        ("--dtype", "Torch dtype: float16|bfloat16|float32 (maps to apps.server.main --dtype)"),
    ):
        start.add_argument(flag, type=str, default=None, help=help_text)

    start.add_argument(
        "--chunk-size",
        type=int,
        default=None,
        help="Chunk size for chunked prefill (default: 8192)",
    )
    start.add_argument(
        "--max-prompt-tokens",
        type=int,
        default=None,
        help="Server-side hard cap for prompt length (maps to apps.server.main --max-prompt-tokens)",
    )

    for flag, help_text in (
        ("--disable-cuda-graph", "Disable CUDA graphs (maps to apps.server.main --disable-cuda-graph)"),
        (
            "--disable-shared-fused-moe",
            "Disable shared fused MoE (maps to apps.server.main --disable-shared-fused-moe)",
        ),
        ("--foreground", "Run in foreground (default: detached with logs to ~/.config/spl/server)"),
    ):
        start.add_argument(flag, action="store_true", help=help_text)

    server_sub.add_parser("status", help="Check server status")

    stop = server_sub.add_parser("stop", help="Stop local server")
    stop.add_argument("--force", action="store_true", help="Stop even if active sessions exist")

    connect = server_sub.add_parser("connect", help="Connect to a remote server")
    connect.add_argument("server_url", help="Server URL (e.g., http://gpu-server:8787)")


def _add_session_commands(sub: argparse._SubParsersAction) -> None:
    """Register `spl session` with `ls`, `info`, `rm`, `history` and the hidden legacy `close`."""
    session = sub.add_parser("session", help="Manage live sessions")
    # The explicit metavar keeps the legacy `close` command out of the usage line.
    session_sub = session.add_subparsers(
        dest="session_cmd",
        required=True,
        metavar="{ls,info,rm,history}",
    )

    ls_p = session_sub.add_parser("ls", help="List active sessions")
    ls_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")

    info_p = session_sub.add_parser("info", help="Show session info")
    info_p.add_argument("session_id", help="Session id")
    info_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")

    rm_p = session_sub.add_parser("rm", help="Remove session(s)")
    rm_p.add_argument("session_ids", nargs="*", help="Session id(s) to remove")
    rm_p.add_argument("--all", action="store_true", dest="remove_all", help="Remove all sessions")

    # Back-compat: `spl session close <id> [--force]` (deprecated; use `rm`).
    # argparse only lists a subcommand in the parent's help when `help=` is passed to
    # add_parser(), so omitting it hides `close` while still letting it parse. This
    # replaces the earlier approach of rewriting the private `_choices_actions` list.
    close_p = session_sub.add_parser("close")
    close_p.add_argument("session_id", help=argparse.SUPPRESS)
    close_p.add_argument("--force", action="store_true", help=argparse.SUPPRESS)

    hist_p = session_sub.add_parser("history", help="Print session transcript")
    hist_p.add_argument("session_id", help="Session id")
    hist_p.add_argument("--tail", type=int, help="Only show the last N messages")
    hist_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")


def _add_snapshot_commands(sub: argparse._SubParsersAction) -> None:
    """Register `spl snapshot` with its `ls`, `save`, `load` and `rm` subcommands."""
    snapshot = sub.add_parser("snapshot", help="Manage durable snapshots")
    snapshot_sub = snapshot.add_subparsers(dest="snapshot_cmd", required=True)

    ls_p = snapshot_sub.add_parser("ls", help="List snapshots")
    ls_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")

    save_p = snapshot_sub.add_parser("save", help="Save a session to a snapshot")
    save_p.add_argument("--session", dest="session_id", required=True, help="Session id to save")
    save_p.add_argument("--title", help="Optional snapshot title")
    save_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")

    load_p = snapshot_sub.add_parser("load", help="Load snapshot into a session")
    load_p.add_argument("snapshot_id", help="Snapshot id")
    load_p.add_argument("--session", dest="session_id", help="Target session id (default: new)")
    load_p.add_argument(
        "--force",
        action="store_true",
        help="Overwrite target session if it already exists",
    )
    load_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")

    rm_p = snapshot_sub.add_parser("rm", help="Delete snapshot(s)")
    rm_p.add_argument("snapshot_ids", nargs="*", help="Snapshot id(s) to delete")
    rm_p.add_argument("--all", action="store_true", dest="remove_all", help="Delete all snapshots")
    rm_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")


def _add_repl_options(
    parser: argparse.ArgumentParser,
    *,
    max_seq_len_help: str,
    think_budget: int,
    temperature: float,
) -> None:
    """Add the generation options shared by the `chat` and `docs` REPL commands.

    Args:
        parser: Sub-parser to extend.
        max_seq_len_help: Help text for `--max-seq-len` (differs per command).
        think_budget: Default for `--think-budget`.
        temperature: Default for `--temperature`; also interpolated into its help text.
    """
    parser.add_argument("--max-seq-len", type=int, default=1_048_576, help=max_seq_len_help)
    parser.add_argument(
        "--think-budget",
        type=int,
        default=think_budget,
        help="Enable thinking mode with this token budget (0 disables thinking)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=temperature,
        help=f"Sampling temperature (default: {temperature})",
    )
    parser.add_argument(
        "--top-p",
        type=float,
        default=0.95,
        help="Top-p (nucleus) sampling (default: 0.95)",
    )
    parser.add_argument(
        "--system-prompt",
        type=str,
        default=None,
        help="Custom system prompt (use --no-system-prompt to disable)",
    )
    parser.add_argument(
        "--no-system-prompt",
        action="store_true",
        help="Disable the default system prompt",
    )


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the `spl` command line.

    Top-level commands are registered in this order: `server`, `ls`, `rm`,
    `session`, `snapshot`, `chat`, `docs`. The top-level command is optional
    (``args.command`` is ``None`` when omitted), as is the `server` subcommand;
    `session` and `snapshot` require a subcommand.

    Returns:
        The configured parser. The global `--url` option defaults to ``DEFAULT_URL``.
    """
    p = argparse.ArgumentParser(prog="spl", description="Superlinear CLI (HTTP client)")
    p.add_argument(
        "--url",
        default=DEFAULT_URL,
        help="Server base URL (default: %(default)s)",
    )

    sub = p.add_subparsers(dest="command")

    _add_server_commands(sub)

    # Unified list / remove commands spanning sessions and snapshots.
    ls_p = sub.add_parser("ls", help="List sessions and snapshots")
    ls_p.add_argument("--json", action="store_true", help="Machine-readable JSON output")

    rm_p = sub.add_parser("rm", help="Remove sessions and snapshots")
    rm_p.add_argument("ids", nargs="+", help="IDs to remove (chat-*, docs-*, snap-*)")

    _add_session_commands(sub)
    _add_snapshot_commands(sub)

    chat = sub.add_parser("chat", help="Chat REPL")
    chat.add_argument("--new", action="store_true", help="Start a new chat workspace")
    chat.add_argument("--session", help="Attach to a specific session id")
    _add_repl_options(
        chat,
        max_seq_len_help="Session context length (default: 1048576)",
        think_budget=8192,
        temperature=0.1,
    )

    docs = sub.add_parser("docs", help="Docs REPL")
    docs.add_argument("name", help="Docs workspace name")
    docs.add_argument(
        "-l",
        "--load",
        type=str,
        metavar="SNAPSHOT_ID",
        default=None,
        help="Load from a snapshot (e.g., from `spl snapshot ls`)",
    )
    _add_repl_options(
        docs,
        max_seq_len_help=(
            "Session context length (max_seq_len). If the workspace session already exists, "
            "the CLI will try to resize it upward."
        ),
        think_budget=32768,
        temperature=0.3,
    )

    return p
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _not_implemented(what: str) -> int:
|
|
262
|
+
print(f"{what} is not implemented yet (CLI foundation only).", file=sys.stderr)
|
|
263
|
+
return 2
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line and dispatch to the selected `spl` command.

    Args:
        argv: Command-line arguments without the program name. ``None`` means
            ``sys.argv[1:]``.

    Returns:
        The value returned by the selected command function; 1 when that command
        raises its ``*CommandError`` (the message is printed to stderr); 2 when
        `session rm` is given neither ids nor `--all`.

    Raises:
        SystemExit: Raised by argparse for `--help` and for usage errors
            reported through ``parser.error``.
    """
    parser = build_parser()
    argv_list = list(sys.argv[1:] if argv is None else argv)

    # Literal scan for the flag. Also used below to gate remote snapshot deletion.
    url_explicit = any(a == "--url" or a.startswith("--url=") for a in argv_list)

    args = parser.parse_args(argv_list)

    # Resolve server URL: --url flag > saved state > default.
    # Only fall back to the saved URL when the flag was not given. Comparing the value
    # alone would let saved state override an explicit `--url <default>`. The value
    # comparison is kept as well so an abbreviated flag (e.g. `--ur`), which the literal
    # scan above does not see, is still honoured when it names a non-default URL.
    if not url_explicit and args.url == DEFAULT_URL:
        state = load_state()
        if state.server_url:
            args.url = state.server_url

    # `spl` defaults to `spl chat`.
    command = args.command or "chat"

    if command == "chat":
        # System prompt precedence: --no-system-prompt > --system-prompt > default.
        if getattr(args, "no_system_prompt", False):
            system_prompt = None
        elif getattr(args, "system_prompt", None):
            system_prompt = args.system_prompt
        else:
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # With a bare `spl` the chat sub-parser never ran, so its options are missing
        # from `args`. The getattr fallbacks mirror the chat sub-parser defaults so both
        # spellings behave the same (the think budget previously fell back to 0, which
        # disabled thinking for bare `spl` only).
        chat_budget = int(getattr(args, "think_budget", 8192))
        return chat_repl(
            url=args.url,
            new=bool(getattr(args, "new", False)),
            session=getattr(args, "session", None),
            max_seq_len=getattr(args, "max_seq_len", 1_048_576),
            think_budget=chat_budget if chat_budget > 0 else None,
            temperature=float(getattr(args, "temperature", 0.1)),
            top_p=float(getattr(args, "top_p", 0.95)),
            system_prompt=system_prompt,
        )

    if command == "docs":
        # System prompt precedence: --no-system-prompt ("") > --system-prompt >
        # None, which means "use the built-in default".
        if getattr(args, "no_system_prompt", False):
            docs_system_prompt: str | None = ""
        elif getattr(args, "system_prompt", None):
            docs_system_prompt = args.system_prompt
        else:
            docs_system_prompt = None

        docs_budget = int(getattr(args, "think_budget", 0))
        return docs_repl(
            url=args.url,
            name=args.name,
            load_snapshot_id=getattr(args, "load", None),
            max_seq_len=getattr(args, "max_seq_len", 1_048_576),
            think_budget=docs_budget if docs_budget > 0 else None,
            temperature=float(getattr(args, "temperature", 0.3)),
            top_p=float(getattr(args, "top_p", 0.95)),
            system_prompt=docs_system_prompt,
        )

    if command == "ls":
        try:
            # Get active session info from state.
            state = load_state()
            return unified_ls(
                url=args.url,
                json_output=bool(getattr(args, "json", False)),
                active_chat_session_id=state.active_chat_session_id,
                docs_workspaces=state.docs_workspaces,
            )
        except SessionCommandError as exc:
            print(str(exc), file=sys.stderr)
            return 1

    if command == "rm":
        try:
            return unified_rm(
                url=args.url,
                ids=args.ids,
                allow_remote_snapshot_delete=url_explicit,
            )
        except SessionCommandError as exc:
            print(str(exc), file=sys.stderr)
            return 1

    if command == "server":
        try:
            server_cmd = args.server_cmd or "status"
            if server_cmd == "status":
                result = server_status(url=args.url)
                if not args.server_cmd:
                    # Show hints when defaulting to status; continuation lines are
                    # indented to line up under the first command.
                    print()
                    print("commands: spl server start --model <path>")
                    print("          spl server stop")
                    print("          spl server connect <url>")
                return result
            if server_cmd == "start":
                return server_start(
                    url=args.url,
                    model=args.model,
                    host=args.host,
                    port=args.port,
                    chunk_size=getattr(args, "chunk_size", None),
                    attn_implementation=getattr(args, "attn_implementation", None),
                    decode_kernel=getattr(args, "decode_kernel", None),
                    device=getattr(args, "device", None),
                    dtype=getattr(args, "dtype", None),
                    max_prompt_tokens=getattr(args, "max_prompt_tokens", None),
                    disable_cuda_graph=bool(getattr(args, "disable_cuda_graph", False)),
                    disable_shared_fused_moe=bool(getattr(args, "disable_shared_fused_moe", False)),
                    foreground=bool(args.foreground),
                )
            if server_cmd == "stop":
                return server_stop(url=args.url, force=bool(args.force))
            if server_cmd == "connect":
                return server_connect(url=args.server_url)
            parser.error(f"Unknown server subcommand: {server_cmd!r}")
            return 2  # Defensive: parser.error() exits and does not return.
        except ServerCommandError as exc:
            print(str(exc), file=sys.stderr)
            return 1

    if command == "session":
        try:
            if args.session_cmd == "ls":
                return session_ls(url=args.url, json_output=bool(args.json))
            if args.session_cmd == "info":
                return session_info(url=args.url, session_id=args.session_id, json_output=bool(args.json))
            if args.session_cmd == "rm":
                if getattr(args, "remove_all", False):
                    return session_close_all(url=args.url)
                if not args.session_ids:
                    print("error: provide session IDs or use --all", file=sys.stderr)
                    return 2
                return session_close(url=args.url, session_ids=args.session_ids)
            if args.session_cmd == "close":
                # Legacy alias; `--force` is accepted but ignored (MVP smoothness).
                return session_close(url=args.url, session_ids=[args.session_id])
            if args.session_cmd == "history":
                return session_history(
                    url=args.url,
                    session_id=args.session_id,
                    tail=args.tail,
                    json_output=bool(args.json),
                )
            parser.error(f"Unknown session subcommand: {args.session_cmd!r}")
            return 2  # Defensive: parser.error() exits and does not return.
        except SessionCommandError as exc:
            print(str(exc), file=sys.stderr)
            return 1

    if command == "snapshot":
        try:
            if args.snapshot_cmd == "ls":
                return snapshot_ls(url=args.url, json_output=bool(args.json))
            if args.snapshot_cmd == "save":
                return snapshot_save(
                    url=args.url,
                    session_id=args.session_id,
                    title=args.title,
                    json_output=bool(args.json),
                )
            if args.snapshot_cmd == "load":
                return snapshot_load(
                    url=args.url,
                    snapshot_id=args.snapshot_id,
                    session_id=args.session_id,
                    force=bool(args.force),
                    json_output=bool(args.json),
                )
            if args.snapshot_cmd == "rm":
                if args.remove_all:
                    return snapshot_rm_all(
                        url=args.url,
                        json_output=bool(args.json),
                        allow_remote_delete=url_explicit,
                    )
                if not args.snapshot_ids:
                    parser.error("snapshot rm: either provide snapshot IDs or use --all")
                return snapshot_rm(
                    url=args.url,
                    snapshot_ids=args.snapshot_ids,
                    json_output=bool(args.json),
                    allow_remote_delete=url_explicit,
                )
            parser.error(f"Unknown snapshot subcommand: {args.snapshot_cmd!r}")
            return 2  # Defensive: parser.error() exits and does not return.
        except SnapshotCommandError as exc:
            print(str(exc), file=sys.stderr)
            return 1

    parser.error(f"Unknown command: {command!r}")
    return 2  # Defensive: parser.error() exits and does not return.
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
if __name__ == "__main__":
|
|
457
|
+
raise SystemExit(main())
|
apps/cli/output.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import Any, Iterable, Sequence
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def print_json(obj: Any) -> None:
    """Write *obj* to stdout as JSON: 2-space indent, sorted keys, non-ASCII kept as-is."""
    rendered = json.dumps(obj, indent=2, sort_keys=True, ensure_ascii=False)
    print(rendered)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def format_table(headers: Sequence[str], rows: Iterable[Sequence[str]]) -> str:
    """Render *headers* and *rows* as a left-aligned plain-text table.

    Each column is as wide as its longest cell (header included). Columns are
    joined with a single space and trailing whitespace is stripped from every
    line. A dashed rule separates the header from the body.

    Headers and cells are converted with ``str()``, so non-string values such
    as ints or ``None`` are rendered instead of raising ``TypeError``.

    Args:
        headers: Column titles; their count fixes the number of aligned columns.
        rows: Table body. A row may be shorter than *headers*; cells beyond the
            last header are appended unpadded and do not affect column widths.

    Returns:
        The table as one string with lines separated by ``"\\n"`` and no
        trailing newline.
    """
    header_cells = [str(h) for h in headers]
    # Materialise once: *rows* may be a one-shot iterator and is walked twice.
    body = [[str(cell) for cell in row] for row in rows]

    widths = [len(h) for h in header_cells]
    for row in body:
        # zip() stops at the shorter input, so surplus cells are ignored here.
        for i, (cell, width) in enumerate(zip(row, widths)):
            if len(cell) > width:
                widths[i] = len(cell)

    def fmt_row(cols: Sequence[str]) -> str:
        padded = [c.ljust(w) for c, w in zip(cols, widths)]
        padded.extend(cols[len(widths):])  # cells without a header column stay unpadded
        return " ".join(padded).rstrip()

    lines = [fmt_row(header_cells), fmt_row(["-" * w for w in widths])]
    lines.extend(fmt_row(row) for row in body)
    return "\n".join(lines)
|
|
32
|
+
|