wafer-cli 0.2.14__py3-none-any.whl
- wafer/GUIDE.md +118 -0
- wafer/__init__.py +3 -0
- wafer/analytics.py +306 -0
- wafer/api_client.py +195 -0
- wafer/auth.py +432 -0
- wafer/autotuner.py +1080 -0
- wafer/billing.py +233 -0
- wafer/cli.py +7289 -0
- wafer/config.py +105 -0
- wafer/corpus.py +366 -0
- wafer/evaluate.py +4593 -0
- wafer/global_config.py +350 -0
- wafer/gpu_run.py +307 -0
- wafer/inference.py +148 -0
- wafer/kernel_scope.py +552 -0
- wafer/ncu_analyze.py +651 -0
- wafer/nsys_analyze.py +1042 -0
- wafer/nsys_profile.py +510 -0
- wafer/output.py +248 -0
- wafer/problems.py +357 -0
- wafer/rocprof_compute.py +490 -0
- wafer/rocprof_sdk.py +274 -0
- wafer/rocprof_systems.py +520 -0
- wafer/skills/wafer-guide/SKILL.md +129 -0
- wafer/ssh_keys.py +261 -0
- wafer/target_lock.py +270 -0
- wafer/targets.py +842 -0
- wafer/targets_ops.py +717 -0
- wafer/templates/__init__.py +0 -0
- wafer/templates/ask_docs.py +61 -0
- wafer/templates/optimize_kernel.py +71 -0
- wafer/templates/optimize_kernelbench.py +137 -0
- wafer/templates/trace_analyze.py +74 -0
- wafer/tracelens.py +218 -0
- wafer/wevin_cli.py +577 -0
- wafer/workspaces.py +852 -0
- wafer_cli-0.2.14.dist-info/METADATA +16 -0
- wafer_cli-0.2.14.dist-info/RECORD +41 -0
- wafer_cli-0.2.14.dist-info/WHEEL +5 -0
- wafer_cli-0.2.14.dist-info/entry_points.txt +2 -0
- wafer_cli-0.2.14.dist-info/top_level.txt +1 -0
wafer/global_config.py
ADDED
@@ -0,0 +1,350 @@

```python
"""Global configuration for Wafer CLI.

Handles API URLs, environment presets (staging/prod/local), and user preferences.
Separate from config.py which handles Docker execution config for `wafer run`.
"""

import os
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal

# Config file location
CONFIG_DIR = Path.home() / ".wafer"
CONFIG_FILE = CONFIG_DIR / "config.toml"

# Environment type
EnvironmentName = Literal["staging", "prod", "local"]


@dataclass(frozen=True)
class ApiEnvironment:
    """API environment configuration. Immutable."""

    name: str
    api_url: str
    supabase_url: str
    supabase_anon_key: str

    def __post_init__(self) -> None:
        assert self.name, "environment name cannot be empty"
        assert self.api_url, "api_url cannot be empty"
        assert self.supabase_url, "supabase_url cannot be empty"
        assert self.supabase_anon_key, "supabase_anon_key cannot be empty"


# Built-in environment presets
# Anon keys are public (used client-side) - safe to embed
BUILTIN_ENVIRONMENTS: dict[str, ApiEnvironment] = {
    "staging": ApiEnvironment(
        name="staging",
        api_url="https://wafer-api-staging.onrender.com",
        supabase_url="https://xudshwhzytyfxwwyofli.supabase.co",
        supabase_anon_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Inh1ZHNod2h6eXR5Znh3d3lvZmxpIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NjU4Mzc5MDEsImV4cCI6MjA4MTQxMzkwMX0.JvuF4349z1ermKmrxEKGDHQj9I_ylLZYjjuouJleYhY",
    ),
    "prod": ApiEnvironment(
        name="prod",
        api_url="https://www.api.wafer.ai",
        supabase_url="https://auth.wafer.ai",
        supabase_anon_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Imh2bHB0aGNueGx5d2xxdWljaXFlIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NjQ2MjQ1NTIsImV4cCI6MjA4MDIwMDU1Mn0.1ywPDp-QHgbqPOJocQvXEKKDjGt3BsoNluvVoQ7EW3o",
    ),
    "local": ApiEnvironment(
        name="local",
        api_url="http://localhost:8000",
        supabase_url="http://localhost:54321",
        supabase_anon_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0",
    ),
}

# Default environment when no config exists
DEFAULT_ENVIRONMENT = "prod"


@dataclass(frozen=True)
class Preferences:
    """User preferences. Immutable.

    mode: "implicit" (default) = quiet output, use -v for status messages
          "explicit" = verbose output, shows [wafer] status messages
    analytics_enabled: True (default) = send anonymous usage analytics to PostHog
                       False = disable all analytics tracking
    """

    mode: Literal["explicit", "implicit"] = "implicit"
    analytics_enabled: bool = True


@dataclass(frozen=True)
class Defaults:
    """Default values for commands. Immutable."""

    workspace: str | None = None
    gpu: str = "H100"
    exec_timeout: int = 300  # seconds


@dataclass(frozen=True)
class GlobalConfig:
    """Global Wafer CLI configuration. Immutable."""

    environment: str = DEFAULT_ENVIRONMENT
    environments: dict[str, ApiEnvironment] = field(
        default_factory=lambda: BUILTIN_ENVIRONMENTS.copy()
    )
    preferences: Preferences = field(default_factory=Preferences)
    defaults: Defaults = field(default_factory=Defaults)

    def __post_init__(self) -> None:
        # Validate environment exists
        assert self.environment in self.environments, (
            f"environment '{self.environment}' not found. "
            f"Available: {list(self.environments.keys())}"
        )

    def get_api_environment(self) -> ApiEnvironment:
        """Get the current API environment."""
        return self.environments[self.environment]

    @property
    def api_url(self) -> str:
        """Get current API URL."""
        return self.get_api_environment().api_url

    @property
    def supabase_url(self) -> str:
        """Get current Supabase URL."""
        return self.get_api_environment().supabase_url


def _parse_config_file(path: Path) -> GlobalConfig:
    """Parse config from TOML file.

    Args:
        path: Path to config file

    Returns:
        GlobalConfig instance
    """
    with open(path, "rb") as f:
        data = tomllib.load(f)

    # Parse API section
    api_data = data.get("api", {})
    environment = api_data.get("environment", DEFAULT_ENVIRONMENT)

    # Merge built-in environments with user-defined ones
    environments = BUILTIN_ENVIRONMENTS.copy()
    user_envs = api_data.get("environments", {})
    for name, env_config in user_envs.items():
        if isinstance(env_config, dict):
            # User can override built-in or define new
            base_env = environments.get(name, BUILTIN_ENVIRONMENTS["prod"])
            environments[name] = ApiEnvironment(
                name=name,
                api_url=env_config.get("url", base_env.api_url),
                supabase_url=env_config.get("supabase_url", base_env.supabase_url),
                supabase_anon_key=env_config.get("supabase_anon_key", base_env.supabase_anon_key),
            )

    # Parse preferences
    pref_data = data.get("preferences", {})
    # Fall back to "implicit" so a file without a mode key matches the
    # documented Preferences default.
    mode = pref_data.get("mode", "implicit")
    assert mode in ("explicit", "implicit"), f"mode must be 'explicit' or 'implicit', got '{mode}'"
    analytics_enabled = pref_data.get("analytics_enabled", True)
    assert isinstance(analytics_enabled, bool), f"analytics_enabled must be true or false, got '{analytics_enabled}'"
    preferences = Preferences(mode=mode, analytics_enabled=analytics_enabled)

    # Parse defaults
    defaults_data = data.get("defaults", {})
    defaults = Defaults(
        workspace=defaults_data.get("workspace"),
        gpu=defaults_data.get("gpu", "H100"),
        exec_timeout=defaults_data.get("exec_timeout", 300),
    )

    return GlobalConfig(
        environment=environment,
        environments=environments,
        preferences=preferences,
        defaults=defaults,
    )
```
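To make the parsing and fallback behavior concrete, here is a minimal sketch (not part of the wheel) that feeds `_parse_config_file` a hypothetical user config. The `dev` environment name and its URL are illustrative assumptions; keys left unset inherit from the base environment, which is `prod` for names not in `BUILTIN_ENVIRONMENTS`.

```python
# Illustrative only -- not shipped code. The "dev" name and URLs are made up.
import tempfile
from pathlib import Path

from wafer.global_config import _parse_config_file

SAMPLE = """\
[api]
environment = "dev"

[api.environments.dev]
url = "http://127.0.0.1:9000"

[preferences]
mode = "explicit"
analytics_enabled = false

[defaults]
gpu = "MI300X"
"""

with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "config.toml"
    path.write_text(SAMPLE)
    cfg = _parse_config_file(path)
    assert cfg.api_url == "http://127.0.0.1:9000"
    # Unset keys inherit from the base environment ("prod" for new names).
    assert cfg.supabase_url == "https://auth.wafer.ai"
    assert cfg.preferences.mode == "explicit"
    assert cfg.defaults.gpu == "MI300X"
```

The module listing continues below.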
```python
# Cached config instance
_cached_config: GlobalConfig | None = None


def load_global_config() -> GlobalConfig:
    """Load global config from file, with env var overrides.

    Priority (highest to lowest):
    1. Environment variables (WAFER_API_URL, SUPABASE_URL)
    2. Config file (~/.wafer/config.toml)
    3. Built-in defaults (prod environment)

    Returns:
        GlobalConfig instance
    """
    global _cached_config

    if _cached_config is not None:
        return _cached_config

    # Start with defaults
    if CONFIG_FILE.exists():
        config = _parse_config_file(CONFIG_FILE)
    else:
        config = GlobalConfig()

    _cached_config = config
    return config


def get_api_url() -> str:
    """Get API URL with env var override.

    Priority:
    1. WAFER_API_URL env var
    2. Config file
    3. Default (prod)
    """
    env_url = os.environ.get("WAFER_API_URL")
    if env_url:
        return env_url
    return load_global_config().api_url


def get_supabase_url() -> str:
    """Get Supabase URL with env var override.

    Priority:
    1. SUPABASE_URL env var
    2. Config file
    3. Default (prod)
    """
    env_url = os.environ.get("SUPABASE_URL")
    if env_url:
        return env_url
    return load_global_config().supabase_url


def get_supabase_anon_key() -> str:
    """Get Supabase anon key for current environment.

    The anon key is public and used for client-side auth operations
    like token refresh.
    """
    return load_global_config().get_api_environment().supabase_anon_key
```
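A quick sketch of the override order (illustrative, not shipped code): the env var wins before the cached config is even consulted. The URL value is hypothetical.

```python
# Illustrative only: WAFER_API_URL takes precedence over the config file.
import os

from wafer.global_config import get_api_url

os.environ["WAFER_API_URL"] = "http://127.0.0.1:9999"  # hypothetical override
assert get_api_url() == "http://127.0.0.1:9999"

del os.environ["WAFER_API_URL"]
# Now falls through to the config file, or the prod default if none exists.
print(get_api_url())
```

The module listing continues below.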
```python
def get_preferences() -> Preferences:
    """Get user preferences."""
    return load_global_config().preferences


def get_defaults() -> Defaults:
    """Get default values."""
    return load_global_config().defaults


def clear_config_cache() -> None:
    """Clear cached config. Useful after config changes."""
    global _cached_config
    _cached_config = None


def save_global_config(config: GlobalConfig) -> None:
    """Save config to TOML file, preserving existing Docker config sections.

    Merges the global API config with any existing [default] and [environments.*]
    sections that are used by the Docker execution config (WaferConfig).
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    # Load existing config to preserve Docker sections
    existing_data: dict = {}
    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, "rb") as f:
            existing_data = tomllib.load(f)

    # Build new content, preserving Docker config sections
    lines = []

    # Preserve [default] section (Docker config)
    if "default" in existing_data:
        lines.append("[default]")
        for key, value in existing_data["default"].items():
            if isinstance(value, str):
                lines.append(f'{key} = "{value}"')
            else:
                lines.append(f"{key} = {value}")
        lines.append("")

    # API section (global config)
    lines.append("[api]")
    lines.append(f'environment = "{config.environment}"')
    lines.append("")

    # Only write non-builtin API environments
    for name, env in config.environments.items():
        if name not in BUILTIN_ENVIRONMENTS or env != BUILTIN_ENVIRONMENTS[name]:
            lines.append(f"[api.environments.{name}]")
            lines.append(f'url = "{env.api_url}"')
            lines.append(f'supabase_url = "{env.supabase_url}"')
            lines.append("")

    # Preserve [environments.*] sections (Docker config)
    if "environments" in existing_data:
        for name, env_config in existing_data["environments"].items():
            if isinstance(env_config, dict):
                lines.append(f"[environments.{name}]")
                for key, value in env_config.items():
                    if isinstance(value, str):
                        lines.append(f'{key} = "{value}"')
                    else:
                        lines.append(f"{key} = {value}")
                lines.append("")

    # Preferences section (only if non-default values)
    pref_lines = []
    if config.preferences.mode != "implicit":
        pref_lines.append(f'mode = "{config.preferences.mode}"')
    if not config.preferences.analytics_enabled:
        pref_lines.append("analytics_enabled = false")

    if pref_lines:
        lines.append("[preferences]")
        lines.extend(pref_lines)
        lines.append("")

    # Defaults section (only if non-default values)
    defaults_lines = []
    if config.defaults.workspace:
        defaults_lines.append(f'workspace = "{config.defaults.workspace}"')
    if config.defaults.gpu != "H100":
        defaults_lines.append(f'gpu = "{config.defaults.gpu}"')
    if config.defaults.exec_timeout != 300:
        defaults_lines.append(f"exec_timeout = {config.defaults.exec_timeout}")

    if defaults_lines:
        lines.append("[defaults]")
        lines.extend(defaults_lines)
        lines.append("")

    CONFIG_FILE.write_text("\n".join(lines))
    clear_config_cache()


def init_config(environment: str = DEFAULT_ENVIRONMENT) -> GlobalConfig:
    """Initialize config file with defaults.

    Args:
        environment: Initial environment to use

    Returns:
        The created GlobalConfig
    """
    config = GlobalConfig(environment=environment)
    save_global_config(config)
    return config
```
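A round-trip sketch tying the save path to the load path (illustrative, not shipped code). Note it writes the real `~/.wafer/config.toml`, so it is not something to run on a machine with an existing Wafer setup.

```python
# Illustrative only: pin the CLI to staging and confirm the round trip.
from wafer.global_config import clear_config_cache, init_config, load_global_config

init_config(environment="staging")
clear_config_cache()  # redundant (save_global_config already clears), shown for emphasis
assert load_global_config().environment == "staging"
assert load_global_config().api_url == "https://wafer-api-staging.onrender.com"
```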
wafer/gpu_run.py
ADDED
@@ -0,0 +1,307 @@

```python
"""Remote execution on GPU targets.

Provides push/run primitives for remote GPU execution via SSH and Docker.
"""

import sys
from dataclasses import dataclass
from pathlib import Path

from wafer_core.ssh import SSHClient
from wafer_core.utils.kernel_utils.targets.config import BaremetalTarget, VMTarget

# Constants
REMOTE_WORKSPACE_BASE = "~/.wafer/workspaces"
CONTAINER_WORKSPACE = "/workspace"


@dataclass(frozen=True)
class PushResult:
    """Result of pushing a directory to remote target."""

    workspace_name: str  # Just the workspace name (e.g., "project")
    workspace_path: str  # Full absolute path on remote (e.g., "/home/user/.wafer/workspaces/project")
    files_uploaded: list[str]  # Relative paths of uploaded files


def push_directory(
    local_path: Path,
    target: BaremetalTarget | VMTarget,
) -> PushResult:
    """Push local directory to remote target.

    Uploads directory to ~/.wafer/workspaces/<dirname> on target.

    Args:
        local_path: Local directory to upload
        target: Remote target configuration

    Returns:
        PushResult with workspace path and list of uploaded files

    Raises:
        FileNotFoundError: If local_path doesn't exist
        ValueError: If local_path is not a directory
    """
    # Validate inputs
    if not local_path.exists():
        raise FileNotFoundError(f"Path not found: {local_path}")
    if not local_path.is_dir():
        raise ValueError(f"Not a directory: {local_path}")
    if target.ssh_target is None:
        raise ValueError(f"Target '{target.name}' must have ssh_target configured")
    if target.ssh_key is None:
        raise ValueError(f"Target '{target.name}' must have ssh_key configured")

    client = SSHClient(target.ssh_target, target.ssh_key)

    workspace_name = local_path.name
    remote_workspace = f"{REMOTE_WORKSPACE_BASE}/{workspace_name}"

    # Create workspace directory
    client.exec(f"mkdir -p {remote_workspace}")
    expanded_workspace = client.expand_path(remote_workspace)

    # Upload directory recursively
    client.upload_files(str(local_path), expanded_workspace, recursive=True)

    # Get list of uploaded files (relative paths)
    files_uploaded = []
    for file in local_path.rglob("*"):
        if file.is_file():
            files_uploaded.append(str(file.relative_to(local_path)))

    return PushResult(
        workspace_name=workspace_name,
        workspace_path=expanded_workspace,
        files_uploaded=files_uploaded,
    )
```
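A hedged sketch of the intended push-then-run flow (not shipped code). How a configured target is obtained is out of scope here; the CLI's target loading lives in `wafer/targets.py`, which is not shown in this diff, and the `python main.py` command is hypothetical.

```python
# Illustrative sketch: a thin wrapper over the push/run primitives above.
from pathlib import Path

from wafer.gpu_run import push_directory, run_command
from wafer_core.utils.kernel_utils.targets.config import BaremetalTarget, VMTarget


def push_and_run(local_dir: Path, target: BaremetalTarget | VMTarget) -> int:
    result = push_directory(local_dir, target)
    print(f"Pushed {len(result.files_uploaded)} files to {result.workspace_path}")
    # Run inside Docker on the target's default GPU (gpu_id=None).
    return run_command("python main.py", result.workspace_name, target)
```

The module listing continues below.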
```python
def run_command(
    command: str,
    workspace: str,
    target: BaremetalTarget | VMTarget,
    gpu_id: int | None = None,
) -> int:
    """Run command in Docker on remote target, streaming output.

    Args:
        command: Command to execute inside container
        workspace: Workspace name (subdirectory under ~/.wafer/workspaces/)
        target: Remote target configuration (must have docker_image)
        gpu_id: GPU ID to use (defaults to first in target.gpu_ids)

    Returns:
        Exit code from command (0 = success)

    Raises:
        ValueError: If target is missing required configuration
    """
    if target.docker_image is None:
        raise ValueError(f"Target '{target.name}' must have docker_image configured")
    if target.ssh_target is None:
        raise ValueError(f"Target '{target.name}' must have ssh_target configured")
    if target.ssh_key is None:
        raise ValueError(f"Target '{target.name}' must have ssh_key configured")

    client = SSHClient(target.ssh_target, target.ssh_key)

    effective_gpu_id = gpu_id if gpu_id is not None else target.gpu_ids[0]

    # Get expanded workspace path
    remote_workspace = f"{REMOTE_WORKSPACE_BASE}/{workspace}"
    expanded_workspace = client.expand_path(remote_workspace)

    # Build docker command with workspace mounted
    volumes = {expanded_workspace: CONTAINER_WORKSPACE}
    docker_cmd = _build_docker_command(
        image=target.docker_image,
        inner_cmd=command,
        gpu_id=effective_gpu_id,
        volumes=volumes,
    )

    # Stream execution
    exit_code = 0
    try:
        for line in client.exec_stream(docker_cmd):
            print(line)
    except Exception as e:
        print(f"\nExecution failed: {e}", file=sys.stderr)
        exit_code = 1

    return exit_code


def run_command_capture(
    command: str,
    workspace: str,
    target: BaremetalTarget | VMTarget,
) -> tuple[int, str]:
    """Run command on remote target (without Docker) and capture output.

    This is useful for commands that don't need GPU access, like running
    NCU to analyze a profile file.

    Args:
        command: Command to execute on the remote host
        workspace: Workspace name (subdirectory under ~/.wafer/workspaces/)
        target: Remote target configuration

    Returns:
        Tuple of (exit_code, output_text)

    Raises:
        ValueError: If target is missing required configuration
    """
    if target.ssh_target is None:
        raise ValueError(f"Target '{target.name}' must have ssh_target configured")
    if target.ssh_key is None:
        raise ValueError(f"Target '{target.name}' must have ssh_key configured")

    client = SSHClient(target.ssh_target, target.ssh_key)

    # Get expanded workspace path
    remote_workspace = f"{REMOTE_WORKSPACE_BASE}/{workspace}"
    expanded_workspace = client.expand_path(remote_workspace)

    # Run command in workspace directory
    full_cmd = f"cd {expanded_workspace} && {command}"

    # Capture output
    output_lines = []
    exit_code = 0
    try:
        for line in client.exec_stream(full_cmd):
            output_lines.append(line)
    except Exception as e:
        print(f"\nExecution failed: {e}", file=sys.stderr)
        exit_code = 1

    return exit_code, "\n".join(output_lines)


def _build_docker_command(
    image: str,
    inner_cmd: str,
    gpu_id: int,
    volumes: dict[str, str],
) -> str:
    """Build docker run command string."""
    import shlex

    parts = ["docker", "run", "--rm"]
    parts.extend(["--gpus", f"'device={gpu_id}'"])

    for host_path, container_path in volumes.items():
        parts.extend(["-v", f"{host_path}:{container_path}"])

    parts.extend(["-w", CONTAINER_WORKSPACE])
    parts.append(image)
    parts.append(f"bash -c {shlex.quote(inner_cmd)}")

    return " ".join(parts)
```
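Since `_build_docker_command` is a pure string builder, its output is easy to pin down locally without any SSH target. The image name, script, and paths below are hypothetical.

```python
# Runnable locally: _build_docker_command only assembles a command string.
from wafer.gpu_run import _build_docker_command

cmd = _build_docker_command(
    image="rocm/pytorch:latest",             # hypothetical image
    inner_cmd="python bench.py --iters 10",  # hypothetical script
    gpu_id=0,
    volumes={"/home/user/.wafer/workspaces/bench": "/workspace"},
)
print(cmd)
# docker run --rm --gpus 'device=0' -v /home/user/.wafer/workspaces/bench:/workspace -w /workspace rocm/pytorch:latest bash -c 'python bench.py --iters 10'
```

The module listing continues below.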
```python
def _build_uv_install_cmd() -> str:
    """Build command to ensure uv is available."""
    ensure_curl = (
        "which curl > /dev/null 2>&1 || "
        "(apt-get update -qq && apt-get install -qq -y curl > /dev/null)"
    )
    install_uv = "which uv > /dev/null 2>&1 || curl -LsSf https://astral.sh/uv/install.sh | sh"
    source_uv = "export PATH=$HOME/.local/bin:$PATH"
    return f"{ensure_curl} && {install_uv} && {source_uv}"


def run_python_docker(
    file_path: Path,
    args: list[str],
    target: BaremetalTarget | VMTarget,
    gpu_id: int,
) -> int:
    """Run Python file in Docker container on remote GPU."""
    if target.docker_image is None:
        raise ValueError(f"Target '{target.name}' must have docker_image configured")
    if target.ssh_target is None:
        raise ValueError(f"Target '{target.name}' must have ssh_target configured")
    if target.ssh_key is None:
        raise ValueError(f"Target '{target.name}' must have ssh_key configured")

    print(f"Connecting to {target.ssh_target}...")
    client = SSHClient(target.ssh_target, target.ssh_key)

    # Setup workspace
    remote_workspace = f"{REMOTE_WORKSPACE_BASE}/python_run"
    client.exec(f"mkdir -p {remote_workspace}")
    expanded_workspace = client.expand_path(remote_workspace)

    # Upload file
    remote_file = f"{expanded_workspace}/{file_path.name}"
    print(f"Uploading {file_path.name}...")
    client.upload_files(str(file_path), remote_file)

    # Build inner command: install uv, run script with inline deps
    script_args = " ".join(args) if args else ""
    uv_setup = _build_uv_install_cmd()
    inner_cmd = f"{uv_setup} && uv run --script {file_path.name} {script_args}"

    # Build docker command
    volumes = {expanded_workspace: CONTAINER_WORKSPACE}
    docker_cmd = _build_docker_command(
        image=target.docker_image,
        inner_cmd=inner_cmd,
        gpu_id=gpu_id,
        volumes=volumes,
    )

    print(f"Running on GPU {gpu_id} with {target.docker_image}...")
    print("-" * 60)

    # Stream execution
    exit_code = 0
    try:
        for line in client.exec_stream(docker_cmd):
            print(line)
    except Exception as e:
        print(f"\nExecution failed: {e}", file=sys.stderr)
        exit_code = 1

    return exit_code


def run_python_file(
    file_path: Path,
    args: list[str],
    target: BaremetalTarget | VMTarget,
    gpu_id: int | None = None,
) -> int:
    """Run Python file on remote GPU in Docker container.

    Args:
        file_path: Path to Python script
        args: Arguments to pass to script
        target: Remote target configuration (must have docker_image)
        gpu_id: GPU ID to use (defaults to first in target.gpu_ids)

    Returns:
        Exit code from script

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If path is not a file or target has no docker_image
    """
    # Validate inputs
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")
    if not file_path.is_file():
        raise ValueError(f"Not a file: {file_path}")
    if not target.docker_image:
        raise ValueError(f"Target '{target.name}' has no docker_image configured")

    effective_gpu_id = gpu_id if gpu_id is not None else target.gpu_ids[0]

    return run_python_docker(file_path, args, target, effective_gpu_id)
```
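Closing with an end-to-end sketch (illustrative, not shipped code): `run_python_file` uploads the script and runs it via `uv run --script`, so the script is expected to carry its dependencies as inline metadata. Obtaining `target` is again out of scope, and the file name and arguments are hypothetical.

```python
# Illustrative sketch: run a script with inline deps on a remote GPU.
import sys
from pathlib import Path

from wafer.gpu_run import run_python_file
from wafer_core.utils.kernel_utils.targets.config import BaremetalTarget, VMTarget


def main(target: BaremetalTarget | VMTarget) -> None:
    exit_code = run_python_file(
        Path("bench.py"),    # hypothetical script for `uv run --script`
        ["--iters", "100"],  # hypothetical arguments
        target,
        gpu_id=None,         # falls back to target.gpu_ids[0]
    )
    sys.exit(exit_code)
```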