wafer-cli 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/cli.py +403 -106
- wafer/evaluate.py +871 -98
- wafer/target_lock.py +198 -0
- wafer/targets.py +158 -0
- {wafer_cli-0.2.4.dist-info → wafer_cli-0.2.6.dist-info}/METADATA +1 -1
- {wafer_cli-0.2.4.dist-info → wafer_cli-0.2.6.dist-info}/RECORD +9 -8
- {wafer_cli-0.2.4.dist-info → wafer_cli-0.2.6.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.4.dist-info → wafer_cli-0.2.6.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.4.dist-info → wafer_cli-0.2.6.dist-info}/top_level.txt +0 -0
wafer/target_lock.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Target locking for concurrent access control.
|
|
2
|
+
|
|
3
|
+
Uses file locks (fcntl.flock) to ensure only one process uses a target at a time.
|
|
4
|
+
Locks are automatically released when the process exits or crashes.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
# Try to acquire a single target
|
|
8
|
+
with try_acquire_target("mi300x-1") as acquired:
|
|
9
|
+
if acquired:
|
|
10
|
+
# Got the lock, run eval
|
|
11
|
+
...
|
|
12
|
+
else:
|
|
13
|
+
# Target busy
|
|
14
|
+
...
|
|
15
|
+
|
|
16
|
+
# Acquire first available from a pool
|
|
17
|
+
with acquire_from_pool(["mi300x-1", "mi300x-2", "mi300x-3"]) as target:
|
|
18
|
+
if target:
|
|
19
|
+
# Got a target, run eval
|
|
20
|
+
...
|
|
21
|
+
else:
|
|
22
|
+
# All targets busy
|
|
23
|
+
...
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import fcntl
|
|
29
|
+
import os
|
|
30
|
+
from contextlib import contextmanager
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Iterator
|
|
33
|
+
|
|
34
|
+
# Lock directory
|
|
35
|
+
LOCKS_DIR = Path.home() / ".wafer" / "locks"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _ensure_locks_dir() -> None:
    """Create the lock directory (including parents) if it is missing."""
    os.makedirs(LOCKS_DIR, exist_ok=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _lock_path(target_name: str) -> Path:
    """Return the lock-file path for *target_name* under LOCKS_DIR."""
    return LOCKS_DIR / (target_name + ".lock")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@contextmanager
def try_acquire_target(target_name: str) -> Iterator[bool]:
    """Try to acquire an exclusive lock on a target.

    Args:
        target_name: Name of the target to lock

    Yields:
        True if lock was acquired, False if target is busy

    The lock is automatically released when the context exits,
    or if the process crashes (the kernel drops flock locks when
    the holding fd is closed).
    """
    _ensure_locks_dir()
    lock_file = _lock_path(target_name)

    # Open or create lock file. The fd must stay open as long as the
    # flock is held.
    fd = os.open(str(lock_file), os.O_CREAT | os.O_RDWR)
    try:
        # Keep the try narrow around flock() only. Previously the
        # `yield` sat inside this try, so a BlockingIOError raised by
        # the caller's with-body was swallowed here and the generator
        # yielded a second time (RuntimeError from @contextmanager).
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            # Lock is held by another process
            yield False
            return

        # Write PID to lock file for debugging (see get_lock_holder)
        os.ftruncate(fd, 0)
        os.write(fd, f"{os.getpid()}\n".encode())
        try:
            yield True
        finally:
            # Release lock
            fcntl.flock(fd, fcntl.LOCK_UN)
    finally:
        os.close(fd)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@contextmanager
def acquire_from_pool(target_names: list[str]) -> Iterator[str | None]:
    """Acquire first available target from a list.

    Tries each target in order, yields the first one that's available.

    Args:
        target_names: List of target names to try

    Yields:
        Name of acquired target, or None if all are busy

    Example:
        with acquire_from_pool(["gpu-1", "gpu-2", "gpu-3"]) as target:
            if target:
                print(f"Got {target}")
                run_eval(target)
            else:
                print("All targets busy")
    """
    _ensure_locks_dir()

    # Try each target in order
    for target_name in target_names:
        lock_file = _lock_path(target_name)
        fd = os.open(str(lock_file), os.O_CREAT | os.O_RDWR)
        try:
            # Keep the try narrow around flock() only. Previously the
            # except wrapped the `yield` too, so a BlockingIOError from
            # the caller's with-body double-closed the fd and re-entered
            # the generator (RuntimeError from @contextmanager).
            try:
                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except BlockingIOError:
                # This target is busy, try next (fd closed by finally)
                continue

            # Got the lock - write PID for debugging, then hand the
            # target to the caller.
            os.ftruncate(fd, 0)
            os.write(fd, f"{os.getpid()}\n".encode())
            try:
                yield target_name
                return  # Success - exit after context
            finally:
                fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            # Always close the fd - the original leaked it when
            # ftruncate/write raised a non-BlockingIOError error.
            os.close(fd)

    # All targets busy
    yield None
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def is_target_locked(target_name: str) -> bool:
    """Report whether a target's lock is currently held.

    Note: This is a point-in-time check - the lock status can change
    immediately after this returns.

    Args:
        target_name: Name of the target to check

    Returns:
        True if target is locked, False if available
    """
    _ensure_locks_dir()
    lock_file = _lock_path(target_name)

    # No lock file at all means nobody ever locked this target.
    if not lock_file.exists():
        return False

    fd = os.open(str(lock_file), os.O_RDONLY)
    try:
        # Probe with a non-blocking exclusive lock.
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        # Another process holds it.
        return True
    else:
        # We acquired it, so it was free - release right away.
        fcntl.flock(fd, fcntl.LOCK_UN)
        return False
    finally:
        os.close(fd)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_lock_holder(target_name: str) -> int | None:
    """Return the PID recorded in a target's lock file.

    Args:
        target_name: Name of the target

    Returns:
        PID of lock holder, or None if not locked or unknown
    """
    lock_file = _lock_path(target_name)
    if not lock_file.exists():
        return None

    # Lock files contain the holder's PID (written on acquisition);
    # tolerate unreadable or malformed contents.
    try:
        return int(lock_file.read_text().strip())
    except (ValueError, OSError):
        return None
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def list_locked_targets() -> list[str]:
    """List all currently locked targets.

    Returns:
        Sorted list of target names that are currently locked
    """
    _ensure_locks_dir()
    # A lock file's stem is the target name; keep only those whose
    # lock is actually held right now.
    return sorted(
        lock_file.stem
        for lock_file in LOCKS_DIR.glob("*.lock")
        if is_target_locked(lock_file.stem)
    )
|
wafer/targets.py
CHANGED
|
@@ -257,6 +257,164 @@ def get_default_target() -> str | None:
|
|
|
257
257
|
return data.get("default_target")
|
|
258
258
|
|
|
259
259
|
|
|
260
|
+
# ── Pool Management ─────────────────────────────────────────────────────────
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def get_pool(name: str) -> list[str]:
    """Get list of targets in a named pool.

    Pools are defined in ~/.wafer/config.toml:
        [pools.my-pool]
        targets = ["target-1", "target-2", "target-3"]

    Args:
        name: Pool name

    Returns:
        List of target names in the pool

    Raises:
        FileNotFoundError: If pool doesn't exist
    """
    # Without a config file there can be no pools at all.
    if not CONFIG_FILE.exists():
        raise FileNotFoundError(f"Pool not found: {name} (no config file)")

    with open(CONFIG_FILE, "rb") as f:
        config = tomllib.load(f)

    pool_config = config.get("pools", {}).get(name)
    if pool_config is None:
        raise FileNotFoundError(
            f"Pool not found: {name}\n"
            f" Define pools in ~/.wafer/config.toml:\n"
            f" [pools.{name}]\n"
            f' targets = ["target-1", "target-2"]'
        )

    targets = pool_config.get("targets", [])
    if not targets:
        raise ValueError(f"Pool '{name}' has no targets defined")

    return targets
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def list_pools() -> list[str]:
    """List all configured pool names.

    Returns:
        Sorted list of pool names
    """
    # No config file means no pools are configured.
    if not CONFIG_FILE.exists():
        return []

    with open(CONFIG_FILE, "rb") as f:
        config = tomllib.load(f)

    return sorted(config.get("pools", {}).keys())
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def save_pool(name: str, targets: list[str]) -> None:
    """Save or update a pool configuration.

    Args:
        name: Pool name
        targets: List of target names (must all exist)

    Raises:
        FileNotFoundError: If any target doesn't exist
    """
    # Every pool member must be a known target.
    known = list_targets()
    missing = [t for t in targets if t not in known]
    if missing:
        raise FileNotFoundError(f"Targets not found: {', '.join(missing)}")

    _ensure_dirs()

    # Start from the existing config, if there is one.
    data: dict = {}
    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, "rb") as f:
            data = tomllib.load(f)

    # Insert or replace this pool, then persist the whole config
    # (nested [pools.*] sections need custom serialization).
    data.setdefault("pools", {})[name] = {"targets": targets}
    _write_config_with_pools(data)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _write_config_with_pools(data: dict) -> None:
    """Write config file with pools support.

    Serializes `data` to CONFIG_FILE, handling the nested [pools.name]
    TOML structure and preserving existing nested sections like
    [default], [api], [environments.*] - including their sub-tables,
    which the previous writer silently dropped.
    """

    def _toml_str(value: str) -> str:
        # Escape backslashes and double quotes so values containing them
        # still round-trip as valid TOML (previously written verbatim,
        # which produced an unparseable file).
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _toml_value(value) -> str | None:
        """Format a scalar or list as a TOML value; None if unsupported."""
        # bool before int|float: bool is a subclass of int.
        if isinstance(value, bool):
            return str(value).lower()
        if isinstance(value, str):
            return _toml_str(value)
        if isinstance(value, int | float):
            return str(value)
        if isinstance(value, list):
            if all(isinstance(v, int) for v in value):
                # str() of an int list is already valid TOML: [1, 2, 3]
                return str(value)
            formatted = ", ".join(
                _toml_str(v) if isinstance(v, str) else str(v) for v in value
            )
            return f"[{formatted}]"
        return None

    lines: list[str] = []
    nested_sections: dict[str, dict] = {}

    # Top-level scalar/list keys first; dict values become [section]
    # tables below, and pools get their own dedicated handling.
    for key, value in data.items():
        if key == "pools" or value is None:
            continue
        if isinstance(value, dict):
            nested_sections[key] = value
            continue
        rendered = _toml_value(value)
        if rendered is not None:
            lines.append(f"{key} = {rendered}")

    def _write_section(header: str, section: dict) -> None:
        """Emit one [header] table, recursing into dict-valued keys."""
        lines.append("")
        lines.append(f"[{header}]")
        subtables: dict[str, dict] = {}
        for key, value in section.items():
            if value is None:
                continue
            if isinstance(value, dict):
                # Defer sub-tables (e.g. [environments.foo]) until after
                # this table's scalar keys, per TOML's ordering rules.
                subtables[key] = value
                continue
            rendered = _toml_value(value)
            if rendered is not None:
                lines.append(f"{key} = {rendered}")
        for sub_name, sub_section in subtables.items():
            _write_section(f"{header}.{sub_name}", sub_section)

    # Nested sections (e.g. [default], [api], [environments.foo])
    for section_name, section_data in nested_sections.items():
        _write_section(section_name, section_data)

    # Pools last: one [pools.NAME] table per pool.
    for pool_name, pool_config in data.get("pools", {}).items():
        lines.append("")
        lines.append(f"[pools.{pool_name}]")
        targets = pool_config.get("targets", [])
        formatted = ", ".join(_toml_str(t) for t in targets)
        lines.append(f"targets = [{formatted}]")

    CONFIG_FILE.write_text("\n".join(lines) + "\n")
|
|
416
|
+
|
|
417
|
+
|
|
260
418
|
def set_default_target(name: str) -> None:
|
|
261
419
|
"""Set default target.
|
|
262
420
|
|
|
@@ -5,10 +5,10 @@ wafer/api_client.py,sha256=cPULiTxqOAYYSfDTNJgd-6Pqrt3IM4Gm9903U7yGIwY,6163
|
|
|
5
5
|
wafer/auth.py,sha256=ZLsXZ73GDLD8GL7Rij1ELtuLqyJ5EU_uPBUMPVKwExA,10703
|
|
6
6
|
wafer/autotuner.py,sha256=6gH0Ho7T58EFerMQcHQxshWe3DF4qU7fb5xthAh5SPM,44364
|
|
7
7
|
wafer/billing.py,sha256=jbLB2lI4_9f2KD8uEFDi_ixLlowe5hasC0TIZJyIXRg,7163
|
|
8
|
-
wafer/cli.py,sha256=
|
|
8
|
+
wafer/cli.py,sha256=rdR84w5ubXO1W2xnrURTrX3AtXy3VeUFVekeGw0djyA,211114
|
|
9
9
|
wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
|
|
10
10
|
wafer/corpus.py,sha256=yTF3UA5bOa8BII2fmcXf-3WsIsM5DX4etysv0AzVknE,8912
|
|
11
|
-
wafer/evaluate.py,sha256=
|
|
11
|
+
wafer/evaluate.py,sha256=D_0WqIw1HysH7XXiyjxRpv6BUuOoPljN9-1vjPt4xFo,166765
|
|
12
12
|
wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
|
|
13
13
|
wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
|
|
14
14
|
wafer/inference.py,sha256=tZCO5i05FKY27ewis3CSBHFBeFbXY3xwj0DSjdoMY9s,4314
|
|
@@ -18,7 +18,8 @@ wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
|
|
|
18
18
|
wafer/rocprof_compute.py,sha256=Tu16Vb05b2grvheFWi1XLGlAr6m48NEDeZoDyw_4Uzw,19885
|
|
19
19
|
wafer/rocprof_sdk.py,sha256=fAYCxpfJa5BZTTkIMBOXg4KsYK4i_wNOKrJJn1ZfypM,10086
|
|
20
20
|
wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
|
|
21
|
-
wafer/
|
|
21
|
+
wafer/target_lock.py,sha256=QW0NMlu9Paa28O5iSAvGtN11j3kU2di0lADBrfwr2js,5160
|
|
22
|
+
wafer/targets.py,sha256=JlLvi18IHtOkgtBdkv_nUrzBweVmFoOQH-9tQW5s1yQ,15250
|
|
22
23
|
wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
|
|
23
24
|
wafer/wevin_cli.py,sha256=1_o2P47namZmPkbt47TnyYDmwhEzQYbSg5zjHffu2JQ,16802
|
|
24
25
|
wafer/workspaces.py,sha256=92LG1mtkzNz-ap3XzcqY6KnQ9SUCFG8VBIOUj1Who64,25757
|
|
@@ -27,8 +28,8 @@ wafer/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
27
28
|
wafer/templates/ask_docs.py,sha256=Lxs-faz9v5m4Qa4NjF2X_lE8KwM9ES9MNJkxo7ep56o,2256
|
|
28
29
|
wafer/templates/optimize_kernel.py,sha256=u6AL7Q3uttqlnBLzcoFdsiPq5lV2TV3bgqwCYYlK9gk,2357
|
|
29
30
|
wafer/templates/trace_analyze.py,sha256=XE1VqzVkIUsZbXF8EzQdDYgg-AZEYAOFpr6B_vnRELc,2880
|
|
30
|
-
wafer_cli-0.2.
|
|
31
|
-
wafer_cli-0.2.
|
|
32
|
-
wafer_cli-0.2.
|
|
33
|
-
wafer_cli-0.2.
|
|
34
|
-
wafer_cli-0.2.
|
|
31
|
+
wafer_cli-0.2.6.dist-info/METADATA,sha256=I72dRpc7rN96uFpTsyJkaJ0gS55LA9xpugMUGo2Dayw,559
|
|
32
|
+
wafer_cli-0.2.6.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
33
|
+
wafer_cli-0.2.6.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
|
|
34
|
+
wafer_cli-0.2.6.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
|
|
35
|
+
wafer_cli-0.2.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|