wafer_cli-0.2.4-py3-none-any.whl → wafer_cli-0.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/target_lock.py ADDED
@@ -0,0 +1,198 @@
+"""Target locking for concurrent access control.
+
+Uses file locks (fcntl.flock) to ensure only one process uses a target at a time.
+Locks are automatically released when the process exits or crashes.
+
+Usage:
+    # Try to acquire a single target
+    with try_acquire_target("mi300x-1") as acquired:
+        if acquired:
+            # Got the lock, run eval
+            ...
+        else:
+            # Target busy
+            ...
+
+    # Acquire first available from a pool
+    with acquire_from_pool(["mi300x-1", "mi300x-2", "mi300x-3"]) as target:
+        if target:
+            # Got a target, run eval
+            ...
+        else:
+            # All targets busy
+            ...
+"""
+
+from __future__ import annotations
+
+import fcntl
+import os
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Iterator
+
+# Lock directory
+LOCKS_DIR = Path.home() / ".wafer" / "locks"
+
+
+def _ensure_locks_dir() -> None:
+    """Ensure locks directory exists."""
+    LOCKS_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def _lock_path(target_name: str) -> Path:
+    """Get path to lock file for a target."""
+    return LOCKS_DIR / f"{target_name}.lock"
+
+
+@contextmanager
+def try_acquire_target(target_name: str) -> Iterator[bool]:
+    """Try to acquire exclusive lock on a target.
+
+    Args:
+        target_name: Name of the target to lock
+
+    Yields:
+        True if lock was acquired, False if target is busy
+
+    The lock is automatically released when the context exits,
+    or if the process crashes.
+    """
+    _ensure_locks_dir()
+    lock_file = _lock_path(target_name)
+
+    # Open or create lock file
+    fd = os.open(str(lock_file), os.O_CREAT | os.O_RDWR)
+
+    try:
+        # Try non-blocking exclusive lock
+        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        # Write PID to lock file for debugging
+        os.ftruncate(fd, 0)
+        os.write(fd, f"{os.getpid()}\n".encode())
+        try:
+            yield True
+        finally:
+            # Release lock
+            fcntl.flock(fd, fcntl.LOCK_UN)
+    except BlockingIOError:
+        # Lock is held by another process
+        yield False
+    finally:
+        os.close(fd)
+
+
+@contextmanager
+def acquire_from_pool(target_names: list[str]) -> Iterator[str | None]:
+    """Acquire first available target from a list.
+
+    Tries each target in order, returns the first one that's available.
+
+    Args:
+        target_names: List of target names to try
+
+    Yields:
+        Name of acquired target, or None if all are busy
+
+    Example:
+        with acquire_from_pool(["gpu-1", "gpu-2", "gpu-3"]) as target:
+            if target:
+                print(f"Got {target}")
+                run_eval(target)
+            else:
+                print("All targets busy")
+    """
+    _ensure_locks_dir()
+
+    # Try each target in order
+    for target_name in target_names:
+        lock_file = _lock_path(target_name)
+        fd = os.open(str(lock_file), os.O_CREAT | os.O_RDWR)
+
+        try:
+            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            # Got the lock - write PID and yield
+            os.ftruncate(fd, 0)
+            os.write(fd, f"{os.getpid()}\n".encode())
+            try:
+                yield target_name
+                return  # Success - exit after context
+            finally:
+                fcntl.flock(fd, fcntl.LOCK_UN)
+                os.close(fd)
+        except BlockingIOError:
+            # This target is busy, try next
+            os.close(fd)
+            continue
+
+    # All targets busy
+    yield None
+
+
+def is_target_locked(target_name: str) -> bool:
+    """Check if a target is currently locked.
+
+    Note: This is a point-in-time check - the lock status can change
+    immediately after this returns.
+
+    Args:
+        target_name: Name of the target to check
+
+    Returns:
+        True if target is locked, False if available
+    """
+    _ensure_locks_dir()
+    lock_file = _lock_path(target_name)
+
+    if not lock_file.exists():
+        return False
+
+    fd = os.open(str(lock_file), os.O_RDONLY)
+    try:
+        # Try non-blocking lock
+        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        # Got it - so it wasn't locked
+        fcntl.flock(fd, fcntl.LOCK_UN)
+        return False
+    except BlockingIOError:
+        return True
+    finally:
+        os.close(fd)
+
+
+def get_lock_holder(target_name: str) -> int | None:
+    """Get PID of process holding lock on a target.
+
+    Args:
+        target_name: Name of the target
+
+    Returns:
+        PID of lock holder, or None if not locked or unknown
+    """
+    lock_file = _lock_path(target_name)
+
+    if not lock_file.exists():
+        return None
+
+    try:
+        content = lock_file.read_text().strip()
+        return int(content)
+    except (ValueError, OSError):
+        return None
+
+
+def list_locked_targets() -> list[str]:
+    """List all currently locked targets.
+
+    Returns:
+        List of target names that are currently locked
+    """
+    _ensure_locks_dir()
+
+    locked = []
+    for lock_file in LOCKS_DIR.glob("*.lock"):
+        target_name = lock_file.stem
+        if is_target_locked(target_name):
+            locked.append(target_name)
+
+    return sorted(locked)
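
The new module is self-contained and importable on its own. Below is a minimal usage sketch, not part of the package: it assumes wafer-cli 0.2.5 is installed, and run_eval is a hypothetical stand-in for work that needs exclusive access to a target.

    # Sketch only: run_eval is a hypothetical placeholder, not a wafer-cli API.
    from wafer.target_lock import acquire_from_pool, get_lock_holder, is_target_locked

    def run_eval(target: str) -> None:
        print(f"running eval on {target}")

    with acquire_from_pool(["mi300x-1", "mi300x-2"]) as target:
        if target is not None:
            run_eval(target)  # lock is held for the duration of this block
        else:
            # All busy: report who holds each lock instead of spinning.
            for name in ("mi300x-1", "mi300x-2"):
                if is_target_locked(name):
                    print(f"{name} held by PID {get_lock_holder(name)}")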
wafer/targets.py CHANGED
@@ -257,6 +257,164 @@ def get_default_target() -> str | None:
     return data.get("default_target")
 
 
+# ── Pool Management ─────────────────────────────────────────────────────────
+
+
+def get_pool(name: str) -> list[str]:
+    """Get list of targets in a named pool.
+
+    Pools are defined in ~/.wafer/config.toml:
+        [pools.my-pool]
+        targets = ["target-1", "target-2", "target-3"]
+
+    Args:
+        name: Pool name
+
+    Returns:
+        List of target names in the pool
+
+    Raises:
+        FileNotFoundError: If pool doesn't exist
+    """
+    if not CONFIG_FILE.exists():
+        raise FileNotFoundError(f"Pool not found: {name} (no config file)")
+
+    with open(CONFIG_FILE, "rb") as f:
+        data = tomllib.load(f)
+
+    pools = data.get("pools", {})
+    if name not in pools:
+        raise FileNotFoundError(
+            f"Pool not found: {name}\n"
+            f"  Define pools in ~/.wafer/config.toml:\n"
+            f"  [pools.{name}]\n"
+            f'  targets = ["target-1", "target-2"]'
+        )
+
+    pool_config = pools[name]
+    targets = pool_config.get("targets", [])
+
+    if not targets:
+        raise ValueError(f"Pool '{name}' has no targets defined")
+
+    return targets
+
+
+def list_pools() -> list[str]:
+    """List all configured pool names.
+
+    Returns:
+        Sorted list of pool names
+    """
+    if not CONFIG_FILE.exists():
+        return []
+
+    with open(CONFIG_FILE, "rb") as f:
+        data = tomllib.load(f)
+
+    return sorted(data.get("pools", {}).keys())
+
+
+def save_pool(name: str, targets: list[str]) -> None:
+    """Save or update a pool configuration.
+
+    Args:
+        name: Pool name
+        targets: List of target names (must all exist)
+
+    Raises:
+        FileNotFoundError: If any target doesn't exist
+    """
+    # Verify all targets exist
+    existing_targets = list_targets()
+    missing = [t for t in targets if t not in existing_targets]
+    if missing:
+        raise FileNotFoundError(f"Targets not found: {', '.join(missing)}")
+
+    _ensure_dirs()
+
+    # Load existing config
+    if CONFIG_FILE.exists():
+        with open(CONFIG_FILE, "rb") as f:
+            data = tomllib.load(f)
+    else:
+        data = {}
+
+    # Update pools section
+    if "pools" not in data:
+        data["pools"] = {}
+
+    data["pools"][name] = {"targets": targets}
+
+    # Write back - need custom handling for nested structure
+    _write_config_with_pools(data)
+
+
+def _write_config_with_pools(data: dict) -> None:
+    """Write config file with pools support.
+
+    Handles the nested [pools.name] TOML structure and preserves
+    existing nested sections like [default], [api], [environments.*].
+    """
+    lines = []
+
+    # Collect nested sections to write after top-level keys
+    nested_sections: dict[str, dict] = {}
+
+    # Write top-level keys first (except pools and nested dicts)
+    for key, value in data.items():
+        if key == "pools":
+            continue
+        if value is None:
+            continue
+        if isinstance(value, dict):
+            # Save nested sections for later
+            nested_sections[key] = value
+        elif isinstance(value, str):
+            lines.append(f'{key} = "{value}"')
+        elif isinstance(value, bool):
+            lines.append(f"{key} = {str(value).lower()}")
+        elif isinstance(value, int | float):
+            lines.append(f"{key} = {value}")
+        elif isinstance(value, list):
+            if all(isinstance(v, int) for v in value):
+                lines.append(f"{key} = {value}")
+            else:
+                formatted = ", ".join(f'"{v}"' if isinstance(v, str) else str(v) for v in value)
+                lines.append(f"{key} = [{formatted}]")
+
+    # Write nested sections (e.g., [default], [api], [environments.foo])
+    for section_name, section_data in nested_sections.items():
+        lines.append("")
+        lines.append(f"[{section_name}]")
+        for key, value in section_data.items():
+            if value is None:
+                continue
+            if isinstance(value, str):
+                lines.append(f'{key} = "{value}"')
+            elif isinstance(value, bool):
+                lines.append(f"{key} = {str(value).lower()}")
+            elif isinstance(value, int | float):
+                lines.append(f"{key} = {value}")
+            elif isinstance(value, list):
+                if all(isinstance(v, int) for v in value):
+                    lines.append(f"{key} = {value}")
+                else:
+                    formatted = ", ".join(f'"{v}"' if isinstance(v, str) else str(v) for v in value)
+                    lines.append(f"{key} = [{formatted}]")
+
+    # Write pools
+    pools = data.get("pools", {})
+    for pool_name, pool_config in pools.items():
+        lines.append("")
+        lines.append(f"[pools.{pool_name}]")
+        targets = pool_config.get("targets", [])
+        formatted = ", ".join(f'"{t}"' for t in targets)
+        lines.append(f"targets = [{formatted}]")
+
+    CONFIG_FILE.write_text("\n".join(lines) + "\n")
+
+
 def set_default_target(name: str) -> None:
     """Set default target.
 
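
Together with target_lock.py above, the pool API lets a caller register a named group of targets once and then borrow whichever member is free. A minimal sketch, assuming targets mi300x-1 and mi300x-2 are already configured (save_pool raises FileNotFoundError for unknown targets):

    # Sketch only: pool and target names are illustrative.
    from wafer.target_lock import acquire_from_pool
    from wafer.targets import get_pool, save_pool

    save_pool("mi300x", ["mi300x-1", "mi300x-2"])  # persists [pools.mi300x] in ~/.wafer/config.toml

    with acquire_from_pool(get_pool("mi300x")) as target:
        if target is None:
            raise SystemExit("all targets in pool 'mi300x' are busy")
        print(f"acquired {target}")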
wafer_cli-0.2.4.dist-info/METADATA → wafer_cli-0.2.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.4
+Version: 0.2.5
 Summary: CLI tool for running commands on remote GPUs and GPU kernel optimization agent
 Requires-Python: >=3.11
 Requires-Dist: typer>=0.12.0
wafer_cli-0.2.4.dist-info/RECORD → wafer_cli-0.2.5.dist-info/RECORD CHANGED
@@ -5,10 +5,10 @@ wafer/api_client.py,sha256=cPULiTxqOAYYSfDTNJgd-6Pqrt3IM4Gm9903U7yGIwY,6163
 wafer/auth.py,sha256=ZLsXZ73GDLD8GL7Rij1ELtuLqyJ5EU_uPBUMPVKwExA,10703
 wafer/autotuner.py,sha256=6gH0Ho7T58EFerMQcHQxshWe3DF4qU7fb5xthAh5SPM,44364
 wafer/billing.py,sha256=jbLB2lI4_9f2KD8uEFDi_ixLlowe5hasC0TIZJyIXRg,7163
-wafer/cli.py,sha256=19BwYcZo6QfJWfGwkSXNFx__63ZjZM22woL66fNQhIM,200512
+wafer/cli.py,sha256=rdR84w5ubXO1W2xnrURTrX3AtXy3VeUFVekeGw0djyA,211114
 wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
 wafer/corpus.py,sha256=yTF3UA5bOa8BII2fmcXf-3WsIsM5DX4etysv0AzVknE,8912
-wafer/evaluate.py,sha256=sGtEkLmZ8wEeC2BRYMoCbCPIlQvuv6tG77910gbIygc,135553
+wafer/evaluate.py,sha256=D_0WqIw1HysH7XXiyjxRpv6BUuOoPljN9-1vjPt4xFo,166765
 wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
 wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
 wafer/inference.py,sha256=tZCO5i05FKY27ewis3CSBHFBeFbXY3xwj0DSjdoMY9s,4314
@@ -18,7 +18,8 @@ wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
 wafer/rocprof_compute.py,sha256=Tu16Vb05b2grvheFWi1XLGlAr6m48NEDeZoDyw_4Uzw,19885
 wafer/rocprof_sdk.py,sha256=fAYCxpfJa5BZTTkIMBOXg4KsYK4i_wNOKrJJn1ZfypM,10086
 wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
-wafer/targets.py,sha256=WE5TJgFPGtEIh7VaTQHZ4wB2t4kW0c5K8-UmQ_39Ock,10254
+wafer/target_lock.py,sha256=QW0NMlu9Paa28O5iSAvGtN11j3kU2di0lADBrfwr2js,5160
+wafer/targets.py,sha256=JlLvi18IHtOkgtBdkv_nUrzBweVmFoOQH-9tQW5s1yQ,15250
 wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
 wafer/wevin_cli.py,sha256=1_o2P47namZmPkbt47TnyYDmwhEzQYbSg5zjHffu2JQ,16802
 wafer/workspaces.py,sha256=92LG1mtkzNz-ap3XzcqY6KnQ9SUCFG8VBIOUj1Who64,25757
@@ -27,8 +28,8 @@ wafer/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wafer/templates/ask_docs.py,sha256=Lxs-faz9v5m4Qa4NjF2X_lE8KwM9ES9MNJkxo7ep56o,2256
 wafer/templates/optimize_kernel.py,sha256=u6AL7Q3uttqlnBLzcoFdsiPq5lV2TV3bgqwCYYlK9gk,2357
 wafer/templates/trace_analyze.py,sha256=XE1VqzVkIUsZbXF8EzQdDYgg-AZEYAOFpr6B_vnRELc,2880
-wafer_cli-0.2.4.dist-info/METADATA,sha256=OqNyd57hfOUZvBWFd8iMdrfH1VICqcqQf_K1sHZVhfA,559
-wafer_cli-0.2.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-wafer_cli-0.2.4.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
-wafer_cli-0.2.4.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
-wafer_cli-0.2.4.dist-info/RECORD,,
+wafer_cli-0.2.5.dist-info/METADATA,sha256=O1JyAJtPtr5j4sz5jKiDfyB4d14Mb3tSSDEnQftyzwE,559
+wafer_cli-0.2.5.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+wafer_cli-0.2.5.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
+wafer_cli-0.2.5.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
+wafer_cli-0.2.5.dist-info/RECORD,,