yanex 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,625 @@
1
+ """
2
+ Run command implementation for yanex CLI.
3
+ """
4
+
5
+ import json
6
+ import os
7
+ import subprocess
8
+ import sys
9
+ import threading
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ import click
14
+
15
+ from ...core.config import expand_parameter_sweeps, has_sweep_parameters
16
+ from ...core.manager import ExperimentManager
17
+
18
+
19
@click.command()
@click.argument("script", type=click.Path(exists=True, path_type=Path), required=False)
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True, path_type=Path),
    help="Configuration file (YAML/JSON)",
)
@click.option(
    "--param",
    "-p",
    multiple=True,
    help="Parameter override in format key=value (repeatable)",
)
@click.option("--name", "-n", help="Experiment name")
@click.option("--tag", "-t", multiple=True, help="Experiment tag (repeatable)")
@click.option("--description", "-d", help="Experiment description")
@click.option("--dry-run", is_flag=True, help="Validate configuration without running")
@click.option(
    "--ignore-dirty",
    is_flag=True,
    help="Allow running with uncommitted changes (bypasses git cleanliness check)",
)
@click.option(
    "--stage",
    is_flag=True,
    help="Stage experiment for later execution instead of running immediately",
)
@click.option(
    "--staged",
    is_flag=True,
    help="Execute staged experiments",
)
@click.pass_context
def run(
    ctx: click.Context,
    script: Optional[Path],
    config: Optional[Path],
    param: List[str],
    name: Optional[str],
    tag: List[str],
    description: Optional[str],
    dry_run: bool,
    ignore_dirty: bool,
    stage: bool,
    staged: bool,
) -> None:
    """
    Run a script as a tracked experiment.

    SCRIPT is the path to the Python script to execute.

    Examples:

      # Basic run
      yanex run train.py

      # With configuration file
      yanex run train.py --config config.yaml

      # With parameter overrides
      yanex run train.py --param learning_rate=0.01 --param epochs=100

      # Parameter sweeps (requires --stage)
      yanex run train.py --param "lr=range(0.01, 0.1, 0.01)" --stage
      yanex run train.py --param "lr=linspace(0.001, 0.1, 5)" --stage
      yanex run train.py --param "lr=logspace(-3, -1, 3)" --stage
      yanex run train.py --param "batch_size=list(16, 32, 64)" --stage

      # Multi-parameter sweep (cross-product)
      yanex run train.py \\
        --param "lr=range(0.01, 0.1, 0.01)" \\
        --param "batch_size=list(16, 32, 64)" \\
        --stage

      # Execute staged experiments
      yanex run --staged

      # Full experiment setup
      yanex run train.py \\
        --config config.yaml \\
        --param learning_rate=0.01 \\
        --name "lr-tuning" \\
        --tag "hyperopt" \\
        --description "Learning rate optimization"
    """
    # NOTE: the docstring above doubles as the `--help` text, so developer
    # notes live in comments rather than in the docstring.
    from .._utils import (
        load_and_merge_config,
        validate_experiment_config,
        validate_sweep_requirements,
    )

    # click leaves ctx.obj as None unless a parent command assigned it, so
    # fall back to an empty mapping instead of failing with AttributeError.
    verbose = (ctx.obj or {}).get("verbose", False)

    # --stage creates staged experiments, --staged consumes them; the two
    # cannot be combined in one invocation.
    if stage and staged:
        click.echo("Error: Cannot use both --stage and --staged flags", err=True)
        raise click.Abort()

    if staged:
        # Executing staged experiments needs no SCRIPT argument.
        _execute_staged_experiments(verbose)
        return

    # SCRIPT is declared optional only so that --staged works without it.
    if script is None:
        click.echo("Error: Missing argument 'SCRIPT'", err=True)
        click.echo("Try 'yanex run --help' for help.", err=True)
        raise click.Abort()

    if verbose:
        click.echo(f"Running script: {script}")
        if config:
            click.echo(f"Using config: {config}")
        if param:
            click.echo(f"Parameter overrides: {param}")

    # click hands over tuples for `multiple=True` options; the helpers below
    # are given plain lists.
    tags = list(tag)

    try:
        # Load and merge configuration
        merged_config = load_and_merge_config(
            config_path=config, param_overrides=list(param), verbose=verbose
        )

        if verbose:
            click.echo(f"Merged configuration: {merged_config}")

        # Validate configuration
        validate_experiment_config(
            script=script,
            name=name,
            tags=tags,
            description=description,
            config=merged_config,
        )

        # Validate sweep requirements
        validate_sweep_requirements(merged_config, stage)

        if dry_run:
            click.echo("✓ Configuration validation passed")
            click.echo("Dry run completed - experiment would be created with:")
            click.echo(f" Script: {script}")
            click.echo(f" Name: {name}")
            click.echo(f" Tags: {tags}")
            click.echo(f" Description: {description}")
            click.echo(f" Config: {merged_config}")
            return

        # Execute or stage the experiment; both helpers take the same arguments.
        handler = _stage_experiment if stage else _execute_experiment
        handler(
            script=script,
            name=name,
            tags=tags,
            description=description,
            config=merged_config,
            verbose=verbose,
            ignore_dirty=ignore_dirty,
        )

    except click.Abort:
        # click.Abort derives from RuntimeError, so it would otherwise be
        # caught below and produce a meaningless empty "Error: " line. The
        # helper that raised it has already reported the failure.
        raise
    except Exception as e:
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
192
+
193
+
194
def _execute_experiment(
    script: Path,
    name: Optional[str],
    tags: List[str],
    description: Optional[str],
    config: Dict[str, Any],
    verbose: bool = False,
    ignore_dirty: bool = False,
) -> None:
    """Execute script as an experiment with proper lifecycle management.

    Creates and starts an experiment, runs ``script`` in a child Python
    process while echoing and capturing its stdout/stderr, stores the captured
    output as ``stdout.txt`` / ``stderr.txt`` artifacts and finally marks the
    experiment completed, failed or cancelled.

    Args:
        script: Path of the Python script to run.
        name: Optional experiment name.
        tags: Tags passed to the experiment manager.
        description: Optional experiment description.
        config: Parameters; each top-level key is exported to the child as
            ``YANEX_PARAM_<key>`` (strings verbatim, other values as JSON).
        verbose: Echo progress messages.
        ignore_dirty: Forwarded to the manager as ``allow_dirty``.

    Raises:
        click.Abort: If the script exits with a non-zero code or an
            unexpected error occurs while running it.
        KeyboardInterrupt: Re-raised after the child has been terminated and
            the experiment cancelled.
    """
    # Create experiment
    manager = ExperimentManager()
    experiment_id = manager.create_experiment(
        script_path=script,
        name=name,
        config=config,
        tags=tags,
        description=description,
        allow_dirty=ignore_dirty,
    )

    if verbose:
        click.echo(f"Created experiment: {experiment_id}")

    # Start experiment
    manager.start_experiment(experiment_id)

    # Stays None until the child has actually been spawned, so the interrupt
    # handler knows whether there is anything to terminate.
    process = None

    try:
        # Prepare environment for subprocess
        env = os.environ.copy()
        env["YANEX_EXPERIMENT_ID"] = experiment_id
        env["YANEX_CLI_ACTIVE"] = "1"  # Mark as CLI context

        # Add parameters as environment variables
        for key, value in config.items():
            env[f"YANEX_PARAM_{key}"] = (
                value if isinstance(value, str) else json.dumps(value)
            )

        if verbose:
            click.echo(f"Starting script execution: {script}")

        # Execute script with real-time output streaming
        stdout_capture: List[str] = []
        stderr_capture: List[str] = []

        process = subprocess.Popen(
            [sys.executable, str(script.resolve())],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=Path.cwd(),
        )

        def stream_output(pipe, capture_list, output_stream):
            """Stream output line by line while capturing it."""
            for line in iter(pipe.readline, ""):
                # Display in real-time
                output_stream.write(line)
                output_stream.flush()
                # Capture for later saving
                capture_list.append(line)
            pipe.close()

        # One reader thread per pipe so neither stream can fill up and block
        # the child while the other is being read.
        stdout_thread = threading.Thread(
            target=stream_output, args=(process.stdout, stdout_capture, sys.stdout)
        )
        stderr_thread = threading.Thread(
            target=stream_output, args=(process.stderr, stderr_capture, sys.stderr)
        )

        stdout_thread.start()
        stderr_thread.start()

        # Wait for process completion
        return_code = process.wait()

        # Wait for output threads to finish
        stdout_thread.join()
        stderr_thread.join()

        # Save captured output as artifacts
        stdout_text = "".join(stdout_capture)
        stderr_text = "".join(stderr_capture)

        if stdout_text:
            manager.storage.save_text_artifact(experiment_id, "stdout.txt", stdout_text)
        if stderr_text:
            manager.storage.save_text_artifact(experiment_id, "stderr.txt", stderr_text)

        # Handle experiment result based on exit code
        if return_code == 0:
            manager.complete_experiment(experiment_id)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✓ Experiment completed successfully: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
        else:
            error_msg = f"Script exited with code {return_code}"
            if stderr_text:
                error_msg += f": {stderr_text.strip()}"
            manager.fail_experiment(experiment_id, error_msg)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✗ Experiment failed: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
            click.echo(f"Error: {error_msg}")
            raise click.Abort()

    except click.Abort:
        # click.Abort derives from RuntimeError. Without this clause the abort
        # raised above would fall into the generic handler below, which would
        # fail the experiment a second time with an empty "Unexpected error"
        # message and print the failure banner twice.
        raise

    except KeyboardInterrupt:
        # Stop the child script (if it was started) before recording the
        # cancellation.
        if process is not None:
            process.terminate()
            process.wait()
        manager.cancel_experiment(experiment_id, "Interrupted by user (Ctrl+C)")
        click.echo(f"✗ Experiment cancelled: {experiment_id}")
        raise

    except Exception as e:
        manager.fail_experiment(experiment_id, f"Unexpected error: {str(e)}")
        click.echo(f"✗ Experiment failed: {experiment_id}")
        click.echo(f"Error: {e}")
        raise click.Abort() from e
318
+
319
+
320
+ def _generate_sweep_experiment_name(
321
+ base_name: Optional[str], config: Dict[str, Any]
322
+ ) -> str:
323
+ """
324
+ Generate a descriptive name for a sweep experiment based on its parameters.
325
+
326
+ Args:
327
+ base_name: Base experiment name (can be None)
328
+ config: Configuration dictionary with parameter values
329
+
330
+ Returns:
331
+ Generated experiment name with parameter suffixes
332
+ """
333
+ # Start with base name or default
334
+ if base_name:
335
+ name_parts = [base_name]
336
+ else:
337
+ name_parts = ["sweep"]
338
+
339
+ # Extract parameter name-value pairs
340
+ param_parts = []
341
+
342
+ def extract_params(d: Dict[str, Any], prefix: str = "") -> None:
343
+ for key, value in d.items():
344
+ if isinstance(value, dict):
345
+ # Handle nested parameters
346
+ new_prefix = f"{prefix}_{key}" if prefix else key
347
+ extract_params(value, new_prefix)
348
+ else:
349
+ # Format parameter name
350
+ param_name = f"{prefix}_{key}" if prefix else key
351
+
352
+ # Format parameter value
353
+ if isinstance(value, bool):
354
+ param_value = str(value).lower()
355
+ elif isinstance(value, (int, float)):
356
+ # Format numbers with reasonable precision
357
+ if isinstance(value, float):
358
+ # Remove trailing zeros and unnecessary decimal point
359
+ if value == int(value):
360
+ param_value = str(int(value))
361
+ else:
362
+ formatted = f"{value:.6g}" # Up to 6 significant digits
363
+ # Replace dots with 'p' and handle scientific notation
364
+ param_value = (
365
+ formatted.replace(".", "p")
366
+ .replace("e", "e")
367
+ .replace("+", "")
368
+ .replace("-", "m")
369
+ )
370
+ else:
371
+ param_value = str(value)
372
+ else:
373
+ # String values
374
+ param_value = str(value)
375
+
376
+ param_parts.append(f"{param_name}_{param_value}")
377
+
378
+ extract_params(config)
379
+
380
+ # Combine name parts
381
+ if param_parts:
382
+ name_parts.extend(param_parts)
383
+
384
+ result = "-".join(name_parts)
385
+
386
+ # Ensure name isn't too long (limit to 100 characters)
387
+ if len(result) > 100:
388
+ # Truncate but keep the base name and at least one parameter
389
+ if base_name:
390
+ base_len = len(base_name)
391
+ remaining = 97 - base_len # Leave room for "-..."
392
+ if param_parts:
393
+ truncated_params = param_parts[0][:remaining]
394
+ result = f"{base_name}-{truncated_params}..."
395
+ else:
396
+ result = base_name[:97] + "..."
397
+ else:
398
+ result = result[:97] + "..."
399
+
400
+ return result
401
+
402
+
403
def _stage_experiment(
    script: Path,
    name: Optional[str],
    tags: List[str],
    description: Optional[str],
    config: Dict[str, Any],
    verbose: bool = False,
    ignore_dirty: bool = False,
) -> None:
    """Stage experiment(s) for later execution, expanding parameter sweeps.

    A configuration without sweep parameters yields exactly one staged
    experiment. A configuration with sweep parameters is expanded and every
    resulting configuration is staged under its own generated name.
    """
    manager = ExperimentManager()

    # Plain configuration: stage a single experiment and report where it lives.
    if not has_sweep_parameters(config):
        staged_id = manager.create_experiment(
            script_path=script,
            name=name,
            config=config,
            tags=tags,
            description=description,
            allow_dirty=ignore_dirty,
            stage_only=True,
        )

        if verbose:
            click.echo(f"Staged experiment: {staged_id}")

        staged_dir = manager.storage.get_experiment_directory(staged_id)
        click.echo(f"✓ Experiment staged: {staged_id}")
        click.echo(f" Directory: {staged_dir}")
        click.echo(" Use 'yanex run --staged' to execute staged experiments")
        return

    # Sweep configuration: one staged experiment per expanded configuration.
    sweep_configs = expand_parameter_sweeps(config)
    total = len(sweep_configs)

    click.echo(f"✓ Parameter sweep detected: expanding into {total} experiments")

    staged_ids = []
    for position, sweep_config in enumerate(sweep_configs, start=1):
        # Each sweep member gets a name derived from its parameter values.
        staged_id = manager.create_experiment(
            script_path=script,
            name=_generate_sweep_experiment_name(name, sweep_config),
            config=sweep_config,
            tags=tags,
            description=description,
            allow_dirty=ignore_dirty,
            stage_only=True,
        )
        staged_ids.append(staged_id)

        if verbose:
            click.echo(f" Staged sweep experiment {position}/{total}: {staged_id}")
            click.echo(f" Config: {sweep_config}")

    # Summary of everything that was staged.
    click.echo(f"✓ Staged {len(staged_ids)} sweep experiments")
    click.echo(f" IDs: {', '.join(staged_ids)}")
    click.echo(" Use 'yanex run --staged' to execute all staged experiments")
472
+
473
+
474
def _execute_staged_experiments(verbose: bool = False) -> None:
    """Execute all staged experiments.

    Each staged experiment is loaded from storage, transitioned to running
    and executed in turn. A failure in one experiment is reported and does
    not prevent the remaining ones from running.

    Args:
        verbose: Echo progress messages.
    """
    manager = ExperimentManager()
    staged_experiments = manager.get_staged_experiments()

    if not staged_experiments:
        click.echo("No staged experiments found")
        return

    if verbose:
        click.echo(f"Found {len(staged_experiments)} staged experiments")

    for experiment_id in staged_experiments:
        try:
            if verbose:
                click.echo(f"Executing staged experiment: {experiment_id}")

            # Load experiment metadata to get script path and config
            metadata = manager.storage.load_metadata(experiment_id)
            config = manager.storage.load_config(experiment_id)
            script_path = Path(metadata["script_path"])

            # Transition to running state
            manager.execute_staged_experiment(experiment_id)

            # Run the script and record its outcome
            _execute_staged_script(
                experiment_id=experiment_id,
                script_path=script_path,
                config=config,
                manager=manager,
                verbose=verbose,
            )

        except click.Abort:
            # click.Abort derives from RuntimeError, so it must be handled
            # before the generic clause. _execute_staged_script raises it only
            # after it has already reported the failure and marked the
            # experiment as failed; doing either again would print an empty
            # message and attempt a second state transition.
            continue

        except Exception as e:
            click.echo(
                f"✗ Failed to execute staged experiment {experiment_id}: {e}", err=True
            )
            try:
                manager.fail_experiment(
                    experiment_id, f"Staged execution failed: {str(e)}"
                )
            except Exception as record_error:
                # Best effort to record failure: never let bookkeeping stop
                # the remaining experiments, but do not hide the problem
                # completely either.
                if verbose:
                    click.echo(
                        f"Could not record failure for {experiment_id}: {record_error}",
                        err=True,
                    )
519
+
520
+
521
def _execute_staged_script(
    experiment_id: str,
    script_path: Path,
    config: Dict[str, Any],
    manager: ExperimentManager,
    verbose: bool = False,
) -> None:
    """Execute the script for a staged experiment.

    Runs ``script_path`` in a child Python process while echoing and capturing
    its stdout/stderr, stores the captured output as ``stdout.txt`` /
    ``stderr.txt`` artifacts and marks the experiment completed, failed or
    cancelled through ``manager``.

    Args:
        experiment_id: Identifier of the experiment being executed.
        script_path: Path of the Python script to run.
        config: Parameters; each top-level key is exported to the child as
            ``YANEX_PARAM_<key>`` (strings verbatim, other values as JSON).
        manager: Experiment manager used for storage and state transitions.
        verbose: Echo progress messages.

    Raises:
        click.Abort: If the script exits with a non-zero code or an
            unexpected error occurs while running it.
        KeyboardInterrupt: Re-raised after the child has been terminated and
            the experiment cancelled.
    """
    # Stays None until the child has actually been spawned, so the interrupt
    # handler knows whether there is anything to terminate.
    process = None

    try:
        # Prepare environment for subprocess
        env = os.environ.copy()
        env["YANEX_EXPERIMENT_ID"] = experiment_id
        env["YANEX_CLI_ACTIVE"] = "1"

        # Add parameters as environment variables
        for key, value in config.items():
            env[f"YANEX_PARAM_{key}"] = (
                value if isinstance(value, str) else json.dumps(value)
            )

        if verbose:
            click.echo(f"Starting script execution: {script_path}")

        # Execute script with real-time output streaming
        stdout_capture: List[str] = []
        stderr_capture: List[str] = []

        process = subprocess.Popen(
            [sys.executable, str(script_path.resolve())],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=Path.cwd(),
        )

        def stream_output(pipe, capture_list, output_stream):
            """Stream output line by line while capturing it."""
            for line in iter(pipe.readline, ""):
                # Display in real-time
                output_stream.write(line)
                output_stream.flush()
                # Capture for later saving
                capture_list.append(line)
            pipe.close()

        # One reader thread per pipe so neither stream can fill up and block
        # the child while the other is being read.
        stdout_thread = threading.Thread(
            target=stream_output, args=(process.stdout, stdout_capture, sys.stdout)
        )
        stderr_thread = threading.Thread(
            target=stream_output, args=(process.stderr, stderr_capture, sys.stderr)
        )

        stdout_thread.start()
        stderr_thread.start()

        # Wait for process completion
        return_code = process.wait()

        # Wait for output threads to finish
        stdout_thread.join()
        stderr_thread.join()

        # Save captured output as artifacts
        stdout_text = "".join(stdout_capture)
        stderr_text = "".join(stderr_capture)

        if stdout_text:
            manager.storage.save_text_artifact(experiment_id, "stdout.txt", stdout_text)
        if stderr_text:
            manager.storage.save_text_artifact(experiment_id, "stderr.txt", stderr_text)

        # Handle experiment result based on exit code
        if return_code == 0:
            manager.complete_experiment(experiment_id)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✓ Experiment completed successfully: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
        else:
            error_msg = f"Script exited with code {return_code}"
            if stderr_text:
                error_msg += f": {stderr_text.strip()}"
            manager.fail_experiment(experiment_id, error_msg)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✗ Experiment failed: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
            click.echo(f"Error: {error_msg}")
            raise click.Abort()

    except click.Abort:
        # click.Abort derives from RuntimeError. Without this clause the abort
        # raised above would fall into the generic handler below, which would
        # fail the experiment a second time with an empty "Unexpected error"
        # message and print the failure banner twice.
        raise

    except KeyboardInterrupt:
        # Stop the child script (if it was started) before recording the
        # cancellation.
        if process is not None:
            process.terminate()
            process.wait()
        manager.cancel_experiment(experiment_id, "Interrupted by user (Ctrl+C)")
        click.echo(f"✗ Experiment cancelled: {experiment_id}")
        raise

    except Exception as e:
        manager.fail_experiment(experiment_id, f"Unexpected error: {str(e)}")
        click.echo(f"✗ Experiment failed: {experiment_id}")
        click.echo(f"Error: {e}")
        raise click.Abort() from e