yanex 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yanex/__init__.py +74 -0
- yanex/api.py +507 -0
- yanex/cli/__init__.py +3 -0
- yanex/cli/_utils.py +114 -0
- yanex/cli/commands/__init__.py +3 -0
- yanex/cli/commands/archive.py +177 -0
- yanex/cli/commands/compare.py +320 -0
- yanex/cli/commands/confirm.py +198 -0
- yanex/cli/commands/delete.py +203 -0
- yanex/cli/commands/list.py +243 -0
- yanex/cli/commands/run.py +625 -0
- yanex/cli/commands/show.py +560 -0
- yanex/cli/commands/unarchive.py +177 -0
- yanex/cli/commands/update.py +282 -0
- yanex/cli/filters/__init__.py +8 -0
- yanex/cli/filters/base.py +286 -0
- yanex/cli/filters/time_utils.py +178 -0
- yanex/cli/formatters/__init__.py +7 -0
- yanex/cli/formatters/console.py +325 -0
- yanex/cli/main.py +45 -0
- yanex/core/__init__.py +3 -0
- yanex/core/comparison.py +549 -0
- yanex/core/config.py +587 -0
- yanex/core/constants.py +16 -0
- yanex/core/environment.py +146 -0
- yanex/core/git_utils.py +153 -0
- yanex/core/manager.py +555 -0
- yanex/core/storage.py +682 -0
- yanex/ui/__init__.py +1 -0
- yanex/ui/compare_table.py +524 -0
- yanex/utils/__init__.py +3 -0
- yanex/utils/exceptions.py +70 -0
- yanex/utils/validation.py +165 -0
- yanex-0.1.0.dist-info/METADATA +251 -0
- yanex-0.1.0.dist-info/RECORD +39 -0
- yanex-0.1.0.dist-info/WHEEL +5 -0
- yanex-0.1.0.dist-info/entry_points.txt +2 -0
- yanex-0.1.0.dist-info/licenses/LICENSE +21 -0
- yanex-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,625 @@
|
|
1
|
+
"""
|
2
|
+
Run command implementation for yanex CLI.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import json
|
6
|
+
import os
|
7
|
+
import subprocess
|
8
|
+
import sys
|
9
|
+
import threading
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Any, Dict, List, Optional
|
12
|
+
|
13
|
+
import click
|
14
|
+
|
15
|
+
from ...core.config import expand_parameter_sweeps, has_sweep_parameters
|
16
|
+
from ...core.manager import ExperimentManager
|
17
|
+
|
18
|
+
|
19
|
+
@click.command()
@click.argument("script", type=click.Path(exists=True, path_type=Path), required=False)
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True, path_type=Path),
    help="Configuration file (YAML/JSON)",
)
@click.option(
    "--param",
    "-p",
    multiple=True,
    help="Parameter override in format key=value (repeatable)",
)
@click.option("--name", "-n", help="Experiment name")
@click.option("--tag", "-t", multiple=True, help="Experiment tag (repeatable)")
@click.option("--description", "-d", help="Experiment description")
@click.option("--dry-run", is_flag=True, help="Validate configuration without running")
@click.option(
    "--ignore-dirty",
    is_flag=True,
    help="Allow running with uncommitted changes (bypasses git cleanliness check)",
)
@click.option(
    "--stage",
    is_flag=True,
    help="Stage experiment for later execution instead of running immediately",
)
@click.option(
    "--staged",
    is_flag=True,
    help="Execute staged experiments",
)
@click.pass_context
def run(
    ctx: click.Context,
    script: Optional[Path],
    config: Optional[Path],
    param: List[str],
    name: Optional[str],
    tag: List[str],
    description: Optional[str],
    dry_run: bool,
    ignore_dirty: bool,
    stage: bool,
    staged: bool,
) -> None:
    """
    Run a script as a tracked experiment.

    SCRIPT is the path to the Python script to execute.

    Examples:

    # Basic run
    yanex run train.py

    # With configuration file
    yanex run train.py --config config.yaml

    # With parameter overrides
    yanex run train.py --param learning_rate=0.01 --param epochs=100

    # Parameter sweeps (requires --stage)
    yanex run train.py --param "lr=range(0.01, 0.1, 0.01)" --stage
    yanex run train.py --param "lr=linspace(0.001, 0.1, 5)" --stage
    yanex run train.py --param "lr=logspace(-3, -1, 3)" --stage
    yanex run train.py --param "batch_size=list(16, 32, 64)" --stage

    # Multi-parameter sweep (cross-product)
    yanex run train.py \\
    --param "lr=range(0.01, 0.1, 0.01)" \\
    --param "batch_size=list(16, 32, 64)" \\
    --stage

    # Execute staged experiments
    yanex run --staged

    # Full experiment setup
    yanex run train.py \\
    --config config.yaml \\
    --param learning_rate=0.01 \\
    --name "lr-tuning" \\
    --tag "hyperopt" \\
    --description "Learning rate optimization"
    """
    # NOTE: the docstring above is the user-facing `--help` text, so developer
    # notes live in comments instead.
    #
    # Flow: reject --stage together with --staged; with --staged run every
    # staged experiment and return; otherwise require SCRIPT, merge the config
    # file with --param overrides, validate, and then either report (--dry-run),
    # stage (--stage) or execute the experiment immediately.
    #
    # Raises click.Abort on invalid flag combinations, a missing SCRIPT, or any
    # error raised while loading/validating/executing.
    from .._utils import (
        load_and_merge_config,
        validate_experiment_config,
        validate_sweep_requirements,
    )

    # ctx.obj is expected to be a dict (presumably set by the parent CLI
    # group - verify against main.py); fall back to an empty dict when the
    # command is invoked without one so we don't crash on None.get().
    verbose = (ctx.obj or {}).get("verbose", False)

    # Handle mutually exclusive flags
    if stage and staged:
        click.echo("Error: Cannot use both --stage and --staged flags", err=True)
        raise click.Abort()

    if staged:
        # Execute staged experiments
        _execute_staged_experiments(verbose)
        return

    # Validate script is provided when not using --staged
    if script is None:
        click.echo("Error: Missing argument 'SCRIPT'", err=True)
        click.echo("Try 'yanex run --help' for help.", err=True)
        raise click.Abort()

    if verbose:
        click.echo(f"Running script: {script}")
        if config:
            click.echo(f"Using config: {config}")
        if param:
            click.echo(f"Parameter overrides: {param}")

    try:
        # Load and merge configuration
        merged_config = load_and_merge_config(
            config_path=config, param_overrides=list(param), verbose=verbose
        )

        if verbose:
            click.echo(f"Merged configuration: {merged_config}")

        # Validate configuration
        validate_experiment_config(
            script=script,
            name=name,
            tags=list(tag),
            description=description,
            config=merged_config,
        )

        # Validate sweep requirements
        validate_sweep_requirements(merged_config, stage)

        if dry_run:
            click.echo("✓ Configuration validation passed")
            click.echo("Dry run completed - experiment would be created with:")
            click.echo(f" Script: {script}")
            click.echo(f" Name: {name}")
            click.echo(f" Tags: {list(tag)}")
            click.echo(f" Description: {description}")
            click.echo(f" Config: {merged_config}")
            return

        # Phase 3: Execute or stage experiment
        if stage:
            _stage_experiment(
                script=script,
                name=name,
                tags=list(tag),
                description=description,
                config=merged_config,
                verbose=verbose,
                ignore_dirty=ignore_dirty,
            )
        else:
            _execute_experiment(
                script=script,
                name=name,
                tags=list(tag),
                description=description,
                config=merged_config,
                verbose=verbose,
                ignore_dirty=ignore_dirty,
            )

    except click.Abort:
        # click.Abort subclasses RuntimeError, so without this clause the
        # generic handler below would catch aborts raised by the helpers
        # (which have already printed their own error) and emit a second,
        # empty "Error: " line.
        raise

    except Exception as e:
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
|
192
|
+
|
193
|
+
|
194
|
+
def _execute_experiment(
    script: Path,
    name: Optional[str],
    tags: List[str],
    description: Optional[str],
    config: Dict[str, Any],
    verbose: bool = False,
    ignore_dirty: bool = False,
) -> None:
    """Execute script as an experiment with proper lifecycle management.

    Creates and starts an experiment through ``ExperimentManager``, runs the
    script in a subprocess using the current interpreter, mirrors the child's
    stdout/stderr to this process line by line while capturing them, stores
    non-empty captured output as ``stdout.txt`` / ``stderr.txt`` artifacts, and
    finally marks the experiment completed, failed or cancelled.

    Args:
        script: Path of the Python script to run.
        name: Optional experiment name.
        tags: Tags to attach to the experiment.
        description: Optional experiment description.
        config: Parameters; each entry is exported to the child as
            ``YANEX_PARAM_<key>`` (strings verbatim, everything else as JSON).
        verbose: Echo progress messages when True.
        ignore_dirty: Forwarded to ``create_experiment`` as ``allow_dirty``.

    Raises:
        click.Abort: The script exited with a non-zero code, or an unexpected
            error occurred while running it.
        KeyboardInterrupt: Re-raised after the child has been terminated and
            the experiment cancelled.
    """

    # Create experiment
    manager = ExperimentManager()
    experiment_id = manager.create_experiment(
        script_path=script,
        name=name,
        config=config,
        tags=tags,
        description=description,
        allow_dirty=ignore_dirty,
    )

    if verbose:
        click.echo(f"Created experiment: {experiment_id}")

    # Start experiment
    manager.start_experiment(experiment_id)

    # Bound before the try block so the interrupt handler can tell whether the
    # child process was ever launched (replaces the `"process" in locals()` probe).
    process = None

    try:
        # Prepare environment for subprocess
        env = os.environ.copy()
        env["YANEX_EXPERIMENT_ID"] = experiment_id
        env["YANEX_CLI_ACTIVE"] = "1"  # Mark as CLI context

        # Add parameters as environment variables
        for key, value in config.items():
            env[f"YANEX_PARAM_{key}"] = (
                json.dumps(value) if not isinstance(value, str) else value
            )

        if verbose:
            click.echo(f"Starting script execution: {script}")

        # Execute script with real-time output streaming
        stdout_capture = []
        stderr_capture = []

        process = subprocess.Popen(
            [sys.executable, str(script.resolve())],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=Path.cwd(),
        )

        def stream_output(pipe, capture_list, output_stream):
            """Stream output line by line while capturing it."""
            # readline() returns "" at EOF, which ends the iteration.
            for line in iter(pipe.readline, ""):
                # Display in real-time
                output_stream.write(line)
                output_stream.flush()
                # Capture for later saving
                capture_list.append(line)
            pipe.close()

        # One reader thread per pipe so neither stream can fill up and block
        # the child while the other is being read.
        stdout_thread = threading.Thread(
            target=stream_output, args=(process.stdout, stdout_capture, sys.stdout)
        )
        stderr_thread = threading.Thread(
            target=stream_output, args=(process.stderr, stderr_capture, sys.stderr)
        )

        stdout_thread.start()
        stderr_thread.start()

        # Wait for process completion
        return_code = process.wait()

        # Wait for output threads to finish
        stdout_thread.join()
        stderr_thread.join()

        # Save captured output as artifacts
        stdout_text = "".join(stdout_capture)
        stderr_text = "".join(stderr_capture)

        if stdout_text:
            manager.storage.save_text_artifact(experiment_id, "stdout.txt", stdout_text)
        if stderr_text:
            manager.storage.save_text_artifact(experiment_id, "stderr.txt", stderr_text)

        # Handle experiment result based on exit code
        if return_code == 0:
            manager.complete_experiment(experiment_id)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✓ Experiment completed successfully: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
        else:
            error_msg = f"Script exited with code {return_code}"
            if stderr_text:
                error_msg += f": {stderr_text.strip()}"
            manager.fail_experiment(experiment_id, error_msg)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✗ Experiment failed: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
            click.echo(f"Error: {error_msg}")
            raise click.Abort()

    except KeyboardInterrupt:
        # Terminate the child (if it was launched) and wait for it to exit
        if process is not None:
            process.terminate()
            process.wait()
        manager.cancel_experiment(experiment_id, "Interrupted by user (Ctrl+C)")
        click.echo(f"✗ Experiment cancelled: {experiment_id}")
        raise

    except click.Abort:
        # click.Abort subclasses RuntimeError. The failure branch above has
        # already recorded and reported the error, so let the abort propagate
        # instead of falling into the generic handler, which would call
        # fail_experiment a second time with an empty "Unexpected error: ".
        raise

    except Exception as e:
        manager.fail_experiment(experiment_id, f"Unexpected error: {str(e)}")
        click.echo(f"✗ Experiment failed: {experiment_id}")
        click.echo(f"Error: {e}")
        raise click.Abort() from e
|
318
|
+
|
319
|
+
|
320
|
+
def _generate_sweep_experiment_name(
|
321
|
+
base_name: Optional[str], config: Dict[str, Any]
|
322
|
+
) -> str:
|
323
|
+
"""
|
324
|
+
Generate a descriptive name for a sweep experiment based on its parameters.
|
325
|
+
|
326
|
+
Args:
|
327
|
+
base_name: Base experiment name (can be None)
|
328
|
+
config: Configuration dictionary with parameter values
|
329
|
+
|
330
|
+
Returns:
|
331
|
+
Generated experiment name with parameter suffixes
|
332
|
+
"""
|
333
|
+
# Start with base name or default
|
334
|
+
if base_name:
|
335
|
+
name_parts = [base_name]
|
336
|
+
else:
|
337
|
+
name_parts = ["sweep"]
|
338
|
+
|
339
|
+
# Extract parameter name-value pairs
|
340
|
+
param_parts = []
|
341
|
+
|
342
|
+
def extract_params(d: Dict[str, Any], prefix: str = "") -> None:
|
343
|
+
for key, value in d.items():
|
344
|
+
if isinstance(value, dict):
|
345
|
+
# Handle nested parameters
|
346
|
+
new_prefix = f"{prefix}_{key}" if prefix else key
|
347
|
+
extract_params(value, new_prefix)
|
348
|
+
else:
|
349
|
+
# Format parameter name
|
350
|
+
param_name = f"{prefix}_{key}" if prefix else key
|
351
|
+
|
352
|
+
# Format parameter value
|
353
|
+
if isinstance(value, bool):
|
354
|
+
param_value = str(value).lower()
|
355
|
+
elif isinstance(value, (int, float)):
|
356
|
+
# Format numbers with reasonable precision
|
357
|
+
if isinstance(value, float):
|
358
|
+
# Remove trailing zeros and unnecessary decimal point
|
359
|
+
if value == int(value):
|
360
|
+
param_value = str(int(value))
|
361
|
+
else:
|
362
|
+
formatted = f"{value:.6g}" # Up to 6 significant digits
|
363
|
+
# Replace dots with 'p' and handle scientific notation
|
364
|
+
param_value = (
|
365
|
+
formatted.replace(".", "p")
|
366
|
+
.replace("e", "e")
|
367
|
+
.replace("+", "")
|
368
|
+
.replace("-", "m")
|
369
|
+
)
|
370
|
+
else:
|
371
|
+
param_value = str(value)
|
372
|
+
else:
|
373
|
+
# String values
|
374
|
+
param_value = str(value)
|
375
|
+
|
376
|
+
param_parts.append(f"{param_name}_{param_value}")
|
377
|
+
|
378
|
+
extract_params(config)
|
379
|
+
|
380
|
+
# Combine name parts
|
381
|
+
if param_parts:
|
382
|
+
name_parts.extend(param_parts)
|
383
|
+
|
384
|
+
result = "-".join(name_parts)
|
385
|
+
|
386
|
+
# Ensure name isn't too long (limit to 100 characters)
|
387
|
+
if len(result) > 100:
|
388
|
+
# Truncate but keep the base name and at least one parameter
|
389
|
+
if base_name:
|
390
|
+
base_len = len(base_name)
|
391
|
+
remaining = 97 - base_len # Leave room for "-..."
|
392
|
+
if param_parts:
|
393
|
+
truncated_params = param_parts[0][:remaining]
|
394
|
+
result = f"{base_name}-{truncated_params}..."
|
395
|
+
else:
|
396
|
+
result = base_name[:97] + "..."
|
397
|
+
else:
|
398
|
+
result = result[:97] + "..."
|
399
|
+
|
400
|
+
return result
|
401
|
+
|
402
|
+
|
403
|
+
def _stage_experiment(
    script: Path,
    name: Optional[str],
    tags: List[str],
    description: Optional[str],
    config: Dict[str, Any],
    verbose: bool = False,
    ignore_dirty: bool = False,
) -> None:
    """Create staged experiment(s) without running them.

    When ``config`` contains sweep parameters it is expanded and one staged
    experiment is created per expanded configuration, each with a generated
    name; otherwise a single staged experiment is created from ``config``.

    Args:
        script: Path of the script the experiment(s) will run.
        name: Optional experiment name (base name for sweep experiments).
        tags: Tags attached to every created experiment.
        description: Optional description attached to every created experiment.
        config: Parameter configuration, possibly containing sweeps.
        verbose: Echo per-experiment details when True.
        ignore_dirty: Forwarded to ``create_experiment`` as ``allow_dirty``.
    """
    manager = ExperimentManager()

    # Plain configuration: stage exactly one experiment and finish.
    if not has_sweep_parameters(config):
        staged_id = manager.create_experiment(
            script_path=script,
            name=name,
            config=config,
            tags=tags,
            description=description,
            allow_dirty=ignore_dirty,
            stage_only=True,
        )

        if verbose:
            click.echo(f"Staged experiment: {staged_id}")

        staged_dir = manager.storage.get_experiment_directory(staged_id)
        click.echo(f"✓ Experiment staged: {staged_id}")
        click.echo(f" Directory: {staged_dir}")
        click.echo(" Use 'yanex run --staged' to execute staged experiments")
        return

    # Sweep configuration: one staged experiment per expanded configuration.
    sweep_configs = expand_parameter_sweeps(config)
    total = len(sweep_configs)

    click.echo(f"✓ Parameter sweep detected: expanding into {total} experiments")

    staged_ids = []
    for position, sweep_config in enumerate(sweep_configs, start=1):
        staged_id = manager.create_experiment(
            script_path=script,
            # Descriptive per-run name derived from the concrete parameters
            name=_generate_sweep_experiment_name(name, sweep_config),
            config=sweep_config,
            tags=tags,
            description=description,
            allow_dirty=ignore_dirty,
            stage_only=True,
        )
        staged_ids.append(staged_id)

        if verbose:
            click.echo(f" Staged sweep experiment {position}/{total}: {staged_id}")
            click.echo(f" Config: {sweep_config}")

    # Summary for the whole sweep
    click.echo(f"✓ Staged {len(staged_ids)} sweep experiments")
    click.echo(f" IDs: {', '.join(staged_ids)}")
    click.echo(" Use 'yanex run --staged' to execute all staged experiments")
|
472
|
+
|
473
|
+
|
474
|
+
def _execute_staged_experiments(verbose: bool = False) -> None:
    """Execute all staged experiments.

    Each staged experiment's metadata and config are loaded, the experiment is
    moved to the running state and its script is executed. A failure in one
    experiment is reported and recorded, and the loop then moves on to the
    next one. ``KeyboardInterrupt`` is not caught here and stops the loop.

    Args:
        verbose: Echo progress messages when True.
    """

    manager = ExperimentManager()
    staged_experiments = manager.get_staged_experiments()

    if not staged_experiments:
        click.echo("No staged experiments found")
        return

    if verbose:
        click.echo(f"Found {len(staged_experiments)} staged experiments")

    for experiment_id in staged_experiments:
        try:
            if verbose:
                click.echo(f"Executing staged experiment: {experiment_id}")

            # Load experiment metadata to get script path and config
            metadata = manager.storage.load_metadata(experiment_id)
            config = manager.storage.load_config(experiment_id)
            script_path = Path(metadata["script_path"])

            # Transition to running state
            manager.execute_staged_experiment(experiment_id)

            # Execute the script using the same logic as _execute_experiment
            _execute_staged_script(
                experiment_id=experiment_id,
                script_path=script_path,
                config=config,
                manager=manager,
                verbose=verbose,
            )

        except click.Abort:
            # click.Abort subclasses RuntimeError, so it must be handled before
            # the generic clause. In this file _execute_staged_script raises it
            # only after it has already marked the experiment failed and
            # printed the error, so reporting again would just emit an empty
            # message and overwrite the recorded failure reason.
            continue

        except Exception as e:
            click.echo(
                f"✗ Failed to execute staged experiment {experiment_id}: {e}", err=True
            )
            try:
                manager.fail_experiment(
                    experiment_id, f"Staged execution failed: {str(e)}"
                )
            except Exception:
                pass  # Best effort to record failure
|
519
|
+
|
520
|
+
|
521
|
+
def _execute_staged_script(
    experiment_id: str,
    script_path: Path,
    config: Dict[str, Any],
    manager: ExperimentManager,
    verbose: bool = False,
) -> None:
    """Execute the script for a staged experiment.

    Runs the script in a subprocess using the current interpreter, mirrors the
    child's stdout/stderr to this process line by line while capturing them,
    stores non-empty captured output as ``stdout.txt`` / ``stderr.txt``
    artifacts, and marks the experiment completed, failed or cancelled.

    Args:
        experiment_id: ID of the experiment being executed; exported to the
            child as ``YANEX_EXPERIMENT_ID``.
        script_path: Path of the Python script to run.
        config: Parameters; each entry is exported to the child as
            ``YANEX_PARAM_<key>`` (strings verbatim, everything else as JSON).
        manager: Manager used to record the outcome and store artifacts.
        verbose: Echo progress messages when True.

    Raises:
        click.Abort: The script exited with a non-zero code, or an unexpected
            error occurred while running it.
        KeyboardInterrupt: Re-raised after the child has been terminated and
            the experiment cancelled.
    """

    # Bound before the try block so the interrupt handler can tell whether the
    # child process was ever launched (replaces the `"process" in locals()` probe).
    process = None

    try:
        # Prepare environment for subprocess (same as _execute_experiment)
        env = os.environ.copy()
        env["YANEX_EXPERIMENT_ID"] = experiment_id
        env["YANEX_CLI_ACTIVE"] = "1"

        # Add parameters as environment variables
        for key, value in config.items():
            env[f"YANEX_PARAM_{key}"] = (
                json.dumps(value) if not isinstance(value, str) else value
            )

        if verbose:
            click.echo(f"Starting script execution: {script_path}")

        # Execute script with real-time output streaming (same logic as _execute_experiment)
        stdout_capture = []
        stderr_capture = []

        process = subprocess.Popen(
            [sys.executable, str(script_path.resolve())],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=Path.cwd(),
        )

        def stream_output(pipe, capture_list, output_stream):
            """Stream output line by line while capturing it."""
            # readline() returns "" at EOF, which ends the iteration.
            for line in iter(pipe.readline, ""):
                # Display in real-time
                output_stream.write(line)
                output_stream.flush()
                # Capture for later saving
                capture_list.append(line)
            pipe.close()

        # One reader thread per pipe so neither stream can fill up and block
        # the child while the other is being read.
        stdout_thread = threading.Thread(
            target=stream_output, args=(process.stdout, stdout_capture, sys.stdout)
        )
        stderr_thread = threading.Thread(
            target=stream_output, args=(process.stderr, stderr_capture, sys.stderr)
        )

        stdout_thread.start()
        stderr_thread.start()

        # Wait for process completion
        return_code = process.wait()

        # Wait for output threads to finish
        stdout_thread.join()
        stderr_thread.join()

        # Save captured output as artifacts
        stdout_text = "".join(stdout_capture)
        stderr_text = "".join(stderr_capture)

        if stdout_text:
            manager.storage.save_text_artifact(experiment_id, "stdout.txt", stdout_text)
        if stderr_text:
            manager.storage.save_text_artifact(experiment_id, "stderr.txt", stderr_text)

        # Handle experiment result based on exit code
        if return_code == 0:
            manager.complete_experiment(experiment_id)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✓ Experiment completed successfully: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
        else:
            error_msg = f"Script exited with code {return_code}"
            if stderr_text:
                error_msg += f": {stderr_text.strip()}"
            manager.fail_experiment(experiment_id, error_msg)
            exp_dir = manager.storage.get_experiment_directory(experiment_id)
            click.echo(f"✗ Experiment failed: {experiment_id}")
            click.echo(f" Directory: {exp_dir}")
            click.echo(f"Error: {error_msg}")
            raise click.Abort()

    except KeyboardInterrupt:
        # Terminate the child (if it was launched) and wait for it to exit
        if process is not None:
            process.terminate()
            process.wait()
        manager.cancel_experiment(experiment_id, "Interrupted by user (Ctrl+C)")
        click.echo(f"✗ Experiment cancelled: {experiment_id}")
        raise

    except click.Abort:
        # click.Abort subclasses RuntimeError. The failure branch above has
        # already recorded and reported the error, so let the abort propagate
        # instead of falling into the generic handler, which would call
        # fail_experiment a second time with an empty "Unexpected error: ".
        raise

    except Exception as e:
        manager.fail_experiment(experiment_id, f"Unexpected error: {str(e)}")
        click.echo(f"✗ Experiment failed: {experiment_id}")
        click.echo(f"Error: {e}")
        raise click.Abort() from e
|