varve 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
varve/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """Public API for varve."""
2
+
3
+ from varve.context import Ctx
4
+ from varve.decorators import StageSpec, batch_stage, stage
5
+ from varve.keyspec import JSON, KeySpec
6
+ from varve.pipeline import Pipeline
7
+
8
# Names re-exported as the package's public API; this is also the set of
# names bound by ``from varve import *``.
__all__ = [
    "Ctx",
    "Pipeline",
    "JSON",
    "KeySpec",
    "StageSpec",
    "batch_stage",
    "stage",
]
varve/branch.py ADDED
@@ -0,0 +1,92 @@
1
+ """Branch selection helpers for varve experiments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import hashlib
7
+ import json
8
+ import re
9
+ from collections.abc import Mapping
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ import yaml
14
+
15
# One ASCII letter or digit, followed by any mix of ASCII letters, digits,
# dots, underscores and hyphens.
BRANCH_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")


def validate_branch_name(name: str) -> str:
    """Check a branch name before it is used as part of an output path.

    Args:
        name: Candidate branch name.

    Returns:
        ``name`` unchanged when it is acceptable.

    Raises:
        ValueError: If ``name`` is not a string or does not match
            ``BRANCH_NAME_RE`` in full (for example it is empty, starts with
            a dot, or contains a path separator).
    """
    if isinstance(name, str) and BRANCH_NAME_RE.fullmatch(name) is not None:
        return name
    raise ValueError(
        f"Invalid varve branch name {name!r}; branch names must match "
        "[A-Za-z0-9][A-Za-z0-9._-]* and stay within one path segment."
    )
26
+
27
+
28
def load_branches(yaml_path: Path | None) -> dict[str, tuple[dict[str, Any], bool]]:
    """Read every branch section from a varve.yaml file.

    Args:
        yaml_path: Location of the yaml file, or ``None`` when there is none.

    Returns:
        A dict mapping each branch name to ``(config, is_temporary)``, where
        ``config`` is the branch section with its ``is_temporary`` key removed.
        The dict is empty when ``yaml_path`` is ``None``, the path does not
        exist, or the document is empty.

    Raises:
        ValueError: If the document or a branch section is not a mapping, a
            branch name is invalid, or ``is_temporary`` is not a boolean.
    """
    if yaml_path is None:
        return {}
    source = Path(yaml_path)
    if not source.exists():
        return {}

    document = yaml.safe_load(source.read_text(encoding="utf-8"))
    if document is None:
        # An empty yaml file parses to None; treat it as "no branches".
        document = {}
    elif not isinstance(document, Mapping):
        raise ValueError(f"varve.yaml must be a mapping of branch names to configs: {yaml_path}")

    # Reject every bad name up front, before any section body is inspected.
    for branch_name in document:
        validate_branch_name(branch_name)

    branches: dict[str, tuple[dict[str, Any], bool]] = {}
    for branch, section in document.items():
        body = {} if section is None else section
        if not isinstance(body, Mapping):
            raise ValueError(f"Varve branch {branch!r} must be a mapping in {yaml_path}")

        config = dict(body)
        temporary_flag = config.pop("is_temporary", False)
        if not isinstance(temporary_flag, bool):
            raise ValueError(f"Varve branch {branch!r} has non-boolean is_temporary in {yaml_path}")
        branches[branch] = (config, temporary_flag)
    return branches
54
+
55
+
56
+ def _deep_merge(base: Mapping[str, Any], override: Mapping[str, Any]) -> dict[str, Any]:
57
+ merged = copy.deepcopy(dict(base))
58
+ for key, value in override.items():
59
+ current = merged.get(key)
60
+ if isinstance(current, Mapping) and isinstance(value, Mapping):
61
+ merged[key] = _deep_merge(current, value)
62
+ else:
63
+ merged[key] = copy.deepcopy(value)
64
+ return merged
65
+
66
+
67
def merge_override(base_config: Mapping[str, Any], override_json: str) -> dict[str, Any]:
    """Parse ``override_json`` and deep-merge it over ``base_config``.

    Args:
        base_config: Raw config mapping to start from.
        override_json: JSON text that must decode to an object.

    Returns:
        A new dict holding the merged config.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError``)
            or decodes to something other than an object.
    """
    parsed = json.loads(override_json)
    if isinstance(parsed, Mapping):
        return _deep_merge(base_config, parsed)
    raise ValueError("--override must be a JSON object")
73
+
74
+
75
def canonical_config_json(config: Mapping[str, Any]) -> str:
    """Serialize ``config`` to compact JSON with sorted keys.

    Keys are sorted and no whitespace is emitted, so equal configs always
    produce the same text. NaN and infinite floats are rejected with
    ``ValueError`` instead of being written as non-standard JSON.
    """
    compact = (",", ":")
    return json.dumps(config, separators=compact, allow_nan=False, sort_keys=True)
78
+
79
+
80
def override_branch_name(config: Mapping[str, Any]) -> str:
    """Build the branch name used for an override of ``main``.

    The name is ``main_override_`` followed by the first 12 hex characters of
    the SHA-256 digest of the config's canonical JSON, so the same config
    snapshot always yields the same name.
    """
    payload = canonical_config_json(config).encode("utf-8")
    short_hash = hashlib.sha256(payload).hexdigest()[:12]
    return "main_override_" + short_hash
84
+
85
+
86
def assert_same_config(left: Mapping[str, Any], right: Mapping[str, Any], *, branch: str) -> None:
    """Ensure two config snapshots for a temporary branch are identical.

    The snapshots are compared through their canonical JSON form.

    Raises:
        ValueError: If the snapshots differ; the message names ``branch``.
    """
    if canonical_config_json(left) == canonical_config_json(right):
        return
    raise ValueError(
        f"Temporary varve branch {branch!r} was created with a different config; "
        "use a different --branch name or clean the existing temporary branch first."
    )
varve/branch_config.py ADDED
@@ -0,0 +1,172 @@
1
+ """Resolve varve branches into Config objects and output roots."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from types import SimpleNamespace
8
+ from typing import Any, TypeVar
9
+
10
+ from pydantic import BaseModel, ValidationError, create_model
11
+ from pydantic_settings import BaseSettings, SettingsConfigDict
12
+
13
+ from varve.branch import (
14
+ assert_same_config,
15
+ load_branches,
16
+ merge_override,
17
+ override_branch_name,
18
+ validate_branch_name,
19
+ )
20
+ from varve.pipeline import Pipeline
21
+ from varve.store.store import Store
22
+
23
# Type variable bound to pydantic's BaseModel; lets config_from_init declare
# that it returns an instance of the same model class it was given.
ConfigT = TypeVar("ConfigT", bound=BaseModel)
24
+
25
+
26
@dataclass(frozen=True)
class ResolvedBranch:
    """Immutable result of resolving a branch selection (see resolve_branch)."""

    # Validated Config instance; resolve_branch may instead store an empty
    # SimpleNamespace when a bare output root is allowed and validation fails.
    config: Any
    # Final branch name (for overrides of main this is the derived hash name).
    branch: str
    # Whether the branch is a temporary one.
    is_temporary: bool
    # Output base to use, or None when resolve_branch leaves it unset.
    output_base: Path | None
    # JSON-mode config snapshot for temporary branches; None otherwise.
    temporary_config: dict[str, Any] | None = None
33
+
34
+
35
def _settings_type(config_type: type[BaseModel]) -> type[BaseSettings]:
    """Create a pydantic-settings class that mirrors ``config_type``'s fields.

    The generated class is named ``<ConfigName>VarveSettings``. It reads a
    ``.env`` file, uses ``__`` as the nested environment delimiter, and lists
    its sources in the order: init kwargs, environment, dotenv, file secrets.
    """

    class VarveSettings(BaseSettings):
        model_config = SettingsConfigDict(env_file=".env", env_nested_delimiter="__")

        @classmethod
        def settings_customise_sources(
            cls,
            settings_cls,
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        ):
            # Spell out the source order explicitly rather than inheriting it.
            return (init_settings, env_settings, dotenv_settings, file_secret_settings)

    # Reuse each field's annotation and FieldInfo so the settings class
    # carries the same field definitions as the Config model.
    mirrored_fields: dict[str, Any] = {}
    for field_name, field_info in config_type.model_fields.items():
        mirrored_fields[field_name] = (field_info.annotation, field_info)
    generated_name = f"{config_type.__name__}VarveSettings"
    return create_model(generated_name, __base__=VarveSettings, **mirrored_fields)
57
+
58
+
59
def config_from_init(config_type: type[ConfigT], init_kwargs: dict[str, Any]) -> ConfigT:
    """Build a ``config_type`` instance from init kwargs plus settings sources.

    The kwargs are first loaded through the settings class generated for
    ``config_type``; the dumped result is then validated as ``config_type``.
    """
    loaded = _settings_type(config_type)(**init_kwargs)
    return config_type.model_validate(loaded.model_dump())
63
+
64
+
65
+ def _snapshot(config: Any) -> dict[str, Any]:
66
+ if not hasattr(config, "model_dump"):
67
+ raise TypeError("Temporary varve branches require a pydantic Config model")
68
+ return config.model_dump(mode="json")
69
+
70
+
71
def _main_config(
    experiment: type[Pipeline],
    raw_main: dict[str, Any],
    *,
    cli_out: Path | None,
    allow_bare_output_root: bool,
) -> Any:
    """Build the main Config, optionally tolerating a config that fails validation.

    Returns:
        The validated ``experiment.Config`` instance, or an empty
        ``SimpleNamespace`` when validation fails while
        ``allow_bare_output_root`` is true and ``cli_out`` was given.

    Raises:
        ValidationError: If validation fails and the fallback does not apply.
    """
    bare_root_ok = allow_bare_output_root and cli_out is not None
    try:
        return config_from_init(experiment.Config, raw_main)
    except ValidationError:
        if not bare_root_ok:
            raise
        # An explicit output root was supplied, so proceed without a Config.
        return SimpleNamespace()
84
+
85
+
86
def _temporary_config_from_manifest(main_base: Path, branch: str) -> dict[str, Any]:
    """Load the stored config snapshot of a temporary branch.

    The manifest is read from the store at ``main_base / ".tmp" / branch``.

    Raises:
        ValueError: If there is no manifest, or it has no temporary config.
    """
    manifest = Store(main_base / ".tmp" / branch).read_manifest()
    snapshot = None if manifest is None else manifest.temporary_config
    if snapshot is None:
        raise ValueError(f"Unknown varve branch {branch!r}")
    return snapshot
91
+
92
+
93
def resolve_branch(
    experiment: type[Pipeline],
    *,
    branch: str,
    override_json: str | None,
    cli_out: Path | None,
    allow_bare_output_root: bool = False,
) -> ResolvedBranch:
    """Resolve a branch selection into a config, branch name and output base.

    Four cases are handled, in this order:

    1. ``override_json`` is given: merge it over the raw ``main`` config and
       resolve a temporary branch (named by config hash when ``branch`` is
       ``"main"``, otherwise ``branch`` itself).
    2. ``branch`` is declared in the yaml file: use its section.
    3. ``branch`` is ``"main"`` but not declared: build it from an empty
       raw config.
    4. Anything else: treat ``branch`` as an existing temporary branch and
       restore its config from the stored manifest.

    Args:
        experiment: Pipeline class providing ``Config``,
            ``varve_config_path()`` and ``default_output_root()``.
        branch: Requested branch name.
        override_json: JSON object text to merge over main, or ``None``.
        cli_out: Output base given on the command line, or ``None``.
        allow_bare_output_root: Passed to ``_main_config`` for case 3; lets an
            undeclared main fall back to an empty namespace when ``cli_out``
            is set and validation fails.

    Returns:
        The resolved branch description.

    Raises:
        ValueError: For an invalid branch name, an override aimed at a named
            yaml branch, an unknown temporary branch, or a temporary branch
            whose stored config differs from the requested one.
    """
    validate_branch_name(branch)
    branches = load_branches(experiment.varve_config_path())
    # Raw main section, or an empty config when the yaml declares no main.
    raw_main = branches.get("main", ({}, False))[0]

    if override_json is not None:
        if branch in branches and branch != "main":
            raise ValueError("--override is only supported on main or temporary branches")

        final_config = config_from_init(experiment.Config, merge_override(raw_main, override_json))
        temporary_config = _snapshot(final_config)
        if cli_out is not None:
            main_base = Path(cli_out)
        else:
            # Temporary branches live under main's output root, so the
            # un-overridden main config decides where that root is.
            main_config = config_from_init(experiment.Config, raw_main)
            main_base = experiment.default_output_root(main_config)
        resolved_branch = override_branch_name(temporary_config) if branch == "main" else branch
        validate_branch_name(resolved_branch)

        # If this temporary branch already exists, it must have been created
        # with exactly the same config snapshot.
        manifest = Store(main_base / ".tmp" / resolved_branch).read_manifest()
        if manifest is not None:
            if manifest.temporary_config is None:
                raise ValueError(f"Unknown varve branch {resolved_branch!r}")
            assert_same_config(manifest.temporary_config, temporary_config, branch=resolved_branch)

        return ResolvedBranch(
            config=final_config,
            branch=resolved_branch,
            is_temporary=True,
            output_base=main_base,
            temporary_config=temporary_config,
        )

    if branch in branches:
        raw_config, is_temporary = branches[branch]
        return ResolvedBranch(
            config=config_from_init(experiment.Config, raw_config),
            branch=branch,
            is_temporary=is_temporary,
            output_base=Path(cli_out) if cli_out is not None else None,
        )
    if branch == "main":
        # main is not declared in the yaml file; raw_main is empty here.
        main_config = _main_config(
            experiment,
            raw_main,
            cli_out=cli_out,
            allow_bare_output_root=allow_bare_output_root,
        )
        return ResolvedBranch(
            config=main_config,
            branch="main",
            is_temporary=False,
            output_base=Path(cli_out) if cli_out is not None else None,
        )

    # Neither declared nor main: look for a previously created temporary branch.
    if cli_out is not None:
        main_base = Path(cli_out)
    else:
        main_config = _main_config(
            experiment,
            raw_main,
            cli_out=None,
            allow_bare_output_root=False,
        )
        main_base = experiment.default_output_root(main_config)
    temporary_config = _temporary_config_from_manifest(main_base, branch)
    return ResolvedBranch(
        config=experiment.Config.model_validate(temporary_config),
        branch=branch,
        is_temporary=True,
        output_base=main_base,
        temporary_config=temporary_config,
    )
varve/cli/app.py ADDED
@@ -0,0 +1,297 @@
1
+ """Command-line interface for Pipeline subclasses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import re
7
+ import sys
8
+ from pathlib import Path
9
+ from types import SimpleNamespace
10
+
11
+ from pydantic import BaseModel
12
+
13
+ from varve.branch_config import resolve_branch
14
+ from varve.cli import argmap
15
+ from varve.cli.clean import clean
16
+ from varve.engine.runner import evaluate_state, run, selected_stages
17
+ from varve.log import configure_cli_logging
18
+ from varve.pipeline import Pipeline
19
+
20
# Subcommands that get the pipeline's Args options registered on their parser
# and whose raw arguments are pre-checked before that registration.
_CONFIG_COMMANDS = {"run", "status", "clean"}
# Tokens such as "-3" or "-.5": they start with "-" but are values, not options.
_NEGATIVE_NUMBER_RE = re.compile(r"^-\d+$|^-\d*\.\d+$")
# Built-in options of each subcommand, mapped to the number of value tokens
# each consumes (0 for flags, 1 for options taking a single value).
_COMMAND_OPTION_ARITIES = {
    "run": {
        "--branch": 1,
        "--override": 1,
        "--upto": 1,
        "--downstream": 1,
        "--force": 0,
        "-f": 0,
        "--out": 1,
    },
    "status": {"--branch": 1, "--upto": 1, "--downstream": 1, "--out": 1},
    "clean": {
        "--branch": 1,
        "--downstream": 1,
        "--out": 1,
        "--yes": 0,
        "-y": 0,
    },
    "plan": {"--upto": 1, "--downstream": 1},
}
42
+
43
+
44
def _args_from_namespace(
    pipeline: type[Pipeline],
    namespace: argparse.Namespace,
) -> BaseModel:
    """Build a validated ``pipeline.Args`` instance from parsed CLI arguments."""
    args_model = pipeline.Args
    collected = argmap.collect_cli_args_namespace(namespace, args_model)
    return pipeline.Args.model_validate(collected)
50
+
51
+
52
+ def _print_list(pipeline: type[Pipeline]) -> None:
53
+ for name in pipeline.topo_order():
54
+ spec = pipeline.stages()[name]
55
+ needs = ",".join(spec.needs) if spec.needs else "-"
56
+ kind = "batch" if spec.kind == "batch" else "stage"
57
+ print(f"{name}\t{kind}\tneeds={needs}")
58
+
59
+
60
def _print_plan(
    pipeline: type[Pipeline],
    *,
    upto: str | None,
    downstream: str | None,
) -> None:
    """Print the selected stages in topological order, joined by `` -> ``."""
    chosen = selected_stages(pipeline, upto=upto, downstream=downstream)
    ordered = [name for name in pipeline.topo_order() if name in chosen]
    print(" -> ".join(ordered))
68
+
69
+
70
def _print_status(
    pipeline: type[Pipeline],
    config,
    args,
    *,
    upto: str | None,
    downstream: str | None,
    cli_out: Path | None,
    branch: str,
    is_temporary: bool,
) -> None:
    """Evaluate stage state and print stage, status and reason per outcome.

    All arguments are forwarded to ``evaluate_state``; each outcome becomes
    one tab-separated output line.
    """
    selection = {"upto": upto, "downstream": downstream}
    location = {"cli_out": cli_out, "branch": branch, "is_temporary": is_temporary}
    for result in evaluate_state(pipeline, config, args=args, **selection, **location):
        print(f"{result.stage}\t{result.status}\t{result.reason}")
93
+
94
+
95
+ def _default_confirm(message: str) -> bool:
96
+ try:
97
+ answer = input(f"{message} [y/N] ").strip().lower()
98
+ except EOFError:
99
+ return False
100
+ return answer in {"y", "yes"}
101
+
102
+
103
+ def _selected_command_index(argv: list[str]) -> int | None:
104
+ index = 0
105
+ while index < len(argv):
106
+ token = argv[index]
107
+ if token in {"-v", "--verbose"}:
108
+ index += 1
109
+ continue
110
+ if token == "--":
111
+ next_index = index + 1
112
+ return next_index if next_index < len(argv) else None
113
+ if token.startswith("-"):
114
+ return None
115
+ return index
116
+ return None
117
+
118
+
119
+ def _option_name(token: str) -> str:
120
+ if token.startswith("--"):
121
+ return token.split("=", 1)[0]
122
+ return token
123
+
124
+
125
+ def _looks_like_option(token: str) -> bool:
126
+ return token.startswith("-") and token != "-" and _NEGATIVE_NUMBER_RE.match(token) is None
127
+
128
+
129
def _has_unknown_option_before_config_registration(
    *,
    command: str,
    command_args: list[str],
    args_type: type[BaseModel],
) -> bool:
    """Scan a subcommand's raw arguments for unknown or value-less options.

    Known options are those reported for ``args_type``, the built-in ones of
    ``command``, and the help flags. Scanning stops at a ``--`` separator.

    Returns:
        ``True`` when an option is not known, or when an option that takes a
        value is followed by nothing or by another option; ``False`` otherwise.
    """
    known = argmap.args_option_arities(args_type)
    known.update(_COMMAND_OPTION_ARITIES[command])
    # Let argparse handle help instead of failing the strict precheck.
    for help_flag in ("--help", "-h"):
        known.setdefault(help_flag, 0)

    total = len(command_args)
    cursor = 0
    while cursor < total:
        token = command_args[cursor]
        cursor += 1
        if token == "--":
            return False
        if token == "-" or not token.startswith("-"):
            # Positional value (or bare "-"): nothing to check.
            continue
        expected_values = known.get(_option_name(token))
        if expected_values is None:
            return True
        if expected_values != 1 or "=" in token:
            # Flag, or the value is attached to the token itself.
            continue
        if cursor >= total or _looks_like_option(command_args[cursor]):
            return True
        # Step over the value token that belongs to this option.
        cursor += 1
    return False
159
+
160
+
161
def main(pipeline: type[Pipeline], argv: list[str] | None = None) -> int:
    """Run the command-line interface for ``pipeline``.

    Subcommands: ``run``, ``status``, ``clean``, ``plan`` and ``list``.

    Args:
        pipeline: Pipeline subclass to operate on.
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        ``0`` once the selected command has finished. Argument errors are
        reported through argparse, which exits the process instead of
        returning.
    """
    raw_argv = list(argv) if argv is not None else sys.argv[1:]
    # Peek at the subcommand before parsing so that the pipeline's Args
    # options are registered only on the parser that will actually be used.
    selected_command_index = _selected_command_index(raw_argv)
    selected_command = (
        raw_argv[selected_command_index] if selected_command_index is not None else None
    )
    parser = argparse.ArgumentParser(prog=pipeline.__name__)
    parser.add_argument("-v", "--verbose", action="store_true")
    subparsers = parser.add_subparsers(dest="command", required=True)

    out_help = (
        "Override the output base. For named yaml branches this replaces that branch's "
        "default base; for main and temporary branches it replaces the main base."
    )

    run_parser = subparsers.add_parser("run", help="run selected stages")
    run_parser.add_argument("--branch", default="main", metavar="NAME", help="Select a branch.")
    run_parser.add_argument(
        "--override",
        metavar="JSON",
        help="Merge JSON over main Config and run a temporary branch.",
    )
    run_stage = run_parser.add_mutually_exclusive_group()
    run_stage.add_argument("--upto", metavar="STAGE", help="Run STAGE and all upstream stages.")
    run_stage.add_argument(
        "--downstream", metavar="STAGE", help="Run STAGE and all downstream stages."
    )
    run_parser.add_argument(
        "--force", "-f", action="store_true", help="Ignore cache for selected stages."
    )
    run_parser.add_argument("--out", type=Path, metavar="PATH", help=out_help)

    status_parser = subparsers.add_parser("status", help="show read-only stage status")
    status_parser.add_argument("--branch", default="main", metavar="NAME", help="Select a branch.")
    status_stage = status_parser.add_mutually_exclusive_group()
    status_stage.add_argument("--upto", metavar="STAGE", help="Show STAGE and all upstream stages.")
    status_stage.add_argument(
        "--downstream", metavar="STAGE", help="Show STAGE and all downstream stages."
    )
    status_parser.add_argument("--out", type=Path, metavar="PATH", help=out_help)

    clean_parser = subparsers.add_parser("clean", help="delete selected store records and outputs")
    clean_parser.add_argument("--branch", default="main", metavar="NAME", help="Select a branch.")
    clean_parser.add_argument(
        "--downstream",
        metavar="STAGE",
        help="Clean STAGE and all downstream stages.",
    )
    clean_parser.add_argument("--out", type=Path, metavar="PATH", help=out_help)
    clean_parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation.")

    plan_parser = subparsers.add_parser("plan", help="print selected stage order")
    plan_stage = plan_parser.add_mutually_exclusive_group()
    plan_stage.add_argument("--upto", metavar="STAGE", help="Print STAGE and all upstream stages.")
    plan_stage.add_argument(
        "--downstream", metavar="STAGE", help="Print STAGE and all downstream stages."
    )

    subparsers.add_parser("list")

    # Strict pre-check: reject unknown options and options missing a value
    # before the pipeline's Args options are added to the chosen subparser.
    if selected_command in _CONFIG_COMMANDS and selected_command_index is not None:
        command_args = raw_argv[selected_command_index + 1 :]
        if _has_unknown_option_before_config_registration(
            command=selected_command,
            command_args=command_args,
            args_type=pipeline.Args,
        ):
            parser.error("unknown option or missing option value")

    if selected_command == "run":
        argmap.register_args(run_parser, pipeline.Args)
    if selected_command == "status":
        argmap.register_args(status_parser, pipeline.Args)
    if selected_command == "clean":
        argmap.register_args(clean_parser, pipeline.Args)

    namespace = parser.parse_args(raw_argv)
    configure_cli_logging(namespace.verbose)

    # list and plan need neither a Config nor a branch.
    if namespace.command == "list":
        _print_list(pipeline)
        return 0
    if namespace.command == "plan":
        _print_plan(pipeline, upto=namespace.upto, downstream=namespace.downstream)
        return 0

    resolved = resolve_branch(
        pipeline,
        branch=namespace.branch,
        # Only the run parser defines --override.
        override_json=namespace.override if namespace.command == "run" else None,
        cli_out=namespace.out,
        allow_bare_output_root=namespace.command == "clean",
    )
    config = resolved.config
    args = _args_from_namespace(pipeline, namespace)
    if namespace.command == "status":
        _print_status(
            pipeline,
            config,
            args,
            upto=namespace.upto,
            downstream=namespace.downstream,
            cli_out=resolved.output_base,
            branch=resolved.branch,
            is_temporary=resolved.is_temporary,
        )
    elif namespace.command == "clean":
        # A SimpleNamespace config means resolve_branch fell back to a bare
        # output root, so there is no Config to derive clean roots from.
        allowed_roots = (
            None if isinstance(config, SimpleNamespace) else pipeline.clean_roots(config)
        )
        clean(
            pipeline,
            config,
            cli_out=resolved.output_base,
            branch=resolved.branch,
            is_temporary=resolved.is_temporary,
            target=namespace.downstream,
            yes=namespace.yes,
            allowed_roots=allowed_roots,
            confirm=_default_confirm,
        )
    elif namespace.command == "run":
        outcomes = run(
            pipeline,
            config,
            args=args,
            upto=namespace.upto,
            downstream=namespace.downstream,
            force=namespace.force,
            cli_out=resolved.output_base,
            branch=resolved.branch,
            is_temporary=resolved.is_temporary,
            temporary_config=resolved.temporary_config,
        )
        for outcome in outcomes:
            print(f"{outcome.stage}\t{outcome.status}\t{outcome.reason}")
    return 0