softschema 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
softschema/__init__.py ADDED
@@ -0,0 +1,72 @@
1
+ """Soft schema conventions and validation tools for Markdown/YAML artifacts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from softschema.compile import SOFTSCHEMA_FORMAT_VERSION, CompileResult, compile_model
6
+ from softschema.generate import GeneratedSection, RegenerateResult, regenerate
7
+ from softschema.models import (
8
+ SoftschemaBinding,
9
+ SoftschemaMetadata,
10
+ SoftschemaProfile,
11
+ SoftschemaStage,
12
+ SoftschemaStatus,
13
+ SoftschemaWarning,
14
+ WarningCode,
15
+ parse_softschema_metadata,
16
+ )
17
+ from softschema.registry import SoftschemaRegistry
18
+ from softschema.schema_view import FieldInfo, SchemaView
19
+ from softschema.sfield import (
20
+ RepairKind,
21
+ SField,
22
+ SFieldMeta,
23
+ SoftOwner,
24
+ SoftTier,
25
+ )
26
+ from softschema.validate import (
27
+ ArtifactValidationResult,
28
+ SemanticResult,
29
+ StructuralResult,
30
+ ValidationResult,
31
+ ValueResolver,
32
+ validate,
33
+ validate_artifact,
34
+ validate_semantic,
35
+ validate_structural,
36
+ validate_values,
37
+ )
38
+
39
# Public API, grouped by the submodule each name is re-exported from.
__all__ = [
    # softschema.compile
    "SOFTSCHEMA_FORMAT_VERSION",
    "CompileResult",
    "compile_model",
    # softschema.generate
    "GeneratedSection",
    "RegenerateResult",
    "regenerate",
    # softschema.models
    "SoftschemaBinding",
    "SoftschemaMetadata",
    "SoftschemaProfile",
    "SoftschemaStage",
    "SoftschemaStatus",
    "SoftschemaWarning",
    "WarningCode",
    "parse_softschema_metadata",
    # softschema.registry
    "SoftschemaRegistry",
    # softschema.schema_view
    "FieldInfo",
    "SchemaView",
    # softschema.sfield
    "RepairKind",
    "SField",
    "SFieldMeta",
    "SoftOwner",
    "SoftTier",
    # softschema.validate
    "ArtifactValidationResult",
    "SemanticResult",
    "StructuralResult",
    "ValidationResult",
    "ValueResolver",
    "validate",
    "validate_artifact",
    "validate_semantic",
    "validate_structural",
    "validate_values",
]
softschema/cli.py ADDED
@@ -0,0 +1,450 @@
1
+ """Command-line interface for softschema."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import importlib
7
+ import json
8
+ import sys
9
+ from dataclasses import asdict, dataclass, is_dataclass
10
+ from enum import Enum
11
+ from importlib import resources
12
+ from pathlib import Path
13
+ from typing import Any, cast
14
+
15
+ from frontmatter_format import fmf_read
16
+ from pydantic import BaseModel, ValidationError
17
+
18
+ from softschema.compile import compile_model
19
+ from softschema.generate import regenerate
20
+ from softschema.models import SoftschemaBinding, SoftschemaStatus, parse_softschema_metadata
21
+ from softschema.validate import validate_artifact
22
+
23
+
24
@dataclass(frozen=True)
class ResourceTopic:
    """Immutable descriptor of one bundled document or example file."""

    # Human-readable heading, emitted in the JSON topic metadata.
    title: str
    # Location of the file, relative to the bundled resources root.
    path: str
    # One-line description shown next to the topic name in the docs listing.
    summary: str
29
+
30
+
31
# Topic name (as typed on the `softschema docs <topic>` command line) mapped to
# the bundled resource it prints.
DOC_TOPICS: dict[str, ResourceTopic] = {
    "readme": ResourceTopic(
        title="README",
        path="README.md",
        summary="Short first-visitor overview.",
    ),
    "guide": ResourceTopic(
        title="Softschema Guide",
        path="docs/softschema-guide.md",
        summary="Concepts, mental model, and adoption path.",
    ),
    "spec": ResourceTopic(
        title="Softschema Spec",
        path="docs/softschema-spec.md",
        summary="Language-neutral artifact format.",
    ),
    "python-design": ResourceTopic(
        title="Python Package Design",
        path="docs/softschema-python-design.md",
        summary="Python package design decisions.",
    ),
    "development": ResourceTopic(
        title="Development",
        path="docs/development.md",
        summary="Local development workflow.",
    ),
    "installation": ResourceTopic(
        title="Installation",
        path="docs/installation.md",
        summary="Installing uv and Python.",
    ),
    "publishing": ResourceTopic(
        title="Publishing",
        path="docs/publishing.md",
        summary="Release and PyPI workflow.",
    ),
    "example": ResourceTopic(
        title="Movie Page Example",
        path="examples/movie_page/README.md",
        summary="Copyable example overview.",
    ),
    "example-artifact": ResourceTopic(
        title="Movie Page Artifact",
        path="examples/movie_page/spirited-away.md",
        summary="Copyable Markdown/YAML artifact.",
    ),
    "example-model": ResourceTopic(
        title="Movie Page Model",
        path="examples/movie_page/model.py",
        summary="Pydantic model used by the example.",
    ),
    "example-host": ResourceTopic(
        title="Movie Page Host Integration",
        path="examples/movie_page/host_integration.py",
        summary="Host registry and validation helper.",
    ),
    "skill": ResourceTopic(
        title="Softschema Skill",
        path="skills/softschema/SKILL.md",
        summary="Portable agent skill instructions.",
    ),
    "agents": ResourceTopic(
        title="Agent Instructions",
        path="AGENTS.md",
        summary="Repo-level agent instructions.",
    ),
}
94
+
95
+
96
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Arguments to parse; ``None`` is passed through to argparse.

    Returns:
        The exit status returned by the subcommand handler.
    """
    namespace = _build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)


def _build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level parser with one subparser per command."""
    cli = argparse.ArgumentParser(
        prog="softschema",
        description="Validate and explain soft schema Markdown/YAML artifacts.",
        epilog=(
            "IMPORTANT for agents: run `softschema skill --brief` for operating "
            "rules, then `softschema docs --list` to discover bundled docs "
            "(`guide`, `spec`, and `example-artifact` are the key ones)."
        ),
    )
    commands = cli.add_subparsers(dest="command", required=True)

    # --- validate ---------------------------------------------------------
    validate_cmd = commands.add_parser("validate", help="Validate an artifact.")
    validate_cmd.add_argument("path", type=Path)
    validate_cmd.add_argument("--contract", help="Override the document contract ID.")
    validate_cmd.add_argument("--envelope", help="Override the inferred envelope key.")
    validate_cmd.add_argument(
        "--model",
        help="Pydantic model as module:Class. Required unless --schema is provided.",
    )
    validate_cmd.add_argument(
        "--schema",
        type=Path,
        help="JSON Schema YAML sidecar. Required unless --model is provided.",
    )
    validate_cmd.add_argument(
        "--status",
        choices=[member.value for member in SoftschemaStatus],
        help="Override the document status.",
    )
    validate_cmd.set_defaults(func=_validate_cmd)

    # --- compile ----------------------------------------------------------
    compile_cmd = commands.add_parser("compile", help="Compile a Pydantic model.")
    compile_cmd.add_argument("model", help="Pydantic model as module:Class.")
    compile_cmd.add_argument("--out", required=True, type=Path)
    compile_cmd.add_argument("--contract")
    compile_cmd.add_argument("--check", action="store_true")
    compile_cmd.set_defaults(func=_compile_cmd)

    # --- inspect ----------------------------------------------------------
    inspect_cmd = commands.add_parser("inspect", help="Inspect artifact metadata.")
    inspect_cmd.add_argument("path", type=Path)
    inspect_cmd.set_defaults(func=_inspect_cmd)

    # --- docs -------------------------------------------------------------
    docs_cmd = commands.add_parser("docs", help="Print bundled docs and examples.")
    docs_cmd.add_argument("topic", nargs="?", choices=sorted(DOC_TOPICS))
    docs_cmd.add_argument(
        "--list",
        dest="list_topics",
        action="store_true",
        help="List bundled documentation topics.",
    )
    docs_cmd.add_argument(
        "--json",
        action="store_true",
        help="Emit topic metadata, and document content when a topic is selected, as JSON.",
    )
    docs_cmd.set_defaults(func=_docs_cmd)

    # --- generate ---------------------------------------------------------
    generate_cmd = commands.add_parser(
        "generate",
        help="Regenerate `softschema:generated` Markdown sections from schemas.",
    )
    generate_cmd.add_argument(
        "paths",
        nargs="+",
        type=Path,
        help="Markdown files containing softschema:generated markers.",
    )
    generate_cmd.add_argument(
        "--check",
        action="store_true",
        help="Do not write; exit 1 if any section is stale.",
    )
    generate_cmd.set_defaults(func=_generate_cmd)

    # --- skill ------------------------------------------------------------
    skill_cmd = commands.add_parser("skill", help="Print agent-facing guidance.")
    skill_cmd.add_argument(
        "--brief",
        action="store_true",
        help="Print compact skill guidance for constrained contexts.",
    )
    skill_cmd.set_defaults(func=_skill_cmd)

    return cli
181
+
182
+
183
def _validate_cmd(args: argparse.Namespace) -> int:
    """Run ``softschema validate`` and print the validation result as JSON.

    Args:
        args: Parsed arguments carrying ``path``, ``contract``, ``envelope``,
            ``model``, ``schema`` and ``status``.

    Returns:
        ``0`` when the result is ok, ``1`` when validation fails, and ``2``
        when the invocation itself is unusable (no model or schema given,
        binding cannot be inferred, artifact unreadable, model not loadable).
    """
    try:
        if args.model is None and args.schema is None:
            raise ValueError("missing validation implementation; pass --model, --schema, or both")
        contract_id, status, envelope_key = _infer_validation_binding(args)
        model = _load_model(args.model) if args.model else None
    except (ImportError, OSError, TypeError, ValueError, ValidationError) as exc:
        # OSError covers a missing or unreadable artifact and ImportError an
        # unimportable model module; both are usage errors, not crashes.
        print(f"softschema validate: {exc}", file=sys.stderr)
        return 2

    binding = SoftschemaBinding(
        contract_id=contract_id,
        model=model,
        envelope_key=envelope_key,
        schema_path=args.schema,
        status=status,
    )
    result = validate_artifact(args.path, binding=binding)
    print(_json(result))
    return 0 if result.ok else 1
202
+
203
+
204
def _infer_validation_binding(args: argparse.Namespace) -> tuple[str, SoftschemaStatus, str | None]:
    """Resolve contract ID, status and envelope key for a validate run.

    Command-line overrides take precedence over values read from the
    document's frontmatter.

    Raises:
        ValueError: If no contract ID can be determined, or the envelope key
            is ambiguous.
    """
    _body, front = fmf_read(args.path)

    if isinstance(front, dict):
        meta = parse_softschema_metadata(front.get("softschema"))
        contract = args.contract or (None if meta is None else meta.contract_id)
        if contract is None:
            raise ValueError("missing --contract because the document has no softschema.contract")
        return contract, _status_from_args(args, meta), _envelope_from_args(args, front)

    # No usable frontmatter: everything must come from the command line.
    if args.contract is None:
        raise ValueError("missing --contract because the document has no YAML frontmatter")
    return args.contract, _status_from_args(args, None), args.envelope
217
+
218
+
219
def _status_from_args(args: argparse.Namespace, metadata: Any) -> SoftschemaStatus:
    """Pick the status: ``--status`` first, then metadata, else ``soft``."""
    override = args.status
    if override is not None:
        return SoftschemaStatus(override)
    declared = None if metadata is None else metadata.status
    if declared is None:
        return SoftschemaStatus.soft
    return declared
225
+
226
+
227
def _envelope_from_args(args: argparse.Namespace, frontmatter: dict[str, Any]) -> str | None:
    """Choose the envelope key: ``--envelope`` if given, else infer it.

    Inference uses the frontmatter keys other than ``softschema``: exactly one
    such key is the envelope, none yields ``None``.

    Raises:
        ValueError: If several candidate keys exist and no override was given.
    """
    if args.envelope is not None:
        return args.envelope

    candidates = [str(name) for name in frontmatter if name != "softschema"]
    if len(candidates) > 1:
        raise ValueError(
            "missing --envelope because the document has multiple non-softschema "
            f"frontmatter keys: {', '.join(candidates)}"
        )
    return candidates[0] if candidates else None
239
+
240
+
241
def _compile_cmd(args: argparse.Namespace) -> int:
    """Run ``softschema compile`` and print the compile result as JSON.

    Args:
        args: Parsed arguments carrying ``model``, ``out``, ``contract`` and
            ``check``.

    Returns:
        ``0`` on success, ``1`` when the result reports drift, and ``2`` when
        the model spec cannot be loaded.
    """
    try:
        model = _load_model(args.model)
    except (ImportError, TypeError, ValueError) as exc:
        # ImportError: the module part of the spec could not be imported.
        print(f"softschema compile: {exc}", file=sys.stderr)
        return 2
    result = compile_model(model, args.out, contract_id=args.contract, check_only=args.check)
    print(_json(result))
    return 1 if result.drift else 0
250
+
251
+
252
def _inspect_cmd(args: argparse.Namespace) -> int:
    """Run ``softschema inspect`` and print the artifact's metadata as JSON.

    The output holds the path, whether frontmatter was found, the parsed
    ``softschema`` metadata (if any) and the remaining frontmatter keys.

    Returns:
        ``0`` on success, ``2`` when the artifact cannot be read or its
        ``softschema`` metadata cannot be parsed.
    """
    try:
        _content, frontmatter = fmf_read(args.path)
        metadata = None
        envelope_keys: list[str] = []
        if isinstance(frontmatter, dict):
            metadata = parse_softschema_metadata(frontmatter.get("softschema"))
            envelope_keys = [str(key) for key in frontmatter if key != "softschema"]
    except (OSError, ValueError, ValidationError) as exc:
        # Report unreadable files and malformed metadata like the validate
        # command does, instead of surfacing a traceback.
        print(f"softschema inspect: {exc}", file=sys.stderr)
        return 2

    print(
        _json(
            {
                "path": args.path,
                "has_frontmatter": frontmatter is not None,
                "metadata": metadata,
                "envelope_keys": envelope_keys,
            }
        )
    )
    return 0
270
+
271
+
272
def _docs_cmd(args: argparse.Namespace) -> int:
    """Run ``softschema docs``: list topics or print one bundled document.

    With ``--list`` or without a topic the topic listing is printed; otherwise
    the selected document is printed. ``--json`` switches either output to
    JSON. Always returns ``0``.
    """
    wants_listing = args.list_topics or args.topic is None
    if wants_listing:
        if args.json:
            print(_json(_docs_listing_payload()))
        else:
            _write_text(_docs_listing())
        return 0

    if not args.json:
        _write_text(_read_resource(DOC_TOPICS[args.topic].path))
        return 0

    entry = DOC_TOPICS[args.topic]
    payload = {
        "name": args.topic,
        "title": entry.title,
        "path": entry.path,
        "summary": entry.summary,
        "content": _read_resource(entry.path),
    }
    print(_json(payload))
    return 0
295
+
296
+
297
def _generate_cmd(args: argparse.Namespace) -> int:
    """Run ``softschema generate`` over every path and print a JSON summary.

    Processing stops at the first path that raises ``OSError`` or
    ``ValueError``; the error is reported on stderr and ``1`` is returned
    without printing a summary.

    Returns:
        ``1`` on error, or when ``--check`` is set and any file drifted;
        otherwise ``0``.
    """
    reports: list[dict[str, Any]] = []
    drift_seen = False
    for target in args.paths:
        try:
            outcome = regenerate(target, check=args.check)
        except (OSError, ValueError) as exc:
            print(f"error: {target}: {exc}", file=sys.stderr)
            return 1
        drift_seen = drift_seen or outcome.drift
        reports.append(
            {
                "path": str(target),
                "sections": outcome.sections,
                "drift": outcome.drift,
                "drift_details": outcome.drift_details,
            }
        )

    _write_text(_json({"check": args.check, "drift": drift_seen, "files": reports}))
    return 1 if (args.check and drift_seen) else 0
319
+
320
+
321
def _skill_cmd(args: argparse.Namespace) -> int:
    """Run ``softschema skill``: print the brief or the full skill document."""
    if args.brief:
        guidance = _brief_skill_text()
    else:
        guidance = _read_resource(DOC_TOPICS["skill"].path)
    _write_text(guidance)
    return 0
327
+
328
+
329
def _load_model(spec: str) -> type[BaseModel]:
    """Import and return the Pydantic model class named by ``module:Class``.

    Args:
        spec: Model reference in ``module:Class`` form.

    Returns:
        The class found at ``Class`` inside ``module``.

    Raises:
        ValueError: If ``spec`` is malformed, the module cannot be imported,
            or the module has no such attribute.
        TypeError: If the attribute is not a Pydantic ``BaseModel`` subclass.
    """
    module_name, _, attr = spec.partition(":")
    if not module_name or not attr:
        raise ValueError(f"model spec must be module:Class, got {spec!r}")
    # Make the invoking directory importable so example modules outside the package
    # (e.g. examples.movie_page.model) resolve when running the CLI from a checkout.
    cwd = str(Path.cwd())
    if cwd not in sys.path:
        sys.path.insert(0, cwd)
    try:
        module = importlib.import_module(module_name)
    except ImportError as exc:
        # Surface import failures as the ValueError the CLI handlers already
        # report, instead of letting a traceback escape.
        raise ValueError(f"cannot import module {module_name!r} for {spec!r}: {exc}") from exc
    obj = getattr(module, attr, None)
    if obj is None:
        raise ValueError(f"{spec!r} has no attribute {attr!r}")
    if not isinstance(obj, type) or not issubclass(obj, BaseModel):
        raise TypeError(f"{spec!r} is not a Pydantic BaseModel class")
    return obj
345
+
346
+
347
def _docs_listing() -> str:
    """Render the plain-text table of bundled doc topics and usage hints."""
    pad = max(map(len, DOC_TOPICS))
    rows = [
        f" {key.ljust(pad)} {entry.summary}"
        for key, entry in sorted(DOC_TOPICS.items())
    ]
    header = ["Available softschema docs:", ""]
    footer = [
        "",
        "Run `softschema docs <topic>` to print a document.",
        "Copy examples from the printed docs or from the repository files; "
        "the CLI does not scaffold or mutate projects.",
    ]
    return "\n".join([*header, *rows, *footer])
364
+
365
+
366
def _docs_listing_payload() -> dict[str, Any]:
    """Build the JSON-serialisable form of the docs listing."""
    topics: list[dict[str, str]] = []
    for key, entry in sorted(DOC_TOPICS.items()):
        topics.append(
            {
                "name": key,
                "title": entry.title,
                "path": entry.path,
                "summary": entry.summary,
            }
        )
    return {
        "topics": topics,
        "copyable_examples": ["example", "example-artifact", "example-model", "example-host"],
        "scaffolding": False,
    }
380
+
381
+
382
def _brief_skill_text() -> str:
    """Return the compact Markdown guidance printed by ``skill --brief``."""
    brief = """# Softschema Skill Brief

Use soft schemas when humans or agents write Markdown/YAML artifacts and tools need to
consume some values reliably.

- Read `softschema docs guide` for the mental model.
- Read `softschema docs spec` for the exact artifact format.
- Inspect `softschema docs example` and `softschema docs example-artifact` for the
copyable movie example.
- Treat YAML/frontmatter as authoritative.
- Do not parse Markdown body prose or tables for consumed values.
- Use `softschema.contract` to name the payload contract.
- Keep examples copyable; do not scaffold or mutate a target project unless the user
explicitly asks for that workflow.
"""
    return brief
398
+
399
+
400
def _read_resource(relative_path: str) -> str:
    """Return the UTF-8 text of a bundled resource.

    The packaged ``softschema/resources`` tree is tried first; if the file is
    not there, the same relative path is looked up under the development
    repository root.

    Raises:
        FileNotFoundError: If the resource exists in neither location.
    """
    packaged = resources.files("softschema").joinpath("resources", *Path(relative_path).parts)
    try:
        text = packaged.read_text(encoding="utf-8")
    except FileNotFoundError:
        text = None
    if text is not None:
        return text

    checkout_copy = _dev_repo_root() / relative_path
    if not checkout_copy.is_file():
        raise FileNotFoundError(f"bundled softschema resource not found: {relative_path}")
    return checkout_copy.read_text(encoding="utf-8")
412
+
413
+
414
def _dev_repo_root() -> Path:
    """Locate the repository root when running from a source checkout.

    Walks up from this module and returns the first ancestor containing both
    a ``pyproject.toml`` file and a ``docs`` directory. If none matches, falls
    back to the ancestor four levels above this module's directory, or the
    topmost ancestor when the path is shallower than that.
    """
    ancestors = Path(__file__).resolve().parents
    for parent in ancestors:
        if (parent / "pyproject.toml").is_file() and (parent / "docs").is_dir():
            return parent
    # Clamp the index so a shallow install location cannot raise IndexError.
    return ancestors[min(4, len(ancestors) - 1)]
419
+
420
+
421
def _write_text(text: str) -> None:
    """Write ``text`` to stdout, ensuring the output ends with a newline."""
    stream = sys.stdout
    stream.write(text)
    already_terminated = text.endswith("\n")
    if not already_terminated:
        stream.write("\n")
425
+
426
+
427
def _json(value: Any) -> str:
    """Serialise ``value`` as indented, key-sorted JSON after plain conversion."""
    plain_value = _plain(value)
    return json.dumps(plain_value, indent=2, sort_keys=True)
429
+
430
+
431
def _plain(value: Any) -> Any:
    """Recursively convert ``value`` into JSON-serialisable builtins.

    Paths become strings, enums their values, Pydantic models and dataclass
    instances dicts, lists and tuples lists, and classes ``module:Name``
    strings. Dict keys are stringified. Anything else is returned unchanged.
    """
    if isinstance(value, Path):
        return str(value)
    if isinstance(value, Enum):
        return value.value
    if isinstance(value, BaseModel):
        dumped = value.model_dump(by_alias=True)
        return _plain(dumped)
    # Only dataclass instances are expanded; dataclass types fall through to
    # the class branch at the end.
    if is_dataclass(value) and not isinstance(value, type):
        as_mapping = asdict(cast(Any, value))
        return _plain(as_mapping)
    if isinstance(value, dict):
        return {str(key): _plain(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_plain(element) for element in value]
    if isinstance(value, type):
        return f"{value.__module__}:{value.__name__}"
    return value
447
+
448
+
449
# Allow running the module directly; the process exits with main()'s status.
if __name__ == "__main__":
    raise SystemExit(main())
softschema/compile.py ADDED
@@ -0,0 +1,120 @@
1
+ """Pydantic to JSON Schema YAML sidecar emitter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import tempfile
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import yaml
13
+ from pydantic import BaseModel
14
+
15
# Format version of the `x-softschema` block written into compiled sidecars.
# This is independent of the installed package version (query that with
# `importlib.metadata.version("softschema")`); bump it only when the shape of
# `x-softschema` itself changes.
SOFTSCHEMA_FORMAT_VERSION = "0.1.0"
# JSON Schema dialect URI used as the default `$schema` of compiled sidecars.
JSON_SCHEMA_DRAFT = "https://json-schema.org/draft/2020-12/schema"
20
+
21
+
22
@dataclass(frozen=True)
class CompileResult:
    """Immutable record of what a single compile pass produced."""

    # Sidecar location the pass wrote to (or compared against in check mode).
    out_path: Path
    # The YAML text rendered from the model's schema.
    schema_yaml: str
    # True when a check-only pass found the sidecar missing or different.
    drift: bool = False
    # Short human-readable description of the drift; None when there is none.
    drift_diff: str | None = None
    # Hex SHA-256 digest computed over the schema.
    schema_sha256: str | None = None
31
+
32
+
33
def compile_model(
    model_cls: type[BaseModel],
    out_path: Path,
    *,
    contract_id: str | None = None,
    check_only: bool = False,
) -> CompileResult:
    """Compile ``model_cls`` to a JSON Schema YAML sidecar at ``out_path``.

    Args:
        model_cls: Pydantic model whose JSON Schema is emitted.
        out_path: Destination of the YAML sidecar.
        contract_id: Optional contract ID recorded in the schema.
        check_only: When true, nothing is written; the rendered schema is
            compared with the file already at ``out_path`` instead.

    Returns:
        A ``CompileResult`` holding the rendered YAML and schema digest. In
        check mode ``drift`` is true if the sidecar is missing or differs
        (ignoring leading and trailing whitespace), with ``drift_diff``
        describing which.
    """
    schema = _augment_schema(model_cls.model_json_schema(), model_cls, contract_id)
    # The digest is taken before it is stored, so it never covers itself.
    schema_sha256 = _schema_sha256(schema)
    schema.setdefault("x-softschema", {})["schema_sha256"] = schema_sha256
    rendered = _yaml_dump(schema)

    if not check_only:
        _write_atomic(out_path, rendered)
        return CompileResult(
            out_path=out_path,
            schema_yaml=rendered,
            drift=False,
            schema_sha256=schema_sha256,
        )

    drift_diff: str | None = None
    if not out_path.is_file():
        drift_diff = f"missing committed schema sidecar at {out_path}"
    # Sidecars are written as UTF-8, so read them back the same way instead of
    # relying on the platform's default encoding.
    elif out_path.read_text(encoding="utf-8").strip() != rendered.strip():
        drift_diff = f"committed schema at {out_path} differs from compile output"

    return CompileResult(
        out_path=out_path,
        schema_yaml=rendered,
        drift=drift_diff is not None,
        drift_diff=drift_diff,
        schema_sha256=schema_sha256,
    )
78
+
79
+
80
def _augment_schema(
    schema: dict[str, Any],
    model_cls: type[BaseModel],
    contract_id: str | None,
) -> dict[str, Any]:
    """Return a copy of ``schema`` with softschema bookkeeping added.

    ``$schema`` (and ``$id`` when a contract ID is given) are filled in only
    if absent. The ``x-softschema`` block gets ``contract``,
    ``generated_from`` and ``softschema_format_version`` set, overriding any
    existing values for those keys. The input ``schema`` is left unmodified,
    including any ``x-softschema`` mapping it already holds.
    """
    out = dict(schema)
    out.setdefault("$schema", JSON_SCHEMA_DRAFT)
    if contract_id is not None:
        out.setdefault("$id", contract_id)
    # Build a fresh mapping: ``dict(schema)`` is a shallow copy, so updating
    # the nested dict in place would leak changes into the caller's schema.
    out["x-softschema"] = {
        **out.get("x-softschema", {}),
        "contract": contract_id,
        "generated_from": f"{model_cls.__module__}:{model_cls.__name__}",
        "softschema_format_version": SOFTSCHEMA_FORMAT_VERSION,
    }
    return out
98
+
99
+
100
def _schema_sha256(schema: dict[str, Any]) -> str:
    """Return the hex SHA-256 of the schema's canonical JSON encoding.

    Canonical means sorted keys and compact separators; values JSON cannot
    encode natively are stringified with ``str``.
    """
    payload = json.dumps(
        schema,
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    ).encode("utf-8")
    digest = hashlib.sha256()
    digest.update(payload)
    return digest.hexdigest()
103
+
104
+
105
def _yaml_dump(schema: dict[str, Any]) -> str:
    """Render ``schema`` as block-style YAML, keeping key order and Unicode."""
    dump_options = {
        "sort_keys": False,
        "default_flow_style": False,
        "allow_unicode": True,
    }
    return yaml.safe_dump(schema, **dump_options)
107
+
108
+
109
def _write_atomic(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` via a temporary file and a rename.

    The parent directory is created if needed. The text is written as UTF-8
    to a hidden temporary file in the same directory, which then replaces
    ``path``. If writing or replacing fails, the temporary file is removed
    before the error propagates.

    Raises:
        OSError: If the directory, temporary file or rename cannot be handled.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path: Path | None = None
    try:
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            delete=False,
            dir=path.parent,
            prefix=f".{path.name}.",
        ) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(text)
        tmp_path.replace(path)
    except BaseException:
        # delete=False means nothing else cleans up a half-written temp file.
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
        raise