trustrender 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,607 @@
1
+ """TrustRender: fast, code-first PDF generation from structured data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ from .engine import CompileBackend, compile_typst, compile_typst_file
11
+ from .errors import ErrorCode, TrustRenderError
12
+
13
+
14
@dataclass
class RenderResult:
    """Internal result from the render pipeline. Not part of the public API.

    Produced by ``_render_document_pipeline``; the public ``render()`` and
    ``audit()`` helpers unwrap ``pdf_bytes`` from it.
    """

    # Final PDF contents after every enabled post-processing stage has run.
    pdf_bytes: bytes
    # Identifier of the RenderTrace built for this render (``trace.id``).
    trace_id: str | None = None
20
+
21
+ # Re-export for public API
22
+ from .errors import ErrorCode as ErrorCode # noqa: F811
23
+ from .templates import render_template
24
+
25
# Package version; also recorded as ``engine_version`` in every render trace.
__version__ = "0.1.0"

# Names exported by ``from trustrender import *``.
__all__ = [
    "render", "audit", "AuditResult",
    "TrustRenderError", "ErrorCode", "__version__", "bundled_font_dir",
]
31
+
32
+
33
+ # Resolved once at import time — deterministic across local, test, and container.
34
+ # Check multiple locations: env var, installed package, dev layout.
35
def _find_bundled_fonts() -> Path | None:
    """Locate the bundled font directory, or return None when there is none.

    Lookup order (first existing directory wins):
      1. ``TRUSTRENDER_FONT_PATH`` environment variable (explicit override).
      2. ``fonts/`` next to this module (installed package layout).
      3. ``fonts/`` two levels above this module's directory (dev layout).
    """
    # 1. Explicit override; an unset, empty or non-directory value is ignored.
    override = os.environ.get("TRUSTRENDER_FONT_PATH")
    if override and Path(override).is_dir():
        return Path(override).resolve()

    # 2 + 3. Package-relative candidates, tried in order.
    package_dir = Path(__file__).resolve().parent
    candidates = (
        package_dir / "fonts",
        package_dir.parent.parent / "fonts",
    )
    for candidate in candidates:
        if candidate.is_dir():
            return candidate

    return None
55
+
56
+
57
# Looked up once when the module is imported; read through bundled_font_dir().
_BUNDLED_FONT_DIR = _find_bundled_fonts()
58
+
59
+
60
def bundled_font_dir() -> Path | None:
    """Return the path to the bundled font directory, or None if not found.

    The value is the module-level result computed at import time; this
    function does not re-scan the filesystem or re-read the environment.
    """
    return _BUNDLED_FONT_DIR
63
+
64
+
65
def _build_font_paths(
    font_paths: list[str | os.PathLike] | None,
) -> list[str] | None:
    """Build the final list of font directories handed to the compiler.

    Ordering of the returned list:
      1. Explicit ``font_paths`` passed by the caller, in the given order.
      2. The bundled font directory, appended unless the caller already
         listed the same string.

    Args:
        font_paths: Caller-supplied font directories (``str`` or any
            ``os.PathLike``), or None.

    Returns:
        A list of string paths, or None when there are neither caller paths
        nor a bundled directory.
    """
    result: list[str] = []
    if font_paths:
        # os.fsdecode honours __fspath__ (and decodes bytes paths), whereas
        # str() on an arbitrary os.PathLike may return its repr, not a path.
        result.extend(os.fsdecode(p) for p in font_paths)
    bundled = bundled_font_dir()
    if bundled:
        bundled_str = str(bundled)
        if bundled_str not in result:
            result.append(bundled_str)
    return result or None
86
+
87
+
88
def _render_document_pipeline(
    template_path: Path,
    data: dict,
    *,
    debug: bool = False,
    font_paths: list[str] | None = None,
    validate: bool = True,
    zugferd: str | None = None,
    provenance: bool = False,
    backend: CompileBackend | None = None,
    timeout: float | None = None,
    display_name: str | None = None,
) -> RenderResult:
    """Shared render pipeline: validate, preprocess, compile, post-process.

    All inputs must be pre-resolved (paths exist, data is a dict, fonts are
    string paths). This is the single source of truth for render semantics —
    both ``render()`` and the server call this.

    Pipeline stages (in order):
      1. ZUGFeRD data validation (EN 16931 requirements)
      2. Contract validation (when ``validate`` is true, Jinja2 templates only)
      3. Template preprocessing (Jinja2) + Typst compilation
      4. ZUGFeRD post-processing (XML build + PDF embed)
      5. Provenance embedding (after all other processing)

    The ordering of stages 4 and 5 is load-bearing: provenance uses
    clone_from to preserve ZUGFeRD metadata and embedded files.

    If ``TRUSTRENDER_HISTORY`` is set, a stage-by-stage RenderTrace is
    recorded to the trace store after each render (success or failure).

    Args:
        template_path: Template file. A name ending in ``.j2.typ`` selects
            the Jinja2 path; anything else is compiled as a raw Typst file.
        data: Template data. It is JSON-serialised for the trace hash, so it
            must be JSON-serialisable.
        debug: Forwarded to ``compile_typst`` (Jinja2 path only).
        font_paths: Font directories forwarded to the compiler.
        validate: Run contract validation (stage 2) for Jinja2 templates.
        zugferd: ZUGFeRD profile name; any truthy value enables stages 1
            and 4 and requests the ``a-3b`` PDF standard.
        provenance: Enable stage 5.
        backend: Forwarded to the compiler.
        timeout: Forwarded to the compiler.
        display_name: Name stored in the trace instead of the file name.

    Returns:
        RenderResult holding the final PDF bytes and the trace id.

    Raises:
        TrustRenderError: On ZUGFeRD validation failure (``ZUGFERD_ERROR``),
            contract validation failure (``DATA_CONTRACT``), ZUGFeRD
            post-processing failure, or whatever the called stages raise.
    """
    import hashlib
    import time

    from .trace import RenderTrace, StageTrace, get_store

    is_jinja = template_path.name.endswith(".j2.typ")
    # NOTE(review): both hashes are computed before the try block below, so a
    # failure here (unreadable template, non-JSON-serialisable data) is raised
    # as-is and never reaches the trace store.
    trace = RenderTrace(
        template_name=display_name or template_path.name,
        template_hash=f"sha256:{hashlib.sha256(template_path.read_bytes()).hexdigest()[:16]}",
        data_hash=f"sha256:{hashlib.sha256(json.dumps(data, sort_keys=True, separators=(',',':')).encode()).hexdigest()[:16]}",
        engine_version=__version__,
        zugferd_profile=zugferd or "",
        validated=validate,
    )
    pipeline_start = time.monotonic()

    def _record_trace(outcome: str, pdf_size: int = 0, error: TrustRenderError | None = None) -> None:
        # Finalise the trace (outcome, size, timing, error summary) and hand it
        # to the trace store when one is available.
        trace.outcome = outcome
        trace.pdf_size = pdf_size
        trace.total_ms = int((time.monotonic() - pipeline_start) * 1000)
        if error:
            trace.error_code = error.code.value
            trace.error_stage = error.stage
            # Only the first line of the message is stored.
            trace.error_message = str(error).split("\n")[0]
        store = get_store()
        if store:
            try:
                store.record(trace)
            except Exception:
                pass  # Lineage is observability, not a gate

    try:
        # 1. ZUGFeRD invoice data validation
        if zugferd:
            from .zugferd import validate_zugferd_invoice_data

            t0 = time.monotonic()
            errors = validate_zugferd_invoice_data(data, profile=zugferd)
            stage = StageTrace(
                stage="zugferd_validation",
                status="fail" if errors else "pass",
                duration_ms=int((time.monotonic() - t0) * 1000),
                # One check per reported error, or a single passing check when
                # the validator returned nothing.
                checks_run=len(errors) + (1 if not errors else 0),
                checks_passed=0 if errors else 1,
                checks_failed=len(errors),
                errors=[{"path": e.path, "message": e.message} for e in errors],
                metadata={"profile": zugferd},
            )
            trace.stages.append(stage)
            if errors:
                detail = "\n".join(f"  {e.path}: {e.message}" for e in errors)
                exc = TrustRenderError(
                    f"Invoice data does not satisfy EN 16931: {len(errors)} error(s)",
                    code=ErrorCode.ZUGFERD_ERROR,
                    stage="zugferd_validation",
                    detail=detail,
                    template_path=str(template_path),
                    validation_errors=errors,
                )
                # Recorded here; the outer handler then sees trace.outcome set
                # and does not record a second time.
                _record_trace("error", error=exc)
                raise exc

        # 2. Contract validation (when ``validate`` is true, Jinja2 templates only)
        if validate and is_jinja:
            from .contract import (
                format_contract_detail,
                format_contract_errors,
                infer_contract,
                validate_data,
            )

            t0 = time.monotonic()
            contract = infer_contract(template_path)
            validation_errors = validate_data(contract, data)
            stage = StageTrace(
                stage="contract_validation",
                status="fail" if validation_errors else "pass",
                duration_ms=int((time.monotonic() - t0) * 1000),
                checks_run=len(contract),
                checks_passed=len(contract) - len(validation_errors),
                checks_failed=len(validation_errors),
                errors=[{"path": e.path, "message": e.message} for e in validation_errors],
            )
            trace.stages.append(stage)
            if validation_errors:
                # NOTE(review): the error's stage is "data_validation" while the
                # StageTrace above is named "contract_validation" — confirm the
                # difference is intentional.
                exc = TrustRenderError(
                    format_contract_errors(validation_errors, template_path.name),
                    code=ErrorCode.DATA_CONTRACT,
                    stage="data_validation",
                    template_path=str(template_path),
                    detail=format_contract_detail(validation_errors, contract),
                )
                _record_trace("error", error=exc)
                raise exc

        # 3. Template preprocessing + compilation
        t0 = time.monotonic()
        # The a-3b PDF standard is requested only when ZUGFeRD output is enabled.
        pdf_standards = ["a-3b"] if zugferd else None
        if is_jinja:
            rendered = render_template(template_path, data)
            pdf_bytes = compile_typst(
                rendered,
                template_path.parent,
                debug=debug,
                font_paths=font_paths,
                template_path=template_path,
                backend=backend,
                timeout=timeout,
                pdf_standards=pdf_standards,
            )
        else:
            # Raw Typst file: ``data`` and ``debug`` are not passed to the compiler.
            pdf_bytes = compile_typst_file(
                template_path,
                font_paths=font_paths,
                backend=backend,
                timeout=timeout,
                pdf_standards=pdf_standards,
            )
        trace.stages.append(StageTrace(
            stage="compilation",
            status="pass",
            duration_ms=int((time.monotonic() - t0) * 1000),
            metadata={
                "template_type": "jinja2" if is_jinja else "raw",
                "pdf_standards": pdf_standards or [],
                "pdf_size": len(pdf_bytes),
            },
        ))

        # 4. ZUGFeRD post-processing
        if zugferd:
            from .zugferd import apply_zugferd, build_invoice_xml

            t0 = time.monotonic()
            try:
                xml_bytes = build_invoice_xml(data, profile=zugferd)

                # XSD + Schematron validation: guard rail before embedding XML
                from .zugferd import validate_zugferd_xml

                xml_errors = validate_zugferd_xml(xml_bytes)
                if xml_errors:
                    raise TrustRenderError(
                        f"Generated XML failed validation: {xml_errors[0]}",
                        code=ErrorCode.ZUGFERD_ERROR,
                        stage="zugferd",
                        detail="; ".join(xml_errors),
                        template_path=str(template_path),
                    )

                pdf_bytes = apply_zugferd(pdf_bytes, xml_bytes)
                trace.stages.append(StageTrace(
                    stage="zugferd_postprocess",
                    status="pass",
                    duration_ms=int((time.monotonic() - t0) * 1000),
                    metadata={"xml_size": len(xml_bytes), "profile": zugferd},
                ))
            except TrustRenderError:
                # Already a structured error; let the outer handler trace it.
                raise
            except Exception as exc:
                # Any other failure in this stage is normalised to ZUGFERD_ERROR.
                raise TrustRenderError(
                    f"ZUGFeRD generation failed: {exc}",
                    code=ErrorCode.ZUGFERD_ERROR,
                    stage="zugferd",
                    detail=str(exc),
                    template_path=str(template_path),
                ) from exc

        # 5. Provenance (AFTER ZUGFeRD — uses clone_from to preserve metadata)
        if provenance:
            from .provenance import create_provenance, embed_provenance

            t0 = time.monotonic()
            prov_record = create_provenance(template_path, data)
            pdf_bytes = embed_provenance(pdf_bytes, prov_record)
            trace.stages.append(StageTrace(
                stage="provenance",
                status="pass",
                duration_ms=int((time.monotonic() - t0) * 1000),
                # Stage metadata keeps only a 30-character prefix of the proof;
                # the full value is stored on the trace itself just below.
                metadata={"proof_hash": prov_record.proof[:30]},
            ))
            trace.provenance_hash = prov_record.proof

        # Output fingerprint: hash the final PDF bytes after all post-processing
        trace.output_hash = f"sha256:{hashlib.sha256(pdf_bytes).hexdigest()[:16]}"

        _record_trace("success", pdf_size=len(pdf_bytes))
        return RenderResult(pdf_bytes=pdf_bytes, trace_id=trace.id)

    except TrustRenderError as exc:
        # NOTE(review): only TrustRenderError is handled here, so any other
        # exception type escaping a stage propagates without a trace record.
        if not trace.outcome:  # Not already recorded by a stage
            _record_trace("error", error=exc)
        raise
313
+
314
+
315
def render(
    template: str | os.PathLike,
    data: dict | str | os.PathLike,
    *,
    output: str | os.PathLike | None = None,
    debug: bool = False,
    font_paths: list[str | os.PathLike] | None = None,
    validate: bool = True,
    zugferd: str | None = None,
    provenance: bool = False,
) -> bytes:
    """Render a PDF from a template and data.

    Args:
        template: Path to a template file.
            - ``.j2.typ``: Jinja2 template preprocessed then compiled with Typst.
            - ``.typ``: Raw Typst file compiled directly.
        data: Template data as a dict, a JSON string, or a path to a ``.json`` file.
        output: If provided, the PDF is also written to this path; missing
            parent directories are created.
        debug: Forwarded to the compile step. Intended to preserve the
            intermediate ``.typ`` file (that behaviour lives in the engine).
        font_paths: Additional font directories. They are listed ahead of
            the bundled font directory.
        validate: If True, validate data against the template's inferred
            structural contract before rendering (Jinja2 templates only).
        zugferd: ZUGFeRD profile. Only ``"en16931"`` is accepted; None
            disables ZUGFeRD output.
        provenance: If True, embed a generation proof in the PDF.

    Returns:
        PDF file contents as bytes.

    Raises:
        TrustRenderError: For an unsupported ``zugferd`` profile
            (``INVALID_DATA``), a missing template (``TEMPLATE_NOT_FOUND``),
            unusable ``data``, or any failure inside the render pipeline.
            Check ``code``, ``stage`` and ``detail`` for diagnostics.
    """
    # Reject unknown profiles before touching the filesystem.
    known_profiles = {"en16931"}
    profile_accepted = zugferd is None or zugferd in known_profiles
    if not profile_accepted:
        raise TrustRenderError(
            f"Unsupported zugferd profile: '{zugferd}'. Supported: {sorted(known_profiles)}",
            code=ErrorCode.INVALID_DATA,
            stage="data_resolution",
        )

    source = Path(template)
    if not source.exists():
        raise TrustRenderError(
            f"Template not found: {source}",
            code=ErrorCode.TEMPLATE_NOT_FOUND,
            stage="data_resolution",
            template_path=str(source),
        )

    # Normalise the loosely-typed inputs, then delegate to the shared pipeline.
    payload = _resolve_data(data)
    fonts = _build_font_paths(font_paths)
    rendered = _render_document_pipeline(
        source,
        payload,
        debug=debug,
        font_paths=fonts,
        validate=validate,
        zugferd=zugferd,
        provenance=provenance,
    )

    if output is None:
        return rendered.pdf_bytes

    destination = Path(output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(rendered.pdf_bytes)
    return rendered.pdf_bytes
392
+
393
+
394
@dataclass
class AuditResult:
    """Result of an audited render: PDF bytes plus audit artifacts.

    The ``fingerprint`` is always present. The ``change_set`` and
    ``drift_result`` are only present when a ``baseline_dir`` is provided
    and a baseline exists for the template. The ``semantic_report`` is only
    present when ``semantic_hints`` were passed to ``audit()``.
    """

    # Final rendered PDF contents.
    pdf_bytes: bytes
    # Fields below are typed as ``object`` so their defining modules are not
    # imported at package import time.
    fingerprint: object  # InputFingerprint (avoid import at module level)
    change_set: object | None = None  # ChangeSet | None
    drift_result: object | None = None  # DriftResult | None
    semantic_report: object | None = None  # SemanticReport | None
408
+
409
+
410
def audit(
    template: str | os.PathLike,
    data: dict | str | os.PathLike,
    *,
    output: str | os.PathLike | None = None,
    debug: bool = False,
    font_paths: list[str | os.PathLike] | None = None,
    validate: bool = True,
    zugferd: str | None = None,
    provenance: bool = False,
    baseline_dir: str | os.PathLike | None = None,
    save_baseline: bool = False,
    semantic_hints: object | None = None,
) -> AuditResult:
    """Render with full audit: fingerprint, drift detection, semantic checks.

    Does everything ``render()`` does and additionally computes an input
    fingerprint, optionally compares against a stored baseline, and
    optionally runs semantic validation.

    Args:
        template: Path to a template file.
        data: Template data (dict, JSON string, or path to .json).
        output: If provided, write the PDF to this path.
        debug: Forwarded to the render pipeline.
        font_paths: Additional font directories.
        validate: Run structural contract validation.
        zugferd: ZUGFeRD profile (``"en16931"``).
        provenance: Embed generation proof.
        baseline_dir: Root directory for baselines. If provided and a
            baseline exists for the template name, the fingerprints are
            compared and drift checks are run. If None, only fingerprinting
            happens.
        save_baseline: If True, store this render as the new baseline.
            Silently ignored when ``baseline_dir`` is None.
        semantic_hints: SemanticHints instance for semantic validation.
            If None, semantic checks are skipped.

    Returns:
        AuditResult with PDF bytes and all audit artifacts.

    Raises:
        TrustRenderError: For an unsupported ``zugferd`` profile, a missing
            template, unusable ``data``, or a failure in the render pipeline.
    """
    import time

    from .fingerprint import InputFingerprint, compare, compute_fingerprint
    from .regression import check_drift, save_baseline as _save_baseline
    from .semantic import SemanticReport, validate_semantics

    known_profiles = {"en16931"}
    if not (zugferd is None or zugferd in known_profiles):
        raise TrustRenderError(
            f"Unsupported zugferd profile: '{zugferd}'. Supported: {sorted(known_profiles)}",
            code=ErrorCode.INVALID_DATA,
            stage="data_resolution",
        )

    source = Path(template)
    if not source.exists():
        raise TrustRenderError(
            f"Template not found: {source}",
            code=ErrorCode.TEMPLATE_NOT_FOUND,
            stage="data_resolution",
            template_path=str(source),
        )

    payload = _resolve_data(data)
    fonts = _build_font_paths(font_paths)

    # Step 1: fingerprint the inputs before anything is rendered.
    fingerprint = compute_fingerprint(
        source,
        payload,
        font_paths=fonts,
        zugferd_profile=zugferd,
        provenance_enabled=provenance,
        validate_enabled=validate,
    )

    # Step 2: semantic validation, only when the caller supplied hints.
    report: SemanticReport | None = (
        validate_semantics(payload, hints=semantic_hints)
        if semantic_hints is not None
        else None
    )

    def _issue_count() -> int:
        # Evaluated lazily, at the point the count is handed to a baseline call.
        return len(report.issues) if report else 0

    # Step 3: render through the shared pipeline, timing the call.
    started = time.monotonic()
    rendered = _render_document_pipeline(
        source,
        payload,
        debug=debug,
        font_paths=fonts,
        validate=validate,
        zugferd=zugferd,
        provenance=provenance,
    )
    elapsed_ms = int((time.monotonic() - started) * 1000)
    pdf = rendered.pdf_bytes

    # Step 4: optional write to disk.
    if output is not None:
        destination = Path(output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(pdf)

    # Step 5: drift detection against a stored baseline, when one exists.
    has_baseline_dir = baseline_dir is not None
    changes = None
    drift = None
    if has_baseline_dir:
        from .regression import load_baseline

        stored = load_baseline(baseline_dir, source.name)
        if stored is not None:
            previous_fp = InputFingerprint.from_dict(stored.fingerprint_json)
            changes = compare(previous_fp, fingerprint, current_data=payload)

            # NOTE(review): here a disabled feature is reported as False,
            # whereas the baseline saved below stores None for the same
            # situation — confirm check_drift treats the two alike.
            drift = check_drift(
                baseline_dir,
                source.name,
                fingerprint,
                pdf,
                zugferd_valid=zugferd is not None,
                contract_valid=validate,
                semantic_issue_count=_issue_count(),
            )

    # Step 6: optionally persist this render as the new baseline.
    if save_baseline and has_baseline_dir:
        _save_baseline(
            baseline_dir,
            source.name,
            fingerprint,
            pdf,
            render_duration_ms=elapsed_ms,
            zugferd_valid=True if zugferd else None,
            contract_valid=True if validate else None,
            semantic_issue_count=_issue_count(),
        )

    return AuditResult(
        pdf_bytes=pdf,
        fingerprint=fingerprint,
        change_set=changes,
        drift_result=drift,
        semantic_report=report,
    )
558
+
559
+
560
def _resolve_data(data: dict | str | os.PathLike) -> dict:
    """Resolve the ``data`` argument to a dict.

    Accepted inputs:
      - ``dict``: returned unchanged.
      - ``os.PathLike``: read as a UTF-8 JSON file.
      - ``str``: read as a JSON file when it names an existing path with a
        ``.json`` suffix, otherwise parsed as a JSON document.

    Args:
        data: Template data in one of the forms above.

    Returns:
        The data as a dict.

    Raises:
        TrustRenderError: With code ``INVALID_DATA`` when the input type is
            unsupported, a path-like does not exist, a string is neither an
            existing ``.json`` path nor valid JSON, or the decoded JSON is
            not an object.
        json.JSONDecodeError: When an existing data file holds invalid JSON
            (left unwrapped for backward compatibility).
    """
    if isinstance(data, dict):
        return data

    if not isinstance(data, (str, os.PathLike)):
        raise TrustRenderError(
            f"Data must be a dict, JSON string, or path to a .json file, "
            f"got {type(data).__name__}",
            code=ErrorCode.INVALID_DATA,
            stage="data_resolution",
        )

    path: Path | None = None
    if isinstance(data, str):
        # A string may be a JSON document or a file path. Test the suffix
        # first (pure string work) and guard the filesystem probe: stat() on
        # a long JSON document can raise OSError (ENAMETOOLONG), which must
        # not prevent the JSON parse below.
        candidate = Path(data)
        try:
            if candidate.suffix == ".json" and candidate.exists():
                path = candidate
        except OSError:
            path = None
    else:
        path = Path(data)
        if not path.exists():
            raise TrustRenderError(
                f"Data file not found: {path}",
                code=ErrorCode.INVALID_DATA,
                stage="data_resolution",
            )

    if path is not None:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(path, encoding="utf-8") as f:
            loaded = json.load(f)
        # Apply the same "must be an object" rule as the JSON-string branch.
        if not isinstance(loaded, dict):
            raise TrustRenderError(
                f"Data JSON must be an object, got {type(loaded).__name__}",
                code=ErrorCode.INVALID_DATA,
                stage="data_resolution",
            )
        return loaded

    # Remaining case: a string that is not an existing .json path.
    try:
        result = json.loads(data)
    except json.JSONDecodeError as exc:
        raise TrustRenderError(
            f"Invalid data: not a valid file path or JSON string: {exc}",
            code=ErrorCode.INVALID_DATA,
            stage="data_resolution",
        ) from exc
    if isinstance(result, dict):
        return result
    raise TrustRenderError(
        f"Data JSON must be an object, got {type(result).__name__}",
        code=ErrorCode.INVALID_DATA,
        stage="data_resolution",
    )