wraith-sas-lineage 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. wraith_sas_lineage-0.2.0/.claude/settings.local.json +48 -0
  2. wraith_sas_lineage-0.2.0/.cz.yaml +64 -0
  3. wraith_sas_lineage-0.2.0/.env.example +16 -0
  4. wraith_sas_lineage-0.2.0/.gitea/CODEOWNERS.md +1 -0
  5. wraith_sas_lineage-0.2.0/.gitea/PULL_REQUEST_TEMPLATE.md +32 -0
  6. wraith_sas_lineage-0.2.0/.gitea/workflows/pages.yml +59 -0
  7. wraith_sas_lineage-0.2.0/.gitea/workflows/release.yml +23 -0
  8. wraith_sas_lineage-0.2.0/.gitea/workflows/test.yml +35 -0
  9. wraith_sas_lineage-0.2.0/.gitignore +28 -0
  10. wraith_sas_lineage-0.2.0/.pre-commit-config.yaml +37 -0
  11. wraith_sas_lineage-0.2.0/CHANGELOG.md +19 -0
  12. wraith_sas_lineage-0.2.0/CONTRIBUTING.md +69 -0
  13. wraith_sas_lineage-0.2.0/LICENSE +21 -0
  14. wraith_sas_lineage-0.2.0/PKG-INFO +183 -0
  15. wraith_sas_lineage-0.2.0/README.md +120 -0
  16. wraith_sas_lineage-0.2.0/bin/build.sh +23 -0
  17. wraith_sas_lineage-0.2.0/bin/run_tests.sh +19 -0
  18. wraith_sas_lineage-0.2.0/docs/api.md +933 -0
  19. wraith_sas_lineage-0.2.0/docs/architecture.md +181 -0
  20. wraith_sas_lineage-0.2.0/docs/index.md +9 -0
  21. wraith_sas_lineage-0.2.0/docs/reference.md +240 -0
  22. wraith_sas_lineage-0.2.0/docs/roadmap.md +189 -0
  23. wraith_sas_lineage-0.2.0/docs/security.md +60 -0
  24. wraith_sas_lineage-0.2.0/docs/usage.md +161 -0
  25. wraith_sas_lineage-0.2.0/mkdocs.yml +45 -0
  26. wraith_sas_lineage-0.2.0/pyproject.toml +133 -0
  27. wraith_sas_lineage-0.2.0/schema/provenance.schema.json +126 -0
  28. wraith_sas_lineage-0.2.0/schema/turso.sql +87 -0
  29. wraith_sas_lineage-0.2.0/src/sas_lineage/__init__.py +10 -0
  30. wraith_sas_lineage-0.2.0/src/sas_lineage/ids.py +33 -0
  31. wraith_sas_lineage-0.2.0/src/sas_lineage/models.py +170 -0
  32. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/__init__.py +1 -0
  33. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/builder.py +344 -0
  34. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/cli.py +110 -0
  35. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractor.py +36 -0
  36. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/__init__.py +1 -0
  37. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/_factory.py +70 -0
  38. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/data_step.py +315 -0
  39. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/libname.py +105 -0
  40. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/macro_call.py +104 -0
  41. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/proc_generic.py +66 -0
  42. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/proc_sql.py +299 -0
  43. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/rsubmit.py +104 -0
  44. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/scd2.py +59 -0
  45. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/freshness.py +82 -0
  46. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/log_resolver.py +109 -0
  47. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/patterns.py +167 -0
  48. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/reconciler.py +79 -0
  49. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/sql_tokeniser.py +217 -0
  50. wraith_sas_lineage-0.2.0/src/sas_lineage/parser/walker.py +26 -0
  51. wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/__init__.py +1 -0
  52. wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/cli.py +117 -0
  53. wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/enrich.py +59 -0
  54. wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/gaps.py +52 -0
  55. wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/loader.py +143 -0
  56. wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/odcs.py +91 -0
  57. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/__init__.py +1 -0
  58. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/app.py +63 -0
  59. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/db.py +20 -0
  60. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/__init__.py +1 -0
  61. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/flags.py +38 -0
  62. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/lineage.py +62 -0
  63. wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/provenance.py +75 -0
  64. wraith_sas_lineage-0.2.0/tasks.py +37 -0
  65. wraith_sas_lineage-0.2.0/tests/__init__.py +0 -0
  66. wraith_sas_lineage-0.2.0/tests/test_end_to_end.py +122 -0
  67. wraith_sas_lineage-0.2.0/tests/test_hardening_gate.py +498 -0
  68. wraith_sas_lineage-0.2.0/tests/test_parser/__init__.py +0 -0
  69. wraith_sas_lineage-0.2.0/tests/test_parser/fixtures/sample_job.log +25 -0
  70. wraith_sas_lineage-0.2.0/tests/test_parser/fixtures/sample_job.sas +30 -0
  71. wraith_sas_lineage-0.2.0/tests/test_parser/test_block_splitter.py +93 -0
  72. wraith_sas_lineage-0.2.0/tests/test_parser/test_builder.py +130 -0
  73. wraith_sas_lineage-0.2.0/tests/test_parser/test_cli.py +100 -0
  74. wraith_sas_lineage-0.2.0/tests/test_parser/test_extractors.py +712 -0
  75. wraith_sas_lineage-0.2.0/tests/test_parser/test_ids.py +61 -0
  76. wraith_sas_lineage-0.2.0/tests/test_parser/test_log_resolver.py +74 -0
  77. wraith_sas_lineage-0.2.0/tests/test_parser/test_models.py +138 -0
  78. wraith_sas_lineage-0.2.0/tests/test_parser/test_pattern_wiring.py +199 -0
  79. wraith_sas_lineage-0.2.0/tests/test_parser/test_patterns.py +174 -0
  80. wraith_sas_lineage-0.2.0/tests/test_parser/test_reconciler.py +105 -0
  81. wraith_sas_lineage-0.2.0/tests/test_parser/test_sql_tokeniser.py +457 -0
  82. wraith_sas_lineage-0.2.0/tests/test_parser/test_walker.py +70 -0
  83. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/__init__.py +0 -0
  84. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_cli.py +137 -0
  85. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_enrich.py +44 -0
  86. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_freshness.py +69 -0
  87. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_gaps.py +80 -0
  88. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_loader.py +137 -0
  89. wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_odcs.py +117 -0
  90. wraith_sas_lineage-0.2.0/tests/test_web_ui/__init__.py +0 -0
  91. wraith_sas_lineage-0.2.0/tests/test_web_ui/test_app.py +55 -0
  92. wraith_sas_lineage-0.2.0/uv.lock +1579 -0
@@ -0,0 +1,48 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv run *)",
5
+ "Read(//home/thomaspeoples/pjt-nas/nas/SovereignDocs/gitea-repos/**)",
6
+ "Bash(ls -la sas-lineage-scaffolding-plan.md)",
7
+ "Bash(ls *.md)",
8
+ "Bash(ls -la wraith-sas-lineage/docs/)",
9
+ "Bash(ls -la wraith-sas-lineage/schema/)",
10
+ "Bash(uv sync *)",
11
+ "Bash(uv --version)",
12
+ "Bash(git add *)",
13
+ "Bash(git commit -m '👻 feature/TJP-20061: wire column-level lineage end to end *)",
14
+ "Bash(git stash *)",
15
+ "Bash(grep -nA15 \"\\\\[tool.ruff\" pyproject.toml)",
16
+ "Bash(git commit -m '👻 feature/TJP-20061: audit pass — stable flag IDs, lint clean *)",
17
+ "Bash(git commit -m '👻 feature/TJP-20061: wire patterns.py TODOs + factory dedup *)",
18
+ "Bash(git commit -m '👻 feature/TJP-20061: resolve final 8 patterns — zero unused *)",
19
+ "Bash(git commit -m '👻 feature/TJP-20061: step_count_mismatch as first-class flag *)",
20
+ "Bash(git rm *)",
21
+ "Bash(rm -f ../sas-lineage-scaffolding-plan.md)",
22
+ "Bash(git commit -m '👻 feature/TJP-20061: consolidate plans into docs/roadmap.md *)",
23
+ "Bash(git commit -m '👻 feature/TJP-20061: changelog + move pydoc config into pyproject *)",
24
+ "Bash(git commit -m '👻 fix/TJP-20061: CI uses uv.lock via setup-uv, drop system python *)",
25
+ "Bash(git push *)",
26
+ "Bash(git commit -m '👻 fix/TJP-20061: use image uv, drop setup-uv download *)",
27
+ "Bash(gh pr *)",
28
+ "Bash(echo \"---gh exit: $?\")",
29
+ "Bash(curl -s -o /dev/null -w \"%{http_code}\\\\n\" https://git.thomaspeoples.com/api/v1/repos/thomaspeoples/wraith-sas-lineage)",
30
+ "Bash(curl -s https://git.thomaspeoples.com/api/v1/repos/thomaspeoples/wraith-sas-lineage)",
31
+ "Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\('has_pull_requests:', d.get\\('has_pull_requests'\\)\\); print\\('default_branch:', d.get\\('default_branch'\\)\\)\")",
32
+ "Bash(git commit -m '👻 fix/TJP-20061: debug step to resolve auto-merge API target *)",
33
+ "Bash(git commit -m '👻 fix/TJP-20061: probe both api_url shapes for auto-merge 404 *)",
34
+ "Bash(git commit -m '👻 fix/TJP-20061: remove temporary auto-merge debug step *)",
35
+ "Bash(grep -nA40 \"commitizen\" pyproject.toml)",
36
+ "Bash(git fetch *)",
37
+ "Bash(echo \"dryrun exit: $?\")",
38
+ "Bash(echo \"real exit: $?\")",
39
+ "Bash(echo \"=====EXIT: $?=====\")",
40
+ "Bash(echo \"bump exit: $?\")",
41
+ "Read(//tmp/**)",
42
+ "Bash(echo \"REAL bump exit: $?\")",
43
+ "Bash(xargs -I{} grep -rn \"No tag found to do an incremental\" {})",
44
+ "Bash(git checkout *)",
45
+ "Bash(git ls-remote *)"
46
+ ]
47
+ }
48
+ }
@@ -0,0 +1,64 @@
1
+ commitizen:
2
+ name: cz_customize
3
+ version_provider: uv
4
+ version_scheme: semver
5
+ major_version_zero: true
6
+ update_changelog_on_bump: true
7
+ tag_format: v$version
8
+ bump_message: ":ghost: chore: release v$current_version -> v$new_version [skip ci]"
9
+
10
+ customize:
11
+ bump_map:
12
+ Break: MAJOR
13
+ break: MAJOR
14
+ Feature: MINOR
15
+ feature: MINOR
16
+ Fix: PATCH
17
+ fix: PATCH
18
+ Bug: PATCH
19
+ bug: PATCH
20
+
21
+ bump_pattern: '(feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}'
22
+
23
+ commit_parser: '.*?(?P<change_type>feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}: (?P<message>.*)'
24
+
25
+ change_type_map:
26
+ Break: "Breaking Changes"
27
+ break: "Breaking Changes"
28
+ Feature: "Features"
29
+ feature: "Features"
30
+ Fix: "Bug Fixes"
31
+ fix: "Bug Fixes"
32
+ Bug: "Bug Fixes"
33
+ bug: "Bug Fixes"
34
+
35
+ change_type_order:
36
+ - break
37
+ - Break
38
+ - feature
39
+ - Feature
40
+ - fix
41
+ - Fix
42
+ - bug
43
+ - Bug
44
+
45
+ changelog_pattern: '.*(feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}'
46
+
47
+ example: '👻 feature/TJP-00001: implement sovereign automation'
48
+ message_template: '{{change_type}}: {{message}}'
49
+
50
+ questions:
51
+ - name: change_type
52
+ type: list
53
+ message: Select the type of change you are committing
54
+ choices:
55
+ - {name: "break: A breaking change", value: break}
56
+ - {name: "feature: A new feature", value: feature}
57
+ - {name: "fix: A standard fix", value: fix}
58
+ - {name: "bug: A specific bug fix", value: bug}
59
+ - name: message
60
+ type: input
61
+ message: "Enter a brief description:"
62
+
63
+ schema: '<type>/<ticket>: <body>'
64
+ schema_pattern: '^.*(feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}: (.*)'
@@ -0,0 +1,16 @@
1
+ # Paths to deployed SAS estate
2
+ SAS_CODE_DIR=/path/to/sas/code
3
+ SAS_LOG_DIR=/path/to/sas/logs
4
+
5
+ # Output paths
6
+ PROVENANCE_OUTPUT=./provenance.json
7
+ PARSE_REPORT_OUTPUT=./parse_report.json
8
+ CONTRACTS_OUTPUT=./contracts/
9
+ DB_PATH=./lineage.db
10
+
11
+ # Optional: manual owner/domain overlay
12
+ OWNER_MAP_PATH=./owner_map.json
13
+
14
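+ # Web UI (NOTE(review): Flask >= 2.3 no longer reads FLASK_ENV; confirm the app consumes it itself, otherwise use FLASK_DEBUG=1)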
15
+ FLASK_ENV=development
16
+ FLASK_PORT=5000
@@ -0,0 +1 @@
1
+ * @thomaspeoples
@@ -0,0 +1,32 @@
1
+ # 👻 Ghost Stack Pull Request
2
+
3
+ ## 🎯 Purpose
4
+ _What are we haunting today? Briefly describe the change._
5
+
6
+ **Fixes:** # (Link the Gitea Issue here)
7
+
8
+ ---
9
+
10
+ ## 🛠️ Proposed Changes
11
+ - [ ] Logic updated in `src/`
12
+ - [ ] Dependencies synced via `uv lock`
13
+ - [ ] Schema updated in `schema/` if models changed
14
+
15
+ ---
16
+
17
+ ## 🚦 Quality Gate (The Ghost Protocol)
18
+ - [ ] **uv Sync:** Environment is healthy and `uv.lock` is up to date.
19
+ - [ ] **Tests:** `./bin/run_tests.sh` passes with >80% coverage.
20
+ - [ ] **Linting:** `pre-commit run --all-files` passes.
21
+ - [ ] **Commits:** Messages follow the `cz_customize` regex (Fix/Feature/Bug/Break).
22
+
23
+ ---
24
+
25
+ ## 🧪 Deployment & Verification
26
+ - [ ] Branch is synced with the latest `main`.
27
+ - [ ] Thin-slice test (one job end-to-end) passes if parser heuristics changed.
28
+
29
+ ---
30
+
31
+ ## 👤 Author's Final Word
32
+ _Any specific notes for the reviewer or warnings about breaking changes?_
@@ -0,0 +1,59 @@
1
+ name: Deploy Gitea Pages
2
+ on:
3
+ workflow_dispatch:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - name: Install uv
15
+ uses: astral-sh/setup-uv@v3
16
+ with:
17
+ version: "latest"
18
+
19
+ - name: Install dependencies
20
+ run: uv sync --all-extras
21
+
22
+ - name: Run Tests & Coverage
23
+ run: uv run pytest --cov=src --cov-report=xml
24
+
25
+ - name: Generate Coverage Badge
26
+ run: uv run genbadge coverage -i coverage.xml -o docs/coverage.svg
27
+
28
+ - name: Build Documentation
29
+ run: uv run mkdocs build
30
+
31
+ - name: Sync to Main Website Repo
32
+ env:
33
+ MY_TOKEN: ${{ secrets.GITEATOKEN }}
34
+ MY_URL: ${{ secrets.GITEAURL }}
35
+
36
+ run: |
37
+ # 1. Clone your main website repo
38
+ CLEAN_URL=${MY_URL#*//}
39
+ git clone https://${MY_TOKEN}@${CLEAN_URL}/thomaspeoples/ghost-site.git main_site
40
+
41
+ # 2. Create the target directory if it doesn't exist
42
+ TARGET_DIR="main_site/www/gitea-repos/wraith-sas-lineage"
43
+ mkdir -p $TARGET_DIR
44
+
45
+ # 3. Clean and Copy
46
+ rm -rf $TARGET_DIR/*
47
+ cp -r site/* $TARGET_DIR/
48
+
49
+ # 4. Commit and Push to the OTHER repo
50
+ cd main_site
51
+ git config user.name "Ghost Runner"
52
+ git config user.email "runner@ghost-stack.local"
53
+ git add .
54
+ if git diff --staged --quiet; then
55
+ echo "No changes to documentation. Skipping commit."
56
+ else
57
+ git commit -m ":books: main: Update docs for wraith-sas-lineage: ${{ gitea.sha }}"
58
+ git push origin main
59
+ fi
@@ -0,0 +1,23 @@
1
+ name: 'Sovereign Release'
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ release:
10
+ if: "!contains(github.event.head_commit.message, '[skip ci]')"
11
+ runs-on: ghost-runner
12
+ steps:
13
+ - name: Check out code
14
+ uses: actions/checkout@v4
15
+ with:
16
+ fetch-depth: 0
17
+ token: ${{ secrets.GITEA_TOKEN }}
18
+
19
+ - name: Run Ghost Bump
20
+ uses: https://git.thomaspeoples.com/thomaspeoples/ci-actions/version-bump@main
21
+ with:
22
+ gitea_token: ${{ secrets.giteatoken }}
23
+ api_url: "${{ secrets.giteaurl }}/api/v1"
@@ -0,0 +1,35 @@
1
+ name: SAS Lineage CI
2
+
3
+ on:
4
+ push:
5
+ branches-ignore:
6
+ - 'main'
7
+
8
+ jobs:
9
+ test-and-verify:
10
+ runs-on: ghost-runner
11
+ steps:
12
+ - name: Check out code
13
+ uses: actions/checkout@v4
14
+ with:
15
+ fetch-depth: 0
16
+ - name: Install dependencies
17
+ run: uv sync --all-extras
18
+ - name: Run Tests
19
+ run: |
20
+ chmod +x bin/run_tests.sh
21
+ ./bin/run_tests.sh
22
+ env:
23
+ PYTHONPATH: .
24
+
25
+ auto-merge:
26
+ needs: test-and-verify
27
+ if: github.ref != 'refs/heads/main'
28
+ runs-on: ubuntu-latest
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ - name: Call Central Merge Action
32
+ uses: https://git.thomaspeoples.com/thomaspeoples/ci-actions/auto-merge@main
33
+ with:
34
+ gitea_token: ${{ secrets.giteatoken }}
35
+ api_url: "${{ secrets.giteaurl }}/api/v1"
@@ -0,0 +1,28 @@
1
+ .uv/
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.so
5
+ build/
6
+ dist/
7
+ *.egg-info/
8
+ .eggs/
9
+ htmlcov/
10
+ .coverage
11
+ .coverage.*
12
+ .cache
13
+ .pytest_cache/
14
+ .tox/
15
+ .venv
16
+ venv/
17
+ env/
18
+ ENV/
19
+ .env
20
+ .env.private
21
+ /site
22
+ .mypy_cache/
23
+ *.db
24
+ *.sqlite3
25
+ provenance.json
26
+ parse_report.json
27
+ mapping_gaps.json
28
+ contracts/
@@ -0,0 +1,37 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.4.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-added-large-files
9
+
10
+ - repo: https://github.com/astral-sh/ruff-pre-commit
11
+ rev: v0.4.4
12
+ hooks:
13
+ - id: ruff
14
+ args: [--fix]
15
+ - id: ruff-format
16
+
17
+ - repo: https://github.com/Yelp/detect-secrets
18
+ rev: v1.5.0
19
+ hooks:
20
+ - id: detect-secrets
21
+ args: ['--baseline', '.secrets.baseline']
22
+ exclude: uv.lock
23
+
24
+ - repo: local
25
+ hooks:
26
+ - id: pytest-coverage
27
+ name: pytest-coverage
28
+ entry: ./bin/run_tests.sh
29
+ language: system
30
+ pass_filenames: false
31
+ always_run: true
32
+
33
+ - id: commitizen
34
+ name: commitizen check
35
+ entry: uv run cz check --commit-msg-file
36
+ language: system
37
+ stages: [commit-msg]
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+
3
+ All notable changes to `wraith-sas-lineage` are documented here.
4
+
5
+ ## v0.2.0 (2026-06-21)
6
+
7
+ ### Features
8
+
9
+ - on a pig it looks nice (#3) from feature/TJP-21061 into main
10
+ - on a pig it looks nice
11
+ - release v0.1.0 -> v0.2.0 [skip ci]
12
+
13
+ ## v0.0.1 (2026-06-20)
14
+
15
+ ### Features
16
+
17
+ - Initial scaffold: parser, schema mapper, and web UI skeletons
18
+ - Provenance JSON schema and Turso DDL
19
+ - Extractor protocol and implementations for DATA step, PROC SQL, RSUBMIT, SCD2, LIBNAME
@@ -0,0 +1,69 @@
1
+ # Contributing to wraith-sas-lineage
2
+
3
+ ## Prerequisites
4
+
5
+ - Python 3.12+
6
+ - [`uv`](https://docs.astral.sh/uv/) for dependency management
7
+
8
+ ## Setup
9
+
10
+ ```bash
11
+ git clone https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage.git
12
+ cd wraith-sas-lineage
13
+ uv run poe setup
14
+ ```
15
+
16
+ This syncs all dependencies and installs pre-commit hooks.
17
+
18
+ ## Running tests
19
+
20
+ ```bash
21
+ uv run poe test
22
+ ```
23
+
24
+ Coverage must stay above 80%. The suite runs without any external services — no SAS licence, no database connections, no network.
25
+
26
+ ## Code style
27
+
28
+ Ruff handles formatting and linting:
29
+
30
+ ```bash
31
+ uv run poe lint
32
+ uv run poe format
33
+ ```
34
+
35
+ Line length is 79 characters.
36
+
37
+ ## Commits
38
+
39
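+ Commits follow the Ghost Stack convention via commitizen. `<type>` is one of `feature`, `fix`, `bug`, or `break` (optionally capitalised), and `<ticket>` is three uppercase letters, a hyphen, and five digits (for example `TJP-00001`):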
40
+
41
+ ```
42
+ 👻 <type>/<ticket>: <message>
43
+ ```
44
+
45
+ ```bash
46
+ uv run cz commit
47
+ ```
48
+
49
+ Do not bypass pre-commit hooks with `--no-verify`.
50
+
51
+ ## Pull requests
52
+
53
+ - One concern per PR.
54
+ - Include or update tests for any behaviour change.
55
+ - If the change affects the JSON provenance schema or the Turso DDL, update `schema/` and the relevant `docs/` page.
56
+ - Run the full pre-commit suite before opening: `uv run pre-commit run --all-files`.
57
+
58
+ ## Adding a new extractor
59
+
60
+ 1. Create `src/sas_lineage/parser/extractors/<name>.py` implementing the `StepExtractor` protocol.
61
+ 2. Register it in `parser/cli.py` via `builder.register(MyExtractor())`.
62
+ 3. Add tests in `tests/test_parser/test_extractors.py`.
63
+ 4. Add fixture SAS snippets to `tests/test_parser/fixtures/` if needed.
64
+
65
+ Extractors must be additive — they receive a block and return what they find. Never raise; return a `Flag` instead.
66
+
67
+ ## Reporting issues
68
+
69
+ Open a Gitea issue with the SAS snippet (anonymised), the log excerpt, and the exact command you ran. For extraction failures, include the `parse_report.json` and relevant entries from `mapping_gaps.json`.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Thomas Peoples
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: wraith-sas-lineage
3
+ Version: 0.2.0
4
+ Summary: SAS lineage extraction, schema mapping, and ODCS contract generation for the Ghost Stack.
5
+ Project-URL: Homepage, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage
6
+ Project-URL: Repository, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage.git
7
+ Project-URL: Documentation, https://www.thomaspeoples.com/gitea-repos/wraith-sas-lineage/
8
+ Project-URL: Issues, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage/issues
9
+ Project-URL: Changelog, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage/src/branch/main/CHANGELOG.md
10
+ Author-email: Thomas Peoples <hello@thomaspeoples.com>
11
+ License: MIT License
12
+
13
+ Copyright (c) 2026 Thomas Peoples
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ of this software and associated documentation files (the "Software"), to deal
17
+ in the Software without restriction, including without limitation the rights
18
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19
+ copies of the Software, and to permit persons to whom the Software is
20
+ furnished to do so, subject to the following conditions:
21
+
22
+ The above copyright notice and this permission notice shall be included in all
23
+ copies or substantial portions of the Software.
24
+
25
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
+ SOFTWARE.
32
+ License-File: LICENSE
33
+ Keywords: analytics-engineering,data-contracts,ghost-stack,lineage,odcs,sas
34
+ Classifier: Development Status :: 3 - Alpha
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Topic :: Database
40
+ Classifier: Topic :: Software Development :: Code Generators
41
+ Requires-Python: >=3.12
42
+ Requires-Dist: click>=8.1
43
+ Requires-Dist: flask>=3.0
44
+ Requires-Dist: openpyxl>=3.1
45
+ Requires-Dist: pyyaml>=6.0
46
+ Provides-Extra: dev
47
+ Requires-Dist: commitizen; extra == 'dev'
48
+ Requires-Dist: detect-secrets; extra == 'dev'
49
+ Requires-Dist: genbadge[coverage]>=1.1.1; extra == 'dev'
50
+ Requires-Dist: mkdocs; extra == 'dev'
51
+ Requires-Dist: mkdocs-material; extra == 'dev'
52
+ Requires-Dist: mkdocstrings[python]; extra == 'dev'
53
+ Requires-Dist: poethepoet; extra == 'dev'
54
+ Requires-Dist: pre-commit; extra == 'dev'
55
+ Requires-Dist: pydoc-markdown; extra == 'dev'
56
+ Requires-Dist: pymdown-extensions; extra == 'dev'
57
+ Requires-Dist: pytest; extra == 'dev'
58
+ Requires-Dist: pytest-cov; extra == 'dev'
59
+ Requires-Dist: ruff; extra == 'dev'
60
+ Requires-Dist: ty; extra == 'dev'
61
+ Requires-Dist: typer; extra == 'dev'
62
+ Description-Content-Type: text/markdown
63
+
64
+ [![Documentation](https://img.shields.io/badge/docs-live-brightgreen)](https://www.thomaspeoples.com/gitea-repos/wraith-sas-lineage/)
65
+ ![License](https://img.shields.io/badge/license-MIT-blue)
66
+ ![Python](https://img.shields.io/badge/python-3.12%2B-blue)
67
+
68
+ # 👻 wraith-sas-lineage
69
+ ### *SAS Lineage Extraction for the Ghost Stack*
70
+
71
+ **wraith-sas-lineage** extracts data lineage from SAS DI Studio code and verbose execution logs, maps it to Open Data Contract Standard (ODCS) contracts, and serves a local read-only lineage explorer. One JSON artefact connects all three tools.
72
+
73
+ Designed for large SAS estates running on Unix. Output feeds [wraith-modelgen](https://git.thomaspeoples.com/thomaspeoples/wraith-modelgen) to auto-generate dbt on BigQuery.
74
+
75
+ ---
76
+
77
+ ## Three tools. One seam.
78
+
79
+ ```
80
+ .sas + .log files
81
+
82
+
83
+ sas-parse ──► provenance.json
84
+
85
+
86
+ sas-map ──► lineage.db + ODCS contracts + gaps report
87
+
88
+
89
+ sas-ui ──► localhost:5000
90
+ ```
91
+
92
+ | Command | Input | Output |
93
+ |---|---|---|
94
+ | `sas-parse` | `.sas` files + verbose logs | `provenance.json` |
95
+ | `sas-map` | `provenance.json` | `lineage.db`, ODCS YAML contracts, `mapping_gaps.json` |
96
+ | `sas-ui` | `lineage.db` | Read-only lineage explorer |
97
+
98
+ ---
99
+
100
+ ## Install
101
+
102
+ ```bash
103
+ uv tool install wraith-sas-lineage
104
+ ```
105
+
106
+ Or for development:
107
+
108
+ ```bash
109
+ git clone https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage.git
110
+ cd wraith-sas-lineage
111
+ uv run poe setup
112
+ ```
113
+
114
+ ---
115
+
116
+ ## Quick start
117
+
118
+ ```bash
119
+ # Parse SAS code + logs
120
+ sas-parse \
121
+ --code-dir /path/to/sas/code \
122
+ --log-dir /path/to/verbose/logs \
123
+ --estate acme-risk \
124
+ --output provenance.json
125
+
126
+ # Map to DB + ODCS contracts
127
+ sas-map \
128
+ --provenance provenance.json \
129
+ --db lineage.db \
130
+ --contracts-dir contracts/ \
131
+ --gaps mapping_gaps.json
132
+
133
+ # Browse lineage
134
+ sas-ui --db lineage.db
135
+ # → http://localhost:5000/flags all extraction flags
136
+ # → http://localhost:5000/provenance upstream chain for a column
137
+ # → http://localhost:5000/lineage downstream to all MARTs
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Design principles
143
+
144
+ **Flag-never-drop.** Unresolved macros, remote execution blocks, and hand-written code surface as `Flag` objects with reason, context, and snippet. Nothing is silently discarded.
145
+
146
+ **Log-first macro resolution.** `SYMBOLGEN`/`MLOGIC` log lines resolve `&macro_var` references before falling back to the code literal. Accurate lineage for DI Studio estates requires verbose logs.
147
+
148
+ **Single seam.** `provenance.json` is the only interface between tools. Each tool is independently runnable and testable.
149
+
150
+ **Content-hashed stable IDs.** Table, column, job, and lineage chain IDs are `SHA-256[:12]` of their canonical string. Same estate, same run, same IDs.
151
+
152
+ ---
153
+
154
+ ## Layer vocabulary
155
+
156
+ | SAS layer | Schema value |
157
+ |---|---|
158
+ | RAW | `RAW` |
159
+ | ODS (ODP) | `ODP` |
160
+ | STG (CDP) | `CDP` |
161
+ | MART (FDP) | `FDP` |
162
+
163
+ ---
164
+
165
+ ## Developer quality gate
166
+
167
+ ```bash
168
+ uv run poe test # pytest (interactive)
169
+ uv run poe test-ci # pytest + coverage ≥ 80%
170
+ uv run poe lint # ruff check
171
+ uv run poe format # ruff format
172
+ uv run poe docs-serve # mkdocs at localhost:8000
173
+ ```
174
+
175
+ All commits go through commitizen:
176
+
177
+ ```bash
178
+ uv run cz commit
179
+ ```
180
+
181
+ ---
182
+
183
+ *Part of the [Ghost Stack](https://git.thomaspeoples.com/thomaspeoples). Sovereign. Self-hosted. No nonsense.*