wraith-sas-lineage 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wraith_sas_lineage-0.2.0/.claude/settings.local.json +48 -0
- wraith_sas_lineage-0.2.0/.cz.yaml +64 -0
- wraith_sas_lineage-0.2.0/.env.example +16 -0
- wraith_sas_lineage-0.2.0/.gitea/CODEOWNERS.md +1 -0
- wraith_sas_lineage-0.2.0/.gitea/PULL_REQUEST_TEMPLATE.md +32 -0
- wraith_sas_lineage-0.2.0/.gitea/workflows/pages.yml +59 -0
- wraith_sas_lineage-0.2.0/.gitea/workflows/release.yml +23 -0
- wraith_sas_lineage-0.2.0/.gitea/workflows/test.yml +35 -0
- wraith_sas_lineage-0.2.0/.gitignore +28 -0
- wraith_sas_lineage-0.2.0/.pre-commit-config.yaml +37 -0
- wraith_sas_lineage-0.2.0/CHANGELOG.md +19 -0
- wraith_sas_lineage-0.2.0/CONTRIBUTING.md +69 -0
- wraith_sas_lineage-0.2.0/LICENSE +21 -0
- wraith_sas_lineage-0.2.0/PKG-INFO +183 -0
- wraith_sas_lineage-0.2.0/README.md +120 -0
- wraith_sas_lineage-0.2.0/bin/build.sh +23 -0
- wraith_sas_lineage-0.2.0/bin/run_tests.sh +19 -0
- wraith_sas_lineage-0.2.0/docs/api.md +933 -0
- wraith_sas_lineage-0.2.0/docs/architecture.md +181 -0
- wraith_sas_lineage-0.2.0/docs/index.md +9 -0
- wraith_sas_lineage-0.2.0/docs/reference.md +240 -0
- wraith_sas_lineage-0.2.0/docs/roadmap.md +189 -0
- wraith_sas_lineage-0.2.0/docs/security.md +60 -0
- wraith_sas_lineage-0.2.0/docs/usage.md +161 -0
- wraith_sas_lineage-0.2.0/mkdocs.yml +45 -0
- wraith_sas_lineage-0.2.0/pyproject.toml +133 -0
- wraith_sas_lineage-0.2.0/schema/provenance.schema.json +126 -0
- wraith_sas_lineage-0.2.0/schema/turso.sql +87 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/__init__.py +10 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/ids.py +33 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/models.py +170 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/__init__.py +1 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/builder.py +344 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/cli.py +110 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractor.py +36 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/__init__.py +1 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/_factory.py +70 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/data_step.py +315 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/libname.py +105 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/macro_call.py +104 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/proc_generic.py +66 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/proc_sql.py +299 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/rsubmit.py +104 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/extractors/scd2.py +59 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/freshness.py +82 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/log_resolver.py +109 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/patterns.py +167 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/reconciler.py +79 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/sql_tokeniser.py +217 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/parser/walker.py +26 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/__init__.py +1 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/cli.py +117 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/enrich.py +59 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/gaps.py +52 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/loader.py +143 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/schema_mapper/odcs.py +91 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/__init__.py +1 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/app.py +63 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/db.py +20 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/__init__.py +1 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/flags.py +38 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/lineage.py +62 -0
- wraith_sas_lineage-0.2.0/src/sas_lineage/web_ui/views/provenance.py +75 -0
- wraith_sas_lineage-0.2.0/tasks.py +37 -0
- wraith_sas_lineage-0.2.0/tests/__init__.py +0 -0
- wraith_sas_lineage-0.2.0/tests/test_end_to_end.py +122 -0
- wraith_sas_lineage-0.2.0/tests/test_hardening_gate.py +498 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/__init__.py +0 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/fixtures/sample_job.log +25 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/fixtures/sample_job.sas +30 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_block_splitter.py +93 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_builder.py +130 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_cli.py +100 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_extractors.py +712 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_ids.py +61 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_log_resolver.py +74 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_models.py +138 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_pattern_wiring.py +199 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_patterns.py +174 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_reconciler.py +105 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_sql_tokeniser.py +457 -0
- wraith_sas_lineage-0.2.0/tests/test_parser/test_walker.py +70 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/__init__.py +0 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_cli.py +137 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_enrich.py +44 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_freshness.py +69 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_gaps.py +80 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_loader.py +137 -0
- wraith_sas_lineage-0.2.0/tests/test_schema_mapper/test_odcs.py +117 -0
- wraith_sas_lineage-0.2.0/tests/test_web_ui/__init__.py +0 -0
- wraith_sas_lineage-0.2.0/tests/test_web_ui/test_app.py +55 -0
- wraith_sas_lineage-0.2.0/uv.lock +1579 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(uv run *)",
|
|
5
|
+
"Read(//home/thomaspeoples/pjt-nas/nas/SovereignDocs/gitea-repos/**)",
|
|
6
|
+
"Bash(ls -la sas-lineage-scaffolding-plan.md)",
|
|
7
|
+
"Bash(ls *.md)",
|
|
8
|
+
"Bash(ls -la wraith-sas-lineage/docs/)",
|
|
9
|
+
"Bash(ls -la wraith-sas-lineage/schema/)",
|
|
10
|
+
"Bash(uv sync *)",
|
|
11
|
+
"Bash(uv --version)",
|
|
12
|
+
"Bash(git add *)",
|
|
13
|
+
"Bash(git commit -m '👻 feature/TJP-20061: wire column-level lineage end to end *)",
|
|
14
|
+
"Bash(git stash *)",
|
|
15
|
+
"Bash(grep -nA15 \"\\\\[tool.ruff\" pyproject.toml)",
|
|
16
|
+
"Bash(git commit -m '👻 feature/TJP-20061: audit pass — stable flag IDs, lint clean *)",
|
|
17
|
+
"Bash(git commit -m '👻 feature/TJP-20061: wire patterns.py TODOs + factory dedup *)",
|
|
18
|
+
"Bash(git commit -m '👻 feature/TJP-20061: resolve final 8 patterns — zero unused *)",
|
|
19
|
+
"Bash(git commit -m '👻 feature/TJP-20061: step_count_mismatch as first-class flag *)",
|
|
20
|
+
"Bash(git rm *)",
|
|
21
|
+
"Bash(rm -f ../sas-lineage-scaffolding-plan.md)",
|
|
22
|
+
"Bash(git commit -m '👻 feature/TJP-20061: consolidate plans into docs/roadmap.md *)",
|
|
23
|
+
"Bash(git commit -m '👻 feature/TJP-20061: changelog + move pydoc config into pyproject *)",
|
|
24
|
+
"Bash(git commit -m '👻 fix/TJP-20061: CI uses uv.lock via setup-uv, drop system python *)",
|
|
25
|
+
"Bash(git push *)",
|
|
26
|
+
"Bash(git commit -m '👻 fix/TJP-20061: use image uv, drop setup-uv download *)",
|
|
27
|
+
"Bash(gh pr *)",
|
|
28
|
+
"Bash(echo \"---gh exit: $?\")",
|
|
29
|
+
"Bash(curl -s -o /dev/null -w \"%{http_code}\\\\n\" https://git.thomaspeoples.com/api/v1/repos/thomaspeoples/wraith-sas-lineage)",
|
|
30
|
+
"Bash(curl -s https://git.thomaspeoples.com/api/v1/repos/thomaspeoples/wraith-sas-lineage)",
|
|
31
|
+
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\('has_pull_requests:', d.get\\('has_pull_requests'\\)\\); print\\('default_branch:', d.get\\('default_branch'\\)\\)\")",
|
|
32
|
+
"Bash(git commit -m '👻 fix/TJP-20061: debug step to resolve auto-merge API target *)",
|
|
33
|
+
"Bash(git commit -m '👻 fix/TJP-20061: probe both api_url shapes for auto-merge 404 *)",
|
|
34
|
+
"Bash(git commit -m '👻 fix/TJP-20061: remove temporary auto-merge debug step *)",
|
|
35
|
+
"Bash(grep -nA40 \"commitizen\" pyproject.toml)",
|
|
36
|
+
"Bash(git fetch *)",
|
|
37
|
+
"Bash(echo \"dryrun exit: $?\")",
|
|
38
|
+
"Bash(echo \"real exit: $?\")",
|
|
39
|
+
"Bash(echo \"=====EXIT: $?=====\")",
|
|
40
|
+
"Bash(echo \"bump exit: $?\")",
|
|
41
|
+
"Read(//tmp/**)",
|
|
42
|
+
"Bash(echo \"REAL bump exit: $?\")",
|
|
43
|
+
"Bash(xargs -I{} grep -rn \"No tag found to do an incremental\" {})",
|
|
44
|
+
"Bash(git checkout *)",
|
|
45
|
+
"Bash(git ls-remote *)"
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
commitizen:
|
|
2
|
+
name: cz_customize
|
|
3
|
+
version_provider: uv
|
|
4
|
+
version_scheme: semver
|
|
5
|
+
major_version_zero: true
|
|
6
|
+
update_changelog_on_bump: true
|
|
7
|
+
tag_format: v$version
|
|
8
|
+
bump_message: ":ghost: chore: release v$current_version -> v$new_version [skip ci]"
|
|
9
|
+
|
|
10
|
+
customize:
|
|
11
|
+
bump_map:
|
|
12
|
+
Break: MAJOR
|
|
13
|
+
break: MAJOR
|
|
14
|
+
Feature: MINOR
|
|
15
|
+
feature: MINOR
|
|
16
|
+
Fix: PATCH
|
|
17
|
+
fix: PATCH
|
|
18
|
+
Bug: PATCH
|
|
19
|
+
bug: PATCH
|
|
20
|
+
|
|
21
|
+
bump_pattern: '(feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}'
|
|
22
|
+
|
|
23
|
+
commit_parser: '.*?(?P<change_type>feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}: (?P<message>.*)'
|
|
24
|
+
|
|
25
|
+
change_type_map:
|
|
26
|
+
Break: "Breaking Changes"
|
|
27
|
+
break: "Breaking Changes"
|
|
28
|
+
Feature: "Features"
|
|
29
|
+
feature: "Features"
|
|
30
|
+
Fix: "Bug Fixes"
|
|
31
|
+
fix: "Bug Fixes"
|
|
32
|
+
Bug: "Bug Fixes"
|
|
33
|
+
bug: "Bug Fixes"
|
|
34
|
+
|
|
35
|
+
change_type_order:
|
|
36
|
+
- break
|
|
37
|
+
- Break
|
|
38
|
+
- feature
|
|
39
|
+
- Feature
|
|
40
|
+
- fix
|
|
41
|
+
- Fix
|
|
42
|
+
- bug
|
|
43
|
+
- Bug
|
|
44
|
+
|
|
45
|
+
changelog_pattern: '.*(feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}'
|
|
46
|
+
|
|
47
|
+
example: '👻 feature/TJP-00001: implement sovereign automation'
|
|
48
|
+
message_template: '{{change_type}}: {{message}}'
|
|
49
|
+
|
|
50
|
+
questions:
|
|
51
|
+
- name: change_type
|
|
52
|
+
type: list
|
|
53
|
+
message: Select the type of change you are committing
|
|
54
|
+
choices:
|
|
55
|
+
- {name: "break: A breaking change", value: break}
|
|
56
|
+
- {name: "feature: A new feature", value: feature}
|
|
57
|
+
- {name: "fix: A standard fix", value: fix}
|
|
58
|
+
- {name: "bug: A specific bug fix", value: bug}
|
|
59
|
+
- name: message
|
|
60
|
+
type: input
|
|
61
|
+
message: "Enter a brief description:"
|
|
62
|
+
|
|
63
|
+
schema: '<type>/<ticket>: <body>'
|
|
64
|
+
schema_pattern: '^.*(feature|fix|bug|break|Feature|Fix|Bug|Break)/[A-Z]{3}-\d{5}: (.*)'
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Paths to deployed SAS estate
|
|
2
|
+
SAS_CODE_DIR=/path/to/sas/code
|
|
3
|
+
SAS_LOG_DIR=/path/to/sas/logs
|
|
4
|
+
|
|
5
|
+
# Output paths
|
|
6
|
+
PROVENANCE_OUTPUT=./provenance.json
|
|
7
|
+
PARSE_REPORT_OUTPUT=./parse_report.json
|
|
8
|
+
CONTRACTS_OUTPUT=./contracts/
|
|
9
|
+
DB_PATH=./lineage.db
|
|
10
|
+
|
|
11
|
+
# Optional: manual owner/domain overlay
|
|
12
|
+
OWNER_MAP_PATH=./owner_map.json
|
|
13
|
+
|
|
14
|
+
# Web UI
|
|
15
|
+
FLASK_ENV=development
|
|
16
|
+
FLASK_PORT=5000
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
* @thomaspeoples
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# 👻 Ghost Stack Pull Request
|
|
2
|
+
|
|
3
|
+
## 🎯 Purpose
|
|
4
|
+
_What are we haunting today? Briefly describe the change._
|
|
5
|
+
|
|
6
|
+
**Fixes:** # (Link the Gitea Issue here)
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## 🛠️ Proposed Changes
|
|
11
|
+
- [ ] Logic updated in `src/`
|
|
12
|
+
- [ ] Dependencies synced via `uv lock`
|
|
13
|
+
- [ ] Schema updated in `schema/` if models changed
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## 🚦 Quality Gate (The Ghost Protocol)
|
|
18
|
+
- [ ] **uv Sync:** Environment is healthy and `uv.lock` is up to date.
|
|
19
|
+
- [ ] **Tests:** `./bin/run_tests.sh` passes with >80% coverage.
|
|
20
|
+
- [ ] **Linting:** `pre-commit run --all-files` passes.
|
|
21
|
+
- [ ] **Commits:** Messages follow the `cz_customize` regex (Fix/Bug/Feature/Break).
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## 🧪 Deployment & Verification
|
|
26
|
+
- [ ] Branch is synced with the latest `main`.
|
|
27
|
+
- [ ] Thin-slice test (one job end-to-end) passes if parser heuristics changed.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 👤 Author's Final Word
|
|
32
|
+
_Any specific notes for the reviewer or warnings about breaking changes?_
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
name: Deploy Gitea Pages
|
|
2
|
+
on:
|
|
3
|
+
workflow_dispatch:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
deploy:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Install uv
|
|
15
|
+
uses: astral-sh/setup-uv@v3
|
|
16
|
+
with:
|
|
17
|
+
version: "latest"
|
|
18
|
+
|
|
19
|
+
- name: Install dependencies
|
|
20
|
+
run: uv sync --all-extras
|
|
21
|
+
|
|
22
|
+
- name: Run Tests & Coverage
|
|
23
|
+
run: uv run pytest --cov=src --cov-report=xml
|
|
24
|
+
|
|
25
|
+
- name: Generate Coverage Badge
|
|
26
|
+
run: uv run genbadge coverage -i coverage.xml -o docs/coverage.svg
|
|
27
|
+
|
|
28
|
+
- name: Build Documentation
|
|
29
|
+
run: uv run mkdocs build
|
|
30
|
+
|
|
31
|
+
- name: Sync to Main Website Repo
|
|
32
|
+
env:
|
|
33
|
+
MY_TOKEN: ${{ secrets.GITEATOKEN }}
|
|
34
|
+
MY_URL: ${{ secrets.GITEAURL }}
|
|
35
|
+
|
|
36
|
+
run: |
|
|
37
|
+
# 1. Clone your main website repo
|
|
38
|
+
CLEAN_URL=${MY_URL#*//}
|
|
39
|
+
git clone https://${MY_TOKEN}@${CLEAN_URL}/thomaspeoples/ghost-site.git main_site
|
|
40
|
+
|
|
41
|
+
# 2. Create the target directory if it doesn't exist
|
|
42
|
+
TARGET_DIR="main_site/www/gitea-repos/wraith-sas-lineage"
|
|
43
|
+
mkdir -p $TARGET_DIR
|
|
44
|
+
|
|
45
|
+
# 3. Clean and Copy
|
|
46
|
+
rm -rf $TARGET_DIR/*
|
|
47
|
+
cp -r site/* $TARGET_DIR/
|
|
48
|
+
|
|
49
|
+
# 4. Commit and Push to the OTHER repo
|
|
50
|
+
cd main_site
|
|
51
|
+
git config user.name "Ghost Runner"
|
|
52
|
+
git config user.email "runner@ghost-stack.local"
|
|
53
|
+
git add .
|
|
54
|
+
if git diff --staged --quiet; then
|
|
55
|
+
echo "No changes to documentation. Skipping commit."
|
|
56
|
+
else
|
|
57
|
+
git commit -m ":books: main: Update docs for wraith-sas-lineage: ${{ gitea.sha }}"
|
|
58
|
+
git push origin main
|
|
59
|
+
fi
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: 'Sovereign Release'
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
release:
|
|
10
|
+
if: "!contains(github.event.head_commit.message, '[skip ci]')"
|
|
11
|
+
runs-on: ghost-runner
|
|
12
|
+
steps:
|
|
13
|
+
- name: Check out code
|
|
14
|
+
uses: actions/checkout@v4
|
|
15
|
+
with:
|
|
16
|
+
fetch-depth: 0
|
|
17
|
+
token: ${{ secrets.GITEA_TOKEN }}
|
|
18
|
+
|
|
19
|
+
- name: Run Ghost Bump
|
|
20
|
+
uses: https://git.thomaspeoples.com/thomaspeoples/ci-actions/version-bump@main
|
|
21
|
+
with:
|
|
22
|
+
gitea_token: ${{ secrets.giteatoken }}
|
|
23
|
+
api_url: "${{ secrets.giteaurl }}/api/v1"
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: SAS Lineage CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches-ignore:
|
|
6
|
+
- 'main'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test-and-verify:
|
|
10
|
+
runs-on: ghost-runner
|
|
11
|
+
steps:
|
|
12
|
+
- name: Check out code
|
|
13
|
+
uses: actions/checkout@v4
|
|
14
|
+
with:
|
|
15
|
+
fetch-depth: 0
|
|
16
|
+
- name: Install dependencies
|
|
17
|
+
run: uv sync --all-extras
|
|
18
|
+
- name: Run Tests
|
|
19
|
+
run: |
|
|
20
|
+
chmod +x bin/run_tests.sh
|
|
21
|
+
./bin/run_tests.sh
|
|
22
|
+
env:
|
|
23
|
+
PYTHONPATH: .
|
|
24
|
+
|
|
25
|
+
auto-merge:
|
|
26
|
+
needs: test-and-verify
|
|
27
|
+
if: github.ref != 'refs/heads/main'
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
- name: Call Central Merge Action
|
|
32
|
+
uses: https://git.thomaspeoples.com/thomaspeoples/ci-actions/auto-merge@main
|
|
33
|
+
with:
|
|
34
|
+
gitea_token: ${{ secrets.giteatoken }}
|
|
35
|
+
api_url: "${{ secrets.giteaurl }}/api/v1"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
.uv/
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.so
|
|
5
|
+
build/
|
|
6
|
+
dist/
|
|
7
|
+
*.egg-info/
|
|
8
|
+
.eggs/
|
|
9
|
+
htmlcov/
|
|
10
|
+
.coverage
|
|
11
|
+
.coverage.*
|
|
12
|
+
.cache
|
|
13
|
+
.pytest_cache/
|
|
14
|
+
.tox/
|
|
15
|
+
.venv
|
|
16
|
+
venv/
|
|
17
|
+
env/
|
|
18
|
+
ENV/
|
|
19
|
+
.env
|
|
20
|
+
.env.private
|
|
21
|
+
/site
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
*.db
|
|
24
|
+
*.sqlite3
|
|
25
|
+
provenance.json
|
|
26
|
+
parse_report.json
|
|
27
|
+
mapping_gaps.json
|
|
28
|
+
contracts/
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v4.4.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: trailing-whitespace
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: check-yaml
|
|
8
|
+
- id: check-added-large-files
|
|
9
|
+
|
|
10
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
11
|
+
rev: v0.4.4
|
|
12
|
+
hooks:
|
|
13
|
+
- id: ruff
|
|
14
|
+
args: [--fix]
|
|
15
|
+
- id: ruff-format
|
|
16
|
+
|
|
17
|
+
- repo: https://github.com/Yelp/detect-secrets
|
|
18
|
+
rev: v1.5.0
|
|
19
|
+
hooks:
|
|
20
|
+
- id: detect-secrets
|
|
21
|
+
args: ['--baseline', '.secrets.baseline']
|
|
22
|
+
exclude: uv.lock
|
|
23
|
+
|
|
24
|
+
- repo: local
|
|
25
|
+
hooks:
|
|
26
|
+
- id: pytest-coverage
|
|
27
|
+
name: pytest-coverage
|
|
28
|
+
entry: ./bin/run_tests.sh
|
|
29
|
+
language: system
|
|
30
|
+
pass_filenames: false
|
|
31
|
+
always_run: true
|
|
32
|
+
|
|
33
|
+
- id: commitizen
|
|
34
|
+
name: commitizen check
|
|
35
|
+
entry: uv run cz check --commit-msg-file
|
|
36
|
+
language: system
|
|
37
|
+
stages: [commit-msg]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `wraith-sas-lineage` are documented here.
|
|
4
|
+
|
|
5
|
+
## v0.2.0 (2026-06-21)
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
- on a pig it looks nice' (#3) from feature/TJP-21061 into main
|
|
10
|
+
- on a pig it looks nice
|
|
11
|
+
- release v0.1.0 -> v0.2.0 [skip ci]
|
|
12
|
+
|
|
13
|
+
## v0.0.1 (2026-06-20)
|
|
14
|
+
|
|
15
|
+
### Features
|
|
16
|
+
|
|
17
|
+
- Initial scaffold: parser, schema mapper, and web UI skeletons
|
|
18
|
+
- Provenance JSON schema and Turso DDL
|
|
19
|
+
- Extractor protocol and implementations for DATA step, PROC SQL, RSUBMIT, SCD2, LIBNAME
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Contributing to wraith-sas-lineage
|
|
2
|
+
|
|
3
|
+
## Prerequisites
|
|
4
|
+
|
|
5
|
+
- Python 3.12+
|
|
6
|
+
- [`uv`](https://docs.astral.sh/uv/) for dependency management
|
|
7
|
+
|
|
8
|
+
## Setup
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
git clone https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage.git
|
|
12
|
+
cd wraith-sas-lineage
|
|
13
|
+
uv run poe setup
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
This syncs all dependencies and installs pre-commit hooks.
|
|
17
|
+
|
|
18
|
+
## Running tests
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
uv run poe test
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Coverage must stay above 80%. The suite runs without any external services — no SAS licence, no database connections, no network.
|
|
25
|
+
|
|
26
|
+
## Code style
|
|
27
|
+
|
|
28
|
+
Ruff handles formatting and linting:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
uv run poe lint
|
|
32
|
+
uv run poe format
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Line length is 79 characters.
|
|
36
|
+
|
|
37
|
+
## Commits
|
|
38
|
+
|
|
39
|
+
Commits follow the Ghost Stack convention via commitizen:
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
👻 <type>/<ticket>: <message>
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uv run cz commit
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Do not bypass pre-commit hooks with `--no-verify`.
|
|
50
|
+
|
|
51
|
+
## Pull requests
|
|
52
|
+
|
|
53
|
+
- One concern per PR.
|
|
54
|
+
- Include or update tests for any behaviour change.
|
|
55
|
+
- If the change affects the JSON provenance schema or the Turso DDL, update `schema/` and the relevant `docs/` page.
|
|
56
|
+
- Run the full pre-commit suite before opening: `uv run pre-commit run --all-files`.
|
|
57
|
+
|
|
58
|
+
## Adding a new extractor
|
|
59
|
+
|
|
60
|
+
1. Create `src/sas_lineage/parser/extractors/<name>.py` implementing the `StepExtractor` protocol.
|
|
61
|
+
2. Register it in `parser/cli.py` via `builder.register(MyExtractor())`.
|
|
62
|
+
3. Add tests in `tests/test_parser/test_extractors.py`.
|
|
63
|
+
4. Add fixture SAS snippets to `tests/test_parser/fixtures/` if needed.
|
|
64
|
+
|
|
65
|
+
Extractors must be additive — they receive a block and return what they find. Never raise; return a `Flag` instead.
|
|
66
|
+
|
|
67
|
+
## Reporting issues
|
|
68
|
+
|
|
69
|
+
Open a Gitea issue with the SAS snippet (anonymised), the log excerpt, and the exact command you ran. For extraction failures, include the `parse_report.json` and relevant entries from `mapping_gaps.json`.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Thomas Peoples
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wraith-sas-lineage
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: SAS lineage extraction, schema mapping, and ODCS contract generation for the Ghost Stack.
|
|
5
|
+
Project-URL: Homepage, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage
|
|
6
|
+
Project-URL: Repository, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage.git
|
|
7
|
+
Project-URL: Documentation, https://www.thomaspeoples.com/gitea-repos/wraith-sas-lineage/
|
|
8
|
+
Project-URL: Issues, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage/issues
|
|
9
|
+
Project-URL: Changelog, https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage/src/branch/main/CHANGELOG.md
|
|
10
|
+
Author-email: Thomas Peoples <hello@thomaspeoples.com>
|
|
11
|
+
License: MIT License
|
|
12
|
+
|
|
13
|
+
Copyright (c) 2026 Thomas Peoples
|
|
14
|
+
|
|
15
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
16
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
17
|
+
in the Software without restriction, including without limitation the rights
|
|
18
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
19
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
20
|
+
furnished to do so, subject to the following conditions:
|
|
21
|
+
|
|
22
|
+
The above copyright notice and this permission notice shall be included in all
|
|
23
|
+
copies or substantial portions of the Software.
|
|
24
|
+
|
|
25
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
26
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
27
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
28
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
29
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
30
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
31
|
+
SOFTWARE.
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Keywords: analytics-engineering,data-contracts,ghost-stack,lineage,odcs,sas
|
|
34
|
+
Classifier: Development Status :: 3 - Alpha
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Topic :: Database
|
|
40
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
41
|
+
Requires-Python: >=3.12
|
|
42
|
+
Requires-Dist: click>=8.1
|
|
43
|
+
Requires-Dist: flask>=3.0
|
|
44
|
+
Requires-Dist: openpyxl>=3.1
|
|
45
|
+
Requires-Dist: pyyaml>=6.0
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: commitizen; extra == 'dev'
|
|
48
|
+
Requires-Dist: detect-secrets; extra == 'dev'
|
|
49
|
+
Requires-Dist: genbadge[coverage]>=1.1.1; extra == 'dev'
|
|
50
|
+
Requires-Dist: mkdocs; extra == 'dev'
|
|
51
|
+
Requires-Dist: mkdocs-material; extra == 'dev'
|
|
52
|
+
Requires-Dist: mkdocstrings[python]; extra == 'dev'
|
|
53
|
+
Requires-Dist: poethepoet; extra == 'dev'
|
|
54
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
55
|
+
Requires-Dist: pydoc-markdown; extra == 'dev'
|
|
56
|
+
Requires-Dist: pymdown-extensions; extra == 'dev'
|
|
57
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
58
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
59
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
60
|
+
Requires-Dist: ty; extra == 'dev'
|
|
61
|
+
Requires-Dist: typer; extra == 'dev'
|
|
62
|
+
Description-Content-Type: text/markdown
|
|
63
|
+
|
|
64
|
+
[Documentation](https://www.thomaspeoples.com/gitea-repos/wraith-sas-lineage/)
|
|
65
|
+

|
|
66
|
+

|
|
67
|
+
|
|
68
|
+
# 👻 wraith-sas-lineage
|
|
69
|
+
### *SAS Lineage Extraction for the Ghost Stack*
|
|
70
|
+
|
|
71
|
+
**wraith-sas-lineage** extracts data lineage from SAS DI Studio code and verbose execution logs, maps it to Open Data Contract Standard (ODCS) contracts, and serves a local read-only lineage explorer. One JSON artefact connects all three tools.
|
|
72
|
+
|
|
73
|
+
Designed for large SAS estates running on Unix. Output feeds [wraith-modelgen](https://git.thomaspeoples.com/thomaspeoples/wraith-modelgen) to auto-generate dbt on BigQuery.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Three tools. One seam.
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
.sas + .log files
|
|
81
|
+
│
|
|
82
|
+
▼
|
|
83
|
+
sas-parse ──► provenance.json
|
|
84
|
+
│
|
|
85
|
+
▼
|
|
86
|
+
sas-map ──► lineage.db + ODCS contracts + gaps report
|
|
87
|
+
│
|
|
88
|
+
▼
|
|
89
|
+
sas-ui ──► localhost:5000
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
| Command | Input | Output |
|
|
93
|
+
|---|---|---|
|
|
94
|
+
| `sas-parse` | `.sas` files + verbose logs | `provenance.json` |
|
|
95
|
+
| `sas-map` | `provenance.json` | `lineage.db`, ODCS YAML contracts, `mapping_gaps.json` |
|
|
96
|
+
| `sas-ui` | `lineage.db` | Read-only lineage explorer |
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Install
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
uv tool install wraith-sas-lineage
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Or for development:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
git clone https://git.thomaspeoples.com/thomaspeoples/wraith-sas-lineage.git
|
|
110
|
+
cd wraith-sas-lineage
|
|
111
|
+
uv run poe setup
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Quick start
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Parse SAS code + logs
|
|
120
|
+
sas-parse \
|
|
121
|
+
--code-dir /path/to/sas/code \
|
|
122
|
+
--log-dir /path/to/verbose/logs \
|
|
123
|
+
--estate acme-risk \
|
|
124
|
+
--output provenance.json
|
|
125
|
+
|
|
126
|
+
# Map to DB + ODCS contracts
|
|
127
|
+
sas-map \
|
|
128
|
+
--provenance provenance.json \
|
|
129
|
+
--db lineage.db \
|
|
130
|
+
--contracts-dir contracts/ \
|
|
131
|
+
--gaps mapping_gaps.json
|
|
132
|
+
|
|
133
|
+
# Browse lineage
|
|
134
|
+
sas-ui --db lineage.db
|
|
135
|
+
# → http://localhost:5000/flags all extraction flags
|
|
136
|
+
# → http://localhost:5000/provenance upstream chain for a column
|
|
137
|
+
# → http://localhost:5000/lineage downstream to all MARTs
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Design principles
|
|
143
|
+
|
|
144
|
+
**Flag-never-drop.** Unresolved macros, remote execution blocks, and hand-written code surface as `Flag` objects with reason, context, and snippet. Nothing is silently discarded.
|
|
145
|
+
|
|
146
|
+
**Log-first macro resolution.** `SYMBOLGEN`/`MLOGIC` log lines resolve `&macro_var` references before falling back to the code literal. Accurate lineage for DI Studio estates requires verbose logs.
|
|
147
|
+
|
|
148
|
+
**Single seam.** `provenance.json` is the only interface between tools. Each tool is independently runnable and testable.
|
|
149
|
+
|
|
150
|
+
**Content-hashed stable IDs.** Table, column, job, and lineage chain IDs are `SHA-256[:12]` of their canonical string. Same estate, same run, same IDs.
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Layer vocabulary
|
|
155
|
+
|
|
156
|
+
| SAS layer | Schema value |
|
|
157
|
+
|---|---|
|
|
158
|
+
| RAW | `RAW` |
|
|
159
|
+
| ODS (ODP) | `ODP` |
|
|
160
|
+
| STG (CDP) | `CDP` |
|
|
161
|
+
| MART (FDP) | `FDP` |
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Developer quality gate
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
uv run poe test # pytest (interactive)
|
|
169
|
+
uv run poe test-ci # pytest + coverage ≥ 80%
|
|
170
|
+
uv run poe lint # ruff check
|
|
171
|
+
uv run poe format # ruff format
|
|
172
|
+
uv run poe docs-serve # mkdocs at localhost:8000
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
All commits go through commitizen:
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
uv run cz commit
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
*Part of the [Ghost Stack](https://git.thomaspeoples.com/thomaspeoples). Sovereign. Self-hosted. No nonsense.*
|