threatsmith 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. threatsmith-0.2.0/.github/workflows/publish.yml +28 -0
  2. threatsmith-0.2.0/.gitignore +17 -0
  3. threatsmith-0.2.0/CLAUDE.md +70 -0
  4. threatsmith-0.2.0/LICENSE +21 -0
  5. threatsmith-0.2.0/PKG-INFO +179 -0
  6. threatsmith-0.2.0/README.md +164 -0
  7. threatsmith-0.2.0/pyproject.toml +46 -0
  8. threatsmith-0.2.0/src/threatsmith/__init__.py +3 -0
  9. threatsmith-0.2.0/src/threatsmith/engines/__init__.py +19 -0
  10. threatsmith-0.2.0/src/threatsmith/engines/base.py +13 -0
  11. threatsmith-0.2.0/src/threatsmith/engines/claude_code.py +37 -0
  12. threatsmith-0.2.0/src/threatsmith/engines/codex.py +29 -0
  13. threatsmith-0.2.0/src/threatsmith/main.py +126 -0
  14. threatsmith-0.2.0/src/threatsmith/orchestrator.py +108 -0
  15. threatsmith-0.2.0/src/threatsmith/prompts/__init__.py +0 -0
  16. threatsmith-0.2.0/src/threatsmith/prompts/assembler.py +128 -0
  17. threatsmith-0.2.0/src/threatsmith/prompts/contexts.py +82 -0
  18. threatsmith-0.2.0/src/threatsmith/prompts/owasp_references.py +59 -0
  19. threatsmith-0.2.0/src/threatsmith/prompts/scanner_snippets.py +26 -0
  20. threatsmith-0.2.0/src/threatsmith/prompts/stage_01_objectives.py +207 -0
  21. threatsmith-0.2.0/src/threatsmith/prompts/stage_02_technical_scope.py +258 -0
  22. threatsmith-0.2.0/src/threatsmith/prompts/stage_03_decomposition.py +297 -0
  23. threatsmith-0.2.0/src/threatsmith/prompts/stage_04_threat_analysis.py +371 -0
  24. threatsmith-0.2.0/src/threatsmith/prompts/stage_05_vulnerability.py +391 -0
  25. threatsmith-0.2.0/src/threatsmith/prompts/stage_06_attack_modeling.py +416 -0
  26. threatsmith-0.2.0/src/threatsmith/prompts/stage_07_risk_impact.py +444 -0
  27. threatsmith-0.2.0/src/threatsmith/prompts/stage_08_report.py +221 -0
  28. threatsmith-0.2.0/src/threatsmith/utils/__init__.py +0 -0
  29. threatsmith-0.2.0/src/threatsmith/utils/logging.py +50 -0
  30. threatsmith-0.2.0/src/threatsmith/utils/metadata.py +97 -0
  31. threatsmith-0.2.0/src/threatsmith/utils/scanners.py +29 -0
  32. threatsmith-0.2.0/tasks/archive/progress.txt +240 -0
  33. threatsmith-0.2.0/tasks/archive/stories.json +306 -0
  34. threatsmith-0.2.0/tasks/archive/threatsmith-v0.2.0-prd.md +466 -0
  35. threatsmith-0.2.0/tests/__init__.py +0 -0
  36. threatsmith-0.2.0/tests/test_assembler.py +255 -0
  37. threatsmith-0.2.0/tests/test_cli.py +215 -0
  38. threatsmith-0.2.0/tests/test_e2e.py +154 -0
  39. threatsmith-0.2.0/tests/test_engines.py +127 -0
  40. threatsmith-0.2.0/tests/test_metadata.py +232 -0
  41. threatsmith-0.2.0/tests/test_orchestrator.py +267 -0
  42. threatsmith-0.2.0/tests/test_package.py +35 -0
  43. threatsmith-0.2.0/tests/test_scanners.py +88 -0
  44. threatsmith-0.2.0/tests/test_stage_01_objectives.py +72 -0
  45. threatsmith-0.2.0/tests/test_stage_02_technical_scope.py +67 -0
  46. threatsmith-0.2.0/tests/test_stage_03_decomposition.py +108 -0
  47. threatsmith-0.2.0/tests/test_stage_04.py +355 -0
  48. threatsmith-0.2.0/tests/test_stage_05.py +269 -0
  49. threatsmith-0.2.0/tests/test_stage_06.py +230 -0
  50. threatsmith-0.2.0/tests/test_stage_07.py +259 -0
  51. threatsmith-0.2.0/tests/test_stage_08.py +247 -0
  52. threatsmith-0.2.0/uv.lock +204 -0
@@ -0,0 +1,28 @@
1
+ name: "Publish"
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - v*
7
+
8
+ jobs:
9
+ run:
10
+ runs-on: ubuntu-latest
11
+ environment:
12
+ name: pypi
13
+ permissions:
14
+ id-token: write
15
+ contents: read
16
+ steps:
17
+ - name: Checkout
18
+ uses: actions/checkout@v6
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v7
21
+ - name: Install Python 3.12
22
+ run: uv python install 3.12
23
+ - name: Run tests
24
+ run: uv run pytest
25
+ - name: Build
26
+ run: uv build
27
+ - name: Publish
28
+ run: uv publish
@@ -0,0 +1,17 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ .ruff_cache/
10
+
11
+ .pytest_cache
12
+
13
+ .cursor
14
+
15
+ venv/
16
+ .venv/
17
+ .env
@@ -0,0 +1,70 @@
1
+ ## Project Overview
2
+
3
+ ThreatSmith is an AI-powered PASTA (Process for Attack Simulation and Threat Analysis) threat modeling engine. It wraps AI coding agents (Claude Code, Codex) to orchestrate a 7-stage threat modeling pipeline plus a report consolidation step, producing structured markdown deliverables.
4
+
5
+ ## Technical Stack
6
+
7
+ - Python 3.12+, src layout with hatchling build backend
8
+ - Typer for CLI, stdlib logging for all output
9
+ - Runtime dependencies are limited to typer and structlog (as declared in `pyproject.toml`) — engines are invoked via subprocess
10
+
11
+ ## Development Commands
12
+
13
+ ```bash
14
+ uv run pytest # run all tests
15
+ uv run ruff check --fix # lint (auto-fix; never manually fix lint issues)
16
+ uv run ruff format # format (always use this, never manually reformat)
17
+ ```
18
+
19
+ Always run `uv run ruff check --fix` and `uv run ruff format` to let the tools auto-fix issues. Do not run the check-only variants (`ruff check` without `--fix`, `ruff format --check`) and then manually apply fixes.
20
+
21
+ ## Architecture
22
+
23
+ ```
24
+ CLI (main.py)
25
+ → detect_scanners(), generate_metadata()
26
+ → Orchestrator.run()
27
+ → for stage 1–8:
28
+ assemble_prompt(stage_number, prior_outputs, scanner_info, ...)
29
+ → selects stage module → builds typed context dataclass → calls build_prompt()
30
+ engine.execute(prompt, working_directory, output_dir) → exit_code
31
+ validate output file exists → read into prior_outputs
32
+ ```
33
+
34
+ ### Key interfaces
35
+
36
+ - **Engine** (`engines/base.py`): ABC with `execute(prompt: str, working_directory: str, output_dir: str) -> int`. Engines are thin subprocess wrappers — prompt assembly is the orchestrator's job.
37
+ - **`get_engine(name)`** (`engines/__init__.py`): Factory mapping `"claude-code"` / `"codex"` to engine classes.
38
+ - **`assemble_prompt()`** (`prompts/assembler.py`): Maps stage number to the correct stage module, builds the typed context dataclass, calls `build_prompt()`. Returns a single prompt string.
39
+ - **Stage prompt modules** (`prompts/stage_01_objectives.py` through `stage_08_report.py`): Each exports `STAGE_PROMPT` constant + `build_prompt(context, output_dir="threatmodel") -> str`.
40
+ - **Context dataclasses** (`prompts/contexts.py`): One per stage (e.g. `ObjectivesContext`, `VulnerabilityContext`). Fields are `str | None = None`; `VulnerabilityContext` also has `scanners_available: list[str] | None`.
41
+ - **Orchestrator** (`orchestrator.py`): Dataclass. `run()` iterates stages 1–8, accumulates deliverables in `_prior_outputs` dict keyed as `stage_01_output` through `stage_08_output`. Returns 0 on success, 1 on failure.
42
+
43
+ ### Dynamic injection
44
+
45
+ - **OWASP references** (Stage 4): Web Top 10 always injected. API/LLM/Mobile Top 10 conditionally injected based on case-insensitive keyword matching against Stage 2 output. Constants in `prompts/owasp_references.py`.
46
+ - **Scanner snippets** (Stage 5): Injected per available scanner from `prompts/scanner_snippets.py` `SCANNER_SNIPPETS` dict. `detect_scanners()` returns `{"available": [...], "unavailable": [...]}`.
47
+
48
+ ### Stage → file mapping
49
+
50
+ Defined in `orchestrator._STAGE_FILES`:
51
+ - `01-objectives.md` through `07-risk-and-impact-analysis.md` (PASTA stages)
52
+ - `08-report.md` (consolidation, not a PASTA stage — no new analysis)
53
+
54
+ ## Codebase Patterns
55
+
56
+ - **Prompt placeholders**: Use `.replace("{placeholder}", value)`, not `.format()` — prompt text contains curly braces that break `.format()`.
57
+ - **Prior stage injection**: XML-delimited `<prior_stages><stage_NN_name>...</stage_NN_name></prior_stages>` via `{prior_stages_section}` placeholder. Each stage output is independently optional (only included when present).
58
+ - **`or None` guards**: `context.field or None` treats both missing and empty-string values as absent. Used throughout `build_prompt()` and `assemble_prompt()`.
59
+ - **Logging**: Modules use `logger = logging.getLogger(__name__)`. CLI configures via `configure_logging(verbose)` in `utils/logging.py`. DEBUG = verbose, INFO = operational progress, WARNING/ERROR = failures.
60
+ - **scanner_info keys**: `detect_scanners()` returns `{"available": [...], "unavailable": [...]}`. The assembler maps `scanner_info["available"]` to `VulnerabilityContext.scanners_available`.
61
+ - **metadata.json**: `generate_metadata()` accepts `engine_name` param but writes `"engine"` key. Returns a `Metadata` dataclass. `write_metadata(output_dir, metadata)` serializes to JSON.
62
+
63
+ ## Testing Patterns
64
+
65
+ - Tests are in `tests/` at root level. Test files mirror source: `test_stage_04.py` tests `stage_04_threat_analysis.py`.
66
+ - Do not write tests for string constants — test logic and behavior only.
67
+ - **Mock engine in E2E tests**: Use a closure over `output_dir` in `execute_side_effect(prompt, working_directory)` to write stage files. Track call count with `{"n": 0}` dict.
68
+ - **Patch location for CLI tests**: Patch `threatsmith.main.get_engine` (not `threatsmith.engines.get_engine`) since the CLI imports into its own namespace.
69
+ - `detect_scanners()` and `generate_metadata()` are safe to use unpatched in tests — scanners uses `shutil.which()`, metadata falls back to `"unknown"` for git failures.
70
+ - Test logging with `caplog.at_level(logging.DEBUG, logger="threatsmith.module_name")`.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Abdul Rahman Al Kibbe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: threatsmith
3
+ Version: 0.2.0
4
+ Summary: AI-powered secure code review and threat analysis engine
5
+ Project-URL: Homepage, https://github.com/yogur/threatsmith
6
+ Project-URL: Repository, https://github.com/yogur/threatsmith
7
+ Project-URL: Issues, https://github.com/yogur/threatsmith/issues
8
+ Author: Abdul Rahman Al-Kibbe
9
+ License-File: LICENSE
10
+ Keywords: ai,code-review,owasp,pasta,security,threat-modeling
11
+ Requires-Python: >=3.12
12
+ Requires-Dist: structlog>=24.0
13
+ Requires-Dist: typer>=0.12
14
+ Description-Content-Type: text/markdown
15
+
16
+ ```
17
+ ████████╗ ██╗ ██╗ ██████╗ ███████╗ █████╗ ████████╗ ███████╗ ███╗ ███╗ ██╗ ████████╗ ██╗ ██╗
18
+ ╚══██╔══╝ ██║ ██║ ██╔══██╗ ██╔════╝ ██╔══██╗ ╚══██╔══╝ ██╔════╝ ████╗ ████║ ██║ ╚══██╔══╝ ██║ ██║
19
+ ██║ ███████║ ██████╔╝ █████╗ ███████║ ██║ ███████╗ ██╔████╔██║ ██║ ██║ ███████║
20
+ ██║ ██╔══██║ ██╔══██╗ ██╔══╝ ██╔══██║ ██║ ╚════██║ ██║╚██╔╝██║ ██║ ██║ ██╔══██║
21
+ ██║ ██║ ██║ ██║ ██║ ███████╗ ██║ ██║ ██║ ███████║ ██║ ╚═╝ ██║ ██║ ██║ ██║ ██║
22
+ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
23
+ ```
24
+
25
+ # ThreatSmith 🔒🤖
26
+
27
+ ThreatSmith is an AI-powered PASTA threat modeling engine that automates the entire PASTA pipeline. It runs each stage as a fresh AI coding agent session, assembles prompts with accumulated context from prior stages, auto-detects available security scanners, and validates that each stage produces its expected deliverable. The result is a complete, structured threat model generated directly from your codebase.
28
+
29
+ **No API keys. No separate billing. No token budgets to manage.** If you have a `Claude Code` or `Codex` subscription, you already have everything you need. Point ThreatSmith at a repository and get a full threat model.
30
+
31
+ ### What is PASTA?
32
+
33
+ [PASTA (Process for Attack Simulation and Threat Analysis)](https://handbook.gitlab.com/handbook/security/product-security/security-platforms-architecture/application-security/threat-modeling/#pasta-stages) is a 7-stage, risk-centric methodology that produces structured security artifacts: data flow diagrams, threat inventories, vulnerability assessments, attack trees, and prioritized remediation roadmaps. It is thorough, but the manual effort involved makes full adoption rare.
34
+
35
+ ## Use Cases
36
+
37
+ - **Persist threat models in Git as context for AI-powered secure code review.** Commit the `threatmodel/` directory to your repository. When AI coding agents review PRs or audit code, they can reference the threat model for context on trust boundaries, known vulnerabilities, and attack surfaces.
38
+ - **Give AI coding agents security context to write secure code.** With the threat model in the repo, agents writing new features can consult it to understand data sensitivity classifications, required security controls, and known attack vectors before producing code.
39
+ - **Onboard security engineers to unfamiliar codebases.** The 7-stage output provides a structured, security-focused overview of architecture, data flows, threats, and vulnerabilities without manually reading the entire codebase.
40
+ - **Triage and prioritize remediation.** Stage 7 produces a P0-P3 remediation roadmap ranked by risk reduction vs. implementation effort, giving engineering teams a ready-made security backlog.
41
+
42
+ ## How It Works
43
+
44
+ ```
45
+ ┌────────────────┐ ┌──────────────────────┐ ┌──────────────────┐
46
+ │ CLI │────>│ Orchestrator │────>│ AI Coding Agent │
47
+ │ │ │ │ │ │
48
+ │ threatsmith │ │ - Stage sequencing │ │ Claude Code / │
49
+ │ /path/to/repo │ │ - Prompt assembly │ │ Codex │
50
+ │ --engine │ │ - Context passing │ │ │
51
+ │ │ │ - Scanner detection │ │ - Code nav │
52
+ │ │ │ - Output validation │ │ - File I/O │
53
+ └────────────────┘ └──────────────────────┘ │ - Shell exec │
54
+ │ - Reasoning │
55
+ └──────────────────┘
56
+ ```
57
+
58
+ ThreatSmith runs a sequential pipeline of 7 PASTA stages plus a report consolidation step. Each stage executes as a fresh agent session but receives all prior stage outputs as structured context. This mirrors how a security engineer works through PASTA: read the prior findings, then produce the next deliverable.
59
+
60
+ Currently supports Claude Code (`--engine claude-code`) and Codex (`--engine codex`). Adding a new engine requires implementing a single method: `execute(prompt, working_directory, output_dir) -> exit_code`.
61
+
62
+ | Stage | Name | Output |
63
+ |-------|------|--------|
64
+ | 1 | Define Objectives | Business objectives, data sensitivity, compliance requirements |
65
+ | 2 | Define Technical Scope | Technology stack, dependencies, supply chain, deployment |
66
+ | 3 | Application Decomposition | Architecture, data flow diagrams (Mermaid), trust boundaries |
67
+ | 4 | Threat Analysis | STRIDE analysis, attack scenarios, OWASP cross-referencing |
68
+ | 5 | Vulnerability Analysis | Scanner results, CVSS scoring, CWE/CVE enumeration |
69
+ | 6 | Attack Modeling | Attack trees (Mermaid), MITRE ATT&CK mapping, exploit paths |
70
+ | 7 | Risk and Impact Analysis | Risk qualification, countermeasures, P0-P3 remediation roadmap |
71
+ | | | |
72
+ | 8 | Report Consolidation | Executive summary combining all stage outputs (not a PASTA stage) |
73
+
74
+ ### Context Accumulation
75
+
76
+ Each stage builds on all prior stages. Stage N receives the outputs of stages 1 through N-1, injected as structured XML-delimited sections in the prompt. This accumulated context directs the agent's analysis, reducing blind codebase exploration and improving output quality.
77
+
78
+ ## Installation
79
+
80
+ ### Prerequisites
81
+
82
+ - Python 3.12+
83
+ - One of the supported AI coding agents installed and authenticated:
84
+ - `Claude Code` for the claude-code engine
85
+ - `Codex` for the codex engine
86
+
87
+ ### Install
88
+
89
+ ```bash
90
+ # With pip
91
+ pip install threatsmith
92
+
93
+ # With uv
94
+ uv tool install threatsmith
95
+
96
+ # With pipx (no virtual environment needed)
97
+ pipx install threatsmith
98
+
99
+ # With uvx (no virtual environment needed)
100
+ uvx threatsmith /path/to/your/repo
101
+ ```
102
+
103
+ ## Quick Start
104
+
105
+ ```bash
106
+ threatsmith /path/to/your/repo
107
+ ```
108
+
109
+ This runs the full 7-stage PASTA pipeline using Claude Code (the default engine) and writes all deliverables to `threatmodel/` inside the target repository.
110
+
111
+ To use a different engine or provide objectives to guide the analysis:
112
+
113
+ ```bash
114
+ threatsmith /path/to/your/repo \
115
+ --engine codex \
116
+ --business-objectives "Protect user PII, meet GDPR requirements" \
117
+ --security-objectives "Prevent data exfiltration" \
118
+ -v
119
+ ```
120
+
121
+ ## Scanner Integration
122
+
123
+ ThreatSmith automatically detects security scanners on your system before running the pipeline. When a scanner is found, stage-specific instructions are injected into the Stage 5 (Vulnerability Analysis) prompt so the agent knows to run it and incorporate the results.
124
+
125
+ | Scanner | Purpose | Detection |
126
+ |---------|---------|-----------|
127
+ | Semgrep | Static analysis patterns | `which semgrep` |
128
+ | Trivy | Dependency CVE scanning | `which trivy` |
129
+ | Gitleaks | Secret/credential detection | `which gitleaks` |
130
+
131
+ Scanners that are not detected are omitted from the prompt entirely. Scanner availability is recorded in `metadata.json` for traceability.
132
+
133
+ ## Output Structure
134
+
135
+ All deliverables are written to a `threatmodel/` directory (configurable via `--output-dir`) at the target repository root:
136
+
137
+ ```
138
+ threatmodel/
139
+ metadata.json # Run metadata (engine, commit, scanners, timestamp)
140
+ 01-objectives.md # Stage 1: Business objectives and data sensitivity
141
+ 02-technical-scope.md # Stage 2: Technology stack and dependencies
142
+ 03-application-decomposition.md # Stage 3: Architecture, DFDs, trust boundaries
143
+ 04-threat-analysis.md # Stage 4: Threat identification and attack scenarios
144
+ 05-vulnerability-analysis.md # Stage 5: Vulnerability findings and CVSS scoring
145
+ 06-attack-modeling.md # Stage 6: Attack trees and exploitation paths
146
+ 07-risk-and-impact-analysis.md # Stage 7: Risk qualification and remediation roadmap
147
+ 08-report.md # Consolidated executive report
148
+ ```
149
+
150
+ Individual stage files are preserved alongside the consolidated report. This supports selective consumption (a developer fixing an auth issue only needs stages 4-5), debuggability (re-examine a single stage's output), and granular review by security teams.
151
+
152
+ ## CLI Reference
153
+
154
+ ```
155
+ threatsmith <path> [OPTIONS]
156
+ ```
157
+
158
+ | Parameter | Type | Default | Description |
159
+ |-----------|------|---------|-------------|
160
+ | `path` | positional | required | Path to the target repository |
161
+ | `--engine` | string | `claude-code` | AI engine to use (`claude-code` or `codex`) |
162
+ | `--business-objectives` | string | — | Business objectives to guide the analysis |
163
+ | `--security-objectives` | string | — | Security objectives to guide the analysis |
164
+ | `--output-dir` | string | `threatmodel/` | Output directory for deliverables (relative to target repo) |
165
+ | `-v` / `--verbose` | flag | off | Enable verbose (debug-level) logging |
166
+
167
+ ## Roadmap
168
+
169
+ - **Batch mode.** Process multiple repositories from a file list (`--repos repos.txt`) with configurable parallelism (`--parallel N`).
170
+ - **Auto-PR creation.** Automatically commit the `threatmodel/` directory, push a branch, and open a pull request via `gh` CLI after analysis completes.
171
+ - **Incremental updates.** Use `git diff` against the commit hash in `metadata.json` to selectively re-run only the stages affected by code changes.
172
+ - **Stage re-run.** Re-run a specific stage (e.g., `--rerun-stage 5`) using existing prior stage outputs without re-running the entire pipeline.
173
+ - **Resume from stage.** Resume a failed or interrupted pipeline run from the stage where it stopped.
174
+ - **CI/CD integration.** GitHub Action and GitLab CI templates for automated threat modeling on pull requests.
175
+ - **Threat model diff.** Compare two threat model runs and surface what changed between them.
176
+
177
+ ## License
178
+
179
+ MIT License.
@@ -0,0 +1,164 @@
1
+ ```
2
+ ████████╗ ██╗ ██╗ ██████╗ ███████╗ █████╗ ████████╗ ███████╗ ███╗ ███╗ ██╗ ████████╗ ██╗ ██╗
3
+ ╚══██╔══╝ ██║ ██║ ██╔══██╗ ██╔════╝ ██╔══██╗ ╚══██╔══╝ ██╔════╝ ████╗ ████║ ██║ ╚══██╔══╝ ██║ ██║
4
+ ██║ ███████║ ██████╔╝ █████╗ ███████║ ██║ ███████╗ ██╔████╔██║ ██║ ██║ ███████║
5
+ ██║ ██╔══██║ ██╔══██╗ ██╔══╝ ██╔══██║ ██║ ╚════██║ ██║╚██╔╝██║ ██║ ██║ ██╔══██║
6
+ ██║ ██║ ██║ ██║ ██║ ███████╗ ██║ ██║ ██║ ███████║ ██║ ╚═╝ ██║ ██║ ██║ ██║ ██║
7
+ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
8
+ ```
9
+
10
+ # ThreatSmith 🔒🤖
11
+
12
+ ThreatSmith is an AI-powered PASTA threat modeling engine that automates the entire PASTA pipeline. It runs each stage as a fresh AI coding agent session, assembles prompts with accumulated context from prior stages, auto-detects available security scanners, and validates that each stage produces its expected deliverable. The result is a complete, structured threat model generated directly from your codebase.
13
+
14
+ **No API keys. No separate billing. No token budgets to manage.** If you have a `Claude Code` or `Codex` subscription, you already have everything you need. Point ThreatSmith at a repository and get a full threat model.
15
+
16
+ ### What is PASTA?
17
+
18
+ [PASTA (Process for Attack Simulation and Threat Analysis)](https://handbook.gitlab.com/handbook/security/product-security/security-platforms-architecture/application-security/threat-modeling/#pasta-stages) is a 7-stage, risk-centric methodology that produces structured security artifacts: data flow diagrams, threat inventories, vulnerability assessments, attack trees, and prioritized remediation roadmaps. It is thorough, but the manual effort involved makes full adoption rare.
19
+
20
+ ## Use Cases
21
+
22
+ - **Persist threat models in Git as context for AI-powered secure code review.** Commit the `threatmodel/` directory to your repository. When AI coding agents review PRs or audit code, they can reference the threat model for context on trust boundaries, known vulnerabilities, and attack surfaces.
23
+ - **Give AI coding agents security context to write secure code.** With the threat model in the repo, agents writing new features can consult it to understand data sensitivity classifications, required security controls, and known attack vectors before producing code.
24
+ - **Onboard security engineers to unfamiliar codebases.** The 7-stage output provides a structured, security-focused overview of architecture, data flows, threats, and vulnerabilities without manually reading the entire codebase.
25
+ - **Triage and prioritize remediation.** Stage 7 produces a P0-P3 remediation roadmap ranked by risk reduction vs. implementation effort, giving engineering teams a ready-made security backlog.
26
+
27
+ ## How It Works
28
+
29
+ ```
30
+ ┌────────────────┐ ┌──────────────────────┐ ┌──────────────────┐
31
+ │ CLI │────>│ Orchestrator │────>│ AI Coding Agent │
32
+ │ │ │ │ │ │
33
+ │ threatsmith │ │ - Stage sequencing │ │ Claude Code / │
34
+ │ /path/to/repo │ │ - Prompt assembly │ │ Codex │
35
+ │ --engine │ │ - Context passing │ │ │
36
+ │ │ │ - Scanner detection │ │ - Code nav │
37
+ │ │ │ - Output validation │ │ - File I/O │
38
+ └────────────────┘ └──────────────────────┘ │ - Shell exec │
39
+ │ - Reasoning │
40
+ └──────────────────┘
41
+ ```
42
+
43
+ ThreatSmith runs a sequential pipeline of 7 PASTA stages plus a report consolidation step. Each stage executes as a fresh agent session but receives all prior stage outputs as structured context. This mirrors how a security engineer works through PASTA: read the prior findings, then produce the next deliverable.
44
+
45
+ Currently supports Claude Code (`--engine claude-code`) and Codex (`--engine codex`). Adding a new engine requires implementing a single method: `execute(prompt, working_directory, output_dir) -> exit_code`.
46
+
47
+ | Stage | Name | Output |
48
+ |-------|------|--------|
49
+ | 1 | Define Objectives | Business objectives, data sensitivity, compliance requirements |
50
+ | 2 | Define Technical Scope | Technology stack, dependencies, supply chain, deployment |
51
+ | 3 | Application Decomposition | Architecture, data flow diagrams (Mermaid), trust boundaries |
52
+ | 4 | Threat Analysis | STRIDE analysis, attack scenarios, OWASP cross-referencing |
53
+ | 5 | Vulnerability Analysis | Scanner results, CVSS scoring, CWE/CVE enumeration |
54
+ | 6 | Attack Modeling | Attack trees (Mermaid), MITRE ATT&CK mapping, exploit paths |
55
+ | 7 | Risk and Impact Analysis | Risk qualification, countermeasures, P0-P3 remediation roadmap |
56
+ | | | |
57
+ | 8 | Report Consolidation | Executive summary combining all stage outputs (not a PASTA stage) |
58
+
59
+ ### Context Accumulation
60
+
61
+ Each stage builds on all prior stages. Stage N receives the outputs of stages 1 through N-1, injected as structured XML-delimited sections in the prompt. This accumulated context directs the agent's analysis, reducing blind codebase exploration and improving output quality.
62
+
63
+ ## Installation
64
+
65
+ ### Prerequisites
66
+
67
+ - Python 3.12+
68
+ - One of the supported AI coding agents installed and authenticated:
69
+ - `Claude Code` for the claude-code engine
70
+ - `Codex` for the codex engine
71
+
72
+ ### Install
73
+
74
+ ```bash
75
+ # With pip
76
+ pip install threatsmith
77
+
78
+ # With uv
79
+ uv tool install threatsmith
80
+
81
+ # With pipx (no virtual environment needed)
82
+ pipx install threatsmith
83
+
84
+ # With uvx (no virtual environment needed)
85
+ uvx threatsmith /path/to/your/repo
86
+ ```
87
+
88
+ ## Quick Start
89
+
90
+ ```bash
91
+ threatsmith /path/to/your/repo
92
+ ```
93
+
94
+ This runs the full 7-stage PASTA pipeline using Claude Code (the default engine) and writes all deliverables to `threatmodel/` inside the target repository.
95
+
96
+ To use a different engine or provide objectives to guide the analysis:
97
+
98
+ ```bash
99
+ threatsmith /path/to/your/repo \
100
+ --engine codex \
101
+ --business-objectives "Protect user PII, meet GDPR requirements" \
102
+ --security-objectives "Prevent data exfiltration" \
103
+ -v
104
+ ```
105
+
106
+ ## Scanner Integration
107
+
108
+ ThreatSmith automatically detects security scanners on your system before running the pipeline. When a scanner is found, stage-specific instructions are injected into the Stage 5 (Vulnerability Analysis) prompt so the agent knows to run it and incorporate the results.
109
+
110
+ | Scanner | Purpose | Detection |
111
+ |---------|---------|-----------|
112
+ | Semgrep | Static analysis patterns | `which semgrep` |
113
+ | Trivy | Dependency CVE scanning | `which trivy` |
114
+ | Gitleaks | Secret/credential detection | `which gitleaks` |
115
+
116
+ Scanners that are not detected are omitted from the prompt entirely. Scanner availability is recorded in `metadata.json` for traceability.
117
+
118
+ ## Output Structure
119
+
120
+ All deliverables are written to a `threatmodel/` directory (configurable via `--output-dir`) at the target repository root:
121
+
122
+ ```
123
+ threatmodel/
124
+ metadata.json # Run metadata (engine, commit, scanners, timestamp)
125
+ 01-objectives.md # Stage 1: Business objectives and data sensitivity
126
+ 02-technical-scope.md # Stage 2: Technology stack and dependencies
127
+ 03-application-decomposition.md # Stage 3: Architecture, DFDs, trust boundaries
128
+ 04-threat-analysis.md # Stage 4: Threat identification and attack scenarios
129
+ 05-vulnerability-analysis.md # Stage 5: Vulnerability findings and CVSS scoring
130
+ 06-attack-modeling.md # Stage 6: Attack trees and exploitation paths
131
+ 07-risk-and-impact-analysis.md # Stage 7: Risk qualification and remediation roadmap
132
+ 08-report.md # Consolidated executive report
133
+ ```
134
+
135
+ Individual stage files are preserved alongside the consolidated report. This supports selective consumption (a developer fixing an auth issue only needs stages 4-5), debuggability (re-examine a single stage's output), and granular review by security teams.
136
+
137
+ ## CLI Reference
138
+
139
+ ```
140
+ threatsmith <path> [OPTIONS]
141
+ ```
142
+
143
+ | Parameter | Type | Default | Description |
144
+ |-----------|------|---------|-------------|
145
+ | `path` | positional | required | Path to the target repository |
146
+ | `--engine` | string | `claude-code` | AI engine to use (`claude-code` or `codex`) |
147
+ | `--business-objectives` | string | — | Business objectives to guide the analysis |
148
+ | `--security-objectives` | string | — | Security objectives to guide the analysis |
149
+ | `--output-dir` | string | `threatmodel/` | Output directory for deliverables (relative to target repo) |
150
+ | `-v` / `--verbose` | flag | off | Enable verbose (debug-level) logging |
151
+
152
+ ## Roadmap
153
+
154
+ - **Batch mode.** Process multiple repositories from a file list (`--repos repos.txt`) with configurable parallelism (`--parallel N`).
155
+ - **Auto-PR creation.** Automatically commit the `threatmodel/` directory, push a branch, and open a pull request via `gh` CLI after analysis completes.
156
+ - **Incremental updates.** Use `git diff` against the commit hash in `metadata.json` to selectively re-run only the stages affected by code changes.
157
+ - **Stage re-run.** Re-run a specific stage (e.g., `--rerun-stage 5`) using existing prior stage outputs without re-running the entire pipeline.
158
+ - **Resume from stage.** Resume a failed or interrupted pipeline run from the stage where it stopped.
159
+ - **CI/CD integration.** GitHub Action and GitLab CI templates for automated threat modeling on pull requests.
160
+ - **Threat model diff.** Compare two threat model runs and surface what changed between them.
161
+
162
+ ## License
163
+
164
+ MIT License.
@@ -0,0 +1,46 @@
1
+ [project]
2
+ name = "threatsmith"
3
+ version = "0.2.0"
4
+ description = "AI-powered secure code review and threat analysis engine"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ authors = [
8
+ {name = "Abdul Rahman Al-Kibbe"}
9
+ ]
10
+ keywords = ["security", "code-review", "ai", "pasta", "owasp", "threat-modeling"]
11
+ dependencies = [
12
+ "typer>=0.12",
13
+ "structlog>=24.0",
14
+ ]
15
+
16
+ [dependency-groups]
17
+ dev = [
18
+ "pytest>=9.0.2",
19
+ "ruff>=0.15.2"
20
+ ]
21
+
22
+ [build-system]
23
+ requires = ["hatchling"]
24
+ build-backend = "hatchling.build"
25
+
26
+ [tool.hatch.build.targets.wheel]
27
+ packages = ["src/threatsmith"]
28
+
29
+ [tool.pytest.ini_options]
30
+ testpaths = ["tests"]
31
+
32
+ [tool.ruff]
33
+ src = ["src"]
34
+ target-version = "py312"
35
+
36
+ [tool.ruff.lint]
37
+ select = ["E", "F", "I", "UP"]
38
+ ignore = ["E501"]
39
+
40
+ [project.scripts]
41
+ threatsmith = "threatsmith.main:app"
42
+
43
+ [project.urls]
44
+ Homepage = "https://github.com/yogur/threatsmith"
45
+ Repository = "https://github.com/yogur/threatsmith"
46
+ Issues = "https://github.com/yogur/threatsmith/issues"
@@ -0,0 +1,3 @@
1
+ from importlib.metadata import version
2
+
3
+ __version__ = version("threatsmith")
@@ -0,0 +1,19 @@
1
+ from threatsmith.engines.base import Engine
2
+ from threatsmith.engines.claude_code import ClaudeCodeEngine
3
+ from threatsmith.engines.codex import CodexEngine
4
+
5
+
6
def get_engine(engine_name: str) -> Engine:
    """Instantiate the engine registered under ``engine_name``.

    Args:
        engine_name: Either ``"claude-code"`` or ``"codex"``.

    Returns:
        A newly constructed instance of the matching engine class.

    Raises:
        ValueError: If ``engine_name`` is not one of the registered names.
    """
    registry = {
        "claude-code": ClaudeCodeEngine,
        "codex": CodexEngine,
    }
    engine_cls = registry.get(engine_name)
    if engine_cls is None:
        # The message lists the valid names so the caller can correct the input.
        raise ValueError(
            f"Unknown engine: {engine_name!r}. Choose from: {list(registry)}"
        )
    return engine_cls()
17
+
18
+
19
+ __all__ = ["Engine", "ClaudeCodeEngine", "CodexEngine", "get_engine"]
@@ -0,0 +1,13 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class Engine(ABC):
    """Abstract interface that every engine implementation must satisfy."""

    @abstractmethod
    def execute(self, prompt: str, working_directory: str, output_dir: str) -> int:
        """Run the engine on an assembled prompt and return an exit code.

        Args:
            prompt: The assembled prompt text.
            working_directory: Directory the engine is run against.
            output_dir: Output directory name supplied by the caller.

        Returns:
            An integer exit code.
        """
@@ -0,0 +1,37 @@
1
+ import logging
2
+ import subprocess
3
+
4
+ from threatsmith.engines.base import Engine
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class ClaudeCodeEngine(Engine):
    """Engine that runs a prompt through the ``claude`` command-line tool."""

    def execute(
        self,
        prompt: str,
        working_directory: str,
        output_dir: str,
    ) -> int:
        """Run ``claude -p`` with the prompt and report how it exited.

        Args:
            prompt: Prompt text passed as the argument to ``-p``.
            working_directory: Directory used as the subprocess ``cwd``.
            output_dir: Directory that the ``Write``/``Edit`` entries passed
                to ``--allowedTools`` are scoped to; trailing ``/``
                characters are stripped before the pattern is built.

        Returns:
            The CLI's return code, or ``1`` when the process could not be
            run (executable missing, or any other exception).
        """
        # Both tool entries share the same "<output_dir>/**" glob.
        scope = f"{output_dir.rstrip('/')}/**"
        command = ["claude", "-p", prompt, "--allowedTools"]
        command += [f"{tool}({scope})" for tool in ("Write", "Edit")]

        logger.debug("Running: claude -p <prompt> in %s", working_directory)
        try:
            return subprocess.run(command, cwd=working_directory).returncode
        except FileNotFoundError:
            logger.error(
                "Claude CLI not found. Ensure 'claude' is installed and in your PATH."
            )
            return 1
        except Exception as exc:
            # Any other failure is logged and reported as exit code 1.
            logger.error("Error executing claude: %s", str(exc))
            return 1
@@ -0,0 +1,29 @@
1
+ import logging
2
+ import subprocess
3
+
4
+ from threatsmith.engines.base import Engine
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class CodexEngine(Engine):
    """Engine that runs a prompt through the ``codex`` command-line tool."""

    def execute(
        self,
        prompt: str,
        working_directory: str,
        output_dir: str,
    ) -> int:
        """Run ``codex exec`` with the prompt and report how it exited.

        Args:
            prompt: Prompt text passed as the argument to ``codex exec``.
            working_directory: Directory used as the subprocess ``cwd``.
            output_dir: Part of the engine interface; this implementation
                does not read it.

        Returns:
            The CLI's return code, or ``1`` when the process could not be
            run (executable missing, or any other exception).
        """
        command = ["codex", "exec", prompt]
        logger.debug("Running: codex exec <prompt> in %s", working_directory)
        try:
            return subprocess.run(command, cwd=working_directory).returncode
        except FileNotFoundError:
            logger.error(
                "Codex CLI not found. Ensure 'codex' is installed and in your PATH."
            )
            return 1
        except Exception as exc:
            # Any other failure is logged and reported as exit code 1.
            logger.error("Error executing codex: %s", str(exc))
            return 1