xelo 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xelo-0.1.0/LICENSE +19 -0
- xelo-0.1.0/PKG-INFO +137 -0
- xelo-0.1.0/README.md +97 -0
- xelo-0.1.0/pyproject.toml +86 -0
- xelo-0.1.0/setup.cfg +4 -0
- xelo-0.1.0/src/ai_sbom/__init__.py +11 -0
- xelo-0.1.0/src/ai_sbom/adapters/__init__.py +21 -0
- xelo-0.1.0/src/ai_sbom/adapters/base.py +184 -0
- xelo-0.1.0/src/ai_sbom/adapters/data_classification.py +275 -0
- xelo-0.1.0/src/ai_sbom/adapters/dockerfile.py +142 -0
- xelo-0.1.0/src/ai_sbom/adapters/frameworks.py +79 -0
- xelo-0.1.0/src/ai_sbom/adapters/models_kb.py +259 -0
- xelo-0.1.0/src/ai_sbom/adapters/patterns.py +39 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/__init__.py +18 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/autogen.py +236 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/crewai.py +224 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/langgraph.py +405 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/llamaindex.py +217 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/llm_clients.py +219 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/openai_agents.py +228 -0
- xelo-0.1.0/src/ai_sbom/adapters/python/semantic_kernel.py +195 -0
- xelo-0.1.0/src/ai_sbom/adapters/registry.py +187 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/__init__.py +28 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/_ts_regex.py +105 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/bedrock_agents.py +294 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/datastores.py +263 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/google_adk.py +250 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/langgraph.py +219 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/llm_clients.py +202 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/openai_agents.py +198 -0
- xelo-0.1.0/src/ai_sbom/adapters/typescript/prompts.py +279 -0
- xelo-0.1.0/src/ai_sbom/ast_parser.py +298 -0
- xelo-0.1.0/src/ai_sbom/cdx_tools.py +197 -0
- xelo-0.1.0/src/ai_sbom/cli.py +377 -0
- xelo-0.1.0/src/ai_sbom/config.py +52 -0
- xelo-0.1.0/src/ai_sbom/core/__init__.py +21 -0
- xelo-0.1.0/src/ai_sbom/core/application_summary.py +447 -0
- xelo-0.1.0/src/ai_sbom/core/confidence.py +249 -0
- xelo-0.1.0/src/ai_sbom/core/ts_parser.py +1513 -0
- xelo-0.1.0/src/ai_sbom/core/verification.py +403 -0
- xelo-0.1.0/src/ai_sbom/deps.py +338 -0
- xelo-0.1.0/src/ai_sbom/extractor.py +652 -0
- xelo-0.1.0/src/ai_sbom/llm_client.py +188 -0
- xelo-0.1.0/src/ai_sbom/merger.py +360 -0
- xelo-0.1.0/src/ai_sbom/models.py +201 -0
- xelo-0.1.0/src/ai_sbom/normalization.py +10 -0
- xelo-0.1.0/src/ai_sbom/py.typed +0 -0
- xelo-0.1.0/src/ai_sbom/schemas/__init__.py +20 -0
- xelo-0.1.0/src/ai_sbom/schemas/aibom.schema.json +552 -0
- xelo-0.1.0/src/ai_sbom/serializer.py +195 -0
- xelo-0.1.0/src/ai_sbom/types.py +23 -0
- xelo-0.1.0/src/xelo.egg-info/PKG-INFO +137 -0
- xelo-0.1.0/src/xelo.egg-info/SOURCES.txt +63 -0
- xelo-0.1.0/src/xelo.egg-info/dependency_links.txt +1 -0
- xelo-0.1.0/src/xelo.egg-info/entry_points.txt +3 -0
- xelo-0.1.0/src/xelo.egg-info/requires.txt +24 -0
- xelo-0.1.0/src/xelo.egg-info/top_level.txt +1 -0
- xelo-0.1.0/tests/test_config.py +45 -0
- xelo-0.1.0/tests/test_cyclonedx.py +430 -0
- xelo-0.1.0/tests/test_data_classification.py +275 -0
- xelo-0.1.0/tests/test_deps.py +465 -0
- xelo-0.1.0/tests/test_extraction.py +473 -0
- xelo-0.1.0/tests/test_merger.py +464 -0
- xelo-0.1.0/tests/test_parser.py +175 -0
- xelo-0.1.0/tests/test_schema.py +100 -0
xelo-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
Copyright 2026 NuGuard AI
|
|
8
|
+
|
|
9
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
+
you may not use this file except in compliance with the License.
|
|
11
|
+
You may obtain a copy of the License at
|
|
12
|
+
|
|
13
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
+
|
|
15
|
+
Unless required by applicable law or agreed to in writing, software
|
|
16
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
+
See the License for the specific language governing permissions and
|
|
19
|
+
limitations under the License.
|
xelo-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xelo
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI SBOM generator with portable schema
|
|
5
|
+
Author: NuGuardAI
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://nuguard.ai
|
|
8
|
+
Keywords: sbom,aibom,cyclonedx,security,llm,agent
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Security
|
|
15
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: pydantic<3,>=2.7.0
|
|
20
|
+
Requires-Dist: structlog<26,>=24.0
|
|
21
|
+
Provides-Extra: ts
|
|
22
|
+
Requires-Dist: tree-sitter<1,>=0.23; extra == "ts"
|
|
23
|
+
Requires-Dist: tree-sitter-javascript<1,>=0.23; extra == "ts"
|
|
24
|
+
Requires-Dist: tree-sitter-typescript<1,>=0.23; extra == "ts"
|
|
25
|
+
Provides-Extra: cdx
|
|
26
|
+
Requires-Dist: cyclonedx-bom<8,>=4.4; extra == "cdx"
|
|
27
|
+
Provides-Extra: llm
|
|
28
|
+
Requires-Dist: litellm<2,>=1.40; extra == "llm"
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
|
|
32
|
+
Requires-Dist: ruff>=0.8.0; extra == "dev"
|
|
33
|
+
Requires-Dist: mypy>=1.10.0; extra == "dev"
|
|
34
|
+
Requires-Dist: cyclonedx-bom<8,>=4.4; extra == "dev"
|
|
35
|
+
Requires-Dist: tree-sitter<1,>=0.23; extra == "dev"
|
|
36
|
+
Requires-Dist: tree-sitter-javascript<1,>=0.23; extra == "dev"
|
|
37
|
+
Requires-Dist: tree-sitter-typescript<1,>=0.23; extra == "dev"
|
|
38
|
+
Requires-Dist: litellm<2,>=1.40; extra == "dev"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# Xelo
|
|
42
|
+
|
|
43
|
+
Xelo is an open-source AI SBOM generator for agentic and LLM-powered applications.
|
|
44
|
+
It scans code and configuration, produces AI-BOM JSON, and can export CycloneDX-compatible output for security and compliance workflows.
|
|
45
|
+
|
|
46
|
+
## Why Xelo
|
|
47
|
+
|
|
48
|
+
- Detects AI-specific components (agents, models, tools, prompts, datastores, auth, deployment artifacts).
|
|
49
|
+
- Works on mixed Python and TypeScript repositories.
|
|
50
|
+
- Uses deterministic extraction by default.
|
|
51
|
+
- Supports optional LLM enrichment when you explicitly enable it.
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
Install from PyPI:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install xelo
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Install for development:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install -e ".[dev]"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Quickstart
|
|
68
|
+
|
|
69
|
+
Generate an AI-BOM from a local path:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
Xelo scan path ./my-repo --format json --output sbom.json
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Validate a generated document:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
Xelo validate sbom.json
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Export the JSON schema used by the models:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
Xelo schema --output ai_bom.schema.json
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
CLI alias: `ai-sbom`.
|
|
88
|
+
|
|
89
|
+
## CLI Commands
|
|
90
|
+
|
|
91
|
+
| Command | Description |
|
|
92
|
+
| --- | --- |
|
|
93
|
+
| `Xelo scan path <PATH>` | Scan a local repository path |
|
|
94
|
+
| `Xelo scan repo <URL>` | Clone and scan a remote repository |
|
|
95
|
+
| `Xelo validate <FILE>` | Validate AI-BOM JSON against schema models |
|
|
96
|
+
| `Xelo schema --output <FILE>` | Export schema JSON |
|
|
97
|
+
|
|
98
|
+
Run `Xelo --help` or `Xelo <command> --help` for all flags.
|
|
99
|
+
|
|
100
|
+
## Configuration
|
|
101
|
+
|
|
102
|
+
`Xelo scan` can be configured via `.env` values and CLI flags. CLI flags take precedence.
|
|
103
|
+
|
|
104
|
+
Environment variables:
|
|
105
|
+
|
|
106
|
+
- `AISBOM_DETERMINISTIC_ONLY=true|false`
|
|
107
|
+
- `AISBOM_LLM_MODEL=<litellm model string>`
|
|
108
|
+
- `AISBOM_LLM_BUDGET_TOKENS=<int>`
|
|
109
|
+
- `AISBOM_LLM_API_KEY=<optional key>`
|
|
110
|
+
|
|
111
|
+
Example enabling enrichment:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
Xelo scan path ./my-repo --enable-llm --llm-model gpt-4o-mini --output sbom.json
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Development
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
pip install -e ".[dev]"
|
|
121
|
+
ruff check src tests
|
|
122
|
+
mypy src
|
|
123
|
+
pytest
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Project Docs
|
|
127
|
+
|
|
128
|
+
- [Contributing](./CONTRIBUTING.md)
|
|
129
|
+
- [Security Policy](./SECURITY.md)
|
|
130
|
+
- [Support](./SUPPORT.md)
|
|
131
|
+
- [Governance](./GOVERNANCE.md)
|
|
132
|
+
- [Roadmap](./ROADMAP.md)
|
|
133
|
+
- [Code of Conduct](./CODE_OF_CONDUCT.md)
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
Apache-2.0. See [LICENSE](./LICENSE).
|
xelo-0.1.0/README.md
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Xelo
|
|
2
|
+
|
|
3
|
+
Xelo is an open-source AI SBOM generator for agentic and LLM-powered applications.
|
|
4
|
+
It scans code and configuration, produces AI-BOM JSON, and can export CycloneDX-compatible output for security and compliance workflows.
|
|
5
|
+
|
|
6
|
+
## Why Xelo
|
|
7
|
+
|
|
8
|
+
- Detects AI-specific components (agents, models, tools, prompts, datastores, auth, deployment artifacts).
|
|
9
|
+
- Works on mixed Python and TypeScript repositories.
|
|
10
|
+
- Uses deterministic extraction by default.
|
|
11
|
+
- Supports optional LLM enrichment when you explicitly enable it.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
Install from PyPI:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install xelo
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Install for development:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install -e ".[dev]"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quickstart
|
|
28
|
+
|
|
29
|
+
Generate an AI-BOM from a local path:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
Xelo scan path ./my-repo --format json --output sbom.json
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Validate a generated document:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
Xelo validate sbom.json
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Export the JSON schema used by the models:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
Xelo schema --output ai_bom.schema.json
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
CLI alias: `ai-sbom`.
|
|
48
|
+
|
|
49
|
+
## CLI Commands
|
|
50
|
+
|
|
51
|
+
| Command | Description |
|
|
52
|
+
| --- | --- |
|
|
53
|
+
| `Xelo scan path <PATH>` | Scan a local repository path |
|
|
54
|
+
| `Xelo scan repo <URL>` | Clone and scan a remote repository |
|
|
55
|
+
| `Xelo validate <FILE>` | Validate AI-BOM JSON against schema models |
|
|
56
|
+
| `Xelo schema --output <FILE>` | Export schema JSON |
|
|
57
|
+
|
|
58
|
+
Run `Xelo --help` or `Xelo <command> --help` for all flags.
|
|
59
|
+
|
|
60
|
+
## Configuration
|
|
61
|
+
|
|
62
|
+
`Xelo scan` can be configured via `.env` values and CLI flags. CLI flags take precedence.
|
|
63
|
+
|
|
64
|
+
Environment variables:
|
|
65
|
+
|
|
66
|
+
- `AISBOM_DETERMINISTIC_ONLY=true|false`
|
|
67
|
+
- `AISBOM_LLM_MODEL=<litellm model string>`
|
|
68
|
+
- `AISBOM_LLM_BUDGET_TOKENS=<int>`
|
|
69
|
+
- `AISBOM_LLM_API_KEY=<optional key>`
|
|
70
|
+
|
|
71
|
+
Example enabling enrichment:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
Xelo scan path ./my-repo --enable-llm --llm-model gpt-4o-mini --output sbom.json
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Development
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install -e ".[dev]"
|
|
81
|
+
ruff check src tests
|
|
82
|
+
mypy src
|
|
83
|
+
pytest
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Project Docs
|
|
87
|
+
|
|
88
|
+
- [Contributing](./CONTRIBUTING.md)
|
|
89
|
+
- [Security Policy](./SECURITY.md)
|
|
90
|
+
- [Support](./SUPPORT.md)
|
|
91
|
+
- [Governance](./GOVERNANCE.md)
|
|
92
|
+
- [Roadmap](./ROADMAP.md)
|
|
93
|
+
- [Code of Conduct](./CODE_OF_CONDUCT.md)
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
Apache-2.0. See [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "xelo"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "AI SBOM generator with portable schema"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
authors = [{ name = "NuGuardAI" }]
|
|
13
|
+
keywords = ["sbom", "aibom", "cyclonedx", "security", "llm", "agent"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Topic :: Security",
|
|
21
|
+
"Topic :: Software Development :: Build Tools",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"pydantic>=2.7.0,<3",
|
|
25
|
+
"structlog>=24.0,<26",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://nuguard.ai"
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
# Accurate TypeScript/JavaScript AST parsing (highly recommended)
|
|
33
|
+
# Without this, Xelo falls back to regex-based TS parsing.
|
|
34
|
+
ts = [
|
|
35
|
+
"tree-sitter>=0.23,<1",
|
|
36
|
+
"tree-sitter-javascript>=0.23,<1",
|
|
37
|
+
"tree-sitter-typescript>=0.23,<1",
|
|
38
|
+
]
|
|
39
|
+
# Standard SBOM generation via cyclonedx-py CLI (highest fidelity)
|
|
40
|
+
cdx = [
|
|
41
|
+
"cyclonedx-bom>=4.4,<8",
|
|
42
|
+
]
|
|
43
|
+
# LLM-based enrichment: verification, confidence scoring, asset summaries
|
|
44
|
+
# Supports any provider via litellm model strings (OpenAI, Anthropic, Ollama, etc.)
|
|
45
|
+
# Required when using ExtractionConfig(deterministic_only=False)
|
|
46
|
+
llm = [
|
|
47
|
+
"litellm>=1.40,<2",
|
|
48
|
+
]
|
|
49
|
+
dev = [
|
|
50
|
+
"pytest>=8.0.0",
|
|
51
|
+
"pytest-cov>=5.0.0",
|
|
52
|
+
"ruff>=0.8.0",
|
|
53
|
+
"mypy>=1.10.0",
|
|
54
|
+
"cyclonedx-bom>=4.4,<8",
|
|
55
|
+
"tree-sitter>=0.23,<1",
|
|
56
|
+
"tree-sitter-javascript>=0.23,<1",
|
|
57
|
+
"tree-sitter-typescript>=0.23,<1",
|
|
58
|
+
"litellm>=1.40,<2",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
[project.scripts]
|
|
62
|
+
velo = "ai_sbom.cli:main"
|
|
63
|
+
ai-sbom = "ai_sbom.cli:main"
|
|
64
|
+
|
|
65
|
+
[tool.setuptools.packages.find]
|
|
66
|
+
where = ["src"]
|
|
67
|
+
|
|
68
|
+
[tool.setuptools.package-data]
|
|
69
|
+
"ai_sbom" = ["py.typed", "schemas/*.json"]
|
|
70
|
+
|
|
71
|
+
[tool.pytest.ini_options]
|
|
72
|
+
pythonpath = ["src"]
|
|
73
|
+
addopts = "-q"
|
|
74
|
+
markers = [
|
|
75
|
+
"smoke: end-to-end tests against live repositories (require network + git)",
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
[tool.ruff]
|
|
79
|
+
line-length = 100
|
|
80
|
+
target-version = "py311"
|
|
81
|
+
|
|
82
|
+
[tool.mypy]
|
|
83
|
+
python_version = "3.11"
|
|
84
|
+
strict = true
|
|
85
|
+
warn_unreachable = true
|
|
86
|
+
pretty = true
|
xelo-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""ai_sbom.adapters — pluggable framework detection adapters.
|
|
2
|
+
|
|
3
|
+
Sub-packages
|
|
4
|
+
------------
|
|
5
|
+
python/
|
|
6
|
+
AST-aware adapters for Python files. Each module targets one agentic
|
|
7
|
+
framework: LangGraph, OpenAI Agents, AutoGen, CrewAI, Semantic Kernel,
|
|
8
|
+
LlamaIndex, and generic LLM client detection.
|
|
9
|
+
|
|
10
|
+
typescript/
|
|
11
|
+
Tree-sitter (or regex) adapters for TypeScript/JavaScript files.
|
|
12
|
+
Mirrors the Python adapter set and adds: LangGraph TS, OpenAI Agents TS,
|
|
13
|
+
Bedrock Agents, Google ADK, DataStores, and Prompts.
|
|
14
|
+
|
|
15
|
+
Base classes / registry
|
|
16
|
+
-----------------------
|
|
17
|
+
base.py ``FrameworkAdapter`` and ``DetectionAdapter`` ABCs, plus
|
|
18
|
+
``ComponentDetection`` and ``RelationshipHint`` data classes.
|
|
19
|
+
registry.py ``default_framework_adapters()`` and ``default_registry()``
|
|
20
|
+
factory functions used by ``SbomExtractor``.
|
|
21
|
+
"""
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ai_sbom.types import ComponentType
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
# Legacy regex-adapter types (kept for backwards compatibility and non-Python
|
|
12
|
+
# file scanning)
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class AdapterMatch:
    """One regex hit recorded by ``RegexAdapter.detect``."""

    pattern: str  # source text of the compiled pattern that produced the hit
    line: int  # 1-based line number of the match start within the scanned content
    snippet: str  # matched text, truncated to at most 120 characters
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class AdapterDetection:
    """Aggregate result returned by a ``DetectionAdapter`` for one piece of content."""

    adapter_name: str  # ``name`` of the adapter that produced this detection
    component_type: ComponentType  # kind of component the adapter reports
    priority: int  # copied from the producing adapter
    canonical_name: str  # configured name, or one derived from the first match snippet
    metadata: dict[str, Any]  # adapter-supplied metadata
    matches: tuple[AdapterMatch, ...]  # all recorded hits, grouped by pattern order
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DetectionAdapter:
    """Base interface for detectors that inspect raw file content only.

    Subclasses are expected to set ``name`` and ``priority`` and to override
    ``detect``.
    """

    name: str
    priority: int

    def detect(self, content: str) -> AdapterDetection | None:
        """Inspect *content* and return a detection, or ``None`` if there is none.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RegexAdapter(DetectionAdapter):
    """Detector that reports content matching any of a fixed set of compiled regexes."""

    def __init__(
        self,
        *,
        name: str,
        component_type: ComponentType,
        priority: int,
        patterns: tuple[re.Pattern[str], ...],
        canonical_name: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Store the adapter configuration.

        Args:
            name: Adapter identifier, copied into every detection.
            component_type: Component type reported for every detection.
            priority: Priority copied into every detection.
            patterns: Compiled patterns, tried in the given order.
            canonical_name: Fixed canonical name; when falsy, a name is derived
                from the first recorded match.
            metadata: Optional metadata attached to every detection.
        """
        self.name = name
        self.component_type = component_type
        self.priority = priority
        self.patterns = patterns
        self.canonical_name = canonical_name
        # Copy so later mutation of the caller's dict cannot leak into this adapter.
        self.metadata = dict(metadata) if metadata else {}

    def detect(self, content: str) -> AdapterDetection | None:
        """Run every pattern over *content* and collect all matches.

        Returns:
            ``None`` when no pattern matches. Otherwise an ``AdapterDetection``
            holding every match (grouped by pattern order). Its canonical name
            is ``self.canonical_name`` or, when that is falsy, the first
            recorded snippet stripped, lowercased and with spaces replaced by
            underscores.
        """
        all_matches: list[AdapterMatch] = []
        for pattern in self.patterns:
            # finditer yields matches left to right, so the newline count can be
            # advanced incrementally instead of re-scanning from offset 0.
            line = 1
            scanned_to = 0
            for match in pattern.finditer(content):
                start = match.start()
                line += content.count("\n", scanned_to, start)
                scanned_to = start
                all_matches.append(
                    AdapterMatch(
                        pattern=pattern.pattern,
                        line=line,
                        snippet=match.group(0)[:120],
                    )
                )

        if not all_matches:
            return None

        canonical = (
            self.canonical_name
            or all_matches[0].snippet.strip().lower().replace(" ", "_")
        )
        return AdapterDetection(
            adapter_name=self.name,
            component_type=self.component_type,
            priority=self.priority,
            canonical_name=canonical,
            # Each detection gets its own dict so consumers cannot mutate the
            # adapter's metadata (or another detection's) through it.
            metadata=dict(self.metadata),
            matches=tuple(all_matches),
        )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# Rich AST-aware adapter types
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
@dataclass(frozen=True)
class RelationshipHint:
    """Deferred relationship between two components, resolved after node creation."""

    # NOTE(review): the two *_canonical fields presumably hold the
    # ``canonical_name`` of the corresponding ``ComponentDetection`` — confirm
    # against the code that resolves hints.
    source_canonical: str
    source_type: ComponentType
    target_canonical: str
    target_type: ComponentType
    relationship_type: str  # "USES", "CALLS", "ACCESSES", etc.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class ComponentDetection:
    """A single detected AI component, produced by a FrameworkAdapter.

    Richer than ``AdapterDetection``: carries file/line context,
    evidence kind, and pre-computed metadata from AST analysis.
    """

    component_type: ComponentType
    canonical_name: str  # lowercase, stable identifier used for dedup
    display_name: str  # human-readable name for the node
    adapter_name: str  # ``name`` of the adapter that emitted this detection
    priority: int  # adapter priority; FrameworkAdapter documents lower = higher priority
    confidence: float  # detection confidence; NOTE(review): range presumably 0.0-1.0 — confirm
    metadata: dict[str, Any] = field(default_factory=dict)
    file_path: str = ""  # path of the file the component was found in ("" if unset)
    line: int = 0  # line of the evidence; 0 is the default when not provided
    snippet: str = ""  # source excerpt supporting the detection
    evidence_kind: str = "regex"  # "ast_import" | "ast_instantiation" | "ast_call" | "regex"
    # Relationships to other components detected in the same pass
    relationships: list[RelationshipHint] = field(default_factory=list)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class FrameworkAdapter:
    """Base class for AST-aware framework adapters.

    Unlike ``RegexAdapter``, a ``FrameworkAdapter`` receives both the raw
    file content and the structured ``ParseResult`` from ``ast_parser.parse()``.
    It returns a list of ``ComponentDetection`` objects rather than a single
    ``AdapterDetection``.

    Subclasses must implement:
    - ``name: str``        — unique adapter identifier
    - ``priority: int``    — lower = higher priority during dedup
    - ``handles_imports``  — list of module prefixes that activate this adapter
    - ``extract()``        — main detection/extraction logic
    """

    name: str = "unknown"
    priority: int = 50
    # This class-level list is shared by every subclass that does not override
    # it; subclasses should assign their own list rather than mutate this one.
    handles_imports: list[str] = []  # module prefixes that trigger this adapter

    def can_handle(self, imports_present: set[str]) -> bool:
        """Return True if any of the file's imported module prefixes match.

        A module matches a prefix when it equals the prefix or is a dotted
        submodule of it (``"a.b"`` matches prefix ``"a"``; ``"ab"`` does not).
        """
        return any(
            mod == prefix or mod.startswith(prefix + ".")
            for mod in imports_present
            for prefix in self.handles_imports
        )

    def extract(
        self,
        content: str,
        file_path: str,
        parse_result: Any,  # ai_sbom.ast_parser.ParseResult
    ) -> list[ComponentDetection]:
        """Extract component detections from *file_path*.

        ``parse_result`` is a ``ParseResult`` from ``ast_parser.parse(content)``
        or ``None`` for non-Python files.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def _framework_node(self, file_path: str, line: int = 0) -> ComponentDetection:
        """Emit a FRAMEWORK presence node for this adapter's framework.

        Subclasses should call this at the start of ``extract()`` whenever
        ``can_handle()`` returned True, to guarantee a FRAMEWORK node is always
        emitted even if no higher-level components are detected.
        """
        # ComponentType is already imported at module level; no local import needed.
        framework_label = f"framework:{self.name}"
        return ComponentDetection(
            component_type=ComponentType.FRAMEWORK,
            canonical_name=framework_label,
            display_name=framework_label,
            adapter_name=self.name,
            priority=self.priority,
            confidence=0.95,
            metadata={"framework": self.name},
            file_path=file_path,
            line=line,
            snippet=f"import {self.name}",
            evidence_kind="ast_import",
        )
|