swhid-verification-tool 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swhid_verification_tool-0.1.0/CHANGELOG.md +23 -0
- swhid_verification_tool-0.1.0/LICENSE +21 -0
- swhid_verification_tool-0.1.0/MANIFEST.in +8 -0
- swhid_verification_tool-0.1.0/PKG-INFO +165 -0
- swhid_verification_tool-0.1.0/README.md +113 -0
- swhid_verification_tool-0.1.0/developer_guide.md +51 -0
- swhid_verification_tool-0.1.0/maintainer_guide.md +24 -0
- swhid_verification_tool-0.1.0/pyproject.toml +81 -0
- swhid_verification_tool-0.1.0/setup.cfg +4 -0
- swhid_verification_tool-0.1.0/swhid_tool/__init__.py +4 -0
- swhid_verification_tool-0.1.0/swhid_tool/api.py +34 -0
- swhid_verification_tool-0.1.0/swhid_tool/batch_processor.py +59 -0
- swhid_verification_tool-0.1.0/swhid_tool/cli.py +86 -0
- swhid_verification_tool-0.1.0/swhid_tool/core.py +96 -0
- swhid_verification_tool-0.1.0/swhid_tool/logging_config.py +38 -0
- swhid_verification_tool-0.1.0/swhid_tool/manager.py +27 -0
- swhid_verification_tool-0.1.0/swhid_tool/purl_parser.py +20 -0
- swhid_verification_tool-0.1.0/swhid_tool/scanner.py +71 -0
- swhid_verification_tool-0.1.0/swhid_tool/spdx_exporter.py +99 -0
- swhid_verification_tool-0.1.0/swhid_tool/strategies/__init__.py +4 -0
- swhid_verification_tool-0.1.0/swhid_tool/strategies/base.py +13 -0
- swhid_verification_tool-0.1.0/swhid_tool/strategies/cargo_strategy.py +121 -0
- swhid_verification_tool-0.1.0/swhid_tool/strategies/maven_strategy.py +186 -0
- swhid_verification_tool-0.1.0/swhid_tool/strategies/pypi_strategy.py +263 -0
- swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/PKG-INFO +165 -0
- swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/SOURCES.txt +37 -0
- swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/dependency_links.txt +1 -0
- swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/entry_points.txt +2 -0
- swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/requires.txt +27 -0
- swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/top_level.txt +1 -0
- swhid_verification_tool-0.1.0/tests/test_core.py +63 -0
- swhid_verification_tool-0.1.0/tests/test_exporter.py +30 -0
- swhid_verification_tool-0.1.0/tests/test_purl_parser.py +44 -0
- swhid_verification_tool-0.1.0/tests/test_scanner.py +50 -0
- swhid_verification_tool-0.1.0/tests/test_spdx3_model.py +73 -0
- swhid_verification_tool-0.1.0/tests/test_strategies.py +105 -0
- swhid_verification_tool-0.1.0/tests/test_swhid.py +16 -0
- swhid_verification_tool-0.1.0/tests/test_validation.py +47 -0
- swhid_verification_tool-0.1.0/user_guide.md +82 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-06-27
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Multi-Ecosystem Support**: Verification strategies for PyPI (via PEP 740 and project URLs), Cargo/Crates.io (with deterministic normalization), and Maven Central (with SCM metadata validation and source inspections).
|
|
12
|
+
- **Provenance Mapping**: Resolution rules from Package URLs (PURLs) to verified Software Heritage Identifiers (SWHIDs).
|
|
13
|
+
- **Attestation Parsing**: PyPI strategy extracts commit SHAs from Sigstore/PEP 740 attestations via Fulcio certificates.
|
|
14
|
+
- **VCS Normalization**: Restoring Cargo packages to match the original VCS source states by undoing registry modifications.
|
|
15
|
+
- **SPDX 3.0 Compliance**: JSON-LD export compliant with official SPDX 3.0 RDF-based models, and test suites with SHACL shape validation.
|
|
16
|
+
- **Archival Integration**: Automation for Software Heritage "Save Code Now" trigger functionality.
|
|
17
|
+
- **Local Scanner**: Utility to check local installation paths against verified SPDX manifests containing SWHID targets.
|
|
18
|
+
- **FastAPI Endpoint**: HTTP API interface for remote resolution in `swhid_tool/api.py`.
|
|
19
|
+
- **Command Line Interface**: Rich-powered CLI console interface for single PURL resolution, batch processing, path scanning, and status checking.
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- Refactored project modules from independent scripts into the structured Python package `swhid_tool`.
|
|
23
|
+
- Consolidated tests under the `tests/` directory with multi-strategy mock suites.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Odysseas Kalaitsidis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: swhid-verification-tool
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A verification framework to map PURLs to verified SWHIDs
|
|
5
|
+
Author: Odysseas Kalaitsidis
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/OdysseasKalaitsidis/SWHID_POC
|
|
8
|
+
Project-URL: Repository, https://github.com/OdysseasKalaitsidis/SWHID_POC
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/OdysseasKalaitsidis/SWHID_POC/issues
|
|
10
|
+
Project-URL: Documentation, https://github.com/OdysseasKalaitsidis/SWHID_POC#readme
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Information Technology
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Security
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: requests>=2.31.0
|
|
27
|
+
Requires-Dist: swh.model>=6.4.0
|
|
28
|
+
Requires-Dist: rich>=13.7.0
|
|
29
|
+
Requires-Dist: cryptography>=42.0.0
|
|
30
|
+
Requires-Dist: spdx-tools>=0.8.2
|
|
31
|
+
Requires-Dist: semantic-version>=2.10.0
|
|
32
|
+
Requires-Dist: typer>=0.9.0
|
|
33
|
+
Requires-Dist: fastapi>=0.109.0
|
|
34
|
+
Requires-Dist: uvicorn>=0.27.0
|
|
35
|
+
Requires-Dist: packageurl-python>=0.11.2
|
|
36
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
37
|
+
Requires-Dist: pyshacl>=0.25.0
|
|
38
|
+
Requires-Dist: rdflib>=7.0.0
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: ruff; extra == "dev"
|
|
41
|
+
Requires-Dist: mypy; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest; extra == "dev"
|
|
43
|
+
Requires-Dist: responses; extra == "dev"
|
|
44
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
45
|
+
Requires-Dist: build; extra == "dev"
|
|
46
|
+
Requires-Dist: twine; extra == "dev"
|
|
47
|
+
Provides-Extra: test
|
|
48
|
+
Requires-Dist: pytest; extra == "test"
|
|
49
|
+
Requires-Dist: responses; extra == "test"
|
|
50
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
51
|
+
Dynamic: license-file
|
|
52
|
+
|
|
53
|
+
# SWHID Verification Tool
|
|
54
|
+
|
|
55
|
+
[Google Summer of Code](https://summerofcode.withgoogle.com/)
|
|
56
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
57
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
58
|
+
[Software Heritage](https://www.softwareheritage.org/)
|
|
59
|
+
|
|
60
|
+
A verification framework designed to map Package URLs (PURLs) to verified Software Heritage Identifiers (SWHIDs). This tool ensures cryptographic and structural provenance by establishing a verifiable link between software distributions and their canonical source code archived in the Software Heritage (SWH) ecosystem.
|
|
61
|
+
|
|
62
|
+
## Key Features
|
|
63
|
+
|
|
64
|
+
* **Multi-Ecosystem Support**: Specialized verification strategies for PyPI, Crates.io (Cargo), and Maven Central.
|
|
65
|
+
* **High-Confidence Provenance**:
|
|
66
|
+
* **PyPI**: Extraction of commit SHAs from Sigstore/PEP 740 attestations via Fulcio certificates.
|
|
67
|
+
* **Cargo**: Deterministic normalization and restoration of original project state for byte-for-byte matching.
|
|
68
|
+
* **Maven**: SCM metadata resolution and verification of cleaned source artifacts.
|
|
69
|
+
* **SPDX 3.0 Compliance**: Generation of RDF-compatible JSON-LD manifests using official SPDX models.
|
|
70
|
+
* **Automated Archival Integration**: Proactive use of the Software Heritage "Save Code Now" API.
|
|
71
|
+
* **Installation Verification**: Local filesystem scanner to audit installed packages against verified SWHID ground truth.
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
### Prerequisites
|
|
76
|
+
- Python 3.9+
|
|
77
|
+
- [Optional] A Software Heritage API Token for higher rate limits.
|
|
78
|
+
|
|
79
|
+
### Setup
|
|
80
|
+
```bash
|
|
81
|
+
git clone https://github.com/OdysseasKalaitsidis/SWHID_POC
|
|
82
|
+
cd SWHID_POC
|
|
83
|
+
python -m venv venv
|
|
84
|
+
source venv/bin/activate # Use .\venv\Scripts\activate on Windows
|
|
85
|
+
pip install -r requirements.txt
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Configuration
|
|
89
|
+
|
|
90
|
+
The tool can be configured via environment variables or a `.env` file:
|
|
91
|
+
|
|
92
|
+
| Variable | Description | Default |
|
|
93
|
+
| :--- | :--- | :--- |
|
|
94
|
+
| `SWH_TOKEN` | Software Heritage API Authentication Token | None |
|
|
95
|
+
| `CACHE_DIR` | Directory for caching resolution results | `./cache` |
|
|
96
|
+
| `LOG_LEVEL` | Logging verbosity (DEBUG, INFO, ERROR) | `INFO` |
|
|
97
|
+
|
|
98
|
+
## Usage
|
|
99
|
+
|
|
100
|
+
### Quick Start
|
|
101
|
+
Map a single PURL to a verified SWHID immediately:
|
|
102
|
+
```bash
|
|
103
|
+
python -m swhid_tool.cli swhid-map pkg:pypi/six@1.17.0
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Batch Processing
|
|
107
|
+
Generate an SPDX 3.0 dataset for multiple PURLs:
|
|
108
|
+
```bash
|
|
109
|
+
python -m swhid_tool.cli batch-process input_purls.txt output_report.jsonld
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Integrity Auditing
|
|
113
|
+
Verify a local directory against a verified manifest:
|
|
114
|
+
```bash
|
|
115
|
+
python -m swhid_tool.cli verify-path /path/to/installed/library manifest.jsonld
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### REST API
|
|
119
|
+
Deploy as a service using FastAPI:
|
|
120
|
+
```bash
|
|
121
|
+
python -m uvicorn swhid_tool.api:app --host 0.0.0.0 --port 8000
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Architecture
|
|
125
|
+
|
|
126
|
+
The system utilizes a strategy-based pattern to decouple ecosystem-specific logic from the core resolution engine.
|
|
127
|
+
|
|
128
|
+
```mermaid
|
|
129
|
+
graph TD
|
|
130
|
+
CLI[CLI / API] --> Manager[SWHID Manager]
|
|
131
|
+
Manager --> PURL[PURL Parser]
|
|
132
|
+
Manager --> StrategyRouter{Strategy Router}
|
|
133
|
+
StrategyRouter --> PyPI[PyPI Strategy]
|
|
134
|
+
StrategyRouter --> Cargo[Cargo Strategy]
|
|
135
|
+
StrategyRouter --> Maven[Maven Strategy]
|
|
136
|
+
PyPI --> SWH[SWH API / Archive]
|
|
137
|
+
Cargo --> SWH
|
|
138
|
+
Maven --> SWH
|
|
139
|
+
Manager --> Exporter[SPDX 3.0 Exporter]
|
|
140
|
+
Exporter --> JSONLD[JSON-LD Manifest]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Validation and Standards
|
|
144
|
+
|
|
145
|
+
Verification findings are exported as SPDX 3.0 documents. Conformance of the generated JSON-LD manifests is checked through SHACL shape validation in the `tests/test_validation.py` suite.
|
|
146
|
+
|
|
147
|
+
## Documentation
|
|
148
|
+
|
|
149
|
+
Detailed guides for different stakeholders:
|
|
150
|
+
- [**User Guide**](user_guide.md): CLI reference, API specifications, and troubleshooting.
|
|
151
|
+
- [**Developer Guide**](developer_guide.md): Extending the tool to new ecosystems and core internals.
|
|
152
|
+
- [**Maintainer Guide**](maintainer_guide.md): Best practices for enabling high-confidence verifiability.
|
|
153
|
+
|
|
154
|
+
## Contributing
|
|
155
|
+
|
|
156
|
+
Contributions are welcome! Please see the [Developer Guide](developer_guide.md) for setup instructions and coding standards.
|
|
157
|
+
|
|
158
|
+
## License
|
|
159
|
+
|
|
160
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
161
|
+
|
|
162
|
+
## Acknowledgments
|
|
163
|
+
|
|
164
|
+
This project was developed as part of the **Google Summer of Code (GSoC) 2026** program, under the mentorship of **Software Heritage**.
|
|
165
|
+
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# SWHID Verification Tool
|
|
2
|
+
|
|
3
|
+
[Google Summer of Code](https://summerofcode.withgoogle.com/)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
6
|
+
[Software Heritage](https://www.softwareheritage.org/)
|
|
7
|
+
|
|
8
|
+
A verification framework designed to map Package URLs (PURLs) to verified Software Heritage Identifiers (SWHIDs). This tool ensures cryptographic and structural provenance by establishing a verifiable link between software distributions and their canonical source code archived in the Software Heritage (SWH) ecosystem.
|
|
9
|
+
|
|
10
|
+
## Key Features
|
|
11
|
+
|
|
12
|
+
* **Multi-Ecosystem Support**: Specialized verification strategies for PyPI, Crates.io (Cargo), and Maven Central.
|
|
13
|
+
* **High-Confidence Provenance**:
|
|
14
|
+
* **PyPI**: Extraction of commit SHAs from Sigstore/PEP 740 attestations via Fulcio certificates.
|
|
15
|
+
* **Cargo**: Deterministic normalization and restoration of original project state for byte-for-byte matching.
|
|
16
|
+
* **Maven**: SCM metadata resolution and verification of cleaned source artifacts.
|
|
17
|
+
* **SPDX 3.0 Compliance**: Generation of RDF-compatible JSON-LD manifests using official SPDX models.
|
|
18
|
+
* **Automated Archival Integration**: Proactive use of the Software Heritage "Save Code Now" API.
|
|
19
|
+
* **Installation Verification**: Local filesystem scanner to audit installed packages against verified SWHID ground truth.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
### Prerequisites
|
|
24
|
+
- Python 3.9+
|
|
25
|
+
- [Optional] A Software Heritage API Token for higher rate limits.
|
|
26
|
+
|
|
27
|
+
### Setup
|
|
28
|
+
```bash
|
|
29
|
+
git clone https://github.com/OdysseasKalaitsidis/SWHID_POC
|
|
30
|
+
cd SWHID_POC
|
|
31
|
+
python -m venv venv
|
|
32
|
+
source venv/bin/activate # Use .\venv\Scripts\activate on Windows
|
|
33
|
+
pip install -r requirements.txt
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Configuration
|
|
37
|
+
|
|
38
|
+
The tool can be configured via environment variables or a `.env` file:
|
|
39
|
+
|
|
40
|
+
| Variable | Description | Default |
|
|
41
|
+
| :--- | :--- | :--- |
|
|
42
|
+
| `SWH_TOKEN` | Software Heritage API Authentication Token | None |
|
|
43
|
+
| `CACHE_DIR` | Directory for caching resolution results | `./cache` |
|
|
44
|
+
| `LOG_LEVEL` | Logging verbosity (DEBUG, INFO, ERROR) | `INFO` |
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
|
|
48
|
+
### Quick Start
|
|
49
|
+
Map a single PURL to a verified SWHID immediately:
|
|
50
|
+
```bash
|
|
51
|
+
python -m swhid_tool.cli swhid-map pkg:pypi/six@1.17.0
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Batch Processing
|
|
55
|
+
Generate an SPDX 3.0 dataset for multiple PURLs:
|
|
56
|
+
```bash
|
|
57
|
+
python -m swhid_tool.cli batch-process input_purls.txt output_report.jsonld
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Integrity Auditing
|
|
61
|
+
Verify a local directory against a verified manifest:
|
|
62
|
+
```bash
|
|
63
|
+
python -m swhid_tool.cli verify-path /path/to/installed/library manifest.jsonld
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### REST API
|
|
67
|
+
Deploy as a service using FastAPI:
|
|
68
|
+
```bash
|
|
69
|
+
python -m uvicorn swhid_tool.api:app --host 0.0.0.0 --port 8000
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Architecture
|
|
73
|
+
|
|
74
|
+
The system utilizes a strategy-based pattern to decouple ecosystem-specific logic from the core resolution engine.
|
|
75
|
+
|
|
76
|
+
```mermaid
|
|
77
|
+
graph TD
|
|
78
|
+
CLI[CLI / API] --> Manager[SWHID Manager]
|
|
79
|
+
Manager --> PURL[PURL Parser]
|
|
80
|
+
Manager --> StrategyRouter{Strategy Router}
|
|
81
|
+
StrategyRouter --> PyPI[PyPI Strategy]
|
|
82
|
+
StrategyRouter --> Cargo[Cargo Strategy]
|
|
83
|
+
StrategyRouter --> Maven[Maven Strategy]
|
|
84
|
+
PyPI --> SWH[SWH API / Archive]
|
|
85
|
+
Cargo --> SWH
|
|
86
|
+
Maven --> SWH
|
|
87
|
+
Manager --> Exporter[SPDX 3.0 Exporter]
|
|
88
|
+
Exporter --> JSONLD[JSON-LD Manifest]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Validation and Standards
|
|
92
|
+
|
|
93
|
+
Verification findings are exported as SPDX 3.0 documents. Conformance of the generated JSON-LD manifests is checked through SHACL shape validation in the `tests/test_validation.py` suite.
|
|
94
|
+
|
|
95
|
+
## Documentation
|
|
96
|
+
|
|
97
|
+
Detailed guides for different stakeholders:
|
|
98
|
+
- [**User Guide**](user_guide.md): CLI reference, API specifications, and troubleshooting.
|
|
99
|
+
- [**Developer Guide**](developer_guide.md): Extending the tool to new ecosystems and core internals.
|
|
100
|
+
- [**Maintainer Guide**](maintainer_guide.md): Best practices for enabling high-confidence verifiability.
|
|
101
|
+
|
|
102
|
+
## Contributing
|
|
103
|
+
|
|
104
|
+
Contributions are welcome! Please see the [Developer Guide](developer_guide.md) for setup instructions and coding standards.
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
109
|
+
|
|
110
|
+
## Acknowledgments
|
|
111
|
+
|
|
112
|
+
This project was developed as part of the **Google Summer of Code (GSoC) 2026** program, under the mentorship of **Software Heritage**.
|
|
113
|
+
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Developer Guide
|
|
2
|
+
|
|
3
|
+
This guide is intended for developers who wish to contribute to the SWHID Verification Tool or extend its functionality.
|
|
4
|
+
|
|
5
|
+
## Core Architecture
|
|
6
|
+
|
|
7
|
+
The tool follows a **Strategy Pattern** to handle different package ecosystems.
|
|
8
|
+
|
|
9
|
+
### `SWHIDManager`
|
|
10
|
+
The central orchestrator that routes PURLs to the appropriate `VerificationStrategy`.
|
|
11
|
+
|
|
12
|
+
### `VerificationStrategy`
|
|
13
|
+
An abstract base class defined in `swhid_tool/strategies/base.py`. Every ecosystem (PyPI, Cargo, etc.) implements this class to provide:
|
|
14
|
+
1. **Source Discovery**: Finding the canonical source repository or sdist.
|
|
15
|
+
2. **Normalization**: Cleaning the source to match SWH's archival format.
|
|
16
|
+
3. **Verification**: Comparing computed SWHIDs with archived ones.
|
|
17
|
+
|
|
18
|
+
## Extending the Tool
|
|
19
|
+
|
|
20
|
+
To add support for a new ecosystem (e.g., `npm`):
|
|
21
|
+
|
|
22
|
+
1. Create a new strategy class in `swhid_tool/strategies/npm_strategy.py`.
|
|
23
|
+
2. Inherit from `VerificationStrategy`.
|
|
24
|
+
3. Register the new strategy in `SWHIDManager.__init__` within `swhid_tool/manager.py`.
|
|
25
|
+
|
|
26
|
+
## Testing
|
|
27
|
+
|
|
28
|
+
Run the test suite using `pytest`:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pytest tests/
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Individual test modules:
|
|
35
|
+
- `test_core.py`: SWHID computation and SWH client logic.
|
|
36
|
+
- `test_purl_parser.py`: PURL parsing for all supported ecosystems.
|
|
37
|
+
- `test_strategies.py`: Strategy-level unit tests (Cargo, Maven, PyPI).
|
|
38
|
+
- `test_scanner.py`: Installation scanner directory auditing.
|
|
39
|
+
- `test_spdx3_model.py`: SPDX 3.0 serialization.
|
|
40
|
+
- `test_exporter.py`: SPDX 3.0 JSON-LD export.
|
|
41
|
+
- `test_swhid.py`: End-to-end CLI smoke test.
|
|
42
|
+
- `test_validation.py`: SHACL validation of generated manifests.
|
|
43
|
+
|
|
44
|
+
## Development Environment Setup
|
|
45
|
+
|
|
46
|
+
1. Install development dependencies:
|
|
47
|
+
```bash
|
|
48
|
+
pip install -r requirements.txt
|
|
49
|
+
pip install pytest pytest-cov
|
|
50
|
+
```
|
|
51
|
+
2. Set up a local cache directory to speed up repeated resolutions.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Maintainer Guide
|
|
2
|
+
|
|
3
|
+
This guide is for package maintainers who want to ensure their packages are easily verifiable using SWHIDs.
|
|
4
|
+
|
|
5
|
+
## Why SWHIDs?
|
|
6
|
+
SWHIDs provide a persistent, cryptographic link to the exact source code of a package version. By ensuring your package is SWHID-verifiable, you provide users with high-confidence provenance.
|
|
7
|
+
|
|
8
|
+
## Best Practices for Verifiability
|
|
9
|
+
|
|
10
|
+
### 1. Use Sigstore Attestations (PyPI)
|
|
11
|
+
For Python packages, use [Sigstore](https://www.sigstore.dev/) to sign your releases. This tool extracts the git commit SHA from the Sigstore certificate to verify that the sdist matches the source repository.
|
|
12
|
+
|
|
13
|
+
### 2. Include SCM Metadata (Maven/Cargo)
|
|
14
|
+
Ensure your package metadata includes a valid `scm` (Maven) or `repository` (Cargo) URL. The tool uses this to locate the source code for comparison.
|
|
15
|
+
|
|
16
|
+
### 3. Clean Releases
|
|
17
|
+
Avoid including generated files (like `.pyc`, compiled binaries, or `.egg-info`) in your source distributions (sdists) unless they are absolutely necessary. The closer the sdist matches the git tree, the higher the verification confidence.
|
|
18
|
+
|
|
19
|
+
## Verifying Your Own Package
|
|
20
|
+
You can verify your package's archival status by running:
|
|
21
|
+
```bash
|
|
22
|
+
python -m swhid_tool.cli swhid-map pkg:<ecosystem>/<name>@<version>
|
|
23
|
+
```
|
|
24
|
+
If the tool reports a low confidence score, check if your source distribution contains extra files not present in the git repository.
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "swhid-verification-tool"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A verification framework to map PURLs to verified SWHIDs"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Odysseas Kalaitsidis"}
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Intended Audience :: Information Technology",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Security",
|
|
27
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"requests>=2.31.0",
|
|
31
|
+
"swh.model>=6.4.0",
|
|
32
|
+
"rich>=13.7.0",
|
|
33
|
+
"cryptography>=42.0.0",
|
|
34
|
+
"spdx-tools>=0.8.2",
|
|
35
|
+
"semantic-version>=2.10.0",
|
|
36
|
+
"typer>=0.9.0",
|
|
37
|
+
"fastapi>=0.109.0",
|
|
38
|
+
"uvicorn>=0.27.0",
|
|
39
|
+
"packageurl-python>=0.11.2",
|
|
40
|
+
"python-multipart>=0.0.9",
|
|
41
|
+
"pyshacl>=0.25.0",
|
|
42
|
+
"rdflib>=7.0.0",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
dev = [
|
|
47
|
+
"ruff",
|
|
48
|
+
"mypy",
|
|
49
|
+
"pytest",
|
|
50
|
+
"responses",
|
|
51
|
+
"pytest-cov",
|
|
52
|
+
"build",
|
|
53
|
+
"twine",
|
|
54
|
+
]
|
|
55
|
+
test = [
|
|
56
|
+
"pytest",
|
|
57
|
+
"responses",
|
|
58
|
+
"pytest-cov",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
[project.urls]
|
|
62
|
+
Homepage = "https://github.com/OdysseasKalaitsidis/SWHID_POC"
|
|
63
|
+
Repository = "https://github.com/OdysseasKalaitsidis/SWHID_POC"
|
|
64
|
+
"Bug Tracker" = "https://github.com/OdysseasKalaitsidis/SWHID_POC/issues"
|
|
65
|
+
Documentation = "https://github.com/OdysseasKalaitsidis/SWHID_POC#readme"
|
|
66
|
+
|
|
67
|
+
[project.scripts]
|
|
68
|
+
swhid-tool = "swhid_tool.cli:app"
|
|
69
|
+
|
|
70
|
+
[tool.setuptools.packages.find]
|
|
71
|
+
where = ["."]
|
|
72
|
+
include = ["swhid_tool*"]
|
|
73
|
+
|
|
74
|
+
[tool.ruff]
|
|
75
|
+
line-length = 100
|
|
76
|
+
target-version = "py39"
|
|
77
|
+
|
|
78
|
+
[tool.mypy]
|
|
79
|
+
python_version = "3.9"
|
|
80
|
+
strict = true
|
|
81
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Odysseas Kalaitsidis
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
from fastapi import FastAPI, Query, HTTPException
|
|
5
|
+
from swhid_tool.manager import SWHIDManager
|
|
6
|
+
from swhid_tool.logging_config import setup_logging
|
|
7
|
+
from typing import Dict, Any
|
|
8
|
+
|
|
9
|
+
# Importing this module has side effects: setup_logging() is called and the
# shared FastAPI application and SWHIDManager objects below are created once,
# at import time.
setup_logging()
|
|
10
|
+
app = FastAPI(title="SWHID Verification API")
|
|
11
|
+
# Single module-level manager instance; the /resolve handler uses it for every request.
manager = SWHIDManager()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.get("/resolve")
async def resolve_purl(purl: str = Query(..., description="The Package URL to resolve")):
    """
    Resolve a PURL to a SWHID via the module-level ``manager``.

    Args:
        purl: Required query parameter holding the Package URL to resolve.

    Returns:
        A dict with the keys ``purl``, ``swhid``, ``confidence``, ``strategy``,
        ``status`` and ``details``. ``strategy`` falls back to the result's
        ``name`` entry and then to ``"unknown"``; ``status`` defaults to
        ``"Done"``; ``details`` is the unmodified result returned by
        ``manager.resolve``.

    Raises:
        HTTPException: with status 400 and the exception text as detail when
            resolution (or reading the result) raises any exception.
    """
    # NOTE(review): manager.resolve is called synchronously inside an async
    # handler; if it performs blocking I/O it will stall the event loop while
    # it runs -- confirm against the manager implementation.
    try:
        result = manager.resolve(purl)
        return {
            "purl": result.get("purl"),
            "swhid": result.get("swhid"),
            "confidence": result.get("confidence"),
            "strategy": result.get("strategy", result.get("name", "unknown")),
            "status": result.get("status", "Done"),
            "details": result,
        }
    except Exception as e:
        # NOTE(review): every failure, including internal ones, is reported as
        # a client error (400) with the raw exception text. Kept as-is for
        # compatibility; the original exception is now chained for debugging.
        raise HTTPException(status_code=400, detail=str(e)) from e
|
|
31
|
+
|
|
32
|
+
@app.get("/health")
async def health():
    """Liveness probe: always answers with a constant ``{"status": "ok"}`` payload."""
    payload = {"status": "ok"}
    return payload
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Odysseas Kalaitsidis
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import time
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import logging
|
|
8
|
+
from typing import List, Dict, Any
|
|
9
|
+
from swhid_tool.manager import SWHIDManager
|
|
10
|
+
from rich.progress import Progress
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class BatchProcessor:
    """Resolve a list of PURLs through a manager, caching each result as JSON on disk.

    Successful results are stored as one ``<sanitised purl>.json`` file per
    PURL inside ``cache_dir`` and are reused on later runs. Failed
    resolutions are reported in the returned list but are not cached.
    """

    def __init__(
        self,
        manager: "SWHIDManager",
        cache_dir: str = "cache",
        delay_seconds: float = 0.5,
    ):
        """Store the collaborators and make sure the cache directory exists.

        Args:
            manager: Object providing ``resolve(purl)`` and
                ``swh.trigger_save_code_now(repo_url)``.
            cache_dir: Directory holding the per-PURL JSON cache files.
            delay_seconds: Pause after each non-cached resolution attempt;
                values <= 0 disable the pause.
        """
        self.manager = manager
        self.cache_dir = cache_dir
        self.delay_seconds = delay_seconds
        # exist_ok avoids the check-then-create race between concurrent runs.
        os.makedirs(cache_dir, exist_ok=True)

    def _cache_path(self, purl: str) -> str:
        """Return the cache file path for *purl* (``:`` and ``/`` become ``_``)."""
        # NOTE(review): distinct PURLs that differ only in ':' / '/' versus '_'
        # map to the same file; the naming is kept so existing caches stay valid.
        return os.path.join(self.cache_dir, f"{purl.replace(':', '_').replace('/', '_')}.json")

    def _read_cache(self, cache_file: str) -> Any:
        """Return the cached JSON value, or ``None`` when it is missing or unreadable."""
        try:
            with open(cache_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            # A damaged entry is treated as a cache miss instead of aborting the batch.
            logger.warning("Ignoring unreadable cache entry %s: %s", cache_file, e)
            return None

    def _write_cache(self, cache_file: str, result: Dict[str, Any]) -> None:
        """Best-effort atomic write of *result* to *cache_file*; failures are only logged."""
        tmp_file = f"{cache_file}.tmp"
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(result, f)
            # Rename only after a complete write so readers never see a truncated file.
            os.replace(tmp_file, cache_file)
        except (OSError, TypeError, ValueError) as e:
            logger.warning("Could not cache result in %s: %s", cache_file, e)
            try:
                os.remove(tmp_file)
            except OSError:
                pass

    def process_purls(self, purls: List[str]) -> List[Dict[str, Any]]:
        """Resolve every PURL in *purls*, showing a progress bar.

        For each PURL the cache is consulted first. Otherwise the manager
        resolves it; when the status is ``"Partial"`` or ``"Inferred"`` and a
        ``repo_url`` is present, a Save Code Now request is triggered and its
        outcome stored under ``save_code_now``. Any exception raised while
        resolving is logged and turned into an
        ``{"purl", "status": "Error", "reason"}`` entry.

        Returns:
            One entry per input PURL, in input order.
        """
        # Local import: rich is already a dependency of this module (rich.progress).
        from rich.markup import escape

        results: List[Dict[str, Any]] = []
        with Progress() as progress:
            task = progress.add_task("[cyan]Processing PURLs...", total=len(purls))

            for purl in purls:
                cache_file = self._cache_path(purl)
                cached = self._read_cache(cache_file)
                if cached is not None:
                    results.append(cached)
                    progress.update(task, advance=1)
                    continue

                try:
                    logger.info("Resolving %s", purl)
                    result = self.manager.resolve(purl)

                    # Trigger Save Code Now if not verified but repo is known
                    if result.get("status") in ("Partial", "Inferred") and "repo_url" in result:
                        repo_url = result["repo_url"]
                        # Escape interpolated text so brackets are not parsed as Rich markup.
                        progress.console.print(
                            f"[blue]Triggering Save Code Now for {escape(str(repo_url))}...[/blue]"
                        )
                        result["save_code_now"] = self.manager.swh.trigger_save_code_now(repo_url)
                except Exception as e:
                    logger.error("Error processing %s: %s", purl, e)
                    progress.console.print(
                        f"[red]Error processing {escape(str(purl))}: {escape(str(e))}[/red]"
                    )
                    results.append({"purl": purl, "status": "Error", "reason": str(e)})
                else:
                    # Recorded exactly once; a cache-write failure no longer adds a
                    # second "Error" entry for a PURL that resolved successfully.
                    results.append(result)
                    self._write_cache(cache_file, result)

                progress.update(task, advance=1)
                # Small delay to be polite
                if self.delay_seconds > 0:
                    time.sleep(self.delay_seconds)

        return results
|
|
59
|
+
|