swhid-verification-tool 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. swhid_verification_tool-0.1.0/CHANGELOG.md +23 -0
  2. swhid_verification_tool-0.1.0/LICENSE +21 -0
  3. swhid_verification_tool-0.1.0/MANIFEST.in +8 -0
  4. swhid_verification_tool-0.1.0/PKG-INFO +165 -0
  5. swhid_verification_tool-0.1.0/README.md +113 -0
  6. swhid_verification_tool-0.1.0/developer_guide.md +51 -0
  7. swhid_verification_tool-0.1.0/maintainer_guide.md +24 -0
  8. swhid_verification_tool-0.1.0/pyproject.toml +81 -0
  9. swhid_verification_tool-0.1.0/setup.cfg +4 -0
  10. swhid_verification_tool-0.1.0/swhid_tool/__init__.py +4 -0
  11. swhid_verification_tool-0.1.0/swhid_tool/api.py +34 -0
  12. swhid_verification_tool-0.1.0/swhid_tool/batch_processor.py +59 -0
  13. swhid_verification_tool-0.1.0/swhid_tool/cli.py +86 -0
  14. swhid_verification_tool-0.1.0/swhid_tool/core.py +96 -0
  15. swhid_verification_tool-0.1.0/swhid_tool/logging_config.py +38 -0
  16. swhid_verification_tool-0.1.0/swhid_tool/manager.py +27 -0
  17. swhid_verification_tool-0.1.0/swhid_tool/purl_parser.py +20 -0
  18. swhid_verification_tool-0.1.0/swhid_tool/scanner.py +71 -0
  19. swhid_verification_tool-0.1.0/swhid_tool/spdx_exporter.py +99 -0
  20. swhid_verification_tool-0.1.0/swhid_tool/strategies/__init__.py +4 -0
  21. swhid_verification_tool-0.1.0/swhid_tool/strategies/base.py +13 -0
  22. swhid_verification_tool-0.1.0/swhid_tool/strategies/cargo_strategy.py +121 -0
  23. swhid_verification_tool-0.1.0/swhid_tool/strategies/maven_strategy.py +186 -0
  24. swhid_verification_tool-0.1.0/swhid_tool/strategies/pypi_strategy.py +263 -0
  25. swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/PKG-INFO +165 -0
  26. swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/SOURCES.txt +37 -0
  27. swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/dependency_links.txt +1 -0
  28. swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/entry_points.txt +2 -0
  29. swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/requires.txt +27 -0
  30. swhid_verification_tool-0.1.0/swhid_verification_tool.egg-info/top_level.txt +1 -0
  31. swhid_verification_tool-0.1.0/tests/test_core.py +63 -0
  32. swhid_verification_tool-0.1.0/tests/test_exporter.py +30 -0
  33. swhid_verification_tool-0.1.0/tests/test_purl_parser.py +44 -0
  34. swhid_verification_tool-0.1.0/tests/test_scanner.py +50 -0
  35. swhid_verification_tool-0.1.0/tests/test_spdx3_model.py +73 -0
  36. swhid_verification_tool-0.1.0/tests/test_strategies.py +105 -0
  37. swhid_verification_tool-0.1.0/tests/test_swhid.py +16 -0
  38. swhid_verification_tool-0.1.0/tests/test_validation.py +47 -0
  39. swhid_verification_tool-0.1.0/user_guide.md +82 -0
@@ -0,0 +1,23 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-06-27
9
+
10
+ ### Added
11
+ - **Multi-Ecosystem Support**: Verification strategies for PyPI (via PEP 740 and project URLs), Cargo/Crates.io (with deterministic normalization), and Maven Central (with SCM metadata validation and source inspection).
12
+ - **Provenance Mapping**: Resolution rules from Package URLs (PURLs) to verified Software Heritage Identifiers (SWHIDs).
13
+ - **Attestation Parsing**: PyPI strategy extracts commit SHAs from Sigstore/PEP 740 attestations via Fulcio certificates.
14
+ - **VCS Normalization**: Restoring Cargo packages to match the original VCS source states by undoing registry modifications.
15
+ - **SPDX 3.0 Compliance**: JSON-LD export compliant with official SPDX 3.0 RDF-based models, and test suites with SHACL shape validation.
16
+ - **Archival Integration**: Automation for Software Heritage "Save Code Now" trigger functionality.
17
+ - **Local Scanner**: Utility to check local installation paths against verified SPDX manifests containing SWHID targets.
18
+ - **FastAPI Endpoint**: HTTP API interface for remote resolution in `swhid_tool/api.py`.
19
+ - **Command Line Interface**: Rich-powered command-line interface for single PURL resolution, batch processing, path scanning, and status checking.
20
+
21
+ ### Changed
22
+ - Refactored project modules from independent scripts into the structured Python package `swhid_tool`.
23
+ - Consolidated tests under the `tests/` directory with multi-strategy mock suites.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Odysseas Kalaitsidis
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,8 @@
1
+ include LICENSE
2
+ include README.md
3
+ include CHANGELOG.md
4
+ include developer_guide.md
5
+ include user_guide.md
6
+ include maintainer_guide.md
7
+ recursive-include swhid_tool *.py
8
+ recursive-include tests *.py
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.4
2
+ Name: swhid-verification-tool
3
+ Version: 0.1.0
4
+ Summary: A verification framework to map PURLs to verified SWHIDs
5
+ Author: Odysseas Kalaitsidis
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/OdysseasKalaitsidis/SWHID_POC
8
+ Project-URL: Repository, https://github.com/OdysseasKalaitsidis/SWHID_POC
9
+ Project-URL: Bug Tracker, https://github.com/OdysseasKalaitsidis/SWHID_POC/issues
10
+ Project-URL: Documentation, https://github.com/OdysseasKalaitsidis/SWHID_POC#readme
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Information Technology
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Security
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: requests>=2.31.0
27
+ Requires-Dist: swh.model>=6.4.0
28
+ Requires-Dist: rich>=13.7.0
29
+ Requires-Dist: cryptography>=42.0.0
30
+ Requires-Dist: spdx-tools>=0.8.2
31
+ Requires-Dist: semantic-version>=2.10.0
32
+ Requires-Dist: typer>=0.9.0
33
+ Requires-Dist: fastapi>=0.109.0
34
+ Requires-Dist: uvicorn>=0.27.0
35
+ Requires-Dist: packageurl-python>=0.11.2
36
+ Requires-Dist: python-multipart>=0.0.9
37
+ Requires-Dist: pyshacl>=0.25.0
38
+ Requires-Dist: rdflib>=7.0.0
39
+ Provides-Extra: dev
40
+ Requires-Dist: ruff; extra == "dev"
41
+ Requires-Dist: mypy; extra == "dev"
42
+ Requires-Dist: pytest; extra == "dev"
43
+ Requires-Dist: responses; extra == "dev"
44
+ Requires-Dist: pytest-cov; extra == "dev"
45
+ Requires-Dist: build; extra == "dev"
46
+ Requires-Dist: twine; extra == "dev"
47
+ Provides-Extra: test
48
+ Requires-Dist: pytest; extra == "test"
49
+ Requires-Dist: responses; extra == "test"
50
+ Requires-Dist: pytest-cov; extra == "test"
51
+ Dynamic: license-file
52
+
53
+ # SWHID Verification Tool
54
+
55
+ [![GSoC 2026](https://img.shields.io/badge/GSoC-2026-orange.svg)](https://summerofcode.withgoogle.com/)
56
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
57
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
58
+ [![Software Heritage](https://img.shields.io/badge/SWH-Archived-red.svg)](https://www.softwareheritage.org/)
59
+
60
+ A verification framework designed to map Package URLs (PURLs) to verified Software Heritage Identifiers (SWHIDs). This tool ensures cryptographic and structural provenance by establishing a verifiable link between software distributions and their canonical source code archived in the Software Heritage (SWH) ecosystem.
61
+
62
+ ## Key Features
63
+
64
+ * **Multi-Ecosystem Support**: Specialized verification strategies for PyPI, Crates.io (Cargo), and Maven Central.
65
+ * **High-Confidence Provenance**:
66
+ * **PyPI**: Extraction of commit SHAs from Sigstore/PEP 740 attestations via Fulcio certificates.
67
+ * **Cargo**: Deterministic normalization and restoration of original project state for byte-for-byte matching.
68
+ * **Maven**: SCM metadata resolution and verification of cleaned source artifacts.
69
+ * **SPDX 3.0 Compliance**: Generation of RDF-compatible JSON-LD manifests using official SPDX models.
70
+ * **Automated Archival Integration**: Proactive use of the Software Heritage "Save Code Now" API.
71
+ * **Installation Verification**: Local filesystem scanner to audit installed packages against verified SWHID ground truth.
72
+
73
+ ## Installation
74
+
75
+ ### Prerequisites
76
+ - Python 3.9+
77
+ - [Optional] A Software Heritage API Token for higher rate limits.
78
+
79
+ ### Setup
80
+ ```bash
81
+ git clone https://github.com/OdysseasKalaitsidis/SWHID_POC
82
+ cd SWHID_POC
83
+ python -m venv venv
84
+ source venv/bin/activate # Use .\venv\Scripts\activate on Windows
85
+ pip install -r requirements.txt
86
+ ```
87
+
88
+ ## Configuration
89
+
90
+ The tool can be configured via environment variables or a `.env` file:
91
+
92
+ | Variable | Description | Default |
93
+ | :--- | :--- | :--- |
94
+ | `SWH_TOKEN` | Software Heritage API Authentication Token | None |
95
+ | `CACHE_DIR` | Directory for caching resolution results | `./cache` |
96
+ | `LOG_LEVEL` | Logging verbosity (DEBUG, INFO, ERROR) | `INFO` |
97
+
98
+ ## Usage
99
+
100
+ ### Quick Start
101
+ Map a single PURL to a verified SWHID immediately:
102
+ ```bash
103
+ python -m swhid_tool.cli swhid-map pkg:pypi/six@1.17.0
104
+ ```
105
+
106
+ ### Batch Processing
107
+ Generate an SPDX 3.0 dataset for multiple PURLs:
108
+ ```bash
109
+ python -m swhid_tool.cli batch-process input_purls.txt output_report.jsonld
110
+ ```
111
+
112
+ ### Integrity Auditing
113
+ Verify a local directory against a verified manifest:
114
+ ```bash
115
+ python -m swhid_tool.cli verify-path /path/to/installed/library manifest.jsonld
116
+ ```
117
+
118
+ ### REST API
119
+ Deploy as a service using FastAPI:
120
+ ```bash
121
+ python -m uvicorn swhid_tool.api:app --host 0.0.0.0 --port 8000
122
+ ```
123
+
124
+ ## Architecture
125
+
126
+ The system utilizes a strategy-based pattern to decouple ecosystem-specific logic from the core resolution engine.
127
+
128
+ ```mermaid
129
+ graph TD
130
+ CLI[CLI / API] --> Manager[SWHID Manager]
131
+ Manager --> PURL[PURL Parser]
132
+ Manager --> StrategyRouter{Strategy Router}
133
+ StrategyRouter --> PyPI[PyPI Strategy]
134
+ StrategyRouter --> Cargo[Cargo Strategy]
135
+ StrategyRouter --> Maven[Maven Strategy]
136
+ PyPI --> SWH[SWH API / Archive]
137
+ Cargo --> SWH
138
+ Maven --> SWH
139
+ Manager --> Exporter[SPDX 3.0 Exporter]
140
+ Exporter --> JSONLD[JSON-LD Manifest]
141
+ ```
142
+
143
+ ## Validation and Standards
144
+
145
+ Verification findings are exported as SPDX 3.0 documents. Compliance with RDF standards is ensured through SHACL shape validation using the integrated `test_validation.py` suite.
146
+
147
+ ## Documentation
148
+
149
+ Detailed guides for different stakeholders:
150
+ - [**User Guide**](user_guide.md): CLI reference, API specifications, and troubleshooting.
151
+ - [**Developer Guide**](developer_guide.md): Extending the tool to new ecosystems and core internals.
152
+ - [**Maintainer Guide**](maintainer_guide.md): Best practices for enabling high-confidence verifiability.
153
+
154
+ ## Contributing
155
+
156
+ Contributions are welcome! Please see the [Developer Guide](developer_guide.md) for setup instructions and coding standards.
157
+
158
+ ## License
159
+
160
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
161
+
162
+ ## Acknowledgments
163
+
164
+ This project was developed as part of the **Google Summer of Code (GSoC) 2026** program, under the mentorship of **Software Heritage**.
165
+
@@ -0,0 +1,113 @@
1
+ # SWHID Verification Tool
2
+
3
+ [![GSoC 2026](https://img.shields.io/badge/GSoC-2026-orange.svg)](https://summerofcode.withgoogle.com/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
6
+ [![Software Heritage](https://img.shields.io/badge/SWH-Archived-red.svg)](https://www.softwareheritage.org/)
7
+
8
+ A verification framework designed to map Package URLs (PURLs) to verified Software Heritage Identifiers (SWHIDs). This tool ensures cryptographic and structural provenance by establishing a verifiable link between software distributions and their canonical source code archived in the Software Heritage (SWH) ecosystem.
9
+
10
+ ## Key Features
11
+
12
+ * **Multi-Ecosystem Support**: Specialized verification strategies for PyPI, Crates.io (Cargo), and Maven Central.
13
+ * **High-Confidence Provenance**:
14
+ * **PyPI**: Extraction of commit SHAs from Sigstore/PEP 740 attestations via Fulcio certificates.
15
+ * **Cargo**: Deterministic normalization and restoration of original project state for byte-for-byte matching.
16
+ * **Maven**: SCM metadata resolution and verification of cleaned source artifacts.
17
+ * **SPDX 3.0 Compliance**: Generation of RDF-compatible JSON-LD manifests using official SPDX models.
18
+ * **Automated Archival Integration**: Proactive use of the Software Heritage "Save Code Now" API.
19
+ * **Installation Verification**: Local filesystem scanner to audit installed packages against verified SWHID ground truth.
20
+
21
+ ## Installation
22
+
23
+ ### Prerequisites
24
+ - Python 3.9+
25
+ - [Optional] A Software Heritage API Token for higher rate limits.
26
+
27
+ ### Setup
28
+ ```bash
29
+ git clone https://github.com/OdysseasKalaitsidis/SWHID_POC
30
+ cd SWHID_POC
31
+ python -m venv venv
32
+ source venv/bin/activate # Use .\venv\Scripts\activate on Windows
33
+ pip install -r requirements.txt
34
+ ```
35
+
36
+ ## Configuration
37
+
38
+ The tool can be configured via environment variables or a `.env` file:
39
+
40
+ | Variable | Description | Default |
41
+ | :--- | :--- | :--- |
42
+ | `SWH_TOKEN` | Software Heritage API Authentication Token | None |
43
+ | `CACHE_DIR` | Directory for caching resolution results | `./cache` |
44
+ | `LOG_LEVEL` | Logging verbosity (DEBUG, INFO, ERROR) | `INFO` |
45
+
46
+ ## Usage
47
+
48
+ ### Quick Start
49
+ Map a single PURL to a verified SWHID immediately:
50
+ ```bash
51
+ python -m swhid_tool.cli swhid-map pkg:pypi/six@1.17.0
52
+ ```
53
+
54
+ ### Batch Processing
55
+ Generate an SPDX 3.0 dataset for multiple PURLs:
56
+ ```bash
57
+ python -m swhid_tool.cli batch-process input_purls.txt output_report.jsonld
58
+ ```
59
+
60
+ ### Integrity Auditing
61
+ Verify a local directory against a verified manifest:
62
+ ```bash
63
+ python -m swhid_tool.cli verify-path /path/to/installed/library manifest.jsonld
64
+ ```
65
+
66
+ ### REST API
67
+ Deploy as a service using FastAPI:
68
+ ```bash
69
+ python -m uvicorn swhid_tool.api:app --host 0.0.0.0 --port 8000
70
+ ```
71
+
72
+ ## Architecture
73
+
74
+ The system utilizes a strategy-based pattern to decouple ecosystem-specific logic from the core resolution engine.
75
+
76
+ ```mermaid
77
+ graph TD
78
+ CLI[CLI / API] --> Manager[SWHID Manager]
79
+ Manager --> PURL[PURL Parser]
80
+ Manager --> StrategyRouter{Strategy Router}
81
+ StrategyRouter --> PyPI[PyPI Strategy]
82
+ StrategyRouter --> Cargo[Cargo Strategy]
83
+ StrategyRouter --> Maven[Maven Strategy]
84
+ PyPI --> SWH[SWH API / Archive]
85
+ Cargo --> SWH
86
+ Maven --> SWH
87
+ Manager --> Exporter[SPDX 3.0 Exporter]
88
+ Exporter --> JSONLD[JSON-LD Manifest]
89
+ ```
90
+
91
+ ## Validation and Standards
92
+
93
+ Verification findings are exported as SPDX 3.0 documents. Compliance with RDF standards is ensured through SHACL shape validation using the integrated `test_validation.py` suite.
94
+
95
+ ## Documentation
96
+
97
+ Detailed guides for different stakeholders:
98
+ - [**User Guide**](user_guide.md): CLI reference, API specifications, and troubleshooting.
99
+ - [**Developer Guide**](developer_guide.md): Extending the tool to new ecosystems and core internals.
100
+ - [**Maintainer Guide**](maintainer_guide.md): Best practices for enabling high-confidence verifiability.
101
+
102
+ ## Contributing
103
+
104
+ Contributions are welcome! Please see the [Developer Guide](developer_guide.md) for setup instructions and coding standards.
105
+
106
+ ## License
107
+
108
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
109
+
110
+ ## Acknowledgments
111
+
112
+ This project was developed as part of the **Google Summer of Code (GSoC) 2026** program, under the mentorship of **Software Heritage**.
113
+
@@ -0,0 +1,51 @@
1
+ # Developer Guide
2
+
3
+ This guide is intended for developers who wish to contribute to the SWHID Verification Tool or extend its functionality.
4
+
5
+ ## Core Architecture
6
+
7
+ The tool follows a **Strategy Pattern** to handle different package ecosystems.
8
+
9
+ ### `SWHIDManager`
10
+ The central orchestrator that routes PURLs to the appropriate `VerificationStrategy`.
11
+
12
+ ### `VerificationStrategy`
13
+ An abstract base class defined in `swhid_tool/strategies/base.py`. Every ecosystem (PyPI, Cargo, etc.) implements this class to provide:
14
+ 1. **Source Discovery**: Finding the canonical source repository or sdist.
15
+ 2. **Normalization**: Cleaning the source to match SWH's archival format.
16
+ 3. **Verification**: Comparing computed SWHIDs with archived ones.
17
+
18
+ ## Extending the Tool
19
+
20
+ To add support for a new ecosystem (e.g., `npm`):
21
+
22
+ 1. Create a new strategy class in `swhid_tool/strategies/npm_strategy.py`.
23
+ 2. Inherit from `VerificationStrategy`.
24
+ 3. Register the new strategy in `SWHIDManager.__init__` within `swhid_tool/manager.py`.
25
+
26
+ ## Testing
27
+
28
+ Run the test suite using `pytest`:
29
+
30
+ ```bash
31
+ pytest tests/
32
+ ```
33
+
34
+ Individual test modules:
35
+ - `test_core.py`: SWHID computation and SWH client logic.
36
+ - `test_purl_parser.py`: PURL parsing for all supported ecosystems.
37
+ - `test_strategies.py`: Strategy-level unit tests (Cargo, Maven, PyPI).
38
+ - `test_scanner.py`: Installation scanner directory auditing.
39
+ - `test_spdx3_model.py`: SPDX 3.0 serialization.
40
+ - `test_exporter.py`: SPDX 3.0 JSON-LD export.
41
+ - `test_swhid.py`: End-to-end CLI smoke test.
42
+ - `test_validation.py`: SHACL validation of generated manifests.
43
+
44
+ ## Development Environment Setup
45
+
46
+ 1. Install development dependencies:
47
+ ```bash
48
+ pip install -r requirements.txt
49
+ pip install pytest pytest-cov
50
+ ```
51
+ 2. Set up a local cache directory to speed up repeated resolutions.
@@ -0,0 +1,24 @@
1
+ # Maintainer Guide
2
+
3
+ This guide is for package maintainers who want to ensure their packages are easily verifiable using SWHIDs.
4
+
5
+ ## Why SWHIDs?
6
+ SWHIDs provide a persistent, cryptographic link to the exact source code of a package version. By ensuring your package is SWHID-verifiable, you provide users with high-confidence provenance.
7
+
8
+ ## Best Practices for Verifiability
9
+
10
+ ### 1. Use Sigstore Attestations (PyPI)
11
+ For Python packages, use [Sigstore](https://www.sigstore.dev/) to sign your releases. This tool extracts the git commit SHA from the Sigstore certificate to verify that the sdist matches the source repository.
12
+
13
+ ### 2. Include SCM Metadata (Maven/Cargo)
14
+ Ensure your package metadata includes a valid `scm` (Maven) or `repository` (Cargo) URL. The tool uses this to locate the source code for comparison.
15
+
16
+ ### 3. Clean Releases
17
+ Avoid including generated files (like `.pyc`, compiled binaries, or `.egg-info`) in your source distributions (sdists) unless they are absolutely necessary. The closer the sdist matches the git tree, the higher the verification confidence.
18
+
19
+ ## Verifying Your Own Package
20
+ You can verify your package's archival status by running:
21
+ ```bash
22
+ python -m swhid_tool.cli swhid-map pkg:<ecosystem>/<name>@<version>
23
+ ```
24
+ If the tool reports a low confidence score, check if your source distribution contains extra files not present in the git repository.
@@ -0,0 +1,81 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "swhid-verification-tool"
7
+ version = "0.1.0"
8
+ description = "A verification framework to map PURLs to verified SWHIDs"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Odysseas Kalaitsidis"}
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "Intended Audience :: Information Technology",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Security",
27
+ "Topic :: Software Development :: Libraries :: Python Modules",
28
+ ]
29
+ dependencies = [
30
+ "requests>=2.31.0",
31
+ "swh.model>=6.4.0",
32
+ "rich>=13.7.0",
33
+ "cryptography>=42.0.0",
34
+ "spdx-tools>=0.8.2",
35
+ "semantic-version>=2.10.0",
36
+ "typer>=0.9.0",
37
+ "fastapi>=0.109.0",
38
+ "uvicorn>=0.27.0",
39
+ "packageurl-python>=0.11.2",
40
+ "python-multipart>=0.0.9",
41
+ "pyshacl>=0.25.0",
42
+ "rdflib>=7.0.0",
43
+ ]
44
+
45
+ [project.optional-dependencies]
46
+ dev = [
47
+ "ruff",
48
+ "mypy",
49
+ "pytest",
50
+ "responses",
51
+ "pytest-cov",
52
+ "build",
53
+ "twine",
54
+ ]
55
+ test = [
56
+ "pytest",
57
+ "responses",
58
+ "pytest-cov",
59
+ ]
60
+
61
+ [project.urls]
62
+ Homepage = "https://github.com/OdysseasKalaitsidis/SWHID_POC"
63
+ Repository = "https://github.com/OdysseasKalaitsidis/SWHID_POC"
64
+ "Bug Tracker" = "https://github.com/OdysseasKalaitsidis/SWHID_POC/issues"
65
+ Documentation = "https://github.com/OdysseasKalaitsidis/SWHID_POC#readme"
66
+
67
+ [project.scripts]
68
+ swhid-tool = "swhid_tool.cli:app"
69
+
70
+ [tool.setuptools.packages.find]
71
+ where = ["."]
72
+ include = ["swhid_tool*"]
73
+
74
+ [tool.ruff]
75
+ line-length = 100
76
+ target-version = "py39"
77
+
78
+ [tool.mypy]
79
+ python_version = "3.9"
80
+ strict = true
81
+ ignore_missing_imports = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ # SPDX-FileCopyrightText: 2026 Odysseas Kalaitsidis
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ __version__ = "0.1.0"
@@ -0,0 +1,34 @@
1
+ # SPDX-FileCopyrightText: 2026 Odysseas Kalaitsidis
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ from fastapi import FastAPI, Query, HTTPException
5
+ from swhid_tool.manager import SWHIDManager
6
+ from swhid_tool.logging_config import setup_logging
7
+ from typing import Dict, Any
8
+
9
# Runs at import time, before the app and manager are created.
# NOTE(review): presumably installs the project's logging handlers/levels —
# confirm in swhid_tool/logging_config.py.
setup_logging()
# ASGI application object; the route decorators below register on it.
app = FastAPI(title="SWHID Verification API")
# One SWHIDManager instance, created at import time and shared by every
# request handled by this module.
manager = SWHIDManager()
12
+
13
+
14
@app.get("/resolve")
async def resolve_purl(
    purl: str = Query(..., description="The Package URL to resolve"),
) -> Dict[str, Any]:
    """
    Resolves a PURL to a SWHID, returning confidence level and strategy used.

    Args:
        purl: The Package URL taken from the required ``purl`` query parameter.

    Returns:
        A dict with the keys ``purl``, ``swhid``, ``confidence``, ``strategy``,
        ``status`` and ``details``. ``details`` is the unmodified result of
        ``manager.resolve``; the other keys are copied from it, with
        ``strategy`` falling back to the result's ``name`` and then to
        ``"unknown"``, and ``status`` falling back to ``"Done"``.

    Raises:
        HTTPException: status 400 carrying ``str(error)`` for any exception
            raised while resolving or while building the response.
    """
    try:
        # NOTE(review): resolve() is called synchronously inside a coroutine.
        # If it performs blocking I/O, the event loop is stalled for the whole
        # call — confirm, and consider a plain `def` endpoint if so.
        result = manager.resolve(purl)
        return {
            "purl": result.get("purl"),
            "swhid": result.get("swhid"),
            "confidence": result.get("confidence"),
            "strategy": result.get("strategy", result.get("name", "unknown")),
            "status": result.get("status", "Done"),
            "details": result,
        }
    except Exception as e:
        # NOTE(review): every failure, including non-client errors, is reported
        # as 400 with the raw exception text. Kept for API compatibility.
        # `from e` preserves the original exception as the explicit cause.
        raise HTTPException(status_code=400, detail=str(e)) from e
31
+
32
@app.get("/health")
async def health():
    """Health-check endpoint: always answers with a fixed ``{"status": "ok"}`` body."""
    payload = {"status": "ok"}
    return payload
@@ -0,0 +1,59 @@
1
+ # SPDX-FileCopyrightText: 2026 Odysseas Kalaitsidis
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ import time
5
+ import json
6
+ import os
7
+ import logging
8
+ from typing import List, Dict, Any
9
+ from swhid_tool.manager import SWHIDManager
10
+ from rich.progress import Progress
11
+
12
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
13
+
14
class BatchProcessor:
    """Resolve a list of PURLs through a ``SWHIDManager`` with an on-disk cache.

    Every successfully resolved PURL is stored as one JSON file inside
    ``cache_dir``; later runs reuse that file instead of resolving again.
    Failed resolutions are reported in the results but never cached.
    """

    # Same logger object as the module-level ``logger`` (same name); bound on
    # the class so the cache helpers do not depend on module globals.
    _log = logging.getLogger(__name__)

    def __init__(self, manager: SWHIDManager, cache_dir: str = "cache"):
        """Store the manager and make sure the cache directory exists.

        Args:
            manager: Resolver used for every PURL that is not cached.
            cache_dir: Directory holding one JSON file per resolved PURL.
        """
        self.manager = manager
        self.cache_dir = cache_dir
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(cache_dir, exist_ok=True)

    def _cache_path(self, purl: str) -> str:
        """Return the cache file path for *purl* (``:`` and ``/`` become ``_``)."""
        return os.path.join(self.cache_dir, f"{purl.replace(':', '_').replace('/', '_')}.json")

    def _load_cached(self, cache_file: str) -> Any:
        """Return the cached result stored in *cache_file*, or ``None`` on a miss.

        A missing file is a normal miss. An unreadable or corrupt file is
        logged and also treated as a miss, so one bad entry cannot abort the
        whole batch; the PURL is simply resolved again.
        """
        try:
            with open(cache_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
            self._log.warning("Ignoring unreadable cache entry %s: %s", cache_file, e)
            return None

    def _store_cached(self, cache_file: str, result: Dict[str, Any]) -> None:
        """Best-effort, atomic write of *result* to *cache_file*.

        Failures are logged and swallowed on purpose: the cache is only an
        optimisation and must not turn a successful resolution into an error.
        """
        try:
            # Serialise before touching the disk so a non-serialisable result
            # can never leave a truncated cache file behind.
            payload = json.dumps(result)
        except (TypeError, ValueError) as e:
            self._log.warning("Not caching %s: result is not JSON-serializable (%s)", cache_file, e)
            return

        tmp_file = f"{cache_file}.tmp"
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                f.write(payload)
            # Atomic rename: readers see either the old entry or the new one.
            os.replace(tmp_file, cache_file)
        except OSError as e:
            self._log.warning("Could not write cache entry %s: %s", cache_file, e)
            try:
                os.remove(tmp_file)
            except OSError:
                # Nothing left to clean up (or cleanup impossible); already logged.
                pass

    def process_purls(self, purls: List[str]) -> List[Dict[str, Any]]:
        """Resolve every PURL in *purls*, showing a progress bar.

        Cached PURLs are answered from disk. For the others the manager is
        queried; when the result status is ``"Partial"`` or ``"Inferred"`` and
        a ``repo_url`` is present, Save Code Now is triggered and its outcome
        is stored under ``"save_code_now"``.

        Args:
            purls: Package URLs to resolve, in order.

        Returns:
            Exactly one entry per input PURL, in input order. A failed PURL
            yields ``{"purl": ..., "status": "Error", "reason": ...}``.
        """
        results: List[Dict[str, Any]] = []
        with Progress() as progress:
            task = progress.add_task("[cyan]Processing PURLs...", total=len(purls))

            for purl in purls:
                cache_file = self._cache_path(purl)
                cached = self._load_cached(cache_file)
                if cached is not None:
                    results.append(cached)
                    progress.update(task, advance=1)
                    continue

                try:
                    self._log.info("Resolving %s", purl)
                    result = self.manager.resolve(purl)

                    # Trigger Save Code Now if not verified but repo is known
                    if result.get("status") in ["Partial", "Inferred"] and "repo_url" in result:
                        progress.console.print(
                            f"[blue]Triggering Save Code Now for {result['repo_url']}...[/blue]"
                        )
                        save_result = self.manager.swh.trigger_save_code_now(result["repo_url"])
                        result["save_code_now"] = save_result
                except Exception as e:
                    self._log.error("Error processing %s: %s", purl, e)
                    progress.console.print(f"[red]Error processing {purl}: {str(e)}[/red]")
                    results.append({"purl": purl, "status": "Error", "reason": str(e)})
                else:
                    # Append only once resolution fully succeeded, and keep the
                    # cache write out of the try so a cache failure cannot add a
                    # second "Error" entry for the same PURL.
                    results.append(result)
                    self._store_cached(cache_file, result)

                progress.update(task, advance=1)
                # Small delay to be polite
                time.sleep(0.5)

        return results
59
+