solorider 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solorider/__init__.py +7 -0
- solorider/auditors/__init__.py +10 -0
- solorider/auditors/audit_dispatcher.py +117 -0
- solorider/auditors/audit_plugin_base.py +80 -0
- solorider/auditors/audit_plugins/__init__.py +1 -0
- solorider/auditors/audit_plugins/npm_auditor.py +197 -0
- solorider/auditors/audit_plugins/python_auditor.py +149 -0
- solorider/cache/.gitkeep +7 -0
- solorider/cli.py +448 -0
- solorider/core.py +157 -0
- solorider/lib/__init__.py +59 -0
- solorider/lib/advisory_database_helpers.py +106 -0
- solorider/lib/decompressor.py +77 -0
- solorider/lib/dependency_downloader.py +162 -0
- solorider/lib/dependency_downloader_plugins/__init__.py +16 -0
- solorider/lib/dependency_downloader_plugins/aur_checker.py +124 -0
- solorider/lib/dependency_downloader_plugins/github_checker.py +81 -0
- solorider/lib/dependency_downloader_plugins/local_directory.py +44 -0
- solorider/lib/dependency_downloader_plugins/local_file_checker.py +50 -0
- solorider/lib/dependency_downloader_plugins/npm_checker.py +96 -0
- solorider/lib/dependency_downloader_plugins/pypi_checker.py +93 -0
- solorider/lib/dependency_parser_npm.py +483 -0
- solorider/lib/dependency_parser_python.py +527 -0
- solorider/lib/directory_enumerator.py +62 -0
- solorider/lib/downloader_plugin_base.py +215 -0
- solorider/lib/downloader_plugin_loader.py +59 -0
- solorider/lib/file_hasher.py +44 -0
- solorider/lib/helpers.py +872 -0
- solorider/lib/pinned_version_parser.py +70 -0
- solorider/loader.py +46 -0
- solorider/plugin_base.py +354 -0
- solorider/plugins/__init__.py +0 -0
- solorider/plugins/plugins_classifiers/__init__.py +0 -0
- solorider/plugins/plugins_classifiers/anomalous_size.py +76 -0
- solorider/plugins/plugins_classifiers/npm_bin_detector.py +149 -0
- solorider/plugins/plugins_classifiers/package_installer_detector.py +163 -0
- solorider/plugins/plugins_classifiers/similar_filenames.py +169 -0
- solorider/plugins/plugins_deep/__init__.py +0 -0
- solorider/plugins/plugins_deep/check_dependency_for_advisory_npm.py +169 -0
- solorider/plugins/plugins_deep/check_dependency_for_advisory_pypi.py +162 -0
- solorider/plugins/plugins_deep/claude_deobfuscator.py +249 -0
- solorider/plugins/plugins_judgements/__init__.py +0 -0
- solorider/plugins/plugins_judgements/advisory_lookup_npm.py +114 -0
- solorider/plugins/plugins_judgements/advisory_lookup_pypi.py +107 -0
- solorider/plugins/plugins_judgements/check_npm_version_mismatch.py +167 -0
- solorider/plugins/plugins_judgements/check_pypi_version_mismatch.py +169 -0
- solorider/plugins/plugins_judgements/pinned_version_blacklist.py +94 -0
- solorider/plugins/plugins_judgements/standard_judgement.py +120 -0
- solorider/plugins/plugins_reporting/__init__.py +0 -0
- solorider/plugins/plugins_reporting/file_report.py +157 -0
- solorider/plugins/plugins_reporting/report_by_file.py +219 -0
- solorider/plugins/plugins_reporting/report_by_plugin.py +154 -0
- solorider/plugins/plugins_reporting/report_by_severity.py +173 -0
- solorider/plugins/plugins_reporting/simple_report.py +241 -0
- solorider/plugins/plugins_reporting/standard_report.py +171 -0
- solorider/plugins/plugins_static/__init__.py +0 -0
- solorider/plugins/plugins_static/entropy_analysis.py +199 -0
- solorider/plugins/plugins_static/indicator_match.py +71 -0
- solorider/plugins/plugins_static/yara_detector.py +223 -0
- solorider/plugins/plugins_static/yara_rules/all_bunArtifacts.yar +25 -0
- solorider/plugins/plugins_static/yara_rules/characteristic_large_file.yar +9 -0
- solorider/plugins/plugins_static/yara_rules/characteristic_obfuscator_pattern.yar +9 -0
- solorider/plugins/plugins_static/yara_rules/characteristic_singleFile.yar +11 -0
- solorider/plugins/plugins_static/yara_rules/npm_ExcessiveChrCode.yar +10 -0
- solorider/plugins/plugins_static/yara_rules/python_shellExec.yar +192 -0
- solorider/plugins/plugins_static/yara_rules/session_cookieTheft.yar +243 -0
- solorider/plugins/plugins_version_extractors/__init__.py +0 -0
- solorider/plugins/plugins_version_extractors/npm_version_extractor.py +85 -0
- solorider/plugins/plugins_version_extractors/pypi_version_extractor.py +169 -0
- solorider/plugins/plugins_version_extractors/stated_pinned_version_extractor.py +43 -0
- solorider/solorider.py +545 -0
- solorider-1.0.0.dist-info/METADATA +777 -0
- solorider-1.0.0.dist-info/RECORD +77 -0
- solorider-1.0.0.dist-info/WHEEL +5 -0
- solorider-1.0.0.dist-info/entry_points.txt +2 -0
- solorider-1.0.0.dist-info/licenses/LICENSE +21 -0
- solorider-1.0.0.dist-info/top_level.txt +1 -0
solorider/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""solorider: an extensible framework which allows security practitioners to efficiently iterate on the rapid detection and profiling of potential supply-chain attacks of libraries hosted on code repositories."""
|
|
2
|
+
|
|
3
|
+
from .core import Core
|
|
4
|
+
from .plugin_base import PluginBase, exported
|
|
5
|
+
from .solorider import SupplyChainDetector
|
|
6
|
+
|
|
7
|
+
__all__ = ["Core", "PluginBase", "SupplyChainDetector", "exported"]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""solorider audit platform: audit plugins, loader, and dispatcher."""
|
|
2
|
+
|
|
3
|
+
from .audit_plugin_base import AuditPluginBase
|
|
4
|
+
from .audit_dispatcher import AuditDispatcher, load_audit_plugins
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"AuditPluginBase",
|
|
8
|
+
"AuditDispatcher",
|
|
9
|
+
"load_audit_plugins",
|
|
10
|
+
]
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Loader and dispatcher for solorider audit plugins.
|
|
3
|
+
|
|
4
|
+
Mirrors the analyzer plugin loader (``loader.load_plugins_from_directory``):
|
|
5
|
+
audit plugins are discovered from the ``audit_plugins`` directory and
|
|
6
|
+
*initialized* up front, but a given plugin only runs when it is
|
|
7
|
+
dispatched by name.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import importlib.util
|
|
13
|
+
import inspect
|
|
14
|
+
import textwrap
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from .audit_plugin_base import AuditPluginBase
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load_audit_plugins(
    plugins_dir: Path | None = None,
) -> list[AuditPluginBase]:
    """
    Discover and instantiate every audit plugin in a plugin directory.

    Each ``.py`` file in *plugins_dir* (``_``-prefixed files are skipped)
    is imported under the private module name
    ``_solorider_audit_plugin_<stem>``. Every class that subclasses
    ``AuditPluginBase`` and is defined in that file -- not merely
    imported by it -- is instantiated with no arguments.

    Args:
        plugins_dir: directory to scan. Defaults to the ``audit_plugins``
            directory that sits next to this module.

    Returns:
        The initialized plugin instances, in sorted-filename order.
        Empty when *plugins_dir* is not an existing directory.

    Raises:
        Exception: anything raised while a plugin module is executed or
            a plugin class is instantiated propagates to the caller.
    """
    # Function-scope import: nothing else in this block needs ``sys``.
    import sys

    if plugins_dir is None:
        plugins_dir = Path(__file__).resolve().parent / "audit_plugins"

    plugins: list[AuditPluginBase] = []

    if not plugins_dir.is_dir():
        return plugins

    for file_path in sorted(plugins_dir.glob("*.py")):
        if file_path.name.startswith("_"):
            continue

        module_name = f"_solorider_audit_plugin_{file_path.stem}"
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            continue

        module = importlib.util.module_from_spec(spec)

        # Register the module before executing it, as the importlib
        # "importing a source file directly" recipe does. Code that looks
        # a class's module up via ``sys.modules[cls.__module__]`` while
        # the plugin is being imported (e.g. ``dataclasses`` resolving
        # string annotations) otherwise fails.
        sys.modules[module_name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialized module behind.
            sys.modules.pop(module_name, None)
            raise

        for _, obj in inspect.getmembers(module, inspect.isclass):
            if (
                issubclass(obj, AuditPluginBase)
                and obj is not AuditPluginBase
                # Only classes defined in this file, not ones it imported.
                and obj.__module__ == module_name
            ):
                plugins.append(obj())

    return plugins
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class AuditDispatcher:
    """
    Registry of audit plugins.

    Every discovered plugin is instantiated when the dispatcher is
    built; a plugin's audit only executes when it is requested by name
    through ``dispatch()``.
    """

    def __init__(self, plugins_dir: Path | None = None):
        # Eager discovery; ``None`` lets the loader pick its default directory.
        self.plugins = load_audit_plugins(plugins_dir)

    def get(self, name: str) -> AuditPluginBase | None:
        """Look up an initialized plugin by ``name``; ``None`` when absent."""
        matches = (candidate for candidate in self.plugins if candidate.name == name)
        # First match wins, in load order.
        return next(matches, None)

    def dispatch(self, name: str, *args, **kwargs):
        """
        Execute the audit plugin called *name* and hand back its result.

        Extra positional and keyword arguments are passed straight
        through to the plugin's ``run()``, which is how a plugin that
        needs input (such as the npm auditor's target path) gets it.

        Raises:
            ValueError: when no loaded plugin carries that name.
        """
        selected = self.get(name)
        if selected is not None:
            return selected.run(*args, **kwargs)

        known = ", ".join(candidate.name for candidate in self.plugins)
        raise ValueError(
            f"No audit plugin named '{name}'. Available: {known}"
        )

    def list_plugins(self) -> None:
        """
        Write a listing of the loaded audit plugins to stdout.

        The listing is an underlined heading followed by one entry per
        plugin: its name on a marker line, then its description wrapped
        to 76 columns and indented to align under the name, then a
        blank line.
        """
        bullet = " [+] "
        pad = " " * len(bullet)
        text_width = 76
        title = "Audit Plugins"

        for banner_line in ("", title, "=" * len(title)):
            print(banner_line)

        for entry in self.plugins:
            print(f"{bullet}{entry.name}")
            # Wrap first, indent afterwards, so the indent is not counted
            # against the wrap width.
            pieces = (
                textwrap.wrap(entry.description, width=text_width)
                if entry.description
                else []
            )
            for piece in pieces:
                print(f"{pad}{piece}")
            print()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Base class for solorider audit plugins."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class AuditPluginBase:
    """
    Contract every audit plugin follows.

    An audit plugin covers one platform (for example Python or npm). It
    discovers the packages installed on the local system, works out each
    package's pinned version, and locates where that version's source
    code lives on disk.

    Instance attributes set up in ``__init__``:
        name          : str  -- unique identifier used to invoke the plugin
        description   : str  -- human-readable summary of the plugin
        PACKAGE_INDEX : list -- findings accumulated by this instance

    Methods subclasses must implement:
        run()                     -- entry point; orchestrates the audit
        enum_installed_packages() -- discover installed packages + versions
        find_package_code()       -- locate each package's code on disk

    ``add_new_package()`` is implemented here and appends one finding to
    ``PACKAGE_INDEX``.

    NOTE: ``PACKAGE_INDEX`` is created inside ``__init__``, so each
    plugin object owns a fresh list instead of sharing one through the
    class.
    """

    def __init__(self):
        # A new list per instance keeps findings from leaking between plugins.
        self.PACKAGE_INDEX: list = []
        self.name: str = ""
        self.description: str = ""

    # ---- core functions (implemented by subclasses) ----------------------

    def run(self, *args, **kwargs) -> list:
        """
        Entry point for the audit.

        Implementations orchestrate ``enum_installed_packages()`` and
        ``find_package_code()``, record each result through
        ``add_new_package()``, and return ``PACKAGE_INDEX``.

        Optional arguments are accepted for plugins that need input
        (e.g. a path to scan); plugins that need none ignore them.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def enum_installed_packages(self):
        """Discover installed packages and their pinned versions (abstract)."""
        raise NotImplementedError()

    def find_package_code(self):
        """Determine where each installed package's code lives on disk (abstract)."""
        raise NotImplementedError()

    # ---- shared functionality --------------------------------------------

    def add_new_package(self, pinned_version, path_to_code) -> None:
        """
        Append one identified package to ``PACKAGE_INDEX``.

        Args:
            pinned_version: the determined pinned version
                (e.g. ``flask==3.0.3`` / ``express@5.2.1``).
            path_to_code: filesystem path to where that specific
                version's source code lives.
        """
        finding = {
            "pinned_version": pinned_version,
            "path_to_code": path_to_code,
        }
        self.PACKAGE_INDEX.append(finding)

    def __repr__(self) -> str:
        found = len(self.PACKAGE_INDEX)
        return f"<AuditPlugin name={self.name!r} found={found}>"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Audit plugins package. Plugins are auto-discovered by the dispatcher."""
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""npm platform audit plugin."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from solorider.auditors.audit_plugin_base import AuditPluginBase
|
|
9
|
+
from solorider.lib.helpers import npm_lockfile_parser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NpmAuditor(AuditPluginBase):
    """Audits npm packages installed under a node_modules tree.

    Unlike the Python auditor (whose environment is implicit), npm
    packages live under a ``node_modules`` directory that must be
    supplied. The target -- a ``node_modules`` directory or a project
    root containing one -- can be passed at construction or to ``run()``
    (and therefore via ``AuditDispatcher.dispatch("npm", target)``).

    When a lockfile is present (the hidden
    ``node_modules/.package-lock.json``, or a project-root
    ``package-lock.json`` / ``npm-shrinkwrap.json``),
    ``enum_installed_packages`` uses the shared
    ``lib.helpers.npm_lockfile_parser`` and resolves each entry's
    install path against the project root. When no lockfile is present,
    it falls back to a scope- and nesting-aware walk of ``node_modules``
    that reads each ``package.json``.
    """

    def __init__(self, target=None):
        """
        Args:
            target: optional default path (a ``node_modules`` directory
                or a project root containing one) used when ``run()`` /
                ``enum_installed_packages()`` are called without one.
        """
        super().__init__()
        self.name = "npm"
        self.description = (
            "Audits npm packages installed under a node_modules tree, "
            "resolving each to a pinned version (name@version) and the "
            "on-disk location of its source code. Requires a target "
            "argument: the path to a node_modules directory or a "
            "project root containing one (passed to run() or via "
            "dispatch('npm', target))."
        )
        self.PACKAGE_INDEX = []
        self.target = target

    # ---- core functions --------------------------------------------------

    def run(self, target=None, **kwargs) -> list:
        """
        Entry point. Enumerates the installed packages, validates each
        package's code location, and records every finding via
        ``add_new_package()``.

        Args:
            target: path to a ``node_modules`` directory or a project
                root containing one. Falls back to the target supplied
                at construction. If neither is set, nothing is audited
                and an empty list is returned.
            **kwargs: accepted and ignored, so callers can pass through
                arguments this plugin does not need.

        Returns:
            ``PACKAGE_INDEX``: one ``{"pinned_version", "path_to_code"}``
            dict per package whose code directory exists. The index is
            rebuilt from scratch on every call.
        """
        self.PACKAGE_INDEX = []

        for record in self.enum_installed_packages(target):
            code_path = self.find_package_code(record)
            if code_path is None:
                # Enumerated (e.g. listed in the lockfile) but not on disk.
                continue
            self.add_new_package(record["pinned_version"], str(code_path))

        return self.PACKAGE_INDEX

    def enum_installed_packages(self, target=None) -> list:
        """
        Enumerate installed npm packages with their pinned version and
        install path.

        If a lockfile is found, the entries come from the shared
        ``npm_lockfile_parser`` (each entry's ``pinned_version`` and
        ``install_path`` keys are read) and the install path is joined
        onto the project root, i.e. the parent of the resolved
        ``node_modules`` directory. Otherwise ``node_modules`` is walked.

        Args:
            target: see ``run()``; falls back to ``self.target``.

        Returns:
            A list of ``{"pinned_version": str, "path": Path}`` records.
            Empty when no target is available or the target does not
            resolve to a directory.
        """
        target = target if target is not None else self.target
        if target is None:
            return []

        node_modules = self._resolve_node_modules(Path(target))
        if node_modules is None:
            return []

        project_root = node_modules.parent
        lockfile = self._find_lockfile(node_modules, project_root)

        if lockfile is not None:
            # Lockfile present: pinned version + install path come
            # straight from the shared parser; resolve the path against
            # the project root.
            return [
                {
                    "pinned_version": entry["pinned_version"],
                    "path": project_root / entry["install_path"],
                }
                for entry in npm_lockfile_parser(lockfile)
            ]

        # No lockfile: walk node_modules (yields pinned_version + path).
        return self._walk_node_modules(node_modules)

    def find_package_code(self, record) -> Path | None:
        """
        Return the on-disk code location for an enumerated *record*
        (its ``"path"`` value) if it exists as a directory, else ``None``.
        """
        path = Path(record["path"])
        return path if path.is_dir() else None

    # ---- helpers: target / lockfile --------------------------------------

    @staticmethod
    def _resolve_node_modules(target: Path) -> Path | None:
        """
        Resolve *target* to a node_modules directory, or None.

        A ``node_modules`` child of *target* wins; otherwise *target*
        itself is used if it is a directory.
        """
        if (target / "node_modules").is_dir():
            return target / "node_modules"
        if target.is_dir():
            return target
        return None

    @staticmethod
    def _find_lockfile(node_modules: Path, project_root: Path) -> Path | None:
        """Return the first existing lockfile candidate, in priority order."""
        for candidate in (
            node_modules / ".package-lock.json",
            project_root / "package-lock.json",
            project_root / "npm-shrinkwrap.json",
        ):
            if candidate.is_file():
                return candidate
        return None

    # ---- helpers: filesystem walk fallback -------------------------------

    @staticmethod
    def _sorted_children(directory: Path) -> list:
        """Return *directory*'s entries sorted by path, or ``[]`` if unreadable."""
        try:
            return sorted(directory.iterdir())
        except OSError:
            # Best-effort, like unreadable manifests: one inaccessible
            # directory must not abort the whole audit.
            return []

    def _walk_node_modules(self, node_modules: Path) -> list:
        """
        Scope- and nesting-aware walk of *node_modules*, reading each
        package's ``package.json`` for name/version. Used when no
        lockfile is available.

        Returns:
            A list of ``{"pinned_version": str, "path": Path}`` records.
        """
        records: list = []
        self._scan_dir(node_modules, records)
        return records

    def _scan_dir(self, modules_dir: Path, records: list) -> None:
        """Scan one node_modules level into *records*, descending into scopes."""
        if not modules_dir.is_dir():
            return

        for entry in self._sorted_children(modules_dir):
            if entry.name.startswith("."):
                # .bin, .cache, .package-lock.json, etc.
                continue
            if not entry.is_dir():
                continue

            if entry.name.startswith("@"):
                # scope directory: each child is a package
                for scoped in self._sorted_children(entry):
                    if scoped.is_dir():
                        self._collect_package(scoped, records)
            else:
                self._collect_package(entry, records)

    def _collect_package(self, pkg_dir: Path, records: list) -> None:
        """
        Record a single package, then recurse into nested node_modules.

        A package is recorded only when its ``package.json`` is readable
        and carries both a ``name`` and a ``version``. The nested
        ``node_modules`` directory is scanned regardless.
        """
        manifest = pkg_dir / "package.json"
        if manifest.is_file():
            try:
                data = json.loads(manifest.read_text(encoding="utf-8"))
            except (ValueError, OSError):
                data = {}
            # A manifest can hold valid JSON that is not an object
            # (``[]``, ``"x"``, ``null``). Treat it like an unreadable
            # manifest instead of failing on ``.get``.
            if not isinstance(data, dict):
                data = {}
            name = data.get("name")
            version = data.get("version")
            if name and version:
                records.append(
                    {"pinned_version": f"{name}@{version}", "path": pkg_dir}
                )

        nested = pkg_dir / "node_modules"
        if nested.is_dir():
            self._scan_dir(nested, records)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Python platform audit plugin."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib.metadata
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from solorider.auditors.audit_plugin_base import AuditPluginBase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PythonAuditor(AuditPluginBase):
    """Audits Python packages installed in the local environment.

    Installed distributions are enumerated through
    ``importlib.metadata``; each is resolved to a pinned version
    (``name==version``) and the on-disk location(s) of its source code.
    """

    def __init__(self):
        super().__init__()
        self.name = "python"
        self.description = (
            "Audits Python packages installed in the local environment, "
            "resolving each to a pinned version (name==version) and the "
            "on-disk location of its source code."
        )
        self.PACKAGE_INDEX = []

    # ---- core functions --------------------------------------------------

    def run(self, *args, **kwargs) -> list:
        """
        Entry point. Enumerates installed distributions, resolves each to
        a pinned version and its code path(s), records every finding via
        ``add_new_package()``, and returns ``PACKAGE_INDEX``.

        A distribution that installs more than one top-level name yields
        one entry per code location, all sharing the same pinned version.
        ``PACKAGE_INDEX`` is reset at the start of every call.

        Args:
            *args, **kwargs: accepted and ignored. This auditor needs no
                input, but it is invoked through the same
                ``run(*args, **kwargs)`` contract as auditors that take
                a target, so positional arguments must not fail.

        Returns:
            ``PACKAGE_INDEX``: a list of
            ``{"pinned_version", "path_to_code"}`` dicts.
        """
        self.PACKAGE_INDEX = []

        for distribution in self.enum_installed_packages():
            name = distribution.metadata["Name"]
            version = distribution.version
            if not name or not version:
                continue

            pinned_version = f"{name}=={version}"
            for code_path in self.find_package_code(distribution):
                self.add_new_package(pinned_version, str(code_path))

        return self.PACKAGE_INDEX

    def enum_installed_packages(self) -> list:
        """
        Enumerate installed Python distributions via
        ``importlib.metadata.distributions()``.

        Returns:
            The ``Distribution`` objects that have both a name and a
            version, de-duplicated by (lower-cased name, version); the
            first occurrence is kept.
        """
        seen = set()
        distributions = []

        for distribution in importlib.metadata.distributions():
            name = distribution.metadata["Name"]
            version = distribution.version
            if not name or not version:
                continue

            key = (name.lower(), version)
            if key in seen:
                continue
            seen.add(key)
            distributions.append(distribution)

        return distributions

    def find_package_code(self, distribution) -> list:
        """
        Resolve where *distribution*'s source code lives on disk.

        Each top-level importable name is located relative to the
        distribution: a directory of that name is taken as a package;
        failing that, ``<name>.py`` is taken as a single-file module.
        Names that match neither are dropped.

        Returns:
            A de-duplicated, order-preserving list of ``Path`` objects
            (empty if nothing could be resolved).
        """
        code_paths = []

        for top_level in self._top_level_names(distribution):
            directory = Path(distribution.locate_file(top_level))
            if directory.is_dir():
                code_paths.append(directory)
                continue

            module = Path(distribution.locate_file(f"{top_level}.py"))
            if module.is_file():
                code_paths.append(module)

        return list(dict.fromkeys(code_paths))

    # ---- helpers ---------------------------------------------------------

    @staticmethod
    def _top_level_names(distribution) -> list:
        """
        Determine a distribution's top-level importable names.

        Uses ``top_level.txt`` when it is present and non-empty.
        Otherwise the names are inferred from the installed-file
        manifest (``distribution.files``), skipping entries that are not
        importable code.

        Returns:
            A de-duplicated, order-preserving list of names.
        """
        text = distribution.read_text("top_level.txt")
        if text:
            names = [line.strip() for line in text.splitlines() if line.strip()]
            if names:
                return list(dict.fromkeys(names))

        names = []
        for file in distribution.files or []:
            parts = file.parts
            if not parts:
                continue

            first = parts[0]
            if first in ("..", "/", "__pycache__"):
                # Not a top-level package: ".." and "/" are files
                # installed outside the distribution's directory (e.g.
                # console scripts), and "__pycache__" holds bytecode for
                # single-file modules. Keeping them would later resolve
                # to the parent directory, the filesystem root, or the
                # shared bytecode cache.
                continue
            if first.endswith((".dist-info", ".egg-info", ".data")):
                continue

            if len(parts) == 1:
                # A top-level single-file module (e.g. six.py -> six).
                leaf = Path(first)
                if leaf.suffix == ".py":
                    names.append(leaf.stem)
            else:
                # A top-level package directory.
                names.append(first)

        return list(dict.fromkeys(names))
|
solorider/cache/.gitkeep
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# This placeholder ensures the `cache/` directory ships with the
|
|
2
|
+
# installed package. Python packaging cannot include an empty
|
|
3
|
+
# directory, so this file gives it something to carry.
|
|
4
|
+
#
|
|
5
|
+
# Session output (downloads/, extracted/, other/) is written here at
|
|
6
|
+
# runtime by solorider.lib.helpers.create_session_dirs(), which creates
|
|
7
|
+
# any missing subdirectories on demand.
|