vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,573 @@
1
+ """Deployment-target detection for forensic gates (F19).
2
+
3
+ Some gates are only meaningful when code actually runs on a specific platform.
4
+ The canonical example is ``encoding.windows_unsafe_char`` — cp1252 console
5
+ crash risk only exists when the Python/shell/Java source actually executes on
6
+ a Windows host. A pure-Linux trading stack has no such risk, so the 1k+
7
+ findings the gate raises on Linux-deployed code are false positives.
8
+
9
+ This module implements a 3-layer cascade for detecting where a target project
10
+ actually runs:
11
+
12
+ Layer 3 — explicit override (highest precedence)
13
+ * ``<project>/.autoforensics/config.json`` with
14
+ ``{"deployment_target": "linux-only" | "windows-only" | "cross-platform" | "auto"}``
15
+ * Environment variable ``AUTOFORENSICS_DEPLOYMENT=<value>``. The CLI flag
16
+ ``--deployment-target`` is plumbed through the env var so a single
17
+ reader handles both cases.
18
+
19
+ Layer 2 — project-level signals (cached per project dir)
20
+ * ``pyproject.toml`` ``classifiers`` — "POSIX :: Linux" / "Microsoft ::
21
+ Windows" / "OS Independent".
22
+ * ``setup.py`` ``classifiers`` — same semantics, parsed via AST to avoid
23
+ executing arbitrary setup code.
24
+ * ``Dockerfile`` present in project root → container, usually Linux.
25
+ * ``.github/workflows/*.yml`` — if every job uses ``ubuntu-latest``
26
+ only, that is a Linux-only deployment signal.
27
+ * ``.bat`` / ``.ps1`` / ``.cmd`` / ``.psm1`` files present OUTSIDE dev
28
+ infra paths (``.venv``, ``venv``, ``node_modules``, etc.) → the project
29
+ is Windows-aware. Dev-infra-only Windows scripts do not change
30
+ deployment target.
31
+
32
+ Layer 1 — file-level hints (per-file, content-based)
33
+ * Shebang ``#!/usr/bin/env python`` / ``#!/bin/bash`` / ``#!/bin/sh`` →
34
+ Unix signal.
35
+ * Imports ``winreg`` / ``ctypes.windll`` / ``win32com`` / ``pywin32`` →
36
+ Windows signal.
37
+ * Imports ``fcntl`` / ``pwd`` / ``grp`` / ``resource`` / ``uvloop`` /
38
+ ``daemonize`` → Unix signal.
39
+ * AST pattern ``if sys.platform == "win32":`` / ``if os.name == "nt":``
40
+ → Windows-aware code (Windows signal).
41
+
42
+ File-level signals are score-based with a ±2 threshold: small accidental
43
+ matches (e.g. a docstring containing the word "winreg") do not flip the
44
+ classification.
45
+
46
+ Precedence (strictest → weakest): explicit > file > project > unknown.
47
+
48
+ Conservative default: when no layer decides, callers treat the target as
49
+ "unknown" and **scan** — a false positive is recoverable (suppression); a
50
+ false negative hides a real bug.
51
+ """
52
+ from __future__ import annotations
53
+
54
+ import ast
55
+ import json
56
+ import logging
57
+ import os
58
+ import re
59
+ from pathlib import Path
60
+ from typing import Literal
61
+
62
+ _log = logging.getLogger(__name__)
63
+
64
+ DeploymentTarget = Literal[
65
+ "linux-only",
66
+ "windows-only",
67
+ "cross-platform",
68
+ "auto",
69
+ "unknown",
70
+ ]
71
+
72
+ # Accepted values in config / env. "auto" means "fall through to signals".
73
+ _VALID_EXPLICIT: frozenset[str] = frozenset({
74
+ "linux-only", "windows-only", "cross-platform", "auto",
75
+ })
76
+
77
+ # Module-level cache keyed by resolved project-dir string. Rubik has ~1958
78
+ # source files and every file triggers the encoding scan; we MUST NOT
79
+ # re-scan the project tree for every file.
80
+ _PROJECT_CACHE: dict[str, DeploymentTarget] = {}
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # Layer 3 — explicit override
84
+ # ---------------------------------------------------------------------------
85
+
86
+ _ENV_VAR = "AUTOFORENSICS_DEPLOYMENT"
87
+ _CONFIG_REL = Path(".autoforensics") / "config.json"
88
+
89
+
90
+ def _normalize_explicit(value: str | None) -> DeploymentTarget | None:
91
+ """Coerce a raw string into a valid DeploymentTarget or None.
92
+
93
+ Unknown / empty values return None so the caller falls through to the
94
+ next layer. "auto" returns None as well — the whole point of "auto" is
95
+ "let the detector decide".
96
+ """
97
+ if not value:
98
+ return None
99
+ normalized = value.strip().lower()
100
+ if normalized == "auto":
101
+ return None
102
+ if normalized in _VALID_EXPLICIT:
103
+ return normalized # type: ignore[return-value]
104
+ _log.warning(
105
+ "AUTOFORENSICS: ignoring invalid deployment_target=%r (expected one of %s)",
106
+ value,
107
+ sorted(_VALID_EXPLICIT),
108
+ )
109
+ return None
110
+
111
+
112
def get_explicit_deployment(project_dir: Path) -> DeploymentTarget | None:
    """Return the explicit override (Layer 3) or None.

    The environment variable wins over the config file: the CLI flag is
    plumbed through the env var, so the most recent caller intention is
    honoured.

    Args:
        project_dir: Project root; the config file is looked up beneath it.

    Returns:
        A concrete deployment target when a valid override is present,
        otherwise ``None`` so the caller falls through to signal detection.

    Never raises for a bad config file: unreadable, non-UTF-8, malformed or
    wrongly-typed content is logged and ignored.
    """
    normalized = _normalize_explicit(os.environ.get(_ENV_VAR))
    if normalized is not None:
        return normalized

    config_path = project_dir / _CONFIG_REL
    if not config_path.is_file():
        return None
    try:
        # utf-8-sig tolerates the BOM that some Windows editors prepend.
        payload = json.loads(config_path.read_text(encoding="utf-8-sig"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        _log.warning(
            "AUTOFORENSICS: cannot read %s (%s: %s) — falling through to signal detection",
            config_path, type(exc).__name__, exc,
        )
        return None
    if not isinstance(payload, dict):
        return None
    raw = payload.get("deployment_target")
    if raw is not None and not isinstance(raw, str):
        # Wrong JSON type (number, list, ...) — ignore rather than crash.
        _log.warning(
            "AUTOFORENSICS: ignoring non-string deployment_target=%r in %s",
            raw, config_path,
        )
        return None
    return _normalize_explicit(raw)
139
+
140
+
141
+ # ---------------------------------------------------------------------------
142
+ # Layer 1 — file-level hints
143
+ # ---------------------------------------------------------------------------
144
+
145
+ # Score threshold: a file must score >= +2 (unix) or <= -2 (windows) to be
146
+ # classified. Small accidental matches fall below threshold and stay unknown.
147
+ _FILE_THRESHOLD = 2
148
+
149
+ # Regexes are module-level so Python compiles them once per process.
150
+ _SHEBANG_UNIX_RE = re.compile(
151
+ r"^#!\s*/(?:usr/bin/env\s+(?:python\d*|bash|sh|zsh)|bin/(?:bash|sh|zsh))\b"
152
+ )
153
+
154
+ # Import patterns. These are deliberately conservative — we match
155
+ # module-top-level imports only, not references inside strings/comments.
156
+ _UNIX_IMPORT_RE = re.compile(
157
+ r"^\s*(?:from|import)\s+(fcntl|pwd|grp|resource|uvloop|daemonize|"
158
+ r"termios|syslog|posix|spwd|crypt)\b",
159
+ re.MULTILINE,
160
+ )
161
+ _WINDOWS_IMPORT_RE = re.compile(
162
+ r"^\s*(?:from|import)\s+(winreg|_winreg|win32com|win32api|win32con|"
163
+ r"win32process|win32security|pywin32|msvcrt|winsound)\b",
164
+ re.MULTILINE,
165
+ )
166
+ # ctypes.windll / ctypes.WinDLL — Windows-only ctypes surface.
167
+ _CTYPES_WIN_RE = re.compile(
168
+ r"\bctypes\.(?:windll|WinDLL|oledll|OleDLL)\b"
169
+ )
170
+ # sys.platform == "win32" / os.name == "nt" — Windows-aware branching.
171
+ _SYS_PLATFORM_WIN_RE = re.compile(
172
+ r"\bsys\.platform\s*==\s*['\"]win32['\"]|"
173
+ r"\bos\.name\s*==\s*['\"]nt['\"]"
174
+ )
175
+ _SYS_PLATFORM_LINUX_RE = re.compile(
176
+ r"\bsys\.platform\s*==\s*['\"]linux['\"]|"
177
+ r"\bsys\.platform\.startswith\(\s*['\"]linux['\"]\s*\)|"
178
+ r"\bos\.name\s*==\s*['\"]posix['\"]"
179
+ )
180
+
181
+
182
def detect_file_deployment(content: str) -> Literal["unix", "windows", "unknown"]:
    """Classify one file's text by platform affinity.

    Evidence is tallied separately for each side and the difference is
    compared against ``_FILE_THRESHOLD``:

    * Unix shebang on the first line: 2 points for unix.
    * Unix-only imports: 2 points per distinct module, at most 3 in total.
    * Windows-only imports: 2 points per distinct module, at most 3 in total.
    * ``ctypes.windll`` and friends: 1 point for windows.
    * ``sys.platform == "win32"`` / ``os.name == "nt"``: 1 point for windows.
    * Linux/POSIX platform check: 1 point for unix.

    Returns:
        "unix"    — unix points lead by at least the threshold
        "windows" — windows points lead by at least the threshold
        "unknown" — anything in between, or empty content

    A file importing both a Unix-only and a Windows-only module (e.g. a
    cross-platform shim) balances out and stays "unknown".
    """
    if not content:
        return "unknown"

    unix_points = 0
    windows_points = 0

    # A shebang can only live on the very first line; it is a deliberate
    # runtime declaration, so it counts as a strong signal on its own.
    first_line = content.split("\n", 1)[0]
    if _SHEBANG_UNIX_RE.match(first_line):
        unix_points += 2

    # Hard dependencies on platform-specific modules. Distinct modules are
    # counted once each and the contribution is capped so that files with
    # many such imports cannot dominate the tally.
    unix_modules = set(_UNIX_IMPORT_RE.findall(content))
    unix_points += min(2 * len(unix_modules), 3)
    windows_modules = set(_WINDOWS_IMPORT_RE.findall(content))
    windows_points += min(2 * len(windows_modules), 3)

    # Weaker hints (one point each): these are often used conditionally.
    if _CTYPES_WIN_RE.search(content):
        windows_points += 1
    if _SYS_PLATFORM_WIN_RE.search(content):
        windows_points += 1
    if _SYS_PLATFORM_LINUX_RE.search(content):
        unix_points += 1

    balance = unix_points - windows_points
    if balance >= _FILE_THRESHOLD:
        return "unix"
    if balance <= -_FILE_THRESHOLD:
        return "windows"
    return "unknown"
241
+
242
+
243
+ # ---------------------------------------------------------------------------
244
+ # Layer 2 — project-level signals
245
+ # ---------------------------------------------------------------------------
246
+
247
+ # Directories whose contents do NOT represent the project's deployment
248
+ # target — virtualenvs, bundled vendor libs, build output, etc. .bat/.ps1
249
+ # files under these paths are dev/tooling artifacts, not a signal that the
250
+ # project itself targets Windows.
251
+ _IGNORED_DIR_PARTS: frozenset[str] = frozenset({
252
+ ".venv", "venv", "env", ".env",
253
+ "node_modules",
254
+ "__pycache__",
255
+ ".git", ".hg", ".svn",
256
+ "build", "dist",
257
+ ".tox", ".mypy_cache", ".pytest_cache", ".ruff_cache",
258
+ "libs", # SYSTEM/libs vendor tree
259
+ ".cortex",
260
+ })
261
+
262
+ _WINDOWS_SCRIPT_EXTS: tuple[str, ...] = (".bat", ".cmd", ".ps1", ".psm1")
263
+
264
+
265
+ def _path_in_ignored_tree(rel_parts: tuple[str, ...]) -> bool:
266
+ return any(part in _IGNORED_DIR_PARTS for part in rel_parts)
267
+
268
+
269
+ def _read_pyproject_classifiers(project_dir: Path) -> list[str]:
270
+ """Return list of classifier strings from pyproject.toml, or [] if absent
271
+ / unreadable. Uses tomllib (stdlib 3.11+)."""
272
+ path = project_dir / "pyproject.toml"
273
+ if not path.is_file():
274
+ return []
275
+ try:
276
+ import tomllib
277
+ except ImportError: # pragma: no cover — 3.11+ always has it
278
+ return []
279
+ try:
280
+ with path.open("rb") as fh:
281
+ data = tomllib.load(fh)
282
+ except (OSError, tomllib.TOMLDecodeError) as exc:
283
+ _log.debug("AUTOFORENSICS: cannot read %s (%s)", path, exc)
284
+ return []
285
+ project = data.get("project") or {}
286
+ classifiers = project.get("classifiers") or []
287
+ if not isinstance(classifiers, list):
288
+ return []
289
+ return [str(c) for c in classifiers]
290
+
291
+
292
+ def _read_setuppy_classifiers(project_dir: Path) -> list[str]:
293
+ """Extract classifiers list from setup.py via AST (no exec).
294
+
295
+ Returns [] if setup.py missing, unparseable, or has no ``classifiers=``
296
+ keyword on a setup() call.
297
+ """
298
+ path = project_dir / "setup.py"
299
+ if not path.is_file():
300
+ return []
301
+ try:
302
+ source = path.read_text(encoding="utf-8", errors="replace")
303
+ tree = ast.parse(source)
304
+ except (OSError, SyntaxError) as exc:
305
+ _log.debug("AUTOFORENSICS: cannot parse %s (%s)", path, exc)
306
+ return []
307
+ for node in ast.walk(tree):
308
+ if not isinstance(node, ast.Call):
309
+ continue
310
+ func = node.func
311
+ # Match setup(...) — either bare name or <module>.setup.
312
+ if isinstance(func, ast.Name) and func.id == "setup":
313
+ pass
314
+ elif isinstance(func, ast.Attribute) and func.attr == "setup":
315
+ pass
316
+ else:
317
+ continue
318
+ for kw in node.keywords:
319
+ if kw.arg != "classifiers":
320
+ continue
321
+ if not isinstance(kw.value, (ast.List, ast.Tuple)):
322
+ continue
323
+ result: list[str] = []
324
+ for elt in kw.value.elts:
325
+ if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
326
+ result.append(elt.value)
327
+ return result
328
+ return []
329
+
330
+
331
+ def _classify_from_classifiers(classifiers: list[str]) -> DeploymentTarget | None:
332
+ """Map Python trove classifiers to a deployment target.
333
+
334
+ Matches Trove strings like:
335
+ * "Operating System :: POSIX :: Linux"
336
+ * "Operating System :: Microsoft :: Windows"
337
+ * "Operating System :: OS Independent"
338
+ """
339
+ if not classifiers:
340
+ return None
341
+ has_linux = any("POSIX" in c or "Linux" in c for c in classifiers)
342
+ has_windows = any("Microsoft" in c or "Windows" in c for c in classifiers)
343
+ has_independent = any("OS Independent" in c for c in classifiers)
344
+ if has_independent:
345
+ return "cross-platform"
346
+ if has_linux and has_windows:
347
+ return "cross-platform"
348
+ if has_linux:
349
+ return "linux-only"
350
+ if has_windows:
351
+ return "windows-only"
352
+ return None
353
+
354
+
355
+ def _has_dockerfile(project_dir: Path) -> bool:
356
+ """True when a Dockerfile exists at the project root (case-insensitive
357
+ for the basename)."""
358
+ for name in ("Dockerfile", "dockerfile", "Dockerfile.prod", "Dockerfile.ci"):
359
+ if (project_dir / name).is_file():
360
+ return True
361
+ # Some projects put it under build/ or docker/ — accept up to depth 2.
362
+ for pattern in ("**/Dockerfile", "**/Dockerfile.*"):
363
+ for candidate in project_dir.glob(pattern):
364
+ try:
365
+ rel = candidate.relative_to(project_dir)
366
+ except ValueError:
367
+ continue
368
+ if _path_in_ignored_tree(rel.parts):
369
+ continue
370
+ return True
371
+ return False
372
+
373
+
374
+ _GHA_JOB_OS_RE = re.compile(
375
+ r"^\s*runs-on\s*:\s*[\"']?([A-Za-z0-9._-]+)[\"']?",
376
+ re.MULTILINE,
377
+ )
378
+
379
+
380
+ def _gha_runners(project_dir: Path) -> set[str]:
381
+ """Return the set of all ``runs-on`` values used across GitHub Actions
382
+ workflows. Empty set when no workflows present.
383
+
384
+ We avoid a YAML dependency by matching a simple regex — ``runs-on`` is
385
+ conventionally a single-line scalar. Matrix expressions (``${{...}}``)
386
+ are returned verbatim; callers treat non-ubuntu/windows/macos values as
387
+ unknown runners.
388
+ """
389
+ wf_dir = project_dir / ".github" / "workflows"
390
+ if not wf_dir.is_dir():
391
+ return set()
392
+ runners: set[str] = set()
393
+ for wf in wf_dir.glob("*.yml"):
394
+ try:
395
+ text = wf.read_text(encoding="utf-8", errors="replace")
396
+ except OSError:
397
+ continue
398
+ for m in _GHA_JOB_OS_RE.finditer(text):
399
+ runners.add(m.group(1).lower())
400
+ for wf in wf_dir.glob("*.yaml"):
401
+ try:
402
+ text = wf.read_text(encoding="utf-8", errors="replace")
403
+ except OSError:
404
+ continue
405
+ for m in _GHA_JOB_OS_RE.finditer(text):
406
+ runners.add(m.group(1).lower())
407
+ return runners
408
+
409
+
410
def _windows_scripts_outside_dev(project_dir: Path, cap: int = 4) -> bool:
    """True when ≥1 .bat/.ps1/.cmd/.psm1 file exists outside dev-infra trees.

    Args:
        project_dir: Project root to search recursively.
        cap: Retained for backward compatibility only. The answer has always
            been "at least one hit", so counting up to ``cap`` merely delayed
            the return; the scan now stops at the first qualifying file.

    Returns:
        True as soon as one Windows script is found whose project-relative
        path has no ignored component; False when none exists.
    """
    for ext in _WINDOWS_SCRIPT_EXTS:
        for path in project_dir.rglob(f"*{ext}"):
            if _path_in_ignored_tree(path.relative_to(project_dir).parts):
                continue
            # One hit is enough — stop walking the tree.
            return True
    return False
429
+
430
+
431
+ def _linux_only_deps_in_requirements(project_dir: Path) -> bool:
432
+ """True when requirements*.txt lists a Linux-exclusive package (uvloop,
433
+ daemonize, sdnotify, systemd-python, etc.)."""
434
+ linux_pkgs = ("uvloop", "daemonize", "sdnotify", "systemd-python", "python-systemd")
435
+ for pattern in ("requirements.txt", "requirements-*.txt", "requirements/*.txt"):
436
+ for path in project_dir.glob(pattern):
437
+ try:
438
+ text = path.read_text(encoding="utf-8", errors="replace")
439
+ except OSError:
440
+ continue
441
+ for pkg in linux_pkgs:
442
+ if re.search(rf"(?m)^\s*{re.escape(pkg)}\b", text):
443
+ return True
444
+ return False
445
+
446
+
447
def _detect_project_uncached(project_dir: Path) -> DeploymentTarget:
    """Run Layer 2 detection (no cache). Returns 'unknown' when signals
    are absent or inconclusive.

    Priority order (first rule that decides wins):
      1. pyproject.toml / setup.py classifiers (authoritative upstream metadata).
      2. GitHub Actions runners (deployment-test target):
         * Windows together with Ubuntu or macOS → cross-platform.
         * Windows without Ubuntu/macOS → windows-only.
         * Ubuntu (optionally with macOS) and no unrecognised runner →
           linux-only.
         * anything else → undecided, continue.
      3. Dockerfile (containerisation signal) → linux-only.
      4. Linux-exclusive deps in requirements*.txt → linux-only.
      5. Windows scripts outside dev-infra → cross-platform.
    """
    # 1. Classifiers — most authoritative.
    classifiers = _read_pyproject_classifiers(project_dir)
    if not classifiers:
        classifiers = _read_setuppy_classifiers(project_dir)
    decision = _classify_from_classifiers(classifiers)
    if decision is not None:
        _log.debug(
            "AUTOFORENSICS: %s classified %s via pyproject/setup.py classifiers",
            project_dir, decision,
        )
        return decision

    # 2. GitHub Actions runners.
    runners = _gha_runners(project_dir)
    if runners:
        known_prefixes = ("ubuntu", "windows", "macos")
        has_ubuntu = any(r.startswith("ubuntu") for r in runners)
        has_windows = any(r.startswith("windows") for r in runners)
        has_macos = any(r.startswith("macos") for r in runners)
        # Unknown runners (matrix expressions, self-hosted) count as
        # "unresolved" — we do not force a linux-only conclusion when they
        # appear alongside ubuntu.
        has_unknown = any(not r.startswith(known_prefixes) for r in runners)
        if has_windows and (has_ubuntu or has_macos):
            # CI exercises Windows AND a Unix platform: the project is
            # cross-platform. Deciding here keeps the Dockerfile /
            # requirements rules below from declaring it linux-only, which
            # would hide Windows-specific findings.
            return "cross-platform"
        if has_windows:
            return "windows-only"
        if has_ubuntu and not has_unknown:
            # Linux-only CI is a deployment signal. macOS runners alongside
            # ubuntu still indicate a Unix-only test matrix — classify as
            # linux-only for encoding-gate purposes (macOS console is UTF-8
            # by default, not cp1252).
            return "linux-only"

    # 3. Dockerfile → container → Linux in the overwhelming majority of
    #    cases. We do not downgrade for the rare Windows-container project;
    #    callers can override via explicit config.
    if _has_dockerfile(project_dir):
        return "linux-only"

    # 4. Linux-exclusive deps.
    if _linux_only_deps_in_requirements(project_dir):
        return "linux-only"

    # 5. Windows scripts outside dev-infra → at least cross-platform.
    #    This only fires when steps 1–4 did not decide. A project with a
    #    start_vigil.bat launcher but no deployment metadata is assumed to be
    #    Windows-aware.
    if _windows_scripts_outside_dev(project_dir):
        return "cross-platform"

    return "unknown"
511
+
512
+
513
def detect_project_deployment(project_dir: Path) -> DeploymentTarget:
    """Cached entry-point for Layer 2 detection.

    The cache key is the resolved path, case-normalised only where the
    platform's paths are case-insensitive (``os.path.normcase`` lower-cases
    on Windows and leaves the string untouched elsewhere). Lower-casing
    unconditionally would make ``/srv/App`` and ``/srv/app`` — two distinct
    projects on a case-sensitive filesystem — share one cached verdict.

    A rubik-scale project (~2000 files) asks this function once per file;
    we MUST amortise.

    Args:
        project_dir: Project root to classify.

    Returns:
        The memoised result of ``_detect_project_uncached`` for this path.
    """
    try:
        resolved = str(project_dir.resolve())
    except (OSError, RuntimeError):
        # resolve() can fail on unreadable paths or symlink loops; fall back
        # to the path as given rather than aborting detection.
        resolved = str(project_dir)
    key = os.path.normcase(resolved)
    cached = _PROJECT_CACHE.get(key)
    if cached is not None:
        return cached
    result = _detect_project_uncached(project_dir)
    _PROJECT_CACHE[key] = result
    return result
529
+
530
+
531
def clear_project_cache() -> None:
    """Forget every memoised project-level detection.

    The cache dict is emptied in place (not rebound), so existing references
    to it stay valid. Intended for tests.
    """
    _PROJECT_CACHE.clear()
534
+
535
+
536
+ # ---------------------------------------------------------------------------
537
+ # Cascade — the single entrypoint callers should use
538
+ # ---------------------------------------------------------------------------
539
+
540
def resolve_deployment(
    project_dir: Path,
    file_content: str | None = None,
) -> DeploymentTarget:
    """Resolve a deployment target through the full 3-layer cascade.

    The first layer that produces an answer wins:
      1. Explicit override (config.json / env var).
      2. File-level signal — only when ``file_content`` is given and it
         classifies as 'unix' (→ "linux-only") or 'windows'
         (→ "windows-only").
      3. Project-level signal, which may itself be 'unknown'; the caller
         decides how to handle that (conservative default: scan).
    """
    override = get_explicit_deployment(project_dir)
    if override is not None:
        return override

    if file_content is not None:
        # Translate the per-file verdict into the project-level vocabulary;
        # an "unknown" file has no entry and falls through.
        file_verdicts = {"unix": "linux-only", "windows": "windows-only"}
        from_file = file_verdicts.get(detect_file_deployment(file_content))
        if from_file is not None:
            return from_file  # type: ignore[return-value]

    return detect_project_deployment(project_dir)
564
+
565
+
566
# Public API of this module; everything else is an implementation detail.
__all__ = [
    "DeploymentTarget",
    "get_explicit_deployment", "detect_file_deployment",
    "detect_project_deployment", "clear_project_cache",
    "resolve_deployment",
]