sourcepack 1.10.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcepack/judgment.py ADDED
@@ -0,0 +1,1922 @@
1
+ from __future__ import annotations
2
+
3
+ import fnmatch
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import tomllib
8
+ import re
9
+ import shutil
10
+ import subprocess
11
+ import sys
12
+ import tempfile
13
+ from dataclasses import dataclass, asdict
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path, PurePosixPath
16
+ from typing import Iterable
17
+ from xml.sax.saxutils import escape as xml_escape
18
+ from .diff_parser import PatchFileChange, normalize_diff_path as _normalize_diff_path, parse_unified_diff
19
+ from .baseline import BaselineLockError, acquire_baseline_lock, baseline_corrupt_result, baseline_report_fields, build_current_baseline, protected_baseline_path, release_baseline_lock, resolve_active_baseline, validate_baseline
20
+ from .ecosystems.python import PY_IMPORT_ALIASES
21
+ from .paths import ensure_gitignore_entry, ensure_sourcepack_dirs, sourcepack_paths
22
+ from .reports.json import normalized_finding, traffic_report, write_user_report
23
+ from .policy import PolicyMode, normalize_policy_mode, exit_code as policy_exit_code
24
+ from .execution_ledger import execution_findings
25
+ from .commands import resolve_command
26
+ from .dependencies import resolve_js_import, resolve_python_import
27
+
28
+ try:
29
+ from . import __version__
30
+ except Exception:
31
+ __version__ = "1.10.0-alpha"
32
+
33
# Directory names the scanner never descends into.
DEFAULT_IGNORED_DIRS = {
    ".git",
    "node_modules",
    ".venv",
    "venv",
    "__pycache__",
    "dist",
    "build",
    ".next",
    ".cache",
    "target",
    "coverage",
    ".pytest_cache",
    ".sourcepack",
}

# fnmatch-style patterns for files that are always skipped
# (secret-bearing files, databases, media, archives, compiled artefacts).
DEFAULT_IGNORED_PATTERNS = {
    ".env",
    ".env.*",
    "*.pem",
    "*.key",
    "*.sqlite",
    "*.db",
    "*.png",
    "*.jpg",
    "*.jpeg",
    "*.gif",
    "*.webp",
    "*.pdf",
    "*.zip",
    "*.tar",
    "*.gz",
    "*.exe",
    "*.dll",
    "*.so",
    "*.dylib",
    "*.bin",
    "*.pyc",
}

# Lower-case suffixes accepted as text; files without a suffix are not
# filtered by this set.
DEFAULT_TEXT_EXTENSIONS = {
    ".txt",
    ".md",
    ".py",
    ".js",
    ".ts",
    ".tsx",
    ".jsx",
    ".json",
    ".yaml",
    ".yml",
    ".html",
    ".css",
    ".csv",
    ".toml",
    ".ini",
    ".sql",
    ".sh",
    ".bat",
    ".ps1",
    ".rs",
    ".go",
    ".java",
    ".c",
    ".cpp",
    ".h",
    ".hpp",
    ".rb",
    ".php",
    ".xml",
}

# (label, compiled regex) pairs, applied in this order by redact_secrets().
SECRET_PATTERNS = [
    ("openai_key", re.compile(r"sk-proj-[A-Za-z0-9_\-]{12,}|sk-[A-Za-z0-9]{24,}")),
    ("aws_access_key", re.compile(r"AKIA[0-9A-Z]{16}")),
    ("private_key", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
    ("generic_api_key", re.compile(r"(?i)(api[_-]?key|secret|token)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{16,}")),
    ("github_token", re.compile(r"ghp_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}")),
    ("slack_token", re.compile(r"xox[baprs]-[A-Za-z0-9\-]{20,}")),
]

# Dependency names the inventory is allowed to report.
COMMON_DEPENDENCIES = [
    "fastapi",
    "flask",
    "django",
    "react",
    "vue",
    "svelte",
    "pytest",
    "typer",
    "click",
    "sqlalchemy",
    "prisma",
    "pydantic",
    "pyyaml",
    "pillow",
    "beautifulsoup4",
    "opencv-python",
    "scikit-learn",
    "python-dotenv",
    "pyjwt",
    "python-dateutil",
    "boto3",
    "requests",
]

# Capability labels.
FEATURE_NAMES = (
    "pdf",
    "ocr",
    "web server",
    "react",
    "docker",
    "authentication",
    "database",
)
57
+
58
+
59
def utc_now() -> str:
    """Return the current time in UTC as an ISO 8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
61
+
62
+
63
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in 1 MiB chunks so it is never held in memory whole.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            chunk = stream.read(chunk_size)
            if chunk == b"":
                break
            digest.update(chunk)
    return digest.hexdigest()
69
+
70
+
71
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
    encoded = text.encode("utf-8")
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
73
+
74
+
75
def estimate_tokens(text: str) -> int:
    """Estimate the token count of *text* as one token per four characters, rounded up."""
    whole, remainder = divmod(len(text), 4)
    return whole + 1 if remainder else whole
77
+
78
+
79
def is_probably_binary(path: Path, sample_size: int = 4096) -> bool:
    """Guess whether *path* holds binary data by sampling its first bytes.

    Args:
        path: File to inspect.
        sample_size: Maximum number of leading bytes to examine.

    Returns:
        ``True`` when the file cannot be read, when the sample contains a NUL
        byte, or when more than 30% of the sampled bytes are control
        characters other than tab, LF, VT, FF and CR. ``False`` for an empty
        file or a sample that looks like text.
    """
    try:
        # Read only the sample instead of loading the whole file into memory.
        with path.open("rb") as handle:
            data = handle.read(sample_size)
    except OSError:
        # Unreadable files are treated as binary so callers skip them.
        return True
    if b"\x00" in data:
        return True
    if not data:
        return False
    nonprintable = sum(1 for byte in data if byte < 9 or 13 < byte < 32)
    return (nonprintable / len(data)) > 0.30
90
+
91
+
92
def matches_any(name: str, patterns: Iterable[str]) -> bool:
    """Return True if *name* matches at least one fnmatch-style pattern."""
    for candidate in patterns:
        if fnmatch.fnmatch(name, candidate):
            return True
    return False
94
+
95
+
96
def redact_secrets(text: str):
    """Replace every match of SECRET_PATTERNS in *text* with a placeholder.

    Patterns are applied one after another to the progressively redacted
    text, so the recorded spans refer to the text as it stood when the
    corresponding pattern ran, not necessarily to the original input.

    Returns:
        A ``(redacted_text, redactions)`` tuple, where ``redactions`` is a
        list of dicts with the keys ``pattern``, ``span_start`` and
        ``span_end``.
    """
    found = []
    current = text
    for label, pattern in SECRET_PATTERNS:
        placeholder = f"[REDACTED:{label}]"

        def record(match, _label=label, _placeholder=placeholder):
            found.append({"pattern": _label, "span_start": match.start(), "span_end": match.end()})
            return _placeholder

        current = pattern.sub(record, current)
    return current, found
105
+
106
+
107
@dataclass
class IncludedFile:
    """Record describing one file whose content was accepted into the packet."""

    # Path relative to the scan root.
    relative_path: str
    # Resolved absolute path on disk.
    absolute_path: str
    # Size reported by stat() at scan time.
    size_bytes: int
    # SourceScanner stores the same digest here as in packet_sha256.
    sha256: str
    # Digest of the text as read from disk, before any redaction.
    source_sha256: str
    # Digest of the text as stored in ``content`` (after redaction, if enabled).
    packet_sha256: str
    # Rough token estimate for ``content``.
    estimated_tokens: int
    # Lower-cased file suffix (may be empty).
    extension: str
    # Text that goes into the packet.
    content: str
118
+
119
+
120
@dataclass
class IgnoredFile:
    """Record describing one path that was skipped, and why."""

    # Path relative to the scan root; directories carry a trailing "/".
    relative_path: str
    # Short machine-readable reason, e.g. "hidden_file" or "binary_detected".
    reason: str
124
+
125
+
126
+ class SourceScanner:
127
+ def __init__(self, input_path: str | Path, max_file_size: int = 1_000_000, include_hidden: bool = False, redact: bool = True):
128
+ self.input_path = Path(input_path).resolve()
129
+ self.max_file_size = max_file_size
130
+ self.include_hidden = include_hidden
131
+ self.redact = redact
132
+ self.included_files: list[IncludedFile] = []
133
+ self.ignored_files: list[IgnoredFile] = []
134
+ self.redactions: list[dict] = []
135
+ self.total_seen = 0
136
+
137
+ def ignore(self, path: Path, reason: str):
138
+ rel = str(path.relative_to(self.input_path)) if path.is_absolute() or self.input_path in path.parents else str(path)
139
+ self.ignored_files.append(IgnoredFile(rel, reason))
140
+
141
+ def scan(self):
142
+ if not self.input_path.exists():
143
+ raise FileNotFoundError(f"Input path does not exist: {self.input_path}")
144
+ if not self.input_path.is_dir():
145
+ raise NotADirectoryError(f"Input path is not a directory: {self.input_path}")
146
+ for root, dirs, files in os.walk(self.input_path, followlinks=False):
147
+ root_path = Path(root)
148
+ dirs[:] = sorted(dirs)
149
+ files = sorted(files)
150
+ kept_dirs = []
151
+ for d in dirs:
152
+ dpath = root_path / d
153
+ rel = dpath.relative_to(self.input_path)
154
+ if d in DEFAULT_IGNORED_DIRS:
155
+ self.ignored_files.append(IgnoredFile(str(rel) + "/", "ignored_directory"))
156
+ elif not self.include_hidden and d.startswith("."):
157
+ self.ignored_files.append(IgnoredFile(str(rel) + "/", "hidden_directory"))
158
+ elif dpath.is_symlink():
159
+ self.ignored_files.append(IgnoredFile(str(rel) + "/", "symlink_skipped"))
160
+ else:
161
+ kept_dirs.append(d)
162
+ dirs[:] = kept_dirs
163
+ for filename in files:
164
+ fp = root_path / filename
165
+ rel = fp.relative_to(self.input_path)
166
+ self.total_seen += 1
167
+ rel_str = str(rel)
168
+ if fp.is_symlink():
169
+ self.ignored_files.append(IgnoredFile(rel_str, "symlink_skipped")); continue
170
+ if not self.include_hidden and filename.startswith("."):
171
+ self.ignored_files.append(IgnoredFile(rel_str, "hidden_file")); continue
172
+ if matches_any(filename, DEFAULT_IGNORED_PATTERNS) or matches_any(rel_str, DEFAULT_IGNORED_PATTERNS):
173
+ self.ignored_files.append(IgnoredFile(rel_str, "ignored_pattern")); continue
174
+ try:
175
+ size = fp.stat().st_size
176
+ except OSError:
177
+ self.ignored_files.append(IgnoredFile(rel_str, "stat_error")); continue
178
+ if size > self.max_file_size:
179
+ self.ignored_files.append(IgnoredFile(rel_str, "max_file_size_exceeded")); continue
180
+ if fp.suffix and fp.suffix.lower() not in DEFAULT_TEXT_EXTENSIONS:
181
+ self.ignored_files.append(IgnoredFile(rel_str, "unsupported_extension")); continue
182
+ if is_probably_binary(fp):
183
+ self.ignored_files.append(IgnoredFile(rel_str, "binary_detected")); continue
184
+ try:
185
+ content = fp.read_text(encoding="utf-8")
186
+ except UnicodeDecodeError:
187
+ self.ignored_files.append(IgnoredFile(rel_str, "decode_error")); continue
188
+ except OSError:
189
+ self.ignored_files.append(IgnoredFile(rel_str, "read_error")); continue
190
+ source_sha256 = sha256_text(content)
191
+ if self.redact:
192
+ redacted, reds = redact_secrets(content)
193
+ for r in reds:
194
+ r["file"] = rel_str
195
+ self.redactions.extend(reds)
196
+ content = redacted
197
+ packet_sha256 = sha256_text(content)
198
+ self.included_files.append(IncludedFile(
199
+ relative_path=rel_str,
200
+ absolute_path=str(fp.resolve()),
201
+ size_bytes=size,
202
+ sha256=packet_sha256,
203
+ source_sha256=source_sha256,
204
+ packet_sha256=packet_sha256,
205
+ estimated_tokens=estimate_tokens(content),
206
+ extension=fp.suffix.lower(),
207
+ content=content,
208
+ ))
209
+ self.included_files.sort(key=lambda x: x.relative_path)
210
+ self.ignored_files.sort(key=lambda x: x.relative_path)
211
+ return self
212
+
213
+
214
def _tracked_file_inventory(root: Path, included_records: list[dict]) -> dict:
    """Build the file inventory document for *root*.

    The file list comes from ``git ls-files`` when that command succeeds and
    prints at least one path; otherwise it falls back to the paths in
    *included_records*. Each entry records whether the file is part of the
    prompt context and, when the file exists, its SHA-256 and a text/binary
    classification.
    """
    included = {str(record.get("relative_path", "")).replace("\\", "/") for record in included_records}

    tracked: list[str] = []
    try:
        completed = subprocess.run(
            ["git", "ls-files", "-z"],
            cwd=root,
            text=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (OSError, ValueError):
        completed = None
    if completed is not None and completed.returncode == 0:
        # -z output is NUL separated; surrogateescape keeps undecodable bytes.
        tracked = [chunk.decode("utf-8", "surrogateescape") for chunk in completed.stdout.split(b"\0") if chunk]

    if tracked:
        source = "git_ls_files"
        raw_paths = tracked
    else:
        source = "scanner_included_files"
        raw_paths = sorted(included)

    files: list[dict] = []
    for raw in raw_paths:
        rel = raw.replace("\\", "/")
        target = root / rel
        entry = {"relative_path": rel, "included_in_prompt_context": rel in included, "source": source}
        try:
            if target.exists() and target.is_file():
                entry["sha256"] = sha256_file(target)
                entry["file_type"] = "binary" if is_probably_binary(target) else "text"
            else:
                entry["file_type"] = "missing"
        except OSError:
            entry["file_type"] = "unreadable"
        files.append(entry)

    return {
        "schema_version": "sourcepack.file_inventory.v1",
        "generated_at": utc_now(),
        "source": source,
        "files": files,
    }
243
+
244
+
245
+ class PacketWriter:
246
+ OUTPUT_FILES = ["manifest.json", "context.md", "context.xml", "file_tree.txt", "ignored_files.txt", "token_report.json", "redactions.json", "reality_map.json", "ai_instructions.md", "file_inventory.json"]
247
+
248
+ def __init__(self, out: str | Path, scanner: SourceScanner, force: bool = False):
249
+ self.out = Path(out)
250
+ self.scanner = scanner
251
+ self.force = force
252
+
253
+ def prepare_out(self):
254
+ if self.out.exists() and any(self.out.iterdir()):
255
+ if not self.force:
256
+ raise FileExistsError(f"Output directory is non-empty: {self.out}")
257
+ for child in self.out.iterdir():
258
+ if child.is_dir():
259
+ shutil.rmtree(child)
260
+ else:
261
+ child.unlink()
262
+ self.out.mkdir(parents=True, exist_ok=True)
263
+
264
+ def write_all(self):
265
+ self.prepare_out()
266
+ included_records = []
267
+ for f in self.scanner.included_files:
268
+ rec = asdict(f)
269
+ rec.pop("content")
270
+ included_records.append(rec)
271
+ ignored_records = [asdict(f) for f in self.scanner.ignored_files]
272
+ total_tokens = sum(f.estimated_tokens for f in self.scanner.included_files)
273
+ total_bytes = sum(f.size_bytes for f in self.scanner.included_files)
274
+ manifest = {
275
+ "input_path": str(self.scanner.input_path),
276
+ "generated_at": utc_now(),
277
+ "tool_version": __version__,
278
+ "total_files_seen": self.scanner.total_seen,
279
+ "total_files_included": len(included_records),
280
+ "total_files_ignored": len(ignored_records),
281
+ "total_bytes_included": total_bytes,
282
+ "total_estimated_tokens": total_tokens,
283
+ "included_files": included_records,
284
+ "ignored_files": ignored_records,
285
+ }
286
+ (self.out / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
287
+ (self.out / "file_inventory.json").write_text(json.dumps(_tracked_file_inventory(self.scanner.input_path, included_records), indent=2), encoding="utf-8")
288
+ md_parts = ["# SourcePack Context Packet", "", "## Source Manifest Summary", "", f"Input path: {manifest['input_path']}", f"Generated at: {manifest['generated_at']}", f"Files included: {len(included_records)}", f"Estimated tokens: {total_tokens}", ""]
289
+ for f in self.scanner.included_files:
290
+ md_parts.extend([
291
+ f"## File: {f.relative_path}", "", "Metadata:", f"- sha256: {f.sha256}", f"- bytes: {f.size_bytes}", f"- estimated_tokens: {f.estimated_tokens}", "", "Content:", "", f.content, "", "---", ""
292
+ ])
293
+ (self.out / "context.md").write_text("\n".join(md_parts), encoding="utf-8")
294
+ xml_parts = ["<sourcepack>", " <files>"]
295
+ for f in self.scanner.included_files:
296
+ xml_parts.append(f' <file path="{xml_escape(f.relative_path)}" sha256="{f.sha256}" bytes="{f.size_bytes}" estimated_tokens="{f.estimated_tokens}">')
297
+ xml_parts.append(" <content>")
298
+ xml_parts.append(xml_escape(f.content))
299
+ xml_parts.append(" </content>")
300
+ xml_parts.append(" </file>")
301
+ xml_parts.extend([" </files>", "</sourcepack>"])
302
+ (self.out / "context.xml").write_text("\n".join(xml_parts), encoding="utf-8")
303
+ tree_lines = []
304
+ for f in self.scanner.included_files:
305
+ tree_lines.append(f"[INC] {f.relative_path}")
306
+ for f in self.scanner.ignored_files:
307
+ tree_lines.append(f"[IGN] {f.relative_path} - {f.reason}")
308
+ (self.out / "file_tree.txt").write_text("\n".join(sorted(tree_lines)) + "\n", encoding="utf-8")
309
+ (self.out / "ignored_files.txt").write_text("\n".join(f"{f.relative_path}\t{f.reason}" for f in self.scanner.ignored_files) + "\n", encoding="utf-8")
310
+ token_report = {
311
+ "total_estimated_tokens": total_tokens,
312
+ "warnings": [limit for limit in [32_000, 128_000, 200_000, 1_000_000] if total_tokens > limit],
313
+ "per_file": [{"relative_path": f.relative_path, "estimated_tokens": f.estimated_tokens} for f in self.scanner.included_files],
314
+ }
315
+ (self.out / "token_report.json").write_text(json.dumps(token_report, indent=2), encoding="utf-8")
316
+ (self.out / "redactions.json").write_text(json.dumps({"redactions": self.scanner.redactions}, indent=2), encoding="utf-8")
317
+ reality_map = generate_reality_map(manifest, self.out)
318
+ (self.out / "reality_map.json").write_text(json.dumps(reality_map, indent=2), encoding="utf-8")
319
+ (self.out / "ai_instructions.md").write_text(render_ai_instructions(reality_map), encoding="utf-8")
320
+ hashes = {name: sha256_file(self.out / name) for name in self.OUTPUT_FILES if (self.out / name).exists()}
321
+ receipt = {"generated_at": utc_now(), "tool_version": __version__, "hashes": hashes}
322
+ (self.out / "receipt.json").write_text(json.dumps(receipt, indent=2), encoding="utf-8")
323
+ return self.out
324
+
325
+
326
+
327
+ def _included_paths(manifest: dict) -> set[str]:
328
+ return {rec.get("relative_path", "").replace("\\", "/") for rec in manifest.get("included_files", [])}
329
+
330
+
331
def _package_json_scripts(packet: Path) -> dict[str, str]:
    """Return the ``scripts`` table of the first ``package.json`` in the packet.

    Only the first file named ``package.json`` (case-insensitive) is
    consulted. An empty dict is returned when there is no such file, when it
    is not valid JSON, when its top level is not an object, or when
    ``scripts`` is not an object.
    """
    for rel, content in _packet_file_contents(packet).items():
        if Path(rel).name.lower() != "package.json":
            continue
        try:
            package = json.loads(content)
        except json.JSONDecodeError:
            return {}
        # Valid JSON need not be an object (e.g. "[]"); guard before .get().
        if not isinstance(package, dict):
            return {}
        scripts = package.get("scripts")
        return scripts if isinstance(scripts, dict) else {}
    return {}
342
+
343
+
344
def _is_poetry_project(packet: Path) -> bool:
    """Return True if any ``pyproject.toml`` in the packet has a ``[tool.poetry]`` table header."""
    poetry_header = r"(?m)^\s*\[tool\.poetry\]\s*$"
    return any(
        Path(rel).name.lower() == "pyproject.toml" and re.search(poetry_header, content) is not None
        for rel, content in _packet_file_contents(packet).items()
    )
349
+
350
+
351
def _uses_unittest(packet: Path) -> bool:
    """Return True if any ``.py`` file in the packet imports ``unittest``."""
    unittest_import = r"(?m)^\s*(import\s+unittest|from\s+unittest\s+import\s+)"
    return any(
        Path(rel).suffix.lower() == ".py" and re.search(unittest_import, content) is not None
        for rel, content in _packet_file_contents(packet).items()
    )
356
+
357
+
358
def generate_reality_map(manifest: dict, packet: Path) -> dict:
    """Summarise what the packet's structural evidence supports.

    Args:
        manifest: The manifest dict (``included_files``, ``ignored_files``,
            ``input_path``).
        packet: Packet directory; its ``context.md`` is consulted through the
            dependency, feature and script helpers.

    Returns:
        A JSON-serialisable dict listing detected project types, package
        managers, frameworks, commands, dependencies and capabilities,
        together with fixed claim boundaries and AI constraints.
    """
    files = _included_paths(manifest)
    lower_files = {f.lower() for f in files}
    basenames = {Path(f).name.lower() for f in files}
    deps = dependency_inventory(manifest, packet)
    features = feature_inventory(manifest, packet, deps)
    scripts = _package_json_scripts(packet)

    project_types: set[str] = set()
    package_managers: set[str] = set()
    frameworks: set[str] = set()
    supported_commands: set[str] = set()
    test_commands: set[str] = set()
    build_commands: set[str] = set()
    run_commands: set[str] = set()

    # Python evidence.
    if "pyproject.toml" in lower_files:
        project_types.add("python")
    if any(Path(f).name.lower().startswith("requirements") and f.endswith(".txt") for f in lower_files):
        project_types.add("python")
        package_managers.add("pip")
    if _is_poetry_project(packet):
        package_managers.add("poetry")

    # Node evidence: scripts are only reported for a root-level package.json.
    if "package.json" in lower_files:
        project_types.add("node")
        package_managers.add("npm")
        for name in sorted(scripts):
            cmd = "npm test" if name == "test" else f"npm run {name}"
            supported_commands.add(cmd)
            if name == "test":
                test_commands.add(cmd)
            elif name in {"build", "compile"}:
                build_commands.add(cmd)
            elif name in {"start", "dev", "serve"}:
                run_commands.add(cmd)

    # Container evidence.
    if "dockerfile" in basenames:
        supported_commands.add("docker build")
        build_commands.add("docker build")
    if basenames & {"docker-compose.yml", "compose.yaml", "compose.yml"}:
        supported_commands.add("docker compose up")
        run_commands.add("docker compose up")

    # Test runner evidence.
    if "pytest" in deps or any(f == "tests" or f.startswith("tests/") for f in lower_files):
        supported_commands.add("pytest")
        test_commands.add("pytest")
    if _uses_unittest(packet):
        supported_commands.add("python -m unittest")
        test_commands.add("python -m unittest")

    framework_map = {"fastapi": "FastAPI", "flask": "Flask", "django": "Django", "react": "React"}
    for dep, label in framework_map.items():
        if dep in deps or (dep == "react" and "react" in features):
            frameworks.add(label)

    ignored = manifest.get("ignored_files", [])
    ignored_reasons: dict = {}
    for rec in ignored:
        reason = rec.get("reason", "unknown")
        ignored_reasons[reason] = ignored_reasons.get(reason, 0) + 1
    included_count = len(manifest.get("included_files", []))

    safe_claims = [
        f"This packet includes {included_count} source files.",
        f"SourcePack scanned input path: {manifest.get('input_path', '')}.",
    ]
    for name in ["pyproject.toml", "package.json", "Dockerfile"]:
        verb = "contains" if name.lower() in basenames else "does not include"
        safe_claims.append(f"The project {verb} {name}.")
    if "react" not in deps and "react" not in features:
        safe_claims.append("No React dependency was detected.")
    if "pdf" not in features:
        safe_claims.append("No PDF parsing capability was detected.")
    if ignored:
        safe_claims.append("The packet includes ignored file records for safety or relevance reasons.")

    claim_boundaries = [
        "SourcePack did not execute the application.",
        "SourcePack did not prove semantic correctness.",
        "SourcePack did not verify external services.",
        "SourcePack did not prove security.",
        "SourcePack did not prove production readiness.",
        "Absence of evidence means unknown, not impossible.",
        "Unsupported claims should be treated as ungrounded.",
    ]
    unknowns = [
        "Runtime behavior was not executed.",
        "Semantic correctness was not proven.",
        "External services were not verified.",
        "Capabilities not present in structural evidence must be treated as unknown.",
        "Missing files must not be invented.",
    ]
    ai_constraints = [
        "Use only the packet and reality map as project evidence.",
        "Do not invent files, commands, dependencies, frameworks, services, or capabilities.",
        "If a required file is missing, say it is missing.",
        "If a command is unsupported by detected evidence, say it is unsupported.",
        "If a capability is not in supported_capabilities, treat it as unknown or unsupported.",
        "Cite file paths when making project-specific claims.",
        "Do not claim SourcePack proves semantic truth.",
        "Ask for missing files rather than hallucinating them.",
    ]
    entry_point_names = {"main.py", "app.py", "server.py", "cli.py"}

    return {
        "reality_map_schema_version": "1.0",
        "tool_version": __version__,
        "generated_at": utc_now(),
        "input_path": manifest.get("input_path", ""),
        "project_types": sorted(project_types),
        "package_managers": sorted(package_managers),
        "frameworks": sorted(frameworks),
        "entry_points": sorted(f for f in files if Path(f).name in entry_point_names),
        "test_commands": sorted(test_commands),
        "build_commands": sorted(build_commands),
        "run_commands": sorted(run_commands),
        "supported_commands": sorted(supported_commands),
        "detected_dependencies": sorted(deps),
        "supported_capabilities": sorted(features),
        "excluded_files_summary": {"total": len(ignored), "reasons": ignored_reasons, "records": ignored[:25]},
        "included_file_count": included_count,
        "confirmed_files": sorted(files),
        "ignored_file_count": len(ignored),
        "safe_claims": safe_claims,
        "unknowns": unknowns,
        "claim_boundaries": claim_boundaries,
        "ai_constraints": ai_constraints,
    }
470
+
471
+
472
def render_ai_instructions(reality_map: dict) -> str:
    """Render the Markdown instruction sheet that accompanies a packet.

    The fixed rules come first, followed by the supported commands,
    supported capabilities, at most 200 confirmed files, the required answer
    contract and the claim boundaries taken from *reality_map*.
    """

    def bullets(items, template):
        """Format *items* as bullet lines, or a single "None detected" bullet."""
        rendered = [template.format(item) for item in items]
        return rendered if rendered else ["- None detected"]

    preamble = [
        "# AI Instructions for This SourcePack Packet",
        "",
        "Use only the packet and `reality_map.json` as project evidence.",
        "Do not invent files, commands, dependencies, frameworks, services, or capabilities.",
        "If a required file is missing, say it is missing and ask for it rather than hallucinating it.",
        "If a command is unsupported by detected evidence, say it is unsupported.",
        "If a capability is not listed in `supported_capabilities`, treat it as unknown or unsupported.",
        "If you introduce a new external dependency, modify the appropriate dependency manifest in the same patch and list it under Dependency Changes.",
        "Only recommend commands listed under Supported Commands unless your patch also adds the project file that defines the new command.",
        "Before referencing a file as existing, it must appear in Confirmed Files; label intentional creations as NEW FILE.",
        "If required evidence is missing, say UNKNOWN and ask for the missing file/output instead of guessing.",
        "Cite file paths when making project-specific claims.",
        "Do not claim SourcePack proves semantic truth, security, production readiness, or external service behavior.",
        "",
    ]
    answer_contract = [
        "- Files to modify",
        "- New files",
        "- Dependency changes",
        "- Commands to run",
        "- Assumptions/unknowns",
        "- Patch or code",
    ]

    sections = list(preamble)
    sections += ["## Supported Commands", ""]
    sections += bullets(reality_map.get("supported_commands", []), "- `{}`")
    sections += ["", "## Supported Capabilities", ""]
    sections += bullets(reality_map.get("supported_capabilities", []), "- {}")
    sections += ["", "## Confirmed Files", ""]
    # Confirmed files are capped at 200 and get no "None detected" fallback.
    sections += [f"- `{path}`" for path in reality_map.get("confirmed_files", [])[:200]]
    sections += ["", "## Required Answer Contract", ""]
    sections += answer_contract
    sections += ["", "## Claim Boundaries", ""]
    sections += [f"- {boundary}" for boundary in reality_map.get("claim_boundaries", [])]
    return "\n".join(sections) + "\n"
498
+
499
def load_manifest(packet: Path) -> dict:
    """Read and parse ``manifest.json`` from the packet directory."""
    manifest_path = packet / "manifest.json"
    raw = manifest_path.read_text(encoding="utf-8")
    return json.loads(raw)
501
+
502
+
503
+
504
+
505
# Suffixes that make a token in AI output look like a file reference.
PATHLIKE_EXTENSIONS = {
    ".py", ".js", ".jsx", ".ts", ".tsx",
    ".json", ".toml", ".yaml", ".yml",
    ".md", ".txt", ".cfg", ".ini",
    ".css", ".html",
    ".rs", ".go", ".java", ".rb", ".php", ".sh",
}

# Leading directory names that mark a token as a project path.
PROJECT_PATH_PREFIXES = {
    "src", "sourcepack", "tests", "test",
    "frontend", "backend", "docs", "app", "lib",
    "packages", "public", "config", "scripts",
}
507
+
508
+
509
+ def _normalize_ai_ref(ref: str) -> str | None:
510
+ ref = ref.strip().strip("`'\".,;)")
511
+ ref = ref.replace("\\", "/")
512
+ if ref.endswith(":"):
513
+ ref = ref[:-1]
514
+ while ref.startswith("./"):
515
+ ref = ref[2:]
516
+ if not ref or ref.startswith("/") or re.match(r"^[A-Za-z]:/", ref):
517
+ return None
518
+ normalized, unsafe = _normalize_diff_path(ref)
519
+ if unsafe or not normalized:
520
+ return None
521
+ return normalized
522
+
523
+
524
+ def _looks_like_ai_file_ref(ref: str) -> bool:
525
+ normalized = ref.replace("\\", "/")
526
+ name = PurePosixPath(normalized).name
527
+ if name in {"Dockerfile", "docker-compose.yml", "compose.yaml", "compose.yml", "pyproject.toml", "package.json", "requirements.txt"}:
528
+ return True
529
+ suffix = PurePosixPath(normalized).suffix.lower()
530
+ if suffix not in PATHLIKE_EXTENSIONS:
531
+ return False
532
+ parts = [p for p in PurePosixPath(normalized).parts if p not in {"."}]
533
+ return "/" in normalized or (parts and parts[0] in PROJECT_PATH_PREFIXES)
534
+
535
+
536
def extract_refs(text: str) -> set[str]:
    """Collect the file references mentioned in free-form AI output.

    Candidates are gathered from quoted or back-ticked tokens, bullet items,
    tokens following verbs such as "edit" or "open", and bare paths under
    well-known project directories. Each candidate is normalised and kept
    only if it still looks like a file reference.
    """
    token = r"(?:\./)?[A-Za-z0-9_.-]+(?:[\\/][A-Za-z0-9_.-]+)*\.[A-Za-z0-9_.-]+:?|Dockerfile"
    quoted = rf"[`'\"]({token})[`'\"]"
    bulleted = rf"(?m)^\s*[-*]\s+({token})\b"
    after_verb = rf"\b(?:edit|open|update|modify|change|in|file)\s+({token})\b"
    project_path = rf"\b((?:\./)?(?:src|sourcepack|tests|test|frontend|backend|docs|app|lib|packages|public|config|scripts)[\\/][A-Za-z0-9_./\\-]+\.[A-Za-z0-9_.-]+:?)\b"

    found: set[str] = set()
    for pattern in (quoted, bulleted, after_verb, project_path):
        # Every pattern has exactly one capturing group: the candidate token.
        for match in re.finditer(pattern, text, re.I):
            cleaned = _normalize_ai_ref(match.group(1))
            if cleaned and _looks_like_ai_file_ref(cleaned):
                found.add(cleaned)
    return found
546
+
547
+
548
+ def _packet_file_contents(packet: Path) -> dict[str, str]:
549
+ context_path = packet / "context.md"
550
+ if not context_path.exists():
551
+ return {}
552
+ text = context_path.read_text(encoding="utf-8", errors="ignore")
553
+ contents: dict[str, str] = {}
554
+ current: str | None = None
555
+ body: list[str] = []
556
+ in_content = False
557
+ for line in text.splitlines():
558
+ if line.startswith("## File: "):
559
+ if current is not None:
560
+ contents[current] = "\n".join(body).rstrip("\n")
561
+ current = line.removeprefix("## File: ").strip()
562
+ body = []
563
+ in_content = False
564
+ elif current is not None and line == "Content:":
565
+ in_content = True
566
+ body = []
567
+ elif current is not None and in_content and line == "---":
568
+ contents[current] = "\n".join(body).rstrip("\n")
569
+ current = None
570
+ body = []
571
+ in_content = False
572
+ elif current is not None and in_content:
573
+ body.append(line)
574
+ if current is not None:
575
+ contents[current] = "\n".join(body).rstrip("\n")
576
+ return contents
577
+
578
+
579
+ def _normalize_dependency_name(name: str) -> str:
580
+ return name.strip().lower().replace("_", "-")
581
+
582
+
583
def _dependency_name_for_import(name: str) -> str:
    """Map an imported module name to a dependency name.

    The normalised name is looked up in PY_IMPORT_ALIASES; names without an
    alias are returned in normalised form.
    """
    key = _normalize_dependency_name(name)
    if key in PY_IMPORT_ALIASES:
        return PY_IMPORT_ALIASES[key]
    return key
586
+
587
+
588
+ def _js_package_root(imported: str) -> str:
589
+ imported = imported.strip().lower()
590
+ parts = imported.split("/")
591
+ if imported.startswith("@") and len(parts) >= 2 and parts[0] != "@":
592
+ return "/".join(parts[:2])
593
+ if imported.startswith("@/"):
594
+ return imported
595
+ return parts[0]
596
+
597
+
598
+ def _python_dependency_names_from_requirement_lines(text: str) -> set[str]:
599
+ deps: set[str] = set()
600
+ for line in text.splitlines():
601
+ cleaned = line.split("#", 1)[0].strip()
602
+ if cleaned and not cleaned.startswith(("-", "--")):
603
+ deps.add(_normalize_dependency_name(re.split(r"[<>=!~;\[]", cleaned, maxsplit=1)[0]))
604
+ return deps
605
+
606
+
607
+ def _python_dependency_names_from_pyproject(content: str) -> set[str]:
608
+ try:
609
+ data = tomllib.loads(content)
610
+ except tomllib.TOMLDecodeError:
611
+ return set()
612
+ deps: set[str] = set()
613
+
614
+ def add_requirement(req: object) -> None:
615
+ if isinstance(req, str):
616
+ name = re.split(r"[<>=!~;\[]", req.strip(), maxsplit=1)[0]
617
+ if name:
618
+ deps.add(_normalize_dependency_name(name))
619
+
620
+ project = data.get("project", {})
621
+ if isinstance(project, dict):
622
+ for req in project.get("dependencies", []) if isinstance(project.get("dependencies"), list) else []:
623
+ add_requirement(req)
624
+ optional = project.get("optional-dependencies", {})
625
+ if isinstance(optional, dict):
626
+ for group in optional.values():
627
+ if isinstance(group, list):
628
+ for req in group:
629
+ add_requirement(req)
630
+
631
+ tool = data.get("tool", {})
632
+ if isinstance(tool, dict):
633
+ poetry = tool.get("poetry", {})
634
+ if isinstance(poetry, dict):
635
+ for section_name in ("dependencies", "dev-dependencies"):
636
+ section = poetry.get(section_name, {})
637
+ if isinstance(section, dict):
638
+ for dep in section:
639
+ if dep.lower() != "python":
640
+ deps.add(_normalize_dependency_name(dep))
641
+ group = poetry.get("group", {})
642
+ if isinstance(group, dict):
643
+ for group_data in group.values():
644
+ if isinstance(group_data, dict):
645
+ section = group_data.get("dependencies", {})
646
+ if isinstance(section, dict):
647
+ deps.update(_normalize_dependency_name(dep) for dep in section)
648
+ for tool_name in ("pdm", "uv"):
649
+ tool_data = tool.get(tool_name, {})
650
+ if isinstance(tool_data, dict):
651
+ for key in ("dev-dependencies", "dependency-groups"):
652
+ groups = tool_data.get(key, {})
653
+ if isinstance(groups, dict):
654
+ for group in groups.values():
655
+ if isinstance(group, list):
656
+ for req in group:
657
+ add_requirement(req)
658
+ dependency_groups = data.get("dependency-groups", {})
659
+ if isinstance(dependency_groups, dict):
660
+ for group in dependency_groups.values():
661
+ if isinstance(group, list):
662
+ for req in group:
663
+ add_requirement(req)
664
+ return deps
665
+
666
+
667
def _add_common_dependency(deps: set[str], name: str):
    """Add *name* to *deps* if it is one of COMMON_DEPENDENCIES.

    Names are compared in normalised form; the lower-cased entry from
    COMMON_DEPENDENCIES is what gets stored.
    """
    wanted = _normalize_dependency_name(name)
    deps.update(
        known.lower()
        for known in COMMON_DEPENDENCIES
        if _normalize_dependency_name(known) == wanted
    )
672
+
673
+
674
def dependency_inventory(manifest: dict, packet: Path) -> set[str]:
    """Return the well-known dependencies evidenced by the packet.

    Evidence is taken from ``pyproject.toml``, ``requirements*.txt``,
    ``package.json`` and from import statements in Python and JS/TS sources.
    Only names listed in COMMON_DEPENDENCIES are reported.

    Args:
        manifest: Manifest dict whose ``included_files`` lists the files.
        packet: Packet directory whose ``context.md`` holds the contents.
    """
    py_import = re.compile(r"(?m)^\s*(?:import|from)\s+([A-Za-z_][A-Za-z0-9_]*)")
    # The first alternative captures scoped packages ("@scope/pkg") whole;
    # the previous pattern stopped at the first "/", so scoped names could
    # never be resolved.
    js_import = re.compile(
        r"""(?:from\s+["']|import\s*\(\s*["']|require\s*\(\s*["'])"""
        r"""(@[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+|@?[A-Za-z0-9_.-]+)"""
    )
    package_sections = ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies")

    deps: set[str] = set()
    contents = _packet_file_contents(packet)
    for rec in manifest.get("included_files", []):
        rel = rec.get("relative_path", "")
        content = contents.get(rel, "")
        name = Path(rel).name.lower()
        suffix = Path(rel).suffix.lower()
        if name == "pyproject.toml":
            for dep in _python_dependency_names_from_pyproject(content):
                _add_common_dependency(deps, dep)
        elif name.startswith("requirements") and name.endswith(".txt"):
            for dep in _python_dependency_names_from_requirement_lines(content):
                _add_common_dependency(deps, dep)
        elif name == "package.json":
            try:
                package = json.loads(content)
            except json.JSONDecodeError:
                package = {}
            # Valid JSON need not be an object; treat anything else as empty.
            if not isinstance(package, dict):
                package = {}
            for section in package_sections:
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    for dep_name in section_deps:
                        _add_common_dependency(deps, dep_name)
        elif suffix == ".py":
            # NOTE(review): import names are matched as-is, so modules whose
            # distribution is named differently (e.g. `yaml` for pyyaml) are
            # not detected here. Confirm whether _dependency_name_for_import
            # should be applied.
            for imported in py_import.findall(content):
                _add_common_dependency(deps, imported)
        elif suffix in {".js", ".jsx", ".ts", ".tsx"}:
            for imported in js_import.findall(content):
                _add_common_dependency(deps, _js_package_root(imported))
    return deps
705
+
706
+
707
+ def _has_import(content: str, *modules: str) -> bool:
708
+ module_pattern = "|".join(re.escape(module) for module in modules)
709
+ return bool(re.search(rf"(?m)^\s*(?:import|from)\s+({module_pattern})(?:\b|[._])", content))
710
+
711
+
712
# Dependency names that count as a declared PDF library when found in a
# pyproject.toml or requirements*.txt file (see _declares_pdf_dependency).
PDF_DEPENDENCIES = {"pypdf", "pdfplumber", "fitz", "pymupdf"}
713
+
714
+
715
def _declares_pdf_dependency(rel: str, content: str) -> bool:
    """Tell whether a Python dependency manifest declares a PDF library.

    Only ``pyproject.toml`` and ``requirements*.txt`` files (matched on the
    lowercase base name of *rel*) are inspected; any other file yields False.
    """
    basename = Path(rel).name.lower()
    if basename == "pyproject.toml":
        declared = _python_dependency_names_from_pyproject(content)
    elif basename.startswith("requirements") and basename.endswith(".txt"):
        declared = _python_dependency_names_from_requirement_lines(content)
    else:
        return False
    for dep in declared:
        if dep in PDF_DEPENDENCIES:
            return True
    return False
722
+
723
+
724
def feature_inventory(manifest: dict, packet: Path, deps: set[str] | None = None) -> set[str]:
    """Infer coarse project features from file names, dependencies and contents.

    Args:
        manifest: Packet manifest; its ``included_files`` records supply the
            file paths used for name-based checks.
        packet: Packet directory whose file contents are scanned.
        deps: Pre-computed dependency inventory. When None it is derived with
            ``dependency_inventory(manifest, packet)``.

    Returns:
        A set drawn from ``"docker"``, ``"pdf"``, ``"react"``, ``"web server"``,
        ``"database"``, ``"authentication"`` and ``"ocr"``.
    """
    if deps is None:
        deps = dependency_inventory(manifest, packet)
    contents = _packet_file_contents(packet)
    files = {rec.get("relative_path", "").replace("\\", "/") for rec in manifest.get("included_files", [])}
    lower_files = {rel.lower() for rel in files}
    features: set[str] = set()

    # --- Evidence from file names and the dependency inventory ---
    if any(Path(rel).name.lower() in {"dockerfile", "docker-compose.yml", "compose.yaml", "compose.yml"} for rel in files):
        features.add("docker")
    # Compare the base name exactly: a plain suffix test would also accept
    # unrelated files such as "notpdf_parser.py".
    if any(rel.rsplit("/", 1)[-1] == "pdf_parser.py" for rel in lower_files):
        features.add("pdf")
    if any(_declares_pdf_dependency(rel, content) for rel, content in contents.items()):
        features.add("pdf")
    if "react" in deps or any(rel in {"frontend/app.tsx", "frontend/app.jsx"} for rel in lower_files):
        features.add("react")
    if deps & {"fastapi", "flask", "django"} or any(Path(rel).name.lower() in {"server.py", "app.py"} for rel in files):
        features.add("web server")
    if deps & {"sqlalchemy", "prisma"} or any("/migrations/" in f"/{rel}/" or Path(rel).name.lower() in {"schema.prisma", "schema.sql"} for rel in files):
        features.add("database")
    if any(part == "auth" or part.startswith("auth_") for rel in lower_files for part in Path(rel).parts):
        features.add("authentication")

    # --- Evidence from file contents ---
    for rel, content in contents.items():
        suffix = Path(rel).suffix.lower()
        if suffix == ".py":
            if _has_import(content, "pypdf", "pdfplumber", "fitz"):
                features.add("pdf")
            if _has_import(content, "fastapi", "flask", "django") or re.search(r"(?m)^\s*@\w+\.(?:route|get|post|put|patch|delete)\(", content):
                features.add("web server")
            if _has_import(content, "sqlalchemy", "prisma") or re.search(r"(?i)\b(sqlite|postgres(?:ql)?|mysql)://", content):
                features.add("database")
            if _has_import(content, "jwt", "oauthlib", "authlib") or re.search(r"(?i)@\w+\.(?:route|get|post)\([^)]*login", content):
                features.add("authentication")
            if _has_import(content, "pytesseract", "easyocr"):
                features.add("ocr")
        elif suffix in {".js", ".jsx", ".ts", ".tsx"}:
            if re.search(r"""(?:from\s+["']react["']|require\s*\(\s*["']react["']|import\s+React\b)""", content):
                features.add("react")
            if re.search(r"(?i)\b(jwt|oauth|session|login)\b", content):
                features.add("authentication")
        elif Path(rel).name.lower() == "package.json":
            if re.search(r'"react"\s*:', content):
                features.add("react")
    return features
769
+
770
+
771
# File names of packet artifacts regarded as protected.
# NOTE(review): the consumer of this set is outside this section of the file;
# confirm how it is enforced before changing the membership.
PROTECTED_PACKET_ARTIFACTS = {"manifest.json", "receipt.json", "reality_map.json", "ai_instructions.md"}
772
+
773
+
774
def _normalize_inventory_path(value: object) -> str | None:
    """Normalize an inventory entry to a relative path, or None if unusable.

    Non-string values, paths that ``_normalize_diff_path`` flags as unsafe, and
    paths that normalize to an empty string all yield None.
    """
    if isinstance(value, str):
        rel, unsafe = _normalize_diff_path(value)
        if rel and not unsafe:
            return rel
    return None
781
+
782
+
783
+ def _baseline_inventory_from_packet(packet: str | Path, manifest: dict | None = None) -> tuple[set[str], bool]:
784
+ """Return authoritative enforcement baseline paths when a packet has them.
785
+
786
+ Prompt context manifests may be selective, so diff enforcement must prefer the
787
+ baseline file inventory artifact when it exists. The boolean is True only
788
+ when a full inventory artifact was loaded successfully.
789
+ """
790
+ packet = Path(packet)
791
+ for name in ("file_inventory.json", "inventory.json", "baseline_inventory.json"):
792
+ path = packet / name
793
+ if not path.exists():
794
+ continue
795
+ try:
796
+ data = json.loads(path.read_text(encoding="utf-8"))
797
+ except (OSError, json.JSONDecodeError):
798
+ continue
799
+ raw_files = data.get("files") if isinstance(data, dict) else data
800
+ if not isinstance(raw_files, list):
801
+ continue
802
+ files: set[str] = set()
803
+ for item in raw_files:
804
+ raw_path = item.get("relative_path") if isinstance(item, dict) else item
805
+ rel = _normalize_inventory_path(raw_path)
806
+ if rel:
807
+ files.add(rel)
808
+ return files, True
809
+ return _included_paths(manifest or load_manifest(packet)), False
810
+
811
+
812
def known_files(manifest: dict, packet_path: str | Path | None = None) -> set[str]:
    """Return the set of file paths considered known for a packet.

    Without *packet_path* this is the manifest's included paths. With it, the
    paths come from ``_baseline_inventory_from_packet`` and the flag saying
    whether a full inventory was loaded is discarded.
    """
    if packet_path is None:
        return _included_paths(manifest)
    inventory, _loaded = _baseline_inventory_from_packet(packet_path, manifest)
    return inventory
817
+
818
+
819
def supported_commands_inventory(reality_map: dict) -> set[str]:
    """Return the reality map's ``supported_commands`` entries as a set.

    A missing key and an explicit ``None`` (JSON ``null``) both yield an empty
    set instead of raising.
    """
    return set(reality_map.get("supported_commands") or [])
821
+
822
+
823
def docker_evidence(files: set[str]) -> dict[str, bool]:
    """Report whether *files* contain a Dockerfile and/or a Compose file.

    Matching is done on the lowercase base name of each path, so the files may
    live in any directory.
    """
    compose_names = {"docker-compose.yml", "compose.yaml", "compose.yml"}
    has_dockerfile = False
    has_compose = False
    for entry in files:
        basename = Path(entry).name.lower()
        if basename == "dockerfile":
            has_dockerfile = True
        elif basename in compose_names:
            has_compose = True
    return {"dockerfile": has_dockerfile, "compose": has_compose}
829
+
830
+
831
def python_project_evidence(files: set[str], deps: set[str]) -> dict[str, bool]:
    """Summarize the evidence that a file set is a testable Python project.

    Paths are compared in lowercase. ``python_project`` needs a root-level
    ``pyproject.toml`` or any ``requirements*.txt``; ``tests`` needs a
    root-level ``tests`` entry; ``pytest`` reflects membership in *deps*.
    """
    lowered = {path.lower() for path in files}

    def is_requirements_file(path: str) -> bool:
        return Path(path).name.lower().startswith("requirements") and path.endswith(".txt")

    has_manifest = "pyproject.toml" in lowered
    if not has_manifest:
        has_manifest = any(is_requirements_file(path) for path in lowered)
    has_tests = any(path == "tests" or path.startswith("tests/") for path in lowered)
    return {"python_project": has_manifest, "tests": has_tests, "pytest": "pytest" in deps}
838
+
839
+
840
def node_project_evidence(files: set[str], scripts: dict[str, str]) -> dict[str, bool]:
    """Report whether a root-level package.json exists and any scripts are known."""
    has_package_json = any(path.lower() == "package.json" for path in files)
    return {"package_json": has_package_json, "scripts": bool(scripts)}
842
+
843
+
844
def extract_js_import_specifiers_from_text(text: str) -> set[str]:
    """Return the lowercase module specifiers referenced by JS/TS source text.

    Four forms are recognized: static ``import``, ``export ... from``,
    dynamic ``import(...)`` and ``require(...)``. Specifiers are stripped of
    surrounding whitespace; blank ones are dropped.
    """
    import_forms = (
        r"""\bimport\s+(?:[^"'()]+?\s+from\s+)?["']([^"']+)["']""",
        r"""\bexport\s+[^"']*?\s+from\s+["']([^"']+)["']""",
        r"""\bimport\s*\(\s*["']([^"']+)["']\s*\)""",
        r"""\brequire\s*\(\s*["']([^"']+)["']\s*\)""",
    )
    found: set[str] = set()
    for form in import_forms:
        for raw in re.findall(form, text):
            cleaned = raw.strip()
            if cleaned:
                found.add(cleaned.lower())
    return found
855
+
856
+
857
def extract_imports_from_text(text: str, suffix: str = ".py") -> set[str]:
    """Return lowercase import names found in *text* for the given file suffix.

    For ``.py`` this is the first identifier after each ``import``/``from``
    at the start of a line; for suffixes in ``JS_EXTS`` it is the set of JS
    module specifiers. Any other suffix yields an empty set.
    """
    if suffix == ".py":
        names = re.findall(r"(?m)^\s*(?:import|from)\s+([A-Za-z_][A-Za-z0-9_]*)", text)
    elif suffix in JS_EXTS:
        names = extract_js_import_specifiers_from_text(text)
    else:
        names = ()
    return {name.lower() for name in names}
864
+
865
+
866
+
867
+
868
+
869
+
870
+
871
def _materialize_packet_worktree(packet: Path, overlay: dict[str, str] | None = None) -> tempfile.TemporaryDirectory[str]:
    """Write the packet's files (plus an optional overlay) into a temp directory.

    Args:
        packet: Packet whose file contents are materialized.
        overlay: Optional ``{relative_path: content}`` entries that replace or
            extend the packet contents.

    Returns:
        The ``TemporaryDirectory`` holding the files; the caller owns it and is
        responsible for cleaning it up. Entries whose path is unsafe or
        normalizes to empty are skipped.

    Raises:
        Whatever the helpers or the filesystem raise; the temporary directory
        is removed before the exception propagates.
    """
    tmp = tempfile.TemporaryDirectory(prefix="sourcepack-resolver-")
    try:
        root = Path(tmp.name)
        # Work on a copy so the overlay never leaks into a mapping the helper
        # may hand out again.
        contents = dict(_packet_file_contents(packet))
        if overlay:
            contents.update(overlay)
        for rel, content in contents.items():
            normalized, unsafe = _normalize_diff_path(rel)
            if unsafe or not normalized:
                continue
            target = root / normalized
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
    except BaseException:
        # The caller never receives the handle on failure, so release it here.
        tmp.cleanup()
        raise
    return tmp
885
+
886
+
887
def _dependency_additions_from_patch(changes: list[PatchFileChange]) -> set[str]:
    """Return the dependency names a patch adds; currently always empty.

    ``changes`` is accepted but not inspected, so callers always receive an
    empty set from this implementation.
    """
    return set()
889
+
890
+
891
def analyze_patch(packet_path: str | Path, patch_text: str, changes: list[PatchFileChange] | None = None) -> dict:
    """Judge a unified diff against the reality recorded in a packet.

    Args:
        packet_path: Packet directory (manifest, optional ``reality_map.json``
            and optional baseline inventory).
        patch_text: Unified diff text; parsed only when *changes* is None.
        changes: Pre-parsed file changes for the same patch.

    Returns:
        A report dict with ``verdict`` (``"PASS"``, ``"WARN"`` or ``"FAIL"``),
        sorted de-duplicated path/command/dependency lists, and optional
        ``uncertainties``, ``uncertain_modified_files``, ``declared_commands``
        and ``path_escape`` entries. Any non-empty fail key gives ``FAIL``;
        otherwise new files, warnings or uncertainties give ``WARN``.
    """
    packet = Path(packet_path)
    manifest = load_manifest(packet)
    reality = json.loads((packet / "reality_map.json").read_text(encoding="utf-8")) if (packet / "reality_map.json").exists() else generate_reality_map(manifest, packet)
    files, baseline_inventory_loaded = _baseline_inventory_from_packet(packet, manifest)
    deps = dependency_inventory(manifest, packet)
    scripts = _package_json_scripts(packet)
    if changes is None:
        changes = parse_unified_diff(patch_text)
    patch_deps = _dependency_additions_from_patch(changes)
    report = {
        "patch_judgment_schema_version": "1.0",
        "verdict": "PASS",
        "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [],
        "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "git_path_modifications": [], "warnings": [],
    }
    if any(ch.unsafe_path for ch in changes):
        report["path_escape"] = True

    # --- Per-file classification and import checks ---
    all_added = []
    for ch in changes:
        report["modified_files"].append(ch.path)
        if ch.new_file:
            report["new_files"].append(ch.path)
        elif ch.operation in {"rename", "copy"}:
            pass
        elif ch.path not in files:
            if baseline_inventory_loaded or ch.path in _included_paths(manifest):
                report["missing_modified_files"].append(ch.path)
            else:
                # Without a full inventory an unknown path cannot be proven missing.
                report.setdefault("uncertain_modified_files", []).append(ch.path)
        if ch.deleted_file:
            report["deleted_files"].append(ch.path)
        protected = ch.path.startswith(".sourcepack/")
        git_internal = ch.path == ".git" or ch.path.startswith(".git/")
        workflow = ch.path.startswith(".github/workflows/")
        if protected:
            report["protected_artifact_modifications"].append(ch.path)
        if git_internal:
            report.setdefault("git_path_modifications", []).append(ch.path)
        if workflow:
            report.setdefault("uncertainties", []).append({"id": "workflow_change", "message": f"{ch.path} changes repository automation and requires review", "path": ch.path, "evidence": ch.path})
        if ch.operation in {"rename", "copy"}:
            report.setdefault("uncertainties", []).append({"id": "unsupported_rename_copy", "message": f"{ch.operation} semantics for {ch.path} require review", "path": ch.path, "evidence": ch.old_path or ch.path})
        added = "\n".join(ch.added_lines or [])
        all_added.append(added)
        for imported in extract_imports_from_text(added, Path(ch.path).suffix.lower()):
            for dep in COMMON_DEPENDENCIES:
                if _normalize_dependency_name(imported) == _normalize_dependency_name(dep) and dep not in deps and dep not in patch_deps:
                    report["unsupported_dependencies"].append(dep)

    # --- Commands mentioned in added lines ---
    added_text = "\n".join(all_added)
    supported = supported_commands_inventory(reality)
    added_paths = {ch.path for ch in changes}
    compose_added = any(Path(path).name.lower() in {"docker-compose.yml", "compose.yaml", "compose.yml"} for path in added_paths)
    if re.search(r"docker\s+compose\s+up", added_text, re.I):
        evidence = docker_evidence(files)
        if compose_added:
            report["warnings"].append("Patch adds Docker Compose support used by commands; review the new support.")
            report.setdefault("declared_commands", []).append("docker compose up")
        elif not evidence["compose"]:
            report["unsupported_commands"].append("docker compose up")

    # Read the packet contents once; they are needed for every package.json
    # change below and again for the Makefile lookup.
    packet_contents = _packet_file_contents(packet)
    patch_scripts = set()
    command_uncertainties = []
    for ch in changes:
        if Path(ch.path).name.lower() != "package.json":
            continue
        base = packet_contents.get(ch.old_path or ch.path, "")
        post = _apply_patch_change_to_text(base, ch)
        if post is None:
            command_uncertainties.append({"id": "command_manifest_uncertain", "message": f"Could not reconstruct {ch.path} safely", "path": ch.path})
            continue
        try:
            package = json.loads(post)
        except json.JSONDecodeError:
            command_uncertainties.append({"id": "command_manifest_uncertain", "message": f"Could not parse {ch.path} as JSON", "path": ch.path})
            continue
        if not isinstance(package, dict):
            # Valid JSON that is not an object has no "scripts" to read.
            command_uncertainties.append({"id": "command_manifest_uncertain", "message": f"{ch.path} is not a JSON object", "path": ch.path})
            continue
        package_scripts = package.get("scripts")
        if isinstance(package_scripts, dict):
            patch_scripts.update(str(script) for script in package_scripts if isinstance(script, str) and script not in scripts)
    if command_uncertainties:
        report.setdefault("uncertainties", []).extend(command_uncertainties)
    for cmd in sorted(set(re.findall(r"npm\s+(?:run\s+)?[A-Za-z0-9:_-]+", added_text))):
        if cmd.startswith("npm run "):
            script = cmd.removeprefix("npm run ").strip()
            if script in patch_scripts:
                report["warnings"].append(f"Patch adds npm script {script} used by commands; review the new support.")
                report.setdefault("declared_commands", []).append(cmd)
            elif script not in scripts:
                report["unsupported_commands"].append(cmd)
        elif cmd == "npm test" and "test" not in scripts:
            report["unsupported_commands"].append(cmd)
    if re.search(r"\b(pytest|python\s+-m\s+pytest)\b", added_text, re.I):
        py = python_project_evidence(files, deps)
        if not (py["pytest"] or py["tests"] or "pytest" in supported):
            report["unsupported_commands"].append("pytest")
    make_text = packet_contents.get("Makefile") or packet_contents.get("makefile") or ""
    make_targets = {m.group(1) for m in re.finditer(r"^([A-Za-z0-9_.:-]+)\s*:", make_text, re.M)}
    for cmd in sorted(set(re.findall(r"\bmake\s+[A-Za-z0-9_.:-]+", added_text))):
        target = cmd.split(None, 1)[1]
        if target not in make_targets:
            report["unsupported_commands"].append(cmd)

    # --- Inventory caveat, verdict and normalization ---
    if not baseline_inventory_loaded:
        context_paths = _included_paths(manifest)
        outside_context = sorted({
            ch.path for ch in changes
            if not ch.new_file
            and not ch.deleted_file
            and ch.path not in context_paths
        })
        if outside_context:
            report.setdefault("uncertainties", []).append({"id": "baseline_inventory_missing", "message": "Baseline packet lacks full file inventory; modified files outside prompt context could not be checked against tracked repo inventory.", "evidence": ", ".join(outside_context)})
    if report["new_files"]:
        report["warnings"].append("Patch creates new files that were not part of the original packet reality.")
    fail_keys = ["missing_modified_files", "unsupported_dependencies", "unsupported_commands", "protected_artifact_modifications", "git_path_modifications", "path_escape"]
    if any(report.get(k) for k in fail_keys):
        report["verdict"] = "FAIL"
    elif report["new_files"] or report["warnings"] or report.get("uncertainties"):
        report["verdict"] = "WARN"
    for key in ["modified_files", "missing_modified_files", "new_files", "deleted_files", "unsupported_dependencies", "unsupported_commands", "protected_artifact_modifications", "git_path_modifications", "warnings"]:
        report[key] = sorted(set(report[key]))
    return report
1012
+
1013
+
1014
+
1015
+ def _has_negation_before(text: str, start: int) -> bool:
1016
+ window = text[max(0, start - 48):start].lower()
1017
+ return bool(re.search(r"\b(do not|don't|avoid|not|no|without|unless|until|does not|is no|will not)\b", window))
1018
+
1019
+
1020
def _ai_dependency_actions(text: str, dep: str) -> bool:
    """Tell whether *text* imports, installs or tells the reader to use *dep*.

    Import statements are also matched under any ``PY_IMPORT_ALIASES`` import
    name that maps to the normalized *dep*. A match preceded by a negation
    (see ``_has_negation_before``) does not count.
    """
    dep_pat = re.escape(dep)
    candidates = [dep_pat]
    candidates.extend(
        re.escape(import_name)
        for import_name, package in PY_IMPORT_ALIASES.items()
        if package == _normalize_dependency_name(dep)
    )
    # Longest alternative first so a longer alias wins over its own prefix.
    alias_pat = "(?:" + "|".join(sorted(set(candidates), key=len, reverse=True)) + ")"
    action_patterns = (
        rf"\bimport\s+{alias_pat}\b",
        rf"\bfrom\s+{alias_pat}\s+import\b",
        rf"\b(?:pip install|python\s+-m\s+pip\s+install|poetry add|uv add|pdm add|add|use|install|import)\s+{dep_pat}\b",
    )
    return any(
        not _has_negation_before(text, hit.start())
        for pattern in action_patterns
        for hit in re.finditer(pattern, text, re.I)
    )
1037
+
1038
+
1039
def _ai_js_dependency_actions(text: str, dep: str) -> bool:
    """Tell whether *text* imports, requires, installs or says to use the JS package *dep*.

    Import and require forms also accept sub-paths of the package
    (``dep/...``). A match preceded by a negation does not count.
    """
    dep_pat = re.escape(dep)
    action_patterns = (
        rf"\bimport\s+[^\n;]*?from\s+[`'\"]{dep_pat}(?:/[^`'\"]*)?[`'\"]",
        rf"\brequire\s*\(\s*[`'\"]{dep_pat}(?:/[^`'\"]*)?[`'\"]\s*\)",
        rf"\b(?:npm install|npm i|pnpm add|yarn add|add|use|install|import)\s+{dep_pat}\b",
    )
    return any(
        not _has_negation_before(text, hit.start())
        for pattern in action_patterns
        for hit in re.finditer(pattern, text, re.I)
    )
1051
+
1052
+
1053
def _ai_command_instructions(text: str, command_pattern: str) -> list[str]:
    """List commands in *text* that are phrased as instructions to run them.

    A match of *command_pattern* (case-insensitive) counts when it follows a
    verb such as "run" or "execute", is the first item on a bullet or
    numbered (1.-3.) line, or is wrapped in backticks, and is not preceded by
    a negation. Results are lowercased with whitespace runs collapsed, in
    order of appearance (duplicates kept).
    """
    list_markers = {"-", "*", "1.", "2.", "3."}
    results = []
    for hit in re.finditer(command_pattern, text, re.I):
        begin, end = hit.start(), hit.end()
        lead_in = text[max(0, begin - 32):begin].lower()
        is_instruction = re.search(r"\b(run|then|execute|use|uses|start with)\s+$", lead_in) is not None
        if not is_instruction:
            bol = text.rfind("\n", 0, begin) + 1
            is_instruction = text[bol:begin].strip().lower() in list_markers
        if not is_instruction:
            is_instruction = begin > 0 and end < len(text) and text[begin - 1] == "`" and text[end] == "`"
        if not is_instruction or _has_negation_before(text, begin):
            continue
        results.append(re.sub(r"\s+", " ", hit.group(0).strip()).lower())
    return results
1064
+
1065
+
1066
+
1067
+
1068
# Traffic-light label for each verdict string.
LIGHT_BY_VERDICT = {"PASS": "GREEN LIGHT", "WARN": "YELLOW LIGHT", "FAIL": "RED LIGHT"}
# Numeric rank per severity name; "error" has the lowest number.
# NOTE(review): presumably used as a sort key so errors come first - confirm at call sites.
SEVERITY_ORDER = {"error": 0, "warn": 1, "info": 2}
# Standard-library module names: sys.stdlib_module_names when the interpreter
# provides it, plus a fixed list that is always included.
PY_STDLIB = set(getattr(sys, "stdlib_module_names", set())) | {"typing", "pathlib", "json", "os", "sys", "re", "subprocess", "datetime", "unittest"}
# File names of Python dependency manifests.
PY_DEP_FILES = {"requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"}
# File suffixes treated as JavaScript/TypeScript source.
JS_EXTS = {".js", ".jsx", ".ts", ".tsx"}
1073
+
1074
+
1075
+
1076
def _latest_report_html_path(repo: str | Path) -> Path:
    """Return the ``latest_html`` entry of the repo's SourcePack directory map."""
    sourcepack_dirs = ensure_sourcepack_dirs(repo)
    return sourcepack_dirs["latest_html"]
1078
+
1079
+
1080
+
1081
+
1082
+ def finalize_diff_report(repo: str | Path | None, report: dict, args, stem: str = "diff") -> dict:
1083
+ full = dict(report)
1084
+ if getattr(args, "ci", False):
1085
+ full["ci"] = True
1086
+ if repo is not None:
1087
+ try:
1088
+ write_user_report(repo, full, stem)
1089
+ except Exception:
1090
+ full.setdefault("warnings", []).append("report_artifact_write_failed")
1091
+ return full
1092
+
1093
+
1094
def git_metadata(repo: str | Path) -> dict:
    """Describe the repository's current branch, HEAD commit and dirty state.

    ``branch`` and ``head_commit`` are None when the corresponding git command
    exits non-zero. ``dirty`` is reported only when ``git_worktree_dirty``
    returns no ``dirty_state``; otherwise it is None.
    """
    def text_or_none(result):
        return result.stdout.strip() if result.returncode == 0 else None

    root = Path(repo)
    head_result = run_git(root, ["rev-parse", "HEAD"])
    branch_result = run_git(root, ["rev-parse", "--abbrev-ref", "HEAD"])
    is_dirty, state = git_worktree_dirty(root)
    metadata = {
        "branch": text_or_none(branch_result),
        "head_commit": text_or_none(head_result),
    }
    metadata["dirty"] = None if state is not None else is_dirty
    metadata["dirty_state"] = state
    return metadata
1105
+
1106
+
1107
def scanner_config_hash() -> str:
    """Return the SHA-256 text hash of the default scanner configuration.

    The configuration is serialized as JSON with sorted keys before hashing.
    """
    ignored_dirs = sorted(DEFAULT_IGNORED_DIRS)
    ignored_patterns = sorted(DEFAULT_IGNORED_PATTERNS)
    text_extensions = sorted(DEFAULT_TEXT_EXTENSIONS)
    serialized = json.dumps(
        {
            "ignored_dirs": ignored_dirs,
            "ignored_patterns": ignored_patterns,
            "text_extensions": text_extensions,
            "max_file_size": 1_000_000,
            "include_hidden": False,
            "redact": True,
        },
        sort_keys=True,
    )
    return sha256_text(serialized)
1117
+
1118
+
1119
+
1120
def build_prompt_context(repo: str | Path) -> dict:
    """Scan *repo*, write a fresh prompt packet and copy its key artifacts out.

    The packet is written with ``force=True``; its ``reality_map.json`` and
    ``ai_instructions.md`` are then copied to the ``prompt_reality`` and
    ``prompt_instructions`` locations. Returns the SourcePack directory map.
    """
    paths = ensure_sourcepack_dirs(repo)
    packet_dir = paths["prompt_packet"]
    PacketWriter(packet_dir, SourceScanner(repo).scan(), force=True).write_all()
    exported = (
        ("reality_map.json", "prompt_reality"),
        ("ai_instructions.md", "prompt_instructions"),
    )
    for artifact, destination_key in exported:
        shutil.copy2(packet_dir / artifact, paths[destination_key])
    return paths
1126
+
1127
+
1128
def render_prompt(task: str, instructions: str, reality: dict) -> str:
    """Render the Markdown prompt handed to an AI assistant.

    The prompt contains the user task, the grounding instructions (trailing
    whitespace removed), a compact reality-map summary, one bullet list per
    reality-map section ("- None detected" when empty), and three fixed
    closing rules. The text ends with a newline.
    """
    def bullet_list(entries):
        if not entries:
            return "- None detected"
        return "\n".join(f"- {entry}" for entry in entries)

    lines = [
        "# SourcePack Verified AI Prompt", "",
        "## User Task", "", task, "",
        "## AI Grounding Instructions", "", instructions.rstrip(), "",
        "## Compact Reality Map Summary", "",
        f"Project types: {', '.join(reality.get('project_types') or ['unknown'])}",
        f"Included files: {reality.get('included_file_count', 0)}",
        "",
    ]
    sections = (
        ("## Supported Commands", "supported_commands"),
        ("## Detected Dependencies", "detected_dependencies"),
        ("## Supported Capabilities", "supported_capabilities"),
        ("## Unknown and Unsupported Boundaries", "claim_boundaries"),
    )
    for heading, key in sections:
        lines += [heading, "", bullet_list(reality.get(key, [])), ""]
    lines += [
        "Cite exact file paths for project-specific claims.",
        "Do not invent files, dependencies, commands, services, or capabilities.",
        "Absence of evidence means unknown, not impossible.",
        "",
    ]
    return "\n".join(lines)
1132
+
1133
+
1134
def copy_to_clipboard(text: str) -> bool:
    """Copy *text* to the system clipboard using the first tool that works.

    Tries ``pbcopy`` on macOS, ``clip`` on Windows, and ``wl-copy``, ``xclip``
    then ``xsel`` elsewhere. Tools that are not on PATH are skipped and each
    attempt is limited to five seconds.

    Returns:
        True once a tool exits with status 0; False when none is available or
        every attempt fails. Never raises for a failing tool.
    """
    # Imported here because ``platform`` is not among the module's visible
    # top-level imports; a local import keeps this function self-sufficient.
    import platform

    system = platform.system().lower()
    cmds = [["pbcopy"]] if system == "darwin" else [["clip"]] if system == "windows" else [["wl-copy"], ["xclip", "-selection", "clipboard"], ["xsel", "--clipboard", "--input"]]
    for cmd in cmds:
        if shutil.which(cmd[0]) is None:
            continue
        try:
            if subprocess.run(cmd, input=text, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=5).returncode == 0:
                return True
        except Exception:
            # Best-effort: a tool that times out or cannot run is not fatal,
            # fall through to the next candidate.
            continue
    return False
1146
+
1147
+
1148
+ def _is_local_python_import(name: str, path: str, files: set[str]) -> bool:
1149
+ candidates = {f"{name}.py", f"{name}/__init__.py", f"src/{name}.py", f"src/{name}/__init__.py"}
1150
+ parent = str(Path(path).parent).replace("\\", "/")
1151
+ if parent != ".":
1152
+ candidates |= {f"{parent}/{name}.py", f"{parent}/{name}/__init__.py"}
1153
+ return bool(candidates & files)
1154
+
1155
+
1156
# package.json keys whose object members are treated as declared dependencies.
JS_DEP_SECTIONS = {"dependencies", "devDependencies", "peerDependencies", "optionalDependencies"}
1157
+
1158
+
1159
def _package_json_declared_deps_from_added_lines(lines: list[str]) -> set[str]:
    """Extract lowercase dependency names from lines added to a package.json.

    The joined lines are first parsed as a complete JSON document. If that
    yields no dependencies (or is not valid JSON), a regex fallback looks for
    ``"<section>": { ... }`` fragments and collects the keys inside them.
    """
    added = "\n".join(lines)
    try:
        package = json.loads(added)
    except json.JSONDecodeError:
        package = None

    structured: set[str] = set()
    if isinstance(package, dict):
        for section in JS_DEP_SECTIONS:
            entries = package.get(section)
            if isinstance(entries, dict):
                structured |= {dep.lower() for dep in entries}
    if structured:
        return structured

    # Fallback for partial fragments, which are not valid JSON on their own.
    scraped: set[str] = set()
    for section in JS_DEP_SECTIONS:
        for body in re.findall(rf'"{section}"\s*:\s*\{{(.*?)\}}', added, re.I | re.S):
            scraped.update(m.lower() for m in re.findall(r'"(@?[A-Za-z0-9_.-]+(?:/[A-Za-z0-9_.-]+)?)"\s*:', body))
    return scraped
1177
+
1178
+
1179
+ def _apply_patch_change_to_text(original: str, change: PatchFileChange) -> str | None:
1180
+ if change.deleted_file:
1181
+ return ""
1182
+ result = original.splitlines()
1183
+ if result and result[0] == "":
1184
+ result = result[1:]
1185
+ out: list[str] = []
1186
+ idx = 0
1187
+ saw_hunk = False
1188
+ for line in change.diff_lines or []:
1189
+ if line.startswith("@@"):
1190
+ m = re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@", line)
1191
+ if not m:
1192
+ return None
1193
+ old_start = max(int(m.group(1)) - 1, 0)
1194
+ if old_start < idx or old_start > len(result):
1195
+ return None
1196
+ out.extend(result[idx:old_start])
1197
+ idx = old_start
1198
+ saw_hunk = True
1199
+ elif line.startswith(" "):
1200
+ body = line[1:]
1201
+ if idx >= len(result) or result[idx] != body:
1202
+ return None
1203
+ out.append(result[idx])
1204
+ idx += 1
1205
+ elif line.startswith("-"):
1206
+ body = line[1:]
1207
+ if idx >= len(result) or result[idx] != body:
1208
+ return None
1209
+ idx += 1
1210
+ elif line.startswith("+"):
1211
+ out.append(line[1:])
1212
+ if not saw_hunk and not change.new_file:
1213
+ return None
1214
+ out.extend(result[idx:])
1215
+ return "\n".join(out) + ("\n" if original.endswith("\n") or change.new_file else "")
1216
+
1217
+
1218
+ def _python_dependency_names_by_scope_from_pyproject(content: str) -> dict[str, set[str]]:
1219
+ scopes = {"runtime": set(), "dev": set(), "optional": set()}
1220
+ try:
1221
+ data = tomllib.loads(content)
1222
+ except tomllib.TOMLDecodeError:
1223
+ return scopes
1224
+
1225
+ def add_req(target: set[str], req: object) -> None:
1226
+ if isinstance(req, str):
1227
+ name = re.split(r"[<>=!~;\[]", req.strip(), maxsplit=1)[0]
1228
+ if name:
1229
+ target.add(_normalize_dependency_name(name))
1230
+
1231
+ project = data.get("project", {})
1232
+ if isinstance(project, dict):
1233
+ for req in project.get("dependencies", []) if isinstance(project.get("dependencies"), list) else []:
1234
+ add_req(scopes["runtime"], req)
1235
+ optional = project.get("optional-dependencies", {})
1236
+ if isinstance(optional, dict):
1237
+ for group in optional.values():
1238
+ if isinstance(group, list):
1239
+ for req in group:
1240
+ add_req(scopes["optional"], req)
1241
+ tool = data.get("tool", {})
1242
+ if isinstance(tool, dict):
1243
+ poetry = tool.get("poetry", {})
1244
+ if isinstance(poetry, dict):
1245
+ section = poetry.get("dependencies", {})
1246
+ if isinstance(section, dict):
1247
+ for dep in section:
1248
+ if dep.lower() != "python":
1249
+ scopes["runtime"].add(_normalize_dependency_name(dep))
1250
+ for section_name in ("dev-dependencies",):
1251
+ section = poetry.get(section_name, {})
1252
+ if isinstance(section, dict):
1253
+ scopes["dev"].update(_normalize_dependency_name(dep) for dep in section)
1254
+ group = poetry.get("group", {})
1255
+ if isinstance(group, dict):
1256
+ for group_data in group.values():
1257
+ if isinstance(group_data, dict):
1258
+ section = group_data.get("dependencies", {})
1259
+ if isinstance(section, dict):
1260
+ scopes["dev"].update(_normalize_dependency_name(dep) for dep in section)
1261
+ for tool_name in ("pdm", "uv"):
1262
+ tool_data = tool.get(tool_name, {})
1263
+ if isinstance(tool_data, dict):
1264
+ for key in ("dev-dependencies", "dependency-groups"):
1265
+ groups = tool_data.get(key, {})
1266
+ if isinstance(groups, dict):
1267
+ for group in groups.values():
1268
+ if isinstance(group, list):
1269
+ for req in group:
1270
+ add_req(scopes["dev"], req)
1271
+ dependency_groups = data.get("dependency-groups", {})
1272
+ if isinstance(dependency_groups, dict):
1273
+ for group in dependency_groups.values():
1274
+ if isinstance(group, list):
1275
+ for req in group:
1276
+ add_req(scopes["dev"], req)
1277
+ return scopes
1278
+
1279
+
1280
def _declared_dependency_scopes_by_ecosystem(manifest: dict, packet: Path) -> dict[str, dict[str, set[str]]]:
    """Collect declared dependencies per ecosystem and scope from a packet.

    Every file in the packet contents is considered (``manifest`` is accepted
    but not consulted). The result maps ``"python"`` and ``"js"`` to
    ``runtime``/``dev``/``optional`` name sets:

    * ``pyproject.toml``: scopes as parsed from the file.
    * ``requirements*.txt``: ``dev`` when the file name contains "dev" or
      "test", otherwise ``runtime``.
    * ``package.json``: ``dependencies`` and ``peerDependencies`` are runtime,
      ``optionalDependencies`` optional, ``devDependencies`` dev.
    """
    contents = _packet_file_contents(packet)
    scopes = {"python": {"runtime": set(), "dev": set(), "optional": set()}, "js": {"runtime": set(), "dev": set(), "optional": set()}}
    for rel, content in contents.items():
        name = Path(rel).name.lower()
        if name == "pyproject.toml":
            parsed = _python_dependency_names_by_scope_from_pyproject(content)
            for key, values in parsed.items():
                scopes["python"][key].update(values)
        elif name.startswith("requirements") and name.endswith(".txt"):
            # Plain "requirements.txt" contains neither marker, so it lands in
            # runtime without needing a branch of its own.
            target = "dev" if any(x in name for x in ("dev", "test")) else "runtime"
            scopes["python"][target].update(_python_dependency_names_from_requirement_lines(content))
        elif name == "package.json":
            try:
                package = json.loads(content)
            except json.JSONDecodeError:
                package = {}
            if not isinstance(package, dict):
                # Valid JSON that is not an object declares nothing.
                package = {}
            section_map = {"dependencies": "runtime", "peerDependencies": "runtime", "optionalDependencies": "optional", "devDependencies": "dev"}
            for section, target in section_map.items():
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    scopes["js"][target].update(dep.lower() for dep in section_deps)
    return scopes
1305
+
1306
+
1307
+ def _is_test_path(path: str) -> bool:
1308
+ p = path.replace("\\", "/").lower()
1309
+ name = PurePosixPath(p).name
1310
+ return p.startswith(("tests/", "test/")) or "/__tests__/" in f"/{p}" or name.endswith("_test.py") or any(name.endswith(s) for s in (".test.js", ".test.ts", ".spec.js", ".spec.ts", ".test.jsx", ".test.tsx", ".spec.jsx", ".spec.tsx"))
1311
+
1312
+
1313
def _dependency_scope_status(dep: str, scopes: dict[str, set[str]], path: str) -> str:
    """Classify how well the declared scopes support using *dep* from *path*.

    Returns ``"supported"`` for a runtime dependency or a dev dependency used
    from a test path, ``"scope_review"`` for a dev dependency used elsewhere or
    an optional dependency, and ``"missing"`` when it is not declared at all.
    """
    key = _normalize_dependency_name(dep)
    if key in scopes.get("runtime", set()):
        return "supported"
    if key in scopes.get("dev", set()):
        if _is_test_path(path):
            return "supported"
        return "scope_review"
    if key in scopes.get("optional", set()):
        return "scope_review"
    return "missing"
1322
+
1323
+
1324
def _declared_dependency_names_from_patch_by_ecosystem_structural(changes: list[PatchFileChange], contents: dict[str, str]) -> tuple[dict[str, set[str]], list[dict]]:
    """Read declared dependencies from manifests as they look after the patch.

    For each changed ``package.json``, ``pyproject.toml`` or
    ``requirements*.txt`` the post-patch text is reconstructed from
    *contents* and parsed in full.

    Returns:
        ``(deps, uncertainties)``: ``deps`` maps ``"python"`` and ``"js"`` to
        declared names; ``uncertainties`` has one
        ``dependency_manifest_uncertain`` record per manifest that could not
        be reconstructed, could not be parsed as JSON, or is not a JSON object.
    """
    deps = {"python": set(), "js": set()}
    uncertainties: list[dict] = []
    for ch in changes:
        name = Path(ch.path).name.lower()
        if name not in {"package.json", "pyproject.toml"} and not (name.startswith("requirements") and name.endswith(".txt")):
            continue
        base = contents.get(ch.old_path or ch.path, "")
        post = _apply_patch_change_to_text(base, ch)
        if post is None:
            uncertainties.append({"id": "dependency_manifest_uncertain", "message": f"Could not reconstruct {ch.path} safely", "path": ch.path})
            continue
        if name == "package.json":
            try:
                package = json.loads(post)
            except json.JSONDecodeError:
                uncertainties.append({"id": "dependency_manifest_uncertain", "message": f"Could not parse {ch.path} as JSON", "path": ch.path})
                continue
            if not isinstance(package, dict):
                # Valid JSON that is not an object has no sections to read.
                uncertainties.append({"id": "dependency_manifest_uncertain", "message": f"{ch.path} is not a JSON object", "path": ch.path})
                continue
            for section in JS_DEP_SECTIONS:
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    deps["js"].update(dep.lower() for dep in section_deps)
        elif name == "pyproject.toml":
            parsed = _python_dependency_names_by_scope_from_pyproject(post)
            # Scope does not matter here: merge runtime, dev and optional.
            deps["python"].update(set().union(*parsed.values()))
        else:
            deps["python"].update(_python_dependency_names_from_requirement_lines(post))
    return deps, uncertainties
1352
+
1353
+
1354
def _declared_dependency_names_from_patch_by_ecosystem(changes: list[PatchFileChange]) -> dict[str, set[str]]:
    """Read declared dependencies from the lines a patch adds to manifests.

    Only the added lines of each ``package.json``, ``pyproject.toml`` or
    ``requirements*.txt`` change are parsed; the rest of the file is not
    consulted. Returns ``{"python": ..., "js": ...}``.
    """
    found = {"python": set(), "js": set()}
    for change in changes:
        added_lines = change.added_lines or []
        basename = Path(change.path).name.lower()
        if basename == "package.json":
            found["js"].update(_package_json_declared_deps_from_added_lines(added_lines))
            continue
        if basename == "pyproject.toml":
            parser = _python_dependency_names_from_pyproject
        elif basename.startswith("requirements") and basename.endswith(".txt"):
            parser = _python_dependency_names_from_requirement_lines
        else:
            continue
        found["python"].update(parser("\n".join(added_lines)))
    return found
1366
+
1367
+
1368
def _declared_dependency_names_from_patch(changes: list[PatchFileChange]) -> set[str]:
    """Return every dependency name the patch's added manifest lines declare.

    Python and JavaScript names are merged into one set.
    """
    by_ecosystem = _declared_dependency_names_from_patch_by_ecosystem(changes)
    return by_ecosystem["python"].union(by_ecosystem["js"])
1371
+
1372
+
1373
def _declared_dependency_names_by_ecosystem(manifest: dict, packet: Path) -> dict[str, set[str]]:
    """Collect dependency names declared by the manifests already in the packet.

    Walks ``manifest["included_files"]`` and, for each ``pyproject.toml``,
    ``requirements*.txt`` or ``package.json``, reads its text from the packet
    contents and extracts the declared names.

    Returns a dict with ``"python"`` and ``"js"`` sets. JS names are lowercased.
    """
    declared: dict[str, set[str]] = {"python": set(), "js": set()}
    contents = _packet_file_contents(packet)
    for rec in manifest.get("included_files", []):
        rel = rec.get("relative_path", "")
        content = contents.get(rel, "")
        name = Path(rel).name.lower()
        if name == "pyproject.toml":
            declared["python"].update(_python_dependency_names_from_pyproject(content))
        elif name.startswith("requirements") and name.endswith(".txt"):
            declared["python"].update(_python_dependency_names_from_requirement_lines(content))
        elif name == "package.json":
            try:
                package = json.loads(content)
            except json.JSONDecodeError:
                package = {}
            # Valid JSON is not necessarily an object (e.g. "[]" or "null");
            # treat anything else as declaring nothing instead of crashing.
            if not isinstance(package, dict):
                continue
            for section in JS_DEP_SECTIONS:
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    declared["js"].update(dep.lower() for dep in section_deps)
    return declared
1394
+
1395
+
1396
def _declared_dependency_names(manifest: dict, packet: Path) -> set[str]:
    """Return all dependency names declared in the packet, Python and JS merged."""
    by_ecosystem = _declared_dependency_names_by_ecosystem(manifest, packet)
    merged = by_ecosystem["python"] | by_ecosystem["js"]
    return merged
1399
+
1400
+
1401
def _workspace_package_names(packet: Path) -> set[str]:
    """Return the lowercased ``name`` of every workspace package in the packet.

    Reads the root ``package.json`` ``workspaces`` field (either a list of
    patterns or an object with a ``packages`` list). Only patterns of the form
    ``<prefix>/*`` are honoured; any ``package.json`` located under that prefix
    contributes its ``name``. Malformed or unexpectedly shaped JSON yields an
    empty result for the affected file rather than an exception.
    """
    contents = _packet_file_contents(packet)
    try:
        root = json.loads(contents.get("package.json", "{}"))
    except json.JSONDecodeError:
        return set()
    if not isinstance(root, dict):
        # e.g. "[]" or "null": parseable, but not a package manifest.
        return set()
    workspaces = root.get("workspaces")
    if isinstance(workspaces, list):
        patterns = workspaces
    elif isinstance(workspaces, dict):
        patterns = workspaces.get("packages", [])
    else:
        patterns = []
    if not isinstance(patterns, list):
        # "packages": null (or any non-list) must not be iterated.
        patterns = []
    names: set[str] = set()
    for pattern in patterns:
        if not isinstance(pattern, str) or not pattern.endswith("/*"):
            continue
        prefix = pattern[:-2].strip("/")
        for rel, content in contents.items():
            if Path(rel).name == "package.json" and rel.startswith(prefix + "/"):
                try:
                    package = json.loads(content)
                except json.JSONDecodeError:
                    continue
                if not isinstance(package, dict):
                    continue
                name = package.get("name")
                if isinstance(name, str):
                    names.add(name.lower())
    return names
1425
+
1426
+
1427
+ def _is_js_alias_specifier(imported: str) -> bool:
1428
+ return imported.startswith(("@/", "~/"))
1429
+
1430
+
1431
+ def _js_alias_local(imported: str, files: set[str], contents: dict[str, str]) -> bool | None:
1432
+ configs = []
1433
+ for cfg in ("tsconfig.json", "jsconfig.json"):
1434
+ if cfg in contents:
1435
+ try:
1436
+ configs.append(json.loads(contents[cfg]))
1437
+ except json.JSONDecodeError:
1438
+ return None
1439
+ for cfg in configs:
1440
+ opts = cfg.get("compilerOptions", {}) if isinstance(cfg, dict) else {}
1441
+ base = str(opts.get("baseUrl", ".")).strip("./")
1442
+ paths = opts.get("paths", {})
1443
+ candidates = []
1444
+ if isinstance(paths, dict):
1445
+ for alias, targets in paths.items():
1446
+ prefix = alias[:-1] if alias.endswith("*") else alias
1447
+ if imported.startswith(prefix):
1448
+ rest = imported[len(prefix):]
1449
+ for target in targets if isinstance(targets, list) else []:
1450
+ tprefix = target[:-1] if isinstance(target, str) and target.endswith("*") else target
1451
+ candidates.append((tprefix + rest).strip("/"))
1452
+ if base and not imported.startswith("@") and not imported.startswith("~"):
1453
+ candidates.append(f"{base}/{imported}".strip("/"))
1454
+ for c in candidates:
1455
+ variants = {c, f"{c}.ts", f"{c}.tsx", f"{c}.js", f"{c}.jsx", f"{c}/index.ts", f"{c}/index.tsx", f"{c}/index.js", f"{c}/index.jsx"}
1456
+ if variants & files:
1457
+ return True
1458
+ if candidates:
1459
+ return None
1460
+ return False
1461
+
1462
+
1463
+ def _is_high_risk_binary_path(rel: str) -> bool:
1464
+ normalized = rel.replace("\\", "/").lstrip("/")
1465
+ high_risk_prefixes = (".sourcepack/", ".git/", ".github/workflows/")
1466
+ high_risk_names = {"pyproject.toml", "package.json", "package-lock.json", "uv.lock", "poetry.lock"}
1467
+ return normalized.startswith(high_risk_prefixes) or Path(normalized).name in high_risk_names
1468
+
1469
+
1470
+ UNSUPPORTED_ECOSYSTEM_MARKERS = {
1471
+ "gemfile": ("Gemfile", "Ruby/Bundler dependency validation is not implemented"),
1472
+ "composer.json": ("composer.json", "PHP/Composer dependency validation is not implemented"),
1473
+ "main.tf": ("main.tf", "Terraform module/provider validation is not implemented"),
1474
+ "flake.nix": ("flake.nix", "Nix flake validation is not implemented"),
1475
+ "cargo.toml": ("Cargo.toml", "Rust dependency validation is not implemented"),
1476
+ "go.mod": ("go.mod", "Go module dependency validation is not implemented"),
1477
+ "pom.xml": ("pom.xml", "Maven dependency validation is not implemented"),
1478
+ "build.gradle": ("build.gradle", "Gradle dependency validation is not implemented"),
1479
+ "build.gradle.kts": ("build.gradle.kts", "Gradle dependency validation is not implemented"),
1480
+ "settings.gradle": ("settings.gradle", "Gradle workspace validation is not implemented"),
1481
+ "settings.gradle.kts": ("settings.gradle.kts", "Gradle workspace validation is not implemented"),
1482
+ "*.csproj": ("*.csproj", ".NET/NuGet dependency validation is not implemented"),
1483
+ }
1484
+
1485
+
1486
+ def _unsupported_ecosystem_uncertainties(files: set[str], changes: list[PatchFileChange]) -> list[dict]:
1487
+ names = {Path(f).name.lower() for f in files}
1488
+ names.update(Path(ch.path).name.lower() for ch in changes)
1489
+ for ch in changes:
1490
+ if ch.path.lower().endswith(".csproj"):
1491
+ names.add("*.csproj")
1492
+ uncertainties = []
1493
+ for marker, (evidence, message) in sorted(UNSUPPORTED_ECOSYSTEM_MARKERS.items()):
1494
+ if marker in names:
1495
+ uncertainties.append({"id": "unsupported_ecosystem", "message": f"{evidence} detected, but {message}", "evidence": evidence})
1496
+ return uncertainties
1497
+
1498
def judge_patch_text(packet_path: str | Path, patch_text: str) -> dict:
    """Judge a unified diff against the trusted baseline packet.

    Args:
        packet_path: Location of the baseline packet the patch is judged against.
        patch_text: Unified diff text (``diff --git`` format).

    Returns:
        A report dict whose ``verdict`` is ``"FAIL"``, ``"WARN"`` or ``"PASS"``.
        Malformed diffs and unsafe paths short-circuit with a minimal FAIL
        report carrying ``malformed_diff`` or ``path_escape``.
    """

    def _rejected(**extra) -> dict:
        # Minimal FAIL report used by every early exit below.
        rejected = {"verdict": "FAIL", "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [], "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "warnings": []}
        rejected.update(extra)
        return rejected

    # Hunks without any "diff --git" header, or hunk headers that do not have
    # the "@@ -a,b +c,d @@" shape, cannot be judged safely.
    if re.search(r"(?m)^@@", patch_text) and "diff --git " not in patch_text:
        return _rejected(malformed_diff=True)
    if re.search(r"(?m)^@@(?! -\d+(?:,\d+)? \+\d+(?:,\d+)? @@)", patch_text):
        return _rejected(malformed_diff=True)
    changes = parse_unified_diff(patch_text)
    unsafe_paths = sorted({ch.path for ch in changes if ch.unsafe_path and ch.path})
    if any(ch.unsafe_path for ch in changes):
        return _rejected(path_escape=True, path_escape_paths=unsafe_paths)
    if patch_text.strip() and not changes and "Binary files " not in patch_text:
        return _rejected(malformed_diff=True)

    report = analyze_patch(packet_path, patch_text, changes)
    packet = Path(packet_path)
    manifest = load_manifest(packet)
    files = known_files(manifest, packet)
    contents = _packet_file_contents(packet)
    existing_declared = _declared_dependency_names_by_ecosystem(manifest, packet)
    # NOTE(review): the result of this call was never used; the call is kept
    # in case it validates the manifest — confirm and drop if it is pure.
    _declared_dependency_scopes_by_ecosystem(manifest, packet)
    patch_declared, manifest_uncertainties = _declared_dependency_names_from_patch_by_ecosystem_structural(changes, contents)
    if manifest_uncertainties:
        report.setdefault("uncertainties", []).extend(manifest_uncertainties)
    workspace_names = _workspace_package_names(packet)
    unsupported = set(report.get("unsupported_dependencies", []))

    # Re-resolve every added import against a materialized copy of the packet.
    resolver_tmp = _materialize_packet_worktree(packet)
    try:
        resolver_root = Path(resolver_tmp.name)
        for ch in changes:
            suffix = Path(ch.path).suffix.lower()
            added = "\n".join(ch.added_lines or [])
            if suffix == ".py":
                for imported in extract_imports_from_text(added, suffix):
                    dep_resolution = resolve_python_import(resolver_root, imported, added_dependencies=patch_declared["python"])
                    dep_name = _dependency_name_for_import(imported)
                    if dep_resolution.verdict == "PASS":
                        unsupported.discard(imported)
                        unsupported.discard(dep_name)
                    elif dep_resolution.reason_code == "declared_dependency":
                        unsupported.discard(imported)
                        unsupported.discard(dep_name)
                        report.setdefault("uncertainties", []).append({"id": "declared_dependency", "message": f"{dep_name} is declared in the same patch and requires review", "path": ch.path, "evidence": dep_name})
                    elif dep_resolution.reason_code == "dependency_scope_review":
                        report.setdefault("uncertainties", []).append({"id": "dependency_scope_review", "message": f"{dep_name} is declared outside the runtime dependency scope", "path": ch.path, "evidence": dep_name})
                    elif dep_resolution.reason_code == "unsupported_dependency":
                        unsupported.add(imported)
            elif suffix in JS_EXTS:
                for imported in extract_imports_from_text(added, suffix):
                    pkg = _js_package_root(imported)
                    local_alias = _js_alias_local(imported, files, contents)
                    if pkg in workspace_names or local_alias is True:
                        continue
                    dep_resolution = resolve_js_import(resolver_root, imported)
                    if dep_resolution.verdict == "PASS":
                        unsupported.discard(pkg)
                    elif dep_resolution.reason_code == "js_alias_uncertain":
                        report.setdefault("uncertainties", []).append({"id": "js_alias_uncertain", "message": f"{imported} could not be resolved safely", "path": ch.path, "evidence": imported})
                    elif dep_resolution.reason_code == "dependency_scope_review":
                        report.setdefault("uncertainties", []).append({"id": "dependency_scope_review", "message": f"{pkg} is declared outside the runtime dependency scope", "path": ch.path, "evidence": pkg})
                    elif dep_resolution.reason_code == "unsupported_dependency" and pkg not in patch_declared["js"]:
                        unsupported.add(pkg)
    finally:
        resolver_tmp.cleanup()

    # Re-run command claims through the command resolver so report output is
    # based on the same manifest-aware command semantics as unit-level checks.
    # Names are stored lowercased because the file name is lowercased before
    # the lookup; the earlier mixed-case entries ("Makefile", "Justfile",
    # "Taskfile.yml", ...) could never match.
    command_manifest_names = {"package.json", "makefile", "justfile", "taskfile.yml", "taskfile.yaml", "tox.ini", "noxfile.py", "compose.yml", "compose.yaml", "docker-compose.yml", "docker-compose.yaml"}
    command_overlay: dict[str, str] = {}
    for ch in changes:
        if Path(ch.path).name.lower() in command_manifest_names:
            base = contents.get(ch.old_path or ch.path, "")
            post = _apply_patch_change_to_text(base, ch)
            if post is not None:
                command_overlay[ch.path] = post
    command_tmp = _materialize_packet_worktree(packet, command_overlay)
    try:
        command_root = Path(command_tmp.name)
        added_text = "\n".join("\n".join(ch.added_lines or []) for ch in changes)
        commands = set()
        if re.search(r"docker\s+compose\s+up", added_text, re.I):
            commands.add("docker compose up")
        commands.update(re.findall(r"npm\s+(?:run\s+)?[A-Za-z0-9:_-]+", added_text))
        commands.update(re.findall(r"make\s+[A-Za-z0-9_.:-]+", added_text))
        commands.update(re.findall(r"just\s+[A-Za-z0-9_.:-]+", added_text))
        commands.update(re.findall(r"task\s+[A-Za-z0-9_.:-]+", added_text))
        if re.search(r"\b(pytest|python\s+-m\s+pytest)\b", added_text, re.I):
            commands.add("pytest")
        report["unsupported_commands"] = []
        for command in sorted(commands):
            resolution = resolve_command(command_root, command)
            if resolution.reason_code == "unsupported_command":
                report["unsupported_commands"].append(command)
            elif resolution.reason_code in {"declared_command", "command_check_inconclusive", "command_manifest_missing", "command_manifest_uncertain"}:
                report.setdefault("uncertainties", []).append({"id": resolution.reason_code, "message": resolution.message, "evidence": command})
    finally:
        command_tmp.cleanup()

    declared = patch_declared["python"] | patch_declared["js"]
    existing_deps = existing_declared["python"] | existing_declared["js"]
    declared_only = {d for d in declared if d not in existing_deps}

    binary_paths = []
    binary_blockers = []
    for line in patch_text.splitlines():
        if line.startswith("Binary files "):
            # A deleted binary has "/dev/null" on the b/ side; fall back to the
            # a/ path so the deletion is judged by its real location instead
            # of being blocked as "unknown".
            m = re.search(r" b/(.+?) differ", line) or re.search(r"^Binary files a/(.+?) and /dev/null differ", line)
            rel = m.group(1) if m else "unknown"
            binary_paths.append(rel)
            if rel == "unknown" or _is_high_risk_binary_path(rel):
                binary_blockers.append(rel)
    if binary_paths:
        report["binary_diffs"] = sorted(set(binary_paths))
    if binary_blockers:
        report["binary_diff_blockers"] = sorted(set(binary_blockers))

    unsupported_ecosystems = _unsupported_ecosystem_uncertainties(files, changes)
    if unsupported_ecosystems:
        # Merge while dropping exact duplicates, preserving first-seen order.
        seen_uncertainties = set()
        merged_uncertainties = []
        for uncertainty in report.get("uncertainties", []) + unsupported_ecosystems:
            if isinstance(uncertainty, dict):
                key = (uncertainty.get("id"), uncertainty.get("message"), uncertainty.get("evidence"), uncertainty.get("path"))
            else:
                key = (str(uncertainty),)
            if key not in seen_uncertainties:
                seen_uncertainties.add(key)
                merged_uncertainties.append(uncertainty)
        report["uncertainties"] = merged_uncertainties

    report["unsupported_dependencies"] = sorted(unsupported)
    if declared_only:
        report.setdefault("warnings", []).append("Patch declares new dependencies that require review.")
        report["declared_dependencies"] = sorted(declared_only)
    fail_keys = ["missing_modified_files", "unsupported_dependencies", "unsupported_commands", "protected_artifact_modifications", "git_path_modifications", "binary_diff_blockers", "path_escape"]
    report["verdict"] = "FAIL" if any(report.get(k) for k in fail_keys) else "WARN" if (report.get("new_files") or report.get("deleted_files") or report.get("warnings") or declared_only or report.get("uncertainties") or report.get("binary_diffs")) else "PASS"
    return report
1621
+
1622
+
1623
def patch_report_to_traffic(report: dict, report_path: str = ".sourcepack/reports/latest.json") -> dict:
    """Convert a raw patch judgment into the normalized traffic-light report.

    Each list in ``report`` becomes a run of normalized findings (errors
    first, then review warnings, then uncertainties), and the result is built
    by ``traffic_report`` with the raw verdict (``"PASS"`` when absent).
    """
    findings = []
    add = findings.append

    for p in report.get("missing_modified_files", []):
        add(normalized_finding("missing_file", "error", "file", f"{p} not found in the trusted baseline.", p, suggestion="Restore the file, create it as a new file, or refresh the baseline only after accepting the current repo state."))
    for d in report.get("unsupported_dependencies", []):
        add(normalized_finding("unsupported_dependency", "error", "dependency", f"{d} is imported but not declared in scanned dependency files.", evidence=d, suggestion=f"Either remove {d} usage or add it intentionally to the appropriate dependency manifest."))
    for c in report.get("unsupported_commands", []):
        add(normalized_finding("unsupported_command", "error", "command", f"{c} is not supported by project evidence.", evidence=c, suggestion="Use a detected supported command or add the project file that defines this command."))

    if report.get("malformed_diff"):
        add(normalized_finding("malformed_diff", "error", "diff", "SourcePack could not safely parse the diff artifact it was asked to judge."))

    if report.get("path_escape"):
        escaped = report.get("path_escape_paths") or []
        if not escaped:
            add(normalized_finding("path_escape", "error", "diff", "Diff path escapes the repository root or is absolute."))
        else:
            for p in escaped:
                add(normalized_finding("path_escape", "error", "diff", "Diff path escapes the repository root or is absolute.", p, evidence=p))

    for p in report.get("protected_artifact_modifications", []):
        add(normalized_finding("protected_artifact", "error", "artifact", f"{p} is a protected SourcePack trust artifact.", p, evidence=p))
    for p in report.get("git_path_modifications", []):
        add(normalized_finding("git_path_modification", "error", "artifact", f"{p} modifies Git internal state and is not safe to judge as a normal repository file.", p, evidence=p))
    for p in report.get("binary_diff_blockers", []):
        add(normalized_finding("binary_diff", "error", "diff", f"Binary change at {p} crosses a SourcePack trust or high-risk control boundary.", p, evidence=p))

    # Binary diffs that are already reported as blockers are not repeated as warnings.
    for p in report.get("binary_diffs", []):
        if p in set(report.get("binary_diff_blockers", [])):
            continue
        add(normalized_finding("binary_diff", "warn", "uncertainty", f"Binary content was detected at {p} and was not semantically evaluated.", p, evidence=p))

    for p in report.get("new_files", []):
        add(normalized_finding("new_file", "warn", "review", f"{p} was created by the patch.", p))
    for p in report.get("deleted_files", []):
        add(normalized_finding("deleted_file", "warn", "review", f"{p} was deleted by the patch.", p))
    for d in report.get("declared_dependencies", []):
        add(normalized_finding("declared_dependency", "warn", "review", f"{d} was added to dependency files.", evidence=d))
    for c in report.get("declared_commands", []):
        add(normalized_finding("declared_command", "warn", "review", f"{c} was added in the same patch.", evidence=c))

    for w in report.get("uncertainties", []):
        if not isinstance(w, dict):
            # Legacy string form "id: detail".
            head, _, detail = str(w).partition(":")
            fid = head.strip() or "uncertainty"
            message = detail.strip() or str(w)
            add(normalized_finding(fid, "warn", "uncertainty", message))
            continue
        fid = str(w.get("id") or "uncertainty")
        message = str(w.get("message") or "SourcePack could not fully evaluate this change.")
        add(normalized_finding(fid, "warn", "uncertainty", message, w.get("path"), w.get("evidence"), w.get("suggestion")))

    checked = ["file references", "Python imports", "JS/TS imports", "known project commands", "protected SourcePack artifacts"]
    return traffic_report(report.get("verdict", "PASS"), findings=findings, checked_categories=checked, report_path=report_path)
1658
+
1659
+
1660
def run_git(repo: Path, args: list[str]) -> subprocess.CompletedProcess:
    """Run ``git <args>`` in ``repo`` and capture its output as text.

    Never raises for a missing executable or working directory; instead a
    synthetic ``CompletedProcess`` is returned:

    * return code ``127`` — the ``git`` executable could not be found;
    * return code ``128`` — ``repo`` does not exist or is not a directory.

    Output is decoded as UTF-8 with undecodable bytes replaced, so diff
    content in another encoding cannot abort the run.
    """
    command = ["git", *args]
    try:
        return subprocess.run(
            command,
            cwd=repo,
            text=True,
            encoding="utf-8",
            errors="replace",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (FileNotFoundError, NotADirectoryError):
        # subprocess raises the same errors for a bad cwd as for a missing
        # executable; tell them apart so callers do not report "git missing"
        # for what is really a bad path.
        if not Path(repo).is_dir():
            return subprocess.CompletedProcess(command, 128, "", f"working directory not found: {repo}")
        return subprocess.CompletedProcess(command, 127, "", "git executable not found")
1665
+
1666
+
1667
+
1668
def git_worktree_dirty(repo: str | Path) -> tuple[bool, str | None]:
    """Report whether the git worktree containing ``repo`` has pending changes.

    Returns ``(dirty, state)``. ``state`` is ``"git_unavailable"`` when a git
    call returned 127, ``"not_git"`` when the toplevel lookup failed for any
    other reason, and ``None`` otherwise. Unstaged changes, staged changes and
    untracked (non-ignored) files all count as dirty.
    """
    toplevel = run_git(Path(repo), ["rev-parse", "--show-toplevel"])
    if toplevel.returncode != 0:
        reason = "git_unavailable" if toplevel.returncode == 127 else "not_git"
        return False, reason

    root = Path(toplevel.stdout.strip())
    quiet_checks = (["diff", "--quiet"], ["diff", "--staged", "--quiet"])
    for check in quiet_checks:
        code = run_git(root, list(check)).returncode
        if code == 1:
            # `git diff --quiet` exits 1 when differences exist.
            return True, None
        if code == 127:
            return False, "git_unavailable"

    listing = run_git(root, ["ls-files", "--others", "--exclude-standard"])
    has_untracked = listing.returncode == 0 and bool(listing.stdout.strip())
    if has_untracked:
        return True, None
    if listing.returncode == 127:
        return False, "git_unavailable"
    return False, None
1686
+
1687
+
1688
+
1689
def _only_sourcepack_gitignore_change(repo: Path) -> bool:
    """Return True when the only pending change is SourcePack's own .gitignore entry.

    Every porcelain status line must end in ``.gitignore``, and the lines
    present in the working ``.gitignore`` but absent from ``HEAD:.gitignore``
    must be non-empty and consist solely of ``.sourcepack`` / ``.sourcepack/``.
    """
    gitignore_status = run_git(repo, ["status", "--porcelain", "--", ".gitignore"])
    full_status = run_git(repo, ["status", "--porcelain"])
    if gitignore_status.returncode != 0 or full_status.returncode != 0:
        return False

    entries = [entry for entry in full_status.stdout.splitlines() if entry.strip()]
    if not entries:
        return False
    if not all(entry.endswith(".gitignore") for entry in entries):
        return False

    try:
        current_text = (repo / ".gitignore").read_text(encoding="utf-8")
    except OSError:
        return False

    committed = run_git(repo, ["show", "HEAD:.gitignore"])
    committed_text = committed.stdout if committed.returncode == 0 else ""
    committed_lines = {entry.strip() for entry in committed_text.splitlines()}

    added = []
    for raw in current_text.splitlines():
        stripped = raw.strip()
        if stripped and stripped not in committed_lines:
            added.append(stripped)
    return bool(added) and set(added) <= {".sourcepack", ".sourcepack/"}
1705
+
1706
+
1707
def untracked_files_as_diff(repo: str | Path) -> str:
    """Render untracked, non-ignored files as a synthetic "new file" diff.

    Each file gets a ``diff --git`` header followed by either a single
    all-added hunk or a ``Binary files ... differ`` marker. An untracked
    ``.gitignore`` containing nothing but the SourcePack entry is skipped.
    Returns ``""`` when the git listing fails or there is nothing to report.
    """
    repo = Path(repo)
    listing = run_git(repo, ["ls-files", "--others", "--exclude-standard"])
    if listing.returncode != 0:
        return ""

    sourcepack_entries = {".sourcepack", ".sourcepack/"}
    out = []
    for raw in listing.stdout.splitlines():
        rel = raw.strip()
        if not rel:
            continue
        path = repo / rel
        if rel == ".gitignore":
            try:
                ignore_lines = {entry.strip() for entry in path.read_text(encoding="utf-8").splitlines() if entry.strip()}
            except OSError:
                ignore_lines = set()
            if ignore_lines <= sourcepack_entries:
                continue

        safe_rel = rel.replace("\\", "/")
        out.append(f"diff --git a/{safe_rel} b/{safe_rel}")
        out.append("new file mode 100644")
        out.append("--- /dev/null")
        out.append(f"+++ b/{safe_rel}")

        if is_probably_binary(path):
            out.append(f"Binary files /dev/null and b/{safe_rel} differ")
            continue
        try:
            text = path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: report it like a binary file.
            out.append(f"Binary files /dev/null and b/{safe_rel} differ")
            continue
        except OSError:
            continue

        body = text.splitlines()
        out.append(f"@@ -0,0 +1,{len(body)} @@")
        for content_line in body:
            out.append(f"+{content_line}")

    if not out:
        return ""
    return "\n".join(out) + "\n"
1738
+
1739
def build_repo_change_report(repo_path: str | Path, *, staged: bool = False, patch_text: str | None = None, ci: bool = False) -> dict:
    """Build the traffic-light report for the pending changes of a repository.

    Args:
        repo_path: Any path inside the git repository to judge.
        staged: Judge ``git diff --staged`` instead of the working tree diff
            (untracked files are then not included).
        patch_text: Explicit diff to judge; when given, git is not asked for one.
        ci: CI mode — a missing baseline is a hard failure and is never created.

    Returns:
        A traffic report dict. Git, ``.gitignore`` and baseline problems return
        early with a FAIL (or WARN for a locked baseline) report.
    """
    repo_arg = Path(repo_path).resolve()
    cp = run_git(repo_arg, ["rev-parse", "--show-toplevel"])
    if cp.returncode != 0:
        message = "Git executable not found." if cp.returncode == 127 else "No git repository found. Run sourcepack prompt or sourcepack baseline for non-git use."
        return traffic_report("FAIL", "stop before trusting this output.", [normalized_finding("git_unavailable" if cp.returncode == 127 else "no_git_repo", "error", "git", message)])
    git_root = Path(cp.stdout.strip()).resolve()
    # Prefer the given directory when it carries its own baseline, else the git root.
    repo = repo_arg if validate_baseline(repo_arg).get("state") in {"present", "stale", "corrupt"} else git_root
    paths = ensure_sourcepack_dirs(repo)
    added, err = ensure_gitignore_entry(repo)
    if added:
        paths.setdefault("gitignore_added", True)
    if err:
        return traffic_report("FAIL", "stop before trusting this output.", [normalized_finding("gitignore_unwritable", "error", "git", f"Cannot write .gitignore: {err}")])

    if patch_text is None:
        diff_args = ["diff", "--staged"] if staged else ["diff"]
        if repo != git_root:
            diff_args.append("--relative")
        cp = run_git(repo, diff_args)
        diff_text = cp.stdout
        if cp.returncode == 127:
            return traffic_report("FAIL", "stop before trusting this output.", [normalized_finding("git_unavailable", "error", "git", "Git executable not found.")])
        if cp.returncode != 0:
            # Plain `git diff` exits 0 even when differences exist, so any
            # other status is an error. Its empty stdout must not be read as
            # "no changes".
            detail = (cp.stderr or "").strip() or f"exit status {cp.returncode}"
            return traffic_report("FAIL", "stop before trusting this output.", [normalized_finding("git_diff_failed", "error", "git", f"git diff failed: {detail}")])
        if not staged:
            extra = untracked_files_as_diff(repo)
            if extra and not (added and _only_sourcepack_gitignore_change(repo)):
                diff_text = (diff_text + "\n" + extra).strip() + "\n"
    else:
        diff_text = patch_text

    baseline_status = validate_baseline(repo)
    if baseline_status["state"] == "corrupt":
        rep = traffic_report("FAIL", "trusted baseline is corrupt.", [normalized_finding("baseline_corrupt", "error", "baseline", baseline_status["message"])], ["baseline", "diff"], "Recreate the baseline only after verifying the current repo state should be trusted.")
        rep.update(baseline_report_fields(baseline_status))
        return rep
    if baseline_status["state"] == "missing":
        dirty_now, _ = git_worktree_dirty(repo)
        if ci:
            rep = traffic_report("FAIL", "trusted baseline is missing in CI.", [normalized_finding("baseline_missing", "error", "baseline", "No trusted SourcePack baseline exists; CI must not establish trust.")], ["baseline", "diff"], "create the baseline locally only after deciding the current repo state should be trusted.")
            rep.update(baseline_report_fields(baseline_status))
            return rep
        if diff_text.strip() or (dirty_now and not _only_sourcepack_gitignore_change(repo)):
            rep = traffic_report("FAIL", "baseline missing while changes are present.", [normalized_finding("baseline_missing", "error", "baseline", "No trusted SourcePack baseline exists while changes are present.")], ["baseline", "diff"], "run sourcepack baseline only after deciding the current repo state should be trusted.")
            rep.update(baseline_report_fields(baseline_status))
            return rep
        # Clean tree and no baseline: establish trust from the current state.
        try:
            build_current_baseline(repo, quiet=True)
            baseline_status = validate_baseline(repo)
            rep_note = "Created SourcePack baseline because none existed and no diff was present."
        except BaselineLockError as exc:
            return traffic_report("WARN", "baseline writer is locked.", [normalized_finding("baseline_locked", "warn", "tooling", str(exc))], ["baseline", "diff"], "try again after the other baseline operation finishes.", reason_type="tooling")
        except Exception as exc:
            return traffic_report("FAIL", "stop before trusting this output.", [normalized_finding("baseline_failed", "error", "baseline", f"Baseline verification failed: {exc}")])
    else:
        rep_note = None

    stale_findings = []
    if baseline_status["state"] == "stale":
        stale_findings.append(normalized_finding("baseline_stale", "warn", "uncertainty", "Trusted SourcePack baseline may not match current repo state."))

    if not diff_text.strip():
        verdict = "WARN" if stale_findings else "PASS"
        rep = traffic_report(verdict, "SourcePack could not fully evaluate this change." if stale_findings else "good to continue.", [normalized_finding("no_diff", "info", "diff", "No uncommitted changes detected."), *stale_findings], ["diff", "baseline freshness"])
    else:
        raw = judge_patch_text(repo / baseline_status["packet_path"], diff_text)
        rep = patch_report_to_traffic(raw)
        rep["raw_patch_judgment"] = raw
        rep = _integrate_execution_findings(repo, diff_text, rep)
        rep = _apply_local_policy(repo, rep)
        if stale_findings:
            # traffic_report() builds a fresh dict, so re-attach the audit
            # trail of suppressed findings after the rebuild (this mirrors
            # what _rebuild_from_findings preserves).
            carried = {key: rep[key] for key in ("policy_overrides",) if key in rep}
            if rep["verdict"] != "FAIL":
                rep = traffic_report("WARN", "SourcePack could not fully evaluate this change.", rep.get("findings", []) + stale_findings, rep.get("checked_categories", []), rep.get("next_action"), reason_type="uncertainty")
            else:
                rep = traffic_report("FAIL", rep.get("headline"), rep.get("findings", []) + stale_findings, rep.get("checked_categories", []), rep.get("next_action"))
            rep["raw_patch_judgment"] = raw
            rep.update(carried)

    rep.update(baseline_report_fields(baseline_status))
    if baseline_status.get("metadata_path"):
        try:
            rep["baseline"] = json.loads((repo / baseline_status["metadata_path"]).read_text(encoding="utf-8"))
        except Exception:
            # Best effort: baseline metadata is optional extra context.
            pass
    rep["current_git"] = git_metadata(repo)
    if rep_note:
        rep["note"] = rep_note
    rep["repo_path"] = str(repo)
    return rep
1810
+
1811
+
1812
def _rebuild_from_findings(rep: dict, findings: list[dict]) -> dict:
    """Recreate a traffic report from ``findings``, re-deriving the verdict.

    Any ``error`` finding yields FAIL, otherwise any ``warn`` yields WARN,
    otherwise PASS. ``raw_patch_judgment`` and ``policy_overrides`` are copied
    from ``rep`` when present.
    """
    if any(item.get("severity") == "error" for item in findings):
        verdict = "FAIL"
    elif any(item.get("severity") == "warn" for item in findings):
        verdict = "WARN"
    else:
        verdict = "PASS"

    categories = rep.get("checked_categories") or rep.get("checked") or []
    target_path = rep.get("report_path", ".sourcepack/reports/latest.json")
    rebuilt = traffic_report(verdict, findings=findings, checked_categories=categories, report_path=target_path)

    carried = {key: rep[key] for key in ("raw_patch_judgment", "policy_overrides") if key in rep}
    rebuilt.update(carried)
    return rebuilt
1819
+
1820
+
1821
def _integrate_execution_findings(repo: Path, checked_text: str, rep: dict) -> dict:
    """Append execution-ledger findings to ``rep`` and rebuild it.

    Returns ``rep`` unchanged when the ledger reports nothing.
    """
    extra = execution_findings(repo, checked_text)
    if extra:
        combined = list(rep.get("findings", [])) + extra
        return _rebuild_from_findings(rep, combined)
    return rep
1826
+
1827
+
1828
+ def _policy_entries_for_judgment(repo: Path) -> list[dict]:
1829
+ path = repo / ".sourcepack" / "policy" / "allow.jsonl"
1830
+ if not path.exists():
1831
+ return []
1832
+ entries = []
1833
+ now = utc_now()
1834
+ for line in path.read_text(encoding="utf-8").splitlines():
1835
+ try:
1836
+ entry = json.loads(line)
1837
+ except Exception:
1838
+ continue
1839
+ expires = entry.get("expires_at")
1840
+ if expires and str(expires) < now:
1841
+ continue
1842
+ entries.append(entry)
1843
+ return entries
1844
+
1845
+
1846
+ def _policy_matches(entry: dict, finding: dict) -> bool:
1847
+ scope = entry.get("scope")
1848
+ value = str(entry.get("value") or "")
1849
+ fid = finding.get("id")
1850
+ if fid == "git_path_modification" or str(finding.get("path") or "").startswith(".git/"):
1851
+ return False
1852
+ if scope == "dependency":
1853
+ return fid == "unsupported_dependency" and finding.get("evidence") == value
1854
+ if scope == "command":
1855
+ return fid == "unsupported_command" and finding.get("evidence") == value
1856
+ if scope == "path":
1857
+ if str(finding.get("path") or "") != value:
1858
+ return False
1859
+ if str(value).startswith(".sourcepack/baseline/") and not entry.get("high_risk"):
1860
+ return False
1861
+ return fid not in {"git_path_modification"}
1862
+ return False
1863
+
1864
+
1865
def _apply_local_policy(repo: Path, rep: dict) -> dict:
    """Drop findings covered by the local allow policy and rebuild the report.

    Returns ``rep`` untouched when there are no policy entries or none match.
    Otherwise the rebuilt report carries ``policy_overrides`` (one record per
    suppressed finding) plus an informational ``policy_override`` finding.
    """
    entries = _policy_entries_for_judgment(repo)
    if not entries:
        return rep

    kept, overrides = [], []
    for finding in rep.get("findings", []):
        matched = None
        for entry in entries:
            if _policy_matches(entry, finding):
                matched = entry
                break
        if not matched:
            kept.append(finding)
            continue
        overrides.append({"policy_id": matched.get("id"), "scope": matched.get("scope"), "value": matched.get("value"), "reason": matched.get("reason"), "suppressed_finding": finding.get("id"), "path": finding.get("path")})

    if not overrides:
        return rep

    rebuilt = _rebuild_from_findings(rep, kept)
    rebuilt["policy_overrides"] = overrides
    suppressed_values = ", ".join(str(item.get("value")) for item in overrides)
    notice = normalized_finding("policy_override", "info", "policy", "A local allow policy suppressed a matching finding.", evidence=suppressed_values)
    rebuilt.setdefault("findings", []).append(notice)
    # Rebuild once more so the report reflects the appended notice.
    return _rebuild_from_findings(rebuilt, rebuilt["findings"])
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
def write_auto_report(repo: Path, report: dict, details: dict) -> None:
    """Write ``report`` merged with ``details`` as the "auto" user report.

    Keys in ``details`` win over keys in ``report``; neither input is mutated.
    """
    merged = {**report, **details}
    write_user_report(repo, merged, "auto")
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+ # CLI-independent public judgment API
1902
@dataclass(frozen=True)
class Judgment:
    """Immutable result of judging a repository change."""

    # Repository path the judgment was produced for.
    repo_path: str
    # Policy mode used to translate the verdict into an exit code.
    policy_mode: PolicyMode
    # Full traffic report dict.
    report: dict

    @property
    def verdict(self) -> str:
        """Verdict recorded in the report, as a string (``"WARN"`` when absent)."""
        recorded = self.report.get("verdict", "WARN")
        return str(recorded)

    def exit_code(self) -> int:
        """Process exit code for this verdict under the configured policy mode."""
        current_verdict = self.verdict
        return policy_exit_code(current_verdict, self.policy_mode)
1914
+
1915
+
1916
def judge_repo_change(repo_path: str | Path, *, staged: bool = False, patch_text: str | None = None, policy_mode: PolicyMode | str = PolicyMode.LOCAL) -> Judgment:
    """Judge repository changes without CLI parsing, stdout rendering, or cli.py imports."""
    mode = normalize_policy_mode(policy_mode)
    in_ci = mode is PolicyMode.CI
    resolved_repo = Path(repo_path).resolve()
    report = build_repo_change_report(resolved_repo, staged=staged, patch_text=patch_text, ci=in_ci)
    if in_ci:
        # Mark the report so consumers can tell it was produced under CI policy.
        report["ci"] = True
    return Judgment(str(resolved_repo), mode, report)