sourcepack 1.10.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcepack/__init__.py +19 -0
- sourcepack/assets/__init__.py +1 -0
- sourcepack/assets/audit_template.md +3 -0
- sourcepack/assets/packet_instructions.md +3 -0
- sourcepack/baseline.py +285 -0
- sourcepack/cli.py +2991 -0
- sourcepack/commands.py +149 -0
- sourcepack/dependencies.py +98 -0
- sourcepack/diff_parser.py +122 -0
- sourcepack/ecosystems/__init__.py +3 -0
- sourcepack/ecosystems/generic.py +13 -0
- sourcepack/ecosystems/node.py +3 -0
- sourcepack/ecosystems/python.py +12 -0
- sourcepack/errors.py +19 -0
- sourcepack/evidence.py +109 -0
- sourcepack/execution_ledger.py +252 -0
- sourcepack/git.py +50 -0
- sourcepack/judgment.py +1922 -0
- sourcepack/packet.py +837 -0
- sourcepack/paths.py +68 -0
- sourcepack/policy.py +38 -0
- sourcepack/reason_codes.py +72 -0
- sourcepack/reports/__init__.py +5 -0
- sourcepack/reports/html.py +88 -0
- sourcepack/reports/json.py +123 -0
- sourcepack/reports/markdown.py +61 -0
- sourcepack/schemas.py +63 -0
- sourcepack-1.10.0a0.dist-info/METADATA +311 -0
- sourcepack-1.10.0a0.dist-info/RECORD +33 -0
- sourcepack-1.10.0a0.dist-info/WHEEL +5 -0
- sourcepack-1.10.0a0.dist-info/entry_points.txt +2 -0
- sourcepack-1.10.0a0.dist-info/licenses/LICENSE +21 -0
- sourcepack-1.10.0a0.dist-info/top_level.txt +1 -0
sourcepack/judgment.py
ADDED
|
@@ -0,0 +1,1922 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import fnmatch
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import tomllib
|
|
8
|
+
import re
|
|
9
|
+
import shutil
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
import tempfile
|
|
13
|
+
from dataclasses import dataclass, asdict
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path, PurePosixPath
|
|
16
|
+
from typing import Iterable
|
|
17
|
+
from xml.sax.saxutils import escape as xml_escape
|
|
18
|
+
from .diff_parser import PatchFileChange, normalize_diff_path as _normalize_diff_path, parse_unified_diff
|
|
19
|
+
from .baseline import BaselineLockError, acquire_baseline_lock, baseline_corrupt_result, baseline_report_fields, build_current_baseline, protected_baseline_path, release_baseline_lock, resolve_active_baseline, validate_baseline
|
|
20
|
+
from .ecosystems.python import PY_IMPORT_ALIASES
|
|
21
|
+
from .paths import ensure_gitignore_entry, ensure_sourcepack_dirs, sourcepack_paths
|
|
22
|
+
from .reports.json import normalized_finding, traffic_report, write_user_report
|
|
23
|
+
from .policy import PolicyMode, normalize_policy_mode, exit_code as policy_exit_code
|
|
24
|
+
from .execution_ledger import execution_findings
|
|
25
|
+
from .commands import resolve_command
|
|
26
|
+
from .dependencies import resolve_js_import, resolve_python_import
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
from . import __version__
|
|
30
|
+
except Exception:
|
|
31
|
+
__version__ = "1.10.0-alpha"
|
|
32
|
+
|
|
33
|
+
# Directory names the scanner records as "ignored_directory" and never descends into.
DEFAULT_IGNORED_DIRS = {
    ".git",
    "node_modules",
    ".venv",
    "venv",
    "__pycache__",
    "dist",
    "build",
    ".next",
    ".cache",
    "target",
    "coverage",
    ".pytest_cache",
    ".sourcepack",
}

# fnmatch-style patterns; a file whose name or relative path matches one is skipped.
DEFAULT_IGNORED_PATTERNS = {
    ".env",
    ".env.*",
    "*.pem",
    "*.key",
    "*.sqlite",
    "*.db",
    "*.png",
    "*.jpg",
    "*.jpeg",
    "*.gif",
    "*.webp",
    "*.pdf",
    "*.zip",
    "*.tar",
    "*.gz",
    "*.exe",
    "*.dll",
    "*.so",
    "*.dylib",
    "*.bin",
    "*.pyc",
}

# Lower-case suffixes accepted as text; files with any other non-empty suffix are skipped.
DEFAULT_TEXT_EXTENSIONS = {
    ".txt",
    ".md",
    ".py",
    ".js",
    ".ts",
    ".tsx",
    ".jsx",
    ".json",
    ".yaml",
    ".yml",
    ".html",
    ".css",
    ".csv",
    ".toml",
    ".ini",
    ".sql",
    ".sh",
    ".bat",
    ".ps1",
    ".rs",
    ".go",
    ".java",
    ".c",
    ".cpp",
    ".h",
    ".hpp",
    ".rb",
    ".php",
    ".xml",
}

# (label, compiled pattern) pairs; redact_secrets applies them in this order.
SECRET_PATTERNS = [
    ("openai_key", re.compile(r"sk-proj-[A-Za-z0-9_\-]{12,}|sk-[A-Za-z0-9]{24,}")),
    ("aws_access_key", re.compile(r"AKIA[0-9A-Z]{16}")),
    ("private_key", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
    ("generic_api_key", re.compile(r"(?i)(api[_-]?key|secret|token)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{16,}")),
    ("github_token", re.compile(r"ghp_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}")),
    ("slack_token", re.compile(r"xox[baprs]-[A-Za-z0-9\-]{20,}")),
]

# Well-known package names; NOTE(review): consumers are outside this excerpt.
COMMON_DEPENDENCIES = [
    "fastapi",
    "flask",
    "django",
    "react",
    "vue",
    "svelte",
    "pytest",
    "typer",
    "click",
    "sqlalchemy",
    "prisma",
    "pydantic",
    "pyyaml",
    "pillow",
    "beautifulsoup4",
    "opencv-python",
    "scikit-learn",
    "python-dotenv",
    "pyjwt",
    "python-dateutil",
    "boto3",
    "requests",
]

# Capability labels; NOTE(review): presumably consumed by feature_inventory — confirm.
FEATURE_NAMES = (
    "pdf",
    "ocr",
    "web server",
    "react",
    "docker",
    "authentication",
    "database",
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def utc_now() -> str:
    """Return the current time in UTC as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is consumed in 1 MiB chunks so it is never held in memory
    in full.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            chunk = stream.read(chunk_size)
            if chunk == b"":
                break
            digest.update(chunk)
    return digest.hexdigest()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
    encoded = text.encode("utf-8")
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def estimate_tokens(text: str) -> int:
    """Estimate the token count of *text* as ceil(len(text) / 4)."""
    whole, leftover = divmod(len(text), 4)
    # Any partial group of characters counts as one more token.
    return whole + 1 if leftover else whole
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def is_probably_binary(path: Path, sample_size: int = 4096) -> bool:
    """Heuristically decide whether the file at *path* is binary.

    Only the first *sample_size* bytes are read from disk, rather than the
    whole file. The sample is treated as binary when it contains a NUL
    byte, or when more than 30% of its bytes are control characters other
    than tab, newline, vertical tab, form feed and carriage return
    (values 9-13).

    Args:
        path: File to inspect.
        sample_size: Number of leading bytes to examine.

    Returns:
        True for probable binary content or when the file cannot be read;
        False for an empty sample or probable text.
    """
    try:
        with path.open("rb") as handle:
            sample = handle.read(sample_size)
    except OSError:
        # Unreadable files are reported as binary so callers skip them.
        return True
    if not sample:
        return False
    if b"\x00" in sample:
        return True
    control_bytes = sum(1 for byte in sample if byte < 9 or 13 < byte < 32)
    return control_bytes / len(sample) > 0.30
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def matches_any(name: str, patterns: Iterable[str]) -> bool:
    """Return True when *name* matches at least one fnmatch-style pattern."""
    for candidate in patterns:
        if fnmatch.fnmatch(name, candidate):
            return True
    return False
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def redact_secrets(text: str):
    """Replace every SECRET_PATTERNS match in *text* with a placeholder.

    Patterns are applied one after another, each on the output of the
    previous one, so recorded spans refer to the text as it stood when
    that pattern ran.

    Returns:
        A ``(redacted_text, redactions)`` tuple, where each redaction is a
        dict with ``pattern``, ``span_start`` and ``span_end`` keys.
    """
    found = []
    current = text
    for label, pattern in SECRET_PATTERNS:

        def _substitute(match, _label=label):
            found.append({"pattern": _label, "span_start": match.start(), "span_end": match.end()})
            return f"[REDACTED:{_label}]"

        current = pattern.sub(_substitute, current)
    return current, found
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@dataclass
class IncludedFile:
    """Record of one file accepted into the packet by SourceScanner.scan."""

    # Path relative to the scan root.
    relative_path: str
    # Resolved absolute path on disk.
    absolute_path: str
    # File size in bytes as reported by stat().
    size_bytes: int
    # Same value as packet_sha256 (the scanner assigns both from one digest).
    sha256: str
    # SHA-256 of the decoded text before any redaction.
    source_sha256: str
    # SHA-256 of the text stored in `content` (after redaction, if enabled).
    packet_sha256: str
    # Token estimate computed from `content`.
    estimated_tokens: int
    # Lower-cased file suffix, including the leading dot (may be empty).
    extension: str
    # File text, with secrets redacted when the scanner's `redact` flag is set.
    content: str
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class IgnoredFile:
    """Record of one path the scanner skipped, with the reason code."""

    # Path relative to the scan root; directory entries carry a trailing "/".
    relative_path: str
    # Short reason code such as "ignored_directory" or "binary_detected".
    reason: str
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class SourceScanner:
    """Walk a directory tree and collect the text files suitable for a packet.

    After :meth:`scan` has run:

    * ``included_files`` holds one ``IncludedFile`` per accepted file,
    * ``ignored_files`` holds one ``IgnoredFile`` per skipped file or directory,
    * ``redactions`` lists every secret-pattern hit (each tagged with ``file``),
    * ``total_seen`` counts every file name visited in walked directories.
    """

    def __init__(self, input_path: str | Path, max_file_size: int = 1_000_000, include_hidden: bool = False, redact: bool = True):
        """Configure a scan rooted at *input_path*.

        Args:
            input_path: Directory to scan; resolved to an absolute path.
            max_file_size: Files larger than this many bytes are skipped.
            include_hidden: When False, dot-files and dot-directories are skipped.
            redact: When True, secret patterns are replaced before storage.
        """
        self.input_path = Path(input_path).resolve()
        self.max_file_size = max_file_size
        self.include_hidden = include_hidden
        self.redact = redact
        self.included_files: list[IncludedFile] = []
        self.ignored_files: list[IgnoredFile] = []
        self.redactions: list[dict] = []
        self.total_seen = 0

    def ignore(self, path: Path, reason: str):
        """Record *path* as ignored for *reason*.

        The path is stored relative to the scan root when it lies under it.
        Any other path (relative, or absolute but outside the root) is stored
        unchanged instead of raising ``ValueError``.
        """
        try:
            rel = str(path.relative_to(self.input_path))
        except ValueError:
            rel = str(path)
        self.ignored_files.append(IgnoredFile(rel, reason))

    def scan(self):
        """Walk the input directory and populate the result lists.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            FileNotFoundError: The input path does not exist.
            NotADirectoryError: The input path is not a directory.
        """
        if not self.input_path.exists():
            raise FileNotFoundError(f"Input path does not exist: {self.input_path}")
        if not self.input_path.is_dir():
            raise NotADirectoryError(f"Input path is not a directory: {self.input_path}")
        for root, dirs, files in os.walk(self.input_path, followlinks=False):
            root_path = Path(root)
            # Assigning in place prunes the walk to the kept directories.
            dirs[:] = self._kept_dirs(root_path, sorted(dirs))
            for filename in sorted(files):
                self.total_seen += 1
                self._scan_file(root_path / filename)
        self.included_files.sort(key=lambda x: x.relative_path)
        self.ignored_files.sort(key=lambda x: x.relative_path)
        return self

    def _skip(self, rel_str: str, reason: str) -> None:
        """Append an ignored-file record for an already-relative path."""
        self.ignored_files.append(IgnoredFile(rel_str, reason))

    def _kept_dirs(self, root_path: Path, names: list[str]) -> list[str]:
        """Return the sub-directory names to descend into, recording the rest."""
        kept = []
        for name in names:
            dpath = root_path / name
            rel = str(dpath.relative_to(self.input_path)) + "/"
            if name in DEFAULT_IGNORED_DIRS:
                self._skip(rel, "ignored_directory")
            elif not self.include_hidden and name.startswith("."):
                self._skip(rel, "hidden_directory")
            elif dpath.is_symlink():
                self._skip(rel, "symlink_skipped")
            else:
                kept.append(name)
        return kept

    def _scan_file(self, fp: Path) -> None:
        """Run one file through the filters and include it or record why not."""
        filename = fp.name
        rel_str = str(fp.relative_to(self.input_path))
        if fp.is_symlink():
            return self._skip(rel_str, "symlink_skipped")
        if not self.include_hidden and filename.startswith("."):
            return self._skip(rel_str, "hidden_file")
        if matches_any(filename, DEFAULT_IGNORED_PATTERNS) or matches_any(rel_str, DEFAULT_IGNORED_PATTERNS):
            return self._skip(rel_str, "ignored_pattern")
        try:
            size = fp.stat().st_size
        except OSError:
            return self._skip(rel_str, "stat_error")
        if size > self.max_file_size:
            return self._skip(rel_str, "max_file_size_exceeded")
        # Files with no suffix fall through to the binary sniff below.
        if fp.suffix and fp.suffix.lower() not in DEFAULT_TEXT_EXTENSIONS:
            return self._skip(rel_str, "unsupported_extension")
        if is_probably_binary(fp):
            return self._skip(rel_str, "binary_detected")
        try:
            content = fp.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            return self._skip(rel_str, "decode_error")
        except OSError:
            return self._skip(rel_str, "read_error")
        # Hash before redaction so the original text stays identifiable.
        source_sha256 = sha256_text(content)
        if self.redact:
            content, hits = redact_secrets(content)
            for hit in hits:
                hit["file"] = rel_str
            self.redactions.extend(hits)
        packet_sha256 = sha256_text(content)
        self.included_files.append(IncludedFile(
            relative_path=rel_str,
            absolute_path=str(fp.resolve()),
            size_bytes=size,
            sha256=packet_sha256,
            source_sha256=source_sha256,
            packet_sha256=packet_sha256,
            estimated_tokens=estimate_tokens(content),
            extension=fp.suffix.lower(),
            content=content,
        ))
        return None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _tracked_file_inventory(root: Path, included_records: list[dict]) -> dict:
    """Build the ``file_inventory.json`` payload for *root*.

    Paths come from ``git ls-files -z`` when git runs successfully and lists
    at least one file. Otherwise the scanner's included paths are used. Each
    entry records whether the path is part of the prompt context and, for
    files that exist, a SHA-256 digest and a text/binary classification.
    """
    included = {str(record.get("relative_path", "")).replace("\\", "/") for record in included_records}

    try:
        proc = subprocess.run(["git", "ls-files", "-z"], cwd=root, text=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except (OSError, ValueError):
        proc = None

    tracked: list[str] = []
    if proc is not None and proc.returncode == 0:
        tracked = [chunk.decode("utf-8", "surrogateescape") for chunk in proc.stdout.split(b"\0") if chunk]

    if tracked:
        source = "git_ls_files"
        raw_paths = tracked
    else:
        # git missing, failing, or listing nothing: fall back to the scan result.
        source = "scanner_included_files"
        raw_paths = sorted(included)

    files: list[dict] = []
    for raw in raw_paths:
        rel = raw.replace("\\", "/")
        target = root / rel
        entry = {"relative_path": rel, "included_in_prompt_context": rel in included, "source": source}
        try:
            if not (target.exists() and target.is_file()):
                entry["file_type"] = "missing"
            else:
                entry["sha256"] = sha256_file(target)
                entry["file_type"] = "binary" if is_probably_binary(target) else "text"
        except OSError:
            entry["file_type"] = "unreadable"
        files.append(entry)

    return {"schema_version": "sourcepack.file_inventory.v1", "generated_at": utc_now(), "source": source, "files": files}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class PacketWriter:
    """Write a scanned source tree to disk as a SourcePack packet directory."""

    # Files whose hashes are recorded in receipt.json (when they exist).
    OUTPUT_FILES = ["manifest.json", "context.md", "context.xml", "file_tree.txt", "ignored_files.txt", "token_report.json", "redactions.json", "reality_map.json", "ai_instructions.md", "file_inventory.json"]

    def __init__(self, out: str | Path, scanner: SourceScanner, force: bool = False):
        """Store the output directory, the completed scanner and the overwrite flag."""
        self.out = Path(out)
        self.scanner = scanner
        self.force = force

    def prepare_out(self):
        """Ensure the output directory exists and is empty.

        Raises:
            FileExistsError: The directory has content and ``force`` is False.
        """
        if self.out.exists() and any(self.out.iterdir()):
            if not self.force:
                raise FileExistsError(f"Output directory is non-empty: {self.out}")
            for child in self.out.iterdir():
                # shutil.rmtree refuses symlinks; unlinking removes only the
                # link and never touches the directory it points at.
                if child.is_dir() and not child.is_symlink():
                    shutil.rmtree(child)
                else:
                    child.unlink()
        self.out.mkdir(parents=True, exist_ok=True)

    def _write_text(self, name: str, text: str) -> None:
        """Write *text* to the packet file *name* as UTF-8."""
        (self.out / name).write_text(text, encoding="utf-8")

    def _write_json(self, name: str, payload: dict) -> None:
        """Write *payload* to the packet file *name* as indented JSON."""
        self._write_text(name, json.dumps(payload, indent=2))

    def write_all(self):
        """Write every packet artifact and return the output directory.

        ``context.md`` is written before the reality map is generated
        because the reality-map helpers read it back from the packet
        directory.
        """
        self.prepare_out()
        scanner = self.scanner

        included_records = []
        for f in scanner.included_files:
            rec = asdict(f)
            rec.pop("content")  # content lives in context.md / context.xml only
            included_records.append(rec)
        ignored_records = [asdict(f) for f in scanner.ignored_files]
        total_tokens = sum(f.estimated_tokens for f in scanner.included_files)
        total_bytes = sum(f.size_bytes for f in scanner.included_files)

        manifest = {
            "input_path": str(scanner.input_path),
            "generated_at": utc_now(),
            "tool_version": __version__,
            "total_files_seen": scanner.total_seen,
            "total_files_included": len(included_records),
            "total_files_ignored": len(ignored_records),
            "total_bytes_included": total_bytes,
            "total_estimated_tokens": total_tokens,
            "included_files": included_records,
            "ignored_files": ignored_records,
        }
        self._write_json("manifest.json", manifest)
        self._write_json("file_inventory.json", _tracked_file_inventory(scanner.input_path, included_records))

        md_parts = ["# SourcePack Context Packet", "", "## Source Manifest Summary", "", f"Input path: {manifest['input_path']}", f"Generated at: {manifest['generated_at']}", f"Files included: {len(included_records)}", f"Estimated tokens: {total_tokens}", ""]
        for f in scanner.included_files:
            md_parts.extend([
                f"## File: {f.relative_path}", "", "Metadata:", f"- sha256: {f.sha256}", f"- bytes: {f.size_bytes}", f"- estimated_tokens: {f.estimated_tokens}", "", "Content:", "", f.content, "", "---", ""
            ])
        self._write_text("context.md", "\n".join(md_parts))

        xml_parts = ["<sourcepack>", " <files>"]
        for f in scanner.included_files:
            # The path sits inside a double-quoted attribute, so quotes must
            # be escaped as well as &, < and >.
            safe_path = xml_escape(f.relative_path, {'"': "&quot;"})
            xml_parts.append(f' <file path="{safe_path}" sha256="{f.sha256}" bytes="{f.size_bytes}" estimated_tokens="{f.estimated_tokens}">')
            xml_parts.append(" <content>")
            xml_parts.append(xml_escape(f.content))
            xml_parts.append(" </content>")
            xml_parts.append(" </file>")
        xml_parts.extend([" </files>", "</sourcepack>"])
        self._write_text("context.xml", "\n".join(xml_parts))

        tree_lines = [f"[INC] {f.relative_path}" for f in scanner.included_files]
        tree_lines.extend(f"[IGN] {f.relative_path} - {f.reason}" for f in scanner.ignored_files)
        self._write_text("file_tree.txt", "\n".join(sorted(tree_lines)) + "\n")
        self._write_text("ignored_files.txt", "\n".join(f"{f.relative_path}\t{f.reason}" for f in scanner.ignored_files) + "\n")

        token_report = {
            "total_estimated_tokens": total_tokens,
            # Every context-size threshold the packet exceeds.
            "warnings": [limit for limit in [32_000, 128_000, 200_000, 1_000_000] if total_tokens > limit],
            "per_file": [{"relative_path": f.relative_path, "estimated_tokens": f.estimated_tokens} for f in scanner.included_files],
        }
        self._write_json("token_report.json", token_report)
        self._write_json("redactions.json", {"redactions": scanner.redactions})

        reality_map = generate_reality_map(manifest, self.out)
        self._write_json("reality_map.json", reality_map)
        self._write_text("ai_instructions.md", render_ai_instructions(reality_map))

        hashes = {name: sha256_file(self.out / name) for name in self.OUTPUT_FILES if (self.out / name).exists()}
        receipt = {"generated_at": utc_now(), "tool_version": __version__, "hashes": hashes}
        self._write_json("receipt.json", receipt)
        return self.out
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _included_paths(manifest: dict) -> set[str]:
|
|
328
|
+
return {rec.get("relative_path", "").replace("\\", "/") for rec in manifest.get("included_files", [])}
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _package_json_scripts(packet: Path) -> dict[str, str]:
    """Return the ``scripts`` table of the first package.json in the packet.

    Only the first ``package.json`` found (by basename, case-insensitive) is
    consulted. An empty dict is returned when no such file exists, when it
    is not valid JSON, when its top-level value is not an object, or when
    ``scripts`` is missing or not an object.
    """
    for rel, content in _packet_file_contents(packet).items():
        if Path(rel).name.lower() != "package.json":
            continue
        try:
            package = json.loads(content)
        except json.JSONDecodeError:
            return {}
        # Valid JSON may still be a list, string or number, which has no .get().
        if not isinstance(package, dict):
            return {}
        scripts = package.get("scripts")
        return scripts if isinstance(scripts, dict) else {}
    return {}
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _is_poetry_project(packet: Path) -> bool:
    """Return True when any pyproject.toml in the packet has a ``[tool.poetry]`` table header."""
    header = re.compile(r"(?m)^\s*\[tool\.poetry\]\s*$")
    return any(
        Path(rel).name.lower() == "pyproject.toml" and header.search(content) is not None
        for rel, content in _packet_file_contents(packet).items()
    )
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _uses_unittest(packet: Path) -> bool:
    """Return True when any ``.py`` file in the packet imports ``unittest``."""
    import_line = re.compile(r"(?m)^\s*(import\s+unittest|from\s+unittest\s+import\s+)")
    return any(
        Path(rel).suffix.lower() == ".py" and import_line.search(content) is not None
        for rel, content in _packet_file_contents(packet).items()
    )
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def generate_reality_map(manifest: dict, packet: Path) -> dict:
    """Derive the structural "reality map" for a packet.

    The map summarises what can be claimed from file names, detected
    dependencies and detected features alone: project types, package
    managers, frameworks, entry points, supported commands, safe claims and
    explicit boundaries on what was not verified.

    Compose projects are recognised by any of the supported file names,
    including ``docker-compose.yaml``.

    Args:
        manifest: Packet manifest (uses ``included_files``, ``ignored_files``
            and ``input_path``).
        packet: Packet directory, passed through to the inventory helpers.

    Returns:
        A JSON-serialisable dict; list-valued fields built from sets are sorted.
    """
    files = _included_paths(manifest)
    lower_files = {f.lower() for f in files}
    basenames = {Path(f).name.lower() for f in files}
    deps = dependency_inventory(manifest, packet)
    features = feature_inventory(manifest, packet, deps)
    scripts = _package_json_scripts(packet)

    project_types: set[str] = set()
    package_managers: set[str] = set()
    frameworks: set[str] = set()
    supported_commands: set[str] = set()
    test_commands: set[str] = set()
    build_commands: set[str] = set()
    run_commands: set[str] = set()

    # Python markers: a root-level pyproject.toml or any requirements*.txt.
    if "pyproject.toml" in lower_files:
        project_types.add("python")
    if any(Path(f).name.lower().startswith("requirements") and f.endswith(".txt") for f in lower_files):
        project_types.add("python")
        package_managers.add("pip")
    if _is_poetry_project(packet):
        package_managers.add("poetry")

    # Node markers: a root-level package.json and its scripts table.
    if "package.json" in lower_files:
        project_types.add("node")
        package_managers.add("npm")
        for name in sorted(scripts):
            cmd = "npm test" if name == "test" else f"npm run {name}"
            supported_commands.add(cmd)
            if name == "test":
                test_commands.add(cmd)
            elif name in {"build", "compile"}:
                build_commands.add(cmd)
            elif name in {"start", "dev", "serve"}:
                run_commands.add(cmd)

    if "dockerfile" in basenames:
        supported_commands.add("docker build")
        build_commands.add("docker build")
    if basenames & {"docker-compose.yml", "docker-compose.yaml", "compose.yaml", "compose.yml"}:
        supported_commands.add("docker compose up")
        run_commands.add("docker compose up")

    if "pytest" in deps or any(f == "tests" or f.startswith("tests/") for f in lower_files):
        supported_commands.add("pytest")
        test_commands.add("pytest")
    if _uses_unittest(packet):
        supported_commands.add("python -m unittest")
        test_commands.add("python -m unittest")

    framework_map = {"fastapi": "FastAPI", "flask": "Flask", "django": "Django", "react": "React"}
    for dep, label in framework_map.items():
        # React also counts when it was detected as a feature only.
        if dep in deps or (dep == "react" and "react" in features):
            frameworks.add(label)

    ignored = manifest.get("ignored_files", [])
    ignored_reasons = {}
    for rec in ignored:
        reason = rec.get("reason", "unknown")
        ignored_reasons[reason] = ignored_reasons.get(reason, 0) + 1

    included_count = len(manifest.get("included_files", []))
    safe_claims = [
        f"This packet includes {included_count} source files.",
        f"SourcePack scanned input path: {manifest.get('input_path', '')}.",
    ]
    for name in ["pyproject.toml", "package.json", "Dockerfile"]:
        present = name.lower() in basenames
        safe_claims.append(f"The project {'contains' if present else 'does not include'} {name}.")
    if "react" not in deps and "react" not in features:
        safe_claims.append("No React dependency was detected.")
    if "pdf" not in features:
        safe_claims.append("No PDF parsing capability was detected.")
    if ignored:
        safe_claims.append("The packet includes ignored file records for safety or relevance reasons.")

    claim_boundaries = [
        "SourcePack did not execute the application.",
        "SourcePack did not prove semantic correctness.",
        "SourcePack did not verify external services.",
        "SourcePack did not prove security.",
        "SourcePack did not prove production readiness.",
        "Absence of evidence means unknown, not impossible.",
        "Unsupported claims should be treated as ungrounded.",
    ]

    return {
        "reality_map_schema_version": "1.0",
        "tool_version": __version__,
        "generated_at": utc_now(),
        "input_path": manifest.get("input_path", ""),
        "project_types": sorted(project_types),
        "package_managers": sorted(package_managers),
        "frameworks": sorted(frameworks),
        "entry_points": sorted(f for f in files if Path(f).name in {"main.py", "app.py", "server.py", "cli.py"}),
        "test_commands": sorted(test_commands),
        "build_commands": sorted(build_commands),
        "run_commands": sorted(run_commands),
        "supported_commands": sorted(supported_commands),
        "detected_dependencies": sorted(deps),
        "supported_capabilities": sorted(features),
        # Only the first 25 ignored records are embedded; counts cover all of them.
        "excluded_files_summary": {"total": len(ignored), "reasons": ignored_reasons, "records": ignored[:25]},
        "included_file_count": included_count,
        "confirmed_files": sorted(files),
        "ignored_file_count": len(ignored),
        "safe_claims": safe_claims,
        "unknowns": [
            "Runtime behavior was not executed.",
            "Semantic correctness was not proven.",
            "External services were not verified.",
            "Capabilities not present in structural evidence must be treated as unknown.",
            "Missing files must not be invented.",
        ],
        "claim_boundaries": claim_boundaries,
        "ai_constraints": [
            "Use only the packet and reality map as project evidence.",
            "Do not invent files, commands, dependencies, frameworks, services, or capabilities.",
            "If a required file is missing, say it is missing.",
            "If a command is unsupported by detected evidence, say it is unsupported.",
            "If a capability is not in supported_capabilities, treat it as unknown or unsupported.",
            "Cite file paths when making project-specific claims.",
            "Do not claim SourcePack proves semantic truth.",
            "Ask for missing files rather than hallucinating them.",
        ],
    }
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def render_ai_instructions(reality_map: dict) -> str:
    """Render ``ai_instructions.md`` from a reality map.

    The document consists of a fixed preamble of rules followed by the
    supported commands, supported capabilities, up to 200 confirmed files,
    the required answer contract and the claim boundaries. Empty command
    and capability lists are rendered as ``- None detected``.
    """
    preamble = [
        "# AI Instructions for This SourcePack Packet",
        "",
        "Use only the packet and `reality_map.json` as project evidence.",
        "Do not invent files, commands, dependencies, frameworks, services, or capabilities.",
        "If a required file is missing, say it is missing and ask for it rather than hallucinating it.",
        "If a command is unsupported by detected evidence, say it is unsupported.",
        "If a capability is not listed in `supported_capabilities`, treat it as unknown or unsupported.",
        "If you introduce a new external dependency, modify the appropriate dependency manifest in the same patch and list it under Dependency Changes.",
        "Only recommend commands listed under Supported Commands unless your patch also adds the project file that defines the new command.",
        "Before referencing a file as existing, it must appear in Confirmed Files; label intentional creations as NEW FILE.",
        "If required evidence is missing, say UNKNOWN and ask for the missing file/output instead of guessing.",
        "Cite file paths when making project-specific claims.",
        "Do not claim SourcePack proves semantic truth, security, production readiness, or external service behavior.",
        "",
    ]

    command_items = [f"- `{cmd}`" for cmd in reality_map.get("supported_commands", [])]
    if not command_items:
        command_items = ["- None detected"]

    capability_items = [f"- {cap}" for cap in reality_map.get("supported_capabilities", [])]
    if not capability_items:
        capability_items = ["- None detected"]

    # The file list is capped so the instructions stay a manageable size.
    file_items = [f"- `{path}`" for path in reality_map.get("confirmed_files", [])[:200]]
    boundary_items = [f"- {boundary}" for boundary in reality_map.get("claim_boundaries", [])]

    contract = ["- Files to modify", "- New files", "- Dependency changes", "- Commands to run", "- Assumptions/unknowns", "- Patch or code"]

    document = [
        *preamble,
        "## Supported Commands", "",
        *command_items,
        "", "## Supported Capabilities", "",
        *capability_items,
        "", "## Confirmed Files", "",
        *file_items,
        "", "## Required Answer Contract", "",
        *contract,
        "", "## Claim Boundaries", "",
        *boundary_items,
    ]
    return "\n".join(document) + "\n"
|
|
498
|
+
|
|
499
|
+
def load_manifest(packet: Path) -> dict:
    """Read and parse ``manifest.json`` from the packet directory."""
    manifest_path = packet / "manifest.json"
    raw = manifest_path.read_text(encoding="utf-8")
    manifest = json.loads(raw)
    return manifest
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# Suffixes that make a token in AI output look like a file reference.
PATHLIKE_EXTENSIONS = {
    ".py",
    ".js",
    ".jsx",
    ".ts",
    ".tsx",
    ".json",
    ".toml",
    ".yaml",
    ".yml",
    ".md",
    ".txt",
    ".cfg",
    ".ini",
    ".css",
    ".html",
    ".rs",
    ".go",
    ".java",
    ".rb",
    ".php",
    ".sh",
}

# Leading path components accepted as project directories for bare references.
PROJECT_PATH_PREFIXES = {
    "src",
    "sourcepack",
    "tests",
    "test",
    "frontend",
    "backend",
    "docs",
    "app",
    "lib",
    "packages",
    "public",
    "config",
    "scripts",
}
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _normalize_ai_ref(ref: str) -> str | None:
    """Clean a candidate file reference taken from AI output.

    Surrounding quotes and punctuation, one trailing colon and any leading
    ``./`` segments are removed, and backslashes become forward slashes.
    Empty references, absolute paths and Windows drive paths are rejected,
    as is anything the diff-path normalizer flags as unsafe or reduces to
    nothing.

    Returns:
        The normalized relative path, or None when the reference is rejected.
    """
    cleaned = ref.strip().strip("`'\".,;)").replace("\\", "/")
    if cleaned.endswith(":"):
        cleaned = cleaned[:-1]
    while cleaned.startswith("./"):
        cleaned = cleaned[2:]

    if not cleaned:
        return None
    if cleaned.startswith("/"):
        return None
    if re.match(r"^[A-Za-z]:/", cleaned):
        return None

    normalized, unsafe = _normalize_diff_path(cleaned)
    if unsafe or not normalized:
        return None
    return normalized
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _looks_like_ai_file_ref(ref: str) -> bool:
    """Decide whether *ref* plausibly names a project file.

    Well-known root file names are always accepted. Anything else needs a
    suffix from PATHLIKE_EXTENSIONS plus either a directory separator or a
    first component listed in PROJECT_PATH_PREFIXES.
    """
    normalized = ref.replace("\\", "/")
    posix = PurePosixPath(normalized)
    well_known = {"Dockerfile", "docker-compose.yml", "compose.yaml", "compose.yml", "pyproject.toml", "package.json", "requirements.txt"}
    if posix.name in well_known:
        return True
    if posix.suffix.lower() not in PATHLIKE_EXTENSIONS:
        return False
    if "/" in normalized:
        return True
    parts = [part for part in posix.parts if part not in {"."}]
    return parts and parts[0] in PROJECT_PATH_PREFIXES
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def extract_refs(text: str) -> set[str]:
    """Collect normalized file references mentioned in *text*.

    Candidates come from four case-insensitive patterns: quoted or
    backticked tokens, bullet-list items, tokens following verbs such as
    "edit" or "open", and paths under a known project directory. A
    candidate is kept only if it survives normalization and looks like a
    file reference.
    """
    token = r"(?:\./)?[A-Za-z0-9_.-]+(?:[\\/][A-Za-z0-9_.-]+)*\.[A-Za-z0-9_.-]+:?|Dockerfile"
    patterns = (
        rf"[`'\"]({token})[`'\"]",
        rf"(?m)^\s*[-*]\s+({token})\b",
        rf"\b(?:edit|open|update|modify|change|in|file)\s+({token})\b",
        rf"\b((?:\./)?(?:src|sourcepack|tests|test|frontend|backend|docs|app|lib|packages|public|config|scripts)[\\/][A-Za-z0-9_./\\-]+\.[A-Za-z0-9_.-]+:?)\b",
    )
    found: set[str] = set()
    for pattern in patterns:
        matches = re.findall(pattern, text, re.I)
        cleaned = (_normalize_ai_ref(candidate) for candidate in matches)
        found.update(ref for ref in cleaned if ref and _looks_like_ai_file_ref(ref))
    return found
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _packet_file_contents(packet: Path) -> dict[str, str]:
|
|
549
|
+
context_path = packet / "context.md"
|
|
550
|
+
if not context_path.exists():
|
|
551
|
+
return {}
|
|
552
|
+
text = context_path.read_text(encoding="utf-8", errors="ignore")
|
|
553
|
+
contents: dict[str, str] = {}
|
|
554
|
+
current: str | None = None
|
|
555
|
+
body: list[str] = []
|
|
556
|
+
in_content = False
|
|
557
|
+
for line in text.splitlines():
|
|
558
|
+
if line.startswith("## File: "):
|
|
559
|
+
if current is not None:
|
|
560
|
+
contents[current] = "\n".join(body).rstrip("\n")
|
|
561
|
+
current = line.removeprefix("## File: ").strip()
|
|
562
|
+
body = []
|
|
563
|
+
in_content = False
|
|
564
|
+
elif current is not None and line == "Content:":
|
|
565
|
+
in_content = True
|
|
566
|
+
body = []
|
|
567
|
+
elif current is not None and in_content and line == "---":
|
|
568
|
+
contents[current] = "\n".join(body).rstrip("\n")
|
|
569
|
+
current = None
|
|
570
|
+
body = []
|
|
571
|
+
in_content = False
|
|
572
|
+
elif current is not None and in_content:
|
|
573
|
+
body.append(line)
|
|
574
|
+
if current is not None:
|
|
575
|
+
contents[current] = "\n".join(body).rstrip("\n")
|
|
576
|
+
return contents
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def _normalize_dependency_name(name: str) -> str:
|
|
580
|
+
return name.strip().lower().replace("_", "-")
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def _dependency_name_for_import(name: str) -> str:
    """Map an imported module name to its dependency name.

    The name is normalised first; if ``PY_IMPORT_ALIASES`` has an entry for
    the normalised form that entry is returned, otherwise the normalised
    name itself.
    """
    key = _normalize_dependency_name(name)
    if key in PY_IMPORT_ALIASES:
        return PY_IMPORT_ALIASES[key]
    return key
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _js_package_root(imported: str) -> str:
|
|
589
|
+
imported = imported.strip().lower()
|
|
590
|
+
parts = imported.split("/")
|
|
591
|
+
if imported.startswith("@") and len(parts) >= 2 and parts[0] != "@":
|
|
592
|
+
return "/".join(parts[:2])
|
|
593
|
+
if imported.startswith("@/"):
|
|
594
|
+
return imported
|
|
595
|
+
return parts[0]
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def _python_dependency_names_from_requirement_lines(text: str) -> set[str]:
    """Collect normalised distribution names from requirements-file text.

    Comments, blank lines and option lines (``-r``, ``--hash`` ...) are
    skipped.  A line contributes a name only when it starts with a valid
    project name that is followed by the end of the line or by a
    requirement delimiter (version operator, extras, marker, ``@`` direct
    reference, ``(`` or a line-continuation backslash).  Lines such as bare
    URLs or local paths therefore add nothing, and no empty name is added.
    """
    deps: set[str] = set()
    for line in text.splitlines():
        cleaned = line.split("#", 1)[0].strip()
        if not cleaned or cleaned.startswith("-"):
            continue
        match = re.match(
            r"([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)\s*(?:$|[<>=!~;\[(@\\])",
            cleaned,
        )
        if match:
            deps.add(_normalize_dependency_name(match.group(1)))
    return deps
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def _python_dependency_names_from_pyproject(content: str) -> set[str]:
    """Collect normalised dependency names declared in ``pyproject.toml`` text.

    Sources read: ``project.dependencies``, ``project.optional-dependencies``,
    Poetry's ``dependencies`` / ``dev-dependencies`` / ``group.*.dependencies``
    tables, ``dev-dependencies`` and ``dependency-groups`` under ``tool.pdm``
    and ``tool.uv``, and the top-level ``dependency-groups`` table.
    Unparseable TOML yields an empty set.
    """
    try:
        data = tomllib.loads(content)
    except tomllib.TOMLDecodeError:
        return set()
    deps: set[str] = set()

    def add_requirement(req: object) -> None:
        # Non-string entries (e.g. include-group tables) are ignored.
        if not isinstance(req, str):
            return
        # Cut at the first delimiter, including whitespace, "(" and the
        # "@" of a direct reference, so only the project name remains.
        name = re.split(r"[\s<>=!~;\[(@]", req.strip(), maxsplit=1)[0]
        if name:
            deps.add(_normalize_dependency_name(name))

    def add_requirements(reqs: object) -> None:
        if isinstance(reqs, list):
            for req in reqs:
                add_requirement(req)

    def add_grouped(groups: object) -> None:
        # Accept a table of named groups or a single flat list:
        # tool.uv.dev-dependencies is a plain list of requirement strings.
        if isinstance(groups, dict):
            for group in groups.values():
                add_requirements(group)
        else:
            add_requirements(groups)

    def add_table_keys(section: object) -> None:
        # Poetry tables map names to constraints; "python" is the
        # interpreter constraint rather than a package.
        if isinstance(section, dict):
            deps.update(
                _normalize_dependency_name(dep)
                for dep in section
                if dep.lower() != "python"
            )

    project = data.get("project", {})
    if isinstance(project, dict):
        add_requirements(project.get("dependencies"))
        optional = project.get("optional-dependencies", {})
        if isinstance(optional, dict):
            add_grouped(optional)

    tool = data.get("tool", {})
    if isinstance(tool, dict):
        poetry = tool.get("poetry", {})
        if isinstance(poetry, dict):
            for section_name in ("dependencies", "dev-dependencies"):
                add_table_keys(poetry.get(section_name, {}))
            poetry_groups = poetry.get("group", {})
            if isinstance(poetry_groups, dict):
                for group_data in poetry_groups.values():
                    if isinstance(group_data, dict):
                        add_table_keys(group_data.get("dependencies", {}))
        for tool_name in ("pdm", "uv"):
            tool_data = tool.get(tool_name, {})
            if isinstance(tool_data, dict):
                for key in ("dev-dependencies", "dependency-groups"):
                    add_grouped(tool_data.get(key, {}))

    dependency_groups = data.get("dependency-groups", {})
    if isinstance(dependency_groups, dict):
        add_grouped(dependency_groups)
    return deps
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def _add_common_dependency(deps: set[str], name: str):
    """Add to *deps* every ``COMMON_DEPENDENCIES`` entry that matches *name*.

    Both sides are compared after normalisation; matching entries are stored
    lower-cased.  Names that match nothing leave *deps* unchanged.
    """
    wanted = _normalize_dependency_name(name)
    deps.update(
        known.lower()
        for known in COMMON_DEPENDENCIES
        if _normalize_dependency_name(known) == wanted
    )
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def dependency_inventory(manifest: dict, packet: Path) -> set[str]:
    """Return the known common dependencies evidenced by the packet's files.

    For every file listed in ``manifest["included_files"]`` the captured
    content is inspected according to file type: ``pyproject.toml``,
    ``requirements*.txt`` and ``package.json`` contribute declared names;
    ``.py`` and JS/TS sources contribute imported names.  Only names that
    match an entry of ``COMMON_DEPENDENCIES`` are kept.
    """
    deps: set[str] = set()
    contents = _packet_file_contents(packet)
    for rec in manifest.get("included_files", []):
        rel = rec.get("relative_path", "")
        content = contents.get(rel, "")
        name = Path(rel).name.lower()
        suffix = Path(rel).suffix.lower()
        if name == "pyproject.toml":
            for dep in _python_dependency_names_from_pyproject(content):
                _add_common_dependency(deps, dep)
        elif name.startswith("requirements") and name.endswith(".txt"):
            for dep in _python_dependency_names_from_requirement_lines(content):
                _add_common_dependency(deps, dep)
        elif name == "package.json":
            try:
                package = json.loads(content)
            except json.JSONDecodeError:
                package = {}
            # Valid JSON that is not an object (list, null, ...) declares nothing.
            if not isinstance(package, dict):
                continue
            for section in ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies"):
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    for dep_name in section_deps:
                        _add_common_dependency(deps, dep_name)
        elif suffix == ".py":
            for imported in re.findall(r"(?m)^\s*(?:import|from)\s+([A-Za-z_][A-Za-z0-9_]*)", content):
                _add_common_dependency(deps, imported)
        elif suffix in {".js", ".jsx", ".ts", ".tsx"}:
            # Capture "/"-separated segments too, so "@scope/pkg/sub" reaches
            # _js_package_root intact and resolves to "@scope/pkg".
            for imported in re.findall(
                r"""(?:from\s+["']|import\s*\(\s*["']|require\s*\(\s*["'])(@?[A-Za-z0-9_.-]+(?:/[A-Za-z0-9_.-]+)*)""",
                content,
            ):
                _add_common_dependency(deps, _js_package_root(imported))
    return deps
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _has_import(content: str, *modules: str) -> bool:
|
|
708
|
+
module_pattern = "|".join(re.escape(module) for module in modules)
|
|
709
|
+
return bool(re.search(rf"(?m)^\s*(?:import|from)\s+({module_pattern})(?:\b|[._])", content))
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
# Dependency names that count as evidence of PDF support when a
# pyproject.toml or requirements file declares them
# (checked by _declares_pdf_dependency).
PDF_DEPENDENCIES = {"pypdf", "pdfplumber", "fitz", "pymupdf"}
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
def _declares_pdf_dependency(rel: str, content: str) -> bool:
|
|
716
|
+
name = Path(rel).name.lower()
|
|
717
|
+
if name == "pyproject.toml":
|
|
718
|
+
return any(dep in PDF_DEPENDENCIES for dep in _python_dependency_names_from_pyproject(content))
|
|
719
|
+
if name.startswith("requirements") and name.endswith(".txt"):
|
|
720
|
+
return any(dep in PDF_DEPENDENCIES for dep in _python_dependency_names_from_requirement_lines(content))
|
|
721
|
+
return False
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def feature_inventory(manifest: dict, packet: Path, deps: set[str] | None = None) -> set[str]:
    """Infer coarse feature labels evidenced by the packet.

    Labels (``docker``, ``pdf``, ``react``, ``web server``, ``database``,
    ``authentication``, ``ocr``) are derived from file names in the
    manifest, from *deps* (computed with ``dependency_inventory`` when not
    supplied) and from the captured file contents.
    """
    if deps is None:
        deps = dependency_inventory(manifest, packet)
    contents = _packet_file_contents(packet)
    files = {rec.get("relative_path", "").replace("\\", "/") for rec in manifest.get("included_files", [])}
    lower_files = {rel.lower() for rel in files}
    lower_names = {Path(rel).name for rel in lower_files}
    features: set[str] = set()

    # Compose accepts both the .yml and .yaml spelling of either file name.
    if lower_names & {"dockerfile", "docker-compose.yml", "docker-compose.yaml", "compose.yaml", "compose.yml"}:
        features.add("docker")
    # Match the file name exactly so "xpdf_parser.py" is not counted.
    if "pdf_parser.py" in lower_names:
        features.add("pdf")
    if any(_declares_pdf_dependency(rel, content) for rel, content in contents.items()):
        features.add("pdf")
    if "react" in deps or lower_files & {"frontend/app.tsx", "frontend/app.jsx"}:
        features.add("react")
    if deps & {"fastapi", "flask", "django"} or lower_names & {"server.py", "app.py"}:
        features.add("web server")
    if (
        deps & {"sqlalchemy", "prisma"}
        or lower_names & {"schema.prisma", "schema.sql"}
        or any("/migrations/" in f"/{rel}/" for rel in files)
    ):
        features.add("database")
    if any(part == "auth" or part.startswith("auth_") for rel in lower_files for part in Path(rel).parts):
        features.add("authentication")

    for rel, content in contents.items():
        suffix = Path(rel).suffix.lower()
        if suffix == ".py":
            # "pymupdf" is also an importable name and is already listed in
            # PDF_DEPENDENCIES, so the import check covers it too.
            if _has_import(content, "pypdf", "pdfplumber", "fitz", "pymupdf"):
                features.add("pdf")
            if _has_import(content, "fastapi", "flask", "django") or re.search(r"(?m)^\s*@\w+\.(?:route|get|post|put|patch|delete)\(", content):
                features.add("web server")
            if _has_import(content, "sqlalchemy", "prisma") or re.search(r"(?i)\b(sqlite|postgres(?:ql)?|mysql)://", content):
                features.add("database")
            if _has_import(content, "jwt", "oauthlib", "authlib") or re.search(r"(?i)@\w+\.(?:route|get|post)\([^)]*login", content):
                features.add("authentication")
            if _has_import(content, "pytesseract", "easyocr"):
                features.add("ocr")
        elif suffix in {".js", ".jsx", ".ts", ".tsx"}:
            if re.search(r"""(?:from\s+["']react["']|require\s*\(\s*["']react["']|import\s+React\b)""", content):
                features.add("react")
            if re.search(r"(?i)\b(jwt|oauth|session|login)\b", content):
                features.add("authentication")
        elif Path(rel).name.lower() == "package.json":
            if re.search(r'"react"\s*:', content):
                features.add("react")
    return features
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
# File names of generated packet artifacts regarded as protected.
# NOTE(review): no use of this constant is visible in this part of the
# file - confirm where the protection is enforced.
PROTECTED_PACKET_ARTIFACTS = {"manifest.json", "receipt.json", "reality_map.json", "ai_instructions.md"}
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _normalize_inventory_path(value: object) -> str | None:
|
|
775
|
+
if not isinstance(value, str):
|
|
776
|
+
return None
|
|
777
|
+
rel, unsafe = _normalize_diff_path(value)
|
|
778
|
+
if unsafe or not rel:
|
|
779
|
+
return None
|
|
780
|
+
return rel
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def _baseline_inventory_from_packet(packet: str | Path, manifest: dict | None = None) -> tuple[set[str], bool]:
|
|
784
|
+
"""Return authoritative enforcement baseline paths when a packet has them.
|
|
785
|
+
|
|
786
|
+
Prompt context manifests may be selective, so diff enforcement must prefer the
|
|
787
|
+
baseline file inventory artifact when it exists. The boolean is True only
|
|
788
|
+
when a full inventory artifact was loaded successfully.
|
|
789
|
+
"""
|
|
790
|
+
packet = Path(packet)
|
|
791
|
+
for name in ("file_inventory.json", "inventory.json", "baseline_inventory.json"):
|
|
792
|
+
path = packet / name
|
|
793
|
+
if not path.exists():
|
|
794
|
+
continue
|
|
795
|
+
try:
|
|
796
|
+
data = json.loads(path.read_text(encoding="utf-8"))
|
|
797
|
+
except (OSError, json.JSONDecodeError):
|
|
798
|
+
continue
|
|
799
|
+
raw_files = data.get("files") if isinstance(data, dict) else data
|
|
800
|
+
if not isinstance(raw_files, list):
|
|
801
|
+
continue
|
|
802
|
+
files: set[str] = set()
|
|
803
|
+
for item in raw_files:
|
|
804
|
+
raw_path = item.get("relative_path") if isinstance(item, dict) else item
|
|
805
|
+
rel = _normalize_inventory_path(raw_path)
|
|
806
|
+
if rel:
|
|
807
|
+
files.add(rel)
|
|
808
|
+
return files, True
|
|
809
|
+
return _included_paths(manifest or load_manifest(packet)), False
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def known_files(manifest: dict, packet_path: str | Path | None = None) -> set[str]:
    """Return the set of file paths considered known for a packet.

    Without *packet_path* these are the manifest's included paths; with it,
    the paths reported by ``_baseline_inventory_from_packet``.
    """
    if packet_path is None:
        return _included_paths(manifest)
    return _baseline_inventory_from_packet(packet_path, manifest)[0]
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def supported_commands_inventory(reality_map: dict) -> set[str]:
    """Return the ``supported_commands`` listed in *reality_map* as a set.

    A missing key or a null/empty value yields an empty set instead of
    raising.
    """
    return set(reality_map.get("supported_commands") or ())
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def docker_evidence(files: set[str]) -> dict[str, bool]:
    """Report whether *files* contain a Dockerfile and/or a Compose file.

    Only the final path component is compared, case-insensitively.  All four
    Compose file names are recognised: ``compose.yaml``, ``compose.yml``,
    ``docker-compose.yaml`` and ``docker-compose.yml``.
    """
    names = {Path(f).name.lower() for f in files}
    compose_names = {"docker-compose.yml", "docker-compose.yaml", "compose.yaml", "compose.yml"}
    return {
        "dockerfile": "dockerfile" in names,
        "compose": bool(names & compose_names),
    }
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def python_project_evidence(files: set[str], deps: set[str]) -> dict[str, bool]:
    """Summarise evidence that the packet is a Python project with tests.

    ``python_project`` needs a root ``pyproject.toml`` or any
    ``requirements*.txt``; ``tests`` needs a path equal to or under
    ``tests``; ``pytest`` needs ``"pytest"`` in *deps*.  Paths are compared
    lower-cased.
    """
    lowered = {path.lower() for path in files}
    has_requirements = any(
        Path(path).name.lower().startswith("requirements") and path.endswith(".txt")
        for path in lowered
    )
    has_tests = "tests" in lowered or any(path.startswith("tests/") for path in lowered)
    evidence = {"python_project": "pyproject.toml" in lowered or has_requirements}
    evidence["tests"] = has_tests
    evidence["pytest"] = "pytest" in deps
    return evidence
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def node_project_evidence(files: set[str], scripts: dict[str, str]) -> dict[str, bool]:
    """Report whether a root ``package.json`` exists and any scripts are known."""
    has_manifest = any(path.lower() == "package.json" for path in files)
    return {"package_json": has_manifest, "scripts": bool(scripts)}
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def extract_js_import_specifiers_from_text(text: str) -> set[str]:
    """Return the lower-cased module specifiers referenced in JS/TS *text*.

    Recognises static ``import`` (with or without ``from``), ``export ...
    from``, dynamic ``import(...)`` and ``require(...)`` with a quoted
    specifier.  Specifiers that are blank after trimming are dropped.
    """
    regexes = (
        r"""\bimport\s+(?:[^"'()]+?\s+from\s+)?["']([^"']+)["']""",
        r"""\bexport\s+[^"']*?\s+from\s+["']([^"']+)["']""",
        r"""\bimport\s*\(\s*["']([^"']+)["']\s*\)""",
        r"""\brequire\s*\(\s*["']([^"']+)["']\s*\)""",
    )
    found: set[str] = set()
    for regex in regexes:
        for hit in re.finditer(regex, text):
            spec = hit.group(1).strip()
            if spec:
                found.add(spec.lower())
    return found
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def extract_imports_from_text(text: str, suffix: str = ".py") -> set[str]:
    """Return lower-cased names imported by *text*, chosen by file *suffix*.

    For ``.py`` the result holds the top-level package of every
    ``from X import ...`` line and of every module listed in an
    ``import a, b.c as d`` line (all comma-separated entries, not just the
    first).  Relative imports contribute nothing.  For JS/TS suffixes the
    module specifiers found by ``extract_js_import_specifiers_from_text``
    are returned.  Any other suffix yields an empty set.
    """
    imports: set[str] = set()
    if suffix == ".py":
        imports.update(re.findall(r"(?m)^\s*from\s+([A-Za-z_][A-Za-z0-9_]*)", text))
        # The clause stops at a comment, a ";" or the end of the line.
        for clause in re.findall(r"(?m)^\s*import\s+([^\n#;]+)", text):
            for item in clause.split(","):
                head = re.match(r"\s*([A-Za-z_][A-Za-z0-9_]*)", item)
                if head:
                    imports.add(head.group(1))
    elif suffix in JS_EXTS:
        imports |= extract_js_import_specifiers_from_text(text)
    return {name.lower() for name in imports}
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def _materialize_packet_worktree(packet: Path, overlay: dict[str, str] | None = None) -> tempfile.TemporaryDirectory[str]:
    """Write the packet's captured files into a fresh temporary directory.

    Entries in *overlay* replace or extend the captured contents.  Paths
    that ``_normalize_diff_path`` flags as unsafe, or reduces to nothing,
    are skipped.  The caller owns the returned ``TemporaryDirectory`` and is
    responsible for cleaning it up; if populating it fails, the directory
    is removed here before the error propagates.
    """
    tmp = tempfile.TemporaryDirectory(prefix="sourcepack-resolver-")
    try:
        root = Path(tmp.name)
        contents = _packet_file_contents(packet)
        if overlay:
            contents.update(overlay)
        for rel, content in contents.items():
            normalized, unsafe = _normalize_diff_path(rel)
            if unsafe or not normalized:
                continue
            target = root / normalized
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
    except BaseException:
        # Do not leave a half-written tree behind when nobody will receive
        # the handle needed to clean it up.
        tmp.cleanup()
        raise
    return tmp
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
def _dependency_additions_from_patch(changes: list[PatchFileChange]) -> set[str]:
|
|
888
|
+
return set()
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def analyze_patch(packet_path: str | Path, patch_text: str, changes: list[PatchFileChange] | None = None) -> dict:
    """Judge a unified diff against the evidence recorded in a packet.

    *patch_text* is parsed with ``parse_unified_diff`` unless *changes* is
    supplied.  The returned report lists the touched files, files modified
    but unknown to the packet, new and deleted files, imports of known
    dependencies the packet does not evidence, commands (docker compose,
    npm, pytest, make) that the packet does not support, and changes under
    ``.sourcepack/`` or ``.git``.  ``verdict`` is ``FAIL`` when any blocking
    list is non-empty or a path escapes the repository, ``WARN`` when there
    are new files, warnings or uncertainties, and ``PASS`` otherwise.
    """
    compose_names = {"docker-compose.yml", "docker-compose.yaml", "compose.yaml", "compose.yml"}
    packet = Path(packet_path)
    manifest = load_manifest(packet)
    reality_path = packet / "reality_map.json"
    if reality_path.exists():
        reality = json.loads(reality_path.read_text(encoding="utf-8"))
    else:
        reality = generate_reality_map(manifest, packet)
    files, baseline_inventory_loaded = _baseline_inventory_from_packet(packet, manifest)
    deps = dependency_inventory(manifest, packet)
    scripts = _package_json_scripts(packet)
    if changes is None:
        changes = parse_unified_diff(patch_text)
    patch_deps = _dependency_additions_from_patch(changes)
    known_deps = deps | patch_deps
    # Computed once here instead of once per change inside the loops below.
    context_paths = _included_paths(manifest)
    packet_contents = _packet_file_contents(packet)

    report = {
        "patch_judgment_schema_version": "1.0",
        "verdict": "PASS",
        "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [],
        "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "git_path_modifications": [], "warnings": [],
    }
    if any(ch.unsafe_path for ch in changes):
        report["path_escape"] = True

    all_added = []
    for ch in changes:
        report["modified_files"].append(ch.path)
        if ch.new_file:
            report["new_files"].append(ch.path)
        elif ch.operation not in {"rename", "copy"} and ch.path not in files:
            if baseline_inventory_loaded or ch.path in context_paths:
                report["missing_modified_files"].append(ch.path)
            else:
                # Without a full inventory the file may simply be outside
                # the prompt context, so it is not treated as missing.
                report.setdefault("uncertain_modified_files", []).append(ch.path)
        if ch.deleted_file:
            report["deleted_files"].append(ch.path)
        if ch.path.startswith(".sourcepack/"):
            report["protected_artifact_modifications"].append(ch.path)
        if ch.path == ".git" or ch.path.startswith(".git/"):
            report.setdefault("git_path_modifications", []).append(ch.path)
        if ch.path.startswith(".github/workflows/"):
            report.setdefault("uncertainties", []).append({"id": "workflow_change", "message": f"{ch.path} changes repository automation and requires review", "path": ch.path, "evidence": ch.path})
        if ch.operation in {"rename", "copy"}:
            report.setdefault("uncertainties", []).append({"id": "unsupported_rename_copy", "message": f"{ch.operation} semantics for {ch.path} require review", "path": ch.path, "evidence": ch.old_path or ch.path})
        added = "\n".join(ch.added_lines or [])
        all_added.append(added)
        for imported in extract_imports_from_text(added, Path(ch.path).suffix.lower()):
            wanted = _normalize_dependency_name(imported)
            for dep in COMMON_DEPENDENCIES:
                # The inventory stores lower-cased names, so the lower-cased
                # spelling is checked as well as the entry as written.
                if wanted == _normalize_dependency_name(dep) and dep not in known_deps and dep.lower() not in known_deps:
                    report["unsupported_dependencies"].append(dep)

    added_text = "\n".join(all_added)
    supported = supported_commands_inventory(reality)
    added_paths = {ch.path for ch in changes}
    compose_added = any(Path(path).name.lower() in compose_names for path in added_paths)
    if re.search(r"docker\s+compose\s+up", added_text, re.I):
        if compose_added:
            report["warnings"].append("Patch adds Docker Compose support used by commands; review the new support.")
            report.setdefault("declared_commands", []).append("docker compose up")
        elif not any(Path(f).name.lower() in compose_names for f in files):
            report["unsupported_commands"].append("docker compose up")

    # npm scripts introduced by the patch itself, found by applying each
    # package.json change to the captured baseline content.
    patch_scripts = set()
    command_uncertainties = []
    for ch in changes:
        if Path(ch.path).name.lower() != "package.json":
            continue
        base = packet_contents.get(ch.old_path or ch.path, "")
        post = _apply_patch_change_to_text(base, ch)
        if post is None:
            command_uncertainties.append({"id": "command_manifest_uncertain", "message": f"Could not reconstruct {ch.path} safely", "path": ch.path})
            continue
        try:
            package = json.loads(post)
        except json.JSONDecodeError:
            command_uncertainties.append({"id": "command_manifest_uncertain", "message": f"Could not parse {ch.path} as JSON", "path": ch.path})
            continue
        if not isinstance(package, dict):
            # Valid JSON that is not an object cannot declare scripts.
            command_uncertainties.append({"id": "command_manifest_uncertain", "message": f"{ch.path} is not a JSON object", "path": ch.path})
            continue
        package_scripts = package.get("scripts")
        if isinstance(package_scripts, dict):
            patch_scripts.update(str(script) for script in package_scripts if isinstance(script, str) and script not in scripts)
    if command_uncertainties:
        report.setdefault("uncertainties", []).extend(command_uncertainties)

    # Match within one line only, then collapse runs of blanks so that
    # "npm  run   build" is recognised as "npm run build".
    npm_commands = {
        re.sub(r"[ \t]+", " ", cmd)
        for cmd in re.findall(r"npm[ \t]+(?:run[ \t]+)?[A-Za-z0-9:_-]+", added_text)
    }
    for normalized in sorted(npm_commands):
        if normalized.startswith("npm run "):
            script = normalized.removeprefix("npm run ").strip()
            if script in patch_scripts:
                report["warnings"].append(f"Patch adds npm script {script} used by commands; review the new support.")
                report.setdefault("declared_commands", []).append(normalized)
            elif script not in scripts:
                report["unsupported_commands"].append(normalized)
        elif normalized == "npm test" and "test" not in scripts:
            report["unsupported_commands"].append(normalized)

    if re.search(r"\b(pytest|python\s+-m\s+pytest)\b", added_text, re.I):
        py = python_project_evidence(files, deps)
        if not (py["pytest"] or py["tests"] or "pytest" in supported):
            report["unsupported_commands"].append("pytest")

    make_text = packet_contents.get("Makefile") or packet_contents.get("makefile") or ""
    make_targets = {m.group(1) for m in re.finditer(r"^([A-Za-z0-9_.:-]+)\s*:", make_text, re.M)}
    for cmd in sorted(set(re.findall(r"\bmake\s+[A-Za-z0-9_.:-]+", added_text))):
        target = cmd.split(None, 1)[1]
        if target not in make_targets:
            report["unsupported_commands"].append(cmd)

    if not baseline_inventory_loaded:
        outside_context = sorted({
            ch.path for ch in changes
            if not ch.new_file
            and not ch.deleted_file
            and ch.path not in context_paths
        })
        if outside_context:
            report.setdefault("uncertainties", []).append({"id": "baseline_inventory_missing", "message": "Baseline packet lacks full file inventory; modified files outside prompt context could not be checked against tracked repo inventory.", "evidence": ", ".join(outside_context)})
    if report["new_files"]:
        report["warnings"].append("Patch creates new files that were not part of the original packet reality.")

    fail_keys = ["missing_modified_files", "unsupported_dependencies", "unsupported_commands", "protected_artifact_modifications", "git_path_modifications", "path_escape"]
    if any(report.get(k) for k in fail_keys):
        report["verdict"] = "FAIL"
    elif report["new_files"] or report["warnings"] or report.get("uncertainties"):
        report["verdict"] = "WARN"
    # Deduplicate and sort the list-valued fields for stable output.
    for key in ["modified_files", "missing_modified_files", "new_files", "deleted_files", "unsupported_dependencies", "unsupported_commands", "protected_artifact_modifications", "git_path_modifications", "warnings"]:
        report[key] = sorted(set(report[key]))
    return report
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
def _has_negation_before(text: str, start: int) -> bool:
|
|
1016
|
+
window = text[max(0, start - 48):start].lower()
|
|
1017
|
+
return bool(re.search(r"\b(do not|don't|avoid|not|no|without|unless|until|does not|is no|will not)\b", window))
|
|
1018
|
+
|
|
1019
|
+
|
|
1020
|
+
def _ai_dependency_actions(text: str, dep: str) -> bool:
    """Tell whether *text* imports, installs or says to use Python package *dep*.

    Import statements are also matched through any module name that
    ``PY_IMPORT_ALIASES`` maps to *dep*.  A match preceded by a negation
    (see ``_has_negation_before``) does not count.
    """
    dep_pat = re.escape(dep)
    canonical = _normalize_dependency_name(dep)
    names = {dep_pat} | {
        re.escape(module)
        for module, package in PY_IMPORT_ALIASES.items()
        if package == canonical
    }
    # Longest alternative first so a shorter name cannot pre-empt a longer one.
    alias_pat = "(?:" + "|".join(sorted(names, key=len, reverse=True)) + ")"
    patterns = (
        rf"\bimport\s+{alias_pat}\b",
        rf"\bfrom\s+{alias_pat}\s+import\b",
        rf"\b(?:pip install|python\s+-m\s+pip\s+install|poetry add|uv add|pdm add|add|use|install|import)\s+{dep_pat}\b",
    )
    return any(
        not _has_negation_before(text, hit.start())
        for pattern in patterns
        for hit in re.finditer(pattern, text, re.I)
    )
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
def _ai_js_dependency_actions(text: str, dep: str) -> bool:
|
|
1040
|
+
dep_pat = re.escape(dep)
|
|
1041
|
+
patterns = [
|
|
1042
|
+
rf"\bimport\s+[^\n;]*?from\s+[`'\"]{dep_pat}(?:/[^`'\"]*)?[`'\"]",
|
|
1043
|
+
rf"\brequire\s*\(\s*[`'\"]{dep_pat}(?:/[^`'\"]*)?[`'\"]\s*\)",
|
|
1044
|
+
rf"\b(?:npm install|npm i|pnpm add|yarn add|add|use|install|import)\s+{dep_pat}\b",
|
|
1045
|
+
]
|
|
1046
|
+
for pattern in patterns:
|
|
1047
|
+
for m in re.finditer(pattern, text, re.I):
|
|
1048
|
+
if not _has_negation_before(text, m.start()):
|
|
1049
|
+
return True
|
|
1050
|
+
return False
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
def _ai_command_instructions(text: str, command_pattern: str) -> list[str]:
|
|
1054
|
+
found = []
|
|
1055
|
+
for m in re.finditer(command_pattern, text, re.I):
|
|
1056
|
+
before = text[max(0, m.start() - 32):m.start()].lower()
|
|
1057
|
+
line_start = text.rfind("\n", 0, m.start()) + 1
|
|
1058
|
+
line_prefix = text[line_start:m.start()].strip().lower()
|
|
1059
|
+
backticked = m.start() > 0 and m.end() < len(text) and text[m.start() - 1] == "`" and text[m.end()] == "`"
|
|
1060
|
+
instruction = bool(re.search(r"\b(run|then|execute|use|uses|start with)\s+$", before)) or line_prefix in {"-", "*", "1.", "2.", "3."} or backticked
|
|
1061
|
+
if instruction and not _has_negation_before(text, m.start()):
|
|
1062
|
+
found.append(re.sub(r"\s+", " ", m.group(0).strip()).lower())
|
|
1063
|
+
return found
|
|
1064
|
+
|
|
1065
|
+
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
# Traffic-light label associated with each judgment verdict.
LIGHT_BY_VERDICT = {"PASS": "GREEN LIGHT", "WARN": "YELLOW LIGHT", "FAIL": "RED LIGHT"}
# Rank per severity name (lower = more severe).
# NOTE(review): presumably used as a sort key - usage is not visible here.
SEVERITY_ORDER = {"error": 0, "warn": 1, "info": 2}
# Standard-library module names: sys.stdlib_module_names when the
# interpreter provides it, plus a small explicit set.
PY_STDLIB = set(getattr(sys, "stdlib_module_names", set())) | {"typing", "pathlib", "json", "os", "sys", "re", "subprocess", "datetime", "unittest"}
# File names that declare Python dependencies.
# NOTE(review): usage is not visible in this part of the file.
PY_DEP_FILES = {"requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"}
# Suffixes treated as JavaScript/TypeScript sources (see extract_imports_from_text).
JS_EXTS = {".js", ".jsx", ".ts", ".tsx"}
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
def _latest_report_html_path(repo: str | Path) -> Path:
    """Return the ``latest_html`` entry of ``ensure_sourcepack_dirs(repo)``."""
    layout = ensure_sourcepack_dirs(repo)
    return layout["latest_html"]
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
def finalize_diff_report(repo: str | Path | None, report: dict, args, stem: str = "diff") -> dict:
    """Return a finalised copy of *report* and, given a repo, persist it.

    ``ci`` is set to ``True`` when ``args.ci`` is truthy.  When *repo* is
    not ``None`` the report is written with ``write_user_report``; a failed
    write is best-effort and only recorded as the warning
    ``report_artifact_write_failed``.  The caller's *report*, including its
    ``warnings`` list, is never modified.
    """
    full = dict(report)
    if getattr(args, "ci", False):
        full["ci"] = True
    if repo is not None:
        try:
            write_user_report(repo, full, stem)
        except Exception:
            # dict() above is a shallow copy, so appending in place would
            # also change the list held by the caller's report.
            full["warnings"] = [*(full.get("warnings") or []), "report_artifact_write_failed"]
    return full
|
|
1092
|
+
|
|
1093
|
+
|
|
1094
|
+
def git_metadata(repo: str | Path) -> dict:
    """Collect branch, HEAD commit and dirty state for the repository at *repo*.

    ``branch`` and ``head_commit`` are ``None`` when the corresponding git
    command exits non-zero.  ``dirty`` is reported only when
    ``git_worktree_dirty`` returns no ``dirty_state``; otherwise it is
    ``None`` and ``dirty_state`` carries the value returned.
    """
    root = Path(repo)

    def stdout_if_ok(result):
        return result.stdout.strip() if result.returncode == 0 else None

    head = run_git(root, ["rev-parse", "HEAD"])
    branch = run_git(root, ["rev-parse", "--abbrev-ref", "HEAD"])
    dirty, dirty_state = git_worktree_dirty(root)

    meta = {"branch": stdout_if_ok(branch), "head_commit": stdout_if_ok(head)}
    meta["dirty"] = None if dirty_state is not None else dirty
    meta["dirty_state"] = dirty_state
    return meta
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
def scanner_config_hash() -> str:
    """Return ``sha256_text`` of the scanner configuration payload.

    The payload is serialised as JSON with sorted keys and holds the sorted
    default ignore lists and text extensions plus fixed size, hidden-file
    and redaction settings.
    """
    settings = dict(
        max_file_size=1_000_000,
        include_hidden=False,
        redact=True,
    )
    settings["ignored_dirs"] = sorted(DEFAULT_IGNORED_DIRS)
    settings["ignored_patterns"] = sorted(DEFAULT_IGNORED_PATTERNS)
    settings["text_extensions"] = sorted(DEFAULT_TEXT_EXTENSIONS)
    serialized = json.dumps(settings, sort_keys=True)
    return sha256_text(serialized)
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
def build_prompt_context(repo: str | Path) -> dict:
    """Scan *repo*, write the prompt packet and copy out its key artifacts.

    The packet is written with ``force=True``; its ``reality_map.json`` and
    ``ai_instructions.md`` are then copied to the ``prompt_reality`` and
    ``prompt_instructions`` locations.  Returns the mapping produced by
    ``ensure_sourcepack_dirs``.
    """
    paths = ensure_sourcepack_dirs(repo)
    packet_dir = paths["prompt_packet"]
    scan = SourceScanner(repo).scan()
    PacketWriter(packet_dir, scan, force=True).write_all()
    for artifact, destination in (
        ("reality_map.json", "prompt_reality"),
        ("ai_instructions.md", "prompt_instructions"),
    ):
        shutil.copy2(packet_dir / artifact, paths[destination])
    return paths
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def render_prompt(task: str, instructions: str, reality: dict) -> str:
    """Render the Markdown prompt combining the task, instructions and reality map.

    List-valued sections are shown as bullet lists, or ``- None detected``
    when the list is missing or empty.  The result ends with a newline.
    """
    def bullets(items):
        if not items:
            return "- None detected"
        return "\n".join(f"- {item}" for item in items)

    project_types = ", ".join(reality.get("project_types") or ["unknown"])
    lines = [
        "# SourcePack Verified AI Prompt", "",
        "## User Task", "", task, "",
        "## AI Grounding Instructions", "", instructions.rstrip(), "",
        "## Compact Reality Map Summary", "",
        f"Project types: {project_types}",
        f"Included files: {reality.get('included_file_count', 0)}",
        "",
    ]
    sections = (
        ("## Supported Commands", "supported_commands"),
        ("## Detected Dependencies", "detected_dependencies"),
        ("## Supported Capabilities", "supported_capabilities"),
        ("## Unknown and Unsupported Boundaries", "claim_boundaries"),
    )
    for heading, key in sections:
        lines += [heading, "", bullets(reality.get(key, [])), ""]
    lines += [
        "Cite exact file paths for project-specific claims.",
        "Do not invent files, dependencies, commands, services, or capabilities.",
        "Absence of evidence means unknown, not impossible.",
        "",
    ]
    return "\n".join(lines)
|
|
1132
|
+
|
|
1133
|
+
|
|
1134
|
+
def copy_to_clipboard(text: str) -> bool:
    """Try to place *text* on the system clipboard; return whether it worked.

    Uses ``pbcopy`` on macOS, ``clip`` on Windows, and otherwise tries
    ``wl-copy``, ``xclip`` and ``xsel`` in that order.  Tools that are not
    on ``PATH`` are skipped, and any failure while running one is ignored
    so the next candidate can be tried.
    """
    system = platform.system().lower()
    if system == "darwin":
        candidates = [["pbcopy"]]
    elif system == "windows":
        candidates = [["clip"]]
    else:
        candidates = [["wl-copy"], ["xclip", "-selection", "clipboard"], ["xsel", "--clipboard", "--input"]]

    for argv in candidates:
        if shutil.which(argv[0]) is None:
            continue
        try:
            finished = subprocess.run(
                argv,
                input=text,
                text=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=5,
            )
        except Exception:
            # Best effort: fall through to the next tool.
            continue
        if finished.returncode == 0:
            return True
    return False
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
def _is_local_python_import(name: str, path: str, files: set[str]) -> bool:
|
|
1149
|
+
candidates = {f"{name}.py", f"{name}/__init__.py", f"src/{name}.py", f"src/{name}/__init__.py"}
|
|
1150
|
+
parent = str(Path(path).parent).replace("\\", "/")
|
|
1151
|
+
if parent != ".":
|
|
1152
|
+
candidates |= {f"{parent}/{name}.py", f"{parent}/{name}/__init__.py"}
|
|
1153
|
+
return bool(candidates & files)
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
# package.json sections whose keys are read as declared dependencies
# (see _package_json_declared_deps_from_added_lines).
JS_DEP_SECTIONS = {"dependencies", "devDependencies", "peerDependencies", "optionalDependencies"}
|
|
1157
|
+
|
|
1158
|
+
|
|
1159
|
+
def _package_json_declared_deps_from_added_lines(lines: list[str]) -> set[str]:
    """Return lower-cased dependency names declared by added ``package.json`` lines.

    When the joined lines parse as a JSON object, the keys of its dependency
    sections are used.  If that yields nothing (or the text is only a
    fragment), quoted keys are scraped from any ``"<section>": { ... }``
    span found in the text instead.
    """
    added = "\n".join(lines)
    try:
        parsed = json.loads(added)
    except json.JSONDecodeError:
        parsed = None

    if isinstance(parsed, dict):
        structured = {
            dep.lower()
            for section in JS_DEP_SECTIONS
            if isinstance(parsed.get(section), dict)
            for dep in parsed[section]
        }
        if structured:
            return structured

    scraped: set[str] = set()
    for section in JS_DEP_SECTIONS:
        for body in re.findall(rf'"{section}"\s*:\s*\{{(.*?)\}}', added, re.I | re.S):
            scraped.update(
                key.lower()
                for key in re.findall(r'"(@?[A-Za-z0-9_.-]+(?:/[A-Za-z0-9_.-]+)?)"\s*:', body)
            )
    return scraped
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
def _apply_patch_change_to_text(original: str, change: PatchFileChange) -> str | None:
|
|
1180
|
+
if change.deleted_file:
|
|
1181
|
+
return ""
|
|
1182
|
+
result = original.splitlines()
|
|
1183
|
+
if result and result[0] == "":
|
|
1184
|
+
result = result[1:]
|
|
1185
|
+
out: list[str] = []
|
|
1186
|
+
idx = 0
|
|
1187
|
+
saw_hunk = False
|
|
1188
|
+
for line in change.diff_lines or []:
|
|
1189
|
+
if line.startswith("@@"):
|
|
1190
|
+
m = re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@", line)
|
|
1191
|
+
if not m:
|
|
1192
|
+
return None
|
|
1193
|
+
old_start = max(int(m.group(1)) - 1, 0)
|
|
1194
|
+
if old_start < idx or old_start > len(result):
|
|
1195
|
+
return None
|
|
1196
|
+
out.extend(result[idx:old_start])
|
|
1197
|
+
idx = old_start
|
|
1198
|
+
saw_hunk = True
|
|
1199
|
+
elif line.startswith(" "):
|
|
1200
|
+
body = line[1:]
|
|
1201
|
+
if idx >= len(result) or result[idx] != body:
|
|
1202
|
+
return None
|
|
1203
|
+
out.append(result[idx])
|
|
1204
|
+
idx += 1
|
|
1205
|
+
elif line.startswith("-"):
|
|
1206
|
+
body = line[1:]
|
|
1207
|
+
if idx >= len(result) or result[idx] != body:
|
|
1208
|
+
return None
|
|
1209
|
+
idx += 1
|
|
1210
|
+
elif line.startswith("+"):
|
|
1211
|
+
out.append(line[1:])
|
|
1212
|
+
if not saw_hunk and not change.new_file:
|
|
1213
|
+
return None
|
|
1214
|
+
out.extend(result[idx:])
|
|
1215
|
+
return "\n".join(out) + ("\n" if original.endswith("\n") or change.new_file else "")
|
|
1216
|
+
|
|
1217
|
+
|
|
1218
|
+
def _python_dependency_names_by_scope_from_pyproject(content: str) -> dict[str, set[str]]:
|
|
1219
|
+
scopes = {"runtime": set(), "dev": set(), "optional": set()}
|
|
1220
|
+
try:
|
|
1221
|
+
data = tomllib.loads(content)
|
|
1222
|
+
except tomllib.TOMLDecodeError:
|
|
1223
|
+
return scopes
|
|
1224
|
+
|
|
1225
|
+
def add_req(target: set[str], req: object) -> None:
|
|
1226
|
+
if isinstance(req, str):
|
|
1227
|
+
name = re.split(r"[<>=!~;\[]", req.strip(), maxsplit=1)[0]
|
|
1228
|
+
if name:
|
|
1229
|
+
target.add(_normalize_dependency_name(name))
|
|
1230
|
+
|
|
1231
|
+
project = data.get("project", {})
|
|
1232
|
+
if isinstance(project, dict):
|
|
1233
|
+
for req in project.get("dependencies", []) if isinstance(project.get("dependencies"), list) else []:
|
|
1234
|
+
add_req(scopes["runtime"], req)
|
|
1235
|
+
optional = project.get("optional-dependencies", {})
|
|
1236
|
+
if isinstance(optional, dict):
|
|
1237
|
+
for group in optional.values():
|
|
1238
|
+
if isinstance(group, list):
|
|
1239
|
+
for req in group:
|
|
1240
|
+
add_req(scopes["optional"], req)
|
|
1241
|
+
tool = data.get("tool", {})
|
|
1242
|
+
if isinstance(tool, dict):
|
|
1243
|
+
poetry = tool.get("poetry", {})
|
|
1244
|
+
if isinstance(poetry, dict):
|
|
1245
|
+
section = poetry.get("dependencies", {})
|
|
1246
|
+
if isinstance(section, dict):
|
|
1247
|
+
for dep in section:
|
|
1248
|
+
if dep.lower() != "python":
|
|
1249
|
+
scopes["runtime"].add(_normalize_dependency_name(dep))
|
|
1250
|
+
for section_name in ("dev-dependencies",):
|
|
1251
|
+
section = poetry.get(section_name, {})
|
|
1252
|
+
if isinstance(section, dict):
|
|
1253
|
+
scopes["dev"].update(_normalize_dependency_name(dep) for dep in section)
|
|
1254
|
+
group = poetry.get("group", {})
|
|
1255
|
+
if isinstance(group, dict):
|
|
1256
|
+
for group_data in group.values():
|
|
1257
|
+
if isinstance(group_data, dict):
|
|
1258
|
+
section = group_data.get("dependencies", {})
|
|
1259
|
+
if isinstance(section, dict):
|
|
1260
|
+
scopes["dev"].update(_normalize_dependency_name(dep) for dep in section)
|
|
1261
|
+
for tool_name in ("pdm", "uv"):
|
|
1262
|
+
tool_data = tool.get(tool_name, {})
|
|
1263
|
+
if isinstance(tool_data, dict):
|
|
1264
|
+
for key in ("dev-dependencies", "dependency-groups"):
|
|
1265
|
+
groups = tool_data.get(key, {})
|
|
1266
|
+
if isinstance(groups, dict):
|
|
1267
|
+
for group in groups.values():
|
|
1268
|
+
if isinstance(group, list):
|
|
1269
|
+
for req in group:
|
|
1270
|
+
add_req(scopes["dev"], req)
|
|
1271
|
+
dependency_groups = data.get("dependency-groups", {})
|
|
1272
|
+
if isinstance(dependency_groups, dict):
|
|
1273
|
+
for group in dependency_groups.values():
|
|
1274
|
+
if isinstance(group, list):
|
|
1275
|
+
for req in group:
|
|
1276
|
+
add_req(scopes["dev"], req)
|
|
1277
|
+
return scopes
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
def _declared_dependency_scopes_by_ecosystem(manifest: dict, packet: Path) -> dict[str, dict[str, set[str]]]:
    """Collect declared dependency names per ecosystem and scope from a packet.

    Args:
        manifest: Packet manifest. Accepted for signature parity with the
            sibling helpers; this function does not read it.
        packet: Packet path passed to ``_packet_file_contents``.

    Returns:
        ``{"python": {...}, "js": {...}}`` where each inner dict has
        ``"runtime"``, ``"dev"`` and ``"optional"`` sets of names.
    """
    contents = _packet_file_contents(packet)
    scopes: dict[str, dict[str, set[str]]] = {
        "python": {"runtime": set(), "dev": set(), "optional": set()},
        "js": {"runtime": set(), "dev": set(), "optional": set()},
    }
    section_map = {
        "dependencies": "runtime",
        "peerDependencies": "runtime",
        "optionalDependencies": "optional",
        "devDependencies": "dev",
    }
    for rel, content in contents.items():
        name = Path(rel).name.lower()
        if name == "pyproject.toml":
            parsed = _python_dependency_names_by_scope_from_pyproject(content)
            for key, values in parsed.items():
                scopes["python"][key].update(values)
        elif name.startswith("requirements") and name.endswith(".txt"):
            # requirements-dev.txt / requirements_test.txt count as dev scope;
            # plain requirements.txt matches neither marker and stays runtime.
            target = "dev" if any(x in name for x in ("dev", "test")) else "runtime"
            scopes["python"][target].update(_python_dependency_names_from_requirement_lines(content))
        elif name == "package.json":
            try:
                package = json.loads(content)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object has no dependency sections.
            if not isinstance(package, dict):
                continue
            for section, target in section_map.items():
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    scopes["js"][target].update(dep.lower() for dep in section_deps)
    return scopes
|
|
1305
|
+
|
|
1306
|
+
|
|
1307
|
+
def _is_test_path(path: str) -> bool:
|
|
1308
|
+
p = path.replace("\\", "/").lower()
|
|
1309
|
+
name = PurePosixPath(p).name
|
|
1310
|
+
return p.startswith(("tests/", "test/")) or "/__tests__/" in f"/{p}" or name.endswith("_test.py") or any(name.endswith(s) for s in (".test.js", ".test.ts", ".spec.js", ".spec.ts", ".test.jsx", ".test.tsx", ".spec.jsx", ".spec.tsx"))
|
|
1311
|
+
|
|
1312
|
+
|
|
1313
|
+
def _dependency_scope_status(dep: str, scopes: dict[str, set[str]], path: str) -> str:
    """Classify how well the declared scopes cover ``dep`` for the file at ``path``.

    Returns:
        ``"supported"`` for a runtime dependency, or a dev dependency used
        from a test path; ``"scope_review"`` for a dev dependency used outside
        tests or an optional dependency; ``"missing"`` otherwise.
    """
    canonical = _normalize_dependency_name(dep)

    def declared_in(scope: str) -> bool:
        return canonical in scopes.get(scope, set())

    if declared_in("runtime"):
        return "supported"
    if declared_in("dev"):
        # Dev-only packages are fine in tests but need review elsewhere.
        if _is_test_path(path):
            return "supported"
        return "scope_review"
    if declared_in("optional"):
        return "scope_review"
    return "missing"
|
|
1322
|
+
|
|
1323
|
+
|
|
1324
|
+
def _declared_dependency_names_from_patch_by_ecosystem_structural(changes: list[PatchFileChange], contents: dict[str, str]) -> tuple[dict[str, set[str]], list[dict]]:
    """Rebuild patched dependency manifests and read the names they declare.

    Each changed ``package.json``, ``pyproject.toml`` or ``requirements*.txt``
    is reconstructed by applying its hunks to the baseline text, then parsed
    as a whole file.

    Args:
        changes: Parsed per-file changes from the patch.
        contents: Baseline file contents keyed by relative path.

    Returns:
        ``(deps, uncertainties)``: ``deps`` maps ``"python"`` / ``"js"`` to
        declared names; ``uncertainties`` holds one
        ``dependency_manifest_uncertain`` record per manifest that could not
        be reconstructed or parsed.
    """
    deps: dict[str, set[str]] = {"python": set(), "js": set()}
    uncertainties: list[dict] = []
    for ch in changes:
        name = Path(ch.path).name.lower()
        is_requirements = name.startswith("requirements") and name.endswith(".txt")
        if name not in {"package.json", "pyproject.toml"} and not is_requirements:
            continue
        # A renamed file's baseline text is stored under its old path.
        base = contents.get(ch.old_path or ch.path, "")
        post = _apply_patch_change_to_text(base, ch)
        if post is None:
            uncertainties.append({"id": "dependency_manifest_uncertain", "message": f"Could not reconstruct {ch.path} safely", "path": ch.path})
            continue
        if name == "package.json":
            try:
                package = json.loads(post)
            except json.JSONDecodeError:
                uncertainties.append({"id": "dependency_manifest_uncertain", "message": f"Could not parse {ch.path} as JSON", "path": ch.path})
                continue
            # Valid JSON that is not an object cannot declare dependencies;
            # report it instead of failing on `.get`.
            if not isinstance(package, dict):
                uncertainties.append({"id": "dependency_manifest_uncertain", "message": f"{ch.path} is not a JSON object", "path": ch.path})
                continue
            for section in JS_DEP_SECTIONS:
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    deps["js"].update(dep.lower() for dep in section_deps)
        elif name == "pyproject.toml":
            parsed = _python_dependency_names_by_scope_from_pyproject(post)
            deps["python"].update(set().union(*parsed.values()))
        else:
            deps["python"].update(_python_dependency_names_from_requirement_lines(post))
    return deps, uncertainties
|
|
1352
|
+
|
|
1353
|
+
|
|
1354
|
+
def _declared_dependency_names_from_patch_by_ecosystem(changes: list[PatchFileChange]) -> dict[str, set[str]]:
    """Read dependency names from the added lines of changed manifests.

    Only the lines a patch adds are inspected; the baseline file is not
    consulted.

    Returns:
        ``{"python": set, "js": set}`` of declared names.
    """
    found: dict[str, set[str]] = {"python": set(), "js": set()}
    for change in changes:
        added_lines = change.added_lines or []
        added_text = "\n".join(added_lines)
        filename = Path(change.path).name.lower()
        if filename == "package.json":
            found["js"].update(_package_json_declared_deps_from_added_lines(added_lines))
            continue
        if filename == "pyproject.toml":
            parser = _python_dependency_names_from_pyproject
        elif filename.startswith("requirements") and filename.endswith(".txt"):
            parser = _python_dependency_names_from_requirement_lines
        else:
            continue
        found["python"].update(parser(added_text))
    return found
|
|
1366
|
+
|
|
1367
|
+
|
|
1368
|
+
def _declared_dependency_names_from_patch(changes: list[PatchFileChange]) -> set[str]:
    """Return all dependency names a patch declares, Python and JS combined."""
    by_ecosystem = _declared_dependency_names_from_patch_by_ecosystem(changes)
    python_names = by_ecosystem["python"]
    js_names = by_ecosystem["js"]
    return python_names | js_names
|
|
1371
|
+
|
|
1372
|
+
|
|
1373
|
+
def _declared_dependency_names_by_ecosystem(manifest: dict, packet: Path) -> dict[str, set[str]]:
    """Collect dependency names declared by manifests included in the packet.

    Args:
        manifest: Packet manifest; each ``included_files`` record's
            ``relative_path`` is looked up in the packet contents.
        packet: Packet path passed to ``_packet_file_contents``.

    Returns:
        ``{"python": set, "js": set}`` of declared names.
    """
    declared: dict[str, set[str]] = {"python": set(), "js": set()}
    contents = _packet_file_contents(packet)
    for rec in manifest.get("included_files", []):
        rel = rec.get("relative_path", "")
        content = contents.get(rel, "")
        name = Path(rel).name.lower()
        if name == "pyproject.toml":
            declared["python"].update(_python_dependency_names_from_pyproject(content))
        elif name.startswith("requirements") and name.endswith(".txt"):
            declared["python"].update(_python_dependency_names_from_requirement_lines(content))
        elif name == "package.json":
            try:
                package = json.loads(content)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object has no dependency sections.
            if not isinstance(package, dict):
                continue
            for section in JS_DEP_SECTIONS:
                section_deps = package.get(section)
                if isinstance(section_deps, dict):
                    declared["js"].update(dep.lower() for dep in section_deps)
    return declared
|
|
1394
|
+
|
|
1395
|
+
|
|
1396
|
+
def _declared_dependency_names(manifest: dict, packet: Path) -> set[str]:
    """Return every dependency name declared in the packet, Python and JS combined."""
    by_ecosystem = _declared_dependency_names_by_ecosystem(manifest, packet)
    python_names = by_ecosystem["python"]
    js_names = by_ecosystem["js"]
    return python_names | js_names
|
|
1399
|
+
|
|
1400
|
+
|
|
1401
|
+
def _workspace_package_names(packet: Path) -> set[str]:
    """Return lower-cased names of workspace packages declared by the root package.json.

    Only workspace patterns of the form ``<dir>/*`` are expanded; every
    ``package.json`` found below ``<dir>/`` contributes its ``name``.

    Args:
        packet: Packet path passed to ``_packet_file_contents``.

    Returns:
        The workspace package names, or an empty set when the root
        ``package.json`` is missing, invalid, or not a JSON object.
    """
    contents = _packet_file_contents(packet)
    try:
        root = json.loads(contents.get("package.json", "{}"))
    except json.JSONDecodeError:
        return set()
    # Valid JSON that is not an object cannot declare workspaces.
    if not isinstance(root, dict):
        return set()
    workspaces = root.get("workspaces")
    # npm uses a plain list; Yarn also allows {"packages": [...]}.
    if isinstance(workspaces, list):
        patterns = workspaces
    elif isinstance(workspaces, dict):
        patterns = workspaces.get("packages", [])
    else:
        patterns = []
    names: set[str] = set()
    for pattern in patterns:
        if not isinstance(pattern, str) or not pattern.endswith("/*"):
            continue
        prefix = pattern[:-2].strip("/")
        for rel, content in contents.items():
            if Path(rel).name == "package.json" and rel.startswith(prefix + "/"):
                try:
                    package = json.loads(content)
                except json.JSONDecodeError:
                    continue
                if not isinstance(package, dict):
                    continue
                name = package.get("name")
                if isinstance(name, str):
                    names.add(name.lower())
    return names
|
|
1425
|
+
|
|
1426
|
+
|
|
1427
|
+
def _is_js_alias_specifier(imported: str) -> bool:
|
|
1428
|
+
return imported.startswith(("@/", "~/"))
|
|
1429
|
+
|
|
1430
|
+
|
|
1431
|
+
def _js_alias_local(imported: str, files: set[str], contents: dict[str, str]) -> bool | None:
|
|
1432
|
+
configs = []
|
|
1433
|
+
for cfg in ("tsconfig.json", "jsconfig.json"):
|
|
1434
|
+
if cfg in contents:
|
|
1435
|
+
try:
|
|
1436
|
+
configs.append(json.loads(contents[cfg]))
|
|
1437
|
+
except json.JSONDecodeError:
|
|
1438
|
+
return None
|
|
1439
|
+
for cfg in configs:
|
|
1440
|
+
opts = cfg.get("compilerOptions", {}) if isinstance(cfg, dict) else {}
|
|
1441
|
+
base = str(opts.get("baseUrl", ".")).strip("./")
|
|
1442
|
+
paths = opts.get("paths", {})
|
|
1443
|
+
candidates = []
|
|
1444
|
+
if isinstance(paths, dict):
|
|
1445
|
+
for alias, targets in paths.items():
|
|
1446
|
+
prefix = alias[:-1] if alias.endswith("*") else alias
|
|
1447
|
+
if imported.startswith(prefix):
|
|
1448
|
+
rest = imported[len(prefix):]
|
|
1449
|
+
for target in targets if isinstance(targets, list) else []:
|
|
1450
|
+
tprefix = target[:-1] if isinstance(target, str) and target.endswith("*") else target
|
|
1451
|
+
candidates.append((tprefix + rest).strip("/"))
|
|
1452
|
+
if base and not imported.startswith("@") and not imported.startswith("~"):
|
|
1453
|
+
candidates.append(f"{base}/{imported}".strip("/"))
|
|
1454
|
+
for c in candidates:
|
|
1455
|
+
variants = {c, f"{c}.ts", f"{c}.tsx", f"{c}.js", f"{c}.jsx", f"{c}/index.ts", f"{c}/index.tsx", f"{c}/index.js", f"{c}/index.jsx"}
|
|
1456
|
+
if variants & files:
|
|
1457
|
+
return True
|
|
1458
|
+
if candidates:
|
|
1459
|
+
return None
|
|
1460
|
+
return False
|
|
1461
|
+
|
|
1462
|
+
|
|
1463
|
+
def _is_high_risk_binary_path(rel: str) -> bool:
|
|
1464
|
+
normalized = rel.replace("\\", "/").lstrip("/")
|
|
1465
|
+
high_risk_prefixes = (".sourcepack/", ".git/", ".github/workflows/")
|
|
1466
|
+
high_risk_names = {"pyproject.toml", "package.json", "package-lock.json", "uv.lock", "poetry.lock"}
|
|
1467
|
+
return normalized.startswith(high_risk_prefixes) or Path(normalized).name in high_risk_names
|
|
1468
|
+
|
|
1469
|
+
|
|
1470
|
+
UNSUPPORTED_ECOSYSTEM_MARKERS = {
|
|
1471
|
+
"gemfile": ("Gemfile", "Ruby/Bundler dependency validation is not implemented"),
|
|
1472
|
+
"composer.json": ("composer.json", "PHP/Composer dependency validation is not implemented"),
|
|
1473
|
+
"main.tf": ("main.tf", "Terraform module/provider validation is not implemented"),
|
|
1474
|
+
"flake.nix": ("flake.nix", "Nix flake validation is not implemented"),
|
|
1475
|
+
"cargo.toml": ("Cargo.toml", "Rust dependency validation is not implemented"),
|
|
1476
|
+
"go.mod": ("go.mod", "Go module dependency validation is not implemented"),
|
|
1477
|
+
"pom.xml": ("pom.xml", "Maven dependency validation is not implemented"),
|
|
1478
|
+
"build.gradle": ("build.gradle", "Gradle dependency validation is not implemented"),
|
|
1479
|
+
"build.gradle.kts": ("build.gradle.kts", "Gradle dependency validation is not implemented"),
|
|
1480
|
+
"settings.gradle": ("settings.gradle", "Gradle workspace validation is not implemented"),
|
|
1481
|
+
"settings.gradle.kts": ("settings.gradle.kts", "Gradle workspace validation is not implemented"),
|
|
1482
|
+
"*.csproj": ("*.csproj", ".NET/NuGet dependency validation is not implemented"),
|
|
1483
|
+
}
|
|
1484
|
+
|
|
1485
|
+
|
|
1486
|
+
def _unsupported_ecosystem_uncertainties(files: set[str], changes: list[PatchFileChange]) -> list[dict]:
|
|
1487
|
+
names = {Path(f).name.lower() for f in files}
|
|
1488
|
+
names.update(Path(ch.path).name.lower() for ch in changes)
|
|
1489
|
+
for ch in changes:
|
|
1490
|
+
if ch.path.lower().endswith(".csproj"):
|
|
1491
|
+
names.add("*.csproj")
|
|
1492
|
+
uncertainties = []
|
|
1493
|
+
for marker, (evidence, message) in sorted(UNSUPPORTED_ECOSYSTEM_MARKERS.items()):
|
|
1494
|
+
if marker in names:
|
|
1495
|
+
uncertainties.append({"id": "unsupported_ecosystem", "message": f"{evidence} detected, but {message}", "evidence": evidence})
|
|
1496
|
+
return uncertainties
|
|
1497
|
+
|
|
1498
|
+
def judge_patch_text(packet_path: str | Path, patch_text: str) -> dict:
|
|
1499
|
+
if re.search(r"(?m)^@@", patch_text) and "diff --git " not in patch_text:
|
|
1500
|
+
return {"verdict": "FAIL", "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [], "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "warnings": [], "malformed_diff": True}
|
|
1501
|
+
if re.search(r"(?m)^@@(?! -\d+(?:,\d+)? \+\d+(?:,\d+)? @@)", patch_text):
|
|
1502
|
+
return {"verdict": "FAIL", "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [], "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "warnings": [], "malformed_diff": True}
|
|
1503
|
+
changes = parse_unified_diff(patch_text)
|
|
1504
|
+
unsafe_paths = sorted({ch.path for ch in changes if ch.unsafe_path and ch.path})
|
|
1505
|
+
if any(ch.unsafe_path for ch in changes):
|
|
1506
|
+
return {"verdict": "FAIL", "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [], "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "warnings": [], "path_escape": True, "path_escape_paths": unsafe_paths}
|
|
1507
|
+
if patch_text.strip() and not changes and "Binary files " not in patch_text:
|
|
1508
|
+
return {"verdict": "FAIL", "modified_files": [], "missing_modified_files": [], "new_files": [], "deleted_files": [], "unsupported_dependencies": [], "unsupported_commands": [], "protected_artifact_modifications": [], "warnings": [], "malformed_diff": True}
|
|
1509
|
+
report = analyze_patch(packet_path, patch_text, changes)
|
|
1510
|
+
packet = Path(packet_path); manifest = load_manifest(packet); files = known_files(manifest, packet); contents = _packet_file_contents(packet)
|
|
1511
|
+
existing_declared = _declared_dependency_names_by_ecosystem(manifest, packet)
|
|
1512
|
+
scopes = _declared_dependency_scopes_by_ecosystem(manifest, packet)
|
|
1513
|
+
patch_declared, manifest_uncertainties = _declared_dependency_names_from_patch_by_ecosystem_structural(changes, contents)
|
|
1514
|
+
if manifest_uncertainties:
|
|
1515
|
+
report.setdefault("uncertainties", []).extend(manifest_uncertainties)
|
|
1516
|
+
workspace_names = _workspace_package_names(packet)
|
|
1517
|
+
unsupported = set(report.get("unsupported_dependencies", []))
|
|
1518
|
+
resolver_tmp = _materialize_packet_worktree(packet)
|
|
1519
|
+
resolver_root = Path(resolver_tmp.name)
|
|
1520
|
+
try:
|
|
1521
|
+
for ch in changes:
|
|
1522
|
+
suffix = Path(ch.path).suffix.lower(); added = "\n".join(ch.added_lines or [])
|
|
1523
|
+
if suffix == ".py":
|
|
1524
|
+
for imported in extract_imports_from_text(added, suffix):
|
|
1525
|
+
dep_resolution = resolve_python_import(resolver_root, imported, added_dependencies=patch_declared["python"])
|
|
1526
|
+
dep_name = _dependency_name_for_import(imported)
|
|
1527
|
+
if dep_resolution.verdict == "PASS":
|
|
1528
|
+
unsupported.discard(imported); unsupported.discard(dep_name)
|
|
1529
|
+
elif dep_resolution.reason_code == "declared_dependency":
|
|
1530
|
+
unsupported.discard(imported); unsupported.discard(dep_name)
|
|
1531
|
+
report.setdefault("uncertainties", []).append({"id": "declared_dependency", "message": f"{dep_name} is declared in the same patch and requires review", "path": ch.path, "evidence": dep_name})
|
|
1532
|
+
elif dep_resolution.reason_code == "dependency_scope_review":
|
|
1533
|
+
report.setdefault("uncertainties", []).append({"id": "dependency_scope_review", "message": f"{dep_name} is declared outside the runtime dependency scope", "path": ch.path, "evidence": dep_name})
|
|
1534
|
+
elif dep_resolution.reason_code == "unsupported_dependency":
|
|
1535
|
+
unsupported.add(imported)
|
|
1536
|
+
elif suffix in JS_EXTS:
|
|
1537
|
+
for imported in extract_imports_from_text(added, suffix):
|
|
1538
|
+
pkg = _js_package_root(imported)
|
|
1539
|
+
local_alias = _js_alias_local(imported, files, contents)
|
|
1540
|
+
if pkg in workspace_names or local_alias is True:
|
|
1541
|
+
continue
|
|
1542
|
+
dep_resolution = resolve_js_import(resolver_root, imported)
|
|
1543
|
+
if dep_resolution.verdict == "PASS":
|
|
1544
|
+
unsupported.discard(pkg)
|
|
1545
|
+
elif dep_resolution.reason_code == "js_alias_uncertain":
|
|
1546
|
+
report.setdefault("uncertainties", []).append({"id": "js_alias_uncertain", "message": f"{imported} could not be resolved safely", "path": ch.path, "evidence": imported})
|
|
1547
|
+
elif dep_resolution.reason_code == "dependency_scope_review":
|
|
1548
|
+
report.setdefault("uncertainties", []).append({"id": "dependency_scope_review", "message": f"{pkg} is declared outside the runtime dependency scope", "path": ch.path, "evidence": pkg})
|
|
1549
|
+
elif dep_resolution.reason_code == "unsupported_dependency" and pkg not in patch_declared["js"]:
|
|
1550
|
+
unsupported.add(pkg)
|
|
1551
|
+
finally:
|
|
1552
|
+
resolver_tmp.cleanup()
|
|
1553
|
+
|
|
1554
|
+
# Re-run command claims through the command resolver so report output is
|
|
1555
|
+
# based on the same manifest-aware command semantics as unit-level checks.
|
|
1556
|
+
command_overlay: dict[str, str] = {}
|
|
1557
|
+
for ch in changes:
|
|
1558
|
+
if Path(ch.path).name.lower() in {"package.json", "Makefile", "justfile", "Justfile", "Taskfile.yml", "Taskfile.yaml", "tox.ini", "noxfile.py", "compose.yml", "compose.yaml", "docker-compose.yml", "docker-compose.yaml"}:
|
|
1559
|
+
base = contents.get(ch.old_path or ch.path, "")
|
|
1560
|
+
post = _apply_patch_change_to_text(base, ch)
|
|
1561
|
+
if post is not None:
|
|
1562
|
+
command_overlay[ch.path] = post
|
|
1563
|
+
command_tmp = _materialize_packet_worktree(packet, command_overlay)
|
|
1564
|
+
try:
|
|
1565
|
+
command_root = Path(command_tmp.name)
|
|
1566
|
+
added_text = "\n".join("\n".join(ch.added_lines or []) for ch in changes)
|
|
1567
|
+
commands = set()
|
|
1568
|
+
if re.search(r"docker\s+compose\s+up", added_text, re.I):
|
|
1569
|
+
commands.add("docker compose up")
|
|
1570
|
+
commands.update(re.findall(r"npm\s+(?:run\s+)?[A-Za-z0-9:_-]+", added_text))
|
|
1571
|
+
commands.update(re.findall(r"make\s+[A-Za-z0-9_.:-]+", added_text))
|
|
1572
|
+
commands.update(re.findall(r"just\s+[A-Za-z0-9_.:-]+", added_text))
|
|
1573
|
+
commands.update(re.findall(r"task\s+[A-Za-z0-9_.:-]+", added_text))
|
|
1574
|
+
if re.search(r"\b(pytest|python\s+-m\s+pytest)\b", added_text, re.I):
|
|
1575
|
+
commands.add("pytest")
|
|
1576
|
+
report["unsupported_commands"] = []
|
|
1577
|
+
for command in sorted(commands):
|
|
1578
|
+
resolution = resolve_command(command_root, command)
|
|
1579
|
+
if resolution.reason_code == "unsupported_command":
|
|
1580
|
+
report["unsupported_commands"].append(command)
|
|
1581
|
+
elif resolution.reason_code in {"declared_command", "command_check_inconclusive", "command_manifest_missing", "command_manifest_uncertain"}:
|
|
1582
|
+
report.setdefault("uncertainties", []).append({"id": resolution.reason_code, "message": resolution.message, "evidence": command})
|
|
1583
|
+
finally:
|
|
1584
|
+
command_tmp.cleanup()
|
|
1585
|
+
declared = patch_declared["python"] | patch_declared["js"]
|
|
1586
|
+
existing_deps = existing_declared["python"] | existing_declared["js"]
|
|
1587
|
+
declared_only = {d for d in declared if d not in existing_deps}
|
|
1588
|
+
binary_paths = []
|
|
1589
|
+
binary_blockers = []
|
|
1590
|
+
for line in patch_text.splitlines():
|
|
1591
|
+
if line.startswith("Binary files "):
|
|
1592
|
+
m = re.search(r" b/(.+?) differ", line)
|
|
1593
|
+
rel = m.group(1) if m else "unknown"
|
|
1594
|
+
binary_paths.append(rel)
|
|
1595
|
+
if rel == "unknown" or _is_high_risk_binary_path(rel):
|
|
1596
|
+
binary_blockers.append(rel)
|
|
1597
|
+
if binary_paths:
|
|
1598
|
+
report["binary_diffs"] = sorted(set(binary_paths))
|
|
1599
|
+
if binary_blockers:
|
|
1600
|
+
report["binary_diff_blockers"] = sorted(set(binary_blockers))
|
|
1601
|
+
unsupported_ecosystems = _unsupported_ecosystem_uncertainties(files, changes)
|
|
1602
|
+
if unsupported_ecosystems:
|
|
1603
|
+
seen_uncertainties = set()
|
|
1604
|
+
merged_uncertainties = []
|
|
1605
|
+
for uncertainty in report.get("uncertainties", []) + unsupported_ecosystems:
|
|
1606
|
+
if isinstance(uncertainty, dict):
|
|
1607
|
+
key = (uncertainty.get("id"), uncertainty.get("message"), uncertainty.get("evidence"), uncertainty.get("path"))
|
|
1608
|
+
else:
|
|
1609
|
+
key = (str(uncertainty),)
|
|
1610
|
+
if key not in seen_uncertainties:
|
|
1611
|
+
seen_uncertainties.add(key)
|
|
1612
|
+
merged_uncertainties.append(uncertainty)
|
|
1613
|
+
report["uncertainties"] = merged_uncertainties
|
|
1614
|
+
report["unsupported_dependencies"] = sorted(unsupported)
|
|
1615
|
+
if declared_only:
|
|
1616
|
+
report.setdefault("warnings", []).append("Patch declares new dependencies that require review.")
|
|
1617
|
+
report["declared_dependencies"] = sorted(declared_only)
|
|
1618
|
+
fail_keys = ["missing_modified_files", "unsupported_dependencies", "unsupported_commands", "protected_artifact_modifications", "git_path_modifications", "binary_diff_blockers", "path_escape"]
|
|
1619
|
+
report["verdict"] = "FAIL" if any(report.get(k) for k in fail_keys) else "WARN" if (report.get("new_files") or report.get("deleted_files") or report.get("warnings") or declared_only or report.get("uncertainties") or report.get("binary_diffs")) else "PASS"
|
|
1620
|
+
return report
|
|
1621
|
+
|
|
1622
|
+
|
|
1623
|
+
def patch_report_to_traffic(report: dict, report_path: str = ".sourcepack/reports/latest.json") -> dict:
    """Convert a patch judgment report into a traffic report of normalized findings.

    Blocking categories are emitted as ``"error"`` findings, review items and
    uncertainties as ``"warn"`` findings, in a fixed category order.

    Args:
        report: Report dict as produced by the patch judge.
        report_path: Report location forwarded to ``traffic_report``.

    Returns:
        Whatever ``traffic_report`` returns for the report's verdict and the
        collected findings.
    """
    findings = []
    record = findings.append

    for missing in report.get("missing_modified_files", []):
        record(normalized_finding("missing_file", "error", "file", f"{missing} not found in the trusted baseline.", missing, suggestion="Restore the file, create it as a new file, or refresh the baseline only after accepting the current repo state."))
    for dep in report.get("unsupported_dependencies", []):
        record(normalized_finding("unsupported_dependency", "error", "dependency", f"{dep} is imported but not declared in scanned dependency files.", evidence=dep, suggestion=f"Either remove {dep} usage or add it intentionally to the appropriate dependency manifest."))
    for cmd in report.get("unsupported_commands", []):
        record(normalized_finding("unsupported_command", "error", "command", f"{cmd} is not supported by project evidence.", evidence=cmd, suggestion="Use a detected supported command or add the project file that defines this command."))

    if report.get("malformed_diff"):
        record(normalized_finding("malformed_diff", "error", "diff", "SourcePack could not safely parse the diff artifact it was asked to judge."))

    if report.get("path_escape"):
        escaped = report.get("path_escape_paths") or []
        if not escaped:
            record(normalized_finding("path_escape", "error", "diff", "Diff path escapes the repository root or is absolute."))
        for target in escaped:
            record(normalized_finding("path_escape", "error", "diff", "Diff path escapes the repository root or is absolute.", target, evidence=target))

    for artifact in report.get("protected_artifact_modifications", []):
        record(normalized_finding("protected_artifact", "error", "artifact", f"{artifact} is a protected SourcePack trust artifact.", artifact, evidence=artifact))
    for git_path in report.get("git_path_modifications", []):
        record(normalized_finding("git_path_modification", "error", "artifact", f"{git_path} modifies Git internal state and is not safe to judge as a normal repository file.", git_path, evidence=git_path))
    for blocked in report.get("binary_diff_blockers", []):
        record(normalized_finding("binary_diff", "error", "diff", f"Binary change at {blocked} crosses a SourcePack trust or high-risk control boundary.", blocked, evidence=blocked))

    # Blocking binaries were already reported as errors; warn on the rest.
    blockers = set(report.get("binary_diff_blockers", []))
    for binary in report.get("binary_diffs", []):
        if binary in blockers:
            continue
        record(normalized_finding("binary_diff", "warn", "uncertainty", f"Binary content was detected at {binary} and was not semantically evaluated.", binary, evidence=binary))

    for created in report.get("new_files", []):
        record(normalized_finding("new_file", "warn", "review", f"{created} was created by the patch.", created))
    for removed in report.get("deleted_files", []):
        record(normalized_finding("deleted_file", "warn", "review", f"{removed} was deleted by the patch.", removed))
    for dep in report.get("declared_dependencies", []):
        record(normalized_finding("declared_dependency", "warn", "review", f"{dep} was added to dependency files.", evidence=dep))
    for cmd in report.get("declared_commands", []):
        record(normalized_finding("declared_command", "warn", "review", f"{cmd} was added in the same patch.", evidence=cmd))

    for item in report.get("uncertainties", []):
        if isinstance(item, dict):
            finding_id = str(item.get("id") or "uncertainty")
            message = str(item.get("message") or "SourcePack could not fully evaluate this change.")
            record(normalized_finding(finding_id, "warn", "uncertainty", message, item.get("path"), item.get("evidence"), item.get("suggestion")))
            continue
        # Plain-string uncertainties use an "id: detail" layout.
        head, _, detail = str(item).partition(":")
        finding_id = head.strip() or "uncertainty"
        message = detail.strip() or str(item)
        record(normalized_finding(finding_id, "warn", "uncertainty", message))

    return traffic_report(
        report.get("verdict", "PASS"),
        findings=findings,
        checked_categories=["file references", "Python imports", "JS/TS imports", "known project commands", "protected SourcePack artifacts"],
        report_path=report_path,
    )
|
|
1658
|
+
|
|
1659
|
+
|
|
1660
|
+
def run_git(repo: Path, args: list[str]) -> subprocess.CompletedProcess:
    """Run ``git <args>`` inside ``repo`` and capture its text output.

    Returns:
        The completed process. When launching raises ``FileNotFoundError``, a
        synthetic result with return code 127 and the stderr text
        ``"git executable not found"`` is returned instead.
    """
    command = ["git", *args]
    try:
        completed = subprocess.run(command, cwd=repo, text=True, capture_output=True)
    except FileNotFoundError:
        return subprocess.CompletedProcess(command, 127, "", "git executable not found")
    return completed
|
|
1665
|
+
|
|
1666
|
+
|
|
1667
|
+
|
|
1668
|
+
def git_worktree_dirty(repo: str | Path) -> tuple[bool, str | None]:
    """Check whether the Git worktree containing ``repo`` has uncommitted changes.

    Returns:
        ``(dirty, reason)``. ``dirty`` is True for unstaged, staged or
        untracked (non-ignored) changes. ``reason`` is ``"git_unavailable"``
        when a git call returns 127, ``"not_git"`` when the toplevel lookup
        fails otherwise, else ``None``.
    """
    toplevel = run_git(Path(repo), ["rev-parse", "--show-toplevel"])
    if toplevel.returncode != 0:
        reason = "git_unavailable" if toplevel.returncode == 127 else "not_git"
        return False, reason

    root = Path(toplevel.stdout.strip())
    # `git diff --quiet` exits with 1 when there are differences.
    for diff_args in (["diff", "--quiet"], ["diff", "--staged", "--quiet"]):
        code = run_git(root, list(diff_args)).returncode
        if code == 1:
            return True, None
        if code == 127:
            return False, "git_unavailable"

    untracked = run_git(root, ["ls-files", "--others", "--exclude-standard"])
    if untracked.returncode == 127:
        return False, "git_unavailable"
    if untracked.returncode == 0 and untracked.stdout.strip():
        return True, None
    return False, None
|
|
1686
|
+
|
|
1687
|
+
|
|
1688
|
+
|
|
1689
|
+
def _only_sourcepack_gitignore_change(repo: Path) -> bool:
    """Return True when the sole pending change is a SourcePack entry added to .gitignore.

    The check passes only if all of the following hold:

    * ``git status --porcelain`` lists exactly the ``.gitignore`` that lives in
      *repo* and nothing else (nested or look-alike ``*.gitignore`` paths do
      not count);
    * every non-blank line that is new relative to the committed file is
      ``.sourcepack`` or ``.sourcepack/``;
    * the committed non-blank lines are all still present, in the same order.

    Any git failure or unreadable ``.gitignore`` yields False.
    """
    allowed = {".sourcepack", ".sourcepack/"}
    status = run_git(repo, ["status", "--porcelain"])
    prefix = run_git(repo, ["rev-parse", "--show-prefix"])
    if status.returncode != 0 or prefix.returncode != 0:
        return False

    # Porcelain v1 paths are relative to the repository root, so the expected
    # entry is this directory's prefix (empty at the top level) + ".gitignore".
    target = prefix.stdout.strip() + ".gitignore"
    # Each porcelain v1 line is "XY <path>": two status columns and a space.
    changed = [line[3:] for line in status.stdout.splitlines() if line.strip()]
    if not changed or any(path != target for path in changed):
        return False

    try:
        text = (repo / ".gitignore").read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return False

    # "HEAD:<path>" is resolved from the repository root, hence the prefix.
    tracked = run_git(repo, ["show", f"HEAD:{target}"])
    before = tracked.stdout if tracked.returncode == 0 else ""
    before_lines = [line.strip() for line in before.splitlines() if line.strip()]
    current_lines = [line.strip() for line in text.splitlines() if line.strip()]

    known = set(before_lines)
    added = {line for line in current_lines if line not in known}
    if not added or not added <= allowed:
        return False
    # Removing or reordering committed ignore rules is a real change, not a
    # SourcePack-only edit, so the remainder must equal the committed content.
    return [line for line in current_lines if line not in added] == before_lines
|
|
1705
|
+
|
|
1706
|
+
|
|
1707
|
+
def untracked_files_as_diff(repo: str | Path) -> str:
    """Render untracked, non-ignored files under *repo* as a synthetic "new file" diff.

    Each listed file gets a ``diff --git`` header.  Files that look binary or
    are not valid UTF-8 get a ``Binary files ... differ`` line; files that
    cannot be read keep only their header; all others get one hunk that adds
    every line.  An untracked ``.gitignore`` that contains nothing except
    SourcePack's own entries (or that cannot be read) is left out entirely.

    Returns:
        The diff text, newline-terminated, or ``""`` when git fails or there is
        nothing to report.
    """
    repo = Path(repo)
    # core.quotePath=false keeps non-ASCII names verbatim; with the default
    # setting git prints them C-quoted, the path is then not found on disk and
    # the file's content would silently be left out of the diff.
    cp = run_git(repo, ["-c", "core.quotePath=false", "ls-files", "--others", "--exclude-standard"])
    if cp.returncode != 0:
        return ""

    sourcepack_entries = {".sourcepack", ".sourcepack/"}
    chunks: list[str] = []
    for rel in [line.strip() for line in cp.stdout.splitlines() if line.strip()]:
        path = repo / rel
        if rel == ".gitignore":
            try:
                ignore_lines = {line.strip() for line in path.read_text(encoding="utf-8").splitlines() if line.strip()}
            except OSError:
                ignore_lines = set()
            except UnicodeDecodeError:
                # Not valid UTF-8: do not skip it, let it be reported below.
                ignore_lines = None
            if ignore_lines is not None and ignore_lines <= sourcepack_entries:
                continue

        safe_rel = rel.replace("\\", "/")
        chunks.extend([f"diff --git a/{safe_rel} b/{safe_rel}", "new file mode 100644", "--- /dev/null", f"+++ b/{safe_rel}"])
        if is_probably_binary(path):
            chunks.append(f"Binary files /dev/null and b/{safe_rel} differ")
            continue
        try:
            text = path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            chunks.append(f"Binary files /dev/null and b/{safe_rel} differ")
            continue
        except OSError:
            # Unreadable file: the header alone still records that it exists.
            continue
        lines = text.splitlines()
        chunks.append(f"@@ -0,0 +1,{len(lines)} @@")
        chunks.extend(f"+{line}" for line in lines)
    return "\n".join(chunks) + ("\n" if chunks else "")
|
|
1738
|
+
|
|
1739
|
+
def build_repo_change_report(repo_path: str | Path, *, staged: bool = False, patch_text: str | None = None, ci: bool = False) -> dict:
    """Build the traffic-light report for the pending changes in a repository.

    Args:
        repo_path: A directory inside a git work tree.
        staged: When no *patch_text* is given, judge ``git diff --staged``
            instead of the unstaged diff plus untracked files.
        patch_text: Diff text to judge instead of asking git for one.
        ci: When True a missing baseline is a failure; the baseline is never
            created in this mode.

    Returns:
        The report dict produced by ``traffic_report``.  On the normal path it
        is extended with the baseline fields, the baseline metadata (when it
        can be read), ``current_git``, an optional ``note`` and ``repo_path``.
        Early failure reports (git unavailable, git diff failure, unwritable
        .gitignore, locked or failed baseline creation) are returned without
        those extras.
    """
    stop_headline = "stop before trusting this output."

    repo_arg = Path(repo_path).resolve()
    cp = run_git(repo_arg, ["rev-parse", "--show-toplevel"])
    if cp.returncode != 0:
        git_missing = cp.returncode == 127
        message = "Git executable not found." if git_missing else "No git repository found. Run sourcepack prompt or sourcepack baseline for non-git use."
        return traffic_report("FAIL", stop_headline, [normalized_finding("git_unavailable" if git_missing else "no_git_repo", "error", "git", message)])
    git_root = Path(cp.stdout.strip()).resolve()

    # Work from the directory the caller named when it carries its own
    # baseline (in any state); otherwise fall back to the git top level.
    repo = repo_arg if validate_baseline(repo_arg).get("state") in {"present", "stale", "corrupt"} else git_root
    paths = ensure_sourcepack_dirs(repo)
    added, err = ensure_gitignore_entry(repo)
    if added:
        paths.setdefault("gitignore_added", True)
    if err:
        return traffic_report("FAIL", stop_headline, [normalized_finding("gitignore_unwritable", "error", "git", f"Cannot write .gitignore: {err}")])

    if patch_text is None:
        diff_args = ["diff", "--staged"] if staged else ["diff"]
        if repo != git_root:
            diff_args.append("--relative")
        cp = run_git(repo, diff_args)
        if cp.returncode == 127:
            return traffic_report("FAIL", stop_headline, [normalized_finding("git_unavailable", "error", "git", "Git executable not found.")])
        if cp.returncode != 0:
            # Plain "git diff" exits 0 whether or not there are differences, so
            # any other status is an error; an empty diff from a failed command
            # must not be read as "no changes".
            detail = (cp.stderr or "").strip() or f"exit status {cp.returncode}"
            return traffic_report("FAIL", stop_headline, [normalized_finding("git_diff_failed", "error", "git", f"git diff failed: {detail}")])
        diff_text = cp.stdout
        if not staged:
            extra = untracked_files_as_diff(repo)
            # Skip the untracked part when the only pending change is the
            # .gitignore entry that was just added above.
            if extra and not (added and _only_sourcepack_gitignore_change(repo)):
                diff_text = (diff_text + "\n" + extra).strip() + "\n"
    else:
        diff_text = patch_text

    baseline_status = validate_baseline(repo)
    if baseline_status["state"] == "corrupt":
        rep = traffic_report("FAIL", "trusted baseline is corrupt.", [normalized_finding("baseline_corrupt", "error", "baseline", baseline_status["message"])], ["baseline", "diff"], "Recreate the baseline only after verifying the current repo state should be trusted.")
        rep.update(baseline_report_fields(baseline_status))
        return rep

    rep_note = None
    if baseline_status["state"] == "missing":
        if ci:
            rep = traffic_report("FAIL", "trusted baseline is missing in CI.", [normalized_finding("baseline_missing", "error", "baseline", "No trusted SourcePack baseline exists; CI must not establish trust.")], ["baseline", "diff"], "create the baseline locally only after deciding the current repo state should be trusted.")
            rep.update(baseline_report_fields(baseline_status))
            return rep
        dirty_now, _ = git_worktree_dirty(repo)
        if diff_text.strip() or (dirty_now and not _only_sourcepack_gitignore_change(repo)):
            rep = traffic_report("FAIL", "baseline missing while changes are present.", [normalized_finding("baseline_missing", "error", "baseline", "No trusted SourcePack baseline exists while changes are present.")], ["baseline", "diff"], "run sourcepack baseline only after deciding the current repo state should be trusted.")
            rep.update(baseline_report_fields(baseline_status))
            return rep
        # Nothing is pending, so a baseline is created from the current state.
        try:
            build_current_baseline(repo, quiet=True)
            baseline_status = validate_baseline(repo)
            rep_note = "Created SourcePack baseline because none existed and no diff was present."
        except BaselineLockError as exc:
            return traffic_report("WARN", "baseline writer is locked.", [normalized_finding("baseline_locked", "warn", "tooling", str(exc))], ["baseline", "diff"], "try again after the other baseline operation finishes.", reason_type="tooling")
        except Exception as exc:
            return traffic_report("FAIL", stop_headline, [normalized_finding("baseline_failed", "error", "baseline", f"Baseline verification failed: {exc}")])

    stale_findings = []
    if baseline_status["state"] == "stale":
        stale_findings.append(normalized_finding("baseline_stale", "warn", "uncertainty", "Trusted SourcePack baseline may not match current repo state."))

    if not diff_text.strip():
        verdict = "WARN" if stale_findings else "PASS"
        rep = traffic_report(verdict, "SourcePack could not fully evaluate this change." if stale_findings else "good to continue.", [normalized_finding("no_diff", "info", "diff", "No uncommitted changes detected."), *stale_findings], ["diff", "baseline freshness"])
    else:
        raw = judge_patch_text(repo / baseline_status["packet_path"], diff_text)
        rep = patch_report_to_traffic(raw)
        rep["raw_patch_judgment"] = raw
        rep = _integrate_execution_findings(repo, diff_text, rep)
        rep = _apply_local_policy(repo, rep)
        if stale_findings:
            # Re-wrap so the stale-baseline warning is part of the report.  A
            # FAIL keeps its verdict and headline; anything else becomes WARN.
            findings = rep.get("findings", []) + stale_findings
            categories = rep.get("checked_categories", [])
            overrides = rep.get("policy_overrides")
            if rep["verdict"] != "FAIL":
                rep = traffic_report("WARN", "SourcePack could not fully evaluate this change.", findings, categories, rep.get("next_action"), reason_type="uncertainty")
            else:
                rep = traffic_report("FAIL", rep.get("headline"), findings, categories, rep.get("next_action"))
            rep["raw_patch_judgment"] = raw
            # Keep the record of suppressed findings, as _rebuild_from_findings does.
            if overrides is not None:
                rep["policy_overrides"] = overrides

    rep.update(baseline_report_fields(baseline_status))
    if baseline_status.get("metadata_path"):
        try:
            rep["baseline"] = json.loads((repo / baseline_status["metadata_path"]).read_text(encoding="utf-8"))
        except Exception:
            # Best effort: the metadata only enriches the report.
            pass
    rep["current_git"] = git_metadata(repo)
    if rep_note:
        rep["note"] = rep_note
    rep["repo_path"] = str(repo)
    return rep
|
|
1810
|
+
|
|
1811
|
+
|
|
1812
|
+
def _rebuild_from_findings(rep: dict, findings: list[dict]) -> dict:
    """Create a fresh report for *findings*, deriving the verdict from their severities.

    Any ``"error"`` finding gives ``FAIL``, otherwise any ``"warn"`` finding
    gives ``WARN``, otherwise ``PASS``.  The checked categories and report path
    are taken from *rep*, and its ``raw_patch_judgment`` / ``policy_overrides``
    entries are carried over when present.
    """
    if any(item.get("severity") == "error" for item in findings):
        verdict = "FAIL"
    elif any(item.get("severity") == "warn" for item in findings):
        verdict = "WARN"
    else:
        verdict = "PASS"

    categories = rep.get("checked_categories") or rep.get("checked") or []
    destination = rep.get("report_path", ".sourcepack/reports/latest.json")
    rebuilt = traffic_report(verdict, findings=findings, checked_categories=categories, report_path=destination)

    carried = {key: rep[key] for key in ("raw_patch_judgment", "policy_overrides") if key in rep}
    rebuilt.update(carried)
    return rebuilt
|
|
1819
|
+
|
|
1820
|
+
|
|
1821
|
+
def _integrate_execution_findings(repo: Path, checked_text: str, rep: dict) -> dict:
    """Append the findings from ``execution_findings`` to *rep* and rebuild it.

    *rep* is returned unchanged when ``execution_findings`` reports nothing.
    """
    extra = execution_findings(repo, checked_text)
    if extra:
        combined = list(rep.get("findings", []))
        return _rebuild_from_findings(rep, combined + extra)
    return rep
|
|
1826
|
+
|
|
1827
|
+
|
|
1828
|
+
def _policy_entries_for_judgment(repo: Path) -> list[dict]:
    """Load the unexpired allow-policy entries from ``.sourcepack/policy/allow.jsonl``.

    The file is read as JSON Lines.  Lines that are not valid JSON, or that do
    not hold a JSON object, are skipped, as are entries whose ``expires_at`` is
    before the current time.

    Returns:
        The remaining entries in file order; an empty list when the file does
        not exist.
    """
    policy_file = repo / ".sourcepack" / "policy" / "allow.jsonl"
    if not policy_file.exists():
        return []

    # NOTE(review): expiry is a plain string comparison, so this assumes
    # utc_now() and "expires_at" use one lexically sortable format - confirm.
    now = utc_now()
    entries = []
    for line in policy_file.read_text(encoding="utf-8").splitlines():
        try:
            entry = json.loads(line)
        except (ValueError, RecursionError):
            continue
        # A line such as `[]`, `null` or `1` is valid JSON but not an entry.
        if not isinstance(entry, dict):
            continue
        expires = entry.get("expires_at")
        if expires and str(expires) < now:
            continue
        entries.append(entry)
    return entries
|
|
1844
|
+
|
|
1845
|
+
|
|
1846
|
+
def _policy_matches(entry: dict, finding: dict) -> bool:
    """Decide whether the allow-policy *entry* applies to *finding*.

    Rules, in order:

    * ``git_path_modification`` findings and findings whose path starts with
      ``.git/`` are never matched.
    * An entry with an empty or missing ``value`` matches nothing.
    * Scope ``dependency`` matches ``unsupported_dependency`` findings whose
      ``evidence`` equals the value; scope ``command`` does the same for
      ``unsupported_command``.
    * Scope ``path`` matches findings whose ``path`` equals the value; values
      under ``.sourcepack/baseline/`` additionally need a truthy ``high_risk``
      on the entry.
    * Any other scope matches nothing.
    """
    scope = entry.get("scope")
    value = str(entry.get("value") or "")
    fid = finding.get("id")
    finding_path = str(finding.get("path") or "")

    if fid == "git_path_modification" or finding_path.startswith(".git/"):
        return False
    # Without this guard an empty path value would equal the empty path of
    # every finding that has no path and suppress all of them.
    if not value:
        return False

    if scope == "dependency":
        return fid == "unsupported_dependency" and finding.get("evidence") == value
    if scope == "command":
        return fid == "unsupported_command" and finding.get("evidence") == value
    if scope == "path":
        if finding_path != value:
            return False
        if value.startswith(".sourcepack/baseline/") and not entry.get("high_risk"):
            return False
        return True
    return False
|
|
1863
|
+
|
|
1864
|
+
|
|
1865
|
+
def _apply_local_policy(repo: Path, rep: dict) -> dict:
    """Drop findings covered by a local allow-policy entry and record the overrides.

    *rep* is returned untouched when there are no policy entries or none of
    them matches a finding.  Otherwise the report is rebuilt from the remaining
    findings, gains a ``policy_overrides`` list describing what was suppressed,
    and gets an informational ``policy_override`` finding.
    """
    policies = _policy_entries_for_judgment(repo)
    if not policies:
        return rep

    surviving = []
    suppressed = []
    for finding in rep.get("findings", []):
        # The first matching entry wins.
        applied = None
        for policy in policies:
            if _policy_matches(policy, finding):
                applied = policy
                break
        if not applied:
            surviving.append(finding)
            continue
        suppressed.append({"policy_id": applied.get("id"), "scope": applied.get("scope"), "value": applied.get("value"), "reason": applied.get("reason"), "suppressed_finding": finding.get("id"), "path": finding.get("path")})

    if not suppressed:
        return rep

    rebuilt = _rebuild_from_findings(rep, surviving)
    rebuilt["policy_overrides"] = suppressed
    values = ", ".join(str(item.get("value")) for item in suppressed)
    notice = normalized_finding("policy_override", "info", "policy", "A local allow policy suppressed a matching finding.", evidence=values)
    rebuilt.setdefault("findings", []).append(notice)
    # Rebuild once more so the verdict reflects the final finding list.
    return _rebuild_from_findings(rebuilt, rebuilt["findings"])
|
|
1883
|
+
|
|
1884
|
+
|
|
1885
|
+
|
|
1886
|
+
|
|
1887
|
+
|
|
1888
|
+
|
|
1889
|
+
|
|
1890
|
+
|
|
1891
|
+
def write_auto_report(repo: Path, report: dict, details: dict) -> None:
    """Write *report* merged with *details* as the ``"auto"`` user report.

    Keys in *details* override keys of the same name in *report*; neither
    input dict is modified.
    """
    merged = {**report, **details}
    write_user_report(repo, merged, "auto")
|
|
1895
|
+
|
|
1896
|
+
|
|
1897
|
+
|
|
1898
|
+
|
|
1899
|
+
|
|
1900
|
+
|
|
1901
|
+
# CLI-independent public judgment API
|
|
1902
|
+
@dataclass(frozen=True)
class Judgment:
    """Immutable result of judging a repository change."""

    # Path of the judged repository, as a string.
    repo_path: str
    # Policy mode the judgment was made under.
    policy_mode: PolicyMode
    # The report dict the verdict is read from.
    report: dict

    @property
    def verdict(self) -> str:
        """The report's ``verdict`` as a string; ``"WARN"`` when the key is absent."""
        raw_verdict = self.report.get("verdict", "WARN")
        return str(raw_verdict)

    def exit_code(self) -> int:
        """Return ``policy_exit_code`` for this verdict under the stored policy mode."""
        return policy_exit_code(self.verdict, self.policy_mode)
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
def judge_repo_change(repo_path: str | Path, *, staged: bool = False, patch_text: str | None = None, policy_mode: PolicyMode | str = PolicyMode.LOCAL) -> Judgment:
    """Judge the changes in a repository and return the result as a ``Judgment``.

    This entry point involves no CLI argument parsing or stdout rendering.
    The policy mode is normalized first; under ``PolicyMode.CI`` the report is
    built with ``ci=True`` and additionally flagged with ``report["ci"]``.
    """
    mode = normalize_policy_mode(policy_mode)
    in_ci = mode is PolicyMode.CI
    resolved = Path(repo_path).resolve()
    report = build_repo_change_report(resolved, staged=staged, patch_text=patch_text, ci=in_ci)
    if in_ci:
        report["ci"] = True
    return Judgment(str(resolved), mode, report)
|