sourcecode 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/architecture_analyzer.py +94 -8
- sourcecode/cli.py +28 -0
- sourcecode/contract_model.py +1 -0
- sourcecode/contract_pipeline.py +49 -14
- sourcecode/doc_analyzer.py +22 -0
- sourcecode/env_analyzer.py +110 -22
- sourcecode/git_analyzer.py +13 -2
- sourcecode/prepare_context.py +6 -2
- sourcecode/schema.py +29 -0
- sourcecode/semantic_analyzer.py +64 -0
- sourcecode/serializer.py +44 -7
- {sourcecode-0.41.0.dist-info → sourcecode-0.43.0.dist-info}/METADATA +1 -1
- {sourcecode-0.41.0.dist-info → sourcecode-0.43.0.dist-info}/RECORD +17 -17
- {sourcecode-0.41.0.dist-info → sourcecode-0.43.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.41.0.dist-info → sourcecode-0.43.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.41.0.dist-info → sourcecode-0.43.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
|
@@ -172,6 +172,7 @@ class ArchitectureAnalyzer:
|
|
|
172
172
|
graph: Optional[ModuleGraph] = None,
|
|
173
173
|
) -> ArchitectureAnalysis:
|
|
174
174
|
limitations: list[str] = []
|
|
175
|
+
evidence: list[dict] = []
|
|
175
176
|
|
|
176
177
|
# Step 1: filter paths
|
|
177
178
|
filtered = self._filter_paths(sm.file_paths)
|
|
@@ -180,6 +181,8 @@ class ArchitectureAnalyzer:
|
|
|
180
181
|
requested=True,
|
|
181
182
|
pattern="unknown",
|
|
182
183
|
limitations=["Arquitectura no inferida: proyecto sin archivos de codigo suficientes"],
|
|
184
|
+
evidence=[{"type": "none", "paths": [], "reason": "insufficient source files", "confidence": "high"}],
|
|
185
|
+
tentative=False,
|
|
183
186
|
)
|
|
184
187
|
|
|
185
188
|
# Step 2: domain clustering
|
|
@@ -193,17 +196,32 @@ class ArchitectureAnalyzer:
|
|
|
193
196
|
elif pattern == "unknown":
|
|
194
197
|
limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
|
|
195
198
|
|
|
196
|
-
# Step 3b: monorepo override — workspace config is hard evidence
|
|
197
|
-
|
|
199
|
+
# Step 3b: monorepo override — workspace config is hard evidence.
|
|
200
|
+
# Overrides all weak inferred patterns; only truly specialised patterns
|
|
201
|
+
# (cqrs, clean, onion, hexagonal) take precedence over workspace config.
|
|
202
|
+
has_workspace = self._has_workspace_config(sm.file_paths)
|
|
203
|
+
if has_workspace and pattern not in (
|
|
198
204
|
"monorepo", "cqrs", "clean", "onion", "hexagonal"
|
|
199
205
|
):
|
|
200
206
|
mono_layers = self._detect_monorepo_packages(filtered)
|
|
201
|
-
|
|
207
|
+
# Override whenever: monorepo packages detected, OR pattern is any weak/generic type.
|
|
208
|
+
# "fullstack", "layered", "mvc", "microservices", "modular", "flat", "unknown", None
|
|
209
|
+
# all yield to workspace config evidence.
|
|
210
|
+
_WEAK_PATTERNS = {None, "unknown", "flat", "modular", "layered",
|
|
211
|
+
"fullstack", "mvc", "microservices"}
|
|
212
|
+
if mono_layers or pattern in _WEAK_PATTERNS:
|
|
202
213
|
pattern = "monorepo"
|
|
203
214
|
layers = mono_layers
|
|
204
215
|
limitations.append(
|
|
205
216
|
"Workspace config detectado — arquitectura refleja topologia de paquetes"
|
|
206
217
|
)
|
|
218
|
+
ws_files = [p for p in sm.file_paths if p.split("/")[-1] in _WORKSPACE_CONFIG_FILES]
|
|
219
|
+
evidence.append({
|
|
220
|
+
"type": "workspace_config",
|
|
221
|
+
"paths": ws_files[:4],
|
|
222
|
+
"reason": "Monorepo workspace config file(s) detected — hard evidence for monorepo topology",
|
|
223
|
+
"confidence": "high",
|
|
224
|
+
})
|
|
207
225
|
|
|
208
226
|
# Step 4: bounded context inference
|
|
209
227
|
bounded_contexts = self._infer_bounded_contexts(domains, graph)
|
|
@@ -212,25 +230,91 @@ class ArchitectureAnalyzer:
|
|
|
212
230
|
confidence: Literal["high", "medium", "low"]
|
|
213
231
|
strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
|
|
214
232
|
all_layers_weak = layers and all(l.confidence == "low" for l in layers)
|
|
233
|
+
|
|
234
|
+
method = "graph+structure" if graph is not None else "filesystem_inference"
|
|
235
|
+
# High-confidence evidence (workspace config) makes pattern non-tentative.
|
|
236
|
+
tentative = not any(e.get("confidence") == "high" for e in evidence)
|
|
237
|
+
|
|
238
|
+
# _hard_evidence: high-confidence evidence was already set (e.g. workspace_config).
|
|
239
|
+
# When True, tentative must stay False and confidence must stay at least "medium".
|
|
240
|
+
_hard_evidence = not tentative # tentative=False iff high-conf evidence present
|
|
241
|
+
|
|
215
242
|
if pattern not in (None, "unknown", "flat"):
|
|
216
|
-
if
|
|
243
|
+
if graph is not None:
|
|
244
|
+
# Import graph provided — structural validation available
|
|
245
|
+
confidence = "medium" if len(strong_domains) >= 3 else "low"
|
|
246
|
+
evidence.append({
|
|
247
|
+
"type": "import_graph",
|
|
248
|
+
"paths": [n.id for n in graph.nodes[:6]],
|
|
249
|
+
"reason": f"Module import graph with {len(graph.nodes)} nodes used for pattern validation",
|
|
250
|
+
"confidence": "medium",
|
|
251
|
+
})
|
|
252
|
+
elif all_layers_weak:
|
|
217
253
|
# Layers came from file-naming heuristic only, not directory structure
|
|
218
254
|
confidence = "low"
|
|
255
|
+
if not _hard_evidence:
|
|
256
|
+
tentative = True
|
|
219
257
|
limitations.append(
|
|
220
258
|
"Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
|
|
221
259
|
)
|
|
260
|
+
evidence.append({
|
|
261
|
+
"type": "filesystem_naming",
|
|
262
|
+
"paths": [l.files[0] for l in layers if l.files][:6],
|
|
263
|
+
"reason": (
|
|
264
|
+
f"Pattern '{pattern}' inferred from file stem naming conventions only "
|
|
265
|
+
"(e.g. *_controller.py, *_service.py). "
|
|
266
|
+
"No directory structure or import graph confirmation."
|
|
267
|
+
),
|
|
268
|
+
"confidence": "low",
|
|
269
|
+
})
|
|
222
270
|
else:
|
|
223
|
-
|
|
224
|
-
if
|
|
271
|
+
# Directory structure match (or monorepo/workspace override with no layers)
|
|
272
|
+
confidence = "medium" if (_hard_evidence or len(strong_domains) >= 3) else "low"
|
|
273
|
+
if confidence == "low" and not _hard_evidence:
|
|
274
|
+
tentative = True
|
|
275
|
+
if not _hard_evidence:
|
|
225
276
|
limitations.append(
|
|
226
277
|
"Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
|
|
227
278
|
)
|
|
279
|
+
if not _hard_evidence:
|
|
280
|
+
matched_dirs = sorted({
|
|
281
|
+
p.replace("\\", "/").split("/")[0]
|
|
282
|
+
for layer in layers for p in layer.files
|
|
283
|
+
})
|
|
284
|
+
evidence.append({
|
|
285
|
+
"type": "filesystem_naming",
|
|
286
|
+
"paths": matched_dirs[:8],
|
|
287
|
+
"reason": (
|
|
288
|
+
f"Pattern '{pattern}' inferred from directory names matching layer keywords. "
|
|
289
|
+
"Import graph not available — structural direction of dependencies unverified."
|
|
290
|
+
),
|
|
291
|
+
"confidence": "low" if confidence == "low" else "medium",
|
|
292
|
+
})
|
|
228
293
|
elif len(strong_domains) >= 1:
|
|
229
294
|
confidence = "medium"
|
|
295
|
+
if not _hard_evidence:
|
|
296
|
+
tentative = True
|
|
297
|
+
evidence.append({
|
|
298
|
+
"type": "filesystem_naming",
|
|
299
|
+
"paths": [d.name for d in strong_domains[:6]],
|
|
300
|
+
"reason": "Domain clustering from directory names; no layer pattern confirmed",
|
|
301
|
+
"confidence": "low",
|
|
302
|
+
})
|
|
230
303
|
else:
|
|
231
304
|
confidence = "low"
|
|
232
|
-
|
|
233
|
-
|
|
305
|
+
if not _hard_evidence:
|
|
306
|
+
tentative = True
|
|
307
|
+
if not evidence:
|
|
308
|
+
limitations.append(
|
|
309
|
+
"insufficient_evidence: no recognizable architectural signals found; "
|
|
310
|
+
"filesystem structure does not match known patterns"
|
|
311
|
+
)
|
|
312
|
+
evidence.append({
|
|
313
|
+
"type": "filesystem_naming",
|
|
314
|
+
"paths": filtered[:6],
|
|
315
|
+
"reason": "Only filesystem paths available; no pattern matched",
|
|
316
|
+
"confidence": "low",
|
|
317
|
+
})
|
|
234
318
|
|
|
235
319
|
return ArchitectureAnalysis(
|
|
236
320
|
requested=True,
|
|
@@ -241,6 +325,8 @@ class ArchitectureAnalyzer:
|
|
|
241
325
|
confidence=confidence,
|
|
242
326
|
method=method,
|
|
243
327
|
limitations=limitations,
|
|
328
|
+
evidence=evidence,
|
|
329
|
+
tentative=tentative,
|
|
244
330
|
)
|
|
245
331
|
|
|
246
332
|
# ------------------------------------------------------------------
|
sourcecode/cli.py
CHANGED
|
@@ -181,6 +181,7 @@ _OPTIONS_WITH_VALUE: frozenset[str] = frozenset({
|
|
|
181
181
|
"--dependency-depth",
|
|
182
182
|
"--rank-by",
|
|
183
183
|
"--symbol",
|
|
184
|
+
"--max-importers",
|
|
184
185
|
})
|
|
185
186
|
|
|
186
187
|
|
|
@@ -594,6 +595,17 @@ def main(
|
|
|
594
595
|
"--symbol",
|
|
595
596
|
help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
|
|
596
597
|
),
|
|
598
|
+
max_importers: int = typer.Option(
|
|
599
|
+
50,
|
|
600
|
+
"--max-importers",
|
|
601
|
+
help=(
|
|
602
|
+
"Maximum importer files returned by --symbol (default: 50). "
|
|
603
|
+
"Popular symbols can have hundreds of importers — this prevents output explosion. "
|
|
604
|
+
"Defining files are never truncated. Override: --symbol Foo --max-importers 200."
|
|
605
|
+
),
|
|
606
|
+
min=1,
|
|
607
|
+
max=10000,
|
|
608
|
+
),
|
|
597
609
|
copy: bool = typer.Option(
|
|
598
610
|
False,
|
|
599
611
|
"--copy",
|
|
@@ -770,6 +782,21 @@ def main(
|
|
|
770
782
|
code_notes = True
|
|
771
783
|
no_tree = True # agents never need the raw file tree
|
|
772
784
|
typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
|
|
785
|
+
# Warn about flags that are computed but excluded from agent_view output
|
|
786
|
+
_agent_suppressed: list[str] = []
|
|
787
|
+
if full_metrics:
|
|
788
|
+
_agent_suppressed.append("--full-metrics")
|
|
789
|
+
if graph_modules:
|
|
790
|
+
_agent_suppressed.append("--graph-modules")
|
|
791
|
+
if docs:
|
|
792
|
+
_agent_suppressed.append("--docs")
|
|
793
|
+
if _agent_suppressed:
|
|
794
|
+
typer.echo(
|
|
795
|
+
f"[agent] warning: {', '.join(_agent_suppressed)} computed but excluded "
|
|
796
|
+
"from --agent output — agent_view does not include these sections. "
|
|
797
|
+
"Remove these flags to skip unnecessary computation.",
|
|
798
|
+
err=True,
|
|
799
|
+
)
|
|
773
800
|
|
|
774
801
|
scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
|
|
775
802
|
raw_tree = scanner.scan_tree()
|
|
@@ -1343,6 +1370,7 @@ def main(
|
|
|
1343
1370
|
changed_only=changed_only,
|
|
1344
1371
|
symbol=symbol,
|
|
1345
1372
|
compress_types=compress_types,
|
|
1373
|
+
max_importers=max_importers,
|
|
1346
1374
|
)
|
|
1347
1375
|
sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
|
|
1348
1376
|
if symbol is not None and len(_contracts) == 0:
|
sourcecode/contract_model.py
CHANGED
sourcecode/contract_pipeline.py
CHANGED
|
@@ -45,9 +45,10 @@ def _get_changed_files(root: Path) -> set[str]:
|
|
|
45
45
|
]:
|
|
46
46
|
try:
|
|
47
47
|
result = subprocess.run(
|
|
48
|
-
cmd, cwd=root, capture_output=True, text=True,
|
|
48
|
+
cmd, cwd=root, capture_output=True, text=True,
|
|
49
|
+
encoding="utf-8", errors="replace", timeout=10,
|
|
49
50
|
)
|
|
50
|
-
for line in result.stdout.splitlines():
|
|
51
|
+
for line in (result.stdout or "").splitlines():
|
|
51
52
|
line = line.strip()
|
|
52
53
|
if line:
|
|
53
54
|
changed.add(line.replace("\\", "/"))
|
|
@@ -56,9 +57,10 @@ def _get_changed_files(root: Path) -> set[str]:
|
|
|
56
57
|
try:
|
|
57
58
|
result = subprocess.run(
|
|
58
59
|
["git", "status", "--porcelain"],
|
|
59
|
-
cwd=root, capture_output=True, text=True,
|
|
60
|
+
cwd=root, capture_output=True, text=True,
|
|
61
|
+
encoding="utf-8", errors="replace", timeout=10,
|
|
60
62
|
)
|
|
61
|
-
for line in result.stdout.splitlines():
|
|
63
|
+
for line in (result.stdout or "").splitlines():
|
|
62
64
|
if len(line) > 3:
|
|
63
65
|
changed.add(line[3:].strip().replace("\\", "/"))
|
|
64
66
|
except Exception:
|
|
@@ -129,11 +131,12 @@ def _get_git_churn(root: Path, file_paths: list[str]) -> dict[str, int]:
|
|
|
129
131
|
try:
|
|
130
132
|
result = subprocess.run(
|
|
131
133
|
["git", "log", "--name-only", "--format=", "--since=90.days.ago"],
|
|
132
|
-
cwd=root, capture_output=True, text=True,
|
|
134
|
+
cwd=root, capture_output=True, text=True,
|
|
135
|
+
encoding="utf-8", errors="replace", timeout=15,
|
|
133
136
|
)
|
|
134
137
|
path_set = set(file_paths)
|
|
135
138
|
counter: Counter[str] = Counter()
|
|
136
|
-
for line in result.stdout.splitlines():
|
|
139
|
+
for line in (result.stdout or "").splitlines():
|
|
137
140
|
line = line.strip().replace("\\", "/")
|
|
138
141
|
if line in path_set:
|
|
139
142
|
counter[line] += 1
|
|
@@ -172,6 +175,7 @@ class ContractPipeline:
|
|
|
172
175
|
changed_only: bool = False,
|
|
173
176
|
symbol: Optional[str] = None,
|
|
174
177
|
compress_types: bool = False,
|
|
178
|
+
max_importers: int = 50,
|
|
175
179
|
) -> tuple[list[FileContract], ContractSummary]:
|
|
176
180
|
"""Run the full extraction pipeline.
|
|
177
181
|
|
|
@@ -276,17 +280,19 @@ class ContractPipeline:
|
|
|
276
280
|
contracts = self._rank(contracts, rank_by)
|
|
277
281
|
|
|
278
282
|
# 8. Symbol filter — keep files that define or import the symbol
|
|
283
|
+
_symbol_truncation: Optional[dict] = None
|
|
279
284
|
if symbol:
|
|
280
|
-
contracts = _filter_by_symbol(contracts, symbol)
|
|
285
|
+
contracts, _symbol_truncation = _filter_by_symbol(contracts, symbol, max_importers=max_importers)
|
|
281
286
|
# When shallow scan missed the defining file (deep monorepo), fall back
|
|
282
287
|
# to a grep-based filesystem search over the full directory tree.
|
|
283
288
|
if not contracts:
|
|
284
|
-
contracts = self._symbol_deep_scan(
|
|
289
|
+
contracts, _symbol_truncation = self._symbol_deep_scan(
|
|
285
290
|
root, symbol,
|
|
286
291
|
known_paths=set(src_paths),
|
|
287
292
|
entry_paths=entry_paths,
|
|
288
293
|
changed_files=changed_files,
|
|
289
294
|
engine=engine,
|
|
295
|
+
max_importers=max_importers,
|
|
290
296
|
)
|
|
291
297
|
|
|
292
298
|
# 9. Entrypoints-only filter
|
|
@@ -310,6 +316,7 @@ class ContractPipeline:
|
|
|
310
316
|
method_breakdown=dict(method_counts),
|
|
311
317
|
ranked_by=rank_by,
|
|
312
318
|
limitations=limitations,
|
|
319
|
+
symbol_truncation=_symbol_truncation,
|
|
313
320
|
)
|
|
314
321
|
return contracts, summary
|
|
315
322
|
|
|
@@ -329,7 +336,8 @@ class ContractPipeline:
|
|
|
329
336
|
entry_paths: set[str],
|
|
330
337
|
changed_files: set[str],
|
|
331
338
|
engine: RankingEngine,
|
|
332
|
-
|
|
339
|
+
max_importers: int = 50,
|
|
340
|
+
) -> tuple[list[FileContract], dict]:
|
|
333
341
|
"""Grep-based fallback when the shallow scan missed the defining files.
|
|
334
342
|
|
|
335
343
|
Searches the full directory tree for source files containing *symbol*,
|
|
@@ -353,7 +361,7 @@ class ContractPipeline:
|
|
|
353
361
|
contract.ranking_reasons = fs.reasons
|
|
354
362
|
extra.append(contract)
|
|
355
363
|
|
|
356
|
-
return _filter_by_symbol(extra, symbol)
|
|
364
|
+
return _filter_by_symbol(extra, symbol, max_importers=max_importers)
|
|
357
365
|
|
|
358
366
|
|
|
359
367
|
# ---------------------------------------------------------------------------
|
|
@@ -409,7 +417,11 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
|
|
|
409
417
|
# Symbol-aware filter
|
|
410
418
|
# ---------------------------------------------------------------------------
|
|
411
419
|
|
|
412
|
-
def _filter_by_symbol(
|
|
420
|
+
def _filter_by_symbol(
|
|
421
|
+
contracts: list[FileContract],
|
|
422
|
+
symbol: str,
|
|
423
|
+
max_importers: int = 50,
|
|
424
|
+
) -> tuple[list[FileContract], dict]:
|
|
413
425
|
"""Return contracts that define, import, or structurally reference *symbol*.
|
|
414
426
|
|
|
415
427
|
Four tiers applied in order:
|
|
@@ -420,6 +432,8 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
|
|
|
420
432
|
function signatures (word-boundary). Only used when tiers 1-3 fail.
|
|
421
433
|
|
|
422
434
|
Defining contracts are ranked first; importers and references follow.
|
|
435
|
+
max_importers caps tier 3 results to prevent output explosion on popular symbols.
|
|
436
|
+
Returns (contracts, truncation_metadata).
|
|
423
437
|
"""
|
|
424
438
|
sym_l = symbol.lower()
|
|
425
439
|
word_re = re.compile(
|
|
@@ -463,8 +477,14 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
|
|
|
463
477
|
|
|
464
478
|
# Tier 3: import matching (case-insensitive when no definers found)
|
|
465
479
|
ci_imports = len(defining) == 0
|
|
466
|
-
|
|
467
|
-
|
|
480
|
+
all_importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
|
|
481
|
+
all_importers = [c for c in contracts if c.path in all_importer_paths and c.path not in defining_paths]
|
|
482
|
+
|
|
483
|
+
# Apply importer cap — definers are never truncated
|
|
484
|
+
total_importers = len(all_importers)
|
|
485
|
+
truncated = total_importers > max_importers
|
|
486
|
+
importers = all_importers[:max_importers] if truncated else all_importers
|
|
487
|
+
importer_paths = {c.path for c in importers}
|
|
468
488
|
|
|
469
489
|
# Tier 4: type-reference matching (only when tiers 1-3 yield nothing)
|
|
470
490
|
references: list[FileContract] = []
|
|
@@ -480,12 +500,27 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
|
|
|
480
500
|
seen.add(c.path)
|
|
481
501
|
merged.append(c)
|
|
482
502
|
|
|
483
|
-
|
|
503
|
+
result = sorted(merged, key=lambda c: (
|
|
484
504
|
c.path not in defining_paths,
|
|
485
505
|
c.path not in importer_paths,
|
|
486
506
|
-c.relevance_score,
|
|
487
507
|
))
|
|
488
508
|
|
|
509
|
+
truncation: dict = {
|
|
510
|
+
"symbol": symbol,
|
|
511
|
+
"definers_found": len(defining),
|
|
512
|
+
"importers_found": total_importers,
|
|
513
|
+
"importers_returned": len(importers),
|
|
514
|
+
"references_found": len(references),
|
|
515
|
+
"total_returned": len(result),
|
|
516
|
+
"truncated": truncated,
|
|
517
|
+
}
|
|
518
|
+
if truncated:
|
|
519
|
+
truncation["truncation_reason"] = "max_importers_limit"
|
|
520
|
+
truncation["override_hint"] = f"--symbol {symbol} --max-importers {total_importers}"
|
|
521
|
+
|
|
522
|
+
return result, truncation
|
|
523
|
+
|
|
489
524
|
|
|
490
525
|
# ---------------------------------------------------------------------------
|
|
491
526
|
# Deep symbol scan — grep-based fallback for shallow-scanned repos
|
sourcecode/doc_analyzer.py
CHANGED
|
@@ -132,6 +132,8 @@ class DocAnalyzer:
|
|
|
132
132
|
records: list[DocRecord] = []
|
|
133
133
|
limitations: list[str] = list(limitations_pre)
|
|
134
134
|
languages: set[str] = set()
|
|
135
|
+
# Track per-language support status for honest reporting
|
|
136
|
+
unsupported_langs: set[str] = set()
|
|
135
137
|
|
|
136
138
|
for relative_path in file_paths:
|
|
137
139
|
abs_path = root / relative_path
|
|
@@ -176,8 +178,18 @@ class DocAnalyzer:
|
|
|
176
178
|
# Unsupported language — D-04: no emitir DocRecord, solo registrar limitation
|
|
177
179
|
limitations.append(f"docs_unavailable:{norm_path}:language={lang}")
|
|
178
180
|
languages.add(lang)
|
|
181
|
+
unsupported_langs.add(lang)
|
|
179
182
|
# NO records.append() here
|
|
180
183
|
|
|
184
|
+
# Build language_coverage: explicit per-language support status
|
|
185
|
+
_SUPPORTED_LANGS = {"python", "javascript", "typescript"}
|
|
186
|
+
lang_coverage: dict[str, str] = {}
|
|
187
|
+
for lang in languages:
|
|
188
|
+
if lang in _SUPPORTED_LANGS:
|
|
189
|
+
lang_coverage[lang] = "supported"
|
|
190
|
+
else:
|
|
191
|
+
lang_coverage[lang] = "unsupported"
|
|
192
|
+
|
|
181
193
|
# Build summary
|
|
182
194
|
symbol_count = sum(1 for r in records if r.kind != "module")
|
|
183
195
|
total_count = len(records)
|
|
@@ -192,6 +204,15 @@ class DocAnalyzer:
|
|
|
192
204
|
"no docstrings or JSDoc comments found"
|
|
193
205
|
)
|
|
194
206
|
|
|
207
|
+
# Warn explicitly when unsupported languages are present — agents must not
|
|
208
|
+
# assume full coverage when Java/Go/Rust files are in scope but not analyzed.
|
|
209
|
+
if unsupported_langs:
|
|
210
|
+
sorted_unsupported = sorted(unsupported_langs)
|
|
211
|
+
limitations.append(
|
|
212
|
+
f"docs_not_extracted: language(s) {sorted_unsupported} present but not supported; "
|
|
213
|
+
"only Python and JS/TS docstrings are extracted"
|
|
214
|
+
)
|
|
215
|
+
|
|
195
216
|
summary = DocSummary(
|
|
196
217
|
requested=True,
|
|
197
218
|
total_count=total_count,
|
|
@@ -200,6 +221,7 @@ class DocAnalyzer:
|
|
|
200
221
|
depth=depth,
|
|
201
222
|
truncated=truncated,
|
|
202
223
|
limitations=limitations,
|
|
224
|
+
language_coverage=lang_coverage,
|
|
203
225
|
)
|
|
204
226
|
return records, summary
|
|
205
227
|
|
sourcecode/env_analyzer.py
CHANGED
|
@@ -27,9 +27,13 @@ _ENV_EXAMPLE_NAMES = {
|
|
|
27
27
|
|
|
28
28
|
# Spring Boot application.properties / application.yml and their profile variants
|
|
29
29
|
_SPRING_CONF_BASE = {"application.properties", "application.yml", "application.yaml"}
|
|
30
|
-
_SPRING_CONF_PROFILE_RE = re.compile(r'^application-[a-z0-9_-]
|
|
31
|
-
# Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE
|
|
32
|
-
|
|
30
|
+
_SPRING_CONF_PROFILE_RE = re.compile(r'^application-([a-z0-9_-]+)\.(properties|ya?ml)$', re.IGNORECASE)
|
|
31
|
+
# Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE.
|
|
32
|
+
# Group 1 = key, Group 2 = default (may be empty string, absent = no default).
|
|
33
|
+
_SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
|
|
34
|
+
# Matches ${spring.dotted.key} or ${spring.dotted.key:default} — Spring property references.
|
|
35
|
+
# These are internal property cross-references, not OS env vars, but still config signals.
|
|
36
|
+
_SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
|
|
33
37
|
|
|
34
38
|
# Patterns where absence of the variable causes a hard runtime error (not just None/null).
|
|
35
39
|
# py_environ_bracket → os.environ["KEY"] raises KeyError
|
|
@@ -140,9 +144,9 @@ def _infer_type_hint(key: str) -> str:
|
|
|
140
144
|
def _scan_file(
|
|
141
145
|
path: Path,
|
|
142
146
|
rel_path: str,
|
|
143
|
-
findings: dict[str, list[tuple[str, Optional[str], bool]]],
|
|
147
|
+
findings: dict[str, list[tuple[str, Optional[str], bool, Optional[str]]]],
|
|
144
148
|
) -> None:
|
|
145
|
-
"""Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard)]."""
|
|
149
|
+
"""Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard, profile)]."""
|
|
146
150
|
try:
|
|
147
151
|
size = path.stat().st_size
|
|
148
152
|
if size > _MAX_FILE_SIZE:
|
|
@@ -168,7 +172,7 @@ def _scan_file(
|
|
|
168
172
|
|
|
169
173
|
line_num = content.count("\n", 0, m.start()) + 1
|
|
170
174
|
file_ref = f"{rel_path}:{line_num}"
|
|
171
|
-
findings[key].append((file_ref, default, is_hard))
|
|
175
|
+
findings[key].append((file_ref, default, is_hard, None))
|
|
172
176
|
|
|
173
177
|
|
|
174
178
|
def _parse_env_example(
|
|
@@ -204,22 +208,66 @@ def _parse_env_example(
|
|
|
204
208
|
return results
|
|
205
209
|
|
|
206
210
|
|
|
211
|
+
def _extract_spring_profile(filename: str) -> Optional[str]:
|
|
212
|
+
"""Extract Spring profile from filename.
|
|
213
|
+
|
|
214
|
+
application.yml / application.properties → 'default'
|
|
215
|
+
application-m3dev.yml → 'm3dev'
|
|
216
|
+
"""
|
|
217
|
+
name_lower = filename.lower()
|
|
218
|
+
if name_lower in _SPRING_CONF_BASE:
|
|
219
|
+
return "default"
|
|
220
|
+
m = _SPRING_CONF_PROFILE_RE.match(name_lower)
|
|
221
|
+
if m:
|
|
222
|
+
return m.group(1)
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
|
|
207
226
|
def _parse_spring_config(
|
|
208
227
|
path: Path,
|
|
209
228
|
rel_path: str,
|
|
210
229
|
findings: dict,
|
|
211
|
-
|
|
212
|
-
|
|
230
|
+
profile: Optional[str] = None,
|
|
231
|
+
) -> int:
|
|
232
|
+
"""Parse application.properties / application.yml for ${ENV_VAR} refs.
|
|
233
|
+
|
|
234
|
+
Returns the total number of ${...} placeholders found (candidates).
|
|
235
|
+
Captures default values from ${VAR:default} syntax.
|
|
236
|
+
Marks vars without defaults as hard-required (Spring fails to start if missing).
|
|
237
|
+
"""
|
|
213
238
|
try:
|
|
214
239
|
content = path.read_text(encoding="utf-8", errors="replace")
|
|
215
240
|
except OSError:
|
|
216
|
-
return
|
|
241
|
+
return 0
|
|
217
242
|
|
|
218
|
-
|
|
243
|
+
candidates = 0
|
|
244
|
+
|
|
245
|
+
# 1. UPPER_SNAKE_CASE env var references: ${DB_HOST} or ${DB_HOST:localhost}
|
|
246
|
+
for m in _SPRING_ENV_VAR_RE.finditer(content):
|
|
247
|
+
key = m.group(1)
|
|
248
|
+
raw_default = m.group(2) # None if no colon, "" if colon with empty default
|
|
249
|
+
# A colon means a default was specified (even if empty string)
|
|
250
|
+
has_default = raw_default is not None
|
|
251
|
+
default: Optional[str] = raw_default if (raw_default and raw_default.strip()) else None
|
|
252
|
+
line_num = content.count("\n", 0, m.start()) + 1
|
|
253
|
+
# Hard required only when no default is provided
|
|
254
|
+
is_hard = not has_default
|
|
255
|
+
findings[key].append((f"{rel_path}:{line_num}", default, is_hard, profile))
|
|
256
|
+
candidates += 1
|
|
257
|
+
|
|
258
|
+
# 2. lowercase.dotted Spring property refs: ${spring.datasource.url:default}
|
|
259
|
+
# These are internal property cross-references; store with a special prefix so
|
|
260
|
+
# callers can distinguish them from OS env vars. We do NOT mark them hard-required
|
|
261
|
+
# because they reference Spring's own property resolution chain.
|
|
262
|
+
for m in _SPRING_PROP_REF_RE.finditer(content):
|
|
219
263
|
key = m.group(1)
|
|
264
|
+
raw_default = m.group(2)
|
|
265
|
+
default = raw_default if (raw_default and raw_default.strip()) else None
|
|
220
266
|
line_num = content.count("\n", 0, m.start()) + 1
|
|
221
|
-
|
|
222
|
-
|
|
267
|
+
findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
|
|
268
|
+
candidates += 1
|
|
269
|
+
|
|
270
|
+
return candidates
|
|
223
271
|
|
|
224
272
|
|
|
225
273
|
class EnvAnalyzer:
|
|
@@ -232,13 +280,18 @@ class EnvAnalyzer:
|
|
|
232
280
|
) -> tuple[list, object]:
|
|
233
281
|
from sourcecode.schema import EnvSummary, EnvVarRecord
|
|
234
282
|
|
|
235
|
-
# findings[key] = list of (file_ref, default_or_None, is_hard_required)
|
|
236
|
-
findings: dict[str, list[tuple[str, Optional[str], bool]]] = defaultdict(list)
|
|
283
|
+
# findings[key] = list of (file_ref, default_or_None, is_hard_required, profile_or_None)
|
|
284
|
+
findings: dict[str, list[tuple[str, Optional[str], bool, Optional[str]]]] = defaultdict(list)
|
|
237
285
|
example_entries: list[tuple[str, Optional[str], Optional[str]]] = []
|
|
238
286
|
example_files_found: list[str] = []
|
|
239
287
|
limitations: list[str] = []
|
|
288
|
+
profiles_scanned: list[str] = []
|
|
289
|
+
spring_candidates: int = 0
|
|
240
290
|
|
|
241
|
-
self._walk(
|
|
291
|
+
spring_candidates = self._walk(
|
|
292
|
+
root, root, findings, example_entries, example_files_found,
|
|
293
|
+
limitations, profiles_scanned,
|
|
294
|
+
)
|
|
242
295
|
|
|
243
296
|
# Merge findings into EnvVarRecord per key
|
|
244
297
|
records: dict[str, EnvVarRecord] = {}
|
|
@@ -248,19 +301,23 @@ class EnvAnalyzer:
|
|
|
248
301
|
if len(records) >= _MAX_KEYS:
|
|
249
302
|
limitations.append(f"key_limit_reached:{_MAX_KEYS}")
|
|
250
303
|
break
|
|
251
|
-
defaults = [d for _, d, _ in refs if d is not None]
|
|
304
|
+
defaults = [d for _, d, _, _ in refs if d is not None]
|
|
252
305
|
# required only when access pattern causes a hard runtime error if missing:
|
|
253
306
|
# os.environ["KEY"] (KeyError) or Spring @Value/${KEY} without default.
|
|
254
307
|
# os.getenv("KEY") / os.environ.get("KEY") return None — not hard required.
|
|
255
|
-
has_hard_access = any(is_hard for _, _, is_hard in refs)
|
|
308
|
+
has_hard_access = any(is_hard for _, _, is_hard, _ in refs)
|
|
256
309
|
required = has_hard_access and not defaults
|
|
257
310
|
default_val = defaults[0] if defaults else None
|
|
258
311
|
unique_files: list[str] = []
|
|
259
312
|
seen: set[str] = set()
|
|
260
|
-
for
|
|
313
|
+
# Collect first profile seen for this key (from Spring config files)
|
|
314
|
+
first_profile: Optional[str] = None
|
|
315
|
+
for file_ref, _, _, prof in refs:
|
|
261
316
|
if file_ref not in seen:
|
|
262
317
|
seen.add(file_ref)
|
|
263
318
|
unique_files.append(file_ref)
|
|
319
|
+
if first_profile is None and prof is not None:
|
|
320
|
+
first_profile = prof
|
|
264
321
|
if len(unique_files) >= _MAX_FILES_PER_KEY:
|
|
265
322
|
break
|
|
266
323
|
records[key] = EnvVarRecord(
|
|
@@ -270,6 +327,7 @@ class EnvAnalyzer:
|
|
|
270
327
|
type_hint=_infer_type_hint(key),
|
|
271
328
|
category=_infer_category(key),
|
|
272
329
|
files=unique_files,
|
|
330
|
+
profile=first_profile,
|
|
273
331
|
)
|
|
274
332
|
|
|
275
333
|
# 2. Supplement with .env.example entries (fill description + add missing keys)
|
|
@@ -300,6 +358,20 @@ class EnvAnalyzer:
|
|
|
300
358
|
# Build summary
|
|
301
359
|
categories = sorted({r.category for r in sorted_records if r.category})
|
|
302
360
|
required_count = sum(1 for r in sorted_records if r.required)
|
|
361
|
+
|
|
362
|
+
# Coverage note: warn if Spring config was scanned but coverage seems partial
|
|
363
|
+
coverage_note: Optional[str] = None
|
|
364
|
+
if profiles_scanned and spring_candidates > 0:
|
|
365
|
+
spring_key_count = sum(
|
|
366
|
+
1 for r in sorted_records if r.profile is not None
|
|
367
|
+
)
|
|
368
|
+
if spring_key_count < spring_candidates:
|
|
369
|
+
coverage_note = (
|
|
370
|
+
f"{spring_candidates} Spring ${{VAR}} placeholder(s) found across "
|
|
371
|
+
f"{len(profiles_scanned)} profile(s); {spring_key_count} unique key(s) "
|
|
372
|
+
"extracted. Duplicates across profiles collapsed."
|
|
373
|
+
)
|
|
374
|
+
|
|
303
375
|
summary = EnvSummary(
|
|
304
376
|
requested=True,
|
|
305
377
|
total=len(sorted_records),
|
|
@@ -308,6 +380,9 @@ class EnvAnalyzer:
|
|
|
308
380
|
categories=categories,
|
|
309
381
|
example_files_found=example_files_found,
|
|
310
382
|
limitations=limitations,
|
|
383
|
+
profiles_scanned=sorted(set(profiles_scanned)),
|
|
384
|
+
spring_candidates=spring_candidates,
|
|
385
|
+
coverage_note=coverage_note,
|
|
311
386
|
)
|
|
312
387
|
|
|
313
388
|
return sorted_records, summary
|
|
@@ -320,11 +395,15 @@ class EnvAnalyzer:
|
|
|
320
395
|
example_entries: list,
|
|
321
396
|
example_files_found: list,
|
|
322
397
|
limitations: list,
|
|
323
|
-
|
|
398
|
+
profiles_scanned: list,
|
|
399
|
+
) -> int:
|
|
400
|
+
"""Walk the directory tree accumulating env var findings. Returns spring_candidates count."""
|
|
324
401
|
try:
|
|
325
402
|
entries = sorted(current.iterdir())
|
|
326
403
|
except PermissionError:
|
|
327
|
-
return
|
|
404
|
+
return 0
|
|
405
|
+
|
|
406
|
+
total_spring_candidates = 0
|
|
328
407
|
|
|
329
408
|
for entry in entries:
|
|
330
409
|
name = entry.name
|
|
@@ -333,7 +412,10 @@ class EnvAnalyzer:
|
|
|
333
412
|
if entry.is_dir():
|
|
334
413
|
if name in _SKIP_DIRS:
|
|
335
414
|
continue
|
|
336
|
-
self._walk(
|
|
415
|
+
total_spring_candidates += self._walk(
|
|
416
|
+
root, entry, findings, example_entries, example_files_found,
|
|
417
|
+
limitations, profiles_scanned,
|
|
418
|
+
)
|
|
337
419
|
elif entry.is_file():
|
|
338
420
|
rel = entry.relative_to(root).as_posix()
|
|
339
421
|
name_lower = name.lower()
|
|
@@ -344,13 +426,19 @@ class EnvAnalyzer:
|
|
|
344
426
|
continue
|
|
345
427
|
# Spring Boot application.properties / application.yml (incl. profiles)
|
|
346
428
|
if name_lower in _SPRING_CONF_BASE or _SPRING_CONF_PROFILE_RE.match(name_lower):
|
|
347
|
-
|
|
429
|
+
profile = _extract_spring_profile(name)
|
|
430
|
+
if profile and profile not in profiles_scanned:
|
|
431
|
+
profiles_scanned.append(profile)
|
|
432
|
+
count = _parse_spring_config(entry, rel, findings, profile)
|
|
433
|
+
total_spring_candidates += count
|
|
348
434
|
continue
|
|
349
435
|
# Source code files
|
|
350
436
|
suffix = entry.suffix.lower()
|
|
351
437
|
if suffix in _CODE_EXTENSIONS:
|
|
352
438
|
_scan_file(entry, rel, findings)
|
|
353
439
|
|
|
440
|
+
return total_spring_candidates
|
|
441
|
+
|
|
354
442
|
|
|
355
443
|
def _replace_description(record, description: str):
|
|
356
444
|
from dataclasses import replace
|
sourcecode/git_analyzer.py
CHANGED
|
@@ -60,9 +60,13 @@ def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
|
|
|
60
60
|
["git", "-C", str(cwd)] + args,
|
|
61
61
|
capture_output=True,
|
|
62
62
|
text=True,
|
|
63
|
+
encoding="utf-8",
|
|
64
|
+
errors="replace",
|
|
63
65
|
timeout=timeout,
|
|
64
66
|
)
|
|
65
|
-
|
|
67
|
+
# `result.stdout` is typed Optional[str]; guard against None on edge-case
|
|
68
|
+
# platforms (Windows subprocess encoding failures, detached processes, etc.)
|
|
69
|
+
return result.stdout or "", result.returncode
|
|
66
70
|
|
|
67
71
|
|
|
68
72
|
class GitAnalyzer:
|
|
@@ -80,6 +84,7 @@ class GitAnalyzer:
|
|
|
80
84
|
branch: Optional[str] = None
|
|
81
85
|
recent_commits: list[CommitRecord] = []
|
|
82
86
|
change_hotspots: list[ChangeHotspot] = []
|
|
87
|
+
hotspots_status: str = "ok"
|
|
83
88
|
uncommitted: Optional[UncommittedChanges] = None
|
|
84
89
|
contributors: list[str] = []
|
|
85
90
|
|
|
@@ -137,8 +142,10 @@ class GitAnalyzer:
|
|
|
137
142
|
change_hotspots = _parse_hotspots(stdout)
|
|
138
143
|
except subprocess.TimeoutExpired:
|
|
139
144
|
limitations.append("hotspots_timeout")
|
|
145
|
+
hotspots_status = "failed"
|
|
140
146
|
except Exception as exc:
|
|
141
147
|
limitations.append(f"hotspots_error:{exc}")
|
|
148
|
+
hotspots_status = "failed"
|
|
142
149
|
|
|
143
150
|
try:
|
|
144
151
|
stdout, _ = _run_git(["status", "--porcelain"], path, timeout=10)
|
|
@@ -166,6 +173,7 @@ class GitAnalyzer:
|
|
|
166
173
|
branch=branch,
|
|
167
174
|
recent_commits=recent_commits,
|
|
168
175
|
change_hotspots=change_hotspots,
|
|
176
|
+
hotspots_status=hotspots_status,
|
|
169
177
|
uncommitted_changes=uncommitted,
|
|
170
178
|
contributors=contributors,
|
|
171
179
|
git_summary=git_summary,
|
|
@@ -228,9 +236,12 @@ def _is_hotspot_admin(path: str) -> bool:
|
|
|
228
236
|
return False
|
|
229
237
|
|
|
230
238
|
|
|
231
|
-
def _parse_hotspots(output: str) -> list:
|
|
239
|
+
def _parse_hotspots(output: str | None) -> list:
|
|
232
240
|
from sourcecode.schema import ChangeHotspot
|
|
233
241
|
|
|
242
|
+
if not output:
|
|
243
|
+
return []
|
|
244
|
+
|
|
234
245
|
file_counts: Counter = Counter()
|
|
235
246
|
file_last_date: dict[str, str] = {}
|
|
236
247
|
current_date = ""
|
sourcecode/prepare_context.py
CHANGED
|
@@ -728,11 +728,13 @@ class TaskContextBuilder:
|
|
|
728
728
|
cwd=str(self.root),
|
|
729
729
|
capture_output=True,
|
|
730
730
|
text=True,
|
|
731
|
+
encoding="utf-8",
|
|
732
|
+
errors="replace",
|
|
731
733
|
timeout=10,
|
|
732
734
|
)
|
|
733
735
|
if result.returncode == 0:
|
|
734
736
|
return [
|
|
735
|
-
line.strip() for line in result.stdout.splitlines()
|
|
737
|
+
line.strip() for line in (result.stdout or "").splitlines()
|
|
736
738
|
if line.strip()
|
|
737
739
|
]
|
|
738
740
|
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
@@ -744,10 +746,12 @@ class TaskContextBuilder:
|
|
|
744
746
|
cwd=str(self.root),
|
|
745
747
|
capture_output=True,
|
|
746
748
|
text=True,
|
|
749
|
+
encoding="utf-8",
|
|
750
|
+
errors="replace",
|
|
747
751
|
timeout=10,
|
|
748
752
|
)
|
|
749
753
|
if result.returncode == 0:
|
|
750
|
-
return [line.strip() for line in result.stdout.splitlines() if line.strip()]
|
|
754
|
+
return [line.strip() for line in (result.stdout or "").splitlines() if line.strip()]
|
|
751
755
|
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
752
756
|
pass
|
|
753
757
|
return []
|
sourcecode/schema.py
CHANGED
|
@@ -252,6 +252,9 @@ class DocSummary:
|
|
|
252
252
|
depth: Optional[DocsDepth] = None
|
|
253
253
|
truncated: bool = False
|
|
254
254
|
limitations: list[str] = field(default_factory=list)
|
|
255
|
+
# Per-language support status: "supported" | "unsupported" | "partial"
|
|
256
|
+
# Absent key = language not present in scanned files.
|
|
257
|
+
language_coverage: dict[str, str] = field(default_factory=dict)
|
|
255
258
|
|
|
256
259
|
|
|
257
260
|
@dataclass
|
|
@@ -303,11 +306,21 @@ class SemanticSummary:
|
|
|
303
306
|
"""Summary of the --semantics analysis."""
|
|
304
307
|
|
|
305
308
|
requested: bool = False
|
|
309
|
+
# Explicit analysis outcome — never omit, never silent.
|
|
310
|
+
# "ok": analysis ran and produced results
|
|
311
|
+
# "partial": analysis ran but with significant coverage gaps
|
|
312
|
+
# "failed": analysis could not produce useful results
|
|
313
|
+
status: str = "ok"
|
|
314
|
+
reason: Optional[str] = None # human-readable failure/partial reason
|
|
306
315
|
call_count: int = 0
|
|
307
316
|
symbol_count: int = 0
|
|
308
317
|
link_count: int = 0
|
|
309
318
|
languages: list[str] = field(default_factory=list)
|
|
310
319
|
language_coverage: dict[str, str] = field(default_factory=dict)
|
|
320
|
+
# Structured per-language support details. Each value:
|
|
321
|
+
# {"supported": bool, "status": str, "reason": str}
|
|
322
|
+
# status: "full" | "heuristic" | "unsupported"
|
|
323
|
+
language_coverage_details: dict[str, Any] = field(default_factory=dict)
|
|
311
324
|
files_analyzed: int = 0
|
|
312
325
|
files_skipped: int = 0
|
|
313
326
|
truncated: bool = False
|
|
@@ -393,6 +406,13 @@ class ArchitectureAnalysis:
|
|
|
393
406
|
confidence: Literal["high", "medium", "low"] = "low"
|
|
394
407
|
method: str = "heuristic"
|
|
395
408
|
limitations: list[str] = field(default_factory=list)
|
|
409
|
+
# Structured evidence for each architectural inference.
|
|
410
|
+
# Each entry: {"type": str, "paths": list[str], "reason": str, "confidence": str}
|
|
411
|
+
# type: "workspace_config" | "filesystem_naming" | "import_graph" | "entry_files" | "none" (no inference possible, e.g. insufficient source files)
|
|
412
|
+
evidence: list[dict] = field(default_factory=list)
|
|
413
|
+
# True when pattern is inferred from weak signals (e.g. directory names only).
|
|
414
|
+
# Agents must not treat tentative patterns as confirmed facts.
|
|
415
|
+
tentative: bool = False
|
|
396
416
|
|
|
397
417
|
|
|
398
418
|
# --- Env Map ---
|
|
@@ -408,6 +428,7 @@ class EnvVarRecord:
|
|
|
408
428
|
category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general
|
|
409
429
|
description: Optional[str] = None
|
|
410
430
|
files: list[str] = field(default_factory=list) # "path:line"
|
|
431
|
+
profile: Optional[str] = None # Spring profile if first occurrence is in application-{profile}.yml
|
|
411
432
|
|
|
412
433
|
|
|
413
434
|
@dataclass
|
|
@@ -421,6 +442,10 @@ class EnvSummary:
|
|
|
421
442
|
categories: list[str] = field(default_factory=list)
|
|
422
443
|
example_files_found: list[str] = field(default_factory=list)
|
|
423
444
|
limitations: list[str] = field(default_factory=list)
|
|
445
|
+
# Spring Boot coverage metadata
|
|
446
|
+
profiles_scanned: list[str] = field(default_factory=list)
|
|
447
|
+
spring_candidates: int = 0 # total ${VAR} refs found across Spring config files
|
|
448
|
+
coverage_note: Optional[str] = None # explicit note about partial coverage
|
|
424
449
|
|
|
425
450
|
|
|
426
451
|
# --- Code Notes ---
|
|
@@ -557,6 +582,10 @@ class GitContext:
|
|
|
557
582
|
branch: Optional[str] = None
|
|
558
583
|
recent_commits: list[CommitRecord] = field(default_factory=list)
|
|
559
584
|
change_hotspots: list[ChangeHotspot] = field(default_factory=list)
|
|
585
|
+
# Explicit hotspot analysis outcome — distinguishes "no hotspots found" from "analysis failed".
|
|
586
|
+
# "ok": hotspot analysis ran (change_hotspots may still be empty if no changes in window)
|
|
587
|
+
# "failed": hotspot analysis timed out or raised an exception (see limitations for hotspots_timeout or hotspots_error:...)
|
|
588
|
+
hotspots_status: str = "ok"
|
|
560
589
|
uncommitted_changes: Optional[UncommittedChanges] = None
|
|
561
590
|
contributors: list[str] = field(default_factory=list)
|
|
562
591
|
git_summary: Optional[str] = None
|
sourcecode/semantic_analyzer.py
CHANGED
|
@@ -343,8 +343,14 @@ class SemanticAnalyzer:
|
|
|
343
343
|
|
|
344
344
|
# Plan 12-02: language_coverage["python"] = "full" when Python files are analyzed
|
|
345
345
|
lang_coverage: dict[str, str] = {}
|
|
346
|
+
lang_coverage_details: dict[str, Any] = {}
|
|
346
347
|
if source_files:
|
|
347
348
|
lang_coverage["python"] = "full"
|
|
349
|
+
lang_coverage_details["python"] = {
|
|
350
|
+
"supported": True,
|
|
351
|
+
"status": "full",
|
|
352
|
+
"reason": "AST-based: symbols, cross-file calls, and imports fully resolved",
|
|
353
|
+
}
|
|
348
354
|
|
|
349
355
|
# -----------------------------------------------------------------------
|
|
350
356
|
# Plan 12-03: JS/TS analysis block
|
|
@@ -489,6 +495,12 @@ class SemanticAnalyzer:
|
|
|
489
495
|
js_languages.add("javascript")
|
|
490
496
|
languages.extend(sorted(js_languages))
|
|
491
497
|
lang_coverage["nodejs"] = "heuristic"
|
|
498
|
+
for js_lang in js_languages:
|
|
499
|
+
lang_coverage_details[js_lang] = {
|
|
500
|
+
"supported": True,
|
|
501
|
+
"status": "heuristic",
|
|
502
|
+
"reason": "Regex-based: exports/imports extracted; cross-file call resolution is heuristic, not AST",
|
|
503
|
+
}
|
|
492
504
|
|
|
493
505
|
# -----------------------------------------------------------------------
|
|
494
506
|
# Plan 12-04: Go analysis block
|
|
@@ -530,6 +542,11 @@ class SemanticAnalyzer:
|
|
|
530
542
|
files_analyzed += 1
|
|
531
543
|
languages.append("go")
|
|
532
544
|
lang_coverage["go"] = "heuristic"
|
|
545
|
+
lang_coverage_details["go"] = {
|
|
546
|
+
"supported": True,
|
|
547
|
+
"status": "heuristic",
|
|
548
|
+
"reason": "Regex-based: func/struct names and same-file calls extracted; no cross-file resolution",
|
|
549
|
+
}
|
|
533
550
|
|
|
534
551
|
# -----------------------------------------------------------------------
|
|
535
552
|
# Plan 12-04: Rust analysis block
|
|
@@ -571,6 +588,11 @@ class SemanticAnalyzer:
|
|
|
571
588
|
files_analyzed += 1
|
|
572
589
|
languages.append("rust")
|
|
573
590
|
lang_coverage["rust"] = "heuristic"
|
|
591
|
+
lang_coverage_details["rust"] = {
|
|
592
|
+
"supported": True,
|
|
593
|
+
"status": "heuristic",
|
|
594
|
+
"reason": "Regex-based: fn/struct names and module-qualified calls extracted; no cross-file resolution",
|
|
595
|
+
}
|
|
574
596
|
|
|
575
597
|
# -----------------------------------------------------------------------
|
|
576
598
|
# Plan 12-04: JVM analysis block (Java, Kotlin, Scala)
|
|
@@ -612,14 +634,56 @@ class SemanticAnalyzer:
|
|
|
612
634
|
files_analyzed += 1
|
|
613
635
|
languages.append("java")
|
|
614
636
|
lang_coverage["java"] = "heuristic"
|
|
637
|
+
lang_coverage_details["java"] = {
|
|
638
|
+
"supported": True,
|
|
639
|
+
"status": "heuristic",
|
|
640
|
+
"reason": (
|
|
641
|
+
"Regex-based only: class/interface/method names extracted, "
|
|
642
|
+
"same-file call sites detected. "
|
|
643
|
+
"No cross-file resolution, no type inference, no import graph. "
|
|
644
|
+
"Spring annotations (@Service, @Component, etc.) not semantically interpreted."
|
|
645
|
+
),
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
# Determine explicit analysis status — never emit silent empty results.
|
|
649
|
+
# An agent must be able to tell "analysis ran and found nothing" from
|
|
650
|
+
# "analysis failed to run" or "significant coverage gap".
|
|
651
|
+
_total_candidates = (
|
|
652
|
+
len(source_files)
|
|
653
|
+
+ len(js_source_files)
|
|
654
|
+
+ len(go_source_files)
|
|
655
|
+
+ len(rust_source_files)
|
|
656
|
+
+ len(jvm_source_files)
|
|
657
|
+
)
|
|
658
|
+
if _total_candidates == 0:
|
|
659
|
+
_sem_status = "failed"
|
|
660
|
+
_sem_reason = "no analyzable source files found in project"
|
|
661
|
+
elif files_analyzed == 0:
|
|
662
|
+
_sem_status = "failed"
|
|
663
|
+
_sem_reason = (
|
|
664
|
+
f"all {_total_candidates} candidate file(s) failed to analyze; "
|
|
665
|
+
"check limitations for parse/read errors"
|
|
666
|
+
)
|
|
667
|
+
elif files_analyzed < _total_candidates // 2 and _total_candidates > 4:
|
|
668
|
+
_sem_status = "partial"
|
|
669
|
+
_sem_reason = (
|
|
670
|
+
f"{files_analyzed} of {_total_candidates} file(s) analyzed; "
|
|
671
|
+
f"{files_skipped} skipped; see limitations"
|
|
672
|
+
)
|
|
673
|
+
else:
|
|
674
|
+
_sem_status = "ok"
|
|
675
|
+
_sem_reason = None
|
|
615
676
|
|
|
616
677
|
summary = SemanticSummary(
|
|
617
678
|
requested=True,
|
|
679
|
+
status=_sem_status,
|
|
680
|
+
reason=_sem_reason,
|
|
618
681
|
call_count=len(calls),
|
|
619
682
|
symbol_count=len(all_symbols),
|
|
620
683
|
link_count=len(links),
|
|
621
684
|
languages=languages,
|
|
622
685
|
language_coverage=lang_coverage,
|
|
686
|
+
language_coverage_details=lang_coverage_details,
|
|
623
687
|
files_analyzed=files_analyzed,
|
|
624
688
|
files_skipped=files_skipped,
|
|
625
689
|
truncated=truncated,
|
sourcecode/serializer.py
CHANGED
|
@@ -722,8 +722,10 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
|
|
|
722
722
|
# production runtime is represented as entry_points=[], never by fallback.
|
|
723
723
|
ep_groups = _entry_point_groups(sm.entry_points)
|
|
724
724
|
result["entry_points"] = ep_groups["production"]
|
|
725
|
-
|
|
726
|
-
|
|
725
|
+
if ep_groups["development"]:
|
|
726
|
+
result["development_entry_points"] = ep_groups["development"]
|
|
727
|
+
if ep_groups["auxiliary"]:
|
|
728
|
+
result["auxiliary_entry_points"] = ep_groups["auxiliary"]
|
|
727
729
|
|
|
728
730
|
# ── 3. Architecture ───────────────────────────────────────────────────────
|
|
729
731
|
result["architecture"] = _architecture_context(sm)
|
|
@@ -888,6 +890,23 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
|
|
|
888
890
|
if analysis_gaps:
|
|
889
891
|
result["analysis_gaps"] = analysis_gaps
|
|
890
892
|
|
|
893
|
+
# ── 8. Agent mode metadata — explicit transparency about auto-enabled/suppressed flags ──
|
|
894
|
+
_auto_enabled: list[str] = ["--dependencies", "--env-map", "--code-notes"]
|
|
895
|
+
_suppressed: list[str] = []
|
|
896
|
+
if sm.metrics_summary is not None and sm.metrics_summary.requested:
|
|
897
|
+
_suppressed.append("--full-metrics")
|
|
898
|
+
if sm.module_graph is not None and sm.module_graph.summary.requested:
|
|
899
|
+
_suppressed.append("--graph-modules")
|
|
900
|
+
if sm.doc_summary is not None and sm.doc_summary.requested:
|
|
901
|
+
_suppressed.append("--docs")
|
|
902
|
+
agent_mode_meta: dict[str, Any] = {
|
|
903
|
+
"auto_enabled": _auto_enabled,
|
|
904
|
+
}
|
|
905
|
+
if _suppressed:
|
|
906
|
+
agent_mode_meta["suppressed_flags"] = _suppressed
|
|
907
|
+
agent_mode_meta["suppressed_note"] = "computed but excluded from agent_view"
|
|
908
|
+
result["agent_mode"] = agent_mode_meta
|
|
909
|
+
|
|
891
910
|
return result
|
|
892
911
|
|
|
893
912
|
|
|
@@ -918,9 +937,11 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
|
|
|
918
937
|
"architecture_summary": sm.architecture_summary,
|
|
919
938
|
"stacks": [asdict(s) for s in sm.stacks],
|
|
920
939
|
"entry_points": ep_groups["production"],
|
|
921
|
-
"development_entry_points": ep_groups["development"],
|
|
922
|
-
"auxiliary_entry_points": ep_groups["auxiliary"],
|
|
923
940
|
}
|
|
941
|
+
if ep_groups["development"]:
|
|
942
|
+
result["development_entry_points"] = ep_groups["development"]
|
|
943
|
+
if ep_groups["auxiliary"]:
|
|
944
|
+
result["auxiliary_entry_points"] = ep_groups["auxiliary"]
|
|
924
945
|
|
|
925
946
|
# Layer B — signals (only when the corresponding analyzer ran)
|
|
926
947
|
if sm.dependency_summary is not None and sm.dependency_summary.requested:
|
|
@@ -957,9 +978,21 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
|
|
|
957
978
|
|
|
958
979
|
if sm.semantic_summary is not None and sm.semantic_summary.requested:
|
|
959
980
|
result["semantic_summary"] = asdict(sm.semantic_summary)
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
981
|
+
# Defensive filter: never emit objects with null required fields.
|
|
982
|
+
# A null entry in these arrays is worse than a shorter array — it causes
|
|
983
|
+
# agents to misinterpret the analysis as valid when it is not.
|
|
984
|
+
result["semantic_calls"] = [
|
|
985
|
+
asdict(c) for c in sm.semantic_calls
|
|
986
|
+
if c.caller_path and c.callee_path
|
|
987
|
+
]
|
|
988
|
+
result["semantic_symbols"] = [
|
|
989
|
+
asdict(s) for s in sm.semantic_symbols
|
|
990
|
+
if s.symbol and s.kind and s.language and s.path
|
|
991
|
+
]
|
|
992
|
+
result["semantic_links"] = [
|
|
993
|
+
asdict(lnk) for lnk in sm.semantic_links
|
|
994
|
+
if lnk.importer_path and lnk.symbol
|
|
995
|
+
]
|
|
963
996
|
|
|
964
997
|
if sm.metrics_summary is not None and sm.metrics_summary.requested:
|
|
965
998
|
result["metrics_summary"] = asdict(sm.metrics_summary)
|
|
@@ -1113,6 +1146,8 @@ def _contract_view_minimal(
|
|
|
1113
1146
|
summary["degraded"] = True
|
|
1114
1147
|
summary["degraded_hint"] = "install sourcecode[ast] for full TS/JS extraction"
|
|
1115
1148
|
result["summary"] = summary
|
|
1149
|
+
if cs.symbol_truncation:
|
|
1150
|
+
result["symbol_query"] = cs.symbol_truncation
|
|
1116
1151
|
|
|
1117
1152
|
return result
|
|
1118
1153
|
|
|
@@ -1392,6 +1427,8 @@ def _contract_view_standard(
|
|
|
1392
1427
|
}
|
|
1393
1428
|
if cs.limitations:
|
|
1394
1429
|
result["contract_summary"]["limitations"] = cs.limitations
|
|
1430
|
+
if cs.symbol_truncation:
|
|
1431
|
+
result["symbol_query"] = cs.symbol_truncation
|
|
1395
1432
|
|
|
1396
1433
|
return result
|
|
1397
1434
|
|
|
@@ -1,34 +1,34 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=ktYEudER40ycgGKE7MG3VPlJ8UQQPhREtYx6twzDGUM,103
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
|
|
3
|
-
sourcecode/architecture_analyzer.py,sha256=
|
|
3
|
+
sourcecode/architecture_analyzer.py,sha256=O4AXc7l_WTzIXrcAzstqZy-TGKNaFa6p3MzpgVjaO8g,27749
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
|
|
5
5
|
sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
|
|
6
6
|
sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
|
|
7
|
-
sourcecode/cli.py,sha256=
|
|
7
|
+
sourcecode/cli.py,sha256=tsubK4RGYtqQEtknH5bKYgsJmeTWfEWk_DSujbZYb70,68783
|
|
8
8
|
sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
|
|
9
9
|
sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
|
|
10
10
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
11
|
-
sourcecode/contract_model.py,sha256=
|
|
12
|
-
sourcecode/contract_pipeline.py,sha256=
|
|
11
|
+
sourcecode/contract_model.py,sha256=gCf9-Kj0G7l0lvRTAcRfFAfMgs1Rpizv4mKovQLYUkw,3434
|
|
12
|
+
sourcecode/contract_pipeline.py,sha256=dTOvoaJy-S_hLZtpqpLxjb0dmnPyGnKabLUzS3DlJ-s,24278
|
|
13
13
|
sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
|
|
14
14
|
sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
|
|
15
|
-
sourcecode/doc_analyzer.py,sha256=
|
|
15
|
+
sourcecode/doc_analyzer.py,sha256=TttdS7mndKQhyJCfJnnAsyGCJrf-TIL7oXxDlTLUFKE,21248
|
|
16
16
|
sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
|
|
17
|
-
sourcecode/env_analyzer.py,sha256=
|
|
17
|
+
sourcecode/env_analyzer.py,sha256=NFV4PSeBH5GEONOIo2SY5iJRXuuqhAOlRDtTZMqOZTI,18452
|
|
18
18
|
sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
|
|
19
|
-
sourcecode/git_analyzer.py,sha256=
|
|
19
|
+
sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
|
|
20
20
|
sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
|
|
21
21
|
sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
|
|
22
|
-
sourcecode/prepare_context.py,sha256=
|
|
22
|
+
sourcecode/prepare_context.py,sha256=a0_ThVNJ8v98UTrgnrnjacovvCd-2HWJug1scenUtEU,31044
|
|
23
23
|
sourcecode/ranking_engine.py,sha256=XdhzahKGleYNW3N0GqGW9salPOXx2BNp8KqXpaeHHmw,8247
|
|
24
24
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
25
25
|
sourcecode/relevance_scorer.py,sha256=E74w7nlsNVobO3LqKHiMtBd84ONwGp8uDpwXJEjRtLA,8330
|
|
26
26
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
27
27
|
sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
|
|
28
28
|
sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
|
|
29
|
-
sourcecode/schema.py,sha256=
|
|
30
|
-
sourcecode/semantic_analyzer.py,sha256=
|
|
31
|
-
sourcecode/serializer.py,sha256=
|
|
29
|
+
sourcecode/schema.py,sha256=ofEge9hTWHOTjeWt7ceCDQWzP-uhhenrYX2usjW2KVU,22759
|
|
30
|
+
sourcecode/semantic_analyzer.py,sha256=16EFTgM7ooW0m5gNUKOlTSn7IEMLSzKmzQn-cWaSqjs,82604
|
|
31
|
+
sourcecode/serializer.py,sha256=h7KuMcDi7K-BcnDbXZu8q5MTE3PwyIZcU8Is4_Vv32Q,58107
|
|
32
32
|
sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
|
|
33
33
|
sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
|
|
34
34
|
sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
|
|
@@ -59,8 +59,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
59
59
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
60
60
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
61
61
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
62
|
-
sourcecode-0.
|
|
63
|
-
sourcecode-0.
|
|
64
|
-
sourcecode-0.
|
|
65
|
-
sourcecode-0.
|
|
66
|
-
sourcecode-0.
|
|
62
|
+
sourcecode-0.43.0.dist-info/METADATA,sha256=7-5QNqOmUMepNrGq4TmK5JAy-QDIHyWyb8-RdxaRQ0k,25209
|
|
63
|
+
sourcecode-0.43.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
64
|
+
sourcecode-0.43.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
65
|
+
sourcecode-0.43.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
66
|
+
sourcecode-0.43.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|