sourcecode 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.41.0"
3
+ __version__ = "0.42.0"
@@ -172,6 +172,7 @@ class ArchitectureAnalyzer:
172
172
  graph: Optional[ModuleGraph] = None,
173
173
  ) -> ArchitectureAnalysis:
174
174
  limitations: list[str] = []
175
+ evidence: list[dict] = []
175
176
 
176
177
  # Step 1: filter paths
177
178
  filtered = self._filter_paths(sm.file_paths)
@@ -180,6 +181,8 @@ class ArchitectureAnalyzer:
180
181
  requested=True,
181
182
  pattern="unknown",
182
183
  limitations=["Arquitectura no inferida: proyecto sin archivos de codigo suficientes"],
184
+ evidence=[{"type": "none", "paths": [], "reason": "insufficient source files", "confidence": "high"}],
185
+ tentative=False,
183
186
  )
184
187
 
185
188
  # Step 2: domain clustering
@@ -193,17 +196,32 @@ class ArchitectureAnalyzer:
193
196
  elif pattern == "unknown":
194
197
  limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
195
198
 
196
- # Step 3b: monorepo override — workspace config is hard evidence
197
- if self._has_workspace_config(sm.file_paths) and pattern not in (
199
+ # Step 3b: monorepo override — workspace config is hard evidence.
200
+ # Overrides all weak inferred patterns; only truly specialised patterns
201
+ # (cqrs, clean, onion, hexagonal) take precedence over workspace config.
202
+ has_workspace = self._has_workspace_config(sm.file_paths)
203
+ if has_workspace and pattern not in (
198
204
  "monorepo", "cqrs", "clean", "onion", "hexagonal"
199
205
  ):
200
206
  mono_layers = self._detect_monorepo_packages(filtered)
201
- if mono_layers or pattern in (None, "unknown", "flat", "modular", "layered"):
207
+ # Override whenever: monorepo packages detected, OR pattern is any weak/generic type.
208
+ # "fullstack", "layered", "mvc", "microservices", "modular", "flat", "unknown", None
209
+ # all yield to workspace config evidence.
210
+ _WEAK_PATTERNS = {None, "unknown", "flat", "modular", "layered",
211
+ "fullstack", "mvc", "microservices"}
212
+ if mono_layers or pattern in _WEAK_PATTERNS:
202
213
  pattern = "monorepo"
203
214
  layers = mono_layers
204
215
  limitations.append(
205
216
  "Workspace config detectado — arquitectura refleja topologia de paquetes"
206
217
  )
218
+ ws_files = [p for p in sm.file_paths if p.split("/")[-1] in _WORKSPACE_CONFIG_FILES]
219
+ evidence.append({
220
+ "type": "workspace_config",
221
+ "paths": ws_files[:4],
222
+ "reason": "Monorepo workspace config file(s) detected — hard evidence for monorepo topology",
223
+ "confidence": "high",
224
+ })
207
225
 
208
226
  # Step 4: bounded context inference
209
227
  bounded_contexts = self._infer_bounded_contexts(domains, graph)
@@ -212,25 +230,91 @@ class ArchitectureAnalyzer:
212
230
  confidence: Literal["high", "medium", "low"]
213
231
  strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
214
232
  all_layers_weak = layers and all(l.confidence == "low" for l in layers)
233
+
234
+ method = "graph+structure" if graph is not None else "filesystem_inference"
235
+ # High-confidence evidence (workspace config) makes pattern non-tentative.
236
+ tentative = not any(e.get("confidence") == "high" for e in evidence)
237
+
238
+ # _hard_evidence: high-confidence evidence was already set (e.g. workspace_config).
239
+ # When True, tentative must stay False and confidence must stay at least "medium".
240
+ _hard_evidence = not tentative # tentative=False iff high-conf evidence present
241
+
215
242
  if pattern not in (None, "unknown", "flat"):
216
- if all_layers_weak:
243
+ if graph is not None:
244
+ # Import graph provided — structural validation available
245
+ confidence = "medium" if len(strong_domains) >= 3 else "low"
246
+ evidence.append({
247
+ "type": "import_graph",
248
+ "paths": [n.id for n in graph.nodes[:6]],
249
+ "reason": f"Module import graph with {len(graph.nodes)} nodes used for pattern validation",
250
+ "confidence": "medium",
251
+ })
252
+ elif all_layers_weak:
217
253
  # Layers came from file-naming heuristic only, not directory structure
218
254
  confidence = "low"
255
+ if not _hard_evidence:
256
+ tentative = True
219
257
  limitations.append(
220
258
  "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
221
259
  )
260
+ evidence.append({
261
+ "type": "filesystem_naming",
262
+ "paths": [l.files[0] for l in layers if l.files][:6],
263
+ "reason": (
264
+ f"Pattern '{pattern}' inferred from file stem naming conventions only "
265
+ "(e.g. *_controller.py, *_service.py). "
266
+ "No directory structure or import graph confirmation."
267
+ ),
268
+ "confidence": "low",
269
+ })
222
270
  else:
223
- confidence = "medium" if len(strong_domains) >= 3 else "low"
224
- if graph is None:
271
+ # Directory structure match (or monorepo/workspace override with no layers)
272
+ confidence = "medium" if (_hard_evidence or len(strong_domains) >= 3) else "low"
273
+ if confidence == "low" and not _hard_evidence:
274
+ tentative = True
275
+ if not _hard_evidence:
225
276
  limitations.append(
226
277
  "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
227
278
  )
279
+ if not _hard_evidence:
280
+ matched_dirs = sorted({
281
+ p.replace("\\", "/").split("/")[0]
282
+ for layer in layers for p in layer.files
283
+ })
284
+ evidence.append({
285
+ "type": "filesystem_naming",
286
+ "paths": matched_dirs[:8],
287
+ "reason": (
288
+ f"Pattern '{pattern}' inferred from directory names matching layer keywords. "
289
+ "Import graph not available — structural direction of dependencies unverified."
290
+ ),
291
+ "confidence": "low" if confidence == "low" else "medium",
292
+ })
228
293
  elif len(strong_domains) >= 1:
229
294
  confidence = "medium"
295
+ if not _hard_evidence:
296
+ tentative = True
297
+ evidence.append({
298
+ "type": "filesystem_naming",
299
+ "paths": [d.name for d in strong_domains[:6]],
300
+ "reason": "Domain clustering from directory names; no layer pattern confirmed",
301
+ "confidence": "low",
302
+ })
230
303
  else:
231
304
  confidence = "low"
232
-
233
- method = "graph+structure" if graph is not None else "filesystem_inference"
305
+ if not _hard_evidence:
306
+ tentative = True
307
+ if not evidence:
308
+ limitations.append(
309
+ "insufficient_evidence: no recognizable architectural signals found; "
310
+ "filesystem structure does not match known patterns"
311
+ )
312
+ evidence.append({
313
+ "type": "filesystem_naming",
314
+ "paths": filtered[:6],
315
+ "reason": "Only filesystem paths available; no pattern matched",
316
+ "confidence": "low",
317
+ })
234
318
 
235
319
  return ArchitectureAnalysis(
236
320
  requested=True,
@@ -241,6 +325,8 @@ class ArchitectureAnalyzer:
241
325
  confidence=confidence,
242
326
  method=method,
243
327
  limitations=limitations,
328
+ evidence=evidence,
329
+ tentative=tentative,
244
330
  )
245
331
 
246
332
  # ------------------------------------------------------------------
@@ -45,9 +45,10 @@ def _get_changed_files(root: Path) -> set[str]:
45
45
  ]:
46
46
  try:
47
47
  result = subprocess.run(
48
- cmd, cwd=root, capture_output=True, text=True, timeout=10
48
+ cmd, cwd=root, capture_output=True, text=True,
49
+ encoding="utf-8", errors="replace", timeout=10,
49
50
  )
50
- for line in result.stdout.splitlines():
51
+ for line in (result.stdout or "").splitlines():
51
52
  line = line.strip()
52
53
  if line:
53
54
  changed.add(line.replace("\\", "/"))
@@ -56,9 +57,10 @@ def _get_changed_files(root: Path) -> set[str]:
56
57
  try:
57
58
  result = subprocess.run(
58
59
  ["git", "status", "--porcelain"],
59
- cwd=root, capture_output=True, text=True, timeout=10
60
+ cwd=root, capture_output=True, text=True,
61
+ encoding="utf-8", errors="replace", timeout=10,
60
62
  )
61
- for line in result.stdout.splitlines():
63
+ for line in (result.stdout or "").splitlines():
62
64
  if len(line) > 3:
63
65
  changed.add(line[3:].strip().replace("\\", "/"))
64
66
  except Exception:
@@ -129,11 +131,12 @@ def _get_git_churn(root: Path, file_paths: list[str]) -> dict[str, int]:
129
131
  try:
130
132
  result = subprocess.run(
131
133
  ["git", "log", "--name-only", "--format=", "--since=90.days.ago"],
132
- cwd=root, capture_output=True, text=True, timeout=15,
134
+ cwd=root, capture_output=True, text=True,
135
+ encoding="utf-8", errors="replace", timeout=15,
133
136
  )
134
137
  path_set = set(file_paths)
135
138
  counter: Counter[str] = Counter()
136
- for line in result.stdout.splitlines():
139
+ for line in (result.stdout or "").splitlines():
137
140
  line = line.strip().replace("\\", "/")
138
141
  if line in path_set:
139
142
  counter[line] += 1
@@ -132,6 +132,8 @@ class DocAnalyzer:
132
132
  records: list[DocRecord] = []
133
133
  limitations: list[str] = list(limitations_pre)
134
134
  languages: set[str] = set()
135
+ # Track per-language support status for honest reporting
136
+ unsupported_langs: set[str] = set()
135
137
 
136
138
  for relative_path in file_paths:
137
139
  abs_path = root / relative_path
@@ -176,8 +178,18 @@ class DocAnalyzer:
176
178
  # Unsupported language — D-04: no emitir DocRecord, solo registrar limitation
177
179
  limitations.append(f"docs_unavailable:{norm_path}:language={lang}")
178
180
  languages.add(lang)
181
+ unsupported_langs.add(lang)
179
182
  # NO records.append() here
180
183
 
184
+ # Build language_coverage: explicit per-language support status
185
+ _SUPPORTED_LANGS = {"python", "javascript", "typescript"}
186
+ lang_coverage: dict[str, str] = {}
187
+ for lang in languages:
188
+ if lang in _SUPPORTED_LANGS:
189
+ lang_coverage[lang] = "supported"
190
+ else:
191
+ lang_coverage[lang] = "unsupported"
192
+
181
193
  # Build summary
182
194
  symbol_count = sum(1 for r in records if r.kind != "module")
183
195
  total_count = len(records)
@@ -192,6 +204,15 @@ class DocAnalyzer:
192
204
  "no docstrings or JSDoc comments found"
193
205
  )
194
206
 
207
+ # Warn explicitly when unsupported languages are present — agents must not
208
+ # assume full coverage when Java/Go/Rust files are in scope but not analyzed.
209
+ if unsupported_langs:
210
+ sorted_unsupported = sorted(unsupported_langs)
211
+ limitations.append(
212
+ f"docs_not_extracted: language(s) {sorted_unsupported} present but not supported; "
213
+ "only Python and JS/TS docstrings are extracted"
214
+ )
215
+
195
216
  summary = DocSummary(
196
217
  requested=True,
197
218
  total_count=total_count,
@@ -200,6 +221,7 @@ class DocAnalyzer:
200
221
  depth=depth,
201
222
  truncated=truncated,
202
223
  limitations=limitations,
224
+ language_coverage=lang_coverage,
203
225
  )
204
226
  return records, summary
205
227
 
@@ -27,9 +27,13 @@ _ENV_EXAMPLE_NAMES = {
27
27
 
28
28
  # Spring Boot application.properties / application.yml and their profile variants
29
29
  _SPRING_CONF_BASE = {"application.properties", "application.yml", "application.yaml"}
30
- _SPRING_CONF_PROFILE_RE = re.compile(r'^application-[a-z0-9_-]+\.(properties|ya?ml)$', re.IGNORECASE)
31
- # Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE
32
- _SPRING_ENV_REF_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::[^}]*)?\}')
30
+ _SPRING_CONF_PROFILE_RE = re.compile(r'^application-([a-z0-9_-]+)\.(properties|ya?ml)$', re.IGNORECASE)
31
+ # Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE.
32
+ # Group 1 = key, Group 2 = default (may be empty string, absent = no default).
33
+ _SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
34
+ # Matches ${spring.dotted.key} or ${spring.dotted.key:default} — Spring property references.
35
+ # These are internal property cross-references, not OS env vars, but still config signals.
36
+ _SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
33
37
 
34
38
  # Patterns where absence of the variable causes a hard runtime error (not just None/null).
35
39
  # py_environ_bracket → os.environ["KEY"] raises KeyError
@@ -140,9 +144,9 @@ def _infer_type_hint(key: str) -> str:
140
144
  def _scan_file(
141
145
  path: Path,
142
146
  rel_path: str,
143
- findings: dict[str, list[tuple[str, Optional[str], bool]]],
147
+ findings: dict[str, list[tuple[str, Optional[str], bool, Optional[str]]]],
144
148
  ) -> None:
145
- """Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard)]."""
149
+ """Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard, profile)]."""
146
150
  try:
147
151
  size = path.stat().st_size
148
152
  if size > _MAX_FILE_SIZE:
@@ -168,7 +172,7 @@ def _scan_file(
168
172
 
169
173
  line_num = content.count("\n", 0, m.start()) + 1
170
174
  file_ref = f"{rel_path}:{line_num}"
171
- findings[key].append((file_ref, default, is_hard))
175
+ findings[key].append((file_ref, default, is_hard, None))
172
176
 
173
177
 
174
178
  def _parse_env_example(
@@ -204,22 +208,66 @@ def _parse_env_example(
204
208
  return results
205
209
 
206
210
 
211
+ def _extract_spring_profile(filename: str) -> Optional[str]:
212
+ """Extract Spring profile from filename.
213
+
214
+ application.yml / application.properties → 'default'
215
+ application-m3dev.yml → 'm3dev'
216
+ """
217
+ name_lower = filename.lower()
218
+ if name_lower in _SPRING_CONF_BASE:
219
+ return "default"
220
+ m = _SPRING_CONF_PROFILE_RE.match(name_lower)
221
+ if m:
222
+ return m.group(1)
223
+ return None
224
+
225
+
207
226
  def _parse_spring_config(
208
227
  path: Path,
209
228
  rel_path: str,
210
229
  findings: dict,
211
- ) -> None:
212
- """Parse application.properties / application.yml looking for ${ENV_VAR} refs."""
230
+ profile: Optional[str] = None,
231
+ ) -> int:
232
+ """Parse application.properties / application.yml for ${ENV_VAR} refs.
233
+
234
+ Returns the total number of ${...} placeholders found (candidates).
235
+ Captures default values from ${VAR:default} syntax.
236
+ Marks vars without defaults as hard-required (Spring fails to start if missing).
237
+ """
213
238
  try:
214
239
  content = path.read_text(encoding="utf-8", errors="replace")
215
240
  except OSError:
216
- return
241
+ return 0
217
242
 
218
- for m in _SPRING_ENV_REF_RE.finditer(content):
243
+ candidates = 0
244
+
245
+ # 1. UPPER_SNAKE_CASE env var references: ${DB_HOST} or ${DB_HOST:localhost}
246
+ for m in _SPRING_ENV_VAR_RE.finditer(content):
247
+ key = m.group(1)
248
+ raw_default = m.group(2) # None if no colon, "" if colon with empty default
249
+ # A colon means a default was specified (even if empty string)
250
+ has_default = raw_default is not None
251
+ default: Optional[str] = raw_default if (raw_default and raw_default.strip()) else None
252
+ line_num = content.count("\n", 0, m.start()) + 1
253
+ # Hard required only when no default is provided
254
+ is_hard = not has_default
255
+ findings[key].append((f"{rel_path}:{line_num}", default, is_hard, profile))
256
+ candidates += 1
257
+
258
+ # 2. lowercase.dotted Spring property refs: ${spring.datasource.url:default}
259
+ # These are internal property cross-references; store with a special prefix so
260
+ # callers can distinguish them from OS env vars. We do NOT mark them hard-required
261
+ # because they reference Spring's own property resolution chain.
262
+ for m in _SPRING_PROP_REF_RE.finditer(content):
219
263
  key = m.group(1)
264
+ raw_default = m.group(2)
265
+ default = raw_default if (raw_default and raw_default.strip()) else None
220
266
  line_num = content.count("\n", 0, m.start()) + 1
221
- # Spring fails to start if a referenced env var has no default hard required
222
- findings[key].append((f"{rel_path}:{line_num}", None, True))
267
+ findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
268
+ candidates += 1
269
+
270
+ return candidates
223
271
 
224
272
 
225
273
  class EnvAnalyzer:
@@ -232,13 +280,18 @@ class EnvAnalyzer:
232
280
  ) -> tuple[list, object]:
233
281
  from sourcecode.schema import EnvSummary, EnvVarRecord
234
282
 
235
- # findings[key] = list of (file_ref, default_or_None, is_hard_required)
236
- findings: dict[str, list[tuple[str, Optional[str], bool]]] = defaultdict(list)
283
+ # findings[key] = list of (file_ref, default_or_None, is_hard_required, profile_or_None)
284
+ findings: dict[str, list[tuple[str, Optional[str], bool, Optional[str]]]] = defaultdict(list)
237
285
  example_entries: list[tuple[str, Optional[str], Optional[str]]] = []
238
286
  example_files_found: list[str] = []
239
287
  limitations: list[str] = []
288
+ profiles_scanned: list[str] = []
289
+ spring_candidates: int = 0
240
290
 
241
- self._walk(root, root, findings, example_entries, example_files_found, limitations)
291
+ spring_candidates = self._walk(
292
+ root, root, findings, example_entries, example_files_found,
293
+ limitations, profiles_scanned,
294
+ )
242
295
 
243
296
  # Merge findings into EnvVarRecord per key
244
297
  records: dict[str, EnvVarRecord] = {}
@@ -248,19 +301,23 @@ class EnvAnalyzer:
248
301
  if len(records) >= _MAX_KEYS:
249
302
  limitations.append(f"key_limit_reached:{_MAX_KEYS}")
250
303
  break
251
- defaults = [d for _, d, _ in refs if d is not None]
304
+ defaults = [d for _, d, _, _ in refs if d is not None]
252
305
  # required only when access pattern causes a hard runtime error if missing:
253
306
  # os.environ["KEY"] (KeyError) or Spring @Value/${KEY} without default.
254
307
  # os.getenv("KEY") / os.environ.get("KEY") return None — not hard required.
255
- has_hard_access = any(is_hard for _, _, is_hard in refs)
308
+ has_hard_access = any(is_hard for _, _, is_hard, _ in refs)
256
309
  required = has_hard_access and not defaults
257
310
  default_val = defaults[0] if defaults else None
258
311
  unique_files: list[str] = []
259
312
  seen: set[str] = set()
260
- for file_ref, _, _ in refs:
313
+ # Collect first profile seen for this key (from Spring config files)
314
+ first_profile: Optional[str] = None
315
+ for file_ref, _, _, prof in refs:
261
316
  if file_ref not in seen:
262
317
  seen.add(file_ref)
263
318
  unique_files.append(file_ref)
319
+ if first_profile is None and prof is not None:
320
+ first_profile = prof
264
321
  if len(unique_files) >= _MAX_FILES_PER_KEY:
265
322
  break
266
323
  records[key] = EnvVarRecord(
@@ -270,6 +327,7 @@ class EnvAnalyzer:
270
327
  type_hint=_infer_type_hint(key),
271
328
  category=_infer_category(key),
272
329
  files=unique_files,
330
+ profile=first_profile,
273
331
  )
274
332
 
275
333
  # 2. Supplement with .env.example entries (fill description + add missing keys)
@@ -300,6 +358,20 @@ class EnvAnalyzer:
300
358
  # Build summary
301
359
  categories = sorted({r.category for r in sorted_records if r.category})
302
360
  required_count = sum(1 for r in sorted_records if r.required)
361
+
362
+ # Coverage note: warn if Spring config was scanned but coverage seems partial
363
+ coverage_note: Optional[str] = None
364
+ if profiles_scanned and spring_candidates > 0:
365
+ spring_key_count = sum(
366
+ 1 for r in sorted_records if r.profile is not None
367
+ )
368
+ if spring_key_count < spring_candidates:
369
+ coverage_note = (
370
+ f"{spring_candidates} Spring ${{VAR}} placeholder(s) found across "
371
+ f"{len(profiles_scanned)} profile(s); {spring_key_count} unique key(s) "
372
+ "extracted. Duplicates across profiles collapsed."
373
+ )
374
+
303
375
  summary = EnvSummary(
304
376
  requested=True,
305
377
  total=len(sorted_records),
@@ -308,6 +380,9 @@ class EnvAnalyzer:
308
380
  categories=categories,
309
381
  example_files_found=example_files_found,
310
382
  limitations=limitations,
383
+ profiles_scanned=sorted(set(profiles_scanned)),
384
+ spring_candidates=spring_candidates,
385
+ coverage_note=coverage_note,
311
386
  )
312
387
 
313
388
  return sorted_records, summary
@@ -320,11 +395,15 @@ class EnvAnalyzer:
320
395
  example_entries: list,
321
396
  example_files_found: list,
322
397
  limitations: list,
323
- ) -> None:
398
+ profiles_scanned: list,
399
+ ) -> int:
400
+ """Walk the directory tree accumulating env var findings. Returns spring_candidates count."""
324
401
  try:
325
402
  entries = sorted(current.iterdir())
326
403
  except PermissionError:
327
- return
404
+ return 0
405
+
406
+ total_spring_candidates = 0
328
407
 
329
408
  for entry in entries:
330
409
  name = entry.name
@@ -333,7 +412,10 @@ class EnvAnalyzer:
333
412
  if entry.is_dir():
334
413
  if name in _SKIP_DIRS:
335
414
  continue
336
- self._walk(root, entry, findings, example_entries, example_files_found, limitations)
415
+ total_spring_candidates += self._walk(
416
+ root, entry, findings, example_entries, example_files_found,
417
+ limitations, profiles_scanned,
418
+ )
337
419
  elif entry.is_file():
338
420
  rel = entry.relative_to(root).as_posix()
339
421
  name_lower = name.lower()
@@ -344,13 +426,19 @@ class EnvAnalyzer:
344
426
  continue
345
427
  # Spring Boot application.properties / application.yml (incl. profiles)
346
428
  if name_lower in _SPRING_CONF_BASE or _SPRING_CONF_PROFILE_RE.match(name_lower):
347
- _parse_spring_config(entry, rel, findings)
429
+ profile = _extract_spring_profile(name)
430
+ if profile and profile not in profiles_scanned:
431
+ profiles_scanned.append(profile)
432
+ count = _parse_spring_config(entry, rel, findings, profile)
433
+ total_spring_candidates += count
348
434
  continue
349
435
  # Source code files
350
436
  suffix = entry.suffix.lower()
351
437
  if suffix in _CODE_EXTENSIONS:
352
438
  _scan_file(entry, rel, findings)
353
439
 
440
+ return total_spring_candidates
441
+
354
442
 
355
443
  def _replace_description(record, description: str):
356
444
  from dataclasses import replace
@@ -60,9 +60,13 @@ def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
60
60
  ["git", "-C", str(cwd)] + args,
61
61
  capture_output=True,
62
62
  text=True,
63
+ encoding="utf-8",
64
+ errors="replace",
63
65
  timeout=timeout,
64
66
  )
65
- return result.stdout, result.returncode
67
+ # `result.stdout` is typed Optional[str]; guard against None on edge-case
68
+ # platforms (Windows subprocess encoding failures, detached processes, etc.)
69
+ return result.stdout or "", result.returncode
66
70
 
67
71
 
68
72
  class GitAnalyzer:
@@ -80,6 +84,7 @@ class GitAnalyzer:
80
84
  branch: Optional[str] = None
81
85
  recent_commits: list[CommitRecord] = []
82
86
  change_hotspots: list[ChangeHotspot] = []
87
+ hotspots_status: str = "ok"
83
88
  uncommitted: Optional[UncommittedChanges] = None
84
89
  contributors: list[str] = []
85
90
 
@@ -137,8 +142,10 @@ class GitAnalyzer:
137
142
  change_hotspots = _parse_hotspots(stdout)
138
143
  except subprocess.TimeoutExpired:
139
144
  limitations.append("hotspots_timeout")
145
+ hotspots_status = "failed"
140
146
  except Exception as exc:
141
147
  limitations.append(f"hotspots_error:{exc}")
148
+ hotspots_status = "failed"
142
149
 
143
150
  try:
144
151
  stdout, _ = _run_git(["status", "--porcelain"], path, timeout=10)
@@ -166,6 +173,7 @@ class GitAnalyzer:
166
173
  branch=branch,
167
174
  recent_commits=recent_commits,
168
175
  change_hotspots=change_hotspots,
176
+ hotspots_status=hotspots_status,
169
177
  uncommitted_changes=uncommitted,
170
178
  contributors=contributors,
171
179
  git_summary=git_summary,
@@ -228,9 +236,12 @@ def _is_hotspot_admin(path: str) -> bool:
228
236
  return False
229
237
 
230
238
 
231
- def _parse_hotspots(output: str) -> list:
239
+ def _parse_hotspots(output: str | None) -> list:
232
240
  from sourcecode.schema import ChangeHotspot
233
241
 
242
+ if not output:
243
+ return []
244
+
234
245
  file_counts: Counter = Counter()
235
246
  file_last_date: dict[str, str] = {}
236
247
  current_date = ""
@@ -728,11 +728,13 @@ class TaskContextBuilder:
728
728
  cwd=str(self.root),
729
729
  capture_output=True,
730
730
  text=True,
731
+ encoding="utf-8",
732
+ errors="replace",
731
733
  timeout=10,
732
734
  )
733
735
  if result.returncode == 0:
734
736
  return [
735
- line.strip() for line in result.stdout.splitlines()
737
+ line.strip() for line in (result.stdout or "").splitlines()
736
738
  if line.strip()
737
739
  ]
738
740
  except (subprocess.TimeoutExpired, FileNotFoundError):
@@ -744,10 +746,12 @@ class TaskContextBuilder:
744
746
  cwd=str(self.root),
745
747
  capture_output=True,
746
748
  text=True,
749
+ encoding="utf-8",
750
+ errors="replace",
747
751
  timeout=10,
748
752
  )
749
753
  if result.returncode == 0:
750
- return [line.strip() for line in result.stdout.splitlines() if line.strip()]
754
+ return [line.strip() for line in (result.stdout or "").splitlines() if line.strip()]
751
755
  except (subprocess.TimeoutExpired, FileNotFoundError):
752
756
  pass
753
757
  return []
sourcecode/schema.py CHANGED
@@ -252,6 +252,9 @@ class DocSummary:
252
252
  depth: Optional[DocsDepth] = None
253
253
  truncated: bool = False
254
254
  limitations: list[str] = field(default_factory=list)
255
+ # Per-language support status: "supported" | "unsupported" | "partial"
256
+ # Absent key = language not present in scanned files.
257
+ language_coverage: dict[str, str] = field(default_factory=dict)
255
258
 
256
259
 
257
260
  @dataclass
@@ -303,11 +306,21 @@ class SemanticSummary:
303
306
  """Summary of the --semantics analysis."""
304
307
 
305
308
  requested: bool = False
309
+ # Explicit analysis outcome — never omit, never silent.
310
+ # "ok": analysis ran and produced results
311
+ # "partial": analysis ran but with significant coverage gaps
312
+ # "failed": analysis could not produce useful results
313
+ status: str = "ok"
314
+ reason: Optional[str] = None # human-readable failure/partial reason
306
315
  call_count: int = 0
307
316
  symbol_count: int = 0
308
317
  link_count: int = 0
309
318
  languages: list[str] = field(default_factory=list)
310
319
  language_coverage: dict[str, str] = field(default_factory=dict)
320
+ # Structured per-language support details. Each value:
321
+ # {"supported": bool, "status": str, "reason": str}
322
+ # status: "full" | "heuristic" | "unsupported"
323
+ language_coverage_details: dict[str, Any] = field(default_factory=dict)
311
324
  files_analyzed: int = 0
312
325
  files_skipped: int = 0
313
326
  truncated: bool = False
@@ -393,6 +406,13 @@ class ArchitectureAnalysis:
393
406
  confidence: Literal["high", "medium", "low"] = "low"
394
407
  method: str = "heuristic"
395
408
  limitations: list[str] = field(default_factory=list)
409
+ # Structured evidence for each architectural inference.
410
+ # Each entry: {"type": str, "paths": list[str], "reason": str, "confidence": str}
411
+ # type: "workspace_config" | "filesystem_naming" | "import_graph" | "entry_files"
412
+ evidence: list[dict] = field(default_factory=list)
413
+ # True when pattern is inferred from weak signals (e.g. directory names only).
414
+ # Agents must not treat tentative patterns as confirmed facts.
415
+ tentative: bool = False
396
416
 
397
417
 
398
418
  # --- Env Map ---
@@ -408,6 +428,7 @@ class EnvVarRecord:
408
428
  category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general
409
429
  description: Optional[str] = None
410
430
  files: list[str] = field(default_factory=list) # "path:line"
431
+ profile: Optional[str] = None # Spring profile if first occurrence is in application-{profile}.yml
411
432
 
412
433
 
413
434
  @dataclass
@@ -421,6 +442,10 @@ class EnvSummary:
421
442
  categories: list[str] = field(default_factory=list)
422
443
  example_files_found: list[str] = field(default_factory=list)
423
444
  limitations: list[str] = field(default_factory=list)
445
+ # Spring Boot coverage metadata
446
+ profiles_scanned: list[str] = field(default_factory=list)
447
+ spring_candidates: int = 0 # total ${VAR} refs found across Spring config files
448
+ coverage_note: Optional[str] = None # explicit note about partial coverage
424
449
 
425
450
 
426
451
  # --- Code Notes ---
@@ -557,6 +582,10 @@ class GitContext:
557
582
  branch: Optional[str] = None
558
583
  recent_commits: list[CommitRecord] = field(default_factory=list)
559
584
  change_hotspots: list[ChangeHotspot] = field(default_factory=list)
585
+ # Explicit hotspot analysis outcome — distinguishes "no hotspots found" from "analysis failed".
586
+ # "ok": hotspot analysis ran (change_hotspots may still be empty if no changes in window)
587
+ # "failed": hotspot analysis threw an exception (see limitations for hotspots_error:...)
588
+ hotspots_status: str = "ok"
560
589
  uncommitted_changes: Optional[UncommittedChanges] = None
561
590
  contributors: list[str] = field(default_factory=list)
562
591
  git_summary: Optional[str] = None
@@ -343,8 +343,14 @@ class SemanticAnalyzer:
343
343
 
344
344
  # Plan 12-02: language_coverage["python"] = "full" when Python files are analyzed
345
345
  lang_coverage: dict[str, str] = {}
346
+ lang_coverage_details: dict[str, Any] = {}
346
347
  if source_files:
347
348
  lang_coverage["python"] = "full"
349
+ lang_coverage_details["python"] = {
350
+ "supported": True,
351
+ "status": "full",
352
+ "reason": "AST-based: symbols, cross-file calls, and imports fully resolved",
353
+ }
348
354
 
349
355
  # -----------------------------------------------------------------------
350
356
  # Plan 12-03: JS/TS analysis block
@@ -489,6 +495,12 @@ class SemanticAnalyzer:
489
495
  js_languages.add("javascript")
490
496
  languages.extend(sorted(js_languages))
491
497
  lang_coverage["nodejs"] = "heuristic"
498
+ for js_lang in js_languages:
499
+ lang_coverage_details[js_lang] = {
500
+ "supported": True,
501
+ "status": "heuristic",
502
+ "reason": "Regex-based: exports/imports extracted; cross-file call resolution is heuristic, not AST",
503
+ }
492
504
 
493
505
  # -----------------------------------------------------------------------
494
506
  # Plan 12-04: Go analysis block
@@ -530,6 +542,11 @@ class SemanticAnalyzer:
530
542
  files_analyzed += 1
531
543
  languages.append("go")
532
544
  lang_coverage["go"] = "heuristic"
545
+ lang_coverage_details["go"] = {
546
+ "supported": True,
547
+ "status": "heuristic",
548
+ "reason": "Regex-based: func/struct names and same-file calls extracted; no cross-file resolution",
549
+ }
533
550
 
534
551
  # -----------------------------------------------------------------------
535
552
  # Plan 12-04: Rust analysis block
@@ -571,6 +588,11 @@ class SemanticAnalyzer:
571
588
  files_analyzed += 1
572
589
  languages.append("rust")
573
590
  lang_coverage["rust"] = "heuristic"
591
+ lang_coverage_details["rust"] = {
592
+ "supported": True,
593
+ "status": "heuristic",
594
+ "reason": "Regex-based: fn/struct names and module-qualified calls extracted; no cross-file resolution",
595
+ }
574
596
 
575
597
  # -----------------------------------------------------------------------
576
598
  # Plan 12-04: JVM analysis block (Java, Kotlin, Scala)
@@ -612,14 +634,56 @@ class SemanticAnalyzer:
612
634
  files_analyzed += 1
613
635
  languages.append("java")
614
636
  lang_coverage["java"] = "heuristic"
637
+ lang_coverage_details["java"] = {
638
+ "supported": True,
639
+ "status": "heuristic",
640
+ "reason": (
641
+ "Regex-based only: class/interface/method names extracted, "
642
+ "same-file call sites detected. "
643
+ "No cross-file resolution, no type inference, no import graph. "
644
+ "Spring annotations (@Service, @Component, etc.) not semantically interpreted."
645
+ ),
646
+ }
647
+
648
+ # Determine explicit analysis status — never emit silent empty results.
649
+ # An agent must be able to tell "analysis ran and found nothing" from
650
+ # "analysis failed to run" or "significant coverage gap".
651
+ _total_candidates = (
652
+ len(source_files)
653
+ + len(js_source_files)
654
+ + len(go_source_files)
655
+ + len(rust_source_files)
656
+ + len(jvm_source_files)
657
+ )
658
+ if _total_candidates == 0:
659
+ _sem_status = "failed"
660
+ _sem_reason = "no analyzable source files found in project"
661
+ elif files_analyzed == 0:
662
+ _sem_status = "failed"
663
+ _sem_reason = (
664
+ f"all {_total_candidates} candidate file(s) failed to analyze; "
665
+ "check limitations for parse/read errors"
666
+ )
667
+ elif files_analyzed < _total_candidates // 2 and _total_candidates > 4:
668
+ _sem_status = "partial"
669
+ _sem_reason = (
670
+ f"{files_analyzed} of {_total_candidates} file(s) analyzed; "
671
+ f"{files_skipped} skipped; see limitations"
672
+ )
673
+ else:
674
+ _sem_status = "ok"
675
+ _sem_reason = None
615
676
 
616
677
  summary = SemanticSummary(
617
678
  requested=True,
679
+ status=_sem_status,
680
+ reason=_sem_reason,
618
681
  call_count=len(calls),
619
682
  symbol_count=len(all_symbols),
620
683
  link_count=len(links),
621
684
  languages=languages,
622
685
  language_coverage=lang_coverage,
686
+ language_coverage_details=lang_coverage_details,
623
687
  files_analyzed=files_analyzed,
624
688
  files_skipped=files_skipped,
625
689
  truncated=truncated,
sourcecode/serializer.py CHANGED
@@ -957,9 +957,21 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
957
957
 
958
958
  if sm.semantic_summary is not None and sm.semantic_summary.requested:
959
959
  result["semantic_summary"] = asdict(sm.semantic_summary)
960
- result["semantic_calls"] = [asdict(c) for c in sm.semantic_calls]
961
- result["semantic_symbols"] = [asdict(s) for s in sm.semantic_symbols]
962
- result["semantic_links"] = [asdict(lnk) for lnk in sm.semantic_links]
960
+ # Defensive filter: never emit objects with null required fields.
961
+ # A null entry in these arrays is worse than a shorter array — it causes
962
+ # agents to misinterpret the analysis as valid when it is not.
963
+ result["semantic_calls"] = [
964
+ asdict(c) for c in sm.semantic_calls
965
+ if c.caller_path and c.callee_path
966
+ ]
967
+ result["semantic_symbols"] = [
968
+ asdict(s) for s in sm.semantic_symbols
969
+ if s.symbol and s.kind and s.language and s.path
970
+ ]
971
+ result["semantic_links"] = [
972
+ asdict(lnk) for lnk in sm.semantic_links
973
+ if lnk.importer_path and lnk.symbol
974
+ ]
963
975
 
964
976
  if sm.metrics_summary is not None and sm.metrics_summary.requested:
965
977
  result["metrics_summary"] = asdict(sm.metrics_summary)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.41.0
3
+ Version: 0.42.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -1,6 +1,6 @@
1
- sourcecode/__init__.py,sha256=Z0LOxVp01ZH1jSUmGwFp1S832KRn_Hq6x3bcAaQ-10c,103
1
+ sourcecode/__init__.py,sha256=K7shxEMemP2ulUio4YBuziIbKkDcIuDkcsLEFth5CwM,103
2
2
  sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
3
- sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
3
+ sourcecode/architecture_analyzer.py,sha256=O4AXc7l_WTzIXrcAzstqZy-TGKNaFa6p3MzpgVjaO8g,27749
4
4
  sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
5
5
  sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
6
6
  sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
@@ -9,26 +9,26 @@ sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvU
9
9
  sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
10
10
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
11
11
  sourcecode/contract_model.py,sha256=wpYNWGzHAVnyGxniGqNMk96TCmWbVVOqNSc3Kauajrg,3348
12
- sourcecode/contract_pipeline.py,sha256=m2xPFLYWkTRvEv9L7iV9gqE0JRDxYhnx_IcQNo5P9es,22793
12
+ sourcecode/contract_pipeline.py,sha256=af30z1l4LiSOngawYkrpzQC-8huIJOgbQ8EJrq_PDSc,22967
13
13
  sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
14
14
  sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
15
- sourcecode/doc_analyzer.py,sha256=KLQ8g5cFTLEnZfH2xh7Z1t936oS6N6fP5L6YplhbtzM,20182
15
+ sourcecode/doc_analyzer.py,sha256=TttdS7mndKQhyJCfJnnAsyGCJrf-TIL7oXxDlTLUFKE,21248
16
16
  sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
17
- sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
17
+ sourcecode/env_analyzer.py,sha256=NFV4PSeBH5GEONOIo2SY5iJRXuuqhAOlRDtTZMqOZTI,18452
18
18
  sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
19
- sourcecode/git_analyzer.py,sha256=khF1AOT8dL5RP9d_tDqDpE8FXEvCa6Ns14L4BXjFcs0,11179
19
+ sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
20
20
  sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
21
21
  sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
22
- sourcecode/prepare_context.py,sha256=zYRcRFc9OXN_V-3eKcVmA6wwO9A8uhUjM2cqkkp1dV0,30892
22
+ sourcecode/prepare_context.py,sha256=a0_ThVNJ8v98UTrgnrnjacovvCd-2HWJug1scenUtEU,31044
23
23
  sourcecode/ranking_engine.py,sha256=XdhzahKGleYNW3N0GqGW9salPOXx2BNp8KqXpaeHHmw,8247
24
24
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
25
25
  sourcecode/relevance_scorer.py,sha256=E74w7nlsNVobO3LqKHiMtBd84ONwGp8uDpwXJEjRtLA,8330
26
26
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
27
27
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
28
28
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
29
- sourcecode/schema.py,sha256=dVA-3EbHBakHLkgeZF-LfjKClEFRgPZkzblXpDTshFA,20796
30
- sourcecode/semantic_analyzer.py,sha256=CBRRt92AFucf8vhKbly24132sM3EEIaZZpzFsUDpsUI,79617
31
- sourcecode/serializer.py,sha256=1wWmBUTY1SoRBedVnE4_mPEzEL8xYsoZ8hamvpQiTvc,56477
29
+ sourcecode/schema.py,sha256=ofEge9hTWHOTjeWt7ceCDQWzP-uhhenrYX2usjW2KVU,22759
30
+ sourcecode/semantic_analyzer.py,sha256=16EFTgM7ooW0m5gNUKOlTSn7IEMLSzKmzQn-cWaSqjs,82604
31
+ sourcecode/serializer.py,sha256=VUiBxA2w9CqlblXqhHQMXEUvysxTaNljgiATbw6MJ4A,56927
32
32
  sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
33
33
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
34
34
  sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
@@ -59,8 +59,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
59
59
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
60
60
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
61
61
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
62
- sourcecode-0.41.0.dist-info/METADATA,sha256=NinjVy-jlbAy-be1L-ejAtO5j7HiAZwi5B3C4CbOCqk,25209
63
- sourcecode-0.41.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
- sourcecode-0.41.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
65
- sourcecode-0.41.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
66
- sourcecode-0.41.0.dist-info/RECORD,,
62
+ sourcecode-0.42.0.dist-info/METADATA,sha256=-H--yzWSnQ5wpiUOXDmKirFowuaAGWb-LhUMSLYiTQ8,25209
63
+ sourcecode-0.42.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
+ sourcecode-0.42.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
65
+ sourcecode-0.42.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
66
+ sourcecode-0.42.0.dist-info/RECORD,,