sourcecode 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.2.0"
@@ -176,6 +176,40 @@ class ArchitectureAnalyzer:
176
176
 
177
177
  # Step 1: filter paths
178
178
  filtered = self._filter_paths(sm.file_paths)
179
+
180
+ # Step 1b: DDD filesystem detection — runs before the filtered-paths guard
181
+ # because DDD signals live in directory structure, not just file extensions.
182
+ ddd_result = self._detect_ddd(sm.file_paths)
183
+ if ddd_result is not None:
184
+ ddd_pattern, ddd_layers, ddd_contexts, ddd_layer_names = ddd_result
185
+ domains_for_ddd = self._cluster_domains(filtered) if len(filtered) >= 2 else []
186
+ module_files = self._build_ddd_module_files(sm.file_paths, ddd_contexts)
187
+ bc_list = [
188
+ BoundedContext(name=n, modules=module_files.get(n, []), confidence="high")
189
+ for n in ddd_contexts
190
+ ]
191
+ return ArchitectureAnalysis(
192
+ requested=True,
193
+ pattern=ddd_pattern,
194
+ domains=domains_for_ddd,
195
+ layers=ddd_layers,
196
+ bounded_contexts=bc_list,
197
+ ddd_layers_detected=ddd_layer_names,
198
+ confidence="high",
199
+ method="filesystem_inference",
200
+ limitations=[],
201
+ evidence=[{
202
+ "type": "filesystem_naming",
203
+ "paths": [f"{ddd_contexts[0]}/" if ddd_contexts else ""],
204
+ "reason": (
205
+ f"DDD layout detected: {len(ddd_contexts)} modules under common prefix "
206
+ "each contain application/, domain/, infrastructure/ subdirectories."
207
+ ),
208
+ "confidence": "high",
209
+ }],
210
+ tentative=False,
211
+ )
212
+
179
213
  if len(filtered) < 2:
180
214
  return ArchitectureAnalysis(
181
215
  requested=True,
@@ -333,6 +367,73 @@ class ArchitectureAnalyzer:
333
367
  # Private helpers
334
368
  # ------------------------------------------------------------------
335
369
 
370
def _detect_ddd(
    self, paths: list[str]
) -> "Optional[tuple[str, list[ArchitectureLayer], list[str], list[str]]]":
    """Detect a DDD layout purely from directory structure.

    A module qualifies when directories named ``application``, ``domain``
    and ``infrastructure`` all appear directly beneath it. Detection
    succeeds when at least five qualifying modules share one parent prefix.

    Args:
        paths: File paths; both ``/`` and ``\\`` separators are accepted.

    Returns:
        ``None`` when no prefix reaches the module threshold. Otherwise a
        tuple ``("ddd", layers, bounded_context_names, layer_names)`` where
        ``bounded_context_names`` is the sorted list of qualifying modules
        under the winning prefix and ``layer_names`` is the sorted list of
        the three layer names. Each layer's ``files`` lists every input path
        containing ``/<layer>/`` — it is not restricted to the winning
        prefix.
    """
    _DDD_LAYERS = frozenset({"application", "domain", "infrastructure"})
    _DDD_MIN_MODULES = 5

    # Normalise separators once; the pairs are reused when collecting layer files.
    normalized = [(original, original.replace("\\", "/")) for original in paths]

    # Map (prefix, module) → set of DDD layer names found under that module
    prefix_module_layers: dict[tuple[str, str], set[str]] = {}
    for _, norm in normalized:
        parts = norm.split("/")
        # Only directory components can be a layer: the final component is the
        # file name, and a file that happens to be called "domain" is not a layer.
        for i, part in enumerate(parts[:-1]):
            # i >= 2 guarantees both a module (i - 1) and a non-empty prefix.
            if part in _DDD_LAYERS and i >= 2:
                key = ("/".join(parts[:i - 1]), parts[i - 1])
                prefix_module_layers.setdefault(key, set()).add(part)
                break  # the first layer directory decides the module

    # Group by prefix, keeping only modules that contain all three layers
    prefix_modules: dict[str, list[str]] = {}
    for (prefix, module), layers_found in prefix_module_layers.items():
        if _DDD_LAYERS <= layers_found:
            prefix_modules.setdefault(prefix, []).append(module)

    # Ties are resolved by first insertion, i.e. by input path order.
    best_prefix = max(
        prefix_modules,
        key=lambda p: len(prefix_modules[p]),
        default=None,
    )
    if best_prefix is None or len(prefix_modules[best_prefix]) < _DDD_MIN_MODULES:
        return None

    # (prefix, module) keys are unique, so the module list has no duplicates.
    bounded_context_names = sorted(prefix_modules[best_prefix])
    ddd_layer_names = sorted(_DDD_LAYERS)

    arch_layers: list[ArchitectureLayer] = [
        ArchitectureLayer(
            name=layer,
            pattern="ddd",
            files=[original for original, norm in normalized if f"/{layer}/" in norm],
            confidence="high",
        )
        for layer in ddd_layer_names
    ]
    return "ddd", arch_layers, bounded_context_names, ddd_layer_names
420
+
421
def _build_ddd_module_files(
    self, paths: list[str], bounded_context_names: list[str]
) -> "dict[str, list[str]]":
    """Group file paths by the DDD module they belong to.

    A path belongs to a module when its first ``application``, ``domain`` or
    ``infrastructure`` directory (at depth >= 2) sits directly under a
    directory whose name is in ``bounded_context_names``.

    Args:
        paths: File paths; both ``/`` and ``\\`` separators are accepted.
        bounded_context_names: Module names to collect files for.

    Returns:
        Mapping of module name → matching paths, in input order and with
        their original separators. Modules without files are absent.
        Matching is by module name only, so same-named modules under
        different prefixes share one entry.
    """
    _DDD_LAYERS = frozenset({"application", "domain", "infrastructure"})
    wanted = set(bounded_context_names)  # O(1) membership inside the loop
    module_files: dict[str, list[str]] = {}
    for p in paths:
        parts = p.replace("\\", "/").split("/")
        # Skip the final component: it is the file name, never a layer directory.
        for i, part in enumerate(parts[:-1]):
            if part in _DDD_LAYERS and i >= 2:
                mod = parts[i - 1]
                if mod in wanted:
                    module_files.setdefault(mod, []).append(p)
                break  # only the first layer directory is considered
    return module_files
436
+
336
437
  def _is_tooling(self, path: str) -> bool:
337
438
  norm = path.replace("\\", "/")
338
439
  return any(norm.startswith(p) for p in _TOOLING_PREFIXES)
@@ -84,6 +84,11 @@ class ArchitectureSummarizer:
84
84
  elif suffix in {".cs", ".fs", ".vb"}:
85
85
  lang_lines = self._summarize_dotnet_entry(sm.stacks)
86
86
 
87
+ # MyBatis XML mapper count line (Java projects)
88
+ mybatis_line = self._mybatis_summary_line(file_paths)
89
+ if mybatis_line:
90
+ lang_lines.append(mybatis_line)
91
+
87
92
  # Merge: rich lines first, stack-specific details appended (deduped)
88
93
  lines = rich_lines + [l for l in lang_lines if l not in rich_lines]
89
94
 
@@ -296,6 +301,13 @@ class ArchitectureSummarizer:
296
301
  lines.append("Orquesta el arranque de la aplicacion JVM.")
297
302
  return lines
298
303
 
304
+ def _mybatis_summary_line(self, file_paths: list[str]) -> str | None:
305
+ """Return a summary line when >5 MyBatis XML mappers are detected."""
306
+ mapper_xml_count = sum(1 for p in file_paths if p.endswith("Mapper.xml"))
307
+ if mapper_xml_count > 5:
308
+ return f"MyBatis XML mappers: {mapper_xml_count} *Mapper.xml detected."
309
+ return None
310
+
299
311
  def _summarize_dotnet_entry(self, stacks: list[StackDetection]) -> list[str]:
300
312
  dotnet_stacks = [s for s in stacks if s.stack == "dotnet"]
301
313
  if not dotnet_stacks:
sourcecode/cli.py CHANGED
@@ -790,7 +790,7 @@ def main(
790
790
  # Require at least 8: src(1)+main(2)+java(3)+com(4)+co(5)+app(6)+module(7)+file.
791
791
  _java_manifest_names = {"pom.xml", "build.gradle", "build.gradle.kts"}
792
792
  _is_java = any(Path(m).name in _java_manifest_names for m in manifests)
793
- _java_min_depth = 8
793
+ _java_min_depth = 10
794
794
  effective_depth = max(depth, _java_min_depth) if _is_java and depth < _java_min_depth else depth
795
795
 
796
796
  # --agent: enable signal analyzers; output via agent_view (not compact)
@@ -1376,6 +1376,12 @@ def main(
1376
1376
  ))
1377
1377
  sm = _replace(sm, pipeline_trace=_trace.build_trace())
1378
1378
 
1379
+ # P3-B: Auto-switch to centrality ranking when DDD layout detected
1380
+ if (rank_by == "relevance"
1381
+ and sm.architecture is not None
1382
+ and sm.architecture.pattern == "ddd"):
1383
+ rank_by = "centrality"
1384
+
1379
1385
  # Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
1380
1386
  _is_contract_mode = mode in ("contract", "standard")
1381
1387
  if _is_contract_mode:
@@ -193,6 +193,27 @@ class ConfidenceAnalyzer:
193
193
  impact="low",
194
194
  ))
195
195
 
196
+ # ── Java test coverage gap check (P2-A) ──────────────────────────────
197
+ _java_all = [p for p in sm.file_paths if p.endswith(".java")]
198
+ _java_tests = [
199
+ p for p in _java_all
200
+ if "/test/" in p.replace("\\", "/") or "/tests/" in p.replace("\\", "/")
201
+ or Path(p).stem.endswith(("Test", "Tests", "IT", "Spec"))
202
+ ]
203
+ _java_prod = [p for p in _java_all if p not in set(_java_tests)]
204
+ if _java_prod and len(_java_prod) >= 10:
205
+ _ratio = len(_java_tests) / len(_java_prod)
206
+ if _ratio < 0.05:
207
+ gaps.append(AnalysisGap(
208
+ area="testing",
209
+ reason=(
210
+ f"Backend test coverage critical: {len(_java_tests)} test files "
211
+ f"for {len(_java_prod)} Java files "
212
+ f"({_ratio:.1%})"
213
+ ),
214
+ impact="high",
215
+ ))
216
+
196
217
  # ── Compute overall confidence ─────────────────────────────────────────
197
218
  # Stack: use best manifest-detected stack, fall back to min
198
219
  manifest_stacks = [s for s in sm.stacks if s.detection_method != "heuristic"]
@@ -15,12 +15,29 @@ from sourcecode.schema import FrameworkDetection
15
15
  from sourcecode.tree_utils import flatten_file_tree
16
16
 
17
17
  _MAX_FILE_SIZE = 256 * 1024 # 256 KB
18
- _MAX_JAVA_ENTRY_SCAN = 200
19
- _MAX_ANNOTATION_ENTRY_POINTS = 20
18
+ _MAX_JAVA_ENTRY_SCAN = 1000
19
+ _MAX_ANNOTATION_ENTRY_POINTS = 500
20
20
 
21
- _REST_CONTROLLER_RE = re.compile(r'@(?:Rest)?Controller\b')
21
+ _REST_CONTROLLER_RE = re.compile(r'@RestController\b')
22
+ _MVC_CONTROLLER_RE = re.compile(r'@Controller\b')
23
+ _REQUEST_MAPPING_RE = re.compile(r'@RequestMapping\b')
24
+ _CONTROLLER_ADVICE_RE = re.compile(r'@ControllerAdvice\b')
22
25
  _WEB_FILTER_RE = re.compile(r'@WebFilter\b')
23
26
  _FILTER_BEAN_RE = re.compile(r'FilterRegistrationBean\b')
27
+ # Extracts path from @RequestMapping("/v1/foo"), @GetMapping("/bar"), etc.
28
+ # Handles attribute order: value= may come after method= in legacy @RequestMapping style.
29
+ _HTTP_PATH_RE = re.compile(
30
+ r'@(?:Request|Get|Post|Put|Delete|Patch)Mapping\s*\([^)]*?(?:value\s*=\s*)?["\']([^"\']+)["\']'
31
+ )
32
+ _REQUEST_METHOD_VERB_RE = re.compile(
33
+ r'method\s*=\s*RequestMethod\.([A-Z]+)'
34
+ )
35
+ # @M3FiltroSeguridad custom security annotation
36
+ _M3_FILTRO_RE = re.compile(r'@M3FiltroSeguridad\b')
37
+ _M3_FILTRO_PARAMS_RE = re.compile(
38
+ r'@M3FiltroSeguridad\s*\(\s*(?:nombreRecurso\s*=\s*"([^"]*)")?'
39
+ r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
40
+ )
24
41
 
25
42
 
26
43
  class JavaDetector(AbstractDetector):
@@ -81,6 +98,8 @@ class JavaDetector(AbstractDetector):
81
98
  frameworks.append(FrameworkDetection(name="Vert.x", source=source))
82
99
  if "jakarta.ee" in text or "javax.ws.rs" in text:
83
100
  frameworks.append(FrameworkDetection(name="Jakarta EE", source=source))
101
+ if "mybatis" in text:
102
+ frameworks.append(FrameworkDetection(name="MyBatis", source=source))
84
103
  return frameworks
85
104
 
86
105
  def _collect_entry_points(self, context: DetectionContext) -> list[EntryPoint]:
@@ -139,13 +158,55 @@ class JavaDetector(AbstractDetector):
139
158
  return []
140
159
 
141
160
  # Quick pre-filter before running regexes
142
- if "Controller" not in content and "Filter" not in content:
161
+ if ("Controller" not in content and "Filter" not in content
162
+ and "ControllerAdvice" not in content
163
+ and "M3FiltroSeguridad" not in content):
143
164
  return []
144
165
 
145
166
  if _REST_CONTROLLER_RE.search(content):
167
+ http_path_match = _HTTP_PATH_RE.search(content)
168
+ http_path = http_path_match.group(1) if http_path_match else None
169
+ verb_match = _REQUEST_METHOD_VERB_RE.search(content)
170
+ if verb_match and http_path:
171
+ http_path = f"[{verb_match.group(1)}] {http_path}"
172
+ elif verb_match:
173
+ http_path = f"[{verb_match.group(1)}]"
174
+ security_evidence = None
175
+ m3_match = _M3_FILTRO_PARAMS_RE.search(content)
176
+ if m3_match:
177
+ nombre = m3_match.group(1) or ""
178
+ nivel = m3_match.group(2) or ""
179
+ security_evidence = f"@M3FiltroSeguridad(nombreRecurso={nombre!r}, nivelRequerido={nivel})"
146
180
  return [EntryPoint(
147
- path=rel_path, stack="java", kind="http_handler",
181
+ path=rel_path, stack="java", kind="rest_controller",
148
182
  source="annotation", confidence="high",
183
+ http_path=http_path,
184
+ evidence=security_evidence,
185
+ )]
186
+ if _CONTROLLER_ADVICE_RE.search(content):
187
+ return [EntryPoint(
188
+ path=rel_path, stack="java", kind="exception_handler",
189
+ source="annotation", confidence="medium",
190
+ )]
191
+ if _MVC_CONTROLLER_RE.search(content) and _REQUEST_MAPPING_RE.search(content):
192
+ http_path_match = _HTTP_PATH_RE.search(content)
193
+ http_path = http_path_match.group(1) if http_path_match else None
194
+ verb_match = _REQUEST_METHOD_VERB_RE.search(content)
195
+ if verb_match and http_path:
196
+ http_path = f"[{verb_match.group(1)}] {http_path}"
197
+ elif verb_match:
198
+ http_path = f"[{verb_match.group(1)}]"
199
+ security_evidence = None
200
+ m3_match = _M3_FILTRO_PARAMS_RE.search(content)
201
+ if m3_match:
202
+ nombre = m3_match.group(1) or ""
203
+ nivel = m3_match.group(2) or ""
204
+ security_evidence = f"@M3FiltroSeguridad(nombreRecurso={nombre!r}, nivelRequerido={nivel})"
205
+ return [EntryPoint(
206
+ path=rel_path, stack="java", kind="mvc_controller",
207
+ source="annotation", confidence="medium",
208
+ http_path=http_path,
209
+ evidence=security_evidence,
149
210
  )]
150
211
  if _WEB_FILTER_RE.search(content):
151
212
  return [EntryPoint(
@@ -27,6 +27,8 @@ _ENV_EXAMPLE_NAMES = {
27
27
 
28
28
  # Spring Boot application.properties / application.yml and their profile variants
29
29
  _SPRING_CONF_BASE = {"application.properties", "application.yml", "application.yaml"}
30
+ # Matches options/{profile}/ in multi-tenant SAS layout paths
31
+ _OPTIONS_PROFILE_PATH_RE = re.compile(r'options/([a-z0-9_-]+)/', re.IGNORECASE)
30
32
  _SPRING_CONF_PROFILE_RE = re.compile(r'^application-([a-z0-9_-]+)\.(properties|ya?ml)$', re.IGNORECASE)
31
33
  # Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE.
32
34
  # Group 1 = key, Group 2 = default (may be empty string, absent = no default).
@@ -35,6 +37,15 @@ _SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
35
37
  # These are internal property cross-references, not OS env vars, but still config signals.
36
38
  _SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
37
39
 
40
+ # Known Spring-internal namespaces — NOT emitted as custom application properties.
41
+ _SPRING_BUILTIN_NAMESPACES: frozenset[str] = frozenset({
42
+ "spring", "logging", "management", "server", "info", "debug",
43
+ "endpoints", "security", "eureka", "feign", "ribbon", "hystrix",
44
+ "zuul", "cloud", "flyway", "liquibase", "jpa", "datasource",
45
+ "kafka", "rabbitmq", "redis", "mail", "thymeleaf", "mvc",
46
+ "web", "actuator", "metrics", "tracing",
47
+ })
48
+
38
49
  # Patterns where absence of the variable causes a hard runtime error (not just None/null).
39
50
  # py_environ_bracket → os.environ["KEY"] raises KeyError
40
51
  # java_spring_value → Spring fails to start if ${KEY} has no default
@@ -223,6 +234,66 @@ def _extract_spring_profile(filename: str) -> Optional[str]:
223
234
  return None
224
235
 
225
236
 
237
# Keys made only of letters, digits, "_" and "-" (must start with a letter).
_YAML_PLAIN_KEY_RE = re.compile(r'^[a-zA-Z][a-zA-Z0-9_-]*$')


def _strip_yaml_inline_comment(value: str) -> str:
    """Return *value* without a trailing YAML comment.

    A ``#`` starts a comment only at the beginning of the value or when
    preceded by whitespace, and never inside a quoted scalar that opens the
    value. ``"#fff"`` and ``http://x/y#frag`` are therefore kept intact.
    """
    quote = value[0] if value[:1] in ('"', "'") else None
    in_quotes = quote is not None
    idx = 1 if in_quotes else 0
    while idx < len(value):
        ch = value[idx]
        if in_quotes:
            if ch == "\\" and quote == '"':
                idx += 1  # skip the escaped character inside double quotes
            elif ch == quote:
                if quote == "'" and value[idx + 1:idx + 2] == "'":
                    idx += 1  # '' is an escaped quote inside single quotes
                else:
                    in_quotes = False
        elif ch == "#" and (idx == 0 or value[idx - 1] in " \t"):
            return value[:idx].strip()
        idx += 1
    return value.strip()


def _parse_yaml_custom_properties(
    content: str,
    rel_path: str,
    profile: Optional[str],
    findings: dict,
) -> None:
    """Extract custom namespace leaf properties from YAML (e.g. saint.ldap.url).

    Builds dotted key paths by tracking indentation levels. This is a
    line-based scan, not a YAML parser: only ``key: value`` lines with plain
    identifier keys are considered. A leaf is emitted unless its top-level
    namespace is a known Spring built-in, its value starts with ``${``, ``{``
    or ``[``, or it is a ``|`` / ``>`` block scalar (whose body is skipped).

    Args:
        content: Raw text of the YAML file.
        rel_path: Path recorded with every finding.
        profile: Spring profile recorded with every finding (may be ``None``).
        findings: Mutated in place via ``findings[key].append(...)``; it must
            already provide a list for unseen keys (presumably a
            ``defaultdict(list)`` — confirm against the caller). Each entry is
            ``(rel_path, value, False, profile)``.
    """
    # Stack of (indent, key_segment) describing the current nesting path
    key_stack: list[tuple[int, str]] = []
    # Indent of the key that opened a | or > block scalar, while inside one
    block_scalar_indent: Optional[int] = None

    for line in content.splitlines():
        stripped = line.lstrip()
        if not stripped:
            continue
        indent = len(line) - len(stripped)

        if block_scalar_indent is not None:
            if indent > block_scalar_indent:
                continue  # body of a block scalar: text, not keys
            block_scalar_indent = None

        if stripped.startswith('#') or ':' not in stripped:
            continue

        raw_key, _, raw_value = stripped.partition(':')
        key_part = raw_key.strip()
        value_part = raw_value.strip()

        # Only plain identifiers (no special chars)
        if not _YAML_PLAIN_KEY_RE.match(key_part):
            continue

        # Pop stack entries at same or deeper indent, then record this key
        while key_stack and key_stack[-1][0] >= indent:
            key_stack.pop()
        key_stack.append((indent, key_part))

        # Only emit leaf values (non-empty, not a flow mapping/sequence start)
        if not value_part or value_part.startswith(('{', '[')):
            continue

        # Skip entries that look like ${...} references (already handled elsewhere)
        if value_part.startswith('${'):
            continue

        clean_value = _strip_yaml_inline_comment(value_part)
        if not clean_value:
            continue  # the "value" was only a comment; the key is a parent

        # "|" / ">" introduce a multi-line block scalar, not an inline value
        if clean_value[0] in '|>':
            block_scalar_indent = indent
            continue

        # Skip Spring built-in namespaces
        if key_stack[0][1].lower() in _SPRING_BUILTIN_NAMESPACES:
            continue

        full_key = '.'.join(seg for _, seg in key_stack)
        findings[full_key].append((rel_path, clean_value, False, profile))
295
+
296
+
226
297
  def _parse_spring_config(
227
298
  path: Path,
228
299
  rel_path: str,
@@ -234,6 +305,7 @@ def _parse_spring_config(
234
305
  Returns the total number of ${...} placeholders found (candidates).
235
306
  Captures default values from ${VAR:default} syntax.
236
307
  Marks vars without defaults as hard-required (Spring fails to start if missing).
308
+ Also extracts custom namespace properties (saint.*, app.*, etc.) as yml_property entries.
237
309
  """
238
310
  try:
239
311
  content = path.read_text(encoding="utf-8", errors="replace")
@@ -267,6 +339,10 @@ def _parse_spring_config(
267
339
  findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
268
340
  candidates += 1
269
341
 
342
+ # 3. Custom YAML namespace properties (YAML/YML files only)
343
+ if rel_path.endswith((".yml", ".yaml")):
344
+ _parse_yaml_custom_properties(content, rel_path, profile, findings)
345
+
270
346
  return candidates
271
347
 
272
348
 
@@ -320,14 +396,17 @@ class EnvAnalyzer:
320
396
  first_profile = prof
321
397
  if len(unique_files) >= _MAX_FILES_PER_KEY:
322
398
  break
399
+ # Custom YAML properties use lowercase.dotted keys and category "application"
400
+ is_yml_prop = '.' in key and key[0].islower()
323
401
  records[key] = EnvVarRecord(
324
402
  key=key,
325
403
  required=required,
326
404
  default=default_val,
327
405
  type_hint=_infer_type_hint(key),
328
- category=_infer_category(key),
406
+ category="application" if is_yml_prop else _infer_category(key),
329
407
  files=unique_files,
330
408
  profile=first_profile,
409
+ source="yml_property" if is_yml_prop else None,
331
410
  )
332
411
 
333
412
  # 2. Supplement with .env.example entries (fill description + add missing keys)
@@ -372,6 +451,8 @@ class EnvAnalyzer:
372
451
  "extracted. Duplicates across profiles collapsed."
373
452
  )
374
453
 
454
+ # spring_profiles: named profiles only (exclude "default")
455
+ _named_profiles = sorted({p for p in profiles_scanned if p != "default"})
375
456
  summary = EnvSummary(
376
457
  requested=True,
377
458
  total=len(sorted_records),
@@ -383,6 +464,7 @@ class EnvAnalyzer:
383
464
  profiles_scanned=sorted(set(profiles_scanned)),
384
465
  spring_candidates=spring_candidates,
385
466
  coverage_note=coverage_note,
467
+ spring_profiles=_named_profiles,
386
468
  )
387
469
 
388
470
  return sorted_records, summary
@@ -427,6 +509,10 @@ class EnvAnalyzer:
427
509
  # Spring Boot application.properties / application.yml (incl. profiles)
428
510
  if name_lower in _SPRING_CONF_BASE or _SPRING_CONF_PROFILE_RE.match(name_lower):
429
511
  profile = _extract_spring_profile(name)
512
+ # Override profile if path contains options/{profile}/ (multi-tenant SAS layout)
513
+ path_profile_match = _OPTIONS_PROFILE_PATH_RE.search(rel)
514
+ if path_profile_match:
515
+ profile = path_profile_match.group(1)
430
516
  if profile and profile not in profiles_scanned:
431
517
  profiles_scanned.append(profile)
432
518
  count = _parse_spring_config(entry, rel, findings, profile)
@@ -78,6 +78,29 @@ _IMPORT_RE = re.compile(
78
78
  )
79
79
  _DEF_RE = re.compile(r"\b(class|def|function|const|export\s+class|interface|type)\s+[A-Za-z_]", re.MULTILINE)
80
80
 
81
# Java Spring stereotype annotation detection.
# RequestMapping is captured so the compound "Controller + RequestMapping"
# rule below can actually be satisfied.
_JAVA_ANNOTATION_RE = re.compile(
    r'@(RestController|Controller|RequestMapping|Service|Repository|Mapper|Entity|Data|'
    r'Configuration|EnableWebSecurity|ControllerAdvice|Transactional)\b'
)

# (annotation_set, category, relevance, why_template)
# Checked in priority order; first match wins. A rule matches when every
# annotation in its set was found in the file.
_JAVA_STEREOTYPE_RULES: list[tuple[frozenset, str, float, str]] = [
    (frozenset({"EnableWebSecurity"}), "security", 0.85, "Spring Security configuration"),
    (frozenset({"RestController"}), "api_endpoint", 0.90, "Spring REST controller — defines HTTP API surface"),
    (frozenset({"Controller", "RequestMapping"}), "api_endpoint", 0.80, "Spring MVC controller"),
    (frozenset({"Service", "Transactional"}), "business_logic", 0.75, "Transactional service — business logic boundary"),
    (frozenset({"Service"}), "business_logic", 0.65, "Spring service component"),
    (frozenset({"Repository"}), "data_access", 0.65, "Spring repository — data access layer"),
    (frozenset({"Mapper"}), "data_access", 0.65, "MyBatis mapper — SQL data access"),
    (frozenset({"Configuration"}), "configuration", 0.70, "Spring configuration class"),
    (frozenset({"Entity"}), "domain_model", 0.50, "JPA entity — domain model"),
    (frozenset({"Data"}), "dto", 0.40, "Lombok DTO"),
]

# Categories produced by Java stereotype detection — used downstream to apply direct relevance
JAVA_STEREOTYPE_CATEGORIES: frozenset[str] = frozenset(
    cat for _, cat, _, _ in _JAVA_STEREOTYPE_RULES
)
103
+
81
104
 
82
105
  class FileClassifier:
83
106
  def __init__(
@@ -138,6 +161,12 @@ class FileClassifier:
138
161
  if norm in self.production_entry_paths:
139
162
  return FileClassification(norm, "runtime_core", "high", 0.95, "declared production runtime entrypoint", ["entry_points"])
140
163
 
164
+ # Java Spring stereotype detection (Java/Kotlin files only)
165
+ if suffix in {".java", ".kt"}:
166
+ java_class = self._classify_java_stereotype(norm, content)
167
+ if java_class is not None:
168
+ return java_class
169
+
141
170
  if self._has_any_import(imports, _API_IMPORTS):
142
171
  evidence = self._matched_imports(imports, _API_IMPORTS)
143
172
  return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
@@ -213,3 +242,21 @@ class FileClassifier:
213
242
  def _sample(self, imports: list[str]) -> list[str]:
214
243
  return [f"import:{imp}" for imp in imports[:4]]
215
244
 
245
def _classify_java_stereotype(self, path: str, content: str) -> "FileClassification | None":
    """Classify a Java file by its Spring/JPA/MyBatis stereotype annotations.

    Collects every annotation matched by ``_JAVA_ANNOTATION_RE`` and returns
    a classification for the first rule in ``_JAVA_STEREOTYPE_RULES`` whose
    annotations are all present.

    Args:
        path: Path stored on the resulting classification.
        content: File text to scan.

    Returns:
        A ``FileClassification`` built from the matching rule, with the found
        annotation names (sorted) as evidence. ``None`` when ``content`` is
        empty, no known annotation is present, or no rule matches.
    """
    if not content:
        return None
    found = frozenset(m.group(1) for m in _JAVA_ANNOTATION_RE.finditer(content))
    if not found:
        return None
    # Sorted so the evidence order is stable across runs; iterating a
    # frozenset of strings depends on per-process hash randomisation.
    evidence = sorted(found)
    for required_annotations, category, relevance, why in _JAVA_STEREOTYPE_RULES:
        # @Data marks a DTO only when the class is not also an @Entity
        if required_annotations == frozenset({"Data"}) and "Entity" in found:
            continue
        # Compound rules (Service+Transactional, Controller+RequestMapping) need all of them
        if required_annotations <= found:
            return FileClassification(path, category, "high", relevance, why, evidence)
    return None
262
+
@@ -229,6 +229,29 @@ class MetricsAnalyzer:
229
229
  "null complexity fields are expected, not an error."
230
230
  )
231
231
 
232
+ # P2-C: DDD module metrics — group by module, count files/methods per layer
233
+ _DDD_LAYERS = {"domain", "application", "infrastructure"}
234
+ ddd_files = [r for r in records if "/ddd/" in r.path.replace("\\", "/")]
235
+ if ddd_files:
236
+ module_layer_data: dict[str, dict[str, dict]] = {}
237
+ for fm in ddd_files:
238
+ parts = fm.path.replace("\\", "/").split("/")
239
+ for i, part in enumerate(parts):
240
+ if part in _DDD_LAYERS and i >= 2:
241
+ module = parts[i - 1]
242
+ layer = part
243
+ if module not in module_layer_data:
244
+ module_layer_data[module] = {lyr: {"files": 0, "methods": 0} for lyr in _DDD_LAYERS}
245
+ module_layer_data[module][layer]["files"] += 1
246
+ module_layer_data[module][layer]["methods"] += fm.function_count or 0
247
+ break
248
+ ddd_metrics = [
249
+ {"module": mod, "layers": layers}
250
+ for mod, layers in sorted(module_layer_data.items())
251
+ ]
252
+ else:
253
+ ddd_metrics = []
254
+
232
255
  summary = MetricsSummary(
233
256
  requested=True,
234
257
  file_count=len(records),
@@ -238,6 +261,7 @@ class MetricsAnalyzer:
238
261
  coverage_records=coverage_records,
239
262
  coverage_sources_found=sorted({r.format for r in coverage_records}),
240
263
  limitations=limitations,
264
+ ddd_module_metrics=ddd_metrics,
241
265
  )
242
266
  return records, summary
243
267
 
@@ -332,6 +332,50 @@ _SOURCE_EXTENSIONS: frozenset[str] = frozenset({
332
332
  ".go", ".rs", ".rb", ".php", ".cs", ".dart",
333
333
  })
334
334
 
335
+
336
def _extract_ddd_domain(path: str) -> str:
    """Return the DDD domain (module) name encoded in a file path.

    The domain is the path segment immediately before the first
    ``application``, ``domain`` or ``infrastructure`` segment, e.g.
    ``.../ddd/orders/infrastructure/rest/X.java`` → ``orders``. A layer name
    in first position has no predecessor and is ignored. When no layer
    segment is found the penultimate segment is returned, or ``""`` for a
    path with a single segment.
    """
    layer_names = {"application", "domain", "infrastructure"}
    segments = path.replace("\\", "/").split("/")

    # Start at 1: a layer segment needs a preceding segment to name the domain.
    for idx in range(1, len(segments)):
        if segments[idx] in layer_names:
            return segments[idx - 1]

    # Fallback: penultimate directory segment
    return segments[-2] if len(segments) >= 2 else ""
351
+
352
+
353
def _java_why(path: str, file_class: "Optional[object]") -> str:
    """Build the human-readable "why" text for a stereotype-classified Java file.

    Returns ``""`` when ``file_class`` is ``None`` or its ``category`` is not
    one of ``JAVA_STEREOTYPE_CATEGORIES``. Otherwise the text depends on the
    category: endpoint, business-logic and data-access texts mention the
    domain taken from ``path`` (with a generic wording when that is empty),
    entity and DTO texts mention the file stem, and the remaining categories
    reuse the classification's ``reason`` attribute.
    """
    if file_class is None:
        return ""
    from sourcecode.file_classifier import JAVA_STEREOTYPE_CATEGORIES

    category = getattr(file_class, "category", "")
    if category not in JAVA_STEREOTYPE_CATEGORIES:
        return ""

    domain = _extract_ddd_domain(path)
    class_name = Path(path).stem

    # Categories worded around the domain: (text with domain, generic text)
    domain_phrases = {
        "api_endpoint": (
            f"Defines HTTP endpoints for the {domain} domain",
            "Defines HTTP API endpoints",
        ),
        "business_logic": (
            f"Orchestrates {domain} business logic",
            "Business logic service",
        ),
        "data_access": (
            f"SQL queries for {domain} data access",
            "Data access layer",
        ),
    }
    if category in domain_phrases:
        with_domain, generic = domain_phrases[category]
        return with_domain if domain else generic

    # Categories worded around the class (file stem)
    class_phrases = {
        "domain_model": f"JPA entity for {class_name} persistence",
        "dto": f"Lombok DTO — {class_name}",
    }
    if category in class_phrases:
        return class_phrases[category]

    # Everything else echoes the classifier's reason, with a per-category default
    reason_defaults = {
        "configuration": "Spring configuration class",
        "security": "Spring Security configuration",
    }
    return getattr(file_class, "reason", reason_defaults.get(category, ""))
378
+
335
379
  _ALL_EXTENSIONS: frozenset[str] = _SOURCE_EXTENSIONS | frozenset({
336
380
  ".md", ".toml", ".yaml", ".yml", ".json", ".xml",
337
381
  })
@@ -726,12 +770,14 @@ class TaskContextBuilder:
726
770
  )
727
771
  all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
728
772
  reason_str = ", ".join(all_reasons) if all_reasons else "source file"
773
+ why_str = _java_why(path, file_class)
729
774
 
730
775
  scored.append((total, path, RelevantFile(
731
776
  path=path,
732
777
  role=role,
733
778
  score=round(min(total / 3.0, 1.0), 2),
734
779
  reason=reason_str,
780
+ why=why_str,
735
781
  )))
736
782
 
737
783
  # Deterministic: score desc, then path asc as tiebreaker
sourcecode/schema.py CHANGED
@@ -79,6 +79,7 @@ class EntryPoint:
79
79
  classification: Optional[Literal["production", "development", "auxiliary"]] = None
80
80
  runtime_relevance: Optional[Literal["high", "medium", "low"]] = None
81
81
  produced_by: Optional[str] = None # which detector emitted this
82
+ http_path: Optional[str] = None # extracted from @RequestMapping / @GetMapping (Java REST controllers)
82
83
 
83
84
 
84
85
  @dataclass
@@ -224,6 +225,7 @@ class MetricsSummary:
224
225
  coverage_records: list[CoverageRecord] = field(default_factory=list)
225
226
  coverage_sources_found: list[str] = field(default_factory=list)
226
227
  limitations: list[str] = field(default_factory=list)
228
+ ddd_module_metrics: list[dict] = field(default_factory=list)
227
229
 
228
230
 
229
231
  @dataclass
@@ -413,6 +415,7 @@ class ArchitectureAnalysis:
413
415
  # True when pattern is inferred from weak signals (e.g. directory names only).
414
416
  # Agents must not treat tentative patterns as confirmed facts.
415
417
  tentative: bool = False
418
+ ddd_layers_detected: list[str] = field(default_factory=list) # e.g. ["application", "domain", "infrastructure"]
416
419
 
417
420
 
418
421
  # --- Env Map ---
@@ -425,10 +428,11 @@ class EnvVarRecord:
425
428
  required: bool = True
426
429
  default: Optional[str] = None
427
430
  type_hint: Optional[str] = None # string | int | bool | url | path | enum
428
- category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general
431
+ category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general | application
429
432
  description: Optional[str] = None
430
433
  files: list[str] = field(default_factory=list) # "path:line"
431
434
  profile: Optional[str] = None # Spring profile if first occurrence is in application-{profile}.yml
435
+ source: Optional[str] = None # yml_property | env_var | source_code
432
436
 
433
437
 
434
438
  @dataclass
@@ -446,6 +450,7 @@ class EnvSummary:
446
450
  profiles_scanned: list[str] = field(default_factory=list)
447
451
  spring_candidates: int = 0 # total ${VAR} refs found across Spring config files
448
452
  coverage_note: Optional[str] = None # explicit note about partial coverage
453
+ spring_profiles: list[str] = field(default_factory=list) # canonical list: profile names from application-{profile}.yml
449
454
 
450
455
 
451
456
  # --- Code Notes ---