sourcecode 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/architecture_analyzer.py +101 -0
- sourcecode/architecture_summary.py +12 -0
- sourcecode/cli.py +7 -1
- sourcecode/confidence_analyzer.py +21 -0
- sourcecode/detectors/java.py +66 -5
- sourcecode/env_analyzer.py +87 -1
- sourcecode/file_classifier.py +47 -0
- sourcecode/metrics_analyzer.py +24 -0
- sourcecode/prepare_context.py +46 -0
- sourcecode/schema.py +6 -1
- sourcecode/semantic_analyzer.py +152 -0
- sourcecode/serializer.py +22 -2
- {sourcecode-1.0.0.dist-info → sourcecode-1.2.0.dist-info}/METADATA +99 -307
- {sourcecode-1.0.0.dist-info → sourcecode-1.2.0.dist-info}/RECORD +18 -18
- {sourcecode-1.0.0.dist-info → sourcecode-1.2.0.dist-info}/WHEEL +0 -0
- {sourcecode-1.0.0.dist-info → sourcecode-1.2.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.0.0.dist-info → sourcecode-1.2.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED (+1 -1; hunk not captured in this extract)
sourcecode/architecture_analyzer.py
CHANGED
|
@@ -176,6 +176,40 @@ class ArchitectureAnalyzer:
|
|
|
176
176
|
|
|
177
177
|
# Step 1: filter paths
|
|
178
178
|
filtered = self._filter_paths(sm.file_paths)
|
|
179
|
+
|
|
180
|
+
# Step 1b: DDD filesystem detection — runs before the filtered-paths guard
|
|
181
|
+
# because DDD signals live in directory structure, not just file extensions.
|
|
182
|
+
ddd_result = self._detect_ddd(sm.file_paths)
|
|
183
|
+
if ddd_result is not None:
|
|
184
|
+
ddd_pattern, ddd_layers, ddd_contexts, ddd_layer_names = ddd_result
|
|
185
|
+
domains_for_ddd = self._cluster_domains(filtered) if len(filtered) >= 2 else []
|
|
186
|
+
module_files = self._build_ddd_module_files(sm.file_paths, ddd_contexts)
|
|
187
|
+
bc_list = [
|
|
188
|
+
BoundedContext(name=n, modules=module_files.get(n, []), confidence="high")
|
|
189
|
+
for n in ddd_contexts
|
|
190
|
+
]
|
|
191
|
+
return ArchitectureAnalysis(
|
|
192
|
+
requested=True,
|
|
193
|
+
pattern=ddd_pattern,
|
|
194
|
+
domains=domains_for_ddd,
|
|
195
|
+
layers=ddd_layers,
|
|
196
|
+
bounded_contexts=bc_list,
|
|
197
|
+
ddd_layers_detected=ddd_layer_names,
|
|
198
|
+
confidence="high",
|
|
199
|
+
method="filesystem_inference",
|
|
200
|
+
limitations=[],
|
|
201
|
+
evidence=[{
|
|
202
|
+
"type": "filesystem_naming",
|
|
203
|
+
"paths": [f"{ddd_contexts[0]}/" if ddd_contexts else ""],
|
|
204
|
+
"reason": (
|
|
205
|
+
f"DDD layout detected: {len(ddd_contexts)} modules under common prefix "
|
|
206
|
+
"each contain application/, domain/, infrastructure/ subdirectories."
|
|
207
|
+
),
|
|
208
|
+
"confidence": "high",
|
|
209
|
+
}],
|
|
210
|
+
tentative=False,
|
|
211
|
+
)
|
|
212
|
+
|
|
179
213
|
if len(filtered) < 2:
|
|
180
214
|
return ArchitectureAnalysis(
|
|
181
215
|
requested=True,
|
|
@@ -333,6 +367,73 @@ class ArchitectureAnalyzer:
|
|
|
333
367
|
# Private helpers
|
|
334
368
|
# ------------------------------------------------------------------
|
|
335
369
|
|
|
370
|
+
def _detect_ddd(
|
|
371
|
+
self, paths: list[str]
|
|
372
|
+
) -> "Optional[tuple[str, list[ArchitectureLayer], list[str], list[str]]]":
|
|
373
|
+
"""Detect DDD: ≥5 modules under a common prefix each with application/domain/infrastructure."""
|
|
374
|
+
_DDD_LAYERS = frozenset({"application", "domain", "infrastructure"})
|
|
375
|
+
_DDD_MIN_MODULES = 5
|
|
376
|
+
|
|
377
|
+
# Map (prefix, module) → set of DDD layer names found under that module
|
|
378
|
+
prefix_module_layers: dict[tuple[str, str], set[str]] = {}
|
|
379
|
+
|
|
380
|
+
for p in paths:
|
|
381
|
+
parts = p.replace("\\", "/").split("/")
|
|
382
|
+
for i, part in enumerate(parts):
|
|
383
|
+
if part in _DDD_LAYERS and i >= 2:
|
|
384
|
+
module = parts[i - 1]
|
|
385
|
+
prefix = "/".join(parts[:i - 1])
|
|
386
|
+
key = (prefix, module)
|
|
387
|
+
prefix_module_layers.setdefault(key, set()).add(part)
|
|
388
|
+
break
|
|
389
|
+
|
|
390
|
+
# Group by prefix; find prefixes where ≥5 modules have all 3 DDD layers
|
|
391
|
+
prefix_modules: dict[str, list[str]] = {}
|
|
392
|
+
for (prefix, module), layers_found in prefix_module_layers.items():
|
|
393
|
+
if _DDD_LAYERS <= layers_found: # module has all 3
|
|
394
|
+
prefix_modules.setdefault(prefix, []).append(module)
|
|
395
|
+
|
|
396
|
+
best_prefix = max(
|
|
397
|
+
prefix_modules,
|
|
398
|
+
key=lambda p: len(prefix_modules[p]),
|
|
399
|
+
default=None,
|
|
400
|
+
)
|
|
401
|
+
if best_prefix is None or len(prefix_modules[best_prefix]) < _DDD_MIN_MODULES:
|
|
402
|
+
return None
|
|
403
|
+
|
|
404
|
+
bounded_context_names = sorted(set(prefix_modules[best_prefix]))
|
|
405
|
+
ddd_layer_names = sorted(_DDD_LAYERS)
|
|
406
|
+
|
|
407
|
+
arch_layers: list[ArchitectureLayer] = [
|
|
408
|
+
ArchitectureLayer(
|
|
409
|
+
name=layer,
|
|
410
|
+
pattern="ddd",
|
|
411
|
+
files=[
|
|
412
|
+
p for p in paths
|
|
413
|
+
if f"/{layer}/" in p.replace("\\", "/")
|
|
414
|
+
],
|
|
415
|
+
confidence="high",
|
|
416
|
+
)
|
|
417
|
+
for layer in ddd_layer_names
|
|
418
|
+
]
|
|
419
|
+
return "ddd", arch_layers, bounded_context_names, ddd_layer_names
|
|
420
|
+
|
|
421
|
+
def _build_ddd_module_files(
|
|
422
|
+
self, paths: list[str], bounded_context_names: list[str]
|
|
423
|
+
) -> "dict[str, list[str]]":
|
|
424
|
+
"""Build a mapping of DDD module name → list of file paths."""
|
|
425
|
+
_DDD_LAYERS = frozenset({"application", "domain", "infrastructure"})
|
|
426
|
+
module_files: dict[str, list[str]] = {}
|
|
427
|
+
for p in paths:
|
|
428
|
+
parts = p.replace("\\", "/").split("/")
|
|
429
|
+
for i, part in enumerate(parts):
|
|
430
|
+
if part in _DDD_LAYERS and i >= 2:
|
|
431
|
+
mod = parts[i - 1]
|
|
432
|
+
if mod in bounded_context_names:
|
|
433
|
+
module_files.setdefault(mod, []).append(p)
|
|
434
|
+
break
|
|
435
|
+
return module_files
|
|
436
|
+
|
|
336
437
|
def _is_tooling(self, path: str) -> bool:
    """Return True when *path* starts with one of ``_TOOLING_PREFIXES``.

    Backslashes are converted to ``/`` before the comparison.
    """
    normalized = path.replace("\\", "/")
    for prefix in _TOOLING_PREFIXES:
        if normalized.startswith(prefix):
            return True
    return False
|
|
sourcecode/architecture_summary.py
CHANGED
|
@@ -84,6 +84,11 @@ class ArchitectureSummarizer:
|
|
|
84
84
|
elif suffix in {".cs", ".fs", ".vb"}:
|
|
85
85
|
lang_lines = self._summarize_dotnet_entry(sm.stacks)
|
|
86
86
|
|
|
87
|
+
# MyBatis XML mapper count line (Java projects)
|
|
88
|
+
mybatis_line = self._mybatis_summary_line(file_paths)
|
|
89
|
+
if mybatis_line:
|
|
90
|
+
lang_lines.append(mybatis_line)
|
|
91
|
+
|
|
87
92
|
# Merge: rich lines first, stack-specific details appended (deduped)
|
|
88
93
|
lines = rich_lines + [l for l in lang_lines if l not in rich_lines]
|
|
89
94
|
|
|
@@ -296,6 +301,13 @@ class ArchitectureSummarizer:
|
|
|
296
301
|
lines.append("Orquesta el arranque de la aplicacion JVM.")
|
|
297
302
|
return lines
|
|
298
303
|
|
|
304
|
+
def _mybatis_summary_line(self, file_paths: list[str]) -> str | None:
|
|
305
|
+
"""Return a summary line when >5 MyBatis XML mappers are detected."""
|
|
306
|
+
mapper_xml_count = sum(1 for p in file_paths if p.endswith("Mapper.xml"))
|
|
307
|
+
if mapper_xml_count > 5:
|
|
308
|
+
return f"MyBatis XML mappers: {mapper_xml_count} *Mapper.xml detected."
|
|
309
|
+
return None
|
|
310
|
+
|
|
299
311
|
def _summarize_dotnet_entry(self, stacks: list[StackDetection]) -> list[str]:
|
|
300
312
|
dotnet_stacks = [s for s in stacks if s.stack == "dotnet"]
|
|
301
313
|
if not dotnet_stacks:
|
sourcecode/cli.py
CHANGED
|
@@ -790,7 +790,7 @@ def main(
|
|
|
790
790
|
# Require at least 8: src(1)+main(2)+java(3)+com(4)+co(5)+app(6)+module(7)+file.
|
|
791
791
|
_java_manifest_names = {"pom.xml", "build.gradle", "build.gradle.kts"}
|
|
792
792
|
_is_java = any(Path(m).name in _java_manifest_names for m in manifests)
|
|
793
|
-
_java_min_depth =
|
|
793
|
+
_java_min_depth = 10
|
|
794
794
|
effective_depth = max(depth, _java_min_depth) if _is_java and depth < _java_min_depth else depth
|
|
795
795
|
|
|
796
796
|
# --agent: enable signal analyzers; output via agent_view (not compact)
|
|
@@ -1376,6 +1376,12 @@ def main(
|
|
|
1376
1376
|
))
|
|
1377
1377
|
sm = _replace(sm, pipeline_trace=_trace.build_trace())
|
|
1378
1378
|
|
|
1379
|
+
# P3-B: Auto-switch to centrality ranking when DDD layout detected
|
|
1380
|
+
if (rank_by == "relevance"
|
|
1381
|
+
and sm.architecture is not None
|
|
1382
|
+
and sm.architecture.pattern == "ddd"):
|
|
1383
|
+
rank_by = "centrality"
|
|
1384
|
+
|
|
1379
1385
|
# Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
|
|
1380
1386
|
_is_contract_mode = mode in ("contract", "standard")
|
|
1381
1387
|
if _is_contract_mode:
|
|
sourcecode/confidence_analyzer.py
CHANGED
|
@@ -193,6 +193,27 @@ class ConfidenceAnalyzer:
|
|
|
193
193
|
impact="low",
|
|
194
194
|
))
|
|
195
195
|
|
|
196
|
+
# ── Java test coverage gap check (P2-A) ──────────────────────────────
|
|
197
|
+
_java_all = [p for p in sm.file_paths if p.endswith(".java")]
|
|
198
|
+
_java_tests = [
|
|
199
|
+
p for p in _java_all
|
|
200
|
+
if "/test/" in p.replace("\\", "/") or "/tests/" in p.replace("\\", "/")
|
|
201
|
+
or Path(p).stem.endswith(("Test", "Tests", "IT", "Spec"))
|
|
202
|
+
]
|
|
203
|
+
_java_prod = [p for p in _java_all if p not in set(_java_tests)]
|
|
204
|
+
if _java_prod and len(_java_prod) >= 10:
|
|
205
|
+
_ratio = len(_java_tests) / len(_java_prod)
|
|
206
|
+
if _ratio < 0.05:
|
|
207
|
+
gaps.append(AnalysisGap(
|
|
208
|
+
area="testing",
|
|
209
|
+
reason=(
|
|
210
|
+
f"Backend test coverage critical: {len(_java_tests)} test files "
|
|
211
|
+
f"for {len(_java_prod)} Java files "
|
|
212
|
+
f"({_ratio:.1%})"
|
|
213
|
+
),
|
|
214
|
+
impact="high",
|
|
215
|
+
))
|
|
216
|
+
|
|
196
217
|
# ── Compute overall confidence ─────────────────────────────────────────
|
|
197
218
|
# Stack: use best manifest-detected stack, fall back to min
|
|
198
219
|
manifest_stacks = [s for s in sm.stacks if s.detection_method != "heuristic"]
|
sourcecode/detectors/java.py
CHANGED
|
@@ -15,12 +15,29 @@ from sourcecode.schema import FrameworkDetection
|
|
|
15
15
|
from sourcecode.tree_utils import flatten_file_tree
|
|
16
16
|
|
|
17
17
|
_MAX_FILE_SIZE = 256 * 1024 # 256 KB
|
|
18
|
-
_MAX_JAVA_ENTRY_SCAN =
|
|
19
|
-
_MAX_ANNOTATION_ENTRY_POINTS =
|
|
18
|
+
_MAX_JAVA_ENTRY_SCAN = 1000
|
|
19
|
+
_MAX_ANNOTATION_ENTRY_POINTS = 500
|
|
20
20
|
|
|
21
|
-
_REST_CONTROLLER_RE = re.compile(r'@
|
|
21
|
+
_REST_CONTROLLER_RE = re.compile(r'@RestController\b')
|
|
22
|
+
_MVC_CONTROLLER_RE = re.compile(r'@Controller\b')
|
|
23
|
+
_REQUEST_MAPPING_RE = re.compile(r'@RequestMapping\b')
|
|
24
|
+
_CONTROLLER_ADVICE_RE = re.compile(r'@ControllerAdvice\b')
|
|
22
25
|
_WEB_FILTER_RE = re.compile(r'@WebFilter\b')
|
|
23
26
|
_FILTER_BEAN_RE = re.compile(r'FilterRegistrationBean\b')
|
|
27
|
+
# Extracts path from @RequestMapping("/v1/foo"), @GetMapping("/bar"), etc.
|
|
28
|
+
# Handles attribute order: value= may come after method= in legacy @RequestMapping style.
|
|
29
|
+
_HTTP_PATH_RE = re.compile(
|
|
30
|
+
r'@(?:Request|Get|Post|Put|Delete|Patch)Mapping\s*\([^)]*?(?:value\s*=\s*)?["\']([^"\']+)["\']'
|
|
31
|
+
)
|
|
32
|
+
_REQUEST_METHOD_VERB_RE = re.compile(
|
|
33
|
+
r'method\s*=\s*RequestMethod\.([A-Z]+)'
|
|
34
|
+
)
|
|
35
|
+
# @M3FiltroSeguridad custom security annotation
|
|
36
|
+
_M3_FILTRO_RE = re.compile(r'@M3FiltroSeguridad\b')
|
|
37
|
+
_M3_FILTRO_PARAMS_RE = re.compile(
|
|
38
|
+
r'@M3FiltroSeguridad\s*\(\s*(?:nombreRecurso\s*=\s*"([^"]*)")?'
|
|
39
|
+
r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
|
|
40
|
+
)
|
|
24
41
|
|
|
25
42
|
|
|
26
43
|
class JavaDetector(AbstractDetector):
|
|
@@ -81,6 +98,8 @@ class JavaDetector(AbstractDetector):
|
|
|
81
98
|
frameworks.append(FrameworkDetection(name="Vert.x", source=source))
|
|
82
99
|
if "jakarta.ee" in text or "javax.ws.rs" in text:
|
|
83
100
|
frameworks.append(FrameworkDetection(name="Jakarta EE", source=source))
|
|
101
|
+
if "mybatis" in text:
|
|
102
|
+
frameworks.append(FrameworkDetection(name="MyBatis", source=source))
|
|
84
103
|
return frameworks
|
|
85
104
|
|
|
86
105
|
def _collect_entry_points(self, context: DetectionContext) -> list[EntryPoint]:
|
|
@@ -139,13 +158,55 @@ class JavaDetector(AbstractDetector):
|
|
|
139
158
|
return []
|
|
140
159
|
|
|
141
160
|
# Quick pre-filter before running regexes
|
|
142
|
-
if "Controller" not in content and "Filter" not in content
|
|
161
|
+
if ("Controller" not in content and "Filter" not in content
|
|
162
|
+
and "ControllerAdvice" not in content
|
|
163
|
+
and "M3FiltroSeguridad" not in content):
|
|
143
164
|
return []
|
|
144
165
|
|
|
145
166
|
if _REST_CONTROLLER_RE.search(content):
|
|
167
|
+
http_path_match = _HTTP_PATH_RE.search(content)
|
|
168
|
+
http_path = http_path_match.group(1) if http_path_match else None
|
|
169
|
+
verb_match = _REQUEST_METHOD_VERB_RE.search(content)
|
|
170
|
+
if verb_match and http_path:
|
|
171
|
+
http_path = f"[{verb_match.group(1)}] {http_path}"
|
|
172
|
+
elif verb_match:
|
|
173
|
+
http_path = f"[{verb_match.group(1)}]"
|
|
174
|
+
security_evidence = None
|
|
175
|
+
m3_match = _M3_FILTRO_PARAMS_RE.search(content)
|
|
176
|
+
if m3_match:
|
|
177
|
+
nombre = m3_match.group(1) or ""
|
|
178
|
+
nivel = m3_match.group(2) or ""
|
|
179
|
+
security_evidence = f"@M3FiltroSeguridad(nombreRecurso={nombre!r}, nivelRequerido={nivel})"
|
|
146
180
|
return [EntryPoint(
|
|
147
|
-
path=rel_path, stack="java", kind="
|
|
181
|
+
path=rel_path, stack="java", kind="rest_controller",
|
|
148
182
|
source="annotation", confidence="high",
|
|
183
|
+
http_path=http_path,
|
|
184
|
+
evidence=security_evidence,
|
|
185
|
+
)]
|
|
186
|
+
if _CONTROLLER_ADVICE_RE.search(content):
|
|
187
|
+
return [EntryPoint(
|
|
188
|
+
path=rel_path, stack="java", kind="exception_handler",
|
|
189
|
+
source="annotation", confidence="medium",
|
|
190
|
+
)]
|
|
191
|
+
if _MVC_CONTROLLER_RE.search(content) and _REQUEST_MAPPING_RE.search(content):
|
|
192
|
+
http_path_match = _HTTP_PATH_RE.search(content)
|
|
193
|
+
http_path = http_path_match.group(1) if http_path_match else None
|
|
194
|
+
verb_match = _REQUEST_METHOD_VERB_RE.search(content)
|
|
195
|
+
if verb_match and http_path:
|
|
196
|
+
http_path = f"[{verb_match.group(1)}] {http_path}"
|
|
197
|
+
elif verb_match:
|
|
198
|
+
http_path = f"[{verb_match.group(1)}]"
|
|
199
|
+
security_evidence = None
|
|
200
|
+
m3_match = _M3_FILTRO_PARAMS_RE.search(content)
|
|
201
|
+
if m3_match:
|
|
202
|
+
nombre = m3_match.group(1) or ""
|
|
203
|
+
nivel = m3_match.group(2) or ""
|
|
204
|
+
security_evidence = f"@M3FiltroSeguridad(nombreRecurso={nombre!r}, nivelRequerido={nivel})"
|
|
205
|
+
return [EntryPoint(
|
|
206
|
+
path=rel_path, stack="java", kind="mvc_controller",
|
|
207
|
+
source="annotation", confidence="medium",
|
|
208
|
+
http_path=http_path,
|
|
209
|
+
evidence=security_evidence,
|
|
149
210
|
)]
|
|
150
211
|
if _WEB_FILTER_RE.search(content):
|
|
151
212
|
return [EntryPoint(
|
sourcecode/env_analyzer.py
CHANGED
|
@@ -27,6 +27,8 @@ _ENV_EXAMPLE_NAMES = {
|
|
|
27
27
|
|
|
28
28
|
# Spring Boot application.properties / application.yml and their profile variants
|
|
29
29
|
_SPRING_CONF_BASE = {"application.properties", "application.yml", "application.yaml"}
|
|
30
|
+
# Matches options/{profile}/ in multi-tenant SAS layout paths
|
|
31
|
+
_OPTIONS_PROFILE_PATH_RE = re.compile(r'options/([a-z0-9_-]+)/', re.IGNORECASE)
|
|
30
32
|
_SPRING_CONF_PROFILE_RE = re.compile(r'^application-([a-z0-9_-]+)\.(properties|ya?ml)$', re.IGNORECASE)
|
|
31
33
|
# Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE.
|
|
32
34
|
# Group 1 = key, Group 2 = default (may be empty string, absent = no default).
|
|
@@ -35,6 +37,15 @@ _SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
|
|
|
35
37
|
# These are internal property cross-references, not OS env vars, but still config signals.
|
|
36
38
|
_SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
|
|
37
39
|
|
|
40
|
+
# Known Spring-internal namespaces — NOT emitted as custom application properties.
|
|
41
|
+
_SPRING_BUILTIN_NAMESPACES: frozenset[str] = frozenset({
|
|
42
|
+
"spring", "logging", "management", "server", "info", "debug",
|
|
43
|
+
"endpoints", "security", "eureka", "feign", "ribbon", "hystrix",
|
|
44
|
+
"zuul", "cloud", "flyway", "liquibase", "jpa", "datasource",
|
|
45
|
+
"kafka", "rabbitmq", "redis", "mail", "thymeleaf", "mvc",
|
|
46
|
+
"web", "actuator", "metrics", "tracing",
|
|
47
|
+
})
|
|
48
|
+
|
|
38
49
|
# Patterns where absence of the variable causes a hard runtime error (not just None/null).
|
|
39
50
|
# py_environ_bracket → os.environ["KEY"] raises KeyError
|
|
40
51
|
# java_spring_value → Spring fails to start if ${KEY} has no default
|
|
@@ -223,6 +234,66 @@ def _extract_spring_profile(filename: str) -> Optional[str]:
|
|
|
223
234
|
return None
|
|
224
235
|
|
|
225
236
|
|
|
237
|
+
def _strip_yaml_inline_comment(value: str) -> str:
    """Return *value* without a trailing YAML comment.

    A ``#`` starts a comment only at the beginning of the value or right after
    whitespace, and never inside a leading quoted scalar. This keeps values
    such as ``http://host/page#anchor`` or ``"a # b"`` intact.
    """
    scan_from = 0
    if value[:1] in ('"', "'"):
        quote = value[0]
        idx = 1
        while idx < len(value):
            ch = value[idx]
            if quote == '"' and ch == "\\":
                idx += 2  # backslash escape inside a double-quoted scalar
                continue
            if ch == quote:
                if quote == "'" and value[idx + 1: idx + 2] == "'":
                    idx += 2  # '' is an escaped quote in a single-quoted scalar
                    continue
                scan_from = idx + 1
                break
            idx += 1
        else:
            # Unterminated quote: nothing can safely be called a comment.
            return value.strip()
    for idx in range(scan_from, len(value)):
        if value[idx] == "#" and (idx == 0 or value[idx - 1] in " \t"):
            return value[:idx].strip()
    return value.strip()


def _parse_yaml_custom_properties(
    content: str,
    rel_path: str,
    profile: Optional[str],
    findings: dict,
) -> None:
    """Extract custom namespace leaf properties from YAML (e.g. saint.ldap.url).

    Dotted keys are rebuilt by tracking indentation; this is a line-based scan,
    not a YAML parser. A property is recorded only when its top-level key is
    not in ``_SPRING_BUILTIN_NAMESPACES``, its value is a non-empty scalar, and
    the value is not a ``${...}`` placeholder.

    Args:
        content: Text of the YAML file.
        rel_path: Path recorded with each finding.
        profile: Profile recorded with each finding, if any.
        findings: Mutated in place; ``(rel_path, value, False, profile)`` is
            appended to ``findings[dotted_key]``. The caller must pass a
            mapping that creates missing keys (e.g. ``defaultdict(list)``).
    """
    plain_key = re.compile(r'^[a-zA-Z][a-zA-Z0-9_-]*$')
    # Stack of (indent, key_segment) for the mapping chain enclosing the
    # current line.
    key_stack: list[tuple[int, str]] = []

    for line in content.splitlines():
        stripped = line.lstrip()
        if not stripped or stripped.startswith('#'):
            continue
        if ':' not in stripped:
            continue

        indent = len(line) - len(stripped)
        key_part, _, remainder = stripped.partition(':')
        key_part = key_part.strip()
        value_part = remainder.strip()

        # Only plain identifiers; this also skips list items ("- name: x").
        if not plain_key.match(key_part):
            continue

        # Drop siblings and deeper entries so the stack holds only ancestors.
        while key_stack and key_stack[-1][0] >= indent:
            key_stack.pop()
        key_stack.append((indent, key_part))

        # Only leaf scalars: skip mapping starts and inline collections.
        if not value_part or value_part.startswith('{') or value_part.startswith('['):
            continue

        full_key = '.'.join(seg for _, seg in key_stack)
        top_ns = key_stack[0][1].lower()

        # Skip Spring built-in namespaces.
        if top_ns in _SPRING_BUILTIN_NAMESPACES:
            continue

        # ${...} references are handled elsewhere.
        if value_part.startswith('${'):
            continue

        # Strip inline comments without truncating values that contain '#'.
        clean_value = _strip_yaml_inline_comment(value_part)
        if not clean_value:
            continue

        findings[full_key].append((rel_path, clean_value, False, profile))
|
|
295
|
+
|
|
296
|
+
|
|
226
297
|
def _parse_spring_config(
|
|
227
298
|
path: Path,
|
|
228
299
|
rel_path: str,
|
|
@@ -234,6 +305,7 @@ def _parse_spring_config(
|
|
|
234
305
|
Returns the total number of ${...} placeholders found (candidates).
|
|
235
306
|
Captures default values from ${VAR:default} syntax.
|
|
236
307
|
Marks vars without defaults as hard-required (Spring fails to start if missing).
|
|
308
|
+
Also extracts custom namespace properties (saint.*, app.*, etc.) as yml_property entries.
|
|
237
309
|
"""
|
|
238
310
|
try:
|
|
239
311
|
content = path.read_text(encoding="utf-8", errors="replace")
|
|
@@ -267,6 +339,10 @@ def _parse_spring_config(
|
|
|
267
339
|
findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
|
|
268
340
|
candidates += 1
|
|
269
341
|
|
|
342
|
+
# 3. Custom YAML namespace properties (YAML/YML files only)
|
|
343
|
+
if rel_path.endswith((".yml", ".yaml")):
|
|
344
|
+
_parse_yaml_custom_properties(content, rel_path, profile, findings)
|
|
345
|
+
|
|
270
346
|
return candidates
|
|
271
347
|
|
|
272
348
|
|
|
@@ -320,14 +396,17 @@ class EnvAnalyzer:
|
|
|
320
396
|
first_profile = prof
|
|
321
397
|
if len(unique_files) >= _MAX_FILES_PER_KEY:
|
|
322
398
|
break
|
|
399
|
+
# Custom YAML properties use lowercase.dotted keys and category "application"
|
|
400
|
+
is_yml_prop = '.' in key and key[0].islower()
|
|
323
401
|
records[key] = EnvVarRecord(
|
|
324
402
|
key=key,
|
|
325
403
|
required=required,
|
|
326
404
|
default=default_val,
|
|
327
405
|
type_hint=_infer_type_hint(key),
|
|
328
|
-
category=_infer_category(key),
|
|
406
|
+
category="application" if is_yml_prop else _infer_category(key),
|
|
329
407
|
files=unique_files,
|
|
330
408
|
profile=first_profile,
|
|
409
|
+
source="yml_property" if is_yml_prop else None,
|
|
331
410
|
)
|
|
332
411
|
|
|
333
412
|
# 2. Supplement with .env.example entries (fill description + add missing keys)
|
|
@@ -372,6 +451,8 @@ class EnvAnalyzer:
|
|
|
372
451
|
"extracted. Duplicates across profiles collapsed."
|
|
373
452
|
)
|
|
374
453
|
|
|
454
|
+
# spring_profiles: named profiles only (exclude "default")
|
|
455
|
+
_named_profiles = sorted({p for p in profiles_scanned if p != "default"})
|
|
375
456
|
summary = EnvSummary(
|
|
376
457
|
requested=True,
|
|
377
458
|
total=len(sorted_records),
|
|
@@ -383,6 +464,7 @@ class EnvAnalyzer:
|
|
|
383
464
|
profiles_scanned=sorted(set(profiles_scanned)),
|
|
384
465
|
spring_candidates=spring_candidates,
|
|
385
466
|
coverage_note=coverage_note,
|
|
467
|
+
spring_profiles=_named_profiles,
|
|
386
468
|
)
|
|
387
469
|
|
|
388
470
|
return sorted_records, summary
|
|
@@ -427,6 +509,10 @@ class EnvAnalyzer:
|
|
|
427
509
|
# Spring Boot application.properties / application.yml (incl. profiles)
|
|
428
510
|
if name_lower in _SPRING_CONF_BASE or _SPRING_CONF_PROFILE_RE.match(name_lower):
|
|
429
511
|
profile = _extract_spring_profile(name)
|
|
512
|
+
# Override profile if path contains options/{profile}/ (multi-tenant SAS layout)
|
|
513
|
+
path_profile_match = _OPTIONS_PROFILE_PATH_RE.search(rel)
|
|
514
|
+
if path_profile_match:
|
|
515
|
+
profile = path_profile_match.group(1)
|
|
430
516
|
if profile and profile not in profiles_scanned:
|
|
431
517
|
profiles_scanned.append(profile)
|
|
432
518
|
count = _parse_spring_config(entry, rel, findings, profile)
|
sourcecode/file_classifier.py
CHANGED
|
@@ -78,6 +78,29 @@ _IMPORT_RE = re.compile(
|
|
|
78
78
|
)
|
|
79
79
|
_DEF_RE = re.compile(r"\b(class|def|function|const|export\s+class|interface|type)\s+[A-Za-z_]", re.MULTILINE)
|
|
80
80
|
|
|
81
|
+
# Java Spring stereotype annotation detection
|
|
82
|
+
_JAVA_ANNOTATION_RE = re.compile(r'@(RestController|Controller|Service|Repository|Mapper|Entity|Data|Configuration|EnableWebSecurity|ControllerAdvice|Transactional)\b')
|
|
83
|
+
|
|
84
|
+
# (annotation_set, category, relevance, why_template)
|
|
85
|
+
# Checked in priority order; first match wins.
|
|
86
|
+
_JAVA_STEREOTYPE_RULES: list[tuple[frozenset, str, float, str]] = [
|
|
87
|
+
(frozenset({"EnableWebSecurity"}), "security", 0.85, "Spring Security configuration"),
|
|
88
|
+
(frozenset({"RestController"}), "api_endpoint", 0.90, "Spring REST controller — defines HTTP API surface"),
|
|
89
|
+
(frozenset({"Controller", "RequestMapping"}), "api_endpoint", 0.80, "Spring MVC controller"),
|
|
90
|
+
(frozenset({"Service", "Transactional"}), "business_logic", 0.75, "Transactional service — business logic boundary"),
|
|
91
|
+
(frozenset({"Service"}), "business_logic", 0.65, "Spring service component"),
|
|
92
|
+
(frozenset({"Repository"}), "data_access", 0.65, "Spring repository — data access layer"),
|
|
93
|
+
(frozenset({"Mapper"}), "data_access", 0.65, "MyBatis mapper — SQL data access"),
|
|
94
|
+
(frozenset({"Configuration"}), "configuration", 0.70, "Spring configuration class"),
|
|
95
|
+
(frozenset({"Entity"}), "domain_model", 0.50, "JPA entity — domain model"),
|
|
96
|
+
(frozenset({"Data"}), "dto", 0.40, "Lombok DTO"),
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
# Categories produced by Java stereotype detection — used downstream to apply direct relevance
|
|
100
|
+
JAVA_STEREOTYPE_CATEGORIES: frozenset[str] = frozenset(
|
|
101
|
+
cat for _, cat, _, _ in _JAVA_STEREOTYPE_RULES
|
|
102
|
+
)
|
|
103
|
+
|
|
81
104
|
|
|
82
105
|
class FileClassifier:
|
|
83
106
|
def __init__(
|
|
@@ -138,6 +161,12 @@ class FileClassifier:
|
|
|
138
161
|
if norm in self.production_entry_paths:
|
|
139
162
|
return FileClassification(norm, "runtime_core", "high", 0.95, "declared production runtime entrypoint", ["entry_points"])
|
|
140
163
|
|
|
164
|
+
# Java Spring stereotype detection (Java/Kotlin files only)
|
|
165
|
+
if suffix in {".java", ".kt"}:
|
|
166
|
+
java_class = self._classify_java_stereotype(norm, content)
|
|
167
|
+
if java_class is not None:
|
|
168
|
+
return java_class
|
|
169
|
+
|
|
141
170
|
if self._has_any_import(imports, _API_IMPORTS):
|
|
142
171
|
evidence = self._matched_imports(imports, _API_IMPORTS)
|
|
143
172
|
return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
|
|
@@ -213,3 +242,21 @@ class FileClassifier:
|
|
|
213
242
|
def _sample(self, imports: list[str]) -> list[str]:
|
|
214
243
|
return [f"import:{imp}" for imp in imports[:4]]
|
|
215
244
|
|
|
245
|
+
def _classify_java_stereotype(self, path: str, content: str) -> "FileClassification | None":
    """Classify a Java file by its Spring/JPA/MyBatis stereotype annotations.

    Args:
        path: Path stored in the returned classification.
        content: File text scanned with ``_JAVA_ANNOTATION_RE``.

    Returns:
        The classification for the first rule in ``_JAVA_STEREOTYPE_RULES``
        whose annotations are all present, or ``None`` when the content is
        empty, has no recognised annotation, or matches no rule. The evidence
        list holds the recognised annotation names in sorted order so repeated
        runs give identical output.
    """
    if not content:
        return None
    found = frozenset(m.group(1) for m in _JAVA_ANNOTATION_RE.finditer(content))
    if not found:
        return None

    # Iterating a set of strings has no stable order between interpreter
    # runs; sort once so the evidence is reproducible.
    evidence = sorted(found)

    for required_annotations, category, relevance, why in _JAVA_STEREOTYPE_RULES:
        # Compound rules (Service+Transactional, Controller+RequestMapping)
        # need every listed annotation.
        if not required_annotations <= found:
            continue
        # @Data marks a DTO only when the class is not also an @Entity.
        if required_annotations == frozenset({"Data"}) and "Entity" in found:
            continue
        return FileClassification(path, category, "high", relevance, why, evidence)
    return None
|
|
262
|
+
|
sourcecode/metrics_analyzer.py
CHANGED
|
@@ -229,6 +229,29 @@ class MetricsAnalyzer:
|
|
|
229
229
|
"null complexity fields are expected, not an error."
|
|
230
230
|
)
|
|
231
231
|
|
|
232
|
+
# P2-C: DDD module metrics — group by module, count files/methods per layer
|
|
233
|
+
_DDD_LAYERS = {"domain", "application", "infrastructure"}
|
|
234
|
+
ddd_files = [r for r in records if "/ddd/" in r.path.replace("\\", "/")]
|
|
235
|
+
if ddd_files:
|
|
236
|
+
module_layer_data: dict[str, dict[str, dict]] = {}
|
|
237
|
+
for fm in ddd_files:
|
|
238
|
+
parts = fm.path.replace("\\", "/").split("/")
|
|
239
|
+
for i, part in enumerate(parts):
|
|
240
|
+
if part in _DDD_LAYERS and i >= 2:
|
|
241
|
+
module = parts[i - 1]
|
|
242
|
+
layer = part
|
|
243
|
+
if module not in module_layer_data:
|
|
244
|
+
module_layer_data[module] = {lyr: {"files": 0, "methods": 0} for lyr in _DDD_LAYERS}
|
|
245
|
+
module_layer_data[module][layer]["files"] += 1
|
|
246
|
+
module_layer_data[module][layer]["methods"] += fm.function_count or 0
|
|
247
|
+
break
|
|
248
|
+
ddd_metrics = [
|
|
249
|
+
{"module": mod, "layers": layers}
|
|
250
|
+
for mod, layers in sorted(module_layer_data.items())
|
|
251
|
+
]
|
|
252
|
+
else:
|
|
253
|
+
ddd_metrics = []
|
|
254
|
+
|
|
232
255
|
summary = MetricsSummary(
|
|
233
256
|
requested=True,
|
|
234
257
|
file_count=len(records),
|
|
@@ -238,6 +261,7 @@ class MetricsAnalyzer:
|
|
|
238
261
|
coverage_records=coverage_records,
|
|
239
262
|
coverage_sources_found=sorted({r.format for r in coverage_records}),
|
|
240
263
|
limitations=limitations,
|
|
264
|
+
ddd_module_metrics=ddd_metrics,
|
|
241
265
|
)
|
|
242
266
|
return records, summary
|
|
243
267
|
|
sourcecode/prepare_context.py
CHANGED
|
@@ -332,6 +332,50 @@ _SOURCE_EXTENSIONS: frozenset[str] = frozenset({
|
|
|
332
332
|
".go", ".rs", ".rb", ".php", ".cs", ".dart",
|
|
333
333
|
})
|
|
334
334
|
|
|
335
|
+
|
|
336
|
+
def _extract_ddd_domain(path: str) -> str:
|
|
337
|
+
"""Extract domain name from DDD package path.
|
|
338
|
+
|
|
339
|
+
For m3informatica.saint.ddd.{domain}.infrastructure.rest.*RestController
|
|
340
|
+
the domain is the segment just before application/ domain/ or infrastructure/.
|
|
341
|
+
"""
|
|
342
|
+
parts = path.replace("\\", "/").split("/")
|
|
343
|
+
_DDD_LAYERS = {"application", "domain", "infrastructure"}
|
|
344
|
+
for i, part in enumerate(parts):
|
|
345
|
+
if part in _DDD_LAYERS and i >= 1:
|
|
346
|
+
return parts[i - 1]
|
|
347
|
+
# Fallback: penultimate directory segment
|
|
348
|
+
if len(parts) >= 2:
|
|
349
|
+
return parts[-2]
|
|
350
|
+
return ""
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _java_why(path: str, file_class: "Optional[object]") -> str:
|
|
354
|
+
"""Generate why string for Java files based on stereotype classification."""
|
|
355
|
+
if file_class is None:
|
|
356
|
+
return ""
|
|
357
|
+
from sourcecode.file_classifier import JAVA_STEREOTYPE_CATEGORIES
|
|
358
|
+
category = getattr(file_class, "category", "")
|
|
359
|
+
if category not in JAVA_STEREOTYPE_CATEGORIES:
|
|
360
|
+
return ""
|
|
361
|
+
domain = _extract_ddd_domain(path)
|
|
362
|
+
class_name = Path(path).stem
|
|
363
|
+
if category == "api_endpoint":
|
|
364
|
+
return f"Defines HTTP endpoints for the {domain} domain" if domain else "Defines HTTP API endpoints"
|
|
365
|
+
if category == "business_logic":
|
|
366
|
+
return f"Orchestrates {domain} business logic" if domain else "Business logic service"
|
|
367
|
+
if category == "data_access":
|
|
368
|
+
return f"SQL queries for {domain} data access" if domain else "Data access layer"
|
|
369
|
+
if category == "domain_model":
|
|
370
|
+
return f"JPA entity for {class_name} persistence"
|
|
371
|
+
if category == "configuration":
|
|
372
|
+
return getattr(file_class, "reason", "Spring configuration class")
|
|
373
|
+
if category == "security":
|
|
374
|
+
return getattr(file_class, "reason", "Spring Security configuration")
|
|
375
|
+
if category == "dto":
|
|
376
|
+
return f"Lombok DTO — {class_name}"
|
|
377
|
+
return getattr(file_class, "reason", "")
|
|
378
|
+
|
|
335
379
|
_ALL_EXTENSIONS: frozenset[str] = _SOURCE_EXTENSIONS | frozenset({
|
|
336
380
|
".md", ".toml", ".yaml", ".yml", ".json", ".xml",
|
|
337
381
|
})
|
|
@@ -726,12 +770,14 @@ class TaskContextBuilder:
|
|
|
726
770
|
)
|
|
727
771
|
all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
|
|
728
772
|
reason_str = ", ".join(all_reasons) if all_reasons else "source file"
|
|
773
|
+
why_str = _java_why(path, file_class)
|
|
729
774
|
|
|
730
775
|
scored.append((total, path, RelevantFile(
|
|
731
776
|
path=path,
|
|
732
777
|
role=role,
|
|
733
778
|
score=round(min(total / 3.0, 1.0), 2),
|
|
734
779
|
reason=reason_str,
|
|
780
|
+
why=why_str,
|
|
735
781
|
)))
|
|
736
782
|
|
|
737
783
|
# Deterministic: score desc, then path asc as tiebreaker
|
sourcecode/schema.py
CHANGED
|
@@ -79,6 +79,7 @@ class EntryPoint:
|
|
|
79
79
|
classification: Optional[Literal["production", "development", "auxiliary"]] = None
|
|
80
80
|
runtime_relevance: Optional[Literal["high", "medium", "low"]] = None
|
|
81
81
|
produced_by: Optional[str] = None # which detector emitted this
|
|
82
|
+
http_path: Optional[str] = None # extracted from @RequestMapping / @GetMapping (Java REST controllers)
|
|
82
83
|
|
|
83
84
|
|
|
84
85
|
@dataclass
|
|
@@ -224,6 +225,7 @@ class MetricsSummary:
|
|
|
224
225
|
coverage_records: list[CoverageRecord] = field(default_factory=list)
|
|
225
226
|
coverage_sources_found: list[str] = field(default_factory=list)
|
|
226
227
|
limitations: list[str] = field(default_factory=list)
|
|
228
|
+
ddd_module_metrics: list[dict] = field(default_factory=list)
|
|
227
229
|
|
|
228
230
|
|
|
229
231
|
@dataclass
|
|
@@ -413,6 +415,7 @@ class ArchitectureAnalysis:
|
|
|
413
415
|
# True when pattern is inferred from weak signals (e.g. directory names only).
|
|
414
416
|
# Agents must not treat tentative patterns as confirmed facts.
|
|
415
417
|
tentative: bool = False
|
|
418
|
+
ddd_layers_detected: list[str] = field(default_factory=list) # e.g. ["application", "domain", "infrastructure"]
|
|
416
419
|
|
|
417
420
|
|
|
418
421
|
# --- Env Map ---
|
|
@@ -425,10 +428,11 @@ class EnvVarRecord:
|
|
|
425
428
|
required: bool = True
|
|
426
429
|
default: Optional[str] = None
|
|
427
430
|
type_hint: Optional[str] = None # string | int | bool | url | path | enum
|
|
428
|
-
category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general
|
|
431
|
+
category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general | application
|
|
429
432
|
description: Optional[str] = None
|
|
430
433
|
files: list[str] = field(default_factory=list) # "path:line"
|
|
431
434
|
profile: Optional[str] = None # Spring profile if first occurrence is in application-{profile}.yml
|
|
435
|
+
source: Optional[str] = None # yml_property | env_var | source_code
|
|
432
436
|
|
|
433
437
|
|
|
434
438
|
@dataclass
|
|
@@ -446,6 +450,7 @@ class EnvSummary:
|
|
|
446
450
|
profiles_scanned: list[str] = field(default_factory=list)
|
|
447
451
|
spring_candidates: int = 0 # total ${VAR} refs found across Spring config files
|
|
448
452
|
coverage_note: Optional[str] = None # explicit note about partial coverage
|
|
453
|
+
spring_profiles: list[str] = field(default_factory=list) # canonical list: profile names from application-{profile}.yml
|
|
449
454
|
|
|
450
455
|
|
|
451
456
|
# --- Code Notes ---
|