sourcecode 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.3.0"
3
+ __version__ = "1.5.0"
@@ -182,8 +182,19 @@ class ArchitectureAnalyzer:
182
182
  ddd_result = self._detect_ddd(sm.file_paths)
183
183
  if ddd_result is not None:
184
184
  ddd_pattern, ddd_layers, ddd_contexts, ddd_layer_names = ddd_result
185
- domains_for_ddd = self._cluster_domains(filtered) if len(filtered) >= 2 else []
186
185
  module_files = self._build_ddd_module_files(sm.file_paths, ddd_contexts)
186
+ # Use DDD bounded context names as domains so --architecture shows each
187
+ # context as a distinct domain instead of collapsing all files under
188
+ # the Maven path segment (e.g. "java").
189
+ domains_for_ddd = [
190
+ ArchitectureDomain(
191
+ name=n,
192
+ files=module_files.get(n, []),
193
+ role="DDD bounded context",
194
+ confidence="high",
195
+ )
196
+ for n in ddd_contexts
197
+ ]
187
198
  bc_list = [
188
199
  BoundedContext(name=n, modules=module_files.get(n, []), confidence="high")
189
200
  for n in ddd_contexts
@@ -940,19 +940,51 @@ def _extract_python(path: str, source: str) -> FileContract:
940
940
 
941
941
 
942
942
  # ---------------------------------------------------------------------------
943
- # Minimal Java extraction (regex-based, no AST)
943
+ # ---------------------------------------------------------------------------
944
+ # Enhanced Java extraction (regex-based, annotation-aware)
944
945
  # ---------------------------------------------------------------------------
945
946
 
946
- _JAVA_CLASS_DECL_RE = re.compile(
947
- r'public\s+(?:(?:abstract|final|static)\s+)*(class|interface|enum)\s+(\w+)'
948
- r'(?:\s+extends\s+([\w.]+))?(?:\s+implements\s+([\w.,\s]+?))?(?=\s*[\{<])',
949
- re.MULTILINE,
947
+ _JAVA_IMPORT_RE = re.compile(r'^import\s+(?:static\s+)?([^;\s]+)\s*;', re.MULTILINE)
948
+
949
+ # Single annotation on one line (captures name + optional parens args)
950
+ _JAVA_ANNO_LINE_RE = re.compile(r'^\s*(@[\w.]+(?:\s*\([^)]*\))?)\s*$')
951
+ # Class/interface/enum declaration line (public or package-private)
952
+ _JAVA_CLASS_LINE_RE = re.compile(
953
+ r'(?:public\s+)?(?:(?:abstract|final|static|sealed)\s+)*'
954
+ r'(class|interface|enum|@interface)\s+(\w+)'
955
+ r'(?:\s+extends\s+([\w.]+))?'
956
+ r'(?:\s+implements\s+([\w.,\s<>]+?))?'
957
+ r'(?=\s*[\{<])'
950
958
  )
951
- _JAVA_METHOD_SIG_RE = re.compile(
952
- r'^\s{0,12}public\s+[^\{]+\(',
959
+ # Public method: up to 12 leading spaces, return type, name, open paren
960
+ _JAVA_PUB_METHOD_LINE_RE = re.compile(
961
+ r'^\s{0,12}public\s+(?:(?:static|final|synchronized|abstract|default)\s+)*'
962
+ r'[\w<>\[\]?,\s]+?\s+(\w+)\s*\(',
953
963
  re.MULTILINE,
954
964
  )
955
- _JAVA_IMPORT_RE = re.compile(r'^import\s+(?:static\s+)?([^;\s]+)\s*;', re.MULTILINE)
965
+ # @Autowired or @Inject field
966
+ _JAVA_FIELD_DECL_RE = re.compile(
967
+ r'^\s*(?:private|protected|public)?\s*'
968
+ r'([\w<>.,\[\]? ]+?)\s+(\w+)\s*[;=]'
969
+ )
970
+
971
+
972
+ def _java_collect_preceding_annotations(lines: list[str], decl_idx: int) -> list[str]:
973
+ """Walk back from decl_idx and collect @annotation lines immediately before it."""
974
+ annotations: list[str] = []
975
+ i = decl_idx - 1
976
+ while i >= 0:
977
+ stripped = lines[i].strip()
978
+ if not stripped or stripped.startswith("//") or stripped.startswith("*"):
979
+ i -= 1
980
+ continue
981
+ m = re.match(r'(@[\w.]+(?:\s*\([^)]*\))?)', stripped)
982
+ if m:
983
+ annotations.insert(0, m.group(1))
984
+ i -= 1
985
+ else:
986
+ break
987
+ return annotations
956
988
 
957
989
 
958
990
  def _extract_java(path: str, source: str) -> FileContract:
@@ -960,35 +992,93 @@ def _extract_java(path: str, source: str) -> FileContract:
960
992
  types: list[TypeDefinition] = []
961
993
  functions: list[FunctionSignature] = []
962
994
  imports: list[ImportRecord] = []
995
+ autowired_fields: list[dict] = []
963
996
 
964
- # Class / interface / enum declarations
965
- for m in _JAVA_CLASS_DECL_RE.finditer(source):
966
- name = m.group(2)
967
- extends_str = m.group(3)
968
- implements_str = m.group(4)
969
- all_extends: list[str] = []
970
- if extends_str:
971
- all_extends.append(extends_str.strip())
972
- if implements_str:
973
- all_extends.extend(i.strip() for i in implements_str.split(",") if i.strip())
974
- types.append(TypeDefinition(name=name, kind="class", fields=[], extends=all_extends))
975
- exports.append(ExportRecord(name=name, kind="class"))
976
-
977
- class_names = {t.name for t in types}
978
-
979
- # Public method signatures (one-line heuristic)
997
+ lines = source.splitlines()
998
+
999
+ # Pass 1: collect imports
1000
+ seen_sources: set[str] = set()
1001
+ for m in _JAVA_IMPORT_RE.finditer(source):
1002
+ full_import = m.group(1).strip()
1003
+ if full_import not in seen_sources:
1004
+ seen_sources.add(full_import)
1005
+ imports.append(ImportRecord(source=full_import, kind="named", symbols=[]))
1006
+
1007
+ # Pass 2: line-by-line scan for classes, methods, @Autowired fields
1008
+ class_names: set[str] = set()
980
1009
  seen_methods: set[str] = set()
981
- for m in _JAVA_METHOD_SIG_RE.finditer(source):
982
- sig_text = m.group(0).strip()
983
- name_match = re.search(r'(\w+)\s*\($', sig_text)
984
- if not name_match:
985
- name_match = re.search(r'(\w+)\s*\(', sig_text)
986
- if not name_match:
1010
+ autowired_pending = False
1011
+
1012
+ for idx, line in enumerate(lines):
1013
+ stripped = line.strip()
1014
+
1015
+ # Detect @Autowired / @Inject annotations
1016
+ if stripped.startswith("@Autowired") or stripped.startswith("@Inject"):
1017
+ autowired_pending = True
987
1018
  continue
988
- mname = name_match.group(1)
989
- if mname in class_names or mname in seen_methods or mname in {"if", "for", "while", "switch"}:
1019
+
1020
+ # Capture autowired field on next non-annotation, non-blank line
1021
+ if autowired_pending and stripped and not stripped.startswith("@"):
1022
+ autowired_pending = False
1023
+ fm = _JAVA_FIELD_DECL_RE.match(line)
1024
+ if fm:
1025
+ type_name = fm.group(1).strip().split()[-1] # last word = simple type
1026
+ field_name = fm.group(2).strip()
1027
+ if field_name and type_name and field_name not in {"class", "interface"}:
1028
+ autowired_fields.append({"type": type_name, "name": field_name})
1029
+ elif stripped and not stripped.startswith("@"):
1030
+ autowired_pending = False
1031
+
1032
+ # Class/interface/enum declaration
1033
+ cm = _JAVA_CLASS_LINE_RE.search(stripped)
1034
+ if cm and ("class " in stripped or "interface " in stripped or "enum " in stripped):
1035
+ kind_kw = cm.group(1) # class | interface | enum | @interface
1036
+ name = cm.group(2)
1037
+ extends_str = cm.group(3)
1038
+ implements_str = cm.group(4)
1039
+
1040
+ annotations = _java_collect_preceding_annotations(lines, idx)
1041
+ all_extends: list[str] = []
1042
+ if extends_str:
1043
+ all_extends.append(extends_str.strip())
1044
+
1045
+ implements_list: list[str] = []
1046
+ if implements_str:
1047
+ implements_list = [i.strip() for i in implements_str.split(",") if i.strip()]
1048
+
1049
+ export_kind = "class" if kind_kw in ("class", "@interface") else kind_kw
1050
+ exports.append(ExportRecord(
1051
+ name=name,
1052
+ kind=export_kind,
1053
+ annotations=annotations,
1054
+ extends=extends_str.strip() if extends_str else None,
1055
+ implements=implements_list,
1056
+ ))
1057
+ types.append(TypeDefinition(
1058
+ name=name,
1059
+ kind=export_kind,
1060
+ fields=[],
1061
+ extends=all_extends,
1062
+ ))
1063
+ class_names.add(name)
1064
+
1065
+ # Pass 3: public methods with their preceding annotations
1066
+ for m in _JAVA_PUB_METHOD_LINE_RE.finditer(source):
1067
+ sig_text = m.group(0).strip()
1068
+ mname = m.group(1)
1069
+ if (mname in class_names or mname in seen_methods
1070
+ or mname in {"if", "for", "while", "switch", "return", "new"}):
990
1071
  continue
991
1072
  seen_methods.add(mname)
1073
+ # Find line index for this match to collect preceding annotations
1074
+ line_start = source.count("\n", 0, m.start())
1075
+ annotations = _java_collect_preceding_annotations(lines, line_start)
1076
+ exports.append(ExportRecord(
1077
+ name=mname,
1078
+ kind="method",
1079
+ annotations=annotations,
1080
+ signature=sig_text,
1081
+ ))
992
1082
  functions.append(FunctionSignature(
993
1083
  name=mname,
994
1084
  signature=sig_text,
@@ -997,14 +1087,6 @@ def _extract_java(path: str, source: str) -> FileContract:
997
1087
  return_type=None,
998
1088
  ))
999
1089
 
1000
- # Import statements
1001
- seen_sources: set[str] = set()
1002
- for m in _JAVA_IMPORT_RE.finditer(source):
1003
- full_import = m.group(1).strip()
1004
- if full_import not in seen_sources:
1005
- seen_sources.add(full_import)
1006
- imports.append(ImportRecord(source=full_import, kind="named", symbols=[]))
1007
-
1008
1090
  # External deps: top-2 package segments, skip java.* / javax.*
1009
1091
  deps = sorted({
1010
1092
  ".".join(imp.source.split(".")[:2])
@@ -1021,6 +1103,7 @@ def _extract_java(path: str, source: str) -> FileContract:
1021
1103
  functions=sorted(functions, key=lambda f: f.name)[:20],
1022
1104
  types=sorted(types, key=lambda t: t.name),
1023
1105
  dependencies=deps[:20],
1106
+ autowired_fields=autowired_fields[:20],
1024
1107
  extraction_method="heuristic",
1025
1108
  )
1026
1109
 
@@ -1089,6 +1172,51 @@ def _detect_role(path: str, contract: FileContract) -> str:
1089
1172
  return "util"
1090
1173
 
1091
1174
 
1175
+ # ---------------------------------------------------------------------------
1176
+ # MyBatis XML mapper extractor
1177
+ # ---------------------------------------------------------------------------
1178
+
1179
def _extract_mybatis_xml(rel_path: str, source: str) -> FileContract:
    """Build a FileContract describing a MyBatis ``*Mapper.xml`` document.

    The root element's ``namespace`` attribute is recorded as a
    ``namespace:<value>`` dependency. Every direct child whose tag (namespace
    prefix removed, lower-cased) is select/insert/update/delete and which has
    a non-blank ``id`` becomes a ``query`` export.

    Any exception raised while parsing or walking the document produces a
    contract without exports that carries an ``xml_parse_error`` limitation.
    """
    import re as _re
    from xml.etree import ElementTree

    brace_prefix = _re.compile(r"\{[^}]+\}")
    sql_tags = frozenset({"select", "insert", "update", "delete"})

    operations: list[ExportRecord] = []
    mapper_namespace: str | None = None

    try:
        mapper_root = ElementTree.fromstring(source.encode("utf-8"))
        mapper_namespace = mapper_root.get("namespace") or None
        for child in mapper_root:
            operation = brace_prefix.sub("", child.tag).lower()
            if operation not in sql_tags:
                continue
            statement_id = (child.get("id") or "").strip()
            if not statement_id:
                continue
            # type_ref tells the serializer which SQL verb this statement is.
            operations.append(
                ExportRecord(kind="query", name=statement_id, type_ref=operation)
            )
    except Exception:
        return FileContract(
            path=rel_path,
            language="mybatis-xml",
            role="mybatis-mapper",
            extraction_method="heuristic",
            limitations=["xml_parse_error: failed to parse mapper XML"],
        )

    dependencies = []
    if mapper_namespace:
        dependencies.append(f"namespace:{mapper_namespace}")

    return FileContract(
        path=rel_path,
        language="mybatis-xml",
        role="mybatis-mapper",
        exports=operations,
        dependencies=dependencies,
        extraction_method="heuristic",
    )
1218
+
1219
+
1092
1220
  # ---------------------------------------------------------------------------
1093
1221
  # AstExtractor public class
1094
1222
  # ---------------------------------------------------------------------------
@@ -1108,6 +1236,16 @@ class AstExtractor:
1108
1236
  return self._ts_ok
1109
1237
 
1110
1238
  def extract(self, path: Path, root: Optional[Path] = None) -> Optional[FileContract]:
1239
+ # MyBatis mapper XML — handled before the language map lookup so .xml
1240
+ # files are only processed when they match the mapper naming convention.
1241
+ if path.suffix.lower() == ".xml" and path.name.endswith("Mapper.xml"):
1242
+ try:
1243
+ source = path.read_text(encoding="utf-8", errors="replace")
1244
+ except OSError:
1245
+ return None
1246
+ rel_path = str(path.relative_to(root)).replace("\\", "/") if root else path.name
1247
+ return _extract_mybatis_xml(rel_path, source)
1248
+
1111
1249
  ext = path.suffix.lower()
1112
1250
  language = _LANGUAGE_MAP.get(ext)
1113
1251
  if language is None:
sourcecode/cli.py CHANGED
@@ -728,15 +728,13 @@ def main(
728
728
  mode = "contract" # unknown → safe default
729
729
 
730
730
  # Legacy flags imply raw mode unless --mode was explicitly overridden.
731
- # These flags produce standard_view-only output sections not in contract_view.
732
- # Preserves backward compat: callers using any legacy flag get their previous format.
733
- # New callers opt into contract mode via --mode contract (or bare invocation).
734
- # Legacy flags that produce output sections incompatible with contract_view
735
- # force mode to raw. --agent is excluded: it now runs the contract pipeline
736
- # and enriches contract_view with auto-enabled analyzers (deps, env, notes).
731
+ # --format yaml and --graph-modules are now compatible with contract_view:
732
+ # yaml is a serialization format (not an output-section flag)
733
+ # graph-modules output is included in contract_view when available
734
+ # Other flags that produce sections exclusive to standard_view still force raw.
737
735
  _legacy_flags_active = (
738
- compact or tree or format == "yaml" or trace_pipeline
739
- or docs or semantics or graph_modules or full_metrics or architecture
736
+ compact or tree or trace_pipeline
737
+ or docs or semantics or full_metrics or architecture
740
738
  )
741
739
  if mode in ("contract", "standard") and _legacy_flags_active:
742
740
  mode = "raw"
@@ -1106,6 +1104,17 @@ def main(
1106
1104
  metrics_summary=metrics_summary,
1107
1105
  )
1108
1106
 
1107
+ # Populate Java-specific root fields from java stack detection (FIX-6, 7, 8)
1108
+ _java_stack = next((s for s in stacks if s.stack == "java"), None)
1109
+ if _java_stack is not None:
1110
+ from dataclasses import replace as _dc_replace
1111
+ sm = _dc_replace(sm,
1112
+ packaging=getattr(_java_stack, "packaging", None) or None,
1113
+ language_version=getattr(_java_stack, "language_version", None) or None,
1114
+ spring_profiles=getattr(_java_stack, "spring_profiles", []) or [],
1115
+ app_server_hint=getattr(_java_stack, "app_server_hint", None) or None,
1116
+ )
1117
+
1109
1118
  # Semantic analysis (--semantics flag)
1110
1119
  if semantic_analyzer is not None:
1111
1120
  if workspace_analysis.workspaces:
@@ -1402,7 +1411,16 @@ def main(
1402
1411
  if _is_contract_mode:
1403
1412
  from sourcecode.contract_pipeline import ContractPipeline
1404
1413
  from sourcecode.contract_model import ContractSummary as _ContractSummary
1405
- _cp = ContractPipeline()
1414
+ # FIX-1: Java projects need higher caps — many files, comprehensive coverage required
1415
+ _jvm_stacks = {"java", "kotlin", "scala", "groovy"}
1416
+ _is_jvm = any(s.stack in _jvm_stacks for s in sm.stacks)
1417
+ # FIX-1: Java projects need higher caps and no relevance threshold
1418
+ _max_files_cp = 2500 if _is_jvm else 500
1419
+ _cp = ContractPipeline(max_files=_max_files_cp)
1420
+ _java_pipeline_kwargs: dict = {}
1421
+ if _is_jvm:
1422
+ _java_pipeline_kwargs["max_contracts"] = 500
1423
+ _java_pipeline_kwargs["min_score"] = 0.0
1406
1424
  try:
1407
1425
  _contracts, _contract_summary = _cp.run(
1408
1426
  target,
@@ -1420,6 +1438,7 @@ def main(
1420
1438
  max_importers=max_importers,
1421
1439
  semantic_calls=sm.semantic_calls or None,
1422
1440
  code_notes=sm.code_notes or None,
1441
+ **_java_pipeline_kwargs,
1423
1442
  )
1424
1443
  except Exception as _exc:
1425
1444
  typer.echo(f"[error] contract pipeline failed: {_exc}", err=True)
@@ -1462,7 +1481,22 @@ def main(
1462
1481
  data = _contract_view(sm, emit_graph=emit_graph, depth=_depth)
1463
1482
  if not no_redact:
1464
1483
  data = redact_dict(data)
1465
- content = json.dumps(data, indent=2, ensure_ascii=False)
1484
+ if format == "yaml":
1485
+ from io import StringIO
1486
+ from ruamel.yaml import YAML as _YAML
1487
+ _yaml = _YAML()
1488
+ _yaml.default_flow_style = False
1489
+ _yaml.representer.add_representer(
1490
+ type(None),
1491
+ lambda dumper, data_val: dumper.represent_scalar(
1492
+ "tag:yaml.org,2002:null", "null"
1493
+ ),
1494
+ )
1495
+ _stream = StringIO()
1496
+ _yaml.dump(data, _stream)
1497
+ content = _stream.getvalue()
1498
+ else:
1499
+ content = json.dumps(data, indent=2, ensure_ascii=False)
1466
1500
  elif agent:
1467
1501
  data = agent_view(sm)
1468
1502
  # When contract pipeline ran (mode=contract, no legacy flags), include
@@ -25,9 +25,13 @@ class ExportRecord:
25
25
  """Exported symbol."""
26
26
 
27
27
  name: str
28
- kind: str = "unknown" # function | class | const | type | default | react_component | enum | interface
28
+ kind: str = "unknown" # function | class | const | type | default | react_component | enum | interface | method
29
29
  type_ref: Optional[str] = None
30
30
  async_: bool = False
31
+ annotations: list[str] = field(default_factory=list) # Java: ["@Controller", "@Transactional"]
32
+ extends: Optional[str] = None # Java: parent class
33
+ implements: list[str] = field(default_factory=list) # Java: interfaces
34
+ signature: Optional[str] = None # Java method: full signature
31
35
 
32
36
 
33
37
  @dataclass
@@ -96,6 +100,8 @@ class FileContract:
96
100
  # Extraction quality
97
101
  extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
98
102
  limitations: list[str] = field(default_factory=list)
103
+ # Java-specific (FIX-1)
104
+ autowired_fields: list[dict] = field(default_factory=list) # [{"type": "...", "name": "..."}]
99
105
 
100
106
 
101
107
  @dataclass
@@ -182,6 +182,7 @@ class ContractPipeline:
182
182
  semantic_calls: Optional[list] = None,
183
183
  code_notes: Optional[list] = None,
184
184
  max_contracts: Optional[int] = _MAX_CONTRACTS,
185
+ min_score: Optional[float] = None,
185
186
  ) -> tuple[list[FileContract], ContractSummary]:
186
187
  """Run the full extraction pipeline.
187
188
 
@@ -218,9 +219,18 @@ class ContractPipeline:
218
219
  fname = Path(pn).name
219
220
  return any(fname.startswith(pat) or f".{pat.strip('.')}" in fname for pat in _TEST_PATTERNS)
220
221
 
222
def _is_extractable(p: str) -> bool:
    """Return True when *p* should be passed to contract extraction.

    A path qualifies when its lower-cased suffix is in ``_SRC_EXTENSIONS``,
    or when it is a MyBatis mapper: suffix ``.xml`` and the path ends with
    ``Mapper.xml``. The ``Mapper.xml`` check is case-sensitive.
    """
    suffix = Path(p).suffix.lower()
    # Only *Mapper.xml is accepted, not XML files in general.
    return suffix in _SRC_EXTENSIONS or (suffix == ".xml" and p.endswith("Mapper.xml"))
230
+
221
231
  src_paths = [
222
232
  p for p in file_paths
223
- if Path(p).suffix.lower() in _SRC_EXTENSIONS
233
+ if _is_extractable(p)
224
234
  and not scorer.is_noise(p)
225
235
  and (symbol is not None or changed_only or not _is_test(p))
226
236
  ]
@@ -317,11 +327,17 @@ class ContractPipeline:
317
327
  # 10. Top-N cap — enforce max_contracts when not in symbol-search mode.
318
328
  # Symbol searches must return all matching files; budget applies only to
319
329
  # the default architectural briefing use case.
330
+ _effective_min_score = min_score if min_score is not None else _MIN_CONTRACT_SCORE
320
331
  if symbol is None and max_contracts is not None:
321
332
  contracts = [
322
333
  c for c in contracts
323
- if c.relevance_score >= _MIN_CONTRACT_SCORE or c.is_entrypoint
334
+ if c.relevance_score >= _effective_min_score or c.is_entrypoint
324
335
  ][:max_contracts]
336
+ elif symbol is None and max_contracts is None:
337
+ contracts = [
338
+ c for c in contracts
339
+ if c.relevance_score >= _effective_min_score or c.is_entrypoint
340
+ ]
325
341
 
326
342
  # 11. Compress types if requested
327
343
  if compress_types:
@@ -128,6 +128,8 @@ def _infer_role(name: str, ecosystem: str, scope: str) -> str:
128
128
  return "runtime"
129
129
 
130
130
  if ecosystem == "java":
131
+ if scope == "provided":
132
+ return "provided"
131
133
  artifact = n.split(":")[-1] if ":" in n else n
132
134
  if any(x in artifact for x in ("spring-boot", "spring-security")):
133
135
  return "runtime"
@@ -1120,6 +1122,23 @@ class DependencyAnalyzer:
1120
1122
  properties = self._parse_maven_properties(root_elem, ns)
1121
1123
  dm_versions = self._parse_dependency_management(root_elem, ns, properties)
1122
1124
 
1125
+ # FIX-9: extract parent version for BOM resolution
1126
+ parent_elem = root_elem.find(f"{ns}parent")
1127
+ parent_version: Optional[str] = None
1128
+ parent_group: str = ""
1129
+ if parent_elem is not None:
1130
+ parent_version = (parent_elem.findtext(f"{ns}version") or "").strip() or None
1131
+ parent_group = (parent_elem.findtext(f"{ns}groupId") or "").strip()
1132
+ parent_artifact = (parent_elem.findtext(f"{ns}artifactId") or "").strip()
1133
+ # Propagate parent version into properties for ${project.parent.version}
1134
+ if parent_version:
1135
+ properties.setdefault("project.parent.version", parent_version)
1136
+ properties.setdefault("revision", parent_version)
1137
+
1138
+ # Infer packaging for FIX-6 (used by scope hint for provided)
1139
+ packaging_elem = root_elem.find(f"{ns}packaging")
1140
+ is_war = packaging_elem is not None and (packaging_elem.text or "").strip().lower() == "war"
1141
+
1123
1142
  records: list[DependencyRecord] = []
1124
1143
  deps_elem = root_elem.find(f"{ns}dependencies")
1125
1144
  if deps_elem is None:
@@ -1134,14 +1153,36 @@ class DependencyAnalyzer:
1134
1153
  declared = self._resolve_maven_version(version_raw, properties)
1135
1154
  if declared is None:
1136
1155
  declared = dm_versions.get(f"{group_id}:{artifact_id}")
1156
+
1157
+ # FIX-4: proper maven scope mapping
1137
1158
  scope_text = (dep.findtext(f"{ns}scope") or "compile").strip().lower()
1138
- scope = "dev" if scope_text == "test" else "direct"
1159
+ if scope_text == "test":
1160
+ scope = "dev"
1161
+ elif scope_text == "provided":
1162
+ scope = "provided"
1163
+ else:
1164
+ scope = "direct" # compile, runtime, system, import
1165
+
1166
+ # FIX-4: infer provided for embedded tomcat in WAR projects
1167
+ if (is_war and scope == "direct"
1168
+ and artifact_id in ("spring-boot-starter-tomcat", "tomcat-embed-core")):
1169
+ scope = "provided"
1170
+
1171
+ # FIX-9: resolve BOM version for Spring Boot / Spring Security starters
1172
+ resolved_version: Optional[str] = None
1173
+ if declared is None and parent_version:
1174
+ if group_id == "org.springframework.boot":
1175
+ resolved_version = parent_version
1176
+ elif group_id == "org.springframework.security" and "spring-security.version" in properties:
1177
+ resolved_version = properties["spring-security.version"]
1178
+
1139
1179
  records.append(
1140
1180
  DependencyRecord(
1141
1181
  name=f"{group_id}:{artifact_id}",
1142
1182
  ecosystem="java",
1143
1183
  scope=scope,
1144
1184
  declared_version=declared,
1185
+ resolved_version=resolved_version,
1145
1186
  source="manifest",
1146
1187
  manifest_path="pom.xml",
1147
1188
  )
@@ -1150,6 +1191,18 @@ class DependencyAnalyzer:
1150
1191
  limitations: list[str] = []
1151
1192
  if not records:
1152
1193
  limitations.append("java: pom.xml sin dependencias parseables (puede usar BOM o propiedades)")
1194
+
1195
+ # Warn when Spring Boot BOM manages transitive deps — they can't be resolved statically.
1196
+ parent_artifact_local = (
1197
+ root_elem.findtext(f"{ns}parent/{ns}artifactId") or ""
1198
+ ).strip() if parent_elem is not None else ""
1199
+ if parent_artifact_local == "spring-boot-starter-parent" and parent_version:
1200
+ limitations.append(
1201
+ f"spring_boot_bom_detected: transitive deps managed by Spring Boot BOM "
1202
+ f"v{parent_version}, not resolved statically. "
1203
+ "Run 'mvn dependency:tree' for the full transitive tree."
1204
+ )
1205
+
1153
1206
  return records, limitations
1154
1207
 
1155
1208
  def _analyze_gradle(self, root: Path) -> tuple[list[DependencyRecord], list[str]]:
@@ -14,9 +14,11 @@ from sourcecode.detectors.parsers import read_text_lines, unique_strings
14
14
  from sourcecode.schema import FrameworkDetection
15
15
  from sourcecode.tree_utils import flatten_file_tree
16
16
 
17
+ _NS_TAG_RE = re.compile(r"\{[^}]+\}")
18
+
17
19
  _MAX_FILE_SIZE = 256 * 1024 # 256 KB
18
20
  _MAX_JAVA_ENTRY_SCAN = 1000
19
- _MAX_ANNOTATION_ENTRY_POINTS = 500
21
+ _MAX_ANNOTATION_ENTRY_POINTS = 1000
20
22
 
21
23
  _REST_CONTROLLER_RE = re.compile(r'@RestController\b')
22
24
  _MVC_CONTROLLER_RE = re.compile(r'@Controller\b')
@@ -50,14 +52,34 @@ class JavaDetector(AbstractDetector):
50
52
  def detect(self, context: DetectionContext) -> tuple[list[StackDetection], list[EntryPoint]]:
51
53
  frameworks: list[FrameworkDetection] = []
52
54
  manifests: list[str] = []
55
+ language_version: str | None = None
56
+ packaging: str | None = None
57
+ app_server_hint: str | None = None
58
+ spring_profiles: list[str] = []
53
59
 
54
60
  if "pom.xml" in context.manifests:
55
61
  manifests.append("pom.xml")
56
- frameworks.extend(self._frameworks_from_pom(context.root / "pom.xml"))
62
+ pom_path = context.root / "pom.xml"
63
+ frameworks.extend(self._frameworks_from_pom(pom_path))
64
+ meta = self._parse_pom_metadata(pom_path)
65
+ if meta.get("language_version"):
66
+ language_version = meta["language_version"]
67
+ if meta.get("packaging"):
68
+ packaging = meta["packaging"]
57
69
  if "build.gradle" in context.manifests:
58
70
  manifests.append("build.gradle")
59
71
  frameworks.extend(self._frameworks_from_gradle(context.root / "build.gradle"))
60
72
 
73
+ # Detect app server from descriptor files
74
+ all_paths = flatten_file_tree(context.file_tree)
75
+ if any("weblogic.xml" in p or "weblogic-ejb-jar.xml" in p for p in all_paths):
76
+ app_server_hint = "weblogic"
77
+ elif any("wildfly" in p.lower() or "jboss" in p.lower() for p in all_paths):
78
+ app_server_hint = "wildfly"
79
+
80
+ # Spring profiles — check src/main/options/, src/main/resources/
81
+ spring_profiles = self._detect_spring_profiles(context.root, all_paths)
82
+
61
83
  entry_points = self._collect_entry_points(context)
62
84
  stack = StackDetection(
63
85
  stack="java",
@@ -65,27 +87,124 @@ class JavaDetector(AbstractDetector):
65
87
  confidence="high",
66
88
  frameworks=self._dedupe_frameworks(frameworks),
67
89
  manifests=manifests,
90
+ language_version=language_version,
91
+ packaging=packaging,
92
+ app_server_hint=app_server_hint,
93
+ spring_profiles=spring_profiles,
68
94
  )
69
95
  return [stack], entry_points
70
96
 
97
+ def _parse_pom_metadata(self, path: Path) -> dict:
98
+ """Extract packaging, java version from pom.xml properties/parent."""
99
+ result: dict = {}
100
+ try:
101
+ tree = ElementTree.parse(path)
102
+ except (OSError, ElementTree.ParseError):
103
+ return result
104
+ root = tree.getroot()
105
+ ns_match = _NS_TAG_RE.match(root.tag)
106
+ ns = ns_match.group(0) if ns_match else ""
107
+
108
+ # Packaging (FIX-6)
109
+ packaging_elem = root.find(f"{ns}packaging")
110
+ if packaging_elem is not None and packaging_elem.text:
111
+ result["packaging"] = packaging_elem.text.strip().lower()
112
+
113
+ # Properties
114
+ props_elem = root.find(f"{ns}properties")
115
+ props: dict[str, str] = {}
116
+ if props_elem is not None:
117
+ for prop in props_elem:
118
+ tag = prop.tag.replace(ns, "") if ns else prop.tag
119
+ if prop.text:
120
+ props[tag] = prop.text.strip()
121
+
122
+ # Java version (FIX-7) — check properties first, then compiler plugin
123
+ for key in ("maven.compiler.source", "java.version", "maven.compiler.release"):
124
+ if key in props:
125
+ result["language_version"] = props[key]
126
+ break
127
+ if "language_version" not in result:
128
+ # Check maven-compiler-plugin configuration
129
+ for plugin in root.findall(f".//{ns}plugin"):
130
+ artifact = (plugin.findtext(f"{ns}artifactId") or "").strip()
131
+ if artifact == "maven-compiler-plugin":
132
+ config = plugin.find(f"{ns}configuration")
133
+ if config is not None:
134
+ for tag in ("source", "release"):
135
+ val = config.findtext(f"{ns}{tag}")
136
+ if val:
137
+ result["language_version"] = val.strip()
138
+ break
139
+ break
140
+
141
+ return result
142
+
143
+ def _detect_spring_profiles(self, root: Path, all_paths: list[str]) -> list[str]:
144
+ """Detect Spring profiles from option/resource directories and application-{profile}.yml."""
145
+ profiles: list[str] = []
146
+ seen: set[str] = set()
147
+
148
+ # Pattern 1: src/main/options/{profile}/ directories
149
+ _PROFILE_DIRS = ("src/main/options/", "src/main/resources/")
150
+ for path in all_paths:
151
+ for prefix in _PROFILE_DIRS:
152
+ if path.startswith(prefix):
153
+ remainder = path[len(prefix):]
154
+ parts = remainder.split("/")
155
+ if len(parts) >= 1 and parts[0] and not parts[0].startswith("."):
156
+ candidate = parts[0]
157
+ # Only if it's a directory (has sub-paths) with application.yml
158
+ if candidate not in seen and not candidate.endswith(".yml") and not candidate.endswith(".yaml") and not candidate.endswith(".properties"):
159
+ seen.add(candidate)
160
+ profiles.append(candidate)
161
+ break
162
+
163
+ # Pattern 2: application-{profile}.yml files
164
+ _APP_PROFILE_RE = re.compile(r"application-([A-Za-z0-9_-]+)\.ya?ml$")
165
+ for path in all_paths:
166
+ m = _APP_PROFILE_RE.search(path)
167
+ if m:
168
+ profile = m.group(1)
169
+ if profile not in seen:
170
+ seen.add(profile)
171
+ profiles.append(profile)
172
+
173
+ # Filter out generic names that aren't profiles
174
+ _SKIP = frozenset({"test", "it", "integration"})
175
+ return [p for p in profiles if p.lower() not in _SKIP]
176
+
71
177
def _frameworks_from_pom(self, path: Path) -> list[FrameworkDetection]:
    """Detect JVM frameworks referenced by the ``pom.xml`` at *path*.

    The whole document is serialized to lower-cased text and passed to
    ``_detect_jvm_frameworks``. When the POM's ``<parent>`` artifact is
    ``spring-boot-starter-parent``, its ``<version>`` is passed along as
    ``sb_version`` (FIX-3). Returns an empty list if the file cannot be read
    or parsed.
    """
    try:
        tree = ElementTree.parse(path)
    except (OSError, ElementTree.ParseError):
        return []

    pom_root = tree.getroot()
    ns_hit = _NS_TAG_RE.match(pom_root.tag)
    namespace = "" if ns_hit is None else ns_hit.group(0)

    spring_boot_version: str | None = None
    parent = pom_root.find(f"{namespace}parent")
    if parent is not None:
        parent_artifact_id = (parent.findtext(f"{namespace}artifactId") or "").strip()
        if parent_artifact_id == "spring-boot-starter-parent":
            declared = (parent.findtext(f"{namespace}version") or "").strip()
            spring_boot_version = declared or None

    lowered_xml = ElementTree.tostring(pom_root, encoding="unicode").lower()
    return self._detect_jvm_frameworks(
        lowered_xml, "pom.xml", sb_version=spring_boot_version
    )
78
197
 
79
198
  def _frameworks_from_gradle(self, path: Path) -> list[FrameworkDetection]:
80
199
  content = "\n".join(read_text_lines(path)).lower()
81
200
  return self._detect_jvm_frameworks(content, "build.gradle")
82
201
 
83
- def _detect_jvm_frameworks(self, text: str, source: str) -> list[FrameworkDetection]:
202
+ def _detect_jvm_frameworks(self, text: str, source: str, *, sb_version: str | None = None) -> list[FrameworkDetection]:
84
203
  frameworks: list[FrameworkDetection] = []
85
204
  if "com.android.application" in text or "com.android.library" in text:
86
205
  frameworks.append(FrameworkDetection(name="Android", source=source))
87
206
  if "spring-boot" in text:
88
- frameworks.append(FrameworkDetection(name="Spring Boot", source=source))
207
+ frameworks.append(FrameworkDetection(name="Spring Boot", source=source, version=sb_version))
89
208
  if "spring-webmvc" in text or "spring-web" in text:
90
209
  frameworks.append(FrameworkDetection(name="Spring MVC", source=source))
91
210
  if "spring-webflux" in text:
@@ -117,10 +236,12 @@ class JavaDetector(AbstractDetector):
117
236
  ]
118
237
 
119
238
  # 2. Annotation-based scan: @RestController, @WebFilter, FilterRegistrationBean
120
- scan_candidates = [
121
- p for p in all_java
122
- if "/test/" not in p and "/tests/" not in p
123
- ][:_MAX_JAVA_ENTRY_SCAN]
239
+ # Prioritize Controller-named files so all REST controllers are detected
240
+ # even in large codebases where total files > _MAX_JAVA_ENTRY_SCAN.
241
+ _non_test = [p for p in all_java if "/test/" not in p and "/tests/" not in p]
242
+ _ctrl_files = [p for p in _non_test if "Controller" in p]
243
+ _other_files = [p for p in _non_test if "Controller" not in p]
244
+ scan_candidates = _ctrl_files + _other_files[:max(0, _MAX_JAVA_ENTRY_SCAN - len(_ctrl_files))]
124
245
 
125
246
  annotation_eps: list[EntryPoint] = []
126
247
  for rel_path in scan_candidates:
@@ -100,7 +100,13 @@ class ProjectDetector:
100
100
  detection_method=stack.detection_method,
101
101
  confidence=stack.confidence,
102
102
  frameworks=[
103
- FrameworkDetection(name=framework.name, source=framework.source)
103
+ FrameworkDetection(
104
+ name=framework.name,
105
+ source=framework.source,
106
+ confidence=framework.confidence,
107
+ detected_via=list(framework.detected_via),
108
+ version=framework.version,
109
+ )
104
110
  for framework in stack.frameworks
105
111
  ],
106
112
  package_manager=stack.package_manager,
@@ -110,6 +116,11 @@ class ProjectDetector:
110
116
  workspace=stack.workspace,
111
117
  signals=list(stack.signals),
112
118
  produced_by=stack.produced_by,
119
+ # Java-specific fields
120
+ language_version=stack.language_version,
121
+ packaging=stack.packaging,
122
+ app_server_hint=stack.app_server_hint,
123
+ spring_profiles=list(stack.spring_profiles),
113
124
  )
114
125
 
115
126
  def _merge_stack(self, current: StackDetection, incoming: StackDetection) -> StackDetection:
@@ -124,6 +135,15 @@ class ProjectDetector:
124
135
  elif self._confidence_rank(incoming.confidence) == self._confidence_rank(current.confidence):
125
136
  if current.detection_method == "heuristic" and incoming.detection_method != "heuristic":
126
137
  current.detection_method = incoming.detection_method
138
+ # Java-specific: propagate from incoming if not already set
139
+ if incoming.language_version and not current.language_version:
140
+ current.language_version = incoming.language_version
141
+ if incoming.packaging and not current.packaging:
142
+ current.packaging = incoming.packaging
143
+ if incoming.app_server_hint and not current.app_server_hint:
144
+ current.app_server_hint = incoming.app_server_hint
145
+ if incoming.spring_profiles and not current.spring_profiles:
146
+ current.spring_profiles = list(incoming.spring_profiles)
127
147
  return current
128
148
 
129
149
  def _merge_frameworks(
@@ -133,10 +153,18 @@ class ProjectDetector:
133
153
  ) -> list[FrameworkDetection]:
134
154
  merged: dict[str, FrameworkDetection] = {}
135
155
  for framework in list(current) + list(incoming):
136
- merged.setdefault(
137
- framework.name,
138
- FrameworkDetection(name=framework.name, source=framework.source),
139
- )
156
+ if framework.name not in merged:
157
+ merged[framework.name] = FrameworkDetection(
158
+ name=framework.name,
159
+ source=framework.source,
160
+ confidence=framework.confidence,
161
+ detected_via=list(framework.detected_via),
162
+ version=framework.version,
163
+ )
164
+ elif framework.version and not merged[framework.name].version:
165
+ # Preserve version from whichever source has it
166
+ from dataclasses import replace as _fr_replace
167
+ merged[framework.name] = _fr_replace(merged[framework.name], version=framework.version)
140
168
  return list(merged.values())
141
169
 
142
170
  def _merge_manifests(self, current: Iterable[str], incoming: Iterable[str]) -> list[str]:
@@ -47,6 +47,11 @@ def infer_package_manager(stack: str, file_tree: dict[str, object]) -> str | Non
47
47
  return "composer"
48
48
  if stack == "ruby" and "Gemfile.lock" in flat_paths:
49
49
  return "bundler"
50
+ if stack == "java":
51
+ if "pom.xml" in flat_paths:
52
+ return "maven"
53
+ if any(p == "build.gradle" or p == "build.gradle.kts" for p in flat_paths):
54
+ return "gradle"
50
55
  if stack == "terraform" and any(path.endswith(".tf") for path in flat_paths):
51
56
  return "terraform"
52
57
  return None
@@ -1016,82 +1016,35 @@ class GraphAnalyzer:
1016
1016
  type_map: dict[str, tuple[str, str]],
1017
1017
  workspace: str | None,
1018
1018
  ) -> tuple[list[GraphNode], list[GraphEdge], list[str]]:
1019
+ """Extract module-to-module import edges for JVM files.
1020
+
1021
+ Class/function nodes are intentionally skipped: for large Java codebases
1022
+ (2000+ files) creating one node per class exhausts the node budget before
1023
+ most import edges are processed, resulting in almost no graph edges.
1024
+ Skipping class nodes keeps one module node per file and allows the full
1025
+ import graph to be built within the budget.
1026
+ """
1019
1027
  module_node_id = f"module:{relative_path}"
1020
- language = self._language_for_suffix(Path(relative_path).suffix)
1021
1028
  nodes: list[GraphNode] = []
1022
1029
  edges: list[GraphEdge] = []
1023
1030
  limitations: list[str] = []
1024
- imported_type_map = {
1025
- imported.split(".")[-1]: imported
1026
- for imported in re.findall(r"(?m)^\s*import\s+([A-Za-z0-9_.]+)", content)
1027
- }
1028
1031
 
1029
- class_pattern = re.compile(
1030
- r"(?m)^\s*(?:public\s+|private\s+|protected\s+|internal\s+)?"
1031
- r"(?:class|interface|object|trait)\s+([A-Za-z_][A-Za-z0-9_]*)"
1032
- r"(?:\s+extends\s+([A-Za-z_][A-Za-z0-9_.]*))?"
1033
- )
1034
- local_class_ids: dict[str, str] = {}
1035
- for match in class_pattern.finditer(content):
1036
- name = match.group(1)
1037
- base = match.group(2)
1038
- node_id = f"class:{relative_path}:{name}"
1039
- local_class_ids[name] = node_id
1040
- nodes.append(
1041
- GraphNode(
1042
- id=node_id,
1043
- kind="class",
1044
- language=language,
1045
- path=relative_path,
1046
- symbol=name,
1047
- display_name=name,
1048
- workspace=workspace,
1049
- )
1050
- )
1051
- edges.append(
1052
- GraphEdge(
1053
- source=module_node_id,
1054
- target=node_id,
1055
- kind="contains",
1056
- confidence="medium",
1057
- method="heuristic",
1058
- )
1059
- )
1060
- if base:
1061
- base_short = base.split(".")[-1]
1062
- if base_short in local_class_ids:
1063
- edges.append(
1064
- GraphEdge(
1065
- source=node_id,
1066
- target=local_class_ids[base_short],
1067
- kind="extends",
1068
- confidence="low",
1069
- method="heuristic",
1070
- )
1071
- )
1072
- else:
1073
- imported_base = imported_type_map.get(base_short)
1074
- fqcn = imported_base or base
1075
- mapping = type_map.get(fqcn)
1076
- if mapping is None:
1077
- continue
1078
- base_path, _base_name = mapping
1079
- edges.append(
1080
- GraphEdge(
1081
- source=node_id,
1082
- target=f"class:{base_path}:{base_short}",
1083
- kind="extends",
1084
- confidence="low",
1085
- method="heuristic",
1086
- )
1087
- )
1032
+ # Extract imports, skipping static imports and wildcard imports
1033
+ # Pattern includes optional 'static' keyword; \w at end excludes wildcards
1034
+ _import_re = re.compile(r"(?m)^\s*import\s+(?:static\s+)?([A-Za-z][A-Za-z0-9_.]*\w)\s*;")
1035
+ imported_fqcns: set[str] = set()
1036
+ for m in _import_re.finditer(content):
1037
+ imported_fqcns.add(m.group(1))
1088
1038
 
1089
- for imported in imported_type_map.values():
1090
- mapping = type_map.get(imported)
1039
+ seen_targets: set[str] = set()
1040
+ for fqcn in imported_fqcns:
1041
+ mapping = type_map.get(fqcn)
1091
1042
  if mapping is None:
1092
- limitations.append(f"jvm_unresolved:{relative_path}:{imported}")
1093
1043
  continue
1094
1044
  target_path, _symbol = mapping
1045
+ if target_path == relative_path or target_path in seen_targets:
1046
+ continue
1047
+ seen_targets.add(target_path)
1095
1048
  edges.append(
1096
1049
  GraphEdge(
1097
1050
  source=module_node_id,
sourcecode/schema.py CHANGED
@@ -45,6 +45,7 @@ class FrameworkDetection:
45
45
  source: str = "manifest"
46
46
  confidence: Literal["high", "medium", "low"] = "high"
47
47
  detected_via: list[str] = field(default_factory=list)
48
+ version: Optional[str] = None # e.g. "2.7.18" for Spring Boot
48
49
 
49
50
 
50
51
  @dataclass
@@ -62,6 +63,11 @@ class StackDetection:
62
63
  workspace: Optional[str] = None
63
64
  signals: list[str] = field(default_factory=list)
64
65
  produced_by: Optional[str] = None # which detector emitted this
66
+ # Java-specific fields (FIX-6, FIX-7)
67
+ language_version: Optional[str] = None # e.g. "1.8" from maven.compiler.source
68
+ packaging: Optional[str] = None # e.g. "war" | "jar"
69
+ app_server_hint: Optional[str] = None # e.g. "weblogic" | "wildfly"
70
+ spring_profiles: list[str] = field(default_factory=list) # detected Spring profiles
65
71
 
66
72
 
67
73
  @dataclass
@@ -656,3 +662,8 @@ class SourceMap:
656
662
  # AST contract mode (v0.33.0) — populated when --mode contract|hybrid
657
663
  file_contracts: list[Any] = field(default_factory=list) # list[FileContract]
658
664
  contract_summary: Optional[Any] = None # ContractSummary
665
+ # Java-specific root fields (v1.3.0) — additive, null/empty for non-Java projects
666
+ packaging: Optional[str] = None # "war" | "jar"
667
+ language_version: Optional[str] = None # "1.8" | "11" | "17"
668
+ spring_profiles: list[str] = field(default_factory=list)
669
+ app_server_hint: Optional[str] = None # "weblogic" | "wildfly" | "tomcat"
sourcecode/serializer.py CHANGED
@@ -1056,6 +1056,16 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
1056
1056
  if ep_groups["auxiliary"]:
1057
1057
  result["auxiliary_entry_points"] = ep_groups["auxiliary"][:_EP_DEV_CAP]
1058
1058
 
1059
+ # Java-specific root fields (FIX-6, FIX-7, FIX-8)
1060
+ if getattr(sm, "packaging", None):
1061
+ result["packaging"] = sm.packaging
1062
+ if getattr(sm, "language_version", None):
1063
+ result["language_version"] = sm.language_version
1064
+ if getattr(sm, "spring_profiles", None):
1065
+ result["spring_profiles"] = sm.spring_profiles
1066
+ if getattr(sm, "app_server_hint", None):
1067
+ result["app_server_hint"] = sm.app_server_hint
1068
+
1059
1069
  # Layer B — signals (only when the corresponding analyzer ran)
1060
1070
  if sm.dependency_summary is not None and sm.dependency_summary.requested:
1061
1071
  dep_dict = asdict(sm.dependency_summary)
@@ -1196,6 +1206,20 @@ def _contract_view_minimal(
1196
1206
  "project": project,
1197
1207
  }
1198
1208
 
1209
+ # Full stacks list (needed for version checks in smoke tests)
1210
+ if sm.stacks:
1211
+ result["stacks"] = [asdict(s) for s in sm.stacks]
1212
+
1213
+ # Java-specific root fields
1214
+ if getattr(sm, "packaging", None):
1215
+ result["packaging"] = sm.packaging
1216
+ if getattr(sm, "language_version", None):
1217
+ result["language_version"] = sm.language_version
1218
+ if getattr(sm, "spring_profiles", None):
1219
+ result["spring_profiles"] = sm.spring_profiles
1220
+ if getattr(sm, "app_server_hint", None):
1221
+ result["app_server_hint"] = sm.app_server_hint
1222
+
1199
1223
  # Per-file contracts
1200
1224
  if contracts:
1201
1225
  serialized: list[dict[str, Any]] = []
@@ -1295,6 +1319,15 @@ def _contract_view_minimal(
1295
1319
  if sm.analysis_gaps:
1296
1320
  result["analysis_gaps"] = [asdict(g) for g in sm.analysis_gaps]
1297
1321
 
1322
+ # Module graph — included when --graph-modules was requested
1323
+ if sm.module_graph is not None and sm.module_graph_summary is not None and sm.module_graph_summary.requested:
1324
+ result["module_graph"] = {
1325
+ "nodes": [asdict(n) for n in sm.module_graph.nodes],
1326
+ "edges": [asdict(e) for e in sm.module_graph.edges],
1327
+ "summary": asdict(sm.module_graph_summary),
1328
+ }
1329
+ result["module_graph_summary"] = asdict(sm.module_graph_summary)
1330
+
1298
1331
  return result
1299
1332
 
1300
1333
 
@@ -1377,8 +1410,60 @@ _MAX_FN_PER_CONTRACT = 5 # max function signatures per contract (token budget)
1377
1410
  _MAX_SIG_LEN = 60 # max chars per compressed signature
1378
1411
 
1379
1412
 
1413
+ def _serialize_contract_java(c: Any) -> dict[str, Any]:
1414
+ """Java-specific contract serializer with full field names and annotations."""
1415
+ item: dict[str, Any] = {"path": c.path, "language": "java"}
1416
+
1417
+ exports_out: list[dict] = []
1418
+ for e in c.exports:
1419
+ entry: dict = {"kind": e.kind, "name": e.name}
1420
+ if getattr(e, "annotations", None):
1421
+ entry["annotations"] = e.annotations
1422
+ if getattr(e, "extends", None):
1423
+ entry["extends"] = e.extends
1424
+ if getattr(e, "implements", None) and e.implements:
1425
+ entry["implements"] = e.implements
1426
+ if getattr(e, "signature", None):
1427
+ entry["signature"] = e.signature
1428
+ exports_out.append(entry)
1429
+ if exports_out:
1430
+ item["exports"] = exports_out
1431
+
1432
+ if c.imports:
1433
+ item["imports"] = [imp.source for imp in c.imports[:20]]
1434
+
1435
+ autowired = getattr(c, "autowired_fields", [])
1436
+ if autowired:
1437
+ item["autowired_fields"] = autowired
1438
+
1439
+ return item
1440
+
1441
+
1442
+ def _serialize_contract_mybatis_xml(c: Any) -> dict[str, Any]:
1443
+ """Serialize a MyBatis *Mapper.xml contract."""
1444
+ item: dict[str, Any] = {"path": c.path, "language": "mybatis-xml"}
1445
+ # Extract namespace stored as "namespace:<fqn>" in dependencies
1446
+ for dep in (c.dependencies or []):
1447
+ if dep.startswith("namespace:"):
1448
+ item["namespace"] = dep[len("namespace:"):]
1449
+ break
1450
+ exports_out: list[dict] = []
1451
+ for e in c.exports:
1452
+ entry: dict = {"kind": e.kind, "name": e.name}
1453
+ if getattr(e, "type_ref", None):
1454
+ entry["type"] = e.type_ref
1455
+ exports_out.append(entry)
1456
+ if exports_out:
1457
+ item["exports"] = exports_out
1458
+ return item
1459
+
1460
+
1380
1461
  def _serialize_contract_minimal(c: Any) -> dict[str, Any]:
1381
1462
  """Serialize one FileContract to minimal format."""
1463
+ if getattr(c, "language", None) == "java":
1464
+ return _serialize_contract_java(c)
1465
+ if getattr(c, "language", None) == "mybatis-xml":
1466
+ return _serialize_contract_mybatis_xml(c)
1382
1467
  item: dict[str, Any] = {"path": c.path, "role": c.role}
1383
1468
 
1384
1469
  if c.is_changed:
@@ -1496,6 +1581,11 @@ def _contract_view_standard(
1496
1581
  if contracts:
1497
1582
  serialized: list[dict[str, Any]] = []
1498
1583
  for c in contracts:
1584
+ if getattr(c, "language", None) == "mybatis-xml":
1585
+ item = _serialize_contract_mybatis_xml(c)
1586
+ item["relevance_score"] = round(c.relevance_score, 3)
1587
+ serialized.append(item)
1588
+ continue
1499
1589
  item: dict[str, Any] = {
1500
1590
  "path": c.path,
1501
1591
  "language": c.language,
@@ -1545,7 +1635,7 @@ def _contract_view_standard(
1545
1635
  item["ranking_reasons"] = non_trivial
1546
1636
  item["method"] = c.extraction_method
1547
1637
  serialized.append(item)
1548
- result["file_contracts"] = serialized
1638
+ result["contracts"] = serialized
1549
1639
 
1550
1640
  # Optional analysis sections (deep mode or when analyzers ran)
1551
1641
  if include_optional:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.3.0
3
+ Version: 1.5.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -1,24 +1,24 @@
1
- sourcecode/__init__.py,sha256=OFSkiae_6W3SLXW8WqBF-PGOUxC4FhKuxuKQDTuCuvc,102
1
+ sourcecode/__init__.py,sha256=81eXvIk2uxc6dFJg9ND6pJ-xePSGbA0ZUJgr1h_bZ48,102
2
2
  sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
3
- sourcecode/architecture_analyzer.py,sha256=qzDW3_lQv__czQ-qs6AqEEoMvTfhfp7M7kNslPuQy7A,32128
3
+ sourcecode/architecture_analyzer.py,sha256=oPmGPf9_p6y8Z7SIGHEu57nVYwxycIovDzBvlHl-l5k,32557
4
4
  sourcecode/architecture_summary.py,sha256=J9yoLgh8wXwIRrT6q6JooB6PekivbOEYpJz4BUXdalk,20545
5
- sourcecode/ast_extractor.py,sha256=zvHeO-w0evdS8EAJhwlK7hhrWMGN-lnHm6XFdZlaST8,44389
5
+ sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
7
- sourcecode/cli.py,sha256=0g7U2s1pyoTqFc7ne62Z305bErRKuHuKGyN0VrlsSOk,72971
7
+ sourcecode/cli.py,sha256=_oYoP4_AAFGw0gg-FBGn_DKh7y49xmwuL6D_Bafl2w4,74497
8
8
  sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
9
9
  sourcecode/confidence_analyzer.py,sha256=HcaewB2pZaZ_hfKrZWtr_yPMY2-CxS1zzTUD7c4argc,13188
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
11
11
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
12
- sourcecode/contract_model.py,sha256=gCf9-Kj0G7l0lvRTAcRfFAfMgs1Rpizv4mKovQLYUkw,3434
13
- sourcecode/contract_pipeline.py,sha256=lDa0MgalW-WPXwbWKX2NB6HWxt_15P6GZNXgAoJeDAE,25985
12
+ sourcecode/contract_model.py,sha256=nRxJKPMs1VHwFTa8AVXhGmaLjti3Lr2sjHDpWgv1bfE,3917
13
+ sourcecode/contract_pipeline.py,sha256=_mZn0GD7UjjJ7E79DmmYjF0sVkuOLBZEgZHqgXVyTrs,26648
14
14
  sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
15
- sourcecode/dependency_analyzer.py,sha256=EZyBI1-VkYCmPrcIIIpa7WAp888FF9Ct4nY2ronowUg,53555
15
+ sourcecode/dependency_analyzer.py,sha256=p4ljXhkcGBbFlhaZuPrsjOVjDXaKLTg0Gor2p4qFPP0,56208
16
16
  sourcecode/doc_analyzer.py,sha256=a1CIClCNmfYM3ku4bdgwHQpmb6Js4wdJZ1V5EYLo04I,24345
17
17
  sourcecode/entrypoint_classifier.py,sha256=gvKgl0f5T8ol1r4JMmkeqGHuZTfZJiOwFOWdc7EYwYw,4061
18
18
  sourcecode/env_analyzer.py,sha256=GxCidahAAIptTdDFIlVB6URd4HBnBlIX_SqUov3MBRQ,22076
19
19
  sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
20
20
  sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
21
- sourcecode/graph_analyzer.py,sha256=Ko3nJp_wx92jic6AuFbdkOstDjzl-O62g1zazYJjm9w,64157
21
+ sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
22
22
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
23
23
  sourcecode/prepare_context.py,sha256=LsFDp7HnHdvtwVa46YUD60uMBfwXaVs4suMfBvc8tyI,37357
24
24
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
@@ -27,9 +27,9 @@ sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUH
27
27
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
28
28
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
29
29
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
30
- sourcecode/schema.py,sha256=M0aA3sIIm8QHDIAducZUm5mGpgJ7oXfqtyvoMkLGMac,23270
30
+ sourcecode/schema.py,sha256=rNxpDUgOfvJDzz6DKitL-5_0UAh0YomNwTsSSe8PafY,24066
31
31
  sourcecode/semantic_analyzer.py,sha256=12TwXYkYbDcBdu0heX_EmfPM2EkO8a_r5osf0SaeQbs,88956
32
- sourcecode/serializer.py,sha256=-OFruU8NHsmHR_IKOb11VVl3-WOBXKPhyCtM8NZZtb0,65832
32
+ sourcecode/serializer.py,sha256=2ztWD4FCebPD0D5tY-kNgLlsATaQsPrSQY6rLQeemRI,69467
33
33
  sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
34
34
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
35
35
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
@@ -42,26 +42,26 @@ sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJ
42
42
  sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
43
43
  sourcecode/detectors/heuristic.py,sha256=bCqqgbHavl4Sse3dqT8mwmo1wAdgeJr7VyXOmfClLKo,3387
44
44
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
45
- sourcecode/detectors/java.py,sha256=Lv-a1YeXezJYZCJduWfeeLMTUgyPz4lyiYu9Edi8-7E,10821
45
+ sourcecode/detectors/java.py,sha256=0NKsy1uls5wvQSK7DJP_fM-v5_uuMEr_an47caelLHE,16612
46
46
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
47
47
  sourcecode/detectors/nodejs.py,sha256=7fsyAmrGkkguX6U80HUQpIe9MRaYyi_A7zbaRtmFmGc,13097
48
48
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
49
49
  sourcecode/detectors/php.py,sha256=W_AQD0WMVDdWHa9h_ilX6W8XSpz0X4ctpMK2WXfXf1I,1887
50
- sourcecode/detectors/project.py,sha256=egFUnHC93xFfb-ikGCIOSkRdyP52qytDx9W7pGkX0MY,6525
50
+ sourcecode/detectors/project.py,sha256=hy2Mjifl3by2YcAZ8OY2g_1ptlaVQu9sfchHhNJQ4WM,8084
51
51
  sourcecode/detectors/python.py,sha256=i2_Wtk_p0BJx5R8gBQ8NaQByzJ8zEfZkw9NNpKlvOYM,10486
52
52
  sourcecode/detectors/ruby.py,sha256=Q4B5ePAw6-T4DLfanKJiuLHLqUigTPVrzylcXJMei3M,1591
53
53
  sourcecode/detectors/rust.py,sha256=Tij1vz8BFZ332GEvVkL6vyMli2OMHJfHyDAppWfe66c,3557
54
54
  sourcecode/detectors/systems.py,sha256=nYaKbGDFu0EOXFcd_1doWFT3tTUdkbxc2DjHUF5TcqQ,1627
55
55
  sourcecode/detectors/terraform.py,sha256=cxORPR_zVLOJpHlh4e9JnFpkQsn_UnqMMom5yG65hZ4,1693
56
- sourcecode/detectors/tooling.py,sha256=hIvop80No22pqyGVJ32NKliSdjkHRePQkIRroqG01bY,1875
56
+ sourcecode/detectors/tooling.py,sha256=8CKbtxwQoABP-WyBRNmdAmHDOvAH57AR1cF4UKuWEdQ,2074
57
57
  sourcecode/telemetry/__init__.py,sha256=M0eQZFNkmJiLbI_oNP4QEXwVju1dQ2d4P-E1-Bw8PxE,3116
58
58
  sourcecode/telemetry/config.py,sha256=Pir0WHp4z-9Qclnn2NDZ3vwitqsMkOAJckmwjUSxrk4,1795
59
59
  sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWnII,2237
60
60
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
61
61
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
62
62
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
63
- sourcecode-1.3.0.dist-info/METADATA,sha256=5tHuzUw0xpsY6eeCd81OI_pL_vl-3qaMCU78kExd6WI,20411
64
- sourcecode-1.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
65
- sourcecode-1.3.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
66
- sourcecode-1.3.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
67
- sourcecode-1.3.0.dist-info/RECORD,,
63
+ sourcecode-1.5.0.dist-info/METADATA,sha256=kdZtCFuIhrWj6KDM4nZ-tMqWzuwiY2hXt4C8hP6PETc,20411
64
+ sourcecode-1.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
65
+ sourcecode-1.5.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
66
+ sourcecode-1.5.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
67
+ sourcecode-1.5.0.dist-info/RECORD,,