sourcecode 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.2.0"
3
+ __version__ = "1.4.0"
@@ -79,6 +79,7 @@ _LANGUAGE_MAP: dict[str, str] = {
79
79
  ".jsx": "jsx",
80
80
  ".mjs": "javascript",
81
81
  ".cjs": "javascript",
82
+ ".java": "java",
82
83
  }
83
84
 
84
85
  _REACT_HOOKS: frozenset[str] = frozenset({
@@ -938,6 +939,175 @@ def _extract_python(path: str, source: str) -> FileContract:
938
939
  )
939
940
 
940
941
 
942
+ # ---------------------------------------------------------------------------
943
+ # ---------------------------------------------------------------------------
944
+ # Enhanced Java extraction (regex-based, annotation-aware)
945
+ # ---------------------------------------------------------------------------
946
+
947
+ _JAVA_IMPORT_RE = re.compile(r'^import\s+(?:static\s+)?([^;\s]+)\s*;', re.MULTILINE)
948
+
949
+ # Single annotation on one line (captures name + optional parens args)
950
+ _JAVA_ANNO_LINE_RE = re.compile(r'^\s*(@[\w.]+(?:\s*\([^)]*\))?)\s*$')
951
+ # Class/interface/enum declaration line (public or package-private)
952
+ _JAVA_CLASS_LINE_RE = re.compile(
953
+ r'(?:public\s+)?(?:(?:abstract|final|static|sealed)\s+)*'
954
+ r'(class|interface|enum|@interface)\s+(\w+)'
955
+ r'(?:\s+extends\s+([\w.]+))?'
956
+ r'(?:\s+implements\s+([\w.,\s<>]+?))?'
957
+ r'(?=\s*[\{<])'
958
+ )
959
+ # Public method: up to 12 leading spaces, return type, name, open paren
960
+ _JAVA_PUB_METHOD_LINE_RE = re.compile(
961
+ r'^\s{0,12}public\s+(?:(?:static|final|synchronized|abstract|default)\s+)*'
962
+ r'[\w<>\[\]?,\s]+?\s+(\w+)\s*\(',
963
+ re.MULTILINE,
964
+ )
965
+ # @Autowired or @Inject field
966
+ _JAVA_FIELD_DECL_RE = re.compile(
967
+ r'^\s*(?:private|protected|public)?\s*'
968
+ r'([\w<>.,\[\]? ]+?)\s+(\w+)\s*[;=]'
969
+ )
970
+
971
+
972
+ def _java_collect_preceding_annotations(lines: list[str], decl_idx: int) -> list[str]:
973
+ """Walk back from decl_idx and collect @annotation lines immediately before it."""
974
+ annotations: list[str] = []
975
+ i = decl_idx - 1
976
+ while i >= 0:
977
+ stripped = lines[i].strip()
978
+ if not stripped or stripped.startswith("//") or stripped.startswith("*"):
979
+ i -= 1
980
+ continue
981
+ m = re.match(r'(@[\w.]+(?:\s*\([^)]*\))?)', stripped)
982
+ if m:
983
+ annotations.insert(0, m.group(1))
984
+ i -= 1
985
+ else:
986
+ break
987
+ return annotations
988
+
989
+
990
# Any annotation (name + optional single-line args) at the start of a string,
# together with the whitespace that follows it.
_JAVA_LEADING_ANNO_RE = re.compile(r'@[\w.]+(?:\s*\([^)]*\))?\s*')
# @Autowired / @Inject as a complete annotation name; the word boundary keeps
# look-alikes such as @InjectMocks from being treated as injection points.
_JAVA_INJECT_ANNO_RE = re.compile(r'@(?:Autowired|Inject)\b')
# Tokens that may precede the type in a field declaration.
_JAVA_FIELD_MODIFIERS = frozenset({
    "public", "protected", "private", "static", "final", "transient", "volatile",
})
# Names the method regex can capture that are never method names.
_JAVA_NON_METHOD_NAMES = frozenset({"if", "for", "while", "switch", "return", "new"})


def _java_strip_leading_annotations(decl: str) -> str:
    """Remove every annotation found at the start of ``decl``."""
    while True:
        anno = _JAVA_LEADING_ANNO_RE.match(decl)
        if anno is None:
            return decl
        decl = decl[anno.end():]


def _java_injected_field(decl: str) -> "dict[str, str] | None":
    """Parse ``decl`` as a field declaration; return {"type", "name"} or None."""
    fm = _JAVA_FIELD_DECL_RE.match(decl)
    if fm is None:
        return None
    tokens = fm.group(1).split()
    # Drop leading modifiers but keep the whole type, so a type containing a
    # space (e.g. "Map<String, Foo>") is not cut down to its last word.
    while tokens and tokens[0] in _JAVA_FIELD_MODIFIERS:
        tokens.pop(0)
    type_name = " ".join(tokens)
    field_name = fm.group(2).strip()
    if not type_name or not field_name or field_name in {"class", "interface"}:
        return None
    return {"type": type_name, "name": field_name}


def _extract_java(path: str, source: str) -> FileContract:
    """Build a heuristic (regex-based) FileContract for one Java source file.

    Three passes are made over ``source``:

    1. ``import`` statements become ImportRecords (deduplicated).
    2. A line scan records class/interface/enum/@interface declarations with
       the annotations that precede them, and fields marked with
       @Autowired / @Inject (on the same line or on the following lines).
    3. Public method headers become ExportRecords / FunctionSignatures; only
       the first method of each name is kept and names equal to a detected
       class name are skipped.

    Args:
        path: Path stored on the returned contract.
        source: Full text of the Java file.

    Returns:
        A FileContract with ``language="java"`` and
        ``extraction_method="heuristic"``.
    """
    from bisect import bisect_right

    exports: list[ExportRecord] = []
    types: list[TypeDefinition] = []
    functions: list[FunctionSignature] = []
    imports: list[ImportRecord] = []
    autowired_fields: list[dict] = []

    # Keep the start offset of every line so regex match offsets can be mapped
    # back to indices of `lines` with exactly the same line-splitting rules.
    lines = source.splitlines()
    line_starts: list[int] = []
    offset = 0
    for raw in source.splitlines(keepends=True):
        line_starts.append(offset)
        offset += len(raw)

    # Pass 1: collect imports
    seen_sources: set[str] = set()
    for m in _JAVA_IMPORT_RE.finditer(source):
        full_import = m.group(1).strip()
        if full_import in seen_sources:
            continue
        seen_sources.add(full_import)
        imports.append(ImportRecord(source=full_import, kind="named", symbols=[]))

    # Pass 2: line-by-line scan for classes and injected fields
    class_names: set[str] = set()
    autowired_pending = False

    for idx, line in enumerate(lines):
        stripped = line.strip()
        # Blank and comment lines carry no declarations and must not end a
        # pending @Autowired / @Inject.
        if not stripped or stripped.startswith(("//", "/*", "*")):
            continue

        is_inject_line = _JAVA_INJECT_ANNO_RE.match(stripped) is not None
        if is_inject_line:
            autowired_pending = True

        if autowired_pending:
            # The field may share the line with its annotations
            # (`@Autowired private Foo foo;`) or follow on a later line.
            decl = _java_strip_leading_annotations(stripped)
            if decl and not decl.startswith(("//", "/*")):
                autowired_pending = False
                injected = _java_injected_field(decl)
                if injected is not None:
                    autowired_fields.append(injected)

        if is_inject_line:
            continue

        # Class/interface/enum declaration
        cm = _JAVA_CLASS_LINE_RE.search(stripped)
        if cm is None or not any(
            kw in stripped for kw in ("class ", "interface ", "enum ")
        ):
            continue

        kind_kw, name, extends_str, implements_str = cm.group(1, 2, 3, 4)
        parent = extends_str.strip() if extends_str else None
        implements_list = (
            [part.strip() for part in implements_str.split(",") if part.strip()]
            if implements_str
            else []
        )
        # Annotation types are reported as classes; other keywords map 1:1.
        export_kind = "class" if kind_kw in ("class", "@interface") else kind_kw

        exports.append(ExportRecord(
            name=name,
            kind=export_kind,
            annotations=_java_collect_preceding_annotations(lines, idx),
            extends=parent,
            implements=implements_list,
        ))
        types.append(TypeDefinition(
            name=name,
            kind=export_kind,
            fields=[],
            extends=[parent] if parent else [],
        ))
        class_names.add(name)

    # Pass 3: public methods with their preceding annotations
    seen_methods: set[str] = set()
    for m in _JAVA_PUB_METHOD_LINE_RE.finditer(source):
        mname = m.group(1)
        if (
            mname in class_names
            or mname in seen_methods
            or mname in _JAVA_NON_METHOD_NAMES
        ):
            continue
        seen_methods.add(mname)

        matched = m.group(0)
        sig_text = matched.strip()
        # The match may start on blank lines before the declaration; locate
        # the line that actually holds the `public` keyword.
        decl_pos = m.start() + (len(matched) - len(matched.lstrip()))
        decl_line = bisect_right(line_starts, decl_pos) - 1
        annotations = _java_collect_preceding_annotations(lines, decl_line)

        exports.append(ExportRecord(
            name=mname,
            kind="method",
            annotations=annotations,
            signature=sig_text,
        ))
        functions.append(FunctionSignature(
            name=mname,
            signature=sig_text,
            async_=False,
            exported=True,
            return_type=None,
        ))

    # External deps: top-2 package segments, skip java.* / javax.*
    deps = sorted({
        ".".join(imp.source.split(".")[:2])
        for imp in imports
        if not imp.source.startswith(("java.", "javax."))
        and len(imp.source.split(".")) >= 2
    })

    return FileContract(
        path=path,
        language="java",
        exports=exports,
        imports=sorted(imports, key=lambda i: i.source)[:30],
        functions=sorted(functions, key=lambda f: f.name)[:20],
        types=sorted(types, key=lambda t: t.name),
        dependencies=deps[:20],
        autowired_fields=autowired_fields[:20],
        extraction_method="heuristic",
    )
1109
+
1110
+
941
1111
  # ---------------------------------------------------------------------------
942
1112
  # Role detection
943
1113
  # ---------------------------------------------------------------------------
@@ -1048,6 +1218,8 @@ class AstExtractor:
1048
1218
 
1049
1219
  if language == "python":
1050
1220
  contract = _extract_python(rel_path, source)
1221
+ elif language == "java":
1222
+ contract = _extract_java(rel_path, source)
1051
1223
  else:
1052
1224
  if self._ensure_ts():
1053
1225
  lang_obj = _get_ts_lang(language)
sourcecode/cli.py CHANGED
@@ -728,15 +728,13 @@ def main(
728
728
  mode = "contract" # unknown → safe default
729
729
 
730
730
  # Legacy flags imply raw mode unless --mode was explicitly overridden.
731
- # These flags produce standard_view-only output sections not in contract_view.
732
- # Preserves backward compat: callers using any legacy flag get their previous format.
733
- # New callers opt into contract mode via --mode contract (or bare invocation).
734
- # Legacy flags that produce output sections incompatible with contract_view
735
- # force mode to raw. --agent is excluded: it now runs the contract pipeline
736
- # and enriches contract_view with auto-enabled analyzers (deps, env, notes).
731
+ # --format yaml and --graph-modules are now compatible with contract_view:
732
+ # yaml is a serialization format (not an output-section flag)
733
+ # graph-modules output is included in contract_view when available
734
+ # Other flags that produce sections exclusive to standard_view still force raw.
737
735
  _legacy_flags_active = (
738
- compact or tree or format == "yaml" or trace_pipeline
739
- or docs or semantics or graph_modules or full_metrics or architecture
736
+ compact or tree or trace_pipeline
737
+ or docs or semantics or full_metrics or architecture
740
738
  )
741
739
  if mode in ("contract", "standard") and _legacy_flags_active:
742
740
  mode = "raw"
@@ -1106,6 +1104,17 @@ def main(
1106
1104
  metrics_summary=metrics_summary,
1107
1105
  )
1108
1106
 
1107
+ # Populate Java-specific root fields from java stack detection (FIX-6, 7, 8)
1108
+ _java_stack = next((s for s in stacks if s.stack == "java"), None)
1109
+ if _java_stack is not None:
1110
+ from dataclasses import replace as _dc_replace
1111
+ sm = _dc_replace(sm,
1112
+ packaging=getattr(_java_stack, "packaging", None) or None,
1113
+ language_version=getattr(_java_stack, "language_version", None) or None,
1114
+ spring_profiles=getattr(_java_stack, "spring_profiles", []) or [],
1115
+ app_server_hint=getattr(_java_stack, "app_server_hint", None) or None,
1116
+ )
1117
+
1109
1118
  # Semantic analysis (--semantics flag)
1110
1119
  if semantic_analyzer is not None:
1111
1120
  if workspace_analysis.workspaces:
@@ -1270,10 +1279,24 @@ def main(
1270
1279
  and d.scope not in {"dev"}
1271
1280
  ]
1272
1281
 
1273
- def _dep_sort_key(d: Any) -> tuple[int, int, str]:
1282
+ _JAVA_SEMANTIC_PRIORITY: dict[str, int] = {
1283
+ "spring-boot": 0, "spring-security": 1, "mybatis": 2,
1284
+ "poi": 3, "pdfbox": 4, "jackson": 5, "jjwt": 6,
1285
+ }
1286
+
1287
+ def _java_priority(d: Any) -> int:
1288
+ if d.ecosystem != "java":
1289
+ return 99
1290
+ art = (d.name.split(":")[-1] if ":" in d.name else d.name).lower()
1291
+ for key, pri in _JAVA_SEMANTIC_PRIORITY.items():
1292
+ if key in art:
1293
+ return pri
1294
+ return 50
1295
+
1296
+ def _dep_sort_key(d: Any) -> tuple[int, int, int, str]:
1274
1297
  role_order = _ROLE_PRIORITY.get(d.role or "runtime", 5)
1275
1298
  eco_order = 0 if d.ecosystem == primary_ecosystem else 1
1276
- return (role_order, eco_order, d.name.lower())
1299
+ return (role_order, eco_order, _java_priority(d), d.name.lower())
1277
1300
 
1278
1301
  _seen_dep_names: set[str] = set()
1279
1302
  _deduped_deps: list[Any] = []
@@ -1281,7 +1304,7 @@ def main(
1281
1304
  if d.name not in _seen_dep_names:
1282
1305
  _seen_dep_names.add(d.name)
1283
1306
  _deduped_deps.append(d)
1284
- sm.key_dependencies = _deduped_deps[:15]
1307
+ sm.key_dependencies = _deduped_deps # no cap — all direct deps included
1285
1308
 
1286
1309
  # LQN-02: deterministic NL summary
1287
1310
  sm.project_summary = ProjectSummarizer(target).generate(sm)
@@ -1384,10 +1407,20 @@ def main(
1384
1407
 
1385
1408
  # Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
1386
1409
  _is_contract_mode = mode in ("contract", "standard")
1410
+ _pipeline_error = False
1387
1411
  if _is_contract_mode:
1388
1412
  from sourcecode.contract_pipeline import ContractPipeline
1389
1413
  from sourcecode.contract_model import ContractSummary as _ContractSummary
1390
- _cp = ContractPipeline()
1414
+ # FIX-1: Java projects need higher caps — many files, comprehensive coverage required
1415
+ _jvm_stacks = {"java", "kotlin", "scala", "groovy"}
1416
+ _is_jvm = any(s.stack in _jvm_stacks for s in sm.stacks)
1417
+ # FIX-1 (cont.): JVM projects (java/kotlin/scala/groovy) also run with no relevance threshold (min_score=0.0 below)
1418
+ _max_files_cp = 2500 if _is_jvm else 500
1419
+ _cp = ContractPipeline(max_files=_max_files_cp)
1420
+ _java_pipeline_kwargs: dict = {}
1421
+ if _is_jvm:
1422
+ _java_pipeline_kwargs["max_contracts"] = 500
1423
+ _java_pipeline_kwargs["min_score"] = 0.0
1391
1424
  try:
1392
1425
  _contracts, _contract_summary = _cp.run(
1393
1426
  target,
@@ -1405,9 +1438,11 @@ def main(
1405
1438
  max_importers=max_importers,
1406
1439
  semantic_calls=sm.semantic_calls or None,
1407
1440
  code_notes=sm.code_notes or None,
1441
+ **_java_pipeline_kwargs,
1408
1442
  )
1409
1443
  except Exception as _exc:
1410
1444
  typer.echo(f"[error] contract pipeline failed: {_exc}", err=True)
1445
+ _pipeline_error = True
1411
1446
  _contracts = []
1412
1447
  _contract_summary = _ContractSummary(
1413
1448
  mode=mode,
@@ -1446,7 +1481,22 @@ def main(
1446
1481
  data = _contract_view(sm, emit_graph=emit_graph, depth=_depth)
1447
1482
  if not no_redact:
1448
1483
  data = redact_dict(data)
1449
- content = json.dumps(data, indent=2, ensure_ascii=False)
1484
+ if format == "yaml":
1485
+ from io import StringIO
1486
+ from ruamel.yaml import YAML as _YAML
1487
+ _yaml = _YAML()
1488
+ _yaml.default_flow_style = False
1489
+ _yaml.representer.add_representer(
1490
+ type(None),
1491
+ lambda dumper, data_val: dumper.represent_scalar(
1492
+ "tag:yaml.org,2002:null", "null"
1493
+ ),
1494
+ )
1495
+ _stream = StringIO()
1496
+ _yaml.dump(data, _stream)
1497
+ content = _stream.getvalue()
1498
+ else:
1499
+ content = json.dumps(data, indent=2, ensure_ascii=False)
1450
1500
  elif agent:
1451
1501
  data = agent_view(sm)
1452
1502
  # When contract pipeline ran (mode=contract, no legacy flags), include
@@ -1518,6 +1568,9 @@ def main(
1518
1568
  # 6. Write output (CLI-04)
1519
1569
  write_output(content, output=output)
1520
1570
 
1571
+ if _pipeline_error:
1572
+ raise typer.Exit(code=2)
1573
+
1521
1574
  # 7. Clipboard copy (--copy / -c)
1522
1575
  if copy and output is None:
1523
1576
  _trimmed = content.strip()
@@ -25,9 +25,13 @@ class ExportRecord:
25
25
  """Exported symbol."""
26
26
 
27
27
  name: str
28
- kind: str = "unknown" # function | class | const | type | default | react_component | enum | interface
28
+ kind: str = "unknown" # function | class | const | type | default | react_component | enum | interface | method
29
29
  type_ref: Optional[str] = None
30
30
  async_: bool = False
31
+ annotations: list[str] = field(default_factory=list) # Java: ["@Controller", "@Transactional"]
32
+ extends: Optional[str] = None # Java: parent class
33
+ implements: list[str] = field(default_factory=list) # Java: interfaces
34
+ signature: Optional[str] = None # Java method: full signature
31
35
 
32
36
 
33
37
  @dataclass
@@ -96,6 +100,8 @@ class FileContract:
96
100
  # Extraction quality
97
101
  extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
98
102
  limitations: list[str] = field(default_factory=list)
103
+ # Java-specific (FIX-1)
104
+ autowired_fields: list[dict] = field(default_factory=list) # [{"type": "...", "name": "..."}]
99
105
 
100
106
 
101
107
  @dataclass
@@ -182,6 +182,7 @@ class ContractPipeline:
182
182
  semantic_calls: Optional[list] = None,
183
183
  code_notes: Optional[list] = None,
184
184
  max_contracts: Optional[int] = _MAX_CONTRACTS,
185
+ min_score: Optional[float] = None,
185
186
  ) -> tuple[list[FileContract], ContractSummary]:
186
187
  """Run the full extraction pipeline.
187
188
 
@@ -317,11 +318,17 @@ class ContractPipeline:
317
318
  # 10. Top-N cap — enforce max_contracts when not in symbol-search mode.
318
319
  # Symbol searches must return all matching files; budget applies only to
319
320
  # the default architectural briefing use case.
321
+ _effective_min_score = min_score if min_score is not None else _MIN_CONTRACT_SCORE
320
322
  if symbol is None and max_contracts is not None:
321
323
  contracts = [
322
324
  c for c in contracts
323
- if c.relevance_score >= _MIN_CONTRACT_SCORE or c.is_entrypoint
325
+ if c.relevance_score >= _effective_min_score or c.is_entrypoint
324
326
  ][:max_contracts]
327
+ elif symbol is None and max_contracts is None:
328
+ contracts = [
329
+ c for c in contracts
330
+ if c.relevance_score >= _effective_min_score or c.is_entrypoint
331
+ ]
325
332
 
326
333
  # 11. Compress types if requested
327
334
  if compress_types:
@@ -370,7 +377,15 @@ class ContractPipeline:
370
377
  """
371
378
  candidates = _find_symbol_files(root, symbol, known_paths, engine)
372
379
  if not candidates:
373
- return []
380
+ return [], {
381
+ "symbol": symbol,
382
+ "definers_found": 0,
383
+ "importers_found": 0,
384
+ "importers_returned": 0,
385
+ "references_found": 0,
386
+ "total_returned": 0,
387
+ "truncated": False,
388
+ }
374
389
 
375
390
  extra: list[FileContract] = []
376
391
  for rel_path in candidates[:300]: # cap to prevent excessive extraction
@@ -577,7 +592,7 @@ def _find_symbol_files(
577
592
  "grep", "-rl",
578
593
  "--include=*.ts", "--include=*.tsx",
579
594
  "--include=*.js", "--include=*.jsx",
580
- "--include=*.py",
595
+ "--include=*.py", "--include=*.java",
581
596
  symbol, ".",
582
597
  ],
583
598
  cwd=str(root),
@@ -127,6 +127,24 @@ def _infer_role(name: str, ecosystem: str, scope: str) -> str:
127
127
  return "infra"
128
128
  return "runtime"
129
129
 
130
+ if ecosystem == "java":
131
+ if scope == "provided":
132
+ return "provided"
133
+ artifact = n.split(":")[-1] if ":" in n else n
134
+ if any(x in artifact for x in ("spring-boot", "spring-security")):
135
+ return "runtime"
136
+ if any(x in artifact for x in ("spring-web", "spring-mvc", "spring-core", "spring-context")):
137
+ return "runtime"
138
+ if any(x in artifact for x in ("mybatis", "hibernate", "jpa", "druid", "datasource")):
139
+ return "infra"
140
+ if any(x in artifact for x in ("jackson", "gson", "fastjson")):
141
+ return "serialization"
142
+ if any(x in artifact for x in ("poi", "pdfbox", "itext", "openpdf")):
143
+ return "parsing"
144
+ if any(x in artifact for x in ("jjwt", "nimbus-jose")):
145
+ return "runtime"
146
+ return "devtool" if is_dev else "runtime"
147
+
130
148
  return "devtool" if is_dev else "runtime"
131
149
 
132
150
 
@@ -1104,6 +1122,23 @@ class DependencyAnalyzer:
1104
1122
  properties = self._parse_maven_properties(root_elem, ns)
1105
1123
  dm_versions = self._parse_dependency_management(root_elem, ns, properties)
1106
1124
 
1125
+ # FIX-9: extract parent version for BOM resolution
1126
+ parent_elem = root_elem.find(f"{ns}parent")
1127
+ parent_version: Optional[str] = None
1128
+ parent_group: str = ""
1129
+ if parent_elem is not None:
1130
+ parent_version = (parent_elem.findtext(f"{ns}version") or "").strip() or None
1131
+ parent_group = (parent_elem.findtext(f"{ns}groupId") or "").strip()
1132
+ parent_artifact = (parent_elem.findtext(f"{ns}artifactId") or "").strip()
1133
+ # Propagate parent version into properties for ${project.parent.version}
1134
+ if parent_version:
1135
+ properties.setdefault("project.parent.version", parent_version)
1136
+ properties.setdefault("revision", parent_version)
1137
+
1138
+ # Infer packaging for FIX-6 (used by scope hint for provided)
1139
+ packaging_elem = root_elem.find(f"{ns}packaging")
1140
+ is_war = packaging_elem is not None and (packaging_elem.text or "").strip().lower() == "war"
1141
+
1107
1142
  records: list[DependencyRecord] = []
1108
1143
  deps_elem = root_elem.find(f"{ns}dependencies")
1109
1144
  if deps_elem is None:
@@ -1118,14 +1153,36 @@ class DependencyAnalyzer:
1118
1153
  declared = self._resolve_maven_version(version_raw, properties)
1119
1154
  if declared is None:
1120
1155
  declared = dm_versions.get(f"{group_id}:{artifact_id}")
1156
+
1157
+ # FIX-4: proper maven scope mapping
1121
1158
  scope_text = (dep.findtext(f"{ns}scope") or "compile").strip().lower()
1122
- scope = "dev" if scope_text == "test" else "direct"
1159
+ if scope_text == "test":
1160
+ scope = "dev"
1161
+ elif scope_text == "provided":
1162
+ scope = "provided"
1163
+ else:
1164
+ scope = "direct" # compile, runtime, system, import
1165
+
1166
+ # FIX-4: infer provided for embedded tomcat in WAR projects
1167
+ if (is_war and scope == "direct"
1168
+ and artifact_id in ("spring-boot-starter-tomcat", "tomcat-embed-core")):
1169
+ scope = "provided"
1170
+
1171
+ # FIX-9: resolve BOM version for Spring Boot / Spring Security starters
1172
+ resolved_version: Optional[str] = None
1173
+ if declared is None and parent_version:
1174
+ if group_id == "org.springframework.boot":
1175
+ resolved_version = parent_version
1176
+ elif group_id == "org.springframework.security" and "spring-security.version" in properties:
1177
+ resolved_version = properties["spring-security.version"]
1178
+
1123
1179
  records.append(
1124
1180
  DependencyRecord(
1125
1181
  name=f"{group_id}:{artifact_id}",
1126
1182
  ecosystem="java",
1127
1183
  scope=scope,
1128
1184
  declared_version=declared,
1185
+ resolved_version=resolved_version,
1129
1186
  source="manifest",
1130
1187
  manifest_path="pom.xml",
1131
1188
  )
@@ -14,6 +14,8 @@ from sourcecode.detectors.parsers import read_text_lines, unique_strings
14
14
  from sourcecode.schema import FrameworkDetection
15
15
  from sourcecode.tree_utils import flatten_file_tree
16
16
 
17
+ _NS_TAG_RE = re.compile(r"\{[^}]+\}")
18
+
17
19
  _MAX_FILE_SIZE = 256 * 1024 # 256 KB
18
20
  _MAX_JAVA_ENTRY_SCAN = 1000
19
21
  _MAX_ANNOTATION_ENTRY_POINTS = 500
@@ -50,14 +52,34 @@ class JavaDetector(AbstractDetector):
50
52
  def detect(self, context: DetectionContext) -> tuple[list[StackDetection], list[EntryPoint]]:
51
53
  frameworks: list[FrameworkDetection] = []
52
54
  manifests: list[str] = []
55
+ language_version: str | None = None
56
+ packaging: str | None = None
57
+ app_server_hint: str | None = None
58
+ spring_profiles: list[str] = []
53
59
 
54
60
  if "pom.xml" in context.manifests:
55
61
  manifests.append("pom.xml")
56
- frameworks.extend(self._frameworks_from_pom(context.root / "pom.xml"))
62
+ pom_path = context.root / "pom.xml"
63
+ frameworks.extend(self._frameworks_from_pom(pom_path))
64
+ meta = self._parse_pom_metadata(pom_path)
65
+ if meta.get("language_version"):
66
+ language_version = meta["language_version"]
67
+ if meta.get("packaging"):
68
+ packaging = meta["packaging"]
57
69
  if "build.gradle" in context.manifests:
58
70
  manifests.append("build.gradle")
59
71
  frameworks.extend(self._frameworks_from_gradle(context.root / "build.gradle"))
60
72
 
73
+ # Detect app server from descriptor files
74
+ all_paths = flatten_file_tree(context.file_tree)
75
+ if any("weblogic.xml" in p or "weblogic-ejb-jar.xml" in p for p in all_paths):
76
+ app_server_hint = "weblogic"
77
+ elif any("wildfly" in p.lower() or "jboss" in p.lower() for p in all_paths):
78
+ app_server_hint = "wildfly"
79
+
80
+ # Spring profiles — check src/main/options/, src/main/resources/
81
+ spring_profiles = self._detect_spring_profiles(context.root, all_paths)
82
+
61
83
  entry_points = self._collect_entry_points(context)
62
84
  stack = StackDetection(
63
85
  stack="java",
@@ -65,27 +87,124 @@ class JavaDetector(AbstractDetector):
65
87
  confidence="high",
66
88
  frameworks=self._dedupe_frameworks(frameworks),
67
89
  manifests=manifests,
90
+ language_version=language_version,
91
+ packaging=packaging,
92
+ app_server_hint=app_server_hint,
93
+ spring_profiles=spring_profiles,
68
94
  )
69
95
  return [stack], entry_points
70
96
 
97
def _parse_pom_metadata(self, path: Path) -> dict:
    """Extract packaging and Java version from a pom.xml.

    Args:
        path: Location of the pom.xml file.

    Returns:
        A dict that may contain ``"packaging"`` (lower-cased text of
        ``<packaging>``) and ``"language_version"``. The version is taken
        from the ``maven.compiler.source``, ``java.version`` or
        ``maven.compiler.release`` property (in that order), otherwise from
        a ``maven-compiler-plugin`` ``<source>`` / ``<release>`` setting.
        ``${property}`` references are resolved against the POM's own
        ``<properties>``; a value that cannot be resolved is skipped rather
        than reported verbatim. An unreadable or malformed file yields ``{}``.
    """
    result: dict = {}
    try:
        tree = ElementTree.parse(path)
    except (OSError, ElementTree.ParseError):
        return result
    root = tree.getroot()
    ns_match = _NS_TAG_RE.match(root.tag)
    ns = ns_match.group(0) if ns_match else ""

    # Packaging (FIX-6)
    packaging_elem = root.find(f"{ns}packaging")
    if packaging_elem is not None and packaging_elem.text:
        result["packaging"] = packaging_elem.text.strip().lower()

    # Properties
    props_elem = root.find(f"{ns}properties")
    props: dict[str, str] = {}
    if props_elem is not None:
        for prop in props_elem:
            prop_name = prop.tag.replace(ns, "") if ns else prop.tag
            if prop.text:
                props[prop_name] = prop.text.strip()

    def _resolve(raw: str) -> "str | None":
        """Follow ``${name}`` references through ``props``; None if unresolved."""
        value = raw.strip()
        for _ in range(5):  # bounded so a cyclic reference cannot loop forever
            ref = re.fullmatch(r"\$\{([^}]+)\}", value)
            if ref is None:
                break
            target = props.get(ref.group(1))
            if target is None:
                return None
            value = target
        if not value or "${" in value:
            return None
        return value

    # Java version (FIX-7) — check properties first, then compiler plugin
    for key in ("maven.compiler.source", "java.version", "maven.compiler.release"):
        if key in props:
            version = _resolve(props[key])
            if version:
                result["language_version"] = version
                break

    if "language_version" not in result:
        # Check maven-compiler-plugin configuration. Keep looking when one
        # declaration (e.g. under pluginManagement) carries no usable value.
        for plugin in root.findall(f".//{ns}plugin"):
            artifact = (plugin.findtext(f"{ns}artifactId") or "").strip()
            if artifact != "maven-compiler-plugin":
                continue
            config = plugin.find(f"{ns}configuration")
            if config is None:
                continue
            for setting in ("source", "release"):
                version = _resolve(config.findtext(f"{ns}{setting}") or "")
                if version:
                    result["language_version"] = version
                    break
            if "language_version" in result:
                break

    return result
142
+
143
+ def _detect_spring_profiles(self, root: Path, all_paths: list[str]) -> list[str]:
144
+ """Detect Spring profiles from option/resource directories and application-{profile}.yml."""
145
+ profiles: list[str] = []
146
+ seen: set[str] = set()
147
+
148
+ # Pattern 1: src/main/options/{profile}/ directories
149
+ _PROFILE_DIRS = ("src/main/options/", "src/main/resources/")
150
+ for path in all_paths:
151
+ for prefix in _PROFILE_DIRS:
152
+ if path.startswith(prefix):
153
+ remainder = path[len(prefix):]
154
+ parts = remainder.split("/")
155
+ if len(parts) >= 1 and parts[0] and not parts[0].startswith("."):
156
+ candidate = parts[0]
157
+ # Only if it's a directory (has sub-paths) with application.yml
158
+ if candidate not in seen and not candidate.endswith(".yml") and not candidate.endswith(".yaml") and not candidate.endswith(".properties"):
159
+ seen.add(candidate)
160
+ profiles.append(candidate)
161
+ break
162
+
163
+ # Pattern 2: application-{profile}.yml files
164
+ _APP_PROFILE_RE = re.compile(r"application-([A-Za-z0-9_-]+)\.ya?ml$")
165
+ for path in all_paths:
166
+ m = _APP_PROFILE_RE.search(path)
167
+ if m:
168
+ profile = m.group(1)
169
+ if profile not in seen:
170
+ seen.add(profile)
171
+ profiles.append(profile)
172
+
173
+ # Filter out generic names that aren't profiles
174
+ _SKIP = frozenset({"test", "it", "integration"})
175
+ return [p for p in profiles if p.lower() not in _SKIP]
176
+
71
177
  def _frameworks_from_pom(self, path: Path) -> list[FrameworkDetection]:
72
178
  try:
73
179
  tree = ElementTree.parse(path)
74
180
  except (OSError, ElementTree.ParseError):
75
181
  return []
76
- text = ElementTree.tostring(tree.getroot(), encoding="unicode").lower()
77
- return self._detect_jvm_frameworks(text, "pom.xml")
182
+ root_elem = tree.getroot()
183
+ ns_match = _NS_TAG_RE.match(root_elem.tag)
184
+ ns = ns_match.group(0) if ns_match else ""
185
+
186
+ # Extract Spring Boot version from <parent> (FIX-3)
187
+ sb_version: str | None = None
188
+ parent_elem = root_elem.find(f"{ns}parent")
189
+ if parent_elem is not None:
190
+ parent_artifact = (parent_elem.findtext(f"{ns}artifactId") or "").strip()
191
+ if parent_artifact == "spring-boot-starter-parent":
192
+ sb_version = (parent_elem.findtext(f"{ns}version") or "").strip() or None
193
+
194
+ text = ElementTree.tostring(root_elem, encoding="unicode").lower()
195
+ frameworks = self._detect_jvm_frameworks(text, "pom.xml", sb_version=sb_version)
196
+ return frameworks
78
197
 
79
198
  def _frameworks_from_gradle(self, path: Path) -> list[FrameworkDetection]:
80
199
  content = "\n".join(read_text_lines(path)).lower()
81
200
  return self._detect_jvm_frameworks(content, "build.gradle")
82
201
 
83
- def _detect_jvm_frameworks(self, text: str, source: str) -> list[FrameworkDetection]:
202
+ def _detect_jvm_frameworks(self, text: str, source: str, *, sb_version: str | None = None) -> list[FrameworkDetection]:
84
203
  frameworks: list[FrameworkDetection] = []
85
204
  if "com.android.application" in text or "com.android.library" in text:
86
205
  frameworks.append(FrameworkDetection(name="Android", source=source))
87
206
  if "spring-boot" in text:
88
- frameworks.append(FrameworkDetection(name="Spring Boot", source=source))
207
+ frameworks.append(FrameworkDetection(name="Spring Boot", source=source, version=sb_version))
89
208
  if "spring-webmvc" in text or "spring-web" in text:
90
209
  frameworks.append(FrameworkDetection(name="Spring MVC", source=source))
91
210
  if "spring-webflux" in text: