sourcecode 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/ast_extractor.py +172 -0
- sourcecode/cli.py +66 -13
- sourcecode/contract_model.py +7 -1
- sourcecode/contract_pipeline.py +18 -3
- sourcecode/dependency_analyzer.py +58 -1
- sourcecode/detectors/java.py +124 -5
- sourcecode/detectors/project.py +33 -5
- sourcecode/detectors/tooling.py +5 -0
- sourcecode/doc_analyzer.py +76 -1
- sourcecode/entrypoint_classifier.py +3 -0
- sourcecode/graph_analyzer.py +21 -68
- sourcecode/metrics_analyzer.py +16 -1
- sourcecode/prepare_context.py +27 -0
- sourcecode/schema.py +11 -0
- sourcecode/serializer.py +75 -3
- {sourcecode-1.2.0.dist-info → sourcecode-1.4.0.dist-info}/METADATA +1 -1
- {sourcecode-1.2.0.dist-info → sourcecode-1.4.0.dist-info}/RECORD +21 -21
- {sourcecode-1.2.0.dist-info → sourcecode-1.4.0.dist-info}/WHEEL +0 -0
- {sourcecode-1.2.0.dist-info → sourcecode-1.4.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.2.0.dist-info → sourcecode-1.4.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/ast_extractor.py
CHANGED
|
@@ -79,6 +79,7 @@ _LANGUAGE_MAP: dict[str, str] = {
|
|
|
79
79
|
".jsx": "jsx",
|
|
80
80
|
".mjs": "javascript",
|
|
81
81
|
".cjs": "javascript",
|
|
82
|
+
".java": "java",
|
|
82
83
|
}
|
|
83
84
|
|
|
84
85
|
_REACT_HOOKS: frozenset[str] = frozenset({
|
|
@@ -938,6 +939,175 @@ def _extract_python(path: str, source: str) -> FileContract:
|
|
|
938
939
|
)
|
|
939
940
|
|
|
940
941
|
|
|
942
|
+
# ---------------------------------------------------------------------------
|
|
943
|
+
# ---------------------------------------------------------------------------
|
|
944
|
+
# Enhanced Java extraction (regex-based, annotation-aware)
|
|
945
|
+
# ---------------------------------------------------------------------------
|
|
946
|
+
|
|
947
|
+
_JAVA_IMPORT_RE = re.compile(r'^import\s+(?:static\s+)?([^;\s]+)\s*;', re.MULTILINE)
|
|
948
|
+
|
|
949
|
+
# Single annotation on one line (captures name + optional parens args)
|
|
950
|
+
_JAVA_ANNO_LINE_RE = re.compile(r'^\s*(@[\w.]+(?:\s*\([^)]*\))?)\s*$')
|
|
951
|
+
# Class/interface/enum declaration line (public or package-private)
|
|
952
|
+
_JAVA_CLASS_LINE_RE = re.compile(
|
|
953
|
+
r'(?:public\s+)?(?:(?:abstract|final|static|sealed)\s+)*'
|
|
954
|
+
r'(class|interface|enum|@interface)\s+(\w+)'
|
|
955
|
+
r'(?:\s+extends\s+([\w.]+))?'
|
|
956
|
+
r'(?:\s+implements\s+([\w.,\s<>]+?))?'
|
|
957
|
+
r'(?=\s*[\{<])'
|
|
958
|
+
)
|
|
959
|
+
# Public method: up to 12 leading spaces, return type, name, open paren
|
|
960
|
+
_JAVA_PUB_METHOD_LINE_RE = re.compile(
|
|
961
|
+
r'^\s{0,12}public\s+(?:(?:static|final|synchronized|abstract|default)\s+)*'
|
|
962
|
+
r'[\w<>\[\]?,\s]+?\s+(\w+)\s*\(',
|
|
963
|
+
re.MULTILINE,
|
|
964
|
+
)
|
|
965
|
+
# @Autowired or @Inject field
|
|
966
|
+
_JAVA_FIELD_DECL_RE = re.compile(
|
|
967
|
+
r'^\s*(?:private|protected|public)?\s*'
|
|
968
|
+
r'([\w<>.,\[\]? ]+?)\s+(\w+)\s*[;=]'
|
|
969
|
+
)
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
def _java_collect_preceding_annotations(lines: list[str], decl_idx: int) -> list[str]:
|
|
973
|
+
"""Walk back from decl_idx and collect @annotation lines immediately before it."""
|
|
974
|
+
annotations: list[str] = []
|
|
975
|
+
i = decl_idx - 1
|
|
976
|
+
while i >= 0:
|
|
977
|
+
stripped = lines[i].strip()
|
|
978
|
+
if not stripped or stripped.startswith("//") or stripped.startswith("*"):
|
|
979
|
+
i -= 1
|
|
980
|
+
continue
|
|
981
|
+
m = re.match(r'(@[\w.]+(?:\s*\([^)]*\))?)', stripped)
|
|
982
|
+
if m:
|
|
983
|
+
annotations.insert(0, m.group(1))
|
|
984
|
+
i -= 1
|
|
985
|
+
else:
|
|
986
|
+
break
|
|
987
|
+
return annotations
|
|
988
|
+
|
|
989
|
+
|
|
990
|
+
def _extract_java(path: str, source: str) -> FileContract:
    """Build a heuristic, regex-based FileContract for one Java source file.

    Three passes over the text:

    1. Imports: every distinct ``import`` target becomes an ImportRecord.
    2. Line scan: type declarations (class / interface / enum / @interface)
       with their preceding annotations, plus fields injected through
       ``@Autowired`` / ``@Inject``.
    3. Public methods with their preceding annotations. Constructors and
       repeated names (overloads) are recorded once or not at all.

    Args:
        path: Path stored on the returned contract.
        source: Full text of the Java file.

    Returns:
        A FileContract with ``language="java"`` and
        ``extraction_method="heuristic"``. Imports are capped at 30,
        functions, dependencies and autowired fields at 20 each.
    """
    exports: list[ExportRecord] = []
    types: list[TypeDefinition] = []
    functions: list[FunctionSignature] = []
    imports: list[ImportRecord] = []
    autowired_fields: list[dict] = []

    lines = source.splitlines()

    # Pass 1: collect imports (first occurrence of each target only)
    seen_sources: set[str] = set()
    for m in _JAVA_IMPORT_RE.finditer(source):
        full_import = m.group(1).strip()
        if full_import not in seen_sources:
            seen_sources.add(full_import)
            imports.append(ImportRecord(source=full_import, kind="named", symbols=[]))

    # Pass 2: line-by-line scan for classes and injected fields
    class_names: set[str] = set()
    seen_methods: set[str] = set()
    autowired_pending = False

    # Modifiers that may precede the type inside the field pattern's type group.
    field_modifiers = frozenset(
        {"private", "protected", "public", "static", "final", "transient", "volatile"}
    )
    # One or more annotations at the start of a line (args without nested parens).
    leading_annotations = re.compile(r'(?:@[\w.]+(?:\s*\([^)]*\))?\s*)+')

    def _record_injected_field(text: str) -> None:
        """Append {"type", "name"} for the field declared in *text*, if any."""
        fm = _JAVA_FIELD_DECL_RE.match(text)
        if fm is None:
            return
        tokens = fm.group(1).split()
        # Drop leading modifiers but keep the full type: taking only the last
        # whitespace token turned "Map<String, Foo>" into "Foo>".
        while tokens and tokens[0] in field_modifiers:
            tokens.pop(0)
        type_name = " ".join(tokens)
        field_name = fm.group(2).strip()
        if field_name and type_name and field_name not in {"class", "interface"}:
            autowired_fields.append({"type": type_name, "name": field_name})

    for idx, line in enumerate(lines):
        stripped = line.strip()

        # Detect @Autowired / @Inject annotations
        if stripped.startswith(("@Autowired", "@Inject")):
            anno = leading_annotations.match(stripped)
            remainder = stripped[anno.end():] if anno else ""
            if remainder and not remainder.startswith("//"):
                # Annotation and declaration share the line: handle it here
                # instead of attributing the injection to the next statement.
                _record_injected_field(remainder)
                autowired_pending = False
            else:
                autowired_pending = True
            continue

        # Comment lines neither declare types nor end a pending injection.
        if stripped.startswith(("//", "/*", "*")):
            continue

        # Capture the injected field on the next non-annotation, non-blank line
        if autowired_pending and stripped and not stripped.startswith("@"):
            autowired_pending = False
            _record_injected_field(line)

        # Class/interface/enum declaration (the pattern itself requires the keyword)
        cm = _JAVA_CLASS_LINE_RE.search(stripped)
        if cm is None:
            continue
        kind_kw = cm.group(1)  # class | interface | enum | @interface
        name = cm.group(2)
        extends_str = cm.group(3)
        implements_str = cm.group(4)

        annotations = _java_collect_preceding_annotations(lines, idx)
        all_extends: list[str] = [extends_str.strip()] if extends_str else []
        implements_list: list[str] = []
        if implements_str:
            implements_list = [part.strip() for part in implements_str.split(",") if part.strip()]

        # Annotation types are reported with kind "class".
        export_kind = "class" if kind_kw in ("class", "@interface") else kind_kw
        exports.append(ExportRecord(
            name=name,
            kind=export_kind,
            annotations=annotations,
            extends=extends_str.strip() if extends_str else None,
            implements=implements_list,
        ))
        types.append(TypeDefinition(
            name=name,
            kind=export_kind,
            fields=[],
            extends=all_extends,
        ))
        class_names.add(name)

    # Pass 3: public methods with their preceding annotations
    control_words = {"if", "for", "while", "switch", "return", "new"}
    for m in _JAVA_PUB_METHOD_LINE_RE.finditer(source):
        matched = m.group(0)
        sig_text = matched.strip()
        mname = m.group(1)
        # Skip constructors (same name as a type), overloads already seen,
        # and keywords the pattern can mistake for a method name.
        if mname in class_names or mname in seen_methods or mname in control_words:
            continue
        seen_methods.add(mname)
        # Locate the line holding the signature itself: the match may begin
        # with whitespace, so measure from its first non-blank character.
        decl_offset = m.start() + len(matched) - len(matched.lstrip())
        decl_line = source.count("\n", 0, decl_offset)
        annotations = _java_collect_preceding_annotations(lines, decl_line)
        exports.append(ExportRecord(
            name=mname,
            kind="method",
            annotations=annotations,
            signature=sig_text,
        ))
        functions.append(FunctionSignature(
            name=mname,
            signature=sig_text,
            async_=False,
            exported=True,
            return_type=None,
        ))

    # External deps: top-2 package segments, skip java.* / javax.*
    deps = sorted({
        ".".join(imp.source.split(".")[:2])
        for imp in imports
        if not imp.source.startswith("java.") and not imp.source.startswith("javax.")
        and len(imp.source.split(".")) >= 2
    })

    return FileContract(
        path=path,
        language="java",
        exports=exports,
        imports=sorted(imports, key=lambda i: i.source)[:30],
        functions=sorted(functions, key=lambda f: f.name)[:20],
        types=sorted(types, key=lambda t: t.name),
        dependencies=deps[:20],
        autowired_fields=autowired_fields[:20],
        extraction_method="heuristic",
    )
|
|
1109
|
+
|
|
1110
|
+
|
|
941
1111
|
# ---------------------------------------------------------------------------
|
|
942
1112
|
# Role detection
|
|
943
1113
|
# ---------------------------------------------------------------------------
|
|
@@ -1048,6 +1218,8 @@ class AstExtractor:
|
|
|
1048
1218
|
|
|
1049
1219
|
if language == "python":
|
|
1050
1220
|
contract = _extract_python(rel_path, source)
|
|
1221
|
+
elif language == "java":
|
|
1222
|
+
contract = _extract_java(rel_path, source)
|
|
1051
1223
|
else:
|
|
1052
1224
|
if self._ensure_ts():
|
|
1053
1225
|
lang_obj = _get_ts_lang(language)
|
sourcecode/cli.py
CHANGED
|
@@ -728,15 +728,13 @@ def main(
|
|
|
728
728
|
mode = "contract" # unknown → safe default
|
|
729
729
|
|
|
730
730
|
# Legacy flags imply raw mode unless --mode was explicitly overridden.
|
|
731
|
-
#
|
|
732
|
-
#
|
|
733
|
-
#
|
|
734
|
-
#
|
|
735
|
-
# force mode to raw. --agent is excluded: it now runs the contract pipeline
|
|
736
|
-
# and enriches contract_view with auto-enabled analyzers (deps, env, notes).
|
|
731
|
+
# --format yaml and --graph-modules are now compatible with contract_view:
|
|
732
|
+
# yaml is a serialization format (not an output-section flag)
|
|
733
|
+
# graph-modules output is included in contract_view when available
|
|
734
|
+
# Other flags that produce sections exclusive to standard_view still force raw.
|
|
737
735
|
_legacy_flags_active = (
|
|
738
|
-
compact or tree or
|
|
739
|
-
or docs or semantics or
|
|
736
|
+
compact or tree or trace_pipeline
|
|
737
|
+
or docs or semantics or full_metrics or architecture
|
|
740
738
|
)
|
|
741
739
|
if mode in ("contract", "standard") and _legacy_flags_active:
|
|
742
740
|
mode = "raw"
|
|
@@ -1106,6 +1104,17 @@ def main(
|
|
|
1106
1104
|
metrics_summary=metrics_summary,
|
|
1107
1105
|
)
|
|
1108
1106
|
|
|
1107
|
+
# Populate Java-specific root fields from java stack detection (FIX-6, 7, 8)
|
|
1108
|
+
_java_stack = next((s for s in stacks if s.stack == "java"), None)
|
|
1109
|
+
if _java_stack is not None:
|
|
1110
|
+
from dataclasses import replace as _dc_replace
|
|
1111
|
+
sm = _dc_replace(sm,
|
|
1112
|
+
packaging=getattr(_java_stack, "packaging", None) or None,
|
|
1113
|
+
language_version=getattr(_java_stack, "language_version", None) or None,
|
|
1114
|
+
spring_profiles=getattr(_java_stack, "spring_profiles", []) or [],
|
|
1115
|
+
app_server_hint=getattr(_java_stack, "app_server_hint", None) or None,
|
|
1116
|
+
)
|
|
1117
|
+
|
|
1109
1118
|
# Semantic analysis (--semantics flag)
|
|
1110
1119
|
if semantic_analyzer is not None:
|
|
1111
1120
|
if workspace_analysis.workspaces:
|
|
@@ -1270,10 +1279,24 @@ def main(
|
|
|
1270
1279
|
and d.scope not in {"dev"}
|
|
1271
1280
|
]
|
|
1272
1281
|
|
|
1273
|
-
|
|
1282
|
+
_JAVA_SEMANTIC_PRIORITY: dict[str, int] = {
|
|
1283
|
+
"spring-boot": 0, "spring-security": 1, "mybatis": 2,
|
|
1284
|
+
"poi": 3, "pdfbox": 4, "jackson": 5, "jjwt": 6,
|
|
1285
|
+
}
|
|
1286
|
+
|
|
1287
|
+
def _java_priority(d: Any) -> int:
|
|
1288
|
+
if d.ecosystem != "java":
|
|
1289
|
+
return 99
|
|
1290
|
+
art = (d.name.split(":")[-1] if ":" in d.name else d.name).lower()
|
|
1291
|
+
for key, pri in _JAVA_SEMANTIC_PRIORITY.items():
|
|
1292
|
+
if key in art:
|
|
1293
|
+
return pri
|
|
1294
|
+
return 50
|
|
1295
|
+
|
|
1296
|
+
def _dep_sort_key(d: Any) -> tuple[int, int, int, str]:
|
|
1274
1297
|
role_order = _ROLE_PRIORITY.get(d.role or "runtime", 5)
|
|
1275
1298
|
eco_order = 0 if d.ecosystem == primary_ecosystem else 1
|
|
1276
|
-
return (role_order, eco_order, d.name.lower())
|
|
1299
|
+
return (role_order, eco_order, _java_priority(d), d.name.lower())
|
|
1277
1300
|
|
|
1278
1301
|
_seen_dep_names: set[str] = set()
|
|
1279
1302
|
_deduped_deps: list[Any] = []
|
|
@@ -1281,7 +1304,7 @@ def main(
|
|
|
1281
1304
|
if d.name not in _seen_dep_names:
|
|
1282
1305
|
_seen_dep_names.add(d.name)
|
|
1283
1306
|
_deduped_deps.append(d)
|
|
1284
|
-
sm.key_dependencies = _deduped_deps
|
|
1307
|
+
sm.key_dependencies = _deduped_deps # no cap — all direct deps included
|
|
1285
1308
|
|
|
1286
1309
|
# LQN-02: deterministic NL summary
|
|
1287
1310
|
sm.project_summary = ProjectSummarizer(target).generate(sm)
|
|
@@ -1384,10 +1407,20 @@ def main(
|
|
|
1384
1407
|
|
|
1385
1408
|
# Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
|
|
1386
1409
|
_is_contract_mode = mode in ("contract", "standard")
|
|
1410
|
+
_pipeline_error = False
|
|
1387
1411
|
if _is_contract_mode:
|
|
1388
1412
|
from sourcecode.contract_pipeline import ContractPipeline
|
|
1389
1413
|
from sourcecode.contract_model import ContractSummary as _ContractSummary
|
|
1390
|
-
|
|
1414
|
+
# FIX-1: Java projects need higher caps — many files, comprehensive coverage required
|
|
1415
|
+
_jvm_stacks = {"java", "kotlin", "scala", "groovy"}
|
|
1416
|
+
_is_jvm = any(s.stack in _jvm_stacks for s in sm.stacks)
|
|
1417
|
+
# JVM stacks (java/kotlin/scala/groovy) scan up to 2500 files instead of 500; the contract cap and relevance threshold are relaxed below.
|
|
1418
|
+
_max_files_cp = 2500 if _is_jvm else 500
|
|
1419
|
+
_cp = ContractPipeline(max_files=_max_files_cp)
|
|
1420
|
+
_java_pipeline_kwargs: dict = {}
|
|
1421
|
+
if _is_jvm:
|
|
1422
|
+
_java_pipeline_kwargs["max_contracts"] = 500
|
|
1423
|
+
_java_pipeline_kwargs["min_score"] = 0.0
|
|
1391
1424
|
try:
|
|
1392
1425
|
_contracts, _contract_summary = _cp.run(
|
|
1393
1426
|
target,
|
|
@@ -1405,9 +1438,11 @@ def main(
|
|
|
1405
1438
|
max_importers=max_importers,
|
|
1406
1439
|
semantic_calls=sm.semantic_calls or None,
|
|
1407
1440
|
code_notes=sm.code_notes or None,
|
|
1441
|
+
**_java_pipeline_kwargs,
|
|
1408
1442
|
)
|
|
1409
1443
|
except Exception as _exc:
|
|
1410
1444
|
typer.echo(f"[error] contract pipeline failed: {_exc}", err=True)
|
|
1445
|
+
_pipeline_error = True
|
|
1411
1446
|
_contracts = []
|
|
1412
1447
|
_contract_summary = _ContractSummary(
|
|
1413
1448
|
mode=mode,
|
|
@@ -1446,7 +1481,22 @@ def main(
|
|
|
1446
1481
|
data = _contract_view(sm, emit_graph=emit_graph, depth=_depth)
|
|
1447
1482
|
if not no_redact:
|
|
1448
1483
|
data = redact_dict(data)
|
|
1449
|
-
|
|
1484
|
+
if format == "yaml":
|
|
1485
|
+
from io import StringIO
|
|
1486
|
+
from ruamel.yaml import YAML as _YAML
|
|
1487
|
+
_yaml = _YAML()
|
|
1488
|
+
_yaml.default_flow_style = False
|
|
1489
|
+
_yaml.representer.add_representer(
|
|
1490
|
+
type(None),
|
|
1491
|
+
lambda dumper, data_val: dumper.represent_scalar(
|
|
1492
|
+
"tag:yaml.org,2002:null", "null"
|
|
1493
|
+
),
|
|
1494
|
+
)
|
|
1495
|
+
_stream = StringIO()
|
|
1496
|
+
_yaml.dump(data, _stream)
|
|
1497
|
+
content = _stream.getvalue()
|
|
1498
|
+
else:
|
|
1499
|
+
content = json.dumps(data, indent=2, ensure_ascii=False)
|
|
1450
1500
|
elif agent:
|
|
1451
1501
|
data = agent_view(sm)
|
|
1452
1502
|
# When contract pipeline ran (mode=contract, no legacy flags), include
|
|
@@ -1518,6 +1568,9 @@ def main(
|
|
|
1518
1568
|
# 6. Write output (CLI-04)
|
|
1519
1569
|
write_output(content, output=output)
|
|
1520
1570
|
|
|
1571
|
+
if _pipeline_error:
|
|
1572
|
+
raise typer.Exit(code=2)
|
|
1573
|
+
|
|
1521
1574
|
# 7. Clipboard copy (--copy / -c)
|
|
1522
1575
|
if copy and output is None:
|
|
1523
1576
|
_trimmed = content.strip()
|
sourcecode/contract_model.py
CHANGED
|
@@ -25,9 +25,13 @@ class ExportRecord:
|
|
|
25
25
|
"""Exported symbol."""
|
|
26
26
|
|
|
27
27
|
name: str
|
|
28
|
-
kind: str = "unknown" # function | class | const | type | default | react_component | enum | interface
|
|
28
|
+
kind: str = "unknown" # function | class | const | type | default | react_component | enum | interface | method
|
|
29
29
|
type_ref: Optional[str] = None
|
|
30
30
|
async_: bool = False
|
|
31
|
+
annotations: list[str] = field(default_factory=list) # Java: ["@Controller", "@Transactional"]
|
|
32
|
+
extends: Optional[str] = None # Java: parent class
|
|
33
|
+
implements: list[str] = field(default_factory=list) # Java: interfaces
|
|
34
|
+
signature: Optional[str] = None # Java method: full signature
|
|
31
35
|
|
|
32
36
|
|
|
33
37
|
@dataclass
|
|
@@ -96,6 +100,8 @@ class FileContract:
|
|
|
96
100
|
# Extraction quality
|
|
97
101
|
extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
|
|
98
102
|
limitations: list[str] = field(default_factory=list)
|
|
103
|
+
# Java-specific (FIX-1)
|
|
104
|
+
autowired_fields: list[dict] = field(default_factory=list) # [{"type": "...", "name": "..."}]
|
|
99
105
|
|
|
100
106
|
|
|
101
107
|
@dataclass
|
sourcecode/contract_pipeline.py
CHANGED
|
@@ -182,6 +182,7 @@ class ContractPipeline:
|
|
|
182
182
|
semantic_calls: Optional[list] = None,
|
|
183
183
|
code_notes: Optional[list] = None,
|
|
184
184
|
max_contracts: Optional[int] = _MAX_CONTRACTS,
|
|
185
|
+
min_score: Optional[float] = None,
|
|
185
186
|
) -> tuple[list[FileContract], ContractSummary]:
|
|
186
187
|
"""Run the full extraction pipeline.
|
|
187
188
|
|
|
@@ -317,11 +318,17 @@ class ContractPipeline:
|
|
|
317
318
|
# 10. Top-N cap — enforce max_contracts when not in symbol-search mode.
|
|
318
319
|
# Symbol searches must return all matching files; budget applies only to
|
|
319
320
|
# the default architectural briefing use case.
|
|
321
|
+
_effective_min_score = min_score if min_score is not None else _MIN_CONTRACT_SCORE
|
|
320
322
|
if symbol is None and max_contracts is not None:
|
|
321
323
|
contracts = [
|
|
322
324
|
c for c in contracts
|
|
323
|
-
if c.relevance_score >=
|
|
325
|
+
if c.relevance_score >= _effective_min_score or c.is_entrypoint
|
|
324
326
|
][:max_contracts]
|
|
327
|
+
elif symbol is None and max_contracts is None:
|
|
328
|
+
contracts = [
|
|
329
|
+
c for c in contracts
|
|
330
|
+
if c.relevance_score >= _effective_min_score or c.is_entrypoint
|
|
331
|
+
]
|
|
325
332
|
|
|
326
333
|
# 11. Compress types if requested
|
|
327
334
|
if compress_types:
|
|
@@ -370,7 +377,15 @@ class ContractPipeline:
|
|
|
370
377
|
"""
|
|
371
378
|
candidates = _find_symbol_files(root, symbol, known_paths, engine)
|
|
372
379
|
if not candidates:
|
|
373
|
-
return []
|
|
380
|
+
return [], {
|
|
381
|
+
"symbol": symbol,
|
|
382
|
+
"definers_found": 0,
|
|
383
|
+
"importers_found": 0,
|
|
384
|
+
"importers_returned": 0,
|
|
385
|
+
"references_found": 0,
|
|
386
|
+
"total_returned": 0,
|
|
387
|
+
"truncated": False,
|
|
388
|
+
}
|
|
374
389
|
|
|
375
390
|
extra: list[FileContract] = []
|
|
376
391
|
for rel_path in candidates[:300]: # cap to prevent excessive extraction
|
|
@@ -577,7 +592,7 @@ def _find_symbol_files(
|
|
|
577
592
|
"grep", "-rl",
|
|
578
593
|
"--include=*.ts", "--include=*.tsx",
|
|
579
594
|
"--include=*.js", "--include=*.jsx",
|
|
580
|
-
"--include=*.py",
|
|
595
|
+
"--include=*.py", "--include=*.java",
|
|
581
596
|
symbol, ".",
|
|
582
597
|
],
|
|
583
598
|
cwd=str(root),
|
|
@@ -127,6 +127,24 @@ def _infer_role(name: str, ecosystem: str, scope: str) -> str:
|
|
|
127
127
|
return "infra"
|
|
128
128
|
return "runtime"
|
|
129
129
|
|
|
130
|
+
if ecosystem == "java":
|
|
131
|
+
if scope == "provided":
|
|
132
|
+
return "provided"
|
|
133
|
+
artifact = n.split(":")[-1] if ":" in n else n
|
|
134
|
+
if any(x in artifact for x in ("spring-boot", "spring-security")):
|
|
135
|
+
return "runtime"
|
|
136
|
+
if any(x in artifact for x in ("spring-web", "spring-mvc", "spring-core", "spring-context")):
|
|
137
|
+
return "runtime"
|
|
138
|
+
if any(x in artifact for x in ("mybatis", "hibernate", "jpa", "druid", "datasource")):
|
|
139
|
+
return "infra"
|
|
140
|
+
if any(x in artifact for x in ("jackson", "gson", "fastjson")):
|
|
141
|
+
return "serialization"
|
|
142
|
+
if any(x in artifact for x in ("poi", "pdfbox", "itext", "openpdf")):
|
|
143
|
+
return "parsing"
|
|
144
|
+
if any(x in artifact for x in ("jjwt", "nimbus-jose")):
|
|
145
|
+
return "runtime"
|
|
146
|
+
return "devtool" if is_dev else "runtime"
|
|
147
|
+
|
|
130
148
|
return "devtool" if is_dev else "runtime"
|
|
131
149
|
|
|
132
150
|
|
|
@@ -1104,6 +1122,23 @@ class DependencyAnalyzer:
|
|
|
1104
1122
|
properties = self._parse_maven_properties(root_elem, ns)
|
|
1105
1123
|
dm_versions = self._parse_dependency_management(root_elem, ns, properties)
|
|
1106
1124
|
|
|
1125
|
+
# FIX-9: extract parent version for BOM resolution
|
|
1126
|
+
parent_elem = root_elem.find(f"{ns}parent")
|
|
1127
|
+
parent_version: Optional[str] = None
|
|
1128
|
+
parent_group: str = ""
|
|
1129
|
+
if parent_elem is not None:
|
|
1130
|
+
parent_version = (parent_elem.findtext(f"{ns}version") or "").strip() or None
|
|
1131
|
+
parent_group = (parent_elem.findtext(f"{ns}groupId") or "").strip()
|
|
1132
|
+
parent_artifact = (parent_elem.findtext(f"{ns}artifactId") or "").strip()
|
|
1133
|
+
# Propagate parent version into properties for ${project.parent.version}
|
|
1134
|
+
if parent_version:
|
|
1135
|
+
properties.setdefault("project.parent.version", parent_version)
|
|
1136
|
+
properties.setdefault("revision", parent_version)
|
|
1137
|
+
|
|
1138
|
+
# Infer packaging for FIX-6 (used by scope hint for provided)
|
|
1139
|
+
packaging_elem = root_elem.find(f"{ns}packaging")
|
|
1140
|
+
is_war = packaging_elem is not None and (packaging_elem.text or "").strip().lower() == "war"
|
|
1141
|
+
|
|
1107
1142
|
records: list[DependencyRecord] = []
|
|
1108
1143
|
deps_elem = root_elem.find(f"{ns}dependencies")
|
|
1109
1144
|
if deps_elem is None:
|
|
@@ -1118,14 +1153,36 @@ class DependencyAnalyzer:
|
|
|
1118
1153
|
declared = self._resolve_maven_version(version_raw, properties)
|
|
1119
1154
|
if declared is None:
|
|
1120
1155
|
declared = dm_versions.get(f"{group_id}:{artifact_id}")
|
|
1156
|
+
|
|
1157
|
+
# FIX-4: proper maven scope mapping
|
|
1121
1158
|
scope_text = (dep.findtext(f"{ns}scope") or "compile").strip().lower()
|
|
1122
|
-
|
|
1159
|
+
if scope_text == "test":
|
|
1160
|
+
scope = "dev"
|
|
1161
|
+
elif scope_text == "provided":
|
|
1162
|
+
scope = "provided"
|
|
1163
|
+
else:
|
|
1164
|
+
scope = "direct" # compile, runtime, system, import
|
|
1165
|
+
|
|
1166
|
+
# FIX-4: infer provided for embedded tomcat in WAR projects
|
|
1167
|
+
if (is_war and scope == "direct"
|
|
1168
|
+
and artifact_id in ("spring-boot-starter-tomcat", "tomcat-embed-core")):
|
|
1169
|
+
scope = "provided"
|
|
1170
|
+
|
|
1171
|
+
# FIX-9: resolve BOM version for Spring Boot / Spring Security starters
|
|
1172
|
+
resolved_version: Optional[str] = None
|
|
1173
|
+
if declared is None and parent_version:
|
|
1174
|
+
if group_id == "org.springframework.boot":
|
|
1175
|
+
resolved_version = parent_version
|
|
1176
|
+
elif group_id == "org.springframework.security" and "spring-security.version" in properties:
|
|
1177
|
+
resolved_version = properties["spring-security.version"]
|
|
1178
|
+
|
|
1123
1179
|
records.append(
|
|
1124
1180
|
DependencyRecord(
|
|
1125
1181
|
name=f"{group_id}:{artifact_id}",
|
|
1126
1182
|
ecosystem="java",
|
|
1127
1183
|
scope=scope,
|
|
1128
1184
|
declared_version=declared,
|
|
1185
|
+
resolved_version=resolved_version,
|
|
1129
1186
|
source="manifest",
|
|
1130
1187
|
manifest_path="pom.xml",
|
|
1131
1188
|
)
|
sourcecode/detectors/java.py
CHANGED
|
@@ -14,6 +14,8 @@ from sourcecode.detectors.parsers import read_text_lines, unique_strings
|
|
|
14
14
|
from sourcecode.schema import FrameworkDetection
|
|
15
15
|
from sourcecode.tree_utils import flatten_file_tree
|
|
16
16
|
|
|
17
|
+
_NS_TAG_RE = re.compile(r"\{[^}]+\}")
|
|
18
|
+
|
|
17
19
|
_MAX_FILE_SIZE = 256 * 1024 # 256 KB
|
|
18
20
|
_MAX_JAVA_ENTRY_SCAN = 1000
|
|
19
21
|
_MAX_ANNOTATION_ENTRY_POINTS = 500
|
|
@@ -50,14 +52,34 @@ class JavaDetector(AbstractDetector):
|
|
|
50
52
|
def detect(self, context: DetectionContext) -> tuple[list[StackDetection], list[EntryPoint]]:
|
|
51
53
|
frameworks: list[FrameworkDetection] = []
|
|
52
54
|
manifests: list[str] = []
|
|
55
|
+
language_version: str | None = None
|
|
56
|
+
packaging: str | None = None
|
|
57
|
+
app_server_hint: str | None = None
|
|
58
|
+
spring_profiles: list[str] = []
|
|
53
59
|
|
|
54
60
|
if "pom.xml" in context.manifests:
|
|
55
61
|
manifests.append("pom.xml")
|
|
56
|
-
|
|
62
|
+
pom_path = context.root / "pom.xml"
|
|
63
|
+
frameworks.extend(self._frameworks_from_pom(pom_path))
|
|
64
|
+
meta = self._parse_pom_metadata(pom_path)
|
|
65
|
+
if meta.get("language_version"):
|
|
66
|
+
language_version = meta["language_version"]
|
|
67
|
+
if meta.get("packaging"):
|
|
68
|
+
packaging = meta["packaging"]
|
|
57
69
|
if "build.gradle" in context.manifests:
|
|
58
70
|
manifests.append("build.gradle")
|
|
59
71
|
frameworks.extend(self._frameworks_from_gradle(context.root / "build.gradle"))
|
|
60
72
|
|
|
73
|
+
# Detect app server from descriptor files
|
|
74
|
+
all_paths = flatten_file_tree(context.file_tree)
|
|
75
|
+
if any("weblogic.xml" in p or "weblogic-ejb-jar.xml" in p for p in all_paths):
|
|
76
|
+
app_server_hint = "weblogic"
|
|
77
|
+
elif any("wildfly" in p.lower() or "jboss" in p.lower() for p in all_paths):
|
|
78
|
+
app_server_hint = "wildfly"
|
|
79
|
+
|
|
80
|
+
# Spring profiles — check src/main/options/, src/main/resources/
|
|
81
|
+
spring_profiles = self._detect_spring_profiles(context.root, all_paths)
|
|
82
|
+
|
|
61
83
|
entry_points = self._collect_entry_points(context)
|
|
62
84
|
stack = StackDetection(
|
|
63
85
|
stack="java",
|
|
@@ -65,27 +87,124 @@ class JavaDetector(AbstractDetector):
|
|
|
65
87
|
confidence="high",
|
|
66
88
|
frameworks=self._dedupe_frameworks(frameworks),
|
|
67
89
|
manifests=manifests,
|
|
90
|
+
language_version=language_version,
|
|
91
|
+
packaging=packaging,
|
|
92
|
+
app_server_hint=app_server_hint,
|
|
93
|
+
spring_profiles=spring_profiles,
|
|
68
94
|
)
|
|
69
95
|
return [stack], entry_points
|
|
70
96
|
|
|
97
|
+
def _parse_pom_metadata(self, path: Path) -> dict:
|
|
98
|
+
"""Extract packaging, java version from pom.xml properties/parent."""
|
|
99
|
+
result: dict = {}
|
|
100
|
+
try:
|
|
101
|
+
tree = ElementTree.parse(path)
|
|
102
|
+
except (OSError, ElementTree.ParseError):
|
|
103
|
+
return result
|
|
104
|
+
root = tree.getroot()
|
|
105
|
+
ns_match = _NS_TAG_RE.match(root.tag)
|
|
106
|
+
ns = ns_match.group(0) if ns_match else ""
|
|
107
|
+
|
|
108
|
+
# Packaging (FIX-6)
|
|
109
|
+
packaging_elem = root.find(f"{ns}packaging")
|
|
110
|
+
if packaging_elem is not None and packaging_elem.text:
|
|
111
|
+
result["packaging"] = packaging_elem.text.strip().lower()
|
|
112
|
+
|
|
113
|
+
# Properties
|
|
114
|
+
props_elem = root.find(f"{ns}properties")
|
|
115
|
+
props: dict[str, str] = {}
|
|
116
|
+
if props_elem is not None:
|
|
117
|
+
for prop in props_elem:
|
|
118
|
+
tag = prop.tag.replace(ns, "") if ns else prop.tag
|
|
119
|
+
if prop.text:
|
|
120
|
+
props[tag] = prop.text.strip()
|
|
121
|
+
|
|
122
|
+
# Java version (FIX-7) — check properties first, then compiler plugin
|
|
123
|
+
for key in ("maven.compiler.source", "java.version", "maven.compiler.release"):
|
|
124
|
+
if key in props:
|
|
125
|
+
result["language_version"] = props[key]
|
|
126
|
+
break
|
|
127
|
+
if "language_version" not in result:
|
|
128
|
+
# Check maven-compiler-plugin configuration
|
|
129
|
+
for plugin in root.findall(f".//{ns}plugin"):
|
|
130
|
+
artifact = (plugin.findtext(f"{ns}artifactId") or "").strip()
|
|
131
|
+
if artifact == "maven-compiler-plugin":
|
|
132
|
+
config = plugin.find(f"{ns}configuration")
|
|
133
|
+
if config is not None:
|
|
134
|
+
for tag in ("source", "release"):
|
|
135
|
+
val = config.findtext(f"{ns}{tag}")
|
|
136
|
+
if val:
|
|
137
|
+
result["language_version"] = val.strip()
|
|
138
|
+
break
|
|
139
|
+
break
|
|
140
|
+
|
|
141
|
+
return result
|
|
142
|
+
|
|
143
|
+
def _detect_spring_profiles(self, root: Path, all_paths: list[str]) -> list[str]:
|
|
144
|
+
"""Detect Spring profiles from option/resource directories and application-{profile}.yml."""
|
|
145
|
+
profiles: list[str] = []
|
|
146
|
+
seen: set[str] = set()
|
|
147
|
+
|
|
148
|
+
# Pattern 1: src/main/options/{profile}/ directories
|
|
149
|
+
_PROFILE_DIRS = ("src/main/options/", "src/main/resources/")
|
|
150
|
+
for path in all_paths:
|
|
151
|
+
for prefix in _PROFILE_DIRS:
|
|
152
|
+
if path.startswith(prefix):
|
|
153
|
+
remainder = path[len(prefix):]
|
|
154
|
+
parts = remainder.split("/")
|
|
155
|
+
if len(parts) >= 1 and parts[0] and not parts[0].startswith("."):
|
|
156
|
+
candidate = parts[0]
|
|
157
|
+
# Only if it's a directory (has sub-paths) with application.yml
|
|
158
|
+
if candidate not in seen and not candidate.endswith(".yml") and not candidate.endswith(".yaml") and not candidate.endswith(".properties"):
|
|
159
|
+
seen.add(candidate)
|
|
160
|
+
profiles.append(candidate)
|
|
161
|
+
break
|
|
162
|
+
|
|
163
|
+
# Pattern 2: application-{profile}.yml files
|
|
164
|
+
_APP_PROFILE_RE = re.compile(r"application-([A-Za-z0-9_-]+)\.ya?ml$")
|
|
165
|
+
for path in all_paths:
|
|
166
|
+
m = _APP_PROFILE_RE.search(path)
|
|
167
|
+
if m:
|
|
168
|
+
profile = m.group(1)
|
|
169
|
+
if profile not in seen:
|
|
170
|
+
seen.add(profile)
|
|
171
|
+
profiles.append(profile)
|
|
172
|
+
|
|
173
|
+
# Filter out generic names that aren't profiles
|
|
174
|
+
_SKIP = frozenset({"test", "it", "integration"})
|
|
175
|
+
return [p for p in profiles if p.lower() not in _SKIP]
|
|
176
|
+
|
|
71
177
|
def _frameworks_from_pom(self, path: Path) -> list[FrameworkDetection]:
    """Detect JVM frameworks referenced by a Maven ``pom.xml``.

    The whole document is lower-cased and handed to
    ``_detect_jvm_frameworks`` with ``"pom.xml"`` as the source. When the
    POM's ``<parent>`` is ``spring-boot-starter-parent``, its ``<version>`` is
    passed along as ``sb_version``.

    Returns an empty list when the file cannot be read or parsed.
    """
    try:
        document = ElementTree.parse(path)
    except (OSError, ElementTree.ParseError):
        return []

    pom_root = document.getroot()
    # Namespaced POMs spell tags as "{uri}name"; reuse that prefix for lookups.
    prefix_match = _NS_TAG_RE.match(pom_root.tag)
    ns = "" if not prefix_match else prefix_match.group(0)

    # Spring Boot version comes from <parent>, if that parent is the starter (FIX-3)
    boot_version: str | None = None
    parent = pom_root.find(f"{ns}parent")
    if parent is not None:
        artifact_id = (parent.findtext(f"{ns}artifactId") or "").strip()
        if artifact_id == "spring-boot-starter-parent":
            boot_version = (parent.findtext(f"{ns}version") or "").strip() or None

    lowered = ElementTree.tostring(pom_root, encoding="unicode").lower()
    return self._detect_jvm_frameworks(lowered, "pom.xml", sb_version=boot_version)
|
|
78
197
|
|
|
79
198
|
def _frameworks_from_gradle(self, path: Path) -> list[FrameworkDetection]:
    """Detect JVM frameworks referenced by a ``build.gradle`` file.

    The lines returned by ``read_text_lines`` are joined with newlines,
    lower-cased, and scanned by ``_detect_jvm_frameworks`` with
    ``"build.gradle"`` as the source.
    """
    gradle_lines = read_text_lines(path)
    lowered = "\n".join(gradle_lines).lower()
    return self._detect_jvm_frameworks(lowered, "build.gradle")
|
|
82
201
|
|
|
83
|
-
def _detect_jvm_frameworks(self, text: str, source: str) -> list[FrameworkDetection]:
|
|
202
|
+
def _detect_jvm_frameworks(self, text: str, source: str, *, sb_version: str | None = None) -> list[FrameworkDetection]:
|
|
84
203
|
frameworks: list[FrameworkDetection] = []
|
|
85
204
|
if "com.android.application" in text or "com.android.library" in text:
|
|
86
205
|
frameworks.append(FrameworkDetection(name="Android", source=source))
|
|
87
206
|
if "spring-boot" in text:
|
|
88
|
-
frameworks.append(FrameworkDetection(name="Spring Boot", source=source))
|
|
207
|
+
frameworks.append(FrameworkDetection(name="Spring Boot", source=source, version=sb_version))
|
|
89
208
|
if "spring-webmvc" in text or "spring-web" in text:
|
|
90
209
|
frameworks.append(FrameworkDetection(name="Spring MVC", source=source))
|
|
91
210
|
if "spring-webflux" in text:
|