sourcecode 1.51.0__py3-none-any.whl → 1.53.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cli.py +390 -1
- sourcecode/format_contract.py +1 -0
- sourcecode/integration_detector.py +163 -0
- sourcecode/repository_ir.py +73 -6
- sourcecode/spring_impact.py +27 -1
- {sourcecode-1.51.0.dist-info → sourcecode-1.53.0.dist-info}/METADATA +25 -4
- {sourcecode-1.51.0.dist-info → sourcecode-1.53.0.dist-info}/RECORD +11 -10
- {sourcecode-1.51.0.dist-info → sourcecode-1.53.0.dist-info}/WHEEL +0 -0
- {sourcecode-1.51.0.dist-info → sourcecode-1.53.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.51.0.dist-info → sourcecode-1.53.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cli.py
CHANGED
|
@@ -219,7 +219,7 @@ _HELP = _build_help_text()
|
|
|
219
219
|
_SUBCOMMANDS: frozenset[str] = frozenset(
|
|
220
220
|
{
|
|
221
221
|
"telemetry", "prepare-context", "version", "config",
|
|
222
|
-
"repo-ir", "mcp", "endpoints", "impact",
|
|
222
|
+
"repo-ir", "mcp", "endpoints", "impact", "export",
|
|
223
223
|
# Enterprise workflow commands
|
|
224
224
|
"onboard", "modernize", "fix-bug", "review-pr",
|
|
225
225
|
# License / auth
|
|
@@ -3804,6 +3804,30 @@ def impact_cmd(
|
|
|
3804
3804
|
# canonical single-source-of-truth endpoint extractor.
|
|
3805
3805
|
|
|
3806
3806
|
|
|
3807
|
+
def _group_endpoints_by_controller(endpoints: "list[dict]") -> "dict":
    """Group endpoints by their controller FQN into a structured API surface.

    Returns ``{"by_controller": {fqn: [{method, path, return_type}, ...]},
    "controller_count": int, "total": int}``. Controllers and their routes are
    ordered deterministically (controllers by name, routes by path then method).

    Endpoints whose ``controller`` is missing or empty are collected under
    ``"<unknown>"``. ``total`` is the number of input endpoints. A missing
    ``return_type`` is reported as ``"void"``.
    """
    by_ctrl: "dict[str, list[dict]]" = {}
    for ep in endpoints:
        ctrl = ep.get("controller", "") or "<unknown>"
        by_ctrl.setdefault(ctrl, []).append({
            "method": ep.get("method", ""),
            "path": ep.get("path", ""),
            "return_type": ep.get("return_type", "void"),
        })
    for routes in by_ctrl.values():
        # Null-safe key: an endpoint carrying an explicit None path/method must
        # not make the sort raise TypeError when compared against strings. The
        # emitted values themselves are left untouched.
        routes.sort(key=lambda r: (r["path"] or "", r["method"] or ""))
    ordered = {name: by_ctrl[name] for name in sorted(by_ctrl)}
    return {
        "by_controller": ordered,
        "controller_count": len(ordered),
        "total": len(endpoints),
    }
|
|
3830
|
+
|
|
3807
3831
|
|
|
3808
3832
|
@app.command("endpoints")
|
|
3809
3833
|
def endpoints_cmd(
|
|
@@ -3844,6 +3868,10 @@ def endpoints_cmd(
|
|
|
3844
3868
|
False, "--no-cache",
|
|
3845
3869
|
help="Accepted for compatibility; this command always reads fresh source (no snapshot cache). No-op.",
|
|
3846
3870
|
),
|
|
3871
|
+
by_controller: bool = typer.Option(
|
|
3872
|
+
False, "--by-controller",
|
|
3873
|
+
help="Group endpoints by controller class (structured API surface for C4/Container synthesis).",
|
|
3874
|
+
),
|
|
3847
3875
|
) -> None:
|
|
3848
3876
|
"""Extract REST API endpoint surface from Java source files.
|
|
3849
3877
|
|
|
@@ -3929,6 +3957,11 @@ def endpoints_cmd(
|
|
|
3929
3957
|
"undocumented_before_filter": _undoc_before,
|
|
3930
3958
|
}
|
|
3931
3959
|
|
|
3960
|
+
if by_controller:
|
|
3961
|
+
_grouped = _group_endpoints_by_controller(data.get("endpoints", []))
|
|
3962
|
+
data["by_controller"] = _grouped["by_controller"]
|
|
3963
|
+
data["controller_count"] = _grouped["controller_count"]
|
|
3964
|
+
|
|
3932
3965
|
output = _serialize_dict(data, format)
|
|
3933
3966
|
|
|
3934
3967
|
_emit_command_output(output, output_path, copy,
|
|
@@ -3938,6 +3971,362 @@ def endpoints_cmd(
|
|
|
3938
3971
|
_nudge()
|
|
3939
3972
|
|
|
3940
3973
|
|
|
3974
|
+
# ── export ──────────────────────────────────────────────────────────────────
|
|
3975
|
+
|
|
3976
|
+
def _group_symbols_by_directory(nodes: "list[dict]") -> "dict":
    """Group repo-ir graph nodes by source directory with path:line refs.

    Returns ``{dir: [{symbol, kind, ref}]}`` ordered deterministically: dirs by
    name, symbols by source file and then by *numeric* line. ``ref`` is
    ``source_file:line`` when the line is known, otherwise just ``source_file``.
    Nodes without a ``source_file`` are skipped. Symbols that share a file and
    line (or have no known line) keep their input order.

    This is the per-directory, file:line-anchored code-level export — the
    input an architecture/code-map consumer needs to proceed by file write
    instead of per-directory LLM reads.
    """
    import posixpath

    # Each bucket holds (source_file, numeric_line, entry) so ordering can use
    # the integer line. Sorting on the rendered "file:line" string would place
    # line 10 before line 9.
    by_dir: "dict[str, list[tuple[str, int, dict]]]" = {}
    for n in nodes:
        sf = n.get("source_file") or ""
        if not sf:
            continue
        d = posixpath.dirname(sf) or "."
        ln = n.get("line")
        ref = f"{sf}:{ln}" if ln else sf
        entry = {
            "symbol": n.get("canonical_name") or n.get("fqn"),
            "kind": n.get("symbol_kind"),
            "ref": ref,
        }
        # Unknown or non-integer lines sort first within their file.
        order = ln if isinstance(ln, int) else 0
        by_dir.setdefault(d, []).append((sf, order, entry))

    return {
        d: [entry for _, _, entry in sorted(by_dir[d], key=lambda item: item[:2])]
        for d in sorted(by_dir)
    }
|
|
4002
|
+
|
|
4003
|
+
|
|
4004
|
+
def _build_module_graph(nodes: "list[dict]", edges: "list[dict]") -> "dict":
    """Collapse symbol-level relation edges into a directory-level dependency graph.

    The *module* of a symbol is the directory part of its ``source_file``
    (``"."`` for files at the root). An edge is kept only when both its
    ``from`` and ``to`` FQNs belong to known nodes and those nodes live in two
    different modules; everything else (external types, intra-module edges) is
    dropped. Kept edges are merged per ``(from, to)`` module pair, recording
    how many symbol-level edges were merged and the sorted set of their types.

    Returns ``{"nodes": [...], "edges": [...], "summary": {...}}`` where nodes
    are sorted by module name and edges by ``(from, to)``. Degrees count
    distinct module-level edges, not the underlying symbol-level hits.
    """
    import posixpath

    symbol_totals: "dict[str, int]" = {}
    owner_of: "dict[str, str]" = {}
    for node in nodes:
        src = node.get("source_file") or ""
        if src:
            module = posixpath.dirname(src) or "."
            # Every located node counts toward its module, named or not.
            symbol_totals[module] = symbol_totals.get(module, 0) + 1
            name = node.get("fqn")
            if name:
                owner_of[name] = module

    # (source module, target module) -> {"count": int, "types": set[str]}
    rollup: "dict[tuple[str, str], dict]" = {}
    for edge in edges:
        src_mod = owner_of.get(edge.get("from"))
        dst_mod = owner_of.get(edge.get("to"))
        if src_mod is not None and dst_mod is not None and src_mod != dst_mod:
            pair = (src_mod, dst_mod)
            if pair not in rollup:
                rollup[pair] = {"count": 0, "types": set()}
            bucket = rollup[pair]
            bucket["count"] += 1
            edge_type = edge.get("type")
            if edge_type:
                bucket["types"].add(edge_type)

    module_edges: "list[dict]" = []
    fan_out: "dict[str, int]" = {}
    fan_in: "dict[str, int]" = {}
    for pair in sorted(rollup):
        src_mod, dst_mod = pair
        bucket = rollup[pair]
        module_edges.append({
            "from": src_mod,
            "to": dst_mod,
            "count": bucket["count"],
            "types": sorted(bucket["types"]),
        })
        fan_out[src_mod] = fan_out.get(src_mod, 0) + 1
        fan_in[dst_mod] = fan_in.get(dst_mod, 0) + 1

    module_nodes: "list[dict]" = []
    for module in sorted(symbol_totals):
        module_nodes.append({
            "module": module,
            "symbol_count": symbol_totals[module],
            "out_degree": fan_out.get(module, 0),
            "in_degree": fan_in.get(module, 0),
        })

    summary = {
        "module_count": len(module_nodes),
        "edge_count": len(module_edges),
    }
    return {"nodes": module_nodes, "edges": module_edges, "summary": summary}
|
|
4075
|
+
|
|
4076
|
+
|
|
4077
|
+
# Build files whose presence marks a directory as a buildable unit (C4 container).
# Order matters: the first marker present in a directory is the one reported.
_BUILD_MARKERS: "tuple[str, ...]" = (
    "pom.xml", "build.gradle", "build.gradle.kts", "settings.gradle",
)


def _detect_containers(root: "Path") -> "list[dict]":
    """Find directories under ``root`` that hold a Maven/Gradle build file.

    Each such directory is reported once as ``{"root": <posix path relative to
    root>, "build_file": <first matching marker>}``; the repository root itself
    is reported as ``"."``. The scan only inspects file names — nothing is
    built or parsed. Hidden directories and common output/vendor directories
    are not descended into. The result is sorted by ``root`` and is empty when
    no build file exists, leaving it to the caller to report that limitation.
    """
    import os

    ignored = {".git", "node_modules", "target", "build", ".gradle", ".idea", "dist"}
    containers: "list[dict]" = []
    for current, subdirs, files in os.walk(root):
        # Prune in place so os.walk never enters ignored or dot-directories.
        subdirs[:] = [
            name for name in subdirs
            if not (name in ignored or name.startswith("."))
        ]
        build_file = next((m for m in _BUILD_MARKERS if m in files), None)
        if build_file is None:
            continue
        relative = os.path.relpath(current, root).replace(os.sep, "/")
        containers.append({"root": relative, "build_file": build_file})
    return sorted(containers, key=lambda c: c["root"])
|
|
4109
|
+
|
|
4110
|
+
|
|
4111
|
+
def _directory_hashes(file_list: "list[str]", root: "Path") -> "dict[str, str]":
    """Fingerprint each source directory from the files listed in ``file_list``.

    Files are bucketed by the directory part of their relative path (``"."``
    for root-level files). For every bucket, in sorted path order, the digest
    absorbs the relative path, a NUL byte, the file's bytes, and another NUL
    byte. A file that cannot be read contributes the ``<unreadable>``
    placeholder instead of its contents, so the scan never aborts.

    Returns ``{dir: first 16 hex chars of the sha256}`` with directories in
    sorted order, letting a consumer skip directories whose fingerprint is
    unchanged between runs.
    """
    import hashlib
    import posixpath

    members: "dict[str, list[str]]" = {}
    for path in file_list:
        members.setdefault(posixpath.dirname(path) or ".", []).append(path)

    digests: "dict[str, str]" = {}
    for directory in sorted(members):
        digest = hashlib.sha256()
        for path in sorted(members[directory]):
            digest.update(path.encode("utf-8"))
            digest.update(b"\0")
            try:
                payload = (root / path).read_bytes()
            except OSError:
                payload = b"<unreadable>"
            digest.update(payload)
            digest.update(b"\0")
        digests[directory] = digest.hexdigest()[:16]
    return digests
|
|
4138
|
+
|
|
4139
|
+
|
|
4140
|
+
def _build_c4_export(
    root: "Path",
    file_list: "list[str]",
    nodes: "list[dict]",
    edges: "list[dict]",
    endpoints: "list[dict]",
    integrations: "dict",
) -> "dict":
    """Combine the individual export views into one C4-shaped document.

    The ``c4`` section maps onto the levels of the open C4 notation:
    ``context`` (the system, named after ``root``, plus the supplied
    ``integrations`` as external systems), ``containers`` (directories holding
    a build file), ``components`` (the module dependency graph) and ``code``
    (symbols grouped by directory). Alongside it the document carries an
    ``api_surface`` (endpoints grouped by controller), a ``manifest`` with
    per-directory content hashes for incremental consumers, and a
    ``limitations`` list that notes when no containers could be derived.
    """
    source_file_total = len(file_list)
    detected_containers = _detect_containers(root)

    # An empty container list is reported as a caveat, never filled with
    # invented entries.
    caveats: "list[str]" = []
    if not detected_containers:
        caveats.append(
            "No build files (Maven/Gradle) found; containers not derived. "
            "Treat the repository as a single implicit container."
        )

    c4_model = {
        "context": {
            "system": {"name": root.name, "file_count": source_file_total},
            "external_systems": integrations,
        },
        "containers": detected_containers,
        "components": _build_module_graph(nodes, edges),
        "code": _group_symbols_by_directory(nodes),
    }
    manifest = {
        "directory_hashes": _directory_hashes(file_list, root),
        "generated": {
            "tool": "sourcecode",
            "schema": "c4-v1",
            "file_count": source_file_total,
        },
    }
    return {
        "schema_version": "c4-v1",
        "c4": c4_model,
        "api_surface": _group_endpoints_by_controller(endpoints),
        "manifest": manifest,
        "limitations": caveats,
    }
|
|
4190
|
+
|
|
4191
|
+
|
|
4192
|
+
@app.command("export")
def export_cmd(
    path: Path = typer.Argument(
        Path("."),
        help="Repository path to export (default: current directory)",
    ),
    output_path: Optional[Path] = typer.Option(
        None, "--output", "-o",
        help="Write output to a file instead of stdout.",
    ),
    format: str = typer.Option(
        "json", "--format", "-f",
        help="Output format: json (default) or yaml.",
    ),
    copy: bool = typer.Option(
        False, "--copy", "-c",
        help="Copy output to system clipboard after a successful run.",
    ),
    by_directory: bool = typer.Option(
        False, "--by-directory",
        help="Group symbols by source directory with path:line refs (C4 code-level export).",
    ),
    module_graph: bool = typer.Option(
        False, "--module-graph",
        help="Emit a module→module dependency graph (C4 container/component level).",
    ),
    integrations: bool = typer.Option(
        False, "--integrations",
        help="Detect outbound integrations (HTTP/LDAP/JMS clients) with file:line evidence.",
    ),
    c4: bool = typer.Option(
        False, "--c4",
        help="Unified C4 architecture export (context/containers/components/code) "
             "+ per-directory incremental manifest. Vendor-neutral.",
    ),
) -> None:
    """Export structured, tool-agnostic codebase views for downstream tooling.

    Output is plain JSON/YAML that any consumer (architecture-doc generators,
    diagram renderers, code-search agents) can ingest. Section labels map to the
    open C4 model (an open architecture notation, not a product) but the schema
    is vendor-neutral.

    \b
    --by-directory   One group per source directory, each symbol carrying a
                     path:line reference — the file:line-anchored code map that
                     lets a consumer proceed by file write instead of per-dir reads.
    --module-graph   Module→module dependency graph (container/component level)
                     rolled up from class-level relation edges.
    --integrations   Outbound integrations (RestTemplate/WebClient/Feign/LDAP/JMS)
                     with file:line evidence — external-system dependency arrows.
    --c4             Unified architecture document mapped onto the open C4 model
                     (context/containers/components/code) + an API surface and a
                     per-directory content-hash manifest for incremental consumers.

    The section flags compose; pass several to emit multiple sections in one
    document. --c4 assembles the full architecture export on its own.
    """
    from sourcecode.repository_ir import build_repo_ir, find_java_files

    _enforce_format("export", format)

    root = path.resolve()
    if not root.is_dir():
        _emit_error_json(
            INVALID_INPUT_CODE,
            f"'{root}' is not a valid directory.",
            path=str(root),
            hint="Pass an existing repository directory.",
            expected="A directory path.",
        )
        raise typer.Exit(1)

    if not (by_directory or module_graph or integrations or c4):
        _emit_error_json(
            INVALID_INPUT_CODE,
            "export requires a mode flag.",
            path=str(root),
            hint="Pass --c4 for the full architecture export, or one of "
                 "--by-directory / --module-graph / --integrations for a section.",
            expected="--c4 | --by-directory | --module-graph | --integrations",
        )
        raise typer.Exit(1)

    # Exclude test sources. The path is anchored with a leading "/" so that a
    # root-relative path such as "test/FooTest.java" (a top-level test
    # directory) is excluded just like "src/test/java/FooTest.java".
    file_list = [
        f for f in find_java_files(root)
        if "/test/" not in f"/{f}" and "/tests/" not in f"/{f}"
    ]

    if c4:
        # Unified architecture export: assembles every section + manifest.
        # The individual section flags are not consulted on this path.
        from sourcecode.integration_detector import detect_integrations
        from sourcecode.repository_ir import extract_java_endpoints
        ir = build_repo_ir(file_list, root)
        graph = ir.get("graph", {})
        endpoints = extract_java_endpoints(root).get("endpoints", [])
        data: "dict" = _build_c4_export(
            root,
            file_list,
            graph.get("nodes", []),
            graph.get("edges", []),
            endpoints,
            detect_integrations(file_list, root),
        )
        success_msg = f"C4 architecture export written to {output_path}"
    else:
        data = {}
        # IR is only needed for the symbol/graph-derived views, not for the
        # source-scanned integration detector.
        if by_directory or module_graph:
            ir = build_repo_ir(file_list, root)
            graph = ir.get("graph", {})
            nodes = graph.get("nodes", [])
            if by_directory:
                grouped = _group_symbols_by_directory(nodes)
                data["by_directory"] = grouped
                data["directory_count"] = len(grouped)
                data["symbol_count"] = sum(len(v) for v in grouped.values())
            if module_graph:
                data["module_graph"] = _build_module_graph(nodes, graph.get("edges", []))

        if integrations:
            from sourcecode.integration_detector import detect_integrations
            data["integrations"] = detect_integrations(file_list, root)
        success_msg = f"Export written to {output_path}"

    # Both modes share the same serialize → emit → nudge tail.
    output = _serialize_dict(data, format)
    _emit_command_output(output, output_path, copy, success_msg=success_msg)

    from sourcecode.mcp_nudge import nudge_mcp_if_needed as _nudge
    _nudge()
|
|
4328
|
+
|
|
4329
|
+
|
|
3941
4330
|
@app.command("validation")
|
|
3942
4331
|
def validation_cmd(
|
|
3943
4332
|
path: Path = typer.Argument(
|
sourcecode/format_contract.py
CHANGED
|
@@ -27,6 +27,7 @@ FORMAT_REGISTRY: "dict[str, tuple[str, ...]]" = {
|
|
|
27
27
|
"repo-ir": ("json", "yaml"),
|
|
28
28
|
"impact": ("json", "yaml"),
|
|
29
29
|
"endpoints": ("json", "yaml"),
|
|
30
|
+
"export": ("json", "yaml"),
|
|
30
31
|
"validation": ("json", "yaml"),
|
|
31
32
|
"impact-chain": ("json", "yaml"),
|
|
32
33
|
"pr-impact": ("text", "json"),
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Outgoing-integration detection for the C4/BMB export pipeline.
|
|
2
|
+
|
|
3
|
+
Scans Java source for *outbound* integration points — the edges a C4 Context
|
|
4
|
+
diagram needs to draw arrows from this system to external systems. Detection is
|
|
5
|
+
deterministic source-text matching (same approach as the JNDI datasource scan in
|
|
6
|
+
``serializer.py``); it never executes code and never resolves runtime values.
|
|
7
|
+
|
|
8
|
+
Covered clients:
|
|
9
|
+
|
|
10
|
+
* HTTP — ``RestTemplate``, ``WebClient``, ``@FeignClient`` (declarative)
|
|
11
|
+
* LDAP — ``LdapTemplate``
|
|
12
|
+
* JMS — ``JmsTemplate``, ActiveMQ connection factories
|
|
13
|
+
|
|
14
|
+
Each hit is reported with a ``file:line`` evidence anchor and, when a literal URL
|
|
15
|
+
or logical name is present on the same construct, a ``target``. URLs assembled at
|
|
16
|
+
runtime (concatenated strings, property placeholders) yield a ``null`` target —
|
|
17
|
+
honest absence rather than a guess.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
# A URL/endpoint literal for any scheme we care about.
|
|
27
|
+
_URL_RE = re.compile(r'"((?:https?|ldaps?|tcp|amqp|jms|nio)://[^"]*)"')
|
|
28
|
+
# First string literal on a line (fallback target, e.g. WebClient.create("x")).
|
|
29
|
+
_STR_RE = re.compile(r'"([^"]+)"')
|
|
30
|
+
|
|
31
|
+
# Declarative HTTP client. Attrs may span multiple lines, so matched on full text.
|
|
32
|
+
_FEIGN_RE = re.compile(r"@FeignClient\s*\(([^)]*)\)", re.DOTALL)
|
|
33
|
+
_ATTR_URL_RE = re.compile(r'url\s*=\s*"([^"]*)"')
|
|
34
|
+
_ATTR_NAME_RE = re.compile(r'(?:name|value)\s*=\s*"([^"]*)"')
|
|
35
|
+
_FIRST_LITERAL_RE = re.compile(r'^\s*"([^"]*)"')
|
|
36
|
+
|
|
37
|
+
# token -> (kind, client). Matched as whole-word usage outside imports/comments.
|
|
38
|
+
_TOKEN_CLIENTS: "tuple[tuple[str, str, str], ...]" = (
|
|
39
|
+
("RestTemplate", "http", "resttemplate"),
|
|
40
|
+
("WebClient", "http", "webclient"),
|
|
41
|
+
("LdapTemplate", "ldap", "ldaptemplate"),
|
|
42
|
+
("JmsTemplate", "jms", "jmstemplate"),
|
|
43
|
+
("ActiveMQConnectionFactory", "jms", "activemq"),
|
|
44
|
+
)
|
|
45
|
+
_TOKEN_RES = tuple(
|
|
46
|
+
(re.compile(r"\b" + re.escape(tok) + r"\b"), kind, client)
|
|
47
|
+
for tok, kind, client in _TOKEN_CLIENTS
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _line_of(text: str, idx: int) -> int:
    """Return the 1-based line on which character offset ``idx`` of ``text`` falls."""
    # Each newline before the offset pushes the position one line further down.
    newlines_before = text.count("\n", 0, idx)
    return 1 + newlines_before
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _extract_target(line: str) -> Optional[str]:
    """Pick a literal target from a usage line, or ``None`` if it has no literal.

    A literal with a recognised URL scheme wins; otherwise the first non-empty
    string literal on the line is used as a best-effort fallback.
    """
    for pattern in (_URL_RE, _STR_RE):
        hit = pattern.search(line)
        if hit:
            return hit.group(1)
    return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def detect_integrations(file_paths: "list[str]", root: Path) -> dict:
    """Detect outbound integrations across ``file_paths`` (relative to ``root``).

    Returns ``{"integrations": [...], "by_kind": {kind: count}, "count": N}`` with
    integrations sorted by ``(kind, client, evidence)`` for deterministic output.
    Each integration is ``{kind, client, target, evidence}`` where ``evidence`` is
    ``relpath:line`` and ``target`` is a literal URL/name or ``None``.

    Files that cannot be read are skipped. Per file, three scans run:

    1. ``@FeignClient(...)`` declarations, matched on the whole text so the
       attributes may span lines; the target is the ``url`` attribute, else
       ``name``/``value``, else a bare leading literal.
    2. Lines mentioning a known client type. Each is reported, and any variable
       name bound to the client on that line is remembered.
    3. Lines that call a remembered variable and carry a URL literal, reported
       at the call site with that URL as target.

    Scans 2 and 3 both ignore import/package lines and lines that start a
    comment, so a commented-out call is never reported as an integration.
    Identical ``(kind, client, target, evidence)`` hits are reported once.
    """
    seen: "set[tuple[str, str, Optional[str], str]]" = set()
    records: "list[dict]" = []

    def _add(kind: str, client: str, target: Optional[str], rel: str, line: int) -> None:
        evidence = f"{rel}:{line}"
        key = (kind, client, target, evidence)
        if key in seen:
            return
        seen.add(key)
        records.append({
            "kind": kind,
            "client": client,
            "target": target,
            "evidence": evidence,
        })

    # Line prefixes (after leading whitespace) that mark import/package/comment noise.
    noise_prefixes = ("import ", "package ", "//", "*", "/*")

    for rel in file_paths:
        try:
            text = (root / rel).read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        # @FeignClient — capture attrs even when spread across lines.
        for m in _FEIGN_RE.finditer(text):
            attrs = m.group(1)
            url_m = _ATTR_URL_RE.search(attrs)
            name_m = _ATTR_NAME_RE.search(attrs)
            first_m = _FIRST_LITERAL_RE.search(attrs)
            target = (
                url_m.group(1) if url_m
                else name_m.group(1) if name_m
                else first_m.group(1) if first_m
                else None
            )
            _add("http", "feign", target, rel, _line_of(text, m.start()))

        # Filter noise once so both token passes see exactly the same lines;
        # original 1-based line numbers are kept for the evidence anchor.
        code_lines = [
            (lineno, line)
            for lineno, line in enumerate(text.splitlines(), start=1)
            if not line.lstrip().startswith(noise_prefixes)
        ]

        # First pass records the declaration site and any variable name bound to
        # the client, so a later call site (where the URL literal usually lives)
        # can be attributed back to the client.
        var_to_client: "dict[str, tuple[str, str]]" = {}
        for lineno, line in code_lines:
            for token_re, kind, client in _TOKEN_RES:
                m = token_re.search(line)
                if not m:
                    continue
                _add(kind, client, _extract_target(line), rel, lineno)
                tok = re.escape(m.group(0))
                # `Type name` (field/local decl) and `name = new Type(` forms.
                decl = re.search(tok + r"\s+(\w+)\b", line)
                if decl:
                    var_to_client[decl.group(1)] = (kind, client)
                asgn = re.search(r"(\w+)\s*=\s*new\s+" + tok, line)
                if asgn:
                    var_to_client[asgn.group(1)] = (kind, client)

        if not var_to_client:
            continue

        # Second pass: a call on a tracked client variable carrying a URL literal
        # is reported as a hit at the call site (the URL endpoint C4 wants).
        # One receiver pattern per variable, compiled once per file.
        receivers = [
            (re.compile(r"\b" + re.escape(var) + r"\s*\."), bound)
            for var, bound in var_to_client.items()
        ]
        for lineno, line in code_lines:
            url = _URL_RE.search(line)
            if not url:
                continue
            for receiver_re, (kind, client) in receivers:
                if receiver_re.search(line):
                    _add(kind, client, url.group(1), rel, lineno)
                    break

    records.sort(key=lambda r: (r["kind"], r["client"], r["evidence"]))

    by_kind: "dict[str, int]" = {}
    for r in records:
        by_kind[r["kind"]] = by_kind.get(r["kind"], 0) + 1

    return {
        "integrations": records,
        "by_kind": {k: by_kind[k] for k in sorted(by_kind)},
        "count": len(records),
    }
|
sourcecode/repository_ir.py
CHANGED
|
@@ -48,6 +48,7 @@ class SymbolRecord:
|
|
|
48
48
|
symbol_kind: str = "" # class|interface|enum|annotation|method|constructor|field|endpoint|bean
|
|
49
49
|
canonical_name: str = "" # pkg.Class#method(Type1,Type2) — human-readable
|
|
50
50
|
source_file: str = "" # alias for declaring_file (IR output contract)
|
|
51
|
+
line: Optional[int] = None # 1-based source line of the declaration
|
|
51
52
|
signature: str = "" # (Type1,Type2)->ReturnType for methods; type for fields
|
|
52
53
|
param_types: list[str] = field(default_factory=list)
|
|
53
54
|
return_type: str = ""
|
|
@@ -696,8 +697,12 @@ def _extract_symbols(
|
|
|
696
697
|
# here makes the regex work without changing the per-line brace-depth counter.
|
|
697
698
|
_raw_lines = source.splitlines()
|
|
698
699
|
_joined: list[str] = []
|
|
700
|
+
# Parallel list: 1-based source line where each _joined entry STARTS, so symbol
|
|
701
|
+
# declarations can carry their source line through the join/normalize transforms.
|
|
702
|
+
_joined_lines: list[int] = []
|
|
699
703
|
_i = 0
|
|
700
704
|
while _i < len(_raw_lines):
|
|
705
|
+
_start = _i # 0-based source index where this joined entry begins
|
|
701
706
|
_line = _raw_lines[_i]
|
|
702
707
|
_stripped = _line.strip()
|
|
703
708
|
if (_CLASS_KW_RE.search(_stripped) and '{' not in _stripped
|
|
@@ -713,6 +718,7 @@ def _extract_symbols(
|
|
|
713
718
|
if '{' in _cont:
|
|
714
719
|
break
|
|
715
720
|
_joined.append(_buf)
|
|
721
|
+
_joined_lines.append(_start + 1)
|
|
716
722
|
elif (
|
|
717
723
|
(_METHOD_DECL_RE.match(_stripped) or _CONSTRUCTOR_DECL_RE.match(_stripped))
|
|
718
724
|
and _net_parens(_stripped) > 0
|
|
@@ -731,15 +737,20 @@ def _extract_symbols(
|
|
|
731
737
|
_bal += _net_parens(_cont)
|
|
732
738
|
_i += 1
|
|
733
739
|
_joined.append(_buf)
|
|
740
|
+
_joined_lines.append(_start + 1)
|
|
734
741
|
else:
|
|
735
742
|
_joined.append(_line)
|
|
743
|
+
_joined_lines.append(_start + 1)
|
|
736
744
|
_i += 1
|
|
737
745
|
|
|
738
746
|
# P1 fix: normalize multiline annotations (e.g. @RequestMapping(\n value="..."\n))
|
|
739
747
|
# into single lines so the per-line regex can capture annotation args correctly.
|
|
740
|
-
_normalized_lines = _normalize_multiline_annotations(
|
|
748
|
+
_normalized_lines, _normalized_src_lines = _normalize_multiline_annotations(
|
|
749
|
+
_joined, _joined_lines
|
|
750
|
+
)
|
|
741
751
|
|
|
742
|
-
for line in _normalized_lines:
|
|
752
|
+
for _ni, line in enumerate(_normalized_lines):
|
|
753
|
+
cur_line = _normalized_src_lines[_ni] if _ni < len(_normalized_src_lines) else None
|
|
743
754
|
stripped = line.strip()
|
|
744
755
|
|
|
745
756
|
if in_block_comment:
|
|
@@ -821,6 +832,7 @@ def _extract_symbols(
|
|
|
821
832
|
symbol_kind=sym_kind,
|
|
822
833
|
canonical_name=fqn,
|
|
823
834
|
source_file=rel_path,
|
|
835
|
+
line=cur_line,
|
|
824
836
|
signature=" ".join(_sig_parts),
|
|
825
837
|
annotation_values=dict(pending_ann_values),
|
|
826
838
|
))
|
|
@@ -888,6 +900,7 @@ def _extract_symbols(
|
|
|
888
900
|
symbol_kind=_sym_kind,
|
|
889
901
|
canonical_name=_canonical,
|
|
890
902
|
source_file=rel_path,
|
|
903
|
+
line=cur_line,
|
|
891
904
|
signature=_signature,
|
|
892
905
|
param_types=_param_types,
|
|
893
906
|
return_type=_ret_raw,
|
|
@@ -923,6 +936,7 @@ def _extract_symbols(
|
|
|
923
936
|
symbol_kind="constructor",
|
|
924
937
|
canonical_name=f"{class_fqn}#{_class_simple}({_param_str})",
|
|
925
938
|
source_file=rel_path,
|
|
939
|
+
line=cur_line,
|
|
926
940
|
signature=f"({_param_str})->void",
|
|
927
941
|
param_types=_ctor_param_types,
|
|
928
942
|
return_type="void",
|
|
@@ -958,6 +972,7 @@ def _extract_symbols(
|
|
|
958
972
|
symbol_kind="field",
|
|
959
973
|
canonical_name=fqn,
|
|
960
974
|
source_file=rel_path,
|
|
975
|
+
line=cur_line,
|
|
961
976
|
signature=f"{ftype} {fname}",
|
|
962
977
|
))
|
|
963
978
|
pending_anns = []
|
|
@@ -1514,6 +1529,34 @@ def _build_relations(
|
|
|
1514
1529
|
evidence={"type": "method_call", "value": f"new {_tgt.split('.')[-1]}(...)"},
|
|
1515
1530
|
))
|
|
1516
1531
|
|
|
1532
|
+
# ── Static-utility calls: `Type.method(...)` edges (G-2 / static-call gap) ──
|
|
1533
|
+
# A static call `AnnotationHelper.foo(...)` couples the calling class to the
|
|
1534
|
+
# utility type, but the call/DI graph misses it: only an `imports` edge is
|
|
1535
|
+
# recorded (and impact-chain skips imports), so a static helper showed 0
|
|
1536
|
+
# callers and impact-chain reported a false-confident "no blast radius".
|
|
1537
|
+
# Mirror the instantiation scan: regex over comment-stripped source, resolve the
|
|
1538
|
+
# receiver type via the import map (so JDK/unresolved receivers like Math/LOGGER
|
|
1539
|
+
# yield None and are skipped), attribute at class level. Unlike `instantiates`
|
|
1540
|
+
# this INCLUDES controllers — a controller statically calling a utility is a real
|
|
1541
|
+
# caller and has no `returns`-edge overlap. Emitted as `calls` (traversed by the
|
|
1542
|
+
# caller BFS). Instance calls go through lower-case variables and never match.
|
|
1543
|
+
if _class_syms:
|
|
1544
|
+
_call_targets: set[str] = set()
|
|
1545
|
+
for _m in re.finditer(r'\b([A-Z]\w*)\.\w+\s*\(', _source_no_comments):
|
|
1546
|
+
_ct_fqn = _resolve_dep_type(_m.group(1))
|
|
1547
|
+
if _ct_fqn:
|
|
1548
|
+
_call_targets.add(_ct_fqn)
|
|
1549
|
+
for cls_sym in _class_syms:
|
|
1550
|
+
for _tgt in sorted(_call_targets):
|
|
1551
|
+
if _tgt != cls_sym.symbol:
|
|
1552
|
+
edges.append(RelationEdge(
|
|
1553
|
+
from_symbol=cls_sym.symbol,
|
|
1554
|
+
to_symbol=_tgt,
|
|
1555
|
+
type="calls",
|
|
1556
|
+
confidence="medium",
|
|
1557
|
+
evidence={"type": "method_call", "value": f"{_tgt.split('.')[-1]}.…(…)"},
|
|
1558
|
+
))
|
|
1559
|
+
|
|
1517
1560
|
seen: set[tuple[str, str, str]] = set()
|
|
1518
1561
|
unique: list[RelationEdge] = []
|
|
1519
1562
|
for e in edges:
|
|
@@ -1742,7 +1785,7 @@ def _resolve_ann_path_expr(ann_args: str, constants: dict[str, str]) -> str:
|
|
|
1742
1785
|
return ann_args
|
|
1743
1786
|
|
|
1744
1787
|
|
|
1745
|
-
def _normalize_multiline_annotations(lines
|
|
1788
|
+
def _normalize_multiline_annotations(lines, line_nums=None):
|
|
1746
1789
|
"""Merge multiline annotation spans into a single line.
|
|
1747
1790
|
|
|
1748
1791
|
Handles annotations split across lines because their args span multiple lines:
|
|
@@ -1754,10 +1797,13 @@ def _normalize_multiline_annotations(lines: list[str]) -> list[str]:
|
|
|
1754
1797
|
Merges into: '@RequestMapping(value = "/add", method = RequestMethod.GET)'
|
|
1755
1798
|
"""
|
|
1756
1799
|
result: list[str] = []
|
|
1800
|
+
result_lines: list[int] = []
|
|
1757
1801
|
buf: list[str] = []
|
|
1802
|
+
buf_line: Optional[int] = None
|
|
1758
1803
|
paren_depth = 0
|
|
1759
1804
|
|
|
1760
|
-
for line in lines:
|
|
1805
|
+
for _idx, line in enumerate(lines):
|
|
1806
|
+
src_line = line_nums[_idx] if (line_nums is not None and _idx < len(line_nums)) else None
|
|
1761
1807
|
stripped = line.strip()
|
|
1762
1808
|
if buf:
|
|
1763
1809
|
# Continuation of a multiline annotation
|
|
@@ -1765,7 +1811,9 @@ def _normalize_multiline_annotations(lines: list[str]) -> list[str]:
|
|
|
1765
1811
|
paren_depth += stripped.count("(") - stripped.count(")")
|
|
1766
1812
|
if paren_depth <= 0:
|
|
1767
1813
|
result.append(" ".join(buf))
|
|
1814
|
+
result_lines.append(buf_line if buf_line is not None else (src_line or 0))
|
|
1768
1815
|
buf = []
|
|
1816
|
+
buf_line = None
|
|
1769
1817
|
paren_depth = 0
|
|
1770
1818
|
elif stripped.startswith("@") and "(" in stripped:
|
|
1771
1819
|
opens = stripped.count("(")
|
|
@@ -1773,16 +1821,24 @@ def _normalize_multiline_annotations(lines: list[str]) -> list[str]:
|
|
|
1773
1821
|
if opens > closes:
|
|
1774
1822
|
# Unbalanced — start collecting continuation lines
|
|
1775
1823
|
buf = [stripped]
|
|
1824
|
+
buf_line = src_line
|
|
1776
1825
|
paren_depth = opens - closes
|
|
1777
1826
|
else:
|
|
1778
1827
|
result.append(line)
|
|
1828
|
+
result_lines.append(src_line or 0)
|
|
1779
1829
|
else:
|
|
1780
1830
|
result.append(line)
|
|
1831
|
+
result_lines.append(src_line or 0)
|
|
1781
1832
|
|
|
1782
1833
|
# Flush any dangling buffer (shouldn't happen in well-formed code)
|
|
1783
1834
|
if buf:
|
|
1784
|
-
|
|
1785
|
-
|
|
1835
|
+
for _bi, _bl in enumerate(buf):
|
|
1836
|
+
result.append(_bl)
|
|
1837
|
+
result_lines.append(buf_line if buf_line is not None else 0)
|
|
1838
|
+
|
|
1839
|
+
if line_nums is None:
|
|
1840
|
+
return result
|
|
1841
|
+
return result, result_lines
|
|
1786
1842
|
|
|
1787
1843
|
|
|
1788
1844
|
def _parse_route_path(args_str: str) -> str:
|
|
@@ -2615,6 +2671,7 @@ def _assemble(
|
|
|
2615
2671
|
"symbol_kind": s.symbol_kind,
|
|
2616
2672
|
"canonical_name": s.canonical_name or s.symbol,
|
|
2617
2673
|
"source_file": s.declaring_file,
|
|
2674
|
+
"line": s.line,
|
|
2618
2675
|
"signature": s.signature,
|
|
2619
2676
|
"type": s.type,
|
|
2620
2677
|
"role": spring_role_map.get(s.symbol, "other"),
|
|
@@ -2981,6 +3038,7 @@ def _build_route_surface(
|
|
|
2981
3038
|
"effective_class": cls_fqn,
|
|
2982
3039
|
"path": full_path,
|
|
2983
3040
|
"method": method,
|
|
3041
|
+
"return_type": (sym.return_type.strip() if sym.return_type else "void"),
|
|
2984
3042
|
"stable_id": sym.stable_id,
|
|
2985
3043
|
"inheritance_depth": 0,
|
|
2986
3044
|
}
|
|
@@ -3000,6 +3058,10 @@ def _build_route_surface(
|
|
|
3000
3058
|
_parent_sec_by_sym: dict[str, object] = {
|
|
3001
3059
|
r["symbol"]: r.get("security_annotations") for r in routes
|
|
3002
3060
|
}
|
|
3061
|
+
# Build lookup for return_type from phase-2 routes (inherited methods reuse parent's)
|
|
3062
|
+
_parent_rt_by_sym: dict[str, str] = {
|
|
3063
|
+
r["symbol"]: r.get("return_type", "void") for r in routes
|
|
3064
|
+
}
|
|
3003
3065
|
|
|
3004
3066
|
for cls_simple, data in class_info.items():
|
|
3005
3067
|
if not any(data["prefixes"]):
|
|
@@ -3038,6 +3100,7 @@ def _build_route_surface(
|
|
|
3038
3100
|
"effective_class": data["fqn"],
|
|
3039
3101
|
"path": full_path,
|
|
3040
3102
|
"method": verb,
|
|
3103
|
+
"return_type": _parent_rt_by_sym.get(declaring_sym, "void"),
|
|
3041
3104
|
"stable_id": stable_id,
|
|
3042
3105
|
"inheritance_depth": depth,
|
|
3043
3106
|
"security_annotations": _parent_sec_by_sym.get(declaring_sym),
|
|
@@ -3750,6 +3813,9 @@ def _recover_openapi_spec_routes(
|
|
|
3750
3813
|
"path": op.path,
|
|
3751
3814
|
"controller": ctrl_simple,
|
|
3752
3815
|
"handler": handler,
|
|
3816
|
+
# Response type is not parsed from the OpenAPI spec (only request
|
|
3817
|
+
# bodies are). "unknown" is honest here — these are spec-sourced.
|
|
3818
|
+
"return_type": "unknown",
|
|
3753
3819
|
"source": "openapi-spec",
|
|
3754
3820
|
# Security for generated controllers is declared in the spec /
|
|
3755
3821
|
# enforced by the filter chain, not by per-endpoint annotations.
|
|
@@ -3925,6 +3991,7 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
|
|
|
3925
3991
|
"path": route["path"],
|
|
3926
3992
|
"controller": controller,
|
|
3927
3993
|
"handler": handler,
|
|
3994
|
+
"return_type": route.get("return_type", "void"),
|
|
3928
3995
|
}
|
|
3929
3996
|
# Use security_annotations already extracted by _build_route_surface
|
|
3930
3997
|
# via the canonical _route_security_from_sym extractor.
|
sourcecode/spring_impact.py
CHANGED
|
@@ -872,10 +872,35 @@ class ImpactOrchestrator:
|
|
|
872
872
|
"An empty result is NOT proof the type is unused."
|
|
873
873
|
)
|
|
874
874
|
|
|
875
|
+
# G-2 residual guard: something imports this symbol but no call/DI/instantiation
|
|
876
|
+
# edge resolved to it. With static-call edges now extracted, the common static
|
|
877
|
+
# utility case is covered; this catches what remains (static imports invoked
|
|
878
|
+
# without a qualifier, reflection, method references) — usages the call-graph
|
|
879
|
+
# cannot bind. An empty blast radius here is NOT proof of dead code, so it must
|
|
880
|
+
# not be reported as a high-confidence "safe to change".
|
|
881
|
+
unresolved_ref_blind_spot = False
|
|
882
|
+
if (
|
|
883
|
+
empty_blast
|
|
884
|
+
and class_level_seed
|
|
885
|
+
and not framework_di_blind_spot
|
|
886
|
+
and not value_type_blind_spot
|
|
887
|
+
):
|
|
888
|
+
_rev = cir.reverse_graph.get(resolved_symbol) or {}
|
|
889
|
+
_importers = sorted(set(_rev.get("imports") or []))
|
|
890
|
+
if _importers:
|
|
891
|
+
unresolved_ref_blind_spot = True
|
|
892
|
+
warnings.append(
|
|
893
|
+
f"Unresolved inbound references (G-2): {len(_importers)} in-repo "
|
|
894
|
+
"file(s) import this symbol but no call/DI/instantiation edge "
|
|
895
|
+
"resolves to it — the usage may be a static import, reflection, or "
|
|
896
|
+
"method reference the call-graph does not model. 0 callers is NOT "
|
|
897
|
+
"proof this symbol is unused."
|
|
898
|
+
)
|
|
899
|
+
|
|
875
900
|
confidence: str
|
|
876
901
|
if resolution == "not_found":
|
|
877
902
|
confidence = "low"
|
|
878
|
-
elif framework_di_blind_spot or value_type_blind_spot:
|
|
903
|
+
elif framework_di_blind_spot or value_type_blind_spot or unresolved_ref_blind_spot:
|
|
879
904
|
confidence = "low"
|
|
880
905
|
elif resolution == "partial" or confidence_reducing:
|
|
881
906
|
confidence = "medium"
|
|
@@ -908,6 +933,7 @@ class ImpactOrchestrator:
|
|
|
908
933
|
"blind_spots": (
|
|
909
934
|
(["framework_di"] if framework_di_blind_spot else [])
|
|
910
935
|
+ (["value_type"] if value_type_blind_spot else [])
|
|
936
|
+
+ (["unresolved_refs"] if unresolved_ref_blind_spot else [])
|
|
911
937
|
),
|
|
912
938
|
"external_supertypes": external_supertypes,
|
|
913
939
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.51.0
|
|
3
|
+
Version: 1.53.0
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
|
|
41
41
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
42
42
|
|
|
43
|
-

|
|
44
44
|

|
|
45
45
|
|
|
46
46
|
---
|
|
@@ -114,7 +114,7 @@ pipx install sourcecode
|
|
|
114
114
|
|
|
115
115
|
```bash
|
|
116
116
|
sourcecode version
|
|
117
|
-
# sourcecode 1.51.0
|
|
117
|
+
# sourcecode 1.53.0
|
|
118
118
|
```
|
|
119
119
|
|
|
120
120
|
---
|
|
@@ -364,9 +364,10 @@ sourcecode impact OrderService . --depth 2 # limit BFS depth
|
|
|
364
364
|
```bash
|
|
365
365
|
sourcecode endpoints /path/to/repo
|
|
366
366
|
sourcecode endpoints /path/to/repo --output endpoints.json
|
|
367
|
+
sourcecode endpoints /path/to/repo --by-controller
|
|
367
368
|
```
|
|
368
369
|
|
|
369
|
-
Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.) and JAX-RS (`@GET`, `@POST`, `@Path`) endpoint methods. Returns HTTP method, path, controller class, and handler method.
|
|
370
|
+
Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.) and JAX-RS (`@GET`, `@POST`, `@Path`) endpoint methods. Returns HTTP method, path, controller class, and handler method. Each endpoint also carries its `return_type`. `--by-controller` groups the surface per controller (`{by_controller, controller_count, total}`) for an API-surface view.
|
|
370
371
|
|
|
371
372
|
**Functional / WebFlux routing (honest limitation).** Routes registered via the functional DSL — `route().GET("/path", handler)` / `RouterFunction` / `CustomEndpoint`, common in reactive Spring apps — are **not** modeled (their real paths depend on `nest()`/group-version prefixes that can't be resolved statically). Rather than emit partial paths that would mislead, the output reports a `functional_routing` block (`files`, `route_registrations`, `modeled: false`) plus a warning. When the annotation surface is empty but functional routes exist, the warning explicitly tells you not to read it as "no endpoints". Annotation-based (MVC/JAX-RS) repos are unaffected.
|
|
372
373
|
|
|
@@ -387,6 +388,26 @@ Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.)
|
|
|
387
388
|
|
|
388
389
|
Matching endpoints then report `policy: "custom"` with `annotation`, `resourceName`, and `requiredLevel`, and are no longer counted in `no_security_signal`. Repos without the config behave exactly as before.
|
|
389
390
|
|
|
391
|
+
### `export` — architecture views for downstream tooling
|
|
392
|
+
|
|
393
|
+
```bash
|
|
394
|
+
sourcecode export /path/to/repo --by-directory # code map, path:line refs
|
|
395
|
+
sourcecode export /path/to/repo --module-graph # module→module dependencies
|
|
396
|
+
sourcecode export /path/to/repo --integrations # outbound HTTP/LDAP/JMS clients
|
|
397
|
+
sourcecode export /path/to/repo --c4 # unified architecture + manifest
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
Emits **structured, tool-agnostic** codebase views as plain JSON/YAML — the kind of input an architecture-doc generator, diagram renderer, or code-search agent can consume directly instead of walking the tree file by file. Section labels map to the open [C4 model](https://c4model.com) (an open architecture notation, not a product); the schema is vendor-neutral.
|
|
401
|
+
|
|
402
|
+
| Flag | Output |
|
|
403
|
+
|------|--------|
|
|
404
|
+
| `--by-directory` | One group per source directory, each symbol with a `source_file:line` reference. |
|
|
405
|
+
| `--module-graph` | `{nodes, edges, summary}` — directories as modules, inter-module dependencies rolled up from class-level relation edges with hit counts + edge types. |
|
|
406
|
+
| `--integrations` | Outbound integrations (`RestTemplate`, `WebClient`, `@FeignClient`, `LdapTemplate`, `JmsTemplate`, ActiveMQ) with `file:line` evidence and a literal `target` URL/name when present. |
|
|
407
|
+
| `--c4` | Unified document: `c4.{context, containers, components, code}` + `api_surface` + a `manifest` with per-directory content hashes for **incremental** consumers (skip directories whose hash is unchanged). |
|
|
408
|
+
|
|
409
|
+
The section flags compose (pass several for one multi-section document); `--c4` assembles the full export on its own. URLs assembled at runtime yield `target: null` (honest absence, never a guess); containers are derived from build files (Maven/Gradle) and reported as a limitation when none are found.
|
|
410
|
+
|
|
390
411
|
### `spring-audit` — Spring semantic audit [free]
|
|
391
412
|
|
|
392
413
|
```bash
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=iHGCfyboU5livWODKEj-u8oT6BwJInerv6YHn28vXno,103
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=liCwQmLgb5vplohy8arjYxs_HOIv5C9MjLh_gY6bc5Q,44115
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
@@ -7,7 +7,7 @@ sourcecode/cache.py,sha256=1V3vsaODAa2UBJAC0xpvxpmRdriCezQx5Q8JCcfgziE,31892
|
|
|
7
7
|
sourcecode/canonical_ir.py,sha256=DEwucOPJguLsVtg5cV8mWXNi112l5jmBhv73KGGebVk,24849
|
|
8
8
|
sourcecode/cir_graphs.py,sha256=9G0HHj1kw2325IDyzo2OpX73BNswEckecf4MZUXB4JM,12078
|
|
9
9
|
sourcecode/classifier.py,sha256=hKzg-nQ47htqqIUzSGvYxv15cXrA3KgICTwJmdqal0o,8095
|
|
10
|
-
sourcecode/cli.py,sha256=
|
|
10
|
+
sourcecode/cli.py,sha256=IZ_TcUzd-rUFoIYMgkOcGP-FqiYoOGPxZ3sjPkEp4OM,272648
|
|
11
11
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
12
12
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
13
13
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -24,10 +24,11 @@ sourcecode/explain.py,sha256=dVG35YBlpRmbtOXSmspEhoIwDMVApPmLISBy3iigUSc,16913
|
|
|
24
24
|
sourcecode/file_chunker.py,sha256=3vkM3mDQ5eE_yTPvUgjyjpGFBIjkW6_mrBmIbrylnA8,16444
|
|
25
25
|
sourcecode/file_classifier.py,sha256=A0fEABqtfVu1MfoaxnPAvGpZgneGgVXlJDhT74NYXxE,15314
|
|
26
26
|
sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,28781
|
|
27
|
-
sourcecode/format_contract.py,sha256=
|
|
27
|
+
sourcecode/format_contract.py,sha256=1cTNqwP8geA2hbQoBHUPgX3_vSh3l8guJT_jmgEnFF8,3466
|
|
28
28
|
sourcecode/fqn_utils.py,sha256=XLU7zDkNBXz_RZkIUNfpPmp1nekWtqP-fxV92tDV1vg,2158
|
|
29
29
|
sourcecode/git_analyzer.py,sha256=JStxTQXNjBWi_wLdwhsZs9mT-v50cSJIz4Agzn6Kh9I,13362
|
|
30
30
|
sourcecode/graph_analyzer.py,sha256=DHR8fY69oU_Pi4SYaWboX6EoEFrctQKB9dsjpqwGMzw,62403
|
|
31
|
+
sourcecode/integration_detector.py,sha256=ZJqrGwvZ4ee2JTGhlazKk67aZi173HxkhNpl8Yntpd8,6503
|
|
31
32
|
sourcecode/license.py,sha256=i_X1bYdobL_z9OVuLiycnWEFSaaNhcKKuTd6G55U3_k,20747
|
|
32
33
|
sourcecode/mcp_nudge.py,sha256=5ELU_ixzh6uA83NXLOZT8h00OhL53okfQdji3jyKOjg,2917
|
|
33
34
|
sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
|
|
@@ -44,7 +45,7 @@ sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
|
|
|
44
45
|
sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
|
|
45
46
|
sourcecode/rename_refactor.py,sha256=h6dNFlB9aZ_3q6heeHBkgXQeXaT03nvPSsYH6P8qxFg,12965
|
|
46
47
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
47
|
-
sourcecode/repository_ir.py,sha256=
|
|
48
|
+
sourcecode/repository_ir.py,sha256=n1H0OROkD1dHvpWAtDoYNHGlTkVhQpYIFqIQ3jf3mgs,214101
|
|
48
49
|
sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
|
|
49
50
|
sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
|
|
50
51
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
@@ -54,7 +55,7 @@ sourcecode/semantic_analyzer.py,sha256=4OdG6tTSnTvq3_dSWMbQu8Ad1ndSCKeG-b9qM4hIx
|
|
|
54
55
|
sourcecode/serializer.py,sha256=TGzftrSKitZrtl6Hh-R05s4KdTOxwTmph_lGDbo2Wzg,125015
|
|
55
56
|
sourcecode/spring_event_topology.py,sha256=5_ON_21Le5zbG-1GRc5GLIi5HJfy_QjcXLVPC5WeUGQ,18055
|
|
56
57
|
sourcecode/spring_findings.py,sha256=G7Or2lKBUQbcTDqudLvSs9XvNg_YoAa-_lBOG_ULs8E,5457
|
|
57
|
-
sourcecode/spring_impact.py,sha256=
|
|
58
|
+
sourcecode/spring_impact.py,sha256=5ooAVO-gg1rL-wRaT9_V8ra8edq5TAKu-kp4sAEoS_U,46343
|
|
58
59
|
sourcecode/spring_model.py,sha256=zOAgFmrRbG4a6KLm1TJl55aWMyPNsz3OS3FSczqPG6A,16594
|
|
59
60
|
sourcecode/spring_security_audit.py,sha256=XtPJ1SXlZJ8k6VYmaWuAp7Bbir4UmreAL7doIGQ5I7o,20595
|
|
60
61
|
sourcecode/spring_semantic.py,sha256=O1nKSGVzlukuxLHQVuCPxc-XrcrMFxwlHA20_dmEGgM,13307
|
|
@@ -101,8 +102,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
101
102
|
sourcecode/telemetry/events.py,sha256=LtzYfaX9Ilckj5PTvAcTpDa9mLqDsYPDUiDkRa58piY,2580
|
|
102
103
|
sourcecode/telemetry/filters.py,sha256=NHa5T-6DaZduQPFuC34jOqHWQgSizM-Ygq8aZ4j19ng,5834
|
|
103
104
|
sourcecode/telemetry/transport.py,sha256=4gGHsq0WeY9VywEZXA3vUxykfiYnw9uuqfjAAec7F8o,1681
|
|
104
|
-
sourcecode-1.
|
|
105
|
-
sourcecode-1.
|
|
106
|
-
sourcecode-1.
|
|
107
|
-
sourcecode-1.
|
|
108
|
-
sourcecode-1.
|
|
105
|
+
sourcecode-1.53.0.dist-info/METADATA,sha256=Ahvq7n0P2M28DyG8mqksS-g11VTDnqDCjdPSZ23NjH0,36719
|
|
106
|
+
sourcecode-1.53.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
107
|
+
sourcecode-1.53.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
108
|
+
sourcecode-1.53.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
109
|
+
sourcecode-1.53.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|