sourcecode 1.52.0__py3-none-any.whl → 1.53.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cli.py +390 -1
- sourcecode/format_contract.py +1 -0
- sourcecode/integration_detector.py +163 -0
- sourcecode/repository_ir.py +45 -6
- {sourcecode-1.52.0.dist-info → sourcecode-1.53.0.dist-info}/METADATA +25 -4
- {sourcecode-1.52.0.dist-info → sourcecode-1.53.0.dist-info}/RECORD +10 -9
- {sourcecode-1.52.0.dist-info → sourcecode-1.53.0.dist-info}/WHEEL +0 -0
- {sourcecode-1.52.0.dist-info → sourcecode-1.53.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.52.0.dist-info → sourcecode-1.53.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cli.py
CHANGED
|
@@ -219,7 +219,7 @@ _HELP = _build_help_text()
|
|
|
219
219
|
_SUBCOMMANDS: frozenset[str] = frozenset(
|
|
220
220
|
{
|
|
221
221
|
"telemetry", "prepare-context", "version", "config",
|
|
222
|
-
"repo-ir", "mcp", "endpoints", "impact",
|
|
222
|
+
"repo-ir", "mcp", "endpoints", "impact", "export",
|
|
223
223
|
# Enterprise workflow commands
|
|
224
224
|
"onboard", "modernize", "fix-bug", "review-pr",
|
|
225
225
|
# License / auth
|
|
@@ -3804,6 +3804,30 @@ def impact_cmd(
|
|
|
3804
3804
|
# canonical single-source-of-truth endpoint extractor.
|
|
3805
3805
|
|
|
3806
3806
|
|
|
3807
|
+
def _group_endpoints_by_controller(endpoints: "list[dict]") -> "dict":
    """Bucket endpoint records under the controller that declares them.

    Every record in *endpoints* is reduced to ``{method, path, return_type}``
    and filed under its ``controller`` value. A missing or empty controller is
    filed under ``"<unknown>"``; a missing ``method``/``path`` becomes ``""``
    and a missing ``return_type`` becomes ``"void"``.

    Returns a dict with three keys:

    * ``by_controller`` -- controller name -> list of routes, with controllers
      in sorted name order and each route list sorted by ``(path, method)``;
    * ``controller_count`` -- number of distinct controllers;
    * ``total`` -- number of input endpoint records.
    """
    buckets: "dict[str, list[dict]]" = {}
    for endpoint in endpoints:
        controller = endpoint.get("controller", "") or "<unknown>"
        route = {
            "method": endpoint.get("method", ""),
            "path": endpoint.get("path", ""),
            "return_type": endpoint.get("return_type", "void"),
        }
        if controller in buckets:
            buckets[controller].append(route)
        else:
            buckets[controller] = [route]

    # Rebuild in sorted controller order; sorted() is stable, so routes that
    # share (path, method) keep their input order.
    grouped = {
        name: sorted(buckets[name], key=lambda route: (route["path"], route["method"]))
        for name in sorted(buckets)
    }
    return {
        "by_controller": grouped,
        "controller_count": len(grouped),
        "total": len(endpoints),
    }
|
|
3830
|
+
|
|
3807
3831
|
|
|
3808
3832
|
@app.command("endpoints")
|
|
3809
3833
|
def endpoints_cmd(
|
|
@@ -3844,6 +3868,10 @@ def endpoints_cmd(
|
|
|
3844
3868
|
False, "--no-cache",
|
|
3845
3869
|
help="Accepted for compatibility; this command always reads fresh source (no snapshot cache). No-op.",
|
|
3846
3870
|
),
|
|
3871
|
+
by_controller: bool = typer.Option(
|
|
3872
|
+
False, "--by-controller",
|
|
3873
|
+
help="Group endpoints by controller class (structured API surface for C4/Container synthesis).",
|
|
3874
|
+
),
|
|
3847
3875
|
) -> None:
|
|
3848
3876
|
"""Extract REST API endpoint surface from Java source files.
|
|
3849
3877
|
|
|
@@ -3929,6 +3957,11 @@ def endpoints_cmd(
|
|
|
3929
3957
|
"undocumented_before_filter": _undoc_before,
|
|
3930
3958
|
}
|
|
3931
3959
|
|
|
3960
|
+
if by_controller:
|
|
3961
|
+
_grouped = _group_endpoints_by_controller(data.get("endpoints", []))
|
|
3962
|
+
data["by_controller"] = _grouped["by_controller"]
|
|
3963
|
+
data["controller_count"] = _grouped["controller_count"]
|
|
3964
|
+
|
|
3932
3965
|
output = _serialize_dict(data, format)
|
|
3933
3966
|
|
|
3934
3967
|
_emit_command_output(output, output_path, copy,
|
|
@@ -3938,6 +3971,362 @@ def endpoints_cmd(
|
|
|
3938
3971
|
_nudge()
|
|
3939
3972
|
|
|
3940
3973
|
|
|
3974
|
+
# ── export ──────────────────────────────────────────────────────────────────
|
|
3975
|
+
|
|
3976
|
+
def _group_symbols_by_directory(nodes: "list[dict]") -> "dict":
    """Group repo-ir graph nodes by source directory with path:line refs.

    Each node contributes ``{symbol, kind, ref}`` to the group of the directory
    containing its ``source_file`` (``"."`` for files at the repository root).
    ``symbol`` is the node's ``canonical_name`` (falling back to ``fqn``),
    ``kind`` its ``symbol_kind`` and ``ref`` is ``source_file:line`` when a
    truthy line is present, otherwise just ``source_file``. Nodes without a
    ``source_file`` are dropped.

    Returns ``{dir: [{symbol, kind, ref}]}`` with directories in sorted name
    order and symbols in source order: by file, then by *numeric* line. Sorting
    the ``ref`` strings directly would be lexicographic and would place line 10
    before line 2 of the same file. Entries without a line sort first within
    their file; ties keep input order.
    """
    import posixpath

    # dir -> [(source_file, line_key, record)]; the first two fields are the sort key.
    keyed_by_dir: "dict[str, list[tuple]]" = {}
    for node in nodes:
        source_file = node.get("source_file") or ""
        if not source_file:
            continue
        directory = posixpath.dirname(source_file) or "."
        line = node.get("line")
        ref = f"{source_file}:{line}" if line else source_file
        # Unknown (None/0) or non-integer lines order ahead of real line numbers.
        line_key = line if isinstance(line, int) else 0
        record = {
            "symbol": node.get("canonical_name") or node.get("fqn"),
            "kind": node.get("symbol_kind"),
            "ref": ref,
        }
        keyed_by_dir.setdefault(directory, []).append((source_file, line_key, record))

    grouped: "dict[str, list[dict]]" = {}
    for directory in sorted(keyed_by_dir):
        entries = sorted(keyed_by_dir[directory], key=lambda entry: (entry[0], entry[1]))
        grouped[directory] = [record for _file, _line, record in entries]
    return grouped
|
|
4002
|
+
|
|
4003
|
+
|
|
4004
|
+
def _build_module_graph(nodes: "list[dict]", edges: "list[dict]") -> "dict":
    """Collapse symbol-level relation edges into a directory-level dependency graph.

    A module is the directory of a node's ``source_file`` (``"."`` for the
    repository root). Nodes without a ``source_file`` are ignored. Every node
    with a source file counts toward its module's ``symbol_count``; only nodes
    that also carry an ``fqn`` can be resolved as edge endpoints.

    An edge contributes to the graph only when both its ``from`` and ``to``
    resolve to known modules and those modules differ. Contributions are
    aggregated per ``(from, to)`` pair into a hit ``count`` and the sorted list
    of distinct edge ``type`` values seen.

    Returns ``{"nodes": [...], "edges": [...], "summary": {...}}`` where nodes
    are sorted by module name, edges by ``(from, to)``, and ``out_degree`` /
    ``in_degree`` count distinct aggregated edges (not raw hits).
    """
    import posixpath

    module_of: "dict[str, str]" = {}
    symbol_totals: "dict[str, int]" = {}
    for node in nodes:
        source_file = node.get("source_file") or ""
        if source_file:
            module = posixpath.dirname(source_file) or "."
            fqn = node.get("fqn")
            if fqn:
                module_of[fqn] = module
            symbol_totals[module] = symbol_totals.get(module, 0) + 1

    # (from_module, to_module) -> [hit_count, {edge types}]
    rollup: "dict[tuple[str, str], list]" = {}
    for edge in edges:
        source = module_of.get(edge.get("from"))
        target = module_of.get(edge.get("to"))
        if source is None or target is None or source == target:
            continue
        bucket = rollup.setdefault((source, target), [0, set()])
        bucket[0] += 1
        edge_type = edge.get("type")
        if edge_type:
            bucket[1].add(edge_type)

    graph_edges = []
    fan_out: "dict[str, int]" = {}
    fan_in: "dict[str, int]" = {}
    for pair in sorted(rollup):
        source, target = pair
        hits, kinds = rollup[pair]
        graph_edges.append({
            "from": source,
            "to": target,
            "count": hits,
            "types": sorted(kinds),
        })
        fan_out[source] = fan_out.get(source, 0) + 1
        fan_in[target] = fan_in.get(target, 0) + 1

    graph_nodes = []
    for module in sorted(symbol_totals):
        graph_nodes.append({
            "module": module,
            "symbol_count": symbol_totals[module],
            "out_degree": fan_out.get(module, 0),
            "in_degree": fan_in.get(module, 0),
        })

    summary = {
        "module_count": len(graph_nodes),
        "edge_count": len(graph_edges),
    }
    return {"nodes": graph_nodes, "edges": graph_edges, "summary": summary}
|
|
4075
|
+
|
|
4076
|
+
|
|
4077
|
+
# Build files whose presence marks a directory as a buildable unit (C4 container).
# Order matters: when a directory holds several, the first one listed is reported.
_BUILD_MARKERS: "tuple[str, ...]" = (
    "pom.xml", "build.gradle", "build.gradle.kts", "settings.gradle",
)


def _detect_containers(root: "Path") -> "list[dict]":
    """Find directories under *root* that contain a recognized build file.

    Walks the tree with ``os.walk`` and never runs a build. Directories named
    in the skip set (VCS, dependency and build-output folders) and any
    directory whose name starts with ``"."`` are pruned and not descended into.

    Returns one ``{"root": <posix path relative to root>, "build_file": <name>}``
    entry per matching directory, sorted by ``root``; the repository root itself
    is reported as ``"."``. When a directory holds several markers only the
    first in ``_BUILD_MARKERS`` order is reported. The list is empty when no
    build file is found.
    """
    import os

    pruned = {".git", "node_modules", "target", "build", ".gradle", ".idea", "dist"}
    containers: "list[dict]" = []
    for current, subdirs, files in os.walk(root):
        # In-place slice assignment is what tells os.walk not to descend.
        subdirs[:] = [
            name for name in subdirs
            if not (name in pruned or name.startswith("."))
        ]
        marker = next((m for m in _BUILD_MARKERS if m in files), None)
        if marker is None:
            continue
        relative = os.path.relpath(current, root).replace(os.sep, "/")
        containers.append({"root": relative, "build_file": marker})
    return sorted(containers, key=lambda entry: entry["root"])
|
|
4109
|
+
|
|
4110
|
+
|
|
4111
|
+
def _directory_hashes(file_list: "list[str]", root: "Path") -> "dict[str, str]":
    """Compute a short content digest for each directory that holds listed files.

    Files in *file_list* (paths relative to *root*) are grouped by their posix
    directory name (``"."`` for top-level files). For each directory, in sorted
    order, a SHA-256 is fed every member in sorted path order as
    ``relpath NUL content NUL``. A file that cannot be read contributes the
    placeholder ``b"<unreadable>"`` instead of its content.

    Returns ``{directory: first 16 hex characters of the digest}`` so a
    consumer can compare digests between runs and skip unchanged directories.
    """
    import hashlib
    import posixpath

    members: "dict[str, list[str]]" = {}
    for rel_path in file_list:
        directory = posixpath.dirname(rel_path) or "."
        members.setdefault(directory, []).append(rel_path)

    digests: "dict[str, str]" = {}
    for directory in sorted(members):
        hasher = hashlib.sha256()
        for rel_path in sorted(members[directory]):
            hasher.update(rel_path.encode("utf-8"))
            hasher.update(b"\0")
            try:
                payload = (root / rel_path).read_bytes()
            except OSError:
                # Still hash something so the entry is represented in the digest.
                payload = b"<unreadable>"
            hasher.update(payload)
            hasher.update(b"\0")
        digests[directory] = hasher.hexdigest()[:16]
    return digests
|
|
4138
|
+
|
|
4139
|
+
|
|
4140
|
+
def _build_c4_export(
    root: "Path",
    file_list: "list[str]",
    nodes: "list[dict]",
    edges: "list[dict]",
    endpoints: "list[dict]",
    integrations: "dict",
) -> "dict":
    """Assemble the unified C4-style architecture document plus its manifest.

    Combines the per-section helpers into one dict:

    * ``c4.context`` -- the system (named after ``root.name``, with the source
      file count) and *integrations* passed through as ``external_systems``;
    * ``c4.containers`` -- build-module roots from ``_detect_containers``;
    * ``c4.components`` -- the module graph from ``_build_module_graph``;
    * ``c4.code`` -- per-directory symbols from ``_group_symbols_by_directory``;
    * ``api_surface`` -- endpoints grouped by ``_group_endpoints_by_controller``;
    * ``manifest`` -- per-directory content hashes and generator metadata;
    * ``limitations`` -- notes on what could not be derived; currently only the
      "no build files found" case.
    """
    code_view = _group_symbols_by_directory(nodes)
    component_view = _build_module_graph(nodes, edges)
    interface_view = _group_endpoints_by_controller(endpoints)
    container_view = _detect_containers(root)

    caveats: "list[str]" = []
    if not container_view:
        # Report the gap instead of inventing a container entry.
        caveats.append(
            "No build files (Maven/Gradle) found; containers not derived. "
            "Treat the repository as a single implicit container."
        )

    source_count = len(file_list)
    c4_sections = {
        "context": {
            "system": {"name": root.name, "file_count": source_count},
            "external_systems": integrations,
        },
        "containers": container_view,
        "components": component_view,
        "code": code_view,
    }
    manifest = {
        "directory_hashes": _directory_hashes(file_list, root),
        "generated": {
            "tool": "sourcecode",
            "schema": "c4-v1",
            "file_count": source_count,
        },
    }
    return {
        "schema_version": "c4-v1",
        "c4": c4_sections,
        "api_surface": interface_view,
        "manifest": manifest,
        "limitations": caveats,
    }
|
|
4190
|
+
|
|
4191
|
+
|
|
4192
|
+
@app.command("export")
def export_cmd(
    path: Path = typer.Argument(
        Path("."),
        help="Repository path to export (default: current directory)",
    ),
    output_path: Optional[Path] = typer.Option(
        None, "--output", "-o",
        help="Write output to a file instead of stdout.",
    ),
    format: str = typer.Option(
        "json", "--format", "-f",
        help="Output format: json (default) or yaml.",
    ),
    copy: bool = typer.Option(
        False, "--copy", "-c",
        help="Copy output to system clipboard after a successful run.",
    ),
    by_directory: bool = typer.Option(
        False, "--by-directory",
        help="Group symbols by source directory with path:line refs (C4 code-level export).",
    ),
    module_graph: bool = typer.Option(
        False, "--module-graph",
        help="Emit a module→module dependency graph (C4 container/component level).",
    ),
    integrations: bool = typer.Option(
        False, "--integrations",
        help="Detect outbound integrations (HTTP/LDAP/JMS clients) with file:line evidence.",
    ),
    c4: bool = typer.Option(
        False, "--c4",
        help="Unified C4 architecture export (context/containers/components/code) "
             "+ per-directory incremental manifest. Vendor-neutral.",
    ),
) -> None:
    """Export structured, tool-agnostic codebase views for downstream tooling.

    Output is plain JSON/YAML that any consumer (architecture-doc generators,
    diagram renderers, code-search agents) can ingest. Section labels map to the
    open C4 model (an open architecture notation, not a product) but the schema
    is vendor-neutral.

    \b
    --by-directory One group per source directory, each symbol carrying a
    path:line reference — the file:line-anchored code map that
    lets a consumer proceed by file write instead of per-dir reads.
    --module-graph Module→module dependency graph (container/component level)
    rolled up from class-level relation edges.
    --integrations Outbound integrations (RestTemplate/WebClient/Feign/LDAP/JMS)
    with file:line evidence — external-system dependency arrows.
    --c4 Unified architecture document mapped onto the open C4 model
    (context/containers/components/code) + an API surface and a
    per-directory content-hash manifest for incremental consumers.

    The section flags compose; pass several to emit multiple sections in one
    document. --c4 assembles the full architecture export on its own.
    """
    # Imported inside the command rather than at module level.
    from sourcecode.repository_ir import build_repo_ir, find_java_files

    # Check --format for the "export" command before doing any work.
    _enforce_format("export", format)

    # Validation 1: the target must be an existing directory.
    root = path.resolve()
    if not root.is_dir():
        _emit_error_json(
            INVALID_INPUT_CODE,
            f"'{root}' is not a valid directory.",
            path=str(root),
            hint="Pass an existing repository directory.",
            expected="A directory path.",
        )
        raise typer.Exit(1)

    # Validation 2: at least one mode flag is required; there is no default section.
    if not (by_directory or module_graph or integrations or c4):
        _emit_error_json(
            INVALID_INPUT_CODE,
            "export requires a mode flag.",
            path=str(root),
            hint="Pass --c4 for the full architecture export, or one of "
                 "--by-directory / --module-graph / --integrations for a section.",
            expected="--c4 | --by-directory | --module-graph | --integrations",
        )
        raise typer.Exit(1)

    # Drop test sources by substring match on the path.
    # NOTE(review): a path that *starts* with "test/" or "tests/" (no leading
    # slash) is not filtered — confirm the path format find_java_files returns.
    file_list = [
        f for f in find_java_files(root)
        if "/test/" not in f and "/tests/" not in f
    ]

    if c4:
        # Unified architecture export: assembles every section + manifest.
        # This branch returns before the section flags are examined, so
        # --by-directory / --module-graph / --integrations have no additional
        # effect when combined with --c4.
        from sourcecode.integration_detector import detect_integrations
        from sourcecode.repository_ir import extract_java_endpoints
        ir = build_repo_ir(file_list, root)
        graph = ir.get("graph", {})
        # Endpoints are extracted from `root`, not from the filtered file_list.
        endpoints = extract_java_endpoints(root).get("endpoints", [])
        data = _build_c4_export(
            root,
            file_list,
            graph.get("nodes", []),
            graph.get("edges", []),
            endpoints,
            detect_integrations(file_list, root),
        )
        output = _serialize_dict(data, format)
        _emit_command_output(output, output_path, copy,
                             success_msg=f"C4 architecture export written to {output_path}")
        from sourcecode.mcp_nudge import nudge_mcp_if_needed as _nudge
        _nudge()
        return

    # Section mode: each requested flag adds its own top-level key(s) to `data`.
    data: "dict" = {}
    # IR is only needed for the symbol/graph-derived views, not for the
    # source-scanned integration detector.
    if by_directory or module_graph:
        # Built once and shared by both graph-derived sections.
        ir = build_repo_ir(file_list, root)
        graph = ir.get("graph", {})
        nodes = graph.get("nodes", [])
        if by_directory:
            grouped = _group_symbols_by_directory(nodes)
            data["by_directory"] = grouped
            data["directory_count"] = len(grouped)
            data["symbol_count"] = sum(len(v) for v in grouped.values())
        if module_graph:
            data["module_graph"] = _build_module_graph(nodes, graph.get("edges", []))

    if integrations:
        from sourcecode.integration_detector import detect_integrations
        data["integrations"] = detect_integrations(file_list, root)

    output = _serialize_dict(data, format)
    _emit_command_output(output, output_path, copy,
                         success_msg=f"Export written to {output_path}")

    from sourcecode.mcp_nudge import nudge_mcp_if_needed as _nudge
    _nudge()
|
|
4328
|
+
|
|
4329
|
+
|
|
3941
4330
|
@app.command("validation")
|
|
3942
4331
|
def validation_cmd(
|
|
3943
4332
|
path: Path = typer.Argument(
|
sourcecode/format_contract.py
CHANGED
|
@@ -27,6 +27,7 @@ FORMAT_REGISTRY: "dict[str, tuple[str, ...]]" = {
|
|
|
27
27
|
"repo-ir": ("json", "yaml"),
|
|
28
28
|
"impact": ("json", "yaml"),
|
|
29
29
|
"endpoints": ("json", "yaml"),
|
|
30
|
+
"export": ("json", "yaml"),
|
|
30
31
|
"validation": ("json", "yaml"),
|
|
31
32
|
"impact-chain": ("json", "yaml"),
|
|
32
33
|
"pr-impact": ("text", "json"),
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Outgoing-integration detection for the C4/BMB export pipeline.
|
|
2
|
+
|
|
3
|
+
Scans Java source for *outbound* integration points — the edges a C4 Context
|
|
4
|
+
diagram needs to draw arrows from this system to external systems. Detection is
|
|
5
|
+
deterministic source-text matching (same approach as the JNDI datasource scan in
|
|
6
|
+
``serializer.py``); it never executes code and never resolves runtime values.
|
|
7
|
+
|
|
8
|
+
Covered clients:
|
|
9
|
+
|
|
10
|
+
* HTTP — ``RestTemplate``, ``WebClient``, ``@FeignClient`` (declarative)
|
|
11
|
+
* LDAP — ``LdapTemplate``
|
|
12
|
+
* JMS — ``JmsTemplate``, ActiveMQ connection factories
|
|
13
|
+
|
|
14
|
+
Each hit is reported with a ``file:line`` evidence anchor and, when a literal URL
|
|
15
|
+
or logical name is present on the same construct, a ``target``. URLs assembled at
|
|
16
|
+
runtime (concatenated strings, property placeholders) yield a ``null`` target —
|
|
17
|
+
honest absence rather than a guess.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
# A URL/endpoint literal for any scheme we care about.
_URL_RE = re.compile(r'"((?:https?|ldaps?|tcp|amqp|jms|nio)://[^"]*)"')
# First string literal on a line (fallback target, e.g. WebClient.create("x")).
_STR_RE = re.compile(r'"([^"]+)"')

# Declarative HTTP client. Attrs may span multiple lines, so matched on full text.
_FEIGN_RE = re.compile(r"@FeignClient\s*\(([^)]*)\)", re.DOTALL)
_ATTR_URL_RE = re.compile(r'url\s*=\s*"([^"]*)"')
_ATTR_NAME_RE = re.compile(r'(?:name|value)\s*=\s*"([^"]*)"')
_FIRST_LITERAL_RE = re.compile(r'^\s*"([^"]*)"')

# token -> (kind, client). Matched as whole-word usage outside imports/comments.
_TOKEN_CLIENTS: "tuple[tuple[str, str, str], ...]" = (
    ("RestTemplate", "http", "resttemplate"),
    ("WebClient", "http", "webclient"),
    ("LdapTemplate", "ldap", "ldaptemplate"),
    ("JmsTemplate", "jms", "jmstemplate"),
    ("ActiveMQConnectionFactory", "jms", "activemq"),
)
_TOKEN_RES = tuple(
    (re.compile(r"\b" + re.escape(tok) + r"\b"), kind, client)
    for tok, kind, client in _TOKEN_CLIENTS
)
# Per-token matchers compiled once: (usage, `Type name` declaration,
# `name = new Type` assignment, kind, client).
_TOKEN_MATCHERS = tuple(
    (
        token_re,
        re.compile(re.escape(tok) + r"\s+(\w+)\b"),
        re.compile(r"(\w+)\s*=\s*new\s+" + re.escape(tok)),
        kind,
        client,
    )
    for (tok, kind, client), (token_re, _kind, _client) in zip(_TOKEN_CLIENTS, _TOKEN_RES)
)

# Leading text (after indentation) that marks a line as import/package/comment noise.
_NOISE_PREFIXES = ("import ", "package ", "//", "*", "/*")


def _is_noise_line(line: str) -> bool:
    """True when ``line`` is an import, package or comment line (by prefix only)."""
    return line.lstrip().startswith(_NOISE_PREFIXES)


def _line_of(text: str, idx: int) -> int:
    """1-based line number of character offset ``idx`` in ``text``."""
    return text.count("\n", 0, idx) + 1


def _extract_target(line: str) -> Optional[str]:
    """Best-effort literal target on a usage line: a scheme URL, else first string."""
    m = _URL_RE.search(line) or _STR_RE.search(line)
    return m.group(1) if m else None


def detect_integrations(file_paths: "list[str]", root: Path) -> dict:
    """Detect outbound integrations across ``file_paths`` (relative to ``root``).

    Three scans run per file; unreadable files are skipped:

    1. ``@FeignClient(...)`` annotations on the full text. The target is the
       ``url`` attribute, else ``name``/``value``, else a leading positional
       literal, else ``None``.
    2. Client-type tokens (``_TOKEN_CLIENTS``) line by line, skipping
       import/package/comment lines. Each hit is recorded with the best-effort
       literal on that line, and variable names bound to the client are noted.
    3. Call sites: a non-comment line that carries a URL literal and
       dereferences a noted variable (``var.``) is recorded with that URL.

    Returns ``{"integrations": [...], "by_kind": {kind: count}, "count": N}``.
    Each integration is ``{kind, client, target, evidence}`` where ``evidence``
    is ``relpath:line`` and ``target`` is a literal URL/name or ``None``.
    Integrations are ordered by ``(kind, client, file, line)`` with the line
    compared numerically, so line 8 precedes line 10 (a plain sort on the
    ``evidence`` string would not guarantee that). Exact duplicates are dropped.
    """
    seen: "set[tuple[str, str, Optional[str], str]]" = set()
    # (kind, client, rel, line, record); the first four fields are the sort key.
    hits: "list[tuple]" = []

    def _add(kind: str, client: str, target: Optional[str], rel: str, line: int) -> None:
        evidence = f"{rel}:{line}"
        key = (kind, client, target, evidence)
        if key in seen:
            return
        seen.add(key)
        hits.append((kind, client, rel, line, {
            "kind": kind,
            "client": client,
            "target": target,
            "evidence": evidence,
        }))

    for rel in file_paths:
        try:
            text = (root / rel).read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        # @FeignClient — capture attrs even when spread across lines.
        for m in _FEIGN_RE.finditer(text):
            attrs = m.group(1)
            attr_m = (
                _ATTR_URL_RE.search(attrs)
                or _ATTR_NAME_RE.search(attrs)
                or _FIRST_LITERAL_RE.search(attrs)
            )
            target = attr_m.group(1) if attr_m else None
            _add("http", "feign", target, rel, _line_of(text, m.start()))

        # Token clients — per line, skipping imports/package/comment noise.
        # This pass records the declaration site and any variable name bound to
        # the client, so a later call site (where the URL literal usually lives)
        # can be attributed back to the client.
        var_to_client: "dict[str, tuple[str, str]]" = {}
        # Keep only real code lines; both passes must agree on what is noise.
        code_lines = [
            (lineno, line)
            for lineno, line in enumerate(text.splitlines(), start=1)
            if not _is_noise_line(line)
        ]
        for lineno, line in code_lines:
            for token_re, decl_re, asgn_re, kind, client in _TOKEN_MATCHERS:
                if not token_re.search(line):
                    continue
                _add(kind, client, _extract_target(line), rel, lineno)
                # `Type name` (field/local decl) and `name = new Type(` forms.
                decl = decl_re.search(line)
                if decl:
                    var_to_client[decl.group(1)] = (kind, client)
                asgn = asgn_re.search(line)
                if asgn:
                    var_to_client[asgn.group(1)] = (kind, client)

        if not var_to_client:
            continue

        # Second pass: a call on a tracked client variable carrying a URL literal
        # is reported as a hit at the call site (the URL endpoint C4 wants).
        # One compiled matcher per variable, built once per file.
        var_matchers = [
            (re.compile(r"\b" + re.escape(var) + r"\s*\."), kind, client)
            for var, (kind, client) in var_to_client.items()
        ]
        for lineno, line in code_lines:
            url = _URL_RE.search(line)
            if not url:
                continue
            for var_re, kind, client in var_matchers:
                if var_re.search(line):
                    _add(kind, client, url.group(1), rel, lineno)
                    break

    hits.sort(key=lambda hit: hit[:4])
    records = [hit[4] for hit in hits]

    by_kind: "dict[str, int]" = {}
    for r in records:
        by_kind[r["kind"]] = by_kind.get(r["kind"], 0) + 1

    return {
        "integrations": records,
        "by_kind": {k: by_kind[k] for k in sorted(by_kind)},
        "count": len(records),
    }
|
sourcecode/repository_ir.py
CHANGED
|
@@ -48,6 +48,7 @@ class SymbolRecord:
|
|
|
48
48
|
symbol_kind: str = "" # class|interface|enum|annotation|method|constructor|field|endpoint|bean
|
|
49
49
|
canonical_name: str = "" # pkg.Class#method(Type1,Type2) — human-readable
|
|
50
50
|
source_file: str = "" # alias for declaring_file (IR output contract)
|
|
51
|
+
line: Optional[int] = None # 1-based source line of the declaration
|
|
51
52
|
signature: str = "" # (Type1,Type2)->ReturnType for methods; type for fields
|
|
52
53
|
param_types: list[str] = field(default_factory=list)
|
|
53
54
|
return_type: str = ""
|
|
@@ -696,8 +697,12 @@ def _extract_symbols(
|
|
|
696
697
|
# here makes the regex work without changing the per-line brace-depth counter.
|
|
697
698
|
_raw_lines = source.splitlines()
|
|
698
699
|
_joined: list[str] = []
|
|
700
|
+
# Parallel list: 1-based source line where each _joined entry STARTS, so symbol
|
|
701
|
+
# declarations can carry their source line through the join/normalize transforms.
|
|
702
|
+
_joined_lines: list[int] = []
|
|
699
703
|
_i = 0
|
|
700
704
|
while _i < len(_raw_lines):
|
|
705
|
+
_start = _i # 0-based source index where this joined entry begins
|
|
701
706
|
_line = _raw_lines[_i]
|
|
702
707
|
_stripped = _line.strip()
|
|
703
708
|
if (_CLASS_KW_RE.search(_stripped) and '{' not in _stripped
|
|
@@ -713,6 +718,7 @@ def _extract_symbols(
|
|
|
713
718
|
if '{' in _cont:
|
|
714
719
|
break
|
|
715
720
|
_joined.append(_buf)
|
|
721
|
+
_joined_lines.append(_start + 1)
|
|
716
722
|
elif (
|
|
717
723
|
(_METHOD_DECL_RE.match(_stripped) or _CONSTRUCTOR_DECL_RE.match(_stripped))
|
|
718
724
|
and _net_parens(_stripped) > 0
|
|
@@ -731,15 +737,20 @@ def _extract_symbols(
|
|
|
731
737
|
_bal += _net_parens(_cont)
|
|
732
738
|
_i += 1
|
|
733
739
|
_joined.append(_buf)
|
|
740
|
+
_joined_lines.append(_start + 1)
|
|
734
741
|
else:
|
|
735
742
|
_joined.append(_line)
|
|
743
|
+
_joined_lines.append(_start + 1)
|
|
736
744
|
_i += 1
|
|
737
745
|
|
|
738
746
|
# P1 fix: normalize multiline annotations (e.g. @RequestMapping(\n value="..."\n))
|
|
739
747
|
# into single lines so the per-line regex can capture annotation args correctly.
|
|
740
|
-
_normalized_lines = _normalize_multiline_annotations(
|
|
748
|
+
_normalized_lines, _normalized_src_lines = _normalize_multiline_annotations(
|
|
749
|
+
_joined, _joined_lines
|
|
750
|
+
)
|
|
741
751
|
|
|
742
|
-
for line in _normalized_lines:
|
|
752
|
+
for _ni, line in enumerate(_normalized_lines):
|
|
753
|
+
cur_line = _normalized_src_lines[_ni] if _ni < len(_normalized_src_lines) else None
|
|
743
754
|
stripped = line.strip()
|
|
744
755
|
|
|
745
756
|
if in_block_comment:
|
|
@@ -821,6 +832,7 @@ def _extract_symbols(
|
|
|
821
832
|
symbol_kind=sym_kind,
|
|
822
833
|
canonical_name=fqn,
|
|
823
834
|
source_file=rel_path,
|
|
835
|
+
line=cur_line,
|
|
824
836
|
signature=" ".join(_sig_parts),
|
|
825
837
|
annotation_values=dict(pending_ann_values),
|
|
826
838
|
))
|
|
@@ -888,6 +900,7 @@ def _extract_symbols(
|
|
|
888
900
|
symbol_kind=_sym_kind,
|
|
889
901
|
canonical_name=_canonical,
|
|
890
902
|
source_file=rel_path,
|
|
903
|
+
line=cur_line,
|
|
891
904
|
signature=_signature,
|
|
892
905
|
param_types=_param_types,
|
|
893
906
|
return_type=_ret_raw,
|
|
@@ -923,6 +936,7 @@ def _extract_symbols(
|
|
|
923
936
|
symbol_kind="constructor",
|
|
924
937
|
canonical_name=f"{class_fqn}#{_class_simple}({_param_str})",
|
|
925
938
|
source_file=rel_path,
|
|
939
|
+
line=cur_line,
|
|
926
940
|
signature=f"({_param_str})->void",
|
|
927
941
|
param_types=_ctor_param_types,
|
|
928
942
|
return_type="void",
|
|
@@ -958,6 +972,7 @@ def _extract_symbols(
|
|
|
958
972
|
symbol_kind="field",
|
|
959
973
|
canonical_name=fqn,
|
|
960
974
|
source_file=rel_path,
|
|
975
|
+
line=cur_line,
|
|
961
976
|
signature=f"{ftype} {fname}",
|
|
962
977
|
))
|
|
963
978
|
pending_anns = []
|
|
@@ -1770,7 +1785,7 @@ def _resolve_ann_path_expr(ann_args: str, constants: dict[str, str]) -> str:
|
|
|
1770
1785
|
return ann_args
|
|
1771
1786
|
|
|
1772
1787
|
|
|
1773
|
-
def _normalize_multiline_annotations(lines
|
|
1788
|
+
def _normalize_multiline_annotations(lines, line_nums=None):
|
|
1774
1789
|
"""Merge multiline annotation spans into a single line.
|
|
1775
1790
|
|
|
1776
1791
|
Handles annotations split across lines because their args span multiple lines:
|
|
@@ -1782,10 +1797,13 @@ def _normalize_multiline_annotations(lines: list[str]) -> list[str]:
|
|
|
1782
1797
|
Merges into: '@RequestMapping(value = "/add", method = RequestMethod.GET)'
|
|
1783
1798
|
"""
|
|
1784
1799
|
result: list[str] = []
|
|
1800
|
+
result_lines: list[int] = []
|
|
1785
1801
|
buf: list[str] = []
|
|
1802
|
+
buf_line: Optional[int] = None
|
|
1786
1803
|
paren_depth = 0
|
|
1787
1804
|
|
|
1788
|
-
for line in lines:
|
|
1805
|
+
for _idx, line in enumerate(lines):
|
|
1806
|
+
src_line = line_nums[_idx] if (line_nums is not None and _idx < len(line_nums)) else None
|
|
1789
1807
|
stripped = line.strip()
|
|
1790
1808
|
if buf:
|
|
1791
1809
|
# Continuation of a multiline annotation
|
|
@@ -1793,7 +1811,9 @@ def _normalize_multiline_annotations(lines: list[str]) -> list[str]:
|
|
|
1793
1811
|
paren_depth += stripped.count("(") - stripped.count(")")
|
|
1794
1812
|
if paren_depth <= 0:
|
|
1795
1813
|
result.append(" ".join(buf))
|
|
1814
|
+
result_lines.append(buf_line if buf_line is not None else (src_line or 0))
|
|
1796
1815
|
buf = []
|
|
1816
|
+
buf_line = None
|
|
1797
1817
|
paren_depth = 0
|
|
1798
1818
|
elif stripped.startswith("@") and "(" in stripped:
|
|
1799
1819
|
opens = stripped.count("(")
|
|
@@ -1801,16 +1821,24 @@ def _normalize_multiline_annotations(lines: list[str]) -> list[str]:
|
|
|
1801
1821
|
if opens > closes:
|
|
1802
1822
|
# Unbalanced — start collecting continuation lines
|
|
1803
1823
|
buf = [stripped]
|
|
1824
|
+
buf_line = src_line
|
|
1804
1825
|
paren_depth = opens - closes
|
|
1805
1826
|
else:
|
|
1806
1827
|
result.append(line)
|
|
1828
|
+
result_lines.append(src_line or 0)
|
|
1807
1829
|
else:
|
|
1808
1830
|
result.append(line)
|
|
1831
|
+
result_lines.append(src_line or 0)
|
|
1809
1832
|
|
|
1810
1833
|
# Flush any dangling buffer (shouldn't happen in well-formed code)
|
|
1811
1834
|
if buf:
|
|
1812
|
-
|
|
1813
|
-
|
|
1835
|
+
for _bi, _bl in enumerate(buf):
|
|
1836
|
+
result.append(_bl)
|
|
1837
|
+
result_lines.append(buf_line if buf_line is not None else 0)
|
|
1838
|
+
|
|
1839
|
+
if line_nums is None:
|
|
1840
|
+
return result
|
|
1841
|
+
return result, result_lines
|
|
1814
1842
|
|
|
1815
1843
|
|
|
1816
1844
|
def _parse_route_path(args_str: str) -> str:
|
|
@@ -2643,6 +2671,7 @@ def _assemble(
|
|
|
2643
2671
|
"symbol_kind": s.symbol_kind,
|
|
2644
2672
|
"canonical_name": s.canonical_name or s.symbol,
|
|
2645
2673
|
"source_file": s.declaring_file,
|
|
2674
|
+
"line": s.line,
|
|
2646
2675
|
"signature": s.signature,
|
|
2647
2676
|
"type": s.type,
|
|
2648
2677
|
"role": spring_role_map.get(s.symbol, "other"),
|
|
@@ -3009,6 +3038,7 @@ def _build_route_surface(
|
|
|
3009
3038
|
"effective_class": cls_fqn,
|
|
3010
3039
|
"path": full_path,
|
|
3011
3040
|
"method": method,
|
|
3041
|
+
"return_type": (sym.return_type.strip() if sym.return_type else "void"),
|
|
3012
3042
|
"stable_id": sym.stable_id,
|
|
3013
3043
|
"inheritance_depth": 0,
|
|
3014
3044
|
}
|
|
@@ -3028,6 +3058,10 @@ def _build_route_surface(
|
|
|
3028
3058
|
_parent_sec_by_sym: dict[str, object] = {
|
|
3029
3059
|
r["symbol"]: r.get("security_annotations") for r in routes
|
|
3030
3060
|
}
|
|
3061
|
+
# Build lookup for return_type from phase-2 routes (inherited methods reuse parent's)
|
|
3062
|
+
_parent_rt_by_sym: dict[str, str] = {
|
|
3063
|
+
r["symbol"]: r.get("return_type", "void") for r in routes
|
|
3064
|
+
}
|
|
3031
3065
|
|
|
3032
3066
|
for cls_simple, data in class_info.items():
|
|
3033
3067
|
if not any(data["prefixes"]):
|
|
@@ -3066,6 +3100,7 @@ def _build_route_surface(
|
|
|
3066
3100
|
"effective_class": data["fqn"],
|
|
3067
3101
|
"path": full_path,
|
|
3068
3102
|
"method": verb,
|
|
3103
|
+
"return_type": _parent_rt_by_sym.get(declaring_sym, "void"),
|
|
3069
3104
|
"stable_id": stable_id,
|
|
3070
3105
|
"inheritance_depth": depth,
|
|
3071
3106
|
"security_annotations": _parent_sec_by_sym.get(declaring_sym),
|
|
@@ -3778,6 +3813,9 @@ def _recover_openapi_spec_routes(
|
|
|
3778
3813
|
"path": op.path,
|
|
3779
3814
|
"controller": ctrl_simple,
|
|
3780
3815
|
"handler": handler,
|
|
3816
|
+
# Response type is not parsed from the OpenAPI spec (only request
|
|
3817
|
+
# bodies are). "unknown" is honest here — these are spec-sourced.
|
|
3818
|
+
"return_type": "unknown",
|
|
3781
3819
|
"source": "openapi-spec",
|
|
3782
3820
|
# Security for generated controllers is declared in the spec /
|
|
3783
3821
|
# enforced by the filter chain, not by per-endpoint annotations.
|
|
@@ -3953,6 +3991,7 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
|
|
|
3953
3991
|
"path": route["path"],
|
|
3954
3992
|
"controller": controller,
|
|
3955
3993
|
"handler": handler,
|
|
3994
|
+
"return_type": route.get("return_type", "void"),
|
|
3956
3995
|
}
|
|
3957
3996
|
# Use security_annotations already extracted by _build_route_surface
|
|
3958
3997
|
# via the canonical _route_security_from_sym extractor.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.52.0
|
|
3
|
+
Version: 1.53.0
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
|
|
41
41
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
42
42
|
|
|
43
|
-

|
|
44
44
|

|
|
45
45
|
|
|
46
46
|
---
|
|
@@ -114,7 +114,7 @@ pipx install sourcecode
|
|
|
114
114
|
|
|
115
115
|
```bash
|
|
116
116
|
sourcecode version
|
|
117
|
-
# sourcecode 1.52.0
|
|
117
|
+
# sourcecode 1.53.0
|
|
118
118
|
```
|
|
119
119
|
|
|
120
120
|
---
|
|
@@ -364,9 +364,10 @@ sourcecode impact OrderService . --depth 2 # limit BFS depth
|
|
|
364
364
|
```bash
|
|
365
365
|
sourcecode endpoints /path/to/repo
|
|
366
366
|
sourcecode endpoints /path/to/repo --output endpoints.json
|
|
367
|
+
sourcecode endpoints /path/to/repo --by-controller
|
|
367
368
|
```
|
|
368
369
|
|
|
369
|
-
Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.) and JAX-RS (`@GET`, `@POST`, `@Path`) endpoint methods. Returns HTTP method, path, controller class, and handler method.
|
|
370
|
+
Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.) and JAX-RS (`@GET`, `@POST`, `@Path`) endpoint methods. Returns HTTP method, path, controller class, and handler method. Each endpoint also carries its `return_type`. `--by-controller` groups the surface per controller (`{by_controller, controller_count, total}`) for an API-surface view.
|
|
370
371
|
|
|
371
372
|
**Functional / WebFlux routing (honest limitation).** Routes registered via the functional DSL — `route().GET("/path", handler)` / `RouterFunction` / `CustomEndpoint`, common in reactive Spring apps — are **not** modeled (their real paths depend on `nest()`/group-version prefixes that can't be resolved statically). Rather than emit partial paths that would mislead, the output reports a `functional_routing` block (`files`, `route_registrations`, `modeled: false`) plus a warning. When the annotation surface is empty but functional routes exist, the warning explicitly tells you not to read it as "no endpoints". Annotation-based (MVC/JAX-RS) repos are unaffected.
|
|
372
373
|
|
|
@@ -387,6 +388,26 @@ Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.)
|
|
|
387
388
|
|
|
388
389
|
Matching endpoints then report `policy: "custom"` with `annotation`, `resourceName`, and `requiredLevel`, and are no longer counted in `no_security_signal`. Repos without the config behave exactly as before.
|
|
389
390
|
|
|
391
|
+
### `export` — architecture views for downstream tooling
|
|
392
|
+
|
|
393
|
+
```bash
|
|
394
|
+
sourcecode export /path/to/repo --by-directory # code map, path:line refs
|
|
395
|
+
sourcecode export /path/to/repo --module-graph # module→module dependencies
|
|
396
|
+
sourcecode export /path/to/repo --integrations # outbound HTTP/LDAP/JMS clients
|
|
397
|
+
sourcecode export /path/to/repo --c4 # unified architecture + manifest
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
Emits **structured, tool-agnostic** codebase views as plain JSON/YAML — the kind of input an architecture-doc generator, diagram renderer, or code-search agent can consume directly instead of walking the tree file by file. Section labels map to the open [C4 model](https://c4model.com) (an open architecture notation, not a product); the schema is vendor-neutral.
|
|
401
|
+
|
|
402
|
+
| Flag | Output |
|
|
403
|
+
|------|--------|
|
|
404
|
+
| `--by-directory` | One group per source directory, each symbol with a `source_file:line` reference. |
|
|
405
|
+
| `--module-graph` | `{nodes, edges, summary}` — directories as modules, inter-module dependencies rolled up from class-level relation edges with hit counts + edge types. |
|
|
406
|
+
| `--integrations` | Outbound integrations (`RestTemplate`, `WebClient`, `@FeignClient`, `LdapTemplate`, `JmsTemplate`, ActiveMQ) with `file:line` evidence and a literal `target` URL/name when present. |
|
|
407
|
+
| `--c4` | Unified document: `c4.{context, containers, components, code}` + `api_surface` + a `manifest` with per-directory content hashes for **incremental** consumers (skip directories whose hash is unchanged). |
|
|
408
|
+
|
|
409
|
+
The section flags compose: pass several to get one multi-section document. `--c4` assembles the full export on its own. URLs assembled at runtime yield `target: null` (honest absence, never a guess). Containers are derived from build files (Maven/Gradle); when none can be derived, that is reported as a limitation.
|
|
410
|
+
|
|
390
411
|
### `spring-audit` — Spring semantic audit [free]
|
|
391
412
|
|
|
392
413
|
```bash
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=iHGCfyboU5livWODKEj-u8oT6BwJInerv6YHn28vXno,103
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=liCwQmLgb5vplohy8arjYxs_HOIv5C9MjLh_gY6bc5Q,44115
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
@@ -7,7 +7,7 @@ sourcecode/cache.py,sha256=1V3vsaODAa2UBJAC0xpvxpmRdriCezQx5Q8JCcfgziE,31892
|
|
|
7
7
|
sourcecode/canonical_ir.py,sha256=DEwucOPJguLsVtg5cV8mWXNi112l5jmBhv73KGGebVk,24849
|
|
8
8
|
sourcecode/cir_graphs.py,sha256=9G0HHj1kw2325IDyzo2OpX73BNswEckecf4MZUXB4JM,12078
|
|
9
9
|
sourcecode/classifier.py,sha256=hKzg-nQ47htqqIUzSGvYxv15cXrA3KgICTwJmdqal0o,8095
|
|
10
|
-
sourcecode/cli.py,sha256=
|
|
10
|
+
sourcecode/cli.py,sha256=IZ_TcUzd-rUFoIYMgkOcGP-FqiYoOGPxZ3sjPkEp4OM,272648
|
|
11
11
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
12
12
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
13
13
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -24,10 +24,11 @@ sourcecode/explain.py,sha256=dVG35YBlpRmbtOXSmspEhoIwDMVApPmLISBy3iigUSc,16913
|
|
|
24
24
|
sourcecode/file_chunker.py,sha256=3vkM3mDQ5eE_yTPvUgjyjpGFBIjkW6_mrBmIbrylnA8,16444
|
|
25
25
|
sourcecode/file_classifier.py,sha256=A0fEABqtfVu1MfoaxnPAvGpZgneGgVXlJDhT74NYXxE,15314
|
|
26
26
|
sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,28781
|
|
27
|
-
sourcecode/format_contract.py,sha256=
|
|
27
|
+
sourcecode/format_contract.py,sha256=1cTNqwP8geA2hbQoBHUPgX3_vSh3l8guJT_jmgEnFF8,3466
|
|
28
28
|
sourcecode/fqn_utils.py,sha256=XLU7zDkNBXz_RZkIUNfpPmp1nekWtqP-fxV92tDV1vg,2158
|
|
29
29
|
sourcecode/git_analyzer.py,sha256=JStxTQXNjBWi_wLdwhsZs9mT-v50cSJIz4Agzn6Kh9I,13362
|
|
30
30
|
sourcecode/graph_analyzer.py,sha256=DHR8fY69oU_Pi4SYaWboX6EoEFrctQKB9dsjpqwGMzw,62403
|
|
31
|
+
sourcecode/integration_detector.py,sha256=ZJqrGwvZ4ee2JTGhlazKk67aZi173HxkhNpl8Yntpd8,6503
|
|
31
32
|
sourcecode/license.py,sha256=i_X1bYdobL_z9OVuLiycnWEFSaaNhcKKuTd6G55U3_k,20747
|
|
32
33
|
sourcecode/mcp_nudge.py,sha256=5ELU_ixzh6uA83NXLOZT8h00OhL53okfQdji3jyKOjg,2917
|
|
33
34
|
sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
|
|
@@ -44,7 +45,7 @@ sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
|
|
|
44
45
|
sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
|
|
45
46
|
sourcecode/rename_refactor.py,sha256=h6dNFlB9aZ_3q6heeHBkgXQeXaT03nvPSsYH6P8qxFg,12965
|
|
46
47
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
47
|
-
sourcecode/repository_ir.py,sha256=
|
|
48
|
+
sourcecode/repository_ir.py,sha256=n1H0OROkD1dHvpWAtDoYNHGlTkVhQpYIFqIQ3jf3mgs,214101
|
|
48
49
|
sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
|
|
49
50
|
sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
|
|
50
51
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
@@ -101,8 +102,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
101
102
|
sourcecode/telemetry/events.py,sha256=LtzYfaX9Ilckj5PTvAcTpDa9mLqDsYPDUiDkRa58piY,2580
|
|
102
103
|
sourcecode/telemetry/filters.py,sha256=NHa5T-6DaZduQPFuC34jOqHWQgSizM-Ygq8aZ4j19ng,5834
|
|
103
104
|
sourcecode/telemetry/transport.py,sha256=4gGHsq0WeY9VywEZXA3vUxykfiYnw9uuqfjAAec7F8o,1681
|
|
104
|
-
sourcecode-1.
|
|
105
|
-
sourcecode-1.
|
|
106
|
-
sourcecode-1.
|
|
107
|
-
sourcecode-1.
|
|
108
|
-
sourcecode-1.
|
|
105
|
+
sourcecode-1.53.0.dist-info/METADATA,sha256=Ahvq7n0P2M28DyG8mqksS-g11VTDnqDCjdPSZ23NjH0,36719
|
|
106
|
+
sourcecode-1.53.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
107
|
+
sourcecode-1.53.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
108
|
+
sourcecode-1.53.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
109
|
+
sourcecode-1.53.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|