sourcecode 1.36.5__py3-none-any.whl → 1.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.36.5"
3
+ __version__ = "1.38.0"
sourcecode/cli.py CHANGED
@@ -401,6 +401,28 @@ def _emit_error_json(error: str, message: str, **context: object) -> None:
401
401
  sys.stderr.flush()
402
402
 
403
403
 
404
def _enforce_format(command: str, fmt: str) -> None:
    """Reject an unsupported ``--format`` value for ``command``.

    This is the one validation path shared by every command: the allowed
    values come from ``sourcecode.format_contract``. A valid ``fmt`` returns
    silently. An invalid one is reported through ``_emit_error_json`` using
    the shared error-envelope fields, after which the process exits with
    ``FORMAT_ERROR_EXIT_CODE``.

    Args:
        command: Registry key of the command being validated.
        fmt: The value the user passed to ``--format``.

    Raises:
        typer.Exit: When ``fmt`` is not allowed for ``command``.
    """
    # Imported lazily, inside the function, exactly as the original does.
    from sourcecode.format_contract import (
        FORMAT_ERROR_EXIT_CODE,
        format_error_context,
        is_valid_format,
    )

    if not is_valid_format(command, fmt):
        envelope = format_error_context(command, fmt)
        # "message" is the positional human text; everything else is context.
        human_message = str(envelope.pop("message"))
        _emit_error_json(INVALID_INPUT_CODE, human_message, **envelope)
        raise typer.Exit(code=FORMAT_ERROR_EXIT_CODE)
424
+
425
+
404
426
  def _safe_write_file(path: "Path", content: str) -> None:
405
427
  """Write content to path, emitting a clean JSON error on I/O failure."""
406
428
  try:
@@ -631,7 +653,8 @@ def _active_flags(
631
653
  if fmt != "json": flags.append("--format")
632
654
  return flags
633
655
 
634
- FORMAT_CHOICES = ["json", "yaml"]
656
+ # Per-command output-format contracts now live in sourcecode.format_contract
657
+ # (validated via _enforce_format). No module-level FORMAT_CHOICES here.
635
658
  GRAPH_DETAIL_CHOICES = ["high", "medium", "full"]
636
659
  GRAPH_EDGE_CHOICES = {"imports", "calls", "contains", "extends"}
637
660
  DOCS_DEPTH_CHOICES = ["module", "symbols", "full"]
@@ -1138,17 +1161,7 @@ def main(
1138
1161
  )
1139
1162
 
1140
1163
  # Validate format choices
1141
- if format not in FORMAT_CHOICES:
1142
- _emit_error_json(
1143
- INVALID_INPUT_CODE,
1144
- f"Invalid value '{format}' for --format. Valid values: {', '.join(FORMAT_CHOICES)}.",
1145
- flag="--format",
1146
- value=format,
1147
- valid_values=list(FORMAT_CHOICES),
1148
- hint="Choose one of the supported --format values.",
1149
- expected=f"One of: {', '.join(FORMAT_CHOICES)}",
1150
- )
1151
- raise typer.Exit(code=2) # FIX-P2-7: arg validation → exit 2
1164
+ _enforce_format("main", format)
1152
1165
  if graph_detail not in GRAPH_DETAIL_CHOICES:
1153
1166
  _emit_error_json(
1154
1167
  INVALID_INPUT_CODE,
@@ -2834,19 +2847,9 @@ def prepare_context_cmd(
2834
2847
  # Validate --format: only "json" and "github-comment" are valid for prepare-context.
2835
2848
  # "yaml" is intentionally NOT supported here (use main command for yaml output).
2836
2849
  # Invalid values must error loudly — silently falling through to JSON is a lie.
2837
- _PC_FORMAT_CHOICES = ("json", "github-comment")
2838
- if format is not None and format not in _PC_FORMAT_CHOICES:
2839
- _emit_error_json(
2840
- INVALID_INPUT_CODE,
2841
- f"invalid value '{format}' for --format. "
2842
- f"Valid options: {', '.join(_PC_FORMAT_CHOICES)}.",
2843
- flag="--format",
2844
- value=format,
2845
- valid_values=list(_PC_FORMAT_CHOICES),
2846
- hint="Choose one of the supported prepare-context output formats.",
2847
- expected=f"One of: {', '.join(_PC_FORMAT_CHOICES)}",
2848
- )
2849
- raise typer.Exit(code=2)
2850
+ # None means "use default" (json); a concrete value is validated against the contract.
2851
+ if format is not None:
2852
+ _enforce_format("prepare-context", format)
2850
2853
  # github-comment only renders for review-pr; warn and normalize for other tasks.
2851
2854
  if format == "github-comment" and task != "review-pr":
2852
2855
  typer.echo(
@@ -3479,14 +3482,7 @@ def repo_ir_cmd(
3479
3482
 
3480
3483
  from sourcecode.repository_ir import apply_ir_size_limits, build_repo_ir, find_java_files
3481
3484
 
3482
- if format not in ("json", "yaml"):
3483
- _emit_error_json(
3484
- INVALID_INPUT_CODE,
3485
- f"Invalid format '{format}'.",
3486
- hint="Valid values: json, yaml.",
3487
- expected="json | yaml",
3488
- )
3489
- raise typer.Exit(code=1)
3485
+ _enforce_format("repo-ir", format)
3490
3486
 
3491
3487
  root = path.resolve()
3492
3488
  if not root.is_dir():
@@ -3712,14 +3708,7 @@ def impact_cmd(
3712
3708
  from sourcecode.license import require_repo_or_pro as _require_repo_or_pro
3713
3709
  _require_repo_or_pro(str(path.resolve()), "impact")
3714
3710
 
3715
- if format not in ("json", "yaml"):
3716
- _emit_error_json(
3717
- INVALID_INPUT_CODE,
3718
- f"Invalid format '{format}'.",
3719
- hint="format must be: json or yaml.",
3720
- expected="json | yaml",
3721
- )
3722
- raise typer.Exit(code=1)
3711
+ _enforce_format("impact", format)
3723
3712
 
3724
3713
  from sourcecode.repository_ir import (
3725
3714
  build_repo_ir, find_java_files, compute_blast_radius,
@@ -3875,14 +3864,7 @@ def endpoints_cmd(
3875
3864
  sourcecode endpoints . --controller LiquidacionJornada
3876
3865
  sourcecode endpoints . --limit 10
3877
3866
  """
3878
- if format not in ("json", "yaml"):
3879
- _emit_error_json(
3880
- INVALID_INPUT_CODE,
3881
- f"Invalid format '{format}'.",
3882
- hint="format must be: json or yaml.",
3883
- expected="json | yaml",
3884
- )
3885
- raise typer.Exit(code=1)
3867
+ _enforce_format("endpoints", format)
3886
3868
 
3887
3869
  target = path.resolve()
3888
3870
  if not target.exists() or not target.is_dir():
@@ -4116,14 +4098,7 @@ def spring_audit_cmd(
4116
4098
  )
4117
4099
  raise typer.Exit(code=1)
4118
4100
 
4119
- if format not in ("json", "yaml", "github-comment"):
4120
- _emit_error_json(
4121
- INVALID_INPUT_CODE,
4122
- f"Invalid format '{format}'.",
4123
- hint="format must be one of: json, yaml, github-comment.",
4124
- expected="json | yaml | github-comment",
4125
- )
4126
- raise typer.Exit(code=1)
4101
+ _enforce_format("spring-audit", format)
4127
4102
 
4128
4103
  _file_limitations: list[str] = []
4129
4104
  file_list = find_java_files(target, limitations=_file_limitations)
@@ -4274,14 +4249,7 @@ def migrate_check_cmd(
4274
4249
  )
4275
4250
  raise typer.Exit(code=1)
4276
4251
 
4277
- if format not in ("json", "text"):
4278
- _emit_error_json(
4279
- INVALID_INPUT_CODE,
4280
- f"Invalid format '{format}'.",
4281
- hint="format must be one of: json, text.",
4282
- expected="json | text",
4283
- )
4284
- raise typer.Exit(code=1)
4252
+ _enforce_format("migrate-check", format)
4285
4253
 
4286
4254
  if min_severity not in ("critical", "high", "medium", "low"):
4287
4255
  _emit_error_json(
@@ -4426,14 +4394,7 @@ def impact_chain_cmd(
4426
4394
  )
4427
4395
  raise typer.Exit(code=1)
4428
4396
 
4429
- if format not in ("json", "yaml"):
4430
- _emit_error_json(
4431
- INVALID_INPUT_CODE,
4432
- f"Invalid format '{format}'.",
4433
- hint="format must be: json or yaml.",
4434
- expected="json | yaml",
4435
- )
4436
- raise typer.Exit(code=1)
4397
+ _enforce_format("impact-chain", format)
4437
4398
 
4438
4399
  file_list = find_java_files(target)
4439
4400
  if not file_list:
@@ -4567,14 +4528,7 @@ def pr_impact_cmd(
4567
4528
  )
4568
4529
  raise typer.Exit(code=1)
4569
4530
 
4570
- if format not in ("text", "json"):
4571
- _emit_error_json(
4572
- INVALID_INPUT_CODE,
4573
- f"Invalid format '{format}'.",
4574
- hint="format must be: text or json.",
4575
- expected="text | json",
4576
- )
4577
- raise typer.Exit(code=1)
4531
+ _enforce_format("pr-impact", format)
4578
4532
 
4579
4533
  # Read changed-files list
4580
4534
  changed_files = [
@@ -4699,14 +4653,7 @@ def explain_cmd(
4699
4653
  )
4700
4654
  raise typer.Exit(code=1)
4701
4655
 
4702
- if format not in ("text", "json"):
4703
- _emit_error_json(
4704
- INVALID_INPUT_CODE,
4705
- f"Invalid format '{format}'.",
4706
- hint="format must be: text or json.",
4707
- expected="text | json",
4708
- )
4709
- raise typer.Exit(code=1)
4656
+ _enforce_format("explain", format)
4710
4657
 
4711
4658
  file_list = find_java_files(target)
4712
4659
  if not file_list:
@@ -0,0 +1,86 @@
1
+ """Single source of truth for per-command output-format contracts.
2
+
3
+ Every CLI command that emits machine-consumable output validates its
4
+ ``--format`` option through this registry so that:
5
+
6
+ * the set of allowed formats for each command lives in exactly one place,
7
+ * ``-f json`` is a strict contract on every command (pure JSON to stdout),
8
+ * invalid-format errors share an identical envelope shape and exit code.
9
+
10
+ The registry value is an *ordered* tuple; element ``0`` is the command's
11
+ default and matches its Typer option default. Defaults are intentionally NOT
12
+ changed when centralizing — ``explain`` and ``pr-impact`` keep their
13
+ human-facing ``text`` default — to avoid breaking existing scripts. The strict
14
+ guarantee is on ``-f json``, which every command supports.
15
+
16
+ Exit-code policy: an invalid ``--format`` is an argument-validation error and
17
+ exits with code ``2`` for every command (matching the documented
18
+ ``arg validation -> exit 2`` convention used by the root command).
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
# Command name (as registered with ``@app.command``, or "main" for the root
# command) -> ordered tuple of allowed formats. Element 0 is the default.
# Order matters: default_format() returns element 0, so reordering a tuple
# changes what that helper reports for the command.
FORMAT_REGISTRY: "dict[str, tuple[str, ...]]" = {
    "main": ("json", "yaml"),
    "repo-ir": ("json", "yaml"),
    "impact": ("json", "yaml"),
    "endpoints": ("json", "yaml"),
    "impact-chain": ("json", "yaml"),
    "pr-impact": ("text", "json"),
    "migrate-check": ("json", "text"),
    "spring-audit": ("json", "yaml", "github-comment"),
    "explain": ("text", "json"),
    "prepare-context": ("json", "github-comment"),
}

# Invalid --format is an argument-validation error.
FORMAT_ERROR_EXIT_CODE = 2

# The strict machine-readable format every command must support.
# Every tuple in FORMAT_REGISTRY above contains this value.
STRICT_FORMAT = "json"
43
+
44
+
45
def allowed_formats(command: str) -> "tuple[str, ...]":
    """Look up the ordered tuple of formats registered for ``command``.

    Args:
        command: Registry key of the command.

    Returns:
        The tuple stored in ``FORMAT_REGISTRY``; element 0 is the default.

    Raises:
        KeyError: When ``command`` has no registered contract. The message
            tells the developer where to add it, since a missing entry is a
            programming error rather than a user error.
    """
    try:
        formats = FORMAT_REGISTRY[command]
    except KeyError as missing:
        raise KeyError(
            f"No format contract registered for command '{command}'. "
            f"Add it to FORMAT_REGISTRY in sourcecode/format_contract.py."
        ) from missing
    else:
        return formats
58
+
59
+
60
def default_format(command: str) -> str:
    """Return the first registered format for ``command``, i.e. its default.

    Raises:
        KeyError: Propagated from ``allowed_formats`` for an unknown command.
    """
    registered = allowed_formats(command)
    return registered[0]
63
+
64
+
65
def is_valid_format(command: str, fmt: str) -> bool:
    """Tell whether ``fmt`` is one of the formats registered for ``command``.

    A command with no registry entry yields ``False`` here; this function
    does not raise for unknown commands.
    """
    permitted = FORMAT_REGISTRY.get(command)
    if permitted is None:
        return False
    return fmt in permitted
68
+
69
+
70
def format_error_context(command: str, fmt: str) -> "dict[str, object]":
    """Assemble the shared error-envelope fields for a rejected ``--format``.

    Args:
        command: Registry key of the command that rejected the value.
        fmt: The offending ``--format`` value.

    Returns:
        A new dict. Its ``message`` entry is the human-readable text; the
        remaining entries (``flag``, ``value``, ``valid_values``, ``hint``,
        ``expected``) are the context fields of the envelope.

    Raises:
        KeyError: Propagated from ``allowed_formats`` for an unknown command.
    """
    valid_values = [*allowed_formats(command)]
    listing = ", ".join(valid_values)
    return dict(
        message=f"Invalid value '{fmt}' for --format. Valid values: {listing}.",
        flag="--format",
        value=fmt,
        valid_values=valid_values,
        hint="Choose one of the supported --format values.",
        expected=f"One of: {listing}",
    )
@@ -0,0 +1,431 @@
1
+ """OpenAPI spec surface extraction (Phase 18, wave 18-01).
2
+
3
+ Many enterprise Spring repos generate their HTTP surface, DTOs and validation
4
+ constraints from an OpenAPI spec via openapi-generator: controllers
5
+ ``implements XxxApi`` where the mapping annotations and DTO classes live under
6
+ ``target/generated-sources`` (excluded from the source scan). The structural
7
+ scanner therefore sees no routes and no constraints for those controllers.
8
+
9
+ The spec itself, however, ships in the repo source (commonly
10
+ ``src/main/resources/openapi.yml``): always present, deterministic, no build
11
+ required. This module discovers and parses that spec into a normalized surface
12
+ — operations (method/path/operationId/tags/requestBody) and schemas (fields
13
+ with validation constraints) — so downstream code can recover the endpoint and
14
+ constraint surface without touching generated sources.
15
+
16
+ Design notes:
17
+ * Pure extraction, not validation: we never assert spec conformance.
18
+ * Defensive: a malformed or partial spec yields a partial surface, never an
19
+ exception. Unresolvable ``$ref``/``allOf`` are skipped, not fatal.
20
+ * Bounded: discovery is limited to well-known locations + a capped content
21
+ sniff so it never walks an entire large tree.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ from dataclasses import dataclass, field
28
+ from pathlib import Path
29
+ from typing import Any, Optional
30
+
31
# Path-item keys treated as operations; anything else under a path is ignored.
_HTTP_METHODS = ("get", "put", "post", "delete", "patch", "options", "head", "trace")

# Filenames that are almost certainly an API spec.
# Matched as substrings of the lower-cased file stem during discovery.
_SPEC_NAME_HINTS = ("openapi", "swagger", "api-docs")

# Directories worth searching first (relative to repo root).
# The first discovery pass lists each directory directly (no recursion).
_SPEC_DIRS = (
    "src/main/resources",
    "src/main/resources/openapi",
    "api",
    "apis",
    "openapi",
    "spec",
    "specs",
    "docs",
    "contracts",
    ".",
)

# Cap on how many candidate files we content-sniff, to stay fast on big repos.
_SNIFF_CAP = 400
# Cap on $ref / allOf resolution depth, to stay safe on cyclic specs.
_RESOLVE_DEPTH = 8
54
+
55
+
56
@dataclass
class FieldConstraint:
    """One schema property together with the validation constraints on it."""

    name: str
    type: Optional[str] = None
    required: bool = False
    pattern: Optional[str] = None
    min_length: Optional[int] = None
    max_length: Optional[int] = None
    minimum: Optional[float] = None
    maximum: Optional[float] = None
    fmt: Optional[str] = None
    enum: Optional[list[Any]] = None
    # Schema name when the field is an object/array ref.
    ref: Optional[str] = None

    def to_dict(self) -> "dict[str, Any]":
        """Serialize to a dict using OpenAPI-style key names.

        ``name`` and ``required`` are always present. Every other entry is
        emitted only when its value is not ``None``, so falsy values such as
        ``0`` or ``[]`` are kept.
        """
        optional_entries = {
            "type": self.type,
            "pattern": self.pattern,
            "minLength": self.min_length,
            "maxLength": self.max_length,
            "minimum": self.minimum,
            "maximum": self.maximum,
            "format": self.fmt,
            "enum": self.enum,
            "ref": self.ref,
        }
        serialized: "dict[str, Any]" = {"name": self.name, "required": self.required}
        serialized.update(
            (key, value) for key, value in optional_entries.items() if value is not None
        )
        return serialized
88
+
89
+
90
@dataclass
class OpenApiSchema:
    """A named schema and the flattened list of its fields."""

    name: str
    fields: "list[FieldConstraint]" = field(default_factory=list)

    def to_dict(self) -> "dict[str, Any]":
        """Serialize to ``{"name": ..., "fields": [...]}`` with each field as a dict."""
        serialized_fields = []
        for constraint in self.fields:
            serialized_fields.append(constraint.to_dict())
        return {"name": self.name, "fields": serialized_fields}
97
+
98
+
99
@dataclass
class OpenApiOperation:
    """One HTTP operation (method + path) read from a spec."""

    method: str
    path: str
    operation_id: Optional[str] = None
    tags: "list[str]" = field(default_factory=list)
    # Schema name (ref) of the request body.
    request_body_schema: Optional[str] = None
    has_security: bool = False

    def to_dict(self) -> "dict[str, Any]":
        """Serialize to a dict with camelCase keys.

        ``method``, ``path`` and ``hasSecurity`` are always present;
        ``operationId``, ``tags`` and ``requestBodySchema`` appear only when
        their value is truthy.
        """
        serialized: "dict[str, Any]" = {"method": self.method, "path": self.path}
        optional_entries = (
            ("operationId", self.operation_id),
            ("tags", self.tags),
            ("requestBodySchema", self.request_body_schema),
        )
        for key, value in optional_entries:
            if value:
                serialized[key] = value
        serialized["hasSecurity"] = self.has_security
        return serialized
118
+
119
+
120
@dataclass
class OpenApiSurface:
    """Everything extracted from one spec file: its operations and schemas."""

    spec_path: str
    operations: "list[OpenApiOperation]" = field(default_factory=list)
    schemas: "dict[str, OpenApiSchema]" = field(default_factory=dict)

    def to_dict(self) -> "dict[str, Any]":
        """Serialize the surface, converting each operation and schema to a dict."""
        serialized_ops = [operation.to_dict() for operation in self.operations]
        serialized_schemas = {}
        for schema_name, schema in self.schemas.items():
            serialized_schemas[schema_name] = schema.to_dict()
        return dict(
            spec_path=self.spec_path,
            operations=serialized_ops,
            schemas=serialized_schemas,
        )
132
+
133
+
134
def tag_to_interface(tag: str) -> str:
    """Derive the ``{PascalCaseTag}Api`` interface name for an OpenAPI tag.

    The tag is split on runs of ``-``, ``_`` and whitespace; the first
    character of each word is upper-cased (the rest is left untouched) and
    ``Api`` is appended. E.g. ``owners`` -> ``OwnersApi``, ``owner-v2`` ->
    ``OwnerV2Api``, ``vet_v2`` -> ``VetV2Api``.
    """
    import re as _re

    pieces = []
    for chunk in _re.split(r"[-_\s]+", tag):
        if chunk:
            pieces.append(chunk[0].upper() + chunk[1:])
    pieces.append("Api")
    return "".join(pieces)
145
+
146
+
147
+ # ── Discovery ──────────────────────────────────────────────────────────────
148
+
149
+
150
+ def _looks_like_spec(data: Any) -> bool:
151
+ return isinstance(data, dict) and ("openapi" in data or "swagger" in data)
152
+
153
+
154
+ def _load_yaml_or_json(path: Path) -> Optional[Any]:
155
+ """Load a YAML or JSON document, returning None on any failure."""
156
+ try:
157
+ text = path.read_text(encoding="utf-8")
158
+ except (OSError, UnicodeDecodeError):
159
+ return None
160
+ suffix = path.suffix.lower()
161
+ try:
162
+ if suffix == ".json":
163
+ return json.loads(text)
164
+ # .yml/.yaml (and unknown) -> YAML, which is a JSON superset.
165
+ from ruamel.yaml import YAML
166
+
167
+ yaml = YAML(typ="safe")
168
+ return yaml.load(text)
169
+ except Exception:
170
+ # Last resort: a .json-less file that is actually JSON.
171
+ try:
172
+ return json.loads(text)
173
+ except Exception:
174
+ return None
175
+
176
+
177
def find_openapi_specs(root: Path) -> "list[Path]":
    """Discover OpenAPI/Swagger spec files under ``root``.

    Pass 1 lists each directory in ``_SPEC_DIRS`` (no recursion) and keeps
    ``.yml/.yaml/.json`` files whose stem contains a ``_SPEC_NAME_HINTS``
    entry and whose parsed content looks like a spec. If pass 1 confirms
    anything, that is the result.

    Pass 2 (fallback) walks ``src/main/resources`` and then the repo root,
    parsing at most ``_SNIFF_CAP`` not-yet-inspected yaml/json files and
    skipping build-output directories.

    Args:
        root: Repository root to search.

    Returns:
        Content-confirmed spec paths, sorted by their string form. Empty
        when nothing is found. Filesystem errors are swallowed.
    """
    root = Path(root)
    spec_suffixes = (".yml", ".yaml", ".json")
    build_dirs = {"target", "node_modules", "build"}
    confirmed: "list[Path]" = []
    seen: "set[Path]" = set()

    def _claim(p: Path) -> bool:
        """Mark ``p`` as inspected; False if already inspected or unresolvable."""
        try:
            rp = p.resolve()
        except OSError:
            return False
        if rp in seen:
            return False
        seen.add(rp)
        return True

    def _is_spec(p: Path) -> bool:
        return _looks_like_spec(_load_yaml_or_json(p))

    # Pass 1: filename-hinted files in well-known dirs, confirmed by content.
    for rel in _SPEC_DIRS:
        d = root / rel
        if not d.is_dir():
            continue
        try:
            entries = sorted(d.iterdir())
        except OSError:
            continue
        for p in entries:
            if not p.is_file() or p.suffix.lower() not in spec_suffixes:
                continue
            stem = p.stem.lower()
            if not any(h in stem for h in _SPEC_NAME_HINTS):
                continue
            if _claim(p) and _is_spec(p):
                confirmed.append(p)

    if confirmed:
        return sorted(confirmed, key=str)

    # Pass 2 (fallback): bounded content sniff of resource-y yaml/json files.
    sniffed = 0
    for rel in ("src/main/resources", "."):
        if sniffed >= _SNIFF_CAP:
            # Budget exhausted: do not walk the next (larger) tree at all.
            break
        d = root / rel
        if not d.is_dir():
            continue
        try:
            walked = sorted(d.rglob("*"))
        except OSError:
            continue
        for p in walked:
            if sniffed >= _SNIFF_CAP:
                break
            if p.suffix.lower() not in spec_suffixes or not p.is_file():
                continue
            # Skip obvious build output. Only path segments *below* root are
            # tested, so a repo that itself lives under a directory named
            # "build" or "target" is not skipped wholesale.
            try:
                rel_parts = p.relative_to(root).parts
            except ValueError:
                rel_parts = p.parts
            if build_dirs.intersection(seg.lower() for seg in rel_parts):
                continue
            # Files already inspected (pass-1 rejects, or files reached via
            # the earlier root) are not re-parsed and cost no sniff budget.
            if not _claim(p):
                continue
            sniffed += 1
            if _is_spec(p):
                confirmed.append(p)
    return sorted(confirmed, key=str)
246
+
247
+
248
+ # ── Parsing ────────────────────────────────────────────────────────────────
249
+
250
+
251
+ def _ref_name(ref: Any) -> Optional[str]:
252
+ """Return the trailing name of a ``#/components/schemas/Xxx`` ref."""
253
+ if isinstance(ref, str) and ref.startswith("#/"):
254
+ return ref.rsplit("/", 1)[-1]
255
+ return None
256
+
257
+
258
def _field_from_property(name: str, prop: Any, required: bool) -> FieldConstraint:
    """Build a ``FieldConstraint`` from one entry of a schema's ``properties``.

    Args:
        name: Property name.
        prop: The property's schema node. Anything that is not a dict yields
            a constraint carrying only ``name`` and ``required``.
        required: Initial value of the ``required`` flag.

    Returns:
        The populated constraint. ``ref`` is the schema name of a direct
        ``$ref`` or, for ``type: array`` properties, of ``items.$ref``.
    """
    fc = FieldConstraint(name=name, required=required)
    if not isinstance(prop, dict):
        return fc

    ref = _ref_name(prop.get("$ref"))
    if ref:
        fc.ref = ref
    fc.type = prop.get("type")
    fc.pattern = prop.get("pattern")
    fc.fmt = prop.get("format")

    def _is_number(value: Any, kinds: Any) -> bool:
        # bool is a subclass of int; ``minLength: true`` is malformed, not 1.
        return isinstance(value, kinds) and not isinstance(value, bool)

    for src, dst in (("minLength", "min_length"), ("maxLength", "max_length")):
        v = prop.get(src)
        if _is_number(v, int):
            setattr(fc, dst, v)
    for bound in ("minimum", "maximum"):
        v = prop.get(bound)
        if _is_number(v, (int, float)):
            setattr(fc, bound, float(v))

    enum = prop.get("enum")
    if isinstance(enum, list):
        fc.enum = list(enum)

    # Array of refs: record the item schema name. The previous guard also
    # required ``fc.type is None``, which can never hold once the type is
    # "array", so item refs were silently dropped.
    if fc.ref is None and prop.get("type") == "array":
        items = prop.get("items")
        if isinstance(items, dict):
            fc.ref = _ref_name(items.get("$ref")) or None
    return fc
284
+
285
+
286
def _resolve_schema_fields(
    node: Any,
    all_schemas: "dict[str, Any]",
    depth: int = 0,
    _seen: "Optional[set[str]]" = None,
) -> "tuple[dict[str, FieldConstraint], set[str]]":
    """Recursively flatten a schema node (handling allOf + $ref) into fields.

    Args:
        node: Schema node to flatten; non-dict nodes yield an empty result.
        all_schemas: Name -> raw schema mapping used to resolve ``$ref``.
        depth: Current recursion depth; resolution stops past
            ``_RESOLVE_DEPTH``.
        _seen: Schema names already followed on this ``$ref`` chain, used to
            break reference cycles.

    Returns:
        ``(fields, required)``: an insertion-ordered field map keyed by
        property name, and the set of required property names. Malformed
        ``allOf`` / ``required`` values (anything that is not a list) are
        ignored rather than raising, so one bad schema cannot abort parsing
        of the others.
    """
    fields: "dict[str, FieldConstraint]" = {}
    required: "set[str]" = set()
    if depth > _RESOLVE_DEPTH or not isinstance(node, dict):
        return fields, required
    seen = _seen if _seen is not None else set()

    # $ref -> the node *is* the referenced schema; resolve and return it.
    ref = _ref_name(node.get("$ref"))
    if ref:
        if ref in seen:
            return fields, required
        target = all_schemas.get(ref)
        if isinstance(target, dict):
            return _resolve_schema_fields(target, all_schemas, depth + 1, seen | {ref})
        return fields, required

    # allOf -> merge each sub-schema; later entries override earlier ones.
    all_of = node.get("allOf")
    if isinstance(all_of, list):
        for sub in all_of:
            sub_fields, sub_req = _resolve_schema_fields(
                sub, all_schemas, depth + 1, seen
            )
            fields.update(sub_fields)
            required |= sub_req

    # required list at this level (``required: true`` and the like are skipped).
    required_names = node.get("required")
    if isinstance(required_names, list):
        required.update(r for r in required_names if isinstance(r, str))

    # direct properties override anything inherited through allOf.
    props = node.get("properties")
    if isinstance(props, dict):
        for pname, prop in props.items():
            fields[pname] = _field_from_property(pname, prop, required=False)

    # apply required flags now that we know the union.
    for rname in required:
        if rname in fields:
            fields[rname].required = True
    return fields, required
336
+
337
+
338
+ def _parse_schemas(components: Any) -> "dict[str, OpenApiSchema]":
339
+ schemas_raw = {}
340
+ if isinstance(components, dict):
341
+ schemas_raw = components.get("schemas") or {}
342
+ if not isinstance(schemas_raw, dict):
343
+ return {}
344
+ out: "dict[str, OpenApiSchema]" = {}
345
+ for name, node in schemas_raw.items():
346
+ fields_map, _ = _resolve_schema_fields(node, schemas_raw)
347
+ out[name] = OpenApiSchema(name=name, fields=list(fields_map.values()))
348
+ return out
349
+
350
+
351
+ def _request_body_schema(operation: Any) -> Optional[str]:
352
+ if not isinstance(operation, dict):
353
+ return None
354
+ body = operation.get("requestBody")
355
+ if not isinstance(body, dict):
356
+ return None
357
+ content = body.get("content")
358
+ if not isinstance(content, dict):
359
+ return None
360
+ # Prefer application/json, else first media type with a schema.
361
+ media_types = [content.get("application/json")] + [
362
+ v for k, v in content.items() if k != "application/json"
363
+ ]
364
+ for media in media_types:
365
+ if not isinstance(media, dict):
366
+ continue
367
+ schema = media.get("schema")
368
+ if isinstance(schema, dict):
369
+ name = _ref_name(schema.get("$ref"))
370
+ if name:
371
+ return name
372
+ # array of refs
373
+ if schema.get("type") == "array":
374
+ items = schema.get("items")
375
+ if isinstance(items, dict):
376
+ return _ref_name(items.get("$ref"))
377
+ return None
378
+
379
+
380
+ def _parse_operations(paths: Any) -> "list[OpenApiOperation]":
381
+ if not isinstance(paths, dict):
382
+ return []
383
+ ops: "list[OpenApiOperation]" = []
384
+ for path, methods in paths.items():
385
+ if not isinstance(methods, dict):
386
+ continue
387
+ for method in _HTTP_METHODS:
388
+ op = methods.get(method)
389
+ if not isinstance(op, dict):
390
+ continue
391
+ tags = op.get("tags")
392
+ ops.append(
393
+ OpenApiOperation(
394
+ method=method.upper(),
395
+ path=str(path),
396
+ operation_id=op.get("operationId"),
397
+ tags=[str(t) for t in tags] if isinstance(tags, list) else [],
398
+ request_body_schema=_request_body_schema(op),
399
+ has_security="security" in op,
400
+ )
401
+ )
402
+ return ops
403
+
404
+
405
def parse_openapi_spec(path: Path) -> Optional[OpenApiSurface]:
    """Load one spec file and extract its surface.

    Returns None when the file cannot be loaded or does not look like a
    spec. If extraction fails part-way, the surface is returned with
    whatever was populated before the failure.
    """
    document = _load_yaml_or_json(Path(path))
    if _looks_like_spec(document):
        surface = OpenApiSurface(spec_path=str(path))
        try:
            surface.operations = _parse_operations(document.get("paths"))
            surface.schemas = _parse_schemas(document.get("components"))
        except Exception:
            # Partial surface beats a crash; return whatever resolved.
            pass
        return surface
    return None
418
+
419
+
420
def build_openapi_surface(root: Path) -> Optional[OpenApiSurface]:
    """Find the specs under ``root`` and return the first usable surface.

    Specs are tried in the order ``find_openapi_specs`` returns them. The
    first one that parses and has at least one operation or schema wins.
    Returns None when no spec qualifies.
    """
    parsed = (parse_openapi_spec(spec) for spec in find_openapi_specs(Path(root)))
    usable = (
        surface
        for surface in parsed
        if surface is not None and (surface.operations or surface.schemas)
    )
    # Generators are lazy, so parsing stops at the first usable surface.
    return next(usable, None)
@@ -3557,8 +3557,9 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
3557
3557
  _CONTROLLER_ANNS = {"@RestController", "@Controller"}
3558
3558
  _IMPLEMENTS_RE = _re.compile(r'\bimplements\s+(.+)$')
3559
3559
  _routed_fqns = {route.get("effective_class") for route in routes}
3560
- interface_defined_controllers: list[str] = []
3561
- endpoint_warnings: list[str] = []
3560
+ # Collect (controller_fqn, [implemented *Api interfaces]) pairs; resolution
3561
+ # against the OpenAPI spec happens below.
3562
+ _iface_controllers: list[tuple[str, list[str]]] = []
3562
3563
  for sym in all_symbols:
3563
3564
  if sym.type != "class":
3564
3565
  continue
@@ -3573,13 +3574,66 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
3573
3574
  api_ifaces = [i for i in ifaces if i.endswith("Api")]
3574
3575
  if not api_ifaces:
3575
3576
  continue
3576
- interface_defined_controllers.append(sym.symbol)
3577
- endpoint_warnings.append(
3578
- f"{sym.symbol.split('.')[-1]} implements {', '.join(api_ifaces)}: HTTP "
3579
- "mappings are declared on the implemented interface (commonly generated by "
3580
- "openapi-generator under target/generated-sources, which is not scanned). "
3581
- "Endpoint surface for this controller is NOT captured."
3582
- )
3577
+ _iface_controllers.append((sym.symbol, api_ifaces))
3578
+
3579
+ # Recover the surface of interface-defined controllers from an OpenAPI spec
3580
+ # shipped in the repo (src/main/resources/openapi.yml & co.). The spec is
3581
+ # always present and deterministic — unlike target/generated-sources — so it
3582
+ # lets us populate routes + request-body constraints without a build. A
3583
+ # controller is "resolved" when its implemented *Api interface maps (via tag)
3584
+ # to spec operations; otherwise it keeps the explicit "not captured" warning.
3585
+ _spec_endpoints: list[dict] = []
3586
+ resolved_from_openapi_spec: list[str] = []
3587
+ interface_defined_controllers: list[str] = []
3588
+ endpoint_warnings: list[str] = []
3589
+ _openapi_spec_path: "str | None" = None
3590
+ _iface_to_ops: dict[str, list] = {}
3591
+ if _iface_controllers:
3592
+ from sourcecode.openapi_surface import build_openapi_surface, tag_to_interface
3593
+ _surface = build_openapi_surface(root)
3594
+ if _surface is not None:
3595
+ _openapi_spec_path = _surface.spec_path
3596
+ for _op in _surface.operations:
3597
+ for _tag in _op.tags:
3598
+ _iface_to_ops.setdefault(tag_to_interface(_tag), []).append(_op)
3599
+ for _fqn, _api_ifaces in _iface_controllers:
3600
+ _matched = [op for i in _api_ifaces for op in _iface_to_ops.get(i, [])]
3601
+ if not _matched:
3602
+ interface_defined_controllers.append(_fqn)
3603
+ endpoint_warnings.append(
3604
+ f"{_fqn.split('.')[-1]} implements {', '.join(_api_ifaces)}: HTTP "
3605
+ "mappings are declared on the implemented interface (commonly "
3606
+ "generated by openapi-generator under target/generated-sources, "
3607
+ "which is not scanned) and no matching OpenAPI spec operation was "
3608
+ "found. Endpoint surface for this controller is NOT captured."
3609
+ )
3610
+ continue
3611
+ resolved_from_openapi_spec.append(_fqn)
3612
+ _ctrl_simple = _fqn.split(".")[-1]
3613
+ for _op in _matched:
3614
+ _entry: dict = {
3615
+ "method": _op.method,
3616
+ "path": _op.path,
3617
+ "controller": _ctrl_simple,
3618
+ "handler": _op.operation_id or "(operation)",
3619
+ "source": "openapi-spec",
3620
+ # Security for generated controllers is declared in the spec /
3621
+ # enforced by the filter chain, not by per-endpoint annotations.
3622
+ "security": {
3623
+ "policy": "openapi_spec"
3624
+ if _op.has_security
3625
+ else "openapi_spec_unspecified"
3626
+ },
3627
+ }
3628
+ if _op.request_body_schema and _surface is not None:
3629
+ _schema = _surface.schemas.get(_op.request_body_schema)
3630
+ if _schema is not None:
3631
+ _entry["request_body"] = {
3632
+ "schema": _op.request_body_schema,
3633
+ "constraints": [f.to_dict() for f in _schema.fields],
3634
+ "source": "openapi-spec",
3635
+ }
3636
+ _spec_endpoints.append(_entry)
3583
3637
 
3584
3638
  endpoints: list[dict] = []
3585
3639
  for route in routes:
@@ -3707,6 +3761,11 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
3707
3761
  if e.get("security", {}).get("policy") == "none_detected"
3708
3762
  )
3709
3763
 
3764
+ # Append spec-recovered endpoints AFTER the security-model heuristics (which
3765
+ # are about annotation/filter/XML coverage of scanned source) so spec-sourced
3766
+ # entries don't skew those signals. They carry their own source provenance.
3767
+ endpoints = endpoints + _spec_endpoints
3768
+
3710
3769
  result: dict[str, Any] = {
3711
3770
  "endpoints": endpoints,
3712
3771
  "total": len(endpoints),
@@ -3720,6 +3779,13 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
3720
3779
  if endpoint_warnings:
3721
3780
  result["warnings"] = endpoint_warnings
3722
3781
  result["interface_defined_controllers"] = interface_defined_controllers
3782
+ # Surface what was recovered from the OpenAPI spec, so a consumer knows the
3783
+ # surface is complete (not the legacy "1 of N" blind spot) and where it came from.
3784
+ if resolved_from_openapi_spec:
3785
+ result["resolved_from_openapi_spec"] = resolved_from_openapi_spec
3786
+ result["spec_sourced_endpoints"] = len(_spec_endpoints)
3787
+ if _openapi_spec_path:
3788
+ result["openapi_spec"] = _openapi_spec_path
3723
3789
  return result
3724
3790
 
3725
3791
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.36.5
3
+ Version: 1.38.0
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
40
40
 
41
41
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
42
42
 
43
- ![Version](https://img.shields.io/badge/version-1.36.5-blue)
43
+ ![Version](https://img.shields.io/badge/version-1.38.0-blue)
44
44
  ![Python](https://img.shields.io/badge/python-3.9%2B-green)
45
45
 
46
46
  ---
@@ -114,7 +114,7 @@ pipx install sourcecode
114
114
 
115
115
  ```bash
116
116
  sourcecode version
117
- # sourcecode 1.36.5
117
+ # sourcecode 1.38.0
118
118
  ```
119
119
 
120
120
  ---
@@ -1,4 +1,4 @@
1
- sourcecode/__init__.py,sha256=o9c3MUAMMI3cMbNxDQQBYbcvFPS_juLXH25a0Wy8jC0,103
1
+ sourcecode/__init__.py,sha256=Wc32jhD7HMnKWYhdxikcnv_UXHFA2NT--SSeAopG1cA,103
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=liCwQmLgb5vplohy8arjYxs_HOIv5C9MjLh_gY6bc5Q,44115
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
@@ -7,7 +7,7 @@ sourcecode/cache.py,sha256=1V3vsaODAa2UBJAC0xpvxpmRdriCezQx5Q8JCcfgziE,31892
7
7
  sourcecode/canonical_ir.py,sha256=DEwucOPJguLsVtg5cV8mWXNi112l5jmBhv73KGGebVk,24849
8
8
  sourcecode/cir_graphs.py,sha256=rZi8JV4ZrAa2WSCeyNa4JIEKQ_yZzDZTsrvVz2KfuKA,8919
9
9
  sourcecode/classifier.py,sha256=hKzg-nQ47htqqIUzSGvYxv15cXrA3KgICTwJmdqal0o,8095
10
- sourcecode/cli.py,sha256=-E7iKh47hQyZGbhy1lM1bxCPUa21XW6zLHurByc7KGc,253149
10
+ sourcecode/cli.py,sha256=b2dzPS2camrLgsBeDmsTKin9skqE0l_xttLvyTXEVgI,251296
11
11
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
12
12
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
13
13
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -24,6 +24,7 @@ sourcecode/explain.py,sha256=dVG35YBlpRmbtOXSmspEhoIwDMVApPmLISBy3iigUSc,16913
24
24
  sourcecode/file_chunker.py,sha256=3vkM3mDQ5eE_yTPvUgjyjpGFBIjkW6_mrBmIbrylnA8,16444
25
25
  sourcecode/file_classifier.py,sha256=A0fEABqtfVu1MfoaxnPAvGpZgneGgVXlJDhT74NYXxE,15314
26
26
  sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,28781
27
+ sourcecode/format_contract.py,sha256=W_V-dWhJyjdMi3gNcQOHjdm2V3ufc262Kp7vdcM9-ZM,3398
27
28
  sourcecode/fqn_utils.py,sha256=XLU7zDkNBXz_RZkIUNfpPmp1nekWtqP-fxV92tDV1vg,2158
28
29
  sourcecode/git_analyzer.py,sha256=JStxTQXNjBWi_wLdwhsZs9mT-v50cSJIz4Agzn6Kh9I,13362
29
30
  sourcecode/graph_analyzer.py,sha256=DHR8fY69oU_Pi4SYaWboX6EoEFrctQKB9dsjpqwGMzw,62403
@@ -31,6 +32,7 @@ sourcecode/license.py,sha256=i_X1bYdobL_z9OVuLiycnWEFSaaNhcKKuTd6G55U3_k,20747
31
32
  sourcecode/mcp_nudge.py,sha256=5ELU_ixzh6uA83NXLOZT8h00OhL53okfQdji3jyKOjg,2917
32
33
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
33
34
  sourcecode/migrate_check.py,sha256=vowVIAxVaHU8vhZUEt-HrWrWM38m6a5INHJQGjEg5E0,55390
35
+ sourcecode/openapi_surface.py,sha256=GgHPC_CUyTLPt2N9eLAWoPmnD1IvIrZzstJrLkvOZwo,14870
34
36
  sourcecode/output_budget.py,sha256=Js9yUlfQtPhqBl9R6wn_9UHVjjJc3GtLcqyfjf5t50Q,9869
35
37
  sourcecode/path_filters.py,sha256=EN1RGZRvLq5EcPgpjYV_IyCKVlAQQn2bbpEisQ5LpGg,3780
36
38
  sourcecode/pr_comment_renderer.py,sha256=smHslxiG14lrytCkq5nFrFu-qTHgA-t-LFYfdrfjz2o,14423
@@ -42,7 +44,7 @@ sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
42
44
  sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
43
45
  sourcecode/rename_refactor.py,sha256=h6dNFlB9aZ_3q6heeHBkgXQeXaT03nvPSsYH6P8qxFg,12965
44
46
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
45
- sourcecode/repository_ir.py,sha256=JuB0Dl1OMbQN-bd8smuVIIKwesLJJga5rCQyUSRm5xA,191971
47
+ sourcecode/repository_ir.py,sha256=2Gr919ylJnY9Z7fxNOZ0UK0GtJ-YN1UqreUs6mP-NHg,195611
46
48
  sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
47
49
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
48
50
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
@@ -98,8 +100,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
98
100
  sourcecode/telemetry/events.py,sha256=LtzYfaX9Ilckj5PTvAcTpDa9mLqDsYPDUiDkRa58piY,2580
99
101
  sourcecode/telemetry/filters.py,sha256=NHa5T-6DaZduQPFuC34jOqHWQgSizM-Ygq8aZ4j19ng,5834
100
102
  sourcecode/telemetry/transport.py,sha256=4gGHsq0WeY9VywEZXA3vUxykfiYnw9uuqfjAAec7F8o,1681
101
- sourcecode-1.36.5.dist-info/METADATA,sha256=ogbSzTG2t7MK6U6P2szIf9nfK99AVOFb9OLXoAm2KMw,32243
102
- sourcecode-1.36.5.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
103
- sourcecode-1.36.5.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
104
- sourcecode-1.36.5.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
105
- sourcecode-1.36.5.dist-info/RECORD,,
103
+ sourcecode-1.38.0.dist-info/METADATA,sha256=fSyrH1y9J5HEZuJc_HjNivpUIyKEnkF2cyJBKsQ-XMI,32243
104
+ sourcecode-1.38.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
105
+ sourcecode-1.38.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
106
+ sourcecode-1.38.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
107
+ sourcecode-1.38.0.dist-info/RECORD,,