sourcecode 1.35.27__py3-none-any.whl → 1.35.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cli.py +6 -3
- sourcecode/explain.py +3 -0
- sourcecode/rename_refactor.py +58 -6
- sourcecode/repository_ir.py +208 -55
- sourcecode/spring_model.py +2 -0
- {sourcecode-1.35.27.dist-info → sourcecode-1.35.29.dist-info}/METADATA +5 -3
- {sourcecode-1.35.27.dist-info → sourcecode-1.35.29.dist-info}/RECORD +11 -11
- {sourcecode-1.35.27.dist-info → sourcecode-1.35.29.dist-info}/WHEEL +0 -0
- {sourcecode-1.35.27.dist-info → sourcecode-1.35.29.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.35.27.dist-info → sourcecode-1.35.29.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cli.py
CHANGED
|
@@ -5397,10 +5397,13 @@ def cold_start_cmd(
|
|
|
5397
5397
|
result = _gcs(target)
|
|
5398
5398
|
if compact:
|
|
5399
5399
|
# P1-C: cap at ~10K tokens — keep only fields essential for orientation.
|
|
5400
|
-
|
|
5401
|
-
|
|
5402
|
-
"validation", "_meta"}
|
|
5400
|
+
# BUG-6 fix: use actual RIS key names (summary/entrypoints, not stacks/entry_points)
|
|
5401
|
+
_cs_keys = {"status", "git_head", "summary", "entrypoints", "endpoints",
|
|
5402
|
+
"project_type", "validation", "_meta"}
|
|
5403
5403
|
result = {k: v for k, v in result.items() if k in _cs_keys}
|
|
5404
|
+
# Truncate endpoints to first 30 to stay within ~10K token budget
|
|
5405
|
+
if isinstance(result.get("endpoints"), list):
|
|
5406
|
+
result["endpoints"] = result["endpoints"][:30]
|
|
5404
5407
|
result["_meta"] = {**(result.get("_meta") or {}), "compact_mode": True,
|
|
5405
5408
|
"full_available": "sourcecode cold-start (without --compact)"}
|
|
5406
5409
|
_out = _json.dumps(result, indent=2, ensure_ascii=False)
|
sourcecode/explain.py
CHANGED
|
@@ -28,6 +28,9 @@ _STEREOTYPE_DESC: dict[str, str] = {
|
|
|
28
28
|
"component": "Spring @Component — general-purpose bean",
|
|
29
29
|
"configuration": "Spring @Configuration — bean factory / config",
|
|
30
30
|
"bean": "Spring @Bean — managed component",
|
|
31
|
+
"entity": "JPA @Entity — persistent domain object mapped to a database table",
|
|
32
|
+
"mappedsuperclass": "JPA @MappedSuperclass — base class sharing persistent state with subclasses",
|
|
33
|
+
"embeddable": "JPA @Embeddable — value object embedded in owning entity table",
|
|
31
34
|
}
|
|
32
35
|
|
|
33
36
|
_SECURITY_ANNOTATION_PREFIXES = (
|
sourcecode/rename_refactor.py
CHANGED
|
@@ -106,7 +106,11 @@ def _collect_java_files(root: Path, *, include_tests: bool = True) -> list[Path]
|
|
|
106
106
|
if any(part in _VENDOR_DIRS for part in parts[:-1]):
|
|
107
107
|
continue
|
|
108
108
|
if not include_tests:
|
|
109
|
-
if
|
|
109
|
+
if (
|
|
110
|
+
"/src/test/" in rel or rel.startswith("src/test/")
|
|
111
|
+
or "/src/tests/" in rel or rel.startswith("src/tests/")
|
|
112
|
+
or rel.startswith("test/") or rel.startswith("tests/")
|
|
113
|
+
):
|
|
110
114
|
continue
|
|
111
115
|
results.append(p)
|
|
112
116
|
return results
|
|
@@ -150,10 +154,42 @@ def _find_class_file(
|
|
|
150
154
|
|
|
151
155
|
def _apply_rename(source: str, old_name: str, new_name: str) -> str:
|
|
152
156
|
"""Apply word-boundary replacement for class name (PascalCase and camelCase forms)."""
|
|
153
|
-
# PascalCase replacement: all type references, declarations, imports
|
|
154
157
|
result = re.sub(r'\b' + re.escape(old_name) + r'\b', new_name, source)
|
|
155
158
|
|
|
156
|
-
|
|
159
|
+
old_camel = _to_camel(old_name)
|
|
160
|
+
new_camel = _to_camel(new_name)
|
|
161
|
+
if old_camel != old_name and old_camel in result:
|
|
162
|
+
result = re.sub(r'\b' + re.escape(old_camel) + r'\b', new_camel, result)
|
|
163
|
+
|
|
164
|
+
return result
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Matches a class/interface/enum/record declaration of a given name
|
|
168
|
+
_CLASS_DECL_RE_TMPL = r'\b(?:class|interface|enum|record)\s+{name}\b'
|
|
169
|
+
# Matches a constructor declaration: optional access modifier + ClassName + (
|
|
170
|
+
_CTOR_DECL_RE_TMPL = r'^\s*(?:(?:public|protected|private)\s+)?' + r'{name}\s*\('
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _apply_rename_refs_only(source: str, old_name: str, new_name: str) -> str:
|
|
174
|
+
"""Rename old_name→new_name in a non-source file (import/type references only).
|
|
175
|
+
|
|
176
|
+
Skips lines containing a class/interface/enum/record declaration or constructor
|
|
177
|
+
declaration of old_name, so that a class sharing the simple name in another
|
|
178
|
+
package is not corrupted.
|
|
179
|
+
"""
|
|
180
|
+
class_decl_re = re.compile(_CLASS_DECL_RE_TMPL.format(name=re.escape(old_name)))
|
|
181
|
+
ctor_decl_re = re.compile(_CTOR_DECL_RE_TMPL.format(name=re.escape(old_name)))
|
|
182
|
+
ref_re = re.compile(r'\b' + re.escape(old_name) + r'\b')
|
|
183
|
+
|
|
184
|
+
lines = source.splitlines(keepends=True)
|
|
185
|
+
result_lines = []
|
|
186
|
+
for line in lines:
|
|
187
|
+
if class_decl_re.search(line) or ctor_decl_re.search(line):
|
|
188
|
+
result_lines.append(line)
|
|
189
|
+
else:
|
|
190
|
+
result_lines.append(ref_re.sub(new_name, line))
|
|
191
|
+
result = ''.join(result_lines)
|
|
192
|
+
|
|
157
193
|
old_camel = _to_camel(old_name)
|
|
158
194
|
new_camel = _to_camel(new_name)
|
|
159
195
|
if old_camel != old_name and old_camel in result:
|
|
@@ -245,6 +281,19 @@ def rename_class(
|
|
|
245
281
|
result.old_file = str(source_file.relative_to(root)).replace("\\", "/")
|
|
246
282
|
result.new_file = str(new_file_path.relative_to(root)).replace("\\", "/")
|
|
247
283
|
|
|
284
|
+
# BUG-2: check for collision anywhere in the repo, not just same directory
|
|
285
|
+
collision = next(
|
|
286
|
+
(f for f in java_files if f.stem == new_name and f.resolve() != new_file_path.resolve()),
|
|
287
|
+
None,
|
|
288
|
+
)
|
|
289
|
+
if collision is not None:
|
|
290
|
+
collision_rel = str(collision.relative_to(root)).replace("\\", "/")
|
|
291
|
+
result.errors.append(
|
|
292
|
+
f"'{new_name}' already exists at '{collision_rel}' — "
|
|
293
|
+
f"rename would create a duplicate class name. Pass --force to override."
|
|
294
|
+
)
|
|
295
|
+
return result
|
|
296
|
+
|
|
248
297
|
if new_file_path.exists() and new_file_path != source_file:
|
|
249
298
|
result.errors.append(
|
|
250
299
|
f"Target file '{result.new_file}' already exists — aborting to avoid overwrite."
|
|
@@ -260,15 +309,18 @@ def rename_class(
|
|
|
260
309
|
result.errors.append(f"Could not read '{java_file}': {e}")
|
|
261
310
|
continue
|
|
262
311
|
|
|
263
|
-
|
|
312
|
+
is_source = java_file == source_file
|
|
313
|
+
if is_source:
|
|
314
|
+
new_text = _apply_rename(old_text, old_name, new_name)
|
|
315
|
+
else:
|
|
316
|
+
# BUG-4: use refs-only variant to avoid clobbering same-named class in other package
|
|
317
|
+
new_text = _apply_rename_refs_only(old_text, old_name, new_name)
|
|
264
318
|
if new_text == old_text:
|
|
265
319
|
continue
|
|
266
320
|
|
|
267
321
|
rel_path = str(java_file.relative_to(root)).replace("\\", "/")
|
|
268
322
|
diff = _make_diff(old_text, new_text, rel_path)
|
|
269
323
|
|
|
270
|
-
# Determine intent
|
|
271
|
-
is_source = java_file == source_file
|
|
272
324
|
if is_source:
|
|
273
325
|
intent = f"Renamed class declaration: {old_name} → {new_name}"
|
|
274
326
|
else:
|
sourcecode/repository_ir.py
CHANGED
|
@@ -202,8 +202,9 @@ _SECURITY_MARKER_ANNOTATIONS: frozenset[str] = frozenset({
|
|
|
202
202
|
# is expected and does NOT mean endpoints are unprotected.
|
|
203
203
|
_FILTER_SECURITY_ANNOTATIONS: frozenset[str] = frozenset({
|
|
204
204
|
"@EnableWebSecurity",
|
|
205
|
-
|
|
206
|
-
|
|
205
|
+
# @EnableMethodSecurity / @EnableGlobalMethodSecurity enable per-method annotation
|
|
206
|
+
# security (@PreAuthorize/@Secured), NOT a filter chain — must NOT be treated as
|
|
207
|
+
# filter_based or SEC-001 is suppressed for every unannotated endpoint.
|
|
207
208
|
})
|
|
208
209
|
|
|
209
210
|
# Programmatic security: method-call patterns that indicate runtime auth enforcement.
|
|
@@ -360,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
|
|
|
360
361
|
source = _LINE_COMMENT_RE.sub(' ', source)
|
|
361
362
|
return source
|
|
362
363
|
|
|
364
|
+
|
|
365
|
+
def _parse_annotation_line(line: str) -> tuple[str, str]:
|
|
366
|
+
"""Parse annotation name and args from a line starting with '@'.
|
|
367
|
+
|
|
368
|
+
Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
|
|
369
|
+
Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
|
|
370
|
+
on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
|
|
371
|
+
containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
|
|
372
|
+
"""
|
|
373
|
+
if not line.startswith('@'):
|
|
374
|
+
return "", ""
|
|
375
|
+
i = 1
|
|
376
|
+
while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
|
|
377
|
+
i += 1
|
|
378
|
+
ann_name = line[:i]
|
|
379
|
+
while i < len(line) and line[i] in (' ', '\t'):
|
|
380
|
+
i += 1
|
|
381
|
+
if i >= len(line) or line[i] != '(':
|
|
382
|
+
return ann_name, ""
|
|
383
|
+
depth = 0
|
|
384
|
+
in_string = False
|
|
385
|
+
string_char = ''
|
|
386
|
+
start = i + 1
|
|
387
|
+
i += 1
|
|
388
|
+
while i < len(line):
|
|
389
|
+
c = line[i]
|
|
390
|
+
if in_string:
|
|
391
|
+
if c == '\\':
|
|
392
|
+
i += 2
|
|
393
|
+
continue
|
|
394
|
+
if c == string_char:
|
|
395
|
+
in_string = False
|
|
396
|
+
elif c in ('"', "'"):
|
|
397
|
+
in_string = True
|
|
398
|
+
string_char = c
|
|
399
|
+
elif c == '(':
|
|
400
|
+
depth += 1
|
|
401
|
+
elif c == ')':
|
|
402
|
+
if depth == 0:
|
|
403
|
+
return ann_name, line[start:i]
|
|
404
|
+
depth -= 1
|
|
405
|
+
i += 1
|
|
406
|
+
return ann_name, line[start:]
|
|
407
|
+
|
|
363
408
|
# Edge types used for subsystem grouping — semantic hierarchy only, not imports
|
|
364
409
|
_SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
|
|
365
410
|
"extends", "implements", "injects", "contained_in",
|
|
@@ -409,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
|
|
|
409
454
|
# Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
|
|
410
455
|
_ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
|
|
411
456
|
|
|
457
|
+
# Used by _count_net_braces fast path: strip string/char literals before counting braces.
|
|
458
|
+
# Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
|
|
459
|
+
_STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
|
|
460
|
+
|
|
461
|
+
# Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
|
|
462
|
+
_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
|
|
463
|
+
|
|
412
464
|
|
|
413
465
|
# ---------------------------------------------------------------------------
|
|
414
466
|
# Stable ID helpers
|
|
415
467
|
# ---------------------------------------------------------------------------
|
|
416
468
|
|
|
417
|
-
|
|
418
|
-
|
|
469
|
+
_FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
|
|
470
|
+
_TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')
|
|
419
471
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
"
|
|
423
|
-
"""
|
|
472
|
+
|
|
473
|
+
def _normalize_type_name(raw: str) -> str:
|
|
474
|
+
"""Strip annotations, final modifier, and param name; return only type."""
|
|
424
475
|
raw = _ANN_PREFIX_RE.sub("", raw).strip()
|
|
425
|
-
raw =
|
|
426
|
-
|
|
427
|
-
m = re.match(r'^([\w<>\[\].,? ]+?)\s+\w+$', raw)
|
|
476
|
+
raw = _FINAL_STRIP_RE.sub("", raw).strip()
|
|
477
|
+
m = _TYPE_PARAM_RE.match(raw)
|
|
428
478
|
if m:
|
|
429
479
|
return m.group(1).strip()
|
|
430
480
|
return raw.strip()
|
|
@@ -502,26 +552,15 @@ def _compute_stable_id(
|
|
|
502
552
|
# ---------------------------------------------------------------------------
|
|
503
553
|
|
|
504
554
|
def _count_net_braces(line: str) -> int:
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
if ch == '"' and not in_char:
|
|
515
|
-
in_str = not in_str
|
|
516
|
-
elif ch == "'" and not in_str:
|
|
517
|
-
in_char = not in_char
|
|
518
|
-
elif not in_str and not in_char:
|
|
519
|
-
if ch == '{':
|
|
520
|
-
depth += 1
|
|
521
|
-
elif ch == '}':
|
|
522
|
-
depth -= 1
|
|
523
|
-
i += 1
|
|
524
|
-
return depth
|
|
555
|
+
# Fast exit: no braces on this line at all
|
|
556
|
+
if '{' not in line and '}' not in line:
|
|
557
|
+
return 0
|
|
558
|
+
# Fast path: no string/char literals — count directly (C-speed)
|
|
559
|
+
if '"' not in line and "'" not in line:
|
|
560
|
+
return line.count('{') - line.count('}')
|
|
561
|
+
# Slow path: strip string/char literals first so quoted braces don't count
|
|
562
|
+
clean = _STRING_LITERAL_RE.sub('', line)
|
|
563
|
+
return clean.count('{') - clean.count('}')
|
|
525
564
|
|
|
526
565
|
|
|
527
566
|
def _extract_modifiers(text: str) -> list[str]:
|
|
@@ -590,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
|
|
|
590
629
|
_raw_lines = source.splitlines()
|
|
591
630
|
_joined: list[str] = []
|
|
592
631
|
_i = 0
|
|
593
|
-
_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
|
|
594
632
|
while _i < len(_raw_lines):
|
|
595
633
|
_line = _raw_lines[_i]
|
|
596
634
|
_stripped = _line.strip()
|
|
@@ -632,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
|
|
|
632
670
|
net = _count_net_braces(stripped)
|
|
633
671
|
|
|
634
672
|
if stripped.startswith("@"):
|
|
635
|
-
|
|
636
|
-
if
|
|
637
|
-
ann = ann_m.group(1)
|
|
638
|
-
ann_args = ann_m.group(2) or ""
|
|
673
|
+
ann, ann_args = _parse_annotation_line(stripped)
|
|
674
|
+
if ann:
|
|
639
675
|
if ann not in pending_anns:
|
|
640
676
|
pending_anns.append(ann)
|
|
641
677
|
if ann_args and ann in _CAPTURE_ANN_ARGS:
|
|
@@ -1140,17 +1176,26 @@ def _build_relations(
|
|
|
1140
1176
|
evidence={"type": "signature", "value": f"implements {iface}"},
|
|
1141
1177
|
))
|
|
1142
1178
|
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1179
|
+
# mapped_to edges: controller class → class-level @RequestMapping path prefix.
|
|
1180
|
+
# O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
|
|
1181
|
+
# _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
|
|
1182
|
+
# O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
|
|
1183
|
+
for sym in symbols:
|
|
1184
|
+
if sym.type not in ("class", "interface"):
|
|
1185
|
+
continue
|
|
1186
|
+
if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
|
|
1187
|
+
continue
|
|
1188
|
+
if "@RequestMapping" not in sym.annotations:
|
|
1189
|
+
continue
|
|
1190
|
+
_rm_args = sym.annotation_values.get("@RequestMapping", "")
|
|
1191
|
+
for _m_path in _parse_route_paths(_rm_args):
|
|
1192
|
+
if _m_path:
|
|
1148
1193
|
edges.append(RelationEdge(
|
|
1149
1194
|
from_symbol=sym.symbol,
|
|
1150
|
-
to_symbol=
|
|
1195
|
+
to_symbol=_m_path,
|
|
1151
1196
|
type="mapped_to",
|
|
1152
1197
|
confidence="high",
|
|
1153
|
-
evidence={"type": "annotation", "value": f"@RequestMapping(\"{
|
|
1198
|
+
evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
|
|
1154
1199
|
))
|
|
1155
1200
|
|
|
1156
1201
|
# contained_in edges: method/field → enclosing class (structural membership)
|
|
@@ -1418,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
|
|
|
1418
1463
|
Returns {simple_name: value} covering all classes in the file.
|
|
1419
1464
|
Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
|
|
1420
1465
|
"""
|
|
1466
|
+
# Fast path: skip entirely when no declarations present (C-speed string scan)
|
|
1467
|
+
if 'static final String' not in source:
|
|
1468
|
+
return {}
|
|
1469
|
+
# Scan only candidate lines (skips full-source regex over 100KB files).
|
|
1470
|
+
# Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
|
|
1471
|
+
# optional modifier group backtracking; per-line match is far cheaper.
|
|
1421
1472
|
constants: dict[str, str] = {}
|
|
1422
|
-
for
|
|
1423
|
-
|
|
1473
|
+
for line in source.splitlines():
|
|
1474
|
+
if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
|
|
1475
|
+
m = _STATIC_FINAL_STR_RE.search(line)
|
|
1476
|
+
if m:
|
|
1477
|
+
constants[m.group(1)] = m.group(2)
|
|
1424
1478
|
return constants
|
|
1425
1479
|
|
|
1426
1480
|
|
|
@@ -2204,11 +2258,19 @@ def _assemble(
|
|
|
2204
2258
|
|
|
2205
2259
|
all_fqns_set = {s.symbol for s in sorted_syms}
|
|
2206
2260
|
|
|
2207
|
-
# Bounded BFS reachability per node (graph-only)
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2261
|
+
# Bounded BFS reachability per node (graph-only).
|
|
2262
|
+
# Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
|
|
2263
|
+
# hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
|
|
2264
|
+
# bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
|
|
2265
|
+
# repos causes no accuracy loss for spring-audit/endpoints/security analysis.
|
|
2266
|
+
_BFS_SYMBOL_THRESHOLD: int = 5000
|
|
2267
|
+
if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
|
|
2268
|
+
bfs_reach: dict[str, int] = {
|
|
2269
|
+
s.symbol: _bfs_reachability(s.symbol, adjacency)
|
|
2270
|
+
for s in sorted_syms
|
|
2271
|
+
}
|
|
2272
|
+
else:
|
|
2273
|
+
bfs_reach = {}
|
|
2212
2274
|
|
|
2213
2275
|
# Normalize centrality across all nodes
|
|
2214
2276
|
max_raw = max(
|
|
@@ -2828,6 +2890,29 @@ def build_repo_ir(
|
|
|
2828
2890
|
# type map before building relations. Java classes in the same package
|
|
2829
2891
|
# reference each other without import statements, so import_map alone cannot
|
|
2830
2892
|
# resolve them — _build_same_package_map provides the cross-file fallback.
|
|
2893
|
+
#
|
|
2894
|
+
# Pre-scan filter: skip full symbol extraction for files that have no
|
|
2895
|
+
# Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
|
|
2896
|
+
# SPI interfaces) contribute no endpoints, transactions, or security findings
|
|
2897
|
+
# to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
|
|
2898
|
+
# Non-annotated files still register their package+class via a lightweight
|
|
2899
|
+
# regex scan so same-package type resolution remains correct.
|
|
2900
|
+
_ANNOTATION_MARKERS: tuple[str, ...] = (
|
|
2901
|
+
'@Controller', '@RestController', '@Service', '@Repository',
|
|
2902
|
+
'@Component', '@Configuration', '@Bean', '@Transactional',
|
|
2903
|
+
'@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
|
|
2904
|
+
'@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
|
|
2905
|
+
'@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
|
|
2906
|
+
'@RequiredArgsConstructor', '@AllArgsConstructor',
|
|
2907
|
+
'@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
|
|
2908
|
+
'@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
|
|
2909
|
+
# JPA / persistence (needed for stereotype detection in all commands)
|
|
2910
|
+
'@Entity', '@MappedSuperclass', '@Embeddable',
|
|
2911
|
+
# AOP / messaging / event sourcing
|
|
2912
|
+
'@Aspect', '@Aggregate', '@Document',
|
|
2913
|
+
# Spring Data
|
|
2914
|
+
'@Query', '@NamedQuery',
|
|
2915
|
+
)
|
|
2831
2916
|
_per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
|
|
2832
2917
|
for rel_path in sorted(file_paths):
|
|
2833
2918
|
abs_path = root / rel_path
|
|
@@ -2838,6 +2923,23 @@ def build_repo_ir(
|
|
|
2838
2923
|
_meta_files_read += 1
|
|
2839
2924
|
_meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
|
|
2840
2925
|
_meta_chars_read += len(source)
|
|
2926
|
+
# Fast pre-scan: if file has no relevant annotations skip full extraction.
|
|
2927
|
+
# Still register package/class name for same-package resolution.
|
|
2928
|
+
if not any(marker in source for marker in _ANNOTATION_MARKERS):
|
|
2929
|
+
pkg_m = _PKG_RE.search(source)
|
|
2930
|
+
_pkg = pkg_m.group(1) if pkg_m else ""
|
|
2931
|
+
# Minimal class-name symbols for same-package map (no methods/fields)
|
|
2932
|
+
_min_syms: list[SymbolRecord] = []
|
|
2933
|
+
for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
|
|
2934
|
+
_cls_name = _cm.group(1)
|
|
2935
|
+
_fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
|
|
2936
|
+
_min_syms.append(SymbolRecord(
|
|
2937
|
+
symbol=_fqn, type="class", confidence="medium",
|
|
2938
|
+
declaring_file=rel_path,
|
|
2939
|
+
))
|
|
2940
|
+
all_symbols.extend(_min_syms)
|
|
2941
|
+
# No relations needed for non-annotated files
|
|
2942
|
+
continue
|
|
2841
2943
|
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2842
2944
|
all_symbols.extend(symbols)
|
|
2843
2945
|
_per_file.append((rel_path, source, package, raw_imports, symbols))
|
|
@@ -2893,6 +2995,48 @@ def build_repo_ir(
|
|
|
2893
2995
|
)
|
|
2894
2996
|
ir = _assemble(all_symbols, unique_relations, all_changed, spring_summary, route_diffs_arg)
|
|
2895
2997
|
|
|
2998
|
+
# BUG-7: XML Spring Security detection for the canonical CIR pipeline.
|
|
2999
|
+
# _assemble only sees Java symbols — XML config is invisible to it.
|
|
3000
|
+
# Scan here (where root is available) and retag route_surface entries so
|
|
3001
|
+
# build_canonical_ir produces correct CanonicalEndpoint.security values.
|
|
3002
|
+
_xml_sec_re = re.compile(
|
|
3003
|
+
r'(?:xmlns(?::[a-z]+)?="http://www\.springframework\.org/schema/security"'
|
|
3004
|
+
r'|<security:http\b'
|
|
3005
|
+
r'|<http\s[^>]*use-expressions'
|
|
3006
|
+
r'|spring-security-[2345]'
|
|
3007
|
+
r'|xmlns:security="http://www\.springframework\.org/schema/security")',
|
|
3008
|
+
re.IGNORECASE,
|
|
3009
|
+
)
|
|
3010
|
+
_xml_sec_detected = False
|
|
3011
|
+
for _xml_glob in (
|
|
3012
|
+
"*security*.xml", "*Security*.xml",
|
|
3013
|
+
"*applicationContext*.xml", "*-context.xml", "*Context.xml",
|
|
3014
|
+
"*spring*.xml", "*Spring*.xml",
|
|
3015
|
+
):
|
|
3016
|
+
for _xf in root.rglob(_xml_glob):
|
|
3017
|
+
if "target/" in str(_xf).replace("\\", "/"):
|
|
3018
|
+
continue
|
|
3019
|
+
try:
|
|
3020
|
+
_xt = _xf.read_text(encoding="utf-8", errors="replace")
|
|
3021
|
+
except OSError:
|
|
3022
|
+
continue
|
|
3023
|
+
if _xml_sec_re.search(_xt):
|
|
3024
|
+
_xml_sec_detected = True
|
|
3025
|
+
break
|
|
3026
|
+
if _xml_sec_detected:
|
|
3027
|
+
break
|
|
3028
|
+
if _xml_sec_detected:
|
|
3029
|
+
_sec_model = ir.get("security_model", "unknown")
|
|
3030
|
+
if _sec_model == "unknown":
|
|
3031
|
+
ir["security_model"] = "xml_or_filter_chain"
|
|
3032
|
+
elif _sec_model in ("annotation_based", "mixed"):
|
|
3033
|
+
ir["security_model"] = "mixed"
|
|
3034
|
+
# Retag route_surface entries that have no security (would become none_detected in CIR)
|
|
3035
|
+
for _r in ir.get("route_surface") or []:
|
|
3036
|
+
_r_sec = _r.get("security_annotations")
|
|
3037
|
+
if _r_sec is None or (isinstance(_r_sec, dict) and _r_sec.get("policy") == "none_detected"):
|
|
3038
|
+
_r["security_annotations"] = {"policy": "xml_or_filter_chain"}
|
|
3039
|
+
|
|
2896
3040
|
# L-6: inject analysis_meta — files_read, lines_read, symbols_analyzed, token_estimate
|
|
2897
3041
|
ir["analysis_meta"] = {
|
|
2898
3042
|
"files_read": _meta_files_read,
|
|
@@ -3358,13 +3502,18 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
|
|
|
3358
3502
|
if _xml_security_detected:
|
|
3359
3503
|
break
|
|
3360
3504
|
|
|
3361
|
-
if _xml_security_detected
|
|
3362
|
-
|
|
3363
|
-
#
|
|
3364
|
-
#
|
|
3505
|
+
if _xml_security_detected:
|
|
3506
|
+
# Re-tag per-endpoint none_detected → xml_or_filter_chain regardless of security_model.
|
|
3507
|
+
# BUG-7 fix: previously only ran when model == "unknown", causing false-positive SEC-001
|
|
3508
|
+
# when annotation security (@PreAuthorize) coexisted with XML security config.
|
|
3365
3509
|
for ep in endpoints:
|
|
3366
3510
|
if ep.get("security", {}).get("policy") == "none_detected":
|
|
3367
3511
|
ep["security"] = {"policy": "xml_or_filter_chain"}
|
|
3512
|
+
if security_model == "unknown":
|
|
3513
|
+
security_model = "xml_or_filter_chain"
|
|
3514
|
+
elif security_model in ("annotation_based", "mixed"):
|
|
3515
|
+
security_model = "mixed"
|
|
3516
|
+
# filter_based stays filter_based — XML + filter chain is still filter_based
|
|
3368
3517
|
# Recompute no_security_signal (now counts only truly unknown endpoints)
|
|
3369
3518
|
no_security_signal = sum(
|
|
3370
3519
|
1 for e in endpoints
|
|
@@ -3395,7 +3544,11 @@ def find_java_files(root: Path, *, max_files: int = 8000, limitations: list[str]
|
|
|
3395
3544
|
continue
|
|
3396
3545
|
parts = rel.split("/")
|
|
3397
3546
|
# Skip test dirs
|
|
3398
|
-
if
|
|
3547
|
+
if (
|
|
3548
|
+
"/src/test/" in rel or rel.startswith("src/test/")
|
|
3549
|
+
or "/src/tests/" in rel or rel.startswith("src/tests/")
|
|
3550
|
+
or rel.startswith("test/") or rel.startswith("tests/")
|
|
3551
|
+
):
|
|
3399
3552
|
continue
|
|
3400
3553
|
# Skip vendor/generated/build dirs
|
|
3401
3554
|
if any(part in _VENDOR_DIRS for part in parts[:-1]):
|
sourcecode/spring_model.py
CHANGED
|
@@ -41,6 +41,8 @@ _CALL_SKIP: frozenset[str] = frozenset({"annotated_with", "mapped_to", "containe
|
|
|
41
41
|
_BEAN_ANNOTATIONS: frozenset[str] = frozenset({
|
|
42
42
|
"@Component", "@Service", "@Repository",
|
|
43
43
|
"@Controller", "@RestController", "@Configuration", "@Bean",
|
|
44
|
+
# JPA persistence annotations — not Spring beans but need stereotype recognition in explain
|
|
45
|
+
"@Entity", "@MappedSuperclass", "@Embeddable",
|
|
44
46
|
})
|
|
45
47
|
|
|
46
48
|
_GENERIC_PARAM_RE = re.compile(r"<[A-Z][\w,\s<>?]*>")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.35.27
|
|
3
|
+
Version: 1.35.29
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
|
|
41
41
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
42
42
|
|
|
43
|
-

|
|
44
44
|

|
|
45
45
|
|
|
46
46
|
---
|
|
@@ -114,7 +114,9 @@ pipx install sourcecode
|
|
|
114
114
|
|
|
115
115
|
```bash
|
|
116
116
|
sourcecode version
|
|
117
|
-
# sourcecode 1.35.
|
|
117
|
+
# sourcecode 1.35.29
|
|
118
|
+
|
|
119
|
+
**v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
|
|
118
120
|
```
|
|
119
121
|
|
|
120
122
|
---
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=7uoEdJsiULS_BcH-jg6p93kBPCNguzP9l_u66BOtiOk,104
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=qh749a7ykPtGmQI1MR9y6j8TtL_jBdVYFx9YRsLqOMw,44121
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
@@ -7,7 +7,7 @@ sourcecode/cache.py,sha256=wAyPrXN5DqiGivnMpeEuun2xHDKfBer2_oBsh6kj_vc,30447
|
|
|
7
7
|
sourcecode/canonical_ir.py,sha256=2vTLc6wL1cH3NNbEcdZpfX5okh8h5dKq7xd0m0rv_Ro,24167
|
|
8
8
|
sourcecode/cir_graphs.py,sha256=rZi8JV4ZrAa2WSCeyNa4JIEKQ_yZzDZTsrvVz2KfuKA,8919
|
|
9
9
|
sourcecode/classifier.py,sha256=2lYoSH3vOTkXZYPU7Go2WIet1-IuNzTWVhc-ULnXtgw,8024
|
|
10
|
-
sourcecode/cli.py,sha256=
|
|
10
|
+
sourcecode/cli.py,sha256=SZDc7biuDWEXYGn1kvvN4RqmWOA-GHnmJbGdHnYBdh0,246491
|
|
11
11
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
12
12
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
13
13
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -20,7 +20,7 @@ sourcecode/doc_analyzer.py,sha256=05bjTUbDbmnbajD_cgRnACzS8T7xxBKVX4CjkJlhZg8,24
|
|
|
20
20
|
sourcecode/entrypoint_classifier.py,sha256=jhTYlyqDJH2AtdEcLVaRU3lYRTJuF8DkxVzl4-W3zWE,5322
|
|
21
21
|
sourcecode/env_analyzer.py,sha256=aNTyYgQk5noJDfJU6FmasmESOHfiomyJw5EvZqjy6qc,22213
|
|
22
22
|
sourcecode/error_schema.py,sha256=uwosfNaSujtYm11_732Hu92z5ITV040fQDaIyefSvR4,1683
|
|
23
|
-
sourcecode/explain.py,sha256=
|
|
23
|
+
sourcecode/explain.py,sha256=N5189hO8Ydbunr431zWDpSueSTdgBZh9l2xU-fH-AO8,16832
|
|
24
24
|
sourcecode/file_chunker.py,sha256=xceHnlEg6SlSJAO5Iv2-bXICPjN8qvjQJ2CLYrHuq0o,14744
|
|
25
25
|
sourcecode/file_classifier.py,sha256=A0fEABqtfVu1MfoaxnPAvGpZgneGgVXlJDhT74NYXxE,15314
|
|
26
26
|
sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,28781
|
|
@@ -40,9 +40,9 @@ sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
|
|
|
40
40
|
sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,12970
|
|
41
41
|
sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
|
|
42
42
|
sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
|
|
43
|
-
sourcecode/rename_refactor.py,sha256=
|
|
43
|
+
sourcecode/rename_refactor.py,sha256=rWCsXoDxJNdsmkUXjPtHphT5CjYOgEPmcc817_8Gu-Y,12538
|
|
44
44
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
45
|
-
sourcecode/repository_ir.py,sha256=
|
|
45
|
+
sourcecode/repository_ir.py,sha256=vlReshZputMZSmPLUkM6zbnAvygi3aSk0lKHUbW9ijc,180308
|
|
46
46
|
sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
|
|
47
47
|
sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
|
|
48
48
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
@@ -52,7 +52,7 @@ sourcecode/serializer.py,sha256=7SBJIbpC_Lg0RGWq8jjNbF5TiuZwoP_fi0qhHnzQM8M,1243
|
|
|
52
52
|
sourcecode/spring_event_topology.py,sha256=5_ON_21Le5zbG-1GRc5GLIi5HJfy_QjcXLVPC5WeUGQ,18055
|
|
53
53
|
sourcecode/spring_findings.py,sha256=8V91iHOg9hFgg6tLLl4FSsgrF-dBqOcO2s-K5sD_goA,5417
|
|
54
54
|
sourcecode/spring_impact.py,sha256=Ohm2k3W4Wts8Kx8Z7DIM-J-cwGtTJBWKFBsX-WkupBQ,32943
|
|
55
|
-
sourcecode/spring_model.py,sha256=
|
|
55
|
+
sourcecode/spring_model.py,sha256=6Lk3rGGFy2suq867S8Da_aCNAXtSGJ36XBaQd9VNTFc,14888
|
|
56
56
|
sourcecode/spring_security_audit.py,sha256=AmUkqoExkNZ3YxxZf9TwkwX-f7P_SETm0QC7VqEAqh4,20618
|
|
57
57
|
sourcecode/spring_semantic.py,sha256=CiAf77p48-RFrUF0zbgww4w2Xigrbo1t5M3ZCDIfV_g,12032
|
|
58
58
|
sourcecode/spring_tx_analyzer.py,sha256=u4_ckdEFZUiIsHdUX4OaIhnvoTdAwrxNTFweG6vc7wE,30526
|
|
@@ -96,8 +96,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
96
96
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
97
97
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
98
98
|
sourcecode/telemetry/transport.py,sha256=QSslxIwij8YkRWcVvxykODDrkiN_GAAEu3dUP7KIWeE,1651
|
|
99
|
-
sourcecode-1.35.
|
|
100
|
-
sourcecode-1.35.
|
|
101
|
-
sourcecode-1.35.
|
|
102
|
-
sourcecode-1.35.
|
|
103
|
-
sourcecode-1.35.
|
|
99
|
+
sourcecode-1.35.29.dist-info/METADATA,sha256=jYYziktPfdsRvuLAdGB_lj_3o27zWYeKypP_pBlZ9gE,21705
|
|
100
|
+
sourcecode-1.35.29.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
101
|
+
sourcecode-1.35.29.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
102
|
+
sourcecode-1.35.29.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
103
|
+
sourcecode-1.35.29.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|