sourcecode 1.35.28__py3-none-any.whl → 1.35.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/repository_ir.py +149 -48
- {sourcecode-1.35.28.dist-info → sourcecode-1.35.29.dist-info}/METADATA +3 -3
- {sourcecode-1.35.28.dist-info → sourcecode-1.35.29.dist-info}/RECORD +7 -7
- {sourcecode-1.35.28.dist-info → sourcecode-1.35.29.dist-info}/WHEEL +0 -0
- {sourcecode-1.35.28.dist-info → sourcecode-1.35.29.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.35.28.dist-info → sourcecode-1.35.29.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/repository_ir.py
CHANGED
|
@@ -361,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
|
|
|
361
361
|
source = _LINE_COMMENT_RE.sub(' ', source)
|
|
362
362
|
return source
|
|
363
363
|
|
|
364
|
+
|
|
365
|
+
def _parse_annotation_line(line: str) -> tuple[str, str]:
|
|
366
|
+
"""Parse annotation name and args from a line starting with '@'.
|
|
367
|
+
|
|
368
|
+
Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
|
|
369
|
+
Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
|
|
370
|
+
on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
|
|
371
|
+
containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
|
|
372
|
+
"""
|
|
373
|
+
if not line.startswith('@'):
|
|
374
|
+
return "", ""
|
|
375
|
+
i = 1
|
|
376
|
+
while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
|
|
377
|
+
i += 1
|
|
378
|
+
ann_name = line[:i]
|
|
379
|
+
while i < len(line) and line[i] in (' ', '\t'):
|
|
380
|
+
i += 1
|
|
381
|
+
if i >= len(line) or line[i] != '(':
|
|
382
|
+
return ann_name, ""
|
|
383
|
+
depth = 0
|
|
384
|
+
in_string = False
|
|
385
|
+
string_char = ''
|
|
386
|
+
start = i + 1
|
|
387
|
+
i += 1
|
|
388
|
+
while i < len(line):
|
|
389
|
+
c = line[i]
|
|
390
|
+
if in_string:
|
|
391
|
+
if c == '\\':
|
|
392
|
+
i += 2
|
|
393
|
+
continue
|
|
394
|
+
if c == string_char:
|
|
395
|
+
in_string = False
|
|
396
|
+
elif c in ('"', "'"):
|
|
397
|
+
in_string = True
|
|
398
|
+
string_char = c
|
|
399
|
+
elif c == '(':
|
|
400
|
+
depth += 1
|
|
401
|
+
elif c == ')':
|
|
402
|
+
if depth == 0:
|
|
403
|
+
return ann_name, line[start:i]
|
|
404
|
+
depth -= 1
|
|
405
|
+
i += 1
|
|
406
|
+
return ann_name, line[start:]
|
|
407
|
+
|
|
364
408
|
# Edge types used for subsystem grouping — semantic hierarchy only, not imports
|
|
365
409
|
_SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
|
|
366
410
|
"extends", "implements", "injects", "contained_in",
|
|
@@ -410,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
|
|
|
410
454
|
# Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
|
|
411
455
|
_ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
|
|
412
456
|
|
|
457
|
+
# Used by the _count_net_braces slow path: strip string/char literals before counting braces.
|
|
458
|
+
# Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
|
|
459
|
+
_STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
|
|
460
|
+
|
|
461
|
+
# Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
|
|
462
|
+
_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
|
|
463
|
+
|
|
413
464
|
|
|
414
465
|
# ---------------------------------------------------------------------------
|
|
415
466
|
# Stable ID helpers
|
|
416
467
|
# ---------------------------------------------------------------------------
|
|
417
468
|
|
|
418
|
-
|
|
419
|
-
|
|
469
|
+
_FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
|
|
470
|
+
_TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')
|
|
420
471
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
"
|
|
424
|
-
"""
|
|
472
|
+
|
|
473
|
+
def _normalize_type_name(raw: str) -> str:
|
|
474
|
+
"""Strip annotations, final modifier, and param name; return only type."""
|
|
425
475
|
raw = _ANN_PREFIX_RE.sub("", raw).strip()
|
|
426
|
-
raw =
|
|
427
|
-
|
|
428
|
-
m = re.match(r'^([\w<>\[\].,? ]+?)\s+\w+$', raw)
|
|
476
|
+
raw = _FINAL_STRIP_RE.sub("", raw).strip()
|
|
477
|
+
m = _TYPE_PARAM_RE.match(raw)
|
|
429
478
|
if m:
|
|
430
479
|
return m.group(1).strip()
|
|
431
480
|
return raw.strip()
|
|
@@ -503,26 +552,15 @@ def _compute_stable_id(
|
|
|
503
552
|
# ---------------------------------------------------------------------------
|
|
504
553
|
|
|
505
554
|
def _count_net_braces(line: str) -> int:
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
if ch == '"' and not in_char:
|
|
516
|
-
in_str = not in_str
|
|
517
|
-
elif ch == "'" and not in_str:
|
|
518
|
-
in_char = not in_char
|
|
519
|
-
elif not in_str and not in_char:
|
|
520
|
-
if ch == '{':
|
|
521
|
-
depth += 1
|
|
522
|
-
elif ch == '}':
|
|
523
|
-
depth -= 1
|
|
524
|
-
i += 1
|
|
525
|
-
return depth
|
|
555
|
+
# Fast exit: no braces on this line at all
|
|
556
|
+
if '{' not in line and '}' not in line:
|
|
557
|
+
return 0
|
|
558
|
+
# Fast path: no string/char literals — count directly (C-speed)
|
|
559
|
+
if '"' not in line and "'" not in line:
|
|
560
|
+
return line.count('{') - line.count('}')
|
|
561
|
+
# Slow path: strip string/char literals first so quoted braces don't count
|
|
562
|
+
clean = _STRING_LITERAL_RE.sub('', line)
|
|
563
|
+
return clean.count('{') - clean.count('}')
|
|
526
564
|
|
|
527
565
|
|
|
528
566
|
def _extract_modifiers(text: str) -> list[str]:
|
|
@@ -591,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
|
|
|
591
629
|
_raw_lines = source.splitlines()
|
|
592
630
|
_joined: list[str] = []
|
|
593
631
|
_i = 0
|
|
594
|
-
_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
|
|
595
632
|
while _i < len(_raw_lines):
|
|
596
633
|
_line = _raw_lines[_i]
|
|
597
634
|
_stripped = _line.strip()
|
|
@@ -633,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
|
|
|
633
670
|
net = _count_net_braces(stripped)
|
|
634
671
|
|
|
635
672
|
if stripped.startswith("@"):
|
|
636
|
-
|
|
637
|
-
if
|
|
638
|
-
ann = ann_m.group(1)
|
|
639
|
-
ann_args = ann_m.group(2) or ""
|
|
673
|
+
ann, ann_args = _parse_annotation_line(stripped)
|
|
674
|
+
if ann:
|
|
640
675
|
if ann not in pending_anns:
|
|
641
676
|
pending_anns.append(ann)
|
|
642
677
|
if ann_args and ann in _CAPTURE_ANN_ARGS:
|
|
@@ -1141,17 +1176,26 @@ def _build_relations(
|
|
|
1141
1176
|
evidence={"type": "signature", "value": f"implements {iface}"},
|
|
1142
1177
|
))
|
|
1143
1178
|
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1179
|
+
# mapped_to edges: controller class → class-level @RequestMapping path prefix.
|
|
1180
|
+
# O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
|
|
1181
|
+
# _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
|
|
1182
|
+
# O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
|
|
1183
|
+
for sym in symbols:
|
|
1184
|
+
if sym.type not in ("class", "interface"):
|
|
1185
|
+
continue
|
|
1186
|
+
if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
|
|
1187
|
+
continue
|
|
1188
|
+
if "@RequestMapping" not in sym.annotations:
|
|
1189
|
+
continue
|
|
1190
|
+
_rm_args = sym.annotation_values.get("@RequestMapping", "")
|
|
1191
|
+
for _m_path in _parse_route_paths(_rm_args):
|
|
1192
|
+
if _m_path:
|
|
1149
1193
|
edges.append(RelationEdge(
|
|
1150
1194
|
from_symbol=sym.symbol,
|
|
1151
|
-
to_symbol=
|
|
1195
|
+
to_symbol=_m_path,
|
|
1152
1196
|
type="mapped_to",
|
|
1153
1197
|
confidence="high",
|
|
1154
|
-
evidence={"type": "annotation", "value": f"@RequestMapping(\"{
|
|
1198
|
+
evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
|
|
1155
1199
|
))
|
|
1156
1200
|
|
|
1157
1201
|
# contained_in edges: method/field → enclosing class (structural membership)
|
|
@@ -1419,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
|
|
|
1419
1463
|
Returns {simple_name: value} covering all classes in the file.
|
|
1420
1464
|
Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
|
|
1421
1465
|
"""
|
|
1466
|
+
# Fast path: skip entirely when no declarations present (C-speed string scan)
|
|
1467
|
+
if 'static final String' not in source:
|
|
1468
|
+
return {}
|
|
1469
|
+
# Scan only candidate lines (skips full-source regex over 100KB files).
|
|
1470
|
+
# Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
|
|
1471
|
+
# optional modifier group backtracking; per-line match is far cheaper.
|
|
1422
1472
|
constants: dict[str, str] = {}
|
|
1423
|
-
for
|
|
1424
|
-
|
|
1473
|
+
for line in source.splitlines():
|
|
1474
|
+
if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
|
|
1475
|
+
m = _STATIC_FINAL_STR_RE.search(line)
|
|
1476
|
+
if m:
|
|
1477
|
+
constants[m.group(1)] = m.group(2)
|
|
1425
1478
|
return constants
|
|
1426
1479
|
|
|
1427
1480
|
|
|
@@ -2205,11 +2258,19 @@ def _assemble(
|
|
|
2205
2258
|
|
|
2206
2259
|
all_fqns_set = {s.symbol for s in sorted_syms}
|
|
2207
2260
|
|
|
2208
|
-
# Bounded BFS reachability per node (graph-only)
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2261
|
+
# Bounded BFS reachability per node (graph-only).
|
|
2262
|
+
# Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
|
|
2263
|
+
# hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
|
|
2264
|
+
# bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
|
|
2265
|
+
# repos causes no accuracy loss for spring-audit/endpoints/security analysis.
|
|
2266
|
+
_BFS_SYMBOL_THRESHOLD: int = 5000
|
|
2267
|
+
if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
|
|
2268
|
+
bfs_reach: dict[str, int] = {
|
|
2269
|
+
s.symbol: _bfs_reachability(s.symbol, adjacency)
|
|
2270
|
+
for s in sorted_syms
|
|
2271
|
+
}
|
|
2272
|
+
else:
|
|
2273
|
+
bfs_reach = {}
|
|
2213
2274
|
|
|
2214
2275
|
# Normalize centrality across all nodes
|
|
2215
2276
|
max_raw = max(
|
|
@@ -2829,6 +2890,29 @@ def build_repo_ir(
|
|
|
2829
2890
|
# type map before building relations. Java classes in the same package
|
|
2830
2891
|
# reference each other without import statements, so import_map alone cannot
|
|
2831
2892
|
# resolve them — _build_same_package_map provides the cross-file fallback.
|
|
2893
|
+
#
|
|
2894
|
+
# Pre-scan filter: skip full symbol extraction for files that have no
|
|
2895
|
+
# Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
|
|
2896
|
+
# SPI interfaces) contribute no endpoints, transactions, or security findings
|
|
2897
|
+
# to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
|
|
2898
|
+
# Non-annotated files still register their package+class via a lightweight
|
|
2899
|
+
# regex scan so same-package type resolution remains correct.
|
|
2900
|
+
_ANNOTATION_MARKERS: tuple[str, ...] = (
|
|
2901
|
+
'@Controller', '@RestController', '@Service', '@Repository',
|
|
2902
|
+
'@Component', '@Configuration', '@Bean', '@Transactional',
|
|
2903
|
+
'@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
|
|
2904
|
+
'@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
|
|
2905
|
+
'@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
|
|
2906
|
+
'@RequiredArgsConstructor', '@AllArgsConstructor',
|
|
2907
|
+
'@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
|
|
2908
|
+
'@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
|
|
2909
|
+
# JPA / persistence (needed for stereotype detection in all commands)
|
|
2910
|
+
'@Entity', '@MappedSuperclass', '@Embeddable',
|
|
2911
|
+
# AOP / messaging / event sourcing
|
|
2912
|
+
'@Aspect', '@Aggregate', '@Document',
|
|
2913
|
+
# Spring Data
|
|
2914
|
+
'@Query', '@NamedQuery',
|
|
2915
|
+
)
|
|
2832
2916
|
_per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
|
|
2833
2917
|
for rel_path in sorted(file_paths):
|
|
2834
2918
|
abs_path = root / rel_path
|
|
@@ -2839,6 +2923,23 @@ def build_repo_ir(
|
|
|
2839
2923
|
_meta_files_read += 1
|
|
2840
2924
|
_meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
|
|
2841
2925
|
_meta_chars_read += len(source)
|
|
2926
|
+
# Fast pre-scan: if file has no relevant annotations skip full extraction.
|
|
2927
|
+
# Still register package/class name for same-package resolution.
|
|
2928
|
+
if not any(marker in source for marker in _ANNOTATION_MARKERS):
|
|
2929
|
+
pkg_m = _PKG_RE.search(source)
|
|
2930
|
+
_pkg = pkg_m.group(1) if pkg_m else ""
|
|
2931
|
+
# Minimal class-name symbols for same-package map (no methods/fields)
|
|
2932
|
+
_min_syms: list[SymbolRecord] = []
|
|
2933
|
+
for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
|
|
2934
|
+
_cls_name = _cm.group(1)
|
|
2935
|
+
_fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
|
|
2936
|
+
_min_syms.append(SymbolRecord(
|
|
2937
|
+
symbol=_fqn, type="class", confidence="medium",
|
|
2938
|
+
declaring_file=rel_path,
|
|
2939
|
+
))
|
|
2940
|
+
all_symbols.extend(_min_syms)
|
|
2941
|
+
# No relations needed for non-annotated files
|
|
2942
|
+
continue
|
|
2842
2943
|
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2843
2944
|
all_symbols.extend(symbols)
|
|
2844
2945
|
_per_file.append((rel_path, source, package, raw_imports, symbols))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.35.
|
|
3
|
+
Version: 1.35.29
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
|
|
41
41
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
42
42
|
|
|
43
|
-

|
|
44
44
|

|
|
45
45
|
|
|
46
46
|
---
|
|
@@ -114,7 +114,7 @@ pipx install sourcecode
|
|
|
114
114
|
|
|
115
115
|
```bash
|
|
116
116
|
sourcecode version
|
|
117
|
-
# sourcecode 1.35.
|
|
117
|
+
# sourcecode 1.35.29
|
|
118
118
|
|
|
119
119
|
**v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
|
|
120
120
|
```
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=7uoEdJsiULS_BcH-jg6p93kBPCNguzP9l_u66BOtiOk,104
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=qh749a7ykPtGmQI1MR9y6j8TtL_jBdVYFx9YRsLqOMw,44121
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
@@ -42,7 +42,7 @@ sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
|
|
|
42
42
|
sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
|
|
43
43
|
sourcecode/rename_refactor.py,sha256=rWCsXoDxJNdsmkUXjPtHphT5CjYOgEPmcc817_8Gu-Y,12538
|
|
44
44
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
45
|
-
sourcecode/repository_ir.py,sha256=
|
|
45
|
+
sourcecode/repository_ir.py,sha256=vlReshZputMZSmPLUkM6zbnAvygi3aSk0lKHUbW9ijc,180308
|
|
46
46
|
sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
|
|
47
47
|
sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
|
|
48
48
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
@@ -96,8 +96,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
96
96
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
97
97
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
98
98
|
sourcecode/telemetry/transport.py,sha256=QSslxIwij8YkRWcVvxykODDrkiN_GAAEu3dUP7KIWeE,1651
|
|
99
|
-
sourcecode-1.35.
|
|
100
|
-
sourcecode-1.35.
|
|
101
|
-
sourcecode-1.35.
|
|
102
|
-
sourcecode-1.35.
|
|
103
|
-
sourcecode-1.35.
|
|
99
|
+
sourcecode-1.35.29.dist-info/METADATA,sha256=jYYziktPfdsRvuLAdGB_lj_3o27zWYeKypP_pBlZ9gE,21705
|
|
100
|
+
sourcecode-1.35.29.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
101
|
+
sourcecode-1.35.29.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
102
|
+
sourcecode-1.35.29.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
103
|
+
sourcecode-1.35.29.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|