sourcecode 1.35.28__py3-none-any.whl → 1.35.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.35.28"
3
+ __version__ = "1.35.29"
@@ -361,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
361
361
  source = _LINE_COMMENT_RE.sub(' ', source)
362
362
  return source
363
363
 
364
+
365
+ def _parse_annotation_line(line: str) -> tuple[str, str]:
366
+ """Parse annotation name and args from a line starting with '@'.
367
+
368
+ Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
369
+ Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
370
+ on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
371
+ containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
372
+ """
373
+ if not line.startswith('@'):
374
+ return "", ""
375
+ i = 1
376
+ while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
377
+ i += 1
378
+ ann_name = line[:i]
379
+ while i < len(line) and line[i] in (' ', '\t'):
380
+ i += 1
381
+ if i >= len(line) or line[i] != '(':
382
+ return ann_name, ""
383
+ depth = 0
384
+ in_string = False
385
+ string_char = ''
386
+ start = i + 1
387
+ i += 1
388
+ while i < len(line):
389
+ c = line[i]
390
+ if in_string:
391
+ if c == '\\':
392
+ i += 2
393
+ continue
394
+ if c == string_char:
395
+ in_string = False
396
+ elif c in ('"', "'"):
397
+ in_string = True
398
+ string_char = c
399
+ elif c == '(':
400
+ depth += 1
401
+ elif c == ')':
402
+ if depth == 0:
403
+ return ann_name, line[start:i]
404
+ depth -= 1
405
+ i += 1
406
+ return ann_name, line[start:]
407
+
364
408
  # Edge types used for subsystem grouping — semantic hierarchy only, not imports
365
409
  _SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
366
410
  "extends", "implements", "injects", "contained_in",
@@ -410,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
410
454
  # Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
411
455
  _ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
412
456
 
457
+ # Used by _count_net_braces fast path: strip string/char literals before counting braces.
458
+ # Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
459
+ _STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
460
+
461
+ # Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
462
+ _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
463
+
413
464
 
414
465
  # ---------------------------------------------------------------------------
415
466
  # Stable ID helpers
416
467
  # ---------------------------------------------------------------------------
417
468
 
# Precompiled once at import time; both are used only by _normalize_type_name.
_FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
_TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')


def _normalize_type_name(raw: str) -> str:
    """Reduce a parameter declaration to its type.

    Leading annotations and the ``final`` modifier are removed. When what
    remains has the shape "Type name", the trailing parameter name is dropped;
    otherwise the remaining text is returned unchanged.

    Examples:
        "@NotNull User user" -> "User"
        "List<String>"       -> "List<String>"
    """
    without_annotations = _ANN_PREFIX_RE.sub("", raw).strip()
    candidate = _FINAL_STRIP_RE.sub("", without_annotations).strip()
    type_and_name = _TYPE_PARAM_RE.match(candidate)
    if type_and_name is None:
        # No trailing parameter name to drop: the text is already just a type.
        return candidate.strip()
    return type_and_name.group(1).strip()
@@ -503,26 +552,15 @@ def _compute_stable_id(
503
552
  # ---------------------------------------------------------------------------
504
553
 
def _count_net_braces(line: str) -> int:
    """Return the number of '{' minus the number of '}' on *line*.

    Braces that sit inside a complete string or char literal are ignored:
    when the line contains any quote character, literals are removed with
    ``_STRING_LITERAL_RE`` before counting. Lines without braces return 0
    immediately, and lines without quotes are counted directly.
    """
    has_brace = '{' in line or '}' in line
    if not has_brace:
        return 0
    if '"' in line or "'" in line:
        # Drop quoted text first so braces inside literals are not counted.
        line = _STRING_LITERAL_RE.sub('', line)
    return line.count('{') - line.count('}')
526
564
 
527
565
 
528
566
  def _extract_modifiers(text: str) -> list[str]:
@@ -591,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
591
629
  _raw_lines = source.splitlines()
592
630
  _joined: list[str] = []
593
631
  _i = 0
594
- _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
595
632
  while _i < len(_raw_lines):
596
633
  _line = _raw_lines[_i]
597
634
  _stripped = _line.strip()
@@ -633,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
633
670
  net = _count_net_braces(stripped)
634
671
 
635
672
  if stripped.startswith("@"):
636
- ann_m = _ANN_WITH_ARGS_RE.match(stripped)
637
- if ann_m:
638
- ann = ann_m.group(1)
639
- ann_args = ann_m.group(2) or ""
673
+ ann, ann_args = _parse_annotation_line(stripped)
674
+ if ann:
640
675
  if ann not in pending_anns:
641
676
  pending_anns.append(ann)
642
677
  if ann_args and ann in _CAPTURE_ANN_ARGS:
@@ -1141,17 +1176,26 @@ def _build_relations(
1141
1176
  evidence={"type": "signature", "value": f"implements {iface}"},
1142
1177
  ))
1143
1178
 
1144
- for m_path, class_fqn in _extract_mapped_paths(source, "").items():
1145
- for sym in symbols:
1146
- if sym.type in ("class", "interface") and (
1147
- "@RestController" in sym.annotations or "@Controller" in sym.annotations
1148
- ):
1179
+ # mapped_to edges: controller class → class-level @RequestMapping path prefix.
1180
+ # O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
1181
+ # _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
1182
+ # O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
1183
+ for sym in symbols:
1184
+ if sym.type not in ("class", "interface"):
1185
+ continue
1186
+ if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
1187
+ continue
1188
+ if "@RequestMapping" not in sym.annotations:
1189
+ continue
1190
+ _rm_args = sym.annotation_values.get("@RequestMapping", "")
1191
+ for _m_path in _parse_route_paths(_rm_args):
1192
+ if _m_path:
1149
1193
  edges.append(RelationEdge(
1150
1194
  from_symbol=sym.symbol,
1151
- to_symbol=m_path,
1195
+ to_symbol=_m_path,
1152
1196
  type="mapped_to",
1153
1197
  confidence="high",
1154
- evidence={"type": "annotation", "value": f"@RequestMapping(\"{m_path}\")"},
1198
+ evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
1155
1199
  ))
1156
1200
 
1157
1201
  # contained_in edges: method/field → enclosing class (structural membership)
@@ -1419,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
1419
1463
  Returns {simple_name: value} covering all classes in the file.
1420
1464
  Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
1421
1465
  """
1466
+ # Fast path: skip entirely when no declarations present (C-speed string scan)
1467
+ if 'static final String' not in source:
1468
+ return {}
1469
+ # Scan only candidate lines (skips full-source regex over 100KB files).
1470
+ # Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
1471
+ # optional modifier group backtracking; per-line match is far cheaper.
1422
1472
  constants: dict[str, str] = {}
1423
- for m in _STATIC_FINAL_STR_RE.finditer(source):
1424
- constants[m.group(1)] = m.group(2)
1473
+ for line in source.splitlines():
1474
+ if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
1475
+ m = _STATIC_FINAL_STR_RE.search(line)
1476
+ if m:
1477
+ constants[m.group(1)] = m.group(2)
1425
1478
  return constants
1426
1479
 
1427
1480
 
@@ -2205,11 +2258,19 @@ def _assemble(
2205
2258
 
2206
2259
  all_fqns_set = {s.symbol for s in sorted_syms}
2207
2260
 
2208
- # Bounded BFS reachability per node (graph-only)
2209
- bfs_reach: dict[str, int] = {
2210
- s.symbol: _bfs_reachability(s.symbol, adjacency)
2211
- for s in sorted_syms
2212
- }
2261
+ # Bounded BFS reachability per node (graph-only).
2262
+ # Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
2263
+ # hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
2264
+ # bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
2265
+ # repos causes no accuracy loss for spring-audit/endpoints/security analysis.
2266
+ _BFS_SYMBOL_THRESHOLD: int = 5000
2267
+ if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
2268
+ bfs_reach: dict[str, int] = {
2269
+ s.symbol: _bfs_reachability(s.symbol, adjacency)
2270
+ for s in sorted_syms
2271
+ }
2272
+ else:
2273
+ bfs_reach = {}
2213
2274
 
2214
2275
  # Normalize centrality across all nodes
2215
2276
  max_raw = max(
@@ -2829,6 +2890,29 @@ def build_repo_ir(
2829
2890
  # type map before building relations. Java classes in the same package
2830
2891
  # reference each other without import statements, so import_map alone cannot
2831
2892
  # resolve them — _build_same_package_map provides the cross-file fallback.
2893
+ #
2894
+ # Pre-scan filter: skip full symbol extraction for files that have no
2895
+ # Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
2896
+ # SPI interfaces) contribute no endpoints, transactions, or security findings
2897
+ # to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
2898
+ # Non-annotated files still register their package+class via a lightweight
2899
+ # regex scan so same-package type resolution remains correct.
2900
+ _ANNOTATION_MARKERS: tuple[str, ...] = (
2901
+ '@Controller', '@RestController', '@Service', '@Repository',
2902
+ '@Component', '@Configuration', '@Bean', '@Transactional',
2903
+ '@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
2904
+ '@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
2905
+ '@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
2906
+ '@RequiredArgsConstructor', '@AllArgsConstructor',
2907
+ '@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
2908
+ '@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
2909
+ # JPA / persistence (needed for stereotype detection in all commands)
2910
+ '@Entity', '@MappedSuperclass', '@Embeddable',
2911
+ # AOP / messaging / event sourcing
2912
+ '@Aspect', '@Aggregate', '@Document',
2913
+ # Spring Data
2914
+ '@Query', '@NamedQuery',
2915
+ )
2832
2916
  _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
2833
2917
  for rel_path in sorted(file_paths):
2834
2918
  abs_path = root / rel_path
@@ -2839,6 +2923,23 @@ def build_repo_ir(
2839
2923
  _meta_files_read += 1
2840
2924
  _meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
2841
2925
  _meta_chars_read += len(source)
2926
+ # Fast pre-scan: if file has no relevant annotations skip full extraction.
2927
+ # Still register package/class name for same-package resolution.
2928
+ if not any(marker in source for marker in _ANNOTATION_MARKERS):
2929
+ pkg_m = _PKG_RE.search(source)
2930
+ _pkg = pkg_m.group(1) if pkg_m else ""
2931
+ # Minimal class-name symbols for same-package map (no methods/fields)
2932
+ _min_syms: list[SymbolRecord] = []
2933
+ for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
2934
+ _cls_name = _cm.group(1)
2935
+ _fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
2936
+ _min_syms.append(SymbolRecord(
2937
+ symbol=_fqn, type="class", confidence="medium",
2938
+ declaring_file=rel_path,
2939
+ ))
2940
+ all_symbols.extend(_min_syms)
2941
+ # No relations needed for non-annotated files
2942
+ continue
2842
2943
  package, symbols, raw_imports = _extract_symbols(source, rel_path)
2843
2944
  all_symbols.extend(symbols)
2844
2945
  _per_file.append((rel_path, source, package, raw_imports, symbols))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.35.28
3
+ Version: 1.35.29
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
40
40
 
41
41
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
42
42
 
43
- ![Version](https://img.shields.io/badge/version-1.35.28-blue)
43
+ ![Version](https://img.shields.io/badge/version-1.35.29-blue)
44
44
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
45
45
 
46
46
  ---
@@ -114,7 +114,7 @@ pipx install sourcecode
114
114
 
115
115
  ```bash
116
116
  sourcecode version
117
- # sourcecode 1.35.28
117
+ # sourcecode 1.35.29
118
118
 
119
119
  **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
120
120
  ```
@@ -1,4 +1,4 @@
1
- sourcecode/__init__.py,sha256=6hJmVTMbA3xWMi_K8kDZKhf82Qh1AoNAv1zZtaf2IEg,104
1
+ sourcecode/__init__.py,sha256=7uoEdJsiULS_BcH-jg6p93kBPCNguzP9l_u66BOtiOk,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=qh749a7ykPtGmQI1MR9y6j8TtL_jBdVYFx9YRsLqOMw,44121
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
@@ -42,7 +42,7 @@ sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
42
42
  sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
43
43
  sourcecode/rename_refactor.py,sha256=rWCsXoDxJNdsmkUXjPtHphT5CjYOgEPmcc817_8Gu-Y,12538
44
44
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
45
- sourcecode/repository_ir.py,sha256=n--piFig_NjiGFRzQF8p2-UkLnspHh-4ZFIhUURg2ik,175044
45
+ sourcecode/repository_ir.py,sha256=vlReshZputMZSmPLUkM6zbnAvygi3aSk0lKHUbW9ijc,180308
46
46
  sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
47
47
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
48
48
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
@@ -96,8 +96,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
96
96
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
97
97
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
98
98
  sourcecode/telemetry/transport.py,sha256=QSslxIwij8YkRWcVvxykODDrkiN_GAAEu3dUP7KIWeE,1651
99
- sourcecode-1.35.28.dist-info/METADATA,sha256=2IgUuaTJL4deTV1f5JN_uZoyxnHCr_pj4nSptbPpnfo,21705
100
- sourcecode-1.35.28.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
101
- sourcecode-1.35.28.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
102
- sourcecode-1.35.28.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
103
- sourcecode-1.35.28.dist-info/RECORD,,
99
+ sourcecode-1.35.29.dist-info/METADATA,sha256=jYYziktPfdsRvuLAdGB_lj_3o27zWYeKypP_pBlZ9gE,21705
100
+ sourcecode-1.35.29.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
101
+ sourcecode-1.35.29.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
102
+ sourcecode-1.35.29.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
103
+ sourcecode-1.35.29.dist-info/RECORD,,