sourcecode 1.35.27__tar.gz → 1.35.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. {sourcecode-1.35.27 → sourcecode-1.35.29}/PKG-INFO +5 -3
  2. {sourcecode-1.35.27 → sourcecode-1.35.29}/README.md +4 -2
  3. {sourcecode-1.35.27 → sourcecode-1.35.29}/pyproject.toml +1 -1
  4. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/cli.py +6 -3
  6. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/explain.py +3 -0
  7. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/rename_refactor.py +58 -6
  8. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/repository_ir.py +208 -55
  9. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_model.py +2 -0
  10. {sourcecode-1.35.27 → sourcecode-1.35.29}/.github/workflows/build-windows.yml +0 -0
  11. {sourcecode-1.35.27 → sourcecode-1.35.29}/.gitignore +0 -0
  12. {sourcecode-1.35.27 → sourcecode-1.35.29}/.ruff.toml +0 -0
  13. {sourcecode-1.35.27 → sourcecode-1.35.29}/CHANGELOG.md +0 -0
  14. {sourcecode-1.35.27 → sourcecode-1.35.29}/CONTRIBUTING.md +0 -0
  15. {sourcecode-1.35.27 → sourcecode-1.35.29}/LICENSE +0 -0
  16. {sourcecode-1.35.27 → sourcecode-1.35.29}/SECURITY.md +0 -0
  17. {sourcecode-1.35.27 → sourcecode-1.35.29}/raw +0 -0
  18. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/adaptive_scanner.py +0 -0
  19. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/architecture_analyzer.py +0 -0
  20. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/architecture_summary.py +0 -0
  21. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/ast_extractor.py +0 -0
  22. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/cache.py +0 -0
  23. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/canonical_ir.py +0 -0
  24. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/cir_graphs.py +0 -0
  25. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/classifier.py +0 -0
  26. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/code_notes_analyzer.py +0 -0
  27. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/confidence_analyzer.py +0 -0
  28. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/context_scorer.py +0 -0
  29. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/context_summarizer.py +0 -0
  30. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/contract_model.py +0 -0
  31. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/contract_pipeline.py +0 -0
  32. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/coverage_parser.py +0 -0
  33. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/dependency_analyzer.py +0 -0
  34. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/__init__.py +0 -0
  35. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/base.py +0 -0
  36. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/csproj_parser.py +0 -0
  37. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/dart.py +0 -0
  38. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/dotnet.py +0 -0
  39. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/elixir.py +0 -0
  40. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/go.py +0 -0
  41. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/heuristic.py +0 -0
  42. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/hybrid.py +0 -0
  43. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/java.py +0 -0
  44. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/jvm_ext.py +0 -0
  45. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/nodejs.py +0 -0
  46. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/parsers.py +0 -0
  47. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/php.py +0 -0
  48. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/project.py +0 -0
  49. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/python.py +0 -0
  50. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/ruby.py +0 -0
  51. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/rust.py +0 -0
  52. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/systems.py +0 -0
  53. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/terraform.py +0 -0
  54. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/detectors/tooling.py +0 -0
  55. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/doc_analyzer.py +0 -0
  56. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/entrypoint_classifier.py +0 -0
  57. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/env_analyzer.py +0 -0
  58. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/error_schema.py +0 -0
  59. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/file_chunker.py +0 -0
  60. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/file_classifier.py +0 -0
  61. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/flow_analyzer.py +0 -0
  62. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/fqn_utils.py +0 -0
  63. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/git_analyzer.py +0 -0
  64. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/graph_analyzer.py +0 -0
  65. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/license.py +0 -0
  66. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/__init__.py +0 -0
  67. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  68. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  69. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  70. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  71. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  72. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/orchestrator.py +0 -0
  73. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/registry.py +0 -0
  74. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/runner.py +0 -0
  75. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp/server.py +0 -0
  76. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/mcp_nudge.py +0 -0
  77. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/metrics_analyzer.py +0 -0
  78. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/migrate_check.py +0 -0
  79. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/output_budget.py +0 -0
  80. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/path_filters.py +0 -0
  81. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/pr_comment_renderer.py +0 -0
  82. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/pr_impact.py +0 -0
  83. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/prepare_context.py +0 -0
  84. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/progress.py +0 -0
  85. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/ranking_engine.py +0 -0
  86. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/redactor.py +0 -0
  87. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/relevance_scorer.py +0 -0
  88. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/repo_classifier.py +0 -0
  89. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/ris.py +0 -0
  90. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/runtime_classifier.py +0 -0
  91. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/scanner.py +0 -0
  92. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/schema.py +0 -0
  93. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/semantic_analyzer.py +0 -0
  94. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/serializer.py +0 -0
  95. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_event_topology.py +0 -0
  96. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_findings.py +0 -0
  97. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_impact.py +0 -0
  98. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_security_audit.py +0 -0
  99. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_semantic.py +0 -0
  100. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/spring_tx_analyzer.py +0 -0
  101. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/summarizer.py +0 -0
  102. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/telemetry/__init__.py +0 -0
  103. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/telemetry/config.py +0 -0
  104. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/telemetry/consent.py +0 -0
  105. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/telemetry/events.py +0 -0
  106. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/telemetry/filters.py +0 -0
  107. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/telemetry/transport.py +0 -0
  108. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/tree_utils.py +0 -0
  109. {sourcecode-1.35.27 → sourcecode-1.35.29}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.35.27
3
+ Version: 1.35.29
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
40
40
 
41
41
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
42
42
 
43
- ![Version](https://img.shields.io/badge/version-1.35.27-blue)
43
+ ![Version](https://img.shields.io/badge/version-1.35.29-blue)
44
44
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
45
45
 
46
46
  ---
@@ -114,7 +114,9 @@ pipx install sourcecode
114
114
 
115
115
  ```bash
116
116
  sourcecode version
117
- # sourcecode 1.35.27
117
+ # sourcecode 1.35.29
118
+
119
+ **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
118
120
  ```
119
121
 
120
122
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.35.27-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.35.29-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -76,7 +76,9 @@ pipx install sourcecode
76
76
 
77
77
  ```bash
78
78
  sourcecode version
79
- # sourcecode 1.35.27
79
+ # sourcecode 1.35.29
80
+
81
+ **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
80
82
  ```
81
83
 
82
84
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.35.27"
7
+ version = "1.35.29"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.35.27"
3
+ __version__ = "1.35.29"
@@ -5397,10 +5397,13 @@ def cold_start_cmd(
5397
5397
  result = _gcs(target)
5398
5398
  if compact:
5399
5399
  # P1-C: cap at ~10K tokens — keep only fields essential for orientation.
5400
- _cs_keys = {"status", "git_head", "stacks", "entry_points",
5401
- "key_dependencies", "project_type", "project_summary",
5402
- "validation", "_meta"}
5400
+ # BUG-6 fix: use actual RIS key names (summary/entrypoints, not stacks/entry_points)
5401
+ _cs_keys = {"status", "git_head", "summary", "entrypoints", "endpoints",
5402
+ "project_type", "validation", "_meta"}
5403
5403
  result = {k: v for k, v in result.items() if k in _cs_keys}
5404
+ # Truncate endpoints to first 30 to stay within ~10K token budget
5405
+ if isinstance(result.get("endpoints"), list):
5406
+ result["endpoints"] = result["endpoints"][:30]
5404
5407
  result["_meta"] = {**(result.get("_meta") or {}), "compact_mode": True,
5405
5408
  "full_available": "sourcecode cold-start (without --compact)"}
5406
5409
  _out = _json.dumps(result, indent=2, ensure_ascii=False)
@@ -28,6 +28,9 @@ _STEREOTYPE_DESC: dict[str, str] = {
28
28
  "component": "Spring @Component — general-purpose bean",
29
29
  "configuration": "Spring @Configuration — bean factory / config",
30
30
  "bean": "Spring @Bean — managed component",
31
+ "entity": "JPA @Entity — persistent domain object mapped to a database table",
32
+ "mappedsuperclass": "JPA @MappedSuperclass — base class sharing persistent state with subclasses",
33
+ "embeddable": "JPA @Embeddable — value object embedded in owning entity table",
31
34
  }
32
35
 
33
36
  _SECURITY_ANNOTATION_PREFIXES = (
@@ -106,7 +106,11 @@ def _collect_java_files(root: Path, *, include_tests: bool = True) -> list[Path]
106
106
  if any(part in _VENDOR_DIRS for part in parts[:-1]):
107
107
  continue
108
108
  if not include_tests:
109
- if "/test/" in rel or "/tests/" in rel or rel.startswith("test/"):
109
+ if (
110
+ "/src/test/" in rel or rel.startswith("src/test/")
111
+ or "/src/tests/" in rel or rel.startswith("src/tests/")
112
+ or rel.startswith("test/") or rel.startswith("tests/")
113
+ ):
110
114
  continue
111
115
  results.append(p)
112
116
  return results
@@ -150,10 +154,42 @@ def _find_class_file(
150
154
 
151
155
  def _apply_rename(source: str, old_name: str, new_name: str) -> str:
152
156
  """Apply word-boundary replacement for class name (PascalCase and camelCase forms)."""
153
- # PascalCase replacement: all type references, declarations, imports
154
157
  result = re.sub(r'\b' + re.escape(old_name) + r'\b', new_name, source)
155
158
 
156
- # camelCase instance names: serviceA → serviceB (only when different from PascalCase)
159
+ old_camel = _to_camel(old_name)
160
+ new_camel = _to_camel(new_name)
161
+ if old_camel != old_name and old_camel in result:
162
+ result = re.sub(r'\b' + re.escape(old_camel) + r'\b', new_camel, result)
163
+
164
+ return result
165
+
166
+
167
+ # Matches a class/interface/enum/record declaration of a given name
168
+ _CLASS_DECL_RE_TMPL = r'\b(?:class|interface|enum|record)\s+{name}\b'
169
+ # Matches a constructor declaration: optional access modifier + ClassName + (
170
+ _CTOR_DECL_RE_TMPL = r'^\s*(?:(?:public|protected|private)\s+)?' + r'{name}\s*\('
171
+
172
+
173
+ def _apply_rename_refs_only(source: str, old_name: str, new_name: str) -> str:
174
+ """Rename old_name→new_name in a non-source file (import/type references only).
175
+
176
+ Skips lines containing a class/interface/enum/record declaration or constructor
177
+ declaration of old_name, so that a class sharing the simple name in another
178
+ package is not corrupted.
179
+ """
180
+ class_decl_re = re.compile(_CLASS_DECL_RE_TMPL.format(name=re.escape(old_name)))
181
+ ctor_decl_re = re.compile(_CTOR_DECL_RE_TMPL.format(name=re.escape(old_name)))
182
+ ref_re = re.compile(r'\b' + re.escape(old_name) + r'\b')
183
+
184
+ lines = source.splitlines(keepends=True)
185
+ result_lines = []
186
+ for line in lines:
187
+ if class_decl_re.search(line) or ctor_decl_re.search(line):
188
+ result_lines.append(line)
189
+ else:
190
+ result_lines.append(ref_re.sub(new_name, line))
191
+ result = ''.join(result_lines)
192
+
157
193
  old_camel = _to_camel(old_name)
158
194
  new_camel = _to_camel(new_name)
159
195
  if old_camel != old_name and old_camel in result:
@@ -245,6 +281,19 @@ def rename_class(
245
281
  result.old_file = str(source_file.relative_to(root)).replace("\\", "/")
246
282
  result.new_file = str(new_file_path.relative_to(root)).replace("\\", "/")
247
283
 
284
+ # BUG-2: check for collision anywhere in the repo, not just same directory
285
+ collision = next(
286
+ (f for f in java_files if f.stem == new_name and f.resolve() != new_file_path.resolve()),
287
+ None,
288
+ )
289
+ if collision is not None:
290
+ collision_rel = str(collision.relative_to(root)).replace("\\", "/")
291
+ result.errors.append(
292
+ f"'{new_name}' already exists at '{collision_rel}' — "
293
+ f"rename would create a duplicate class name. Pass --force to override."
294
+ )
295
+ return result
296
+
248
297
  if new_file_path.exists() and new_file_path != source_file:
249
298
  result.errors.append(
250
299
  f"Target file '{result.new_file}' already exists — aborting to avoid overwrite."
@@ -260,15 +309,18 @@ def rename_class(
260
309
  result.errors.append(f"Could not read '{java_file}': {e}")
261
310
  continue
262
311
 
263
- new_text = _apply_rename(old_text, old_name, new_name)
312
+ is_source = java_file == source_file
313
+ if is_source:
314
+ new_text = _apply_rename(old_text, old_name, new_name)
315
+ else:
316
+ # BUG-4: use refs-only variant to avoid clobbering same-named class in other package
317
+ new_text = _apply_rename_refs_only(old_text, old_name, new_name)
264
318
  if new_text == old_text:
265
319
  continue
266
320
 
267
321
  rel_path = str(java_file.relative_to(root)).replace("\\", "/")
268
322
  diff = _make_diff(old_text, new_text, rel_path)
269
323
 
270
- # Determine intent
271
- is_source = java_file == source_file
272
324
  if is_source:
273
325
  intent = f"Renamed class declaration: {old_name} → {new_name}"
274
326
  else:
@@ -202,8 +202,9 @@ _SECURITY_MARKER_ANNOTATIONS: frozenset[str] = frozenset({
202
202
  # is expected and does NOT mean endpoints are unprotected.
203
203
  _FILTER_SECURITY_ANNOTATIONS: frozenset[str] = frozenset({
204
204
  "@EnableWebSecurity",
205
- "@EnableMethodSecurity",
206
- "@EnableGlobalMethodSecurity",
205
+ # @EnableMethodSecurity / @EnableGlobalMethodSecurity enable per-method annotation
206
+ # security (@PreAuthorize/@Secured), NOT a filter chain — must NOT be treated as
207
+ # filter_based or SEC-001 is suppressed for every unannotated endpoint.
207
208
  })
208
209
 
209
210
  # Programmatic security: method-call patterns that indicate runtime auth enforcement.
@@ -360,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
360
361
  source = _LINE_COMMENT_RE.sub(' ', source)
361
362
  return source
362
363
 
364
+
365
+ def _parse_annotation_line(line: str) -> tuple[str, str]:
366
+ """Parse annotation name and args from a line starting with '@'.
367
+
368
+ Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
369
+ Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
370
+ on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
371
+ containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
372
+ """
373
+ if not line.startswith('@'):
374
+ return "", ""
375
+ i = 1
376
+ while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
377
+ i += 1
378
+ ann_name = line[:i]
379
+ while i < len(line) and line[i] in (' ', '\t'):
380
+ i += 1
381
+ if i >= len(line) or line[i] != '(':
382
+ return ann_name, ""
383
+ depth = 0
384
+ in_string = False
385
+ string_char = ''
386
+ start = i + 1
387
+ i += 1
388
+ while i < len(line):
389
+ c = line[i]
390
+ if in_string:
391
+ if c == '\\':
392
+ i += 2
393
+ continue
394
+ if c == string_char:
395
+ in_string = False
396
+ elif c in ('"', "'"):
397
+ in_string = True
398
+ string_char = c
399
+ elif c == '(':
400
+ depth += 1
401
+ elif c == ')':
402
+ if depth == 0:
403
+ return ann_name, line[start:i]
404
+ depth -= 1
405
+ i += 1
406
+ return ann_name, line[start:]
407
+
363
408
  # Edge types used for subsystem grouping — semantic hierarchy only, not imports
364
409
  _SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
365
410
  "extends", "implements", "injects", "contained_in",
@@ -409,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
409
454
  # Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
410
455
  _ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
411
456
 
457
+ # Used by _count_net_braces fast path: strip string/char literals before counting braces.
458
+ # Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
459
+ _STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
460
+
461
+ # Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
462
+ _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
463
+
412
464
 
413
465
  # ---------------------------------------------------------------------------
414
466
  # Stable ID helpers
415
467
  # ---------------------------------------------------------------------------
416
468
 
417
- def _normalize_type_name(raw: str) -> str:
418
- """Strip annotations, final modifier, and param name; return only type.
469
+ _FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
470
+ _TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')
419
471
 
420
- "(Long id)" -> strip after parsing → "Long"
421
- "@NotNull User user" "User"
422
- "List<String>" "List<String>"
423
- """
472
+
473
def _normalize_type_name(raw: str) -> str:
    """Reduce a raw parameter fragment to its type.

    Leading annotations (``_ANN_PREFIX_RE``) and ``final`` modifiers
    (``_FINAL_STRIP_RE``) are removed first. If the remainder matches
    ``_TYPE_PARAM_RE`` (a type followed by a trailing identifier), only the
    type part is returned; otherwise the stripped remainder is returned as is.
    """
    without_annotations = _ANN_PREFIX_RE.sub("", raw).strip()
    candidate = _FINAL_STRIP_RE.sub("", without_annotations).strip()
    type_and_name = _TYPE_PARAM_RE.match(candidate)
    if type_and_name is None:
        # No trailing parameter name to drop.
        return candidate.strip()
    return type_and_name.group(1).strip()
@@ -502,26 +552,15 @@ def _compute_stable_id(
502
552
  # ---------------------------------------------------------------------------
503
553
 
504
554
  def _count_net_braces(line: str) -> int:
505
- depth = 0
506
- in_str = False
507
- in_char = False
508
- i = 0
509
- while i < len(line):
510
- ch = line[i]
511
- if ch == '\\' and (in_str or in_char):
512
- i += 2
513
- continue
514
- if ch == '"' and not in_char:
515
- in_str = not in_str
516
- elif ch == "'" and not in_str:
517
- in_char = not in_char
518
- elif not in_str and not in_char:
519
- if ch == '{':
520
- depth += 1
521
- elif ch == '}':
522
- depth -= 1
523
- i += 1
524
- return depth
555
+ # Fast exit: no braces on this line at all
556
+ if '{' not in line and '}' not in line:
557
+ return 0
558
+ # Fast path: no string/char literals — count directly (C-speed)
559
+ if '"' not in line and "'" not in line:
560
+ return line.count('{') - line.count('}')
561
+ # Slow path: strip string/char literals first so quoted braces don't count
562
+ clean = _STRING_LITERAL_RE.sub('', line)
563
+ return clean.count('{') - clean.count('}')
525
564
 
526
565
 
527
566
  def _extract_modifiers(text: str) -> list[str]:
@@ -590,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
590
629
  _raw_lines = source.splitlines()
591
630
  _joined: list[str] = []
592
631
  _i = 0
593
- _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
594
632
  while _i < len(_raw_lines):
595
633
  _line = _raw_lines[_i]
596
634
  _stripped = _line.strip()
@@ -632,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
632
670
  net = _count_net_braces(stripped)
633
671
 
634
672
  if stripped.startswith("@"):
635
- ann_m = _ANN_WITH_ARGS_RE.match(stripped)
636
- if ann_m:
637
- ann = ann_m.group(1)
638
- ann_args = ann_m.group(2) or ""
673
+ ann, ann_args = _parse_annotation_line(stripped)
674
+ if ann:
639
675
  if ann not in pending_anns:
640
676
  pending_anns.append(ann)
641
677
  if ann_args and ann in _CAPTURE_ANN_ARGS:
@@ -1140,17 +1176,26 @@ def _build_relations(
1140
1176
  evidence={"type": "signature", "value": f"implements {iface}"},
1141
1177
  ))
1142
1178
 
1143
- for m_path, class_fqn in _extract_mapped_paths(source, "").items():
1144
- for sym in symbols:
1145
- if sym.type in ("class", "interface") and (
1146
- "@RestController" in sym.annotations or "@Controller" in sym.annotations
1147
- ):
1179
+ # mapped_to edges: controller class → class-level @RequestMapping path prefix.
1180
+ # O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
1181
+ # _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
1182
+ # O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
1183
+ for sym in symbols:
1184
+ if sym.type not in ("class", "interface"):
1185
+ continue
1186
+ if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
1187
+ continue
1188
+ if "@RequestMapping" not in sym.annotations:
1189
+ continue
1190
+ _rm_args = sym.annotation_values.get("@RequestMapping", "")
1191
+ for _m_path in _parse_route_paths(_rm_args):
1192
+ if _m_path:
1148
1193
  edges.append(RelationEdge(
1149
1194
  from_symbol=sym.symbol,
1150
- to_symbol=m_path,
1195
+ to_symbol=_m_path,
1151
1196
  type="mapped_to",
1152
1197
  confidence="high",
1153
- evidence={"type": "annotation", "value": f"@RequestMapping(\"{m_path}\")"},
1198
+ evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
1154
1199
  ))
1155
1200
 
1156
1201
  # contained_in edges: method/field → enclosing class (structural membership)
@@ -1418,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
1418
1463
  Returns {simple_name: value} covering all classes in the file.
1419
1464
  Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
1420
1465
  """
1466
+ # Fast path: skip entirely when no declarations present (C-speed string scan)
1467
+ if 'static final String' not in source:
1468
+ return {}
1469
+ # Scan only candidate lines (skips full-source regex over 100KB files).
1470
+ # Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
1471
+ # optional modifier group backtracking; per-line match is far cheaper.
1421
1472
  constants: dict[str, str] = {}
1422
- for m in _STATIC_FINAL_STR_RE.finditer(source):
1423
- constants[m.group(1)] = m.group(2)
1473
+ for line in source.splitlines():
1474
+ if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
1475
+ m = _STATIC_FINAL_STR_RE.search(line)
1476
+ if m:
1477
+ constants[m.group(1)] = m.group(2)
1424
1478
  return constants
1425
1479
 
1426
1480
 
@@ -2204,11 +2258,19 @@ def _assemble(
2204
2258
 
2205
2259
  all_fqns_set = {s.symbol for s in sorted_syms}
2206
2260
 
2207
- # Bounded BFS reachability per node (graph-only)
2208
- bfs_reach: dict[str, int] = {
2209
- s.symbol: _bfs_reachability(s.symbol, adjacency)
2210
- for s in sorted_syms
2211
- }
2261
+ # Bounded BFS reachability per node (graph-only).
2262
+ # Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
2263
+ # hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
2264
+ # bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
2265
+ # repos causes no accuracy loss for spring-audit/endpoints/security analysis.
2266
+ _BFS_SYMBOL_THRESHOLD: int = 5000
2267
+ if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
2268
+ bfs_reach: dict[str, int] = {
2269
+ s.symbol: _bfs_reachability(s.symbol, adjacency)
2270
+ for s in sorted_syms
2271
+ }
2272
+ else:
2273
+ bfs_reach = {}
2212
2274
 
2213
2275
  # Normalize centrality across all nodes
2214
2276
  max_raw = max(
@@ -2828,6 +2890,29 @@ def build_repo_ir(
2828
2890
  # type map before building relations. Java classes in the same package
2829
2891
  # reference each other without import statements, so import_map alone cannot
2830
2892
  # resolve them — _build_same_package_map provides the cross-file fallback.
2893
+ #
2894
+ # Pre-scan filter: skip full symbol extraction for files that have no
2895
+ # Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
2896
+ # SPI interfaces) contribute no endpoints, transactions, or security findings
2897
+ # to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
2898
+ # Non-annotated files still register their package+class via a lightweight
2899
+ # regex scan so same-package type resolution remains correct.
2900
+ _ANNOTATION_MARKERS: tuple[str, ...] = (
2901
+ '@Controller', '@RestController', '@Service', '@Repository',
2902
+ '@Component', '@Configuration', '@Bean', '@Transactional',
2903
+ '@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
2904
+ '@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
2905
+ '@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
2906
+ '@RequiredArgsConstructor', '@AllArgsConstructor',
2907
+ '@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
2908
+ '@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
2909
+ # JPA / persistence (needed for stereotype detection in all commands)
2910
+ '@Entity', '@MappedSuperclass', '@Embeddable',
2911
+ # AOP / messaging / event sourcing
2912
+ '@Aspect', '@Aggregate', '@Document',
2913
+ # Spring Data
2914
+ '@Query', '@NamedQuery',
2915
+ )
2831
2916
  _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
2832
2917
  for rel_path in sorted(file_paths):
2833
2918
  abs_path = root / rel_path
@@ -2838,6 +2923,23 @@ def build_repo_ir(
2838
2923
  _meta_files_read += 1
2839
2924
  _meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
2840
2925
  _meta_chars_read += len(source)
2926
+ # Fast pre-scan: if file has no relevant annotations skip full extraction.
2927
+ # Still register package/class name for same-package resolution.
2928
+ if not any(marker in source for marker in _ANNOTATION_MARKERS):
2929
+ pkg_m = _PKG_RE.search(source)
2930
+ _pkg = pkg_m.group(1) if pkg_m else ""
2931
+ # Minimal class-name symbols for same-package map (no methods/fields)
2932
+ _min_syms: list[SymbolRecord] = []
2933
+ for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
2934
+ _cls_name = _cm.group(1)
2935
+ _fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
2936
+ _min_syms.append(SymbolRecord(
2937
+ symbol=_fqn, type="class", confidence="medium",
2938
+ declaring_file=rel_path,
2939
+ ))
2940
+ all_symbols.extend(_min_syms)
2941
+ # No relations needed for non-annotated files
2942
+ continue
2841
2943
  package, symbols, raw_imports = _extract_symbols(source, rel_path)
2842
2944
  all_symbols.extend(symbols)
2843
2945
  _per_file.append((rel_path, source, package, raw_imports, symbols))
@@ -2893,6 +2995,48 @@ def build_repo_ir(
2893
2995
  )
2894
2996
  ir = _assemble(all_symbols, unique_relations, all_changed, spring_summary, route_diffs_arg)
2895
2997
 
2998
+ # BUG-7: XML Spring Security detection for the canonical CIR pipeline.
2999
+ # _assemble only sees Java symbols — XML config is invisible to it.
3000
+ # Scan here (where root is available) and retag route_surface entries so
3001
+ # build_canonical_ir produces correct CanonicalEndpoint.security values.
3002
+ _xml_sec_re = re.compile(
3003
+ r'(?:xmlns(?::[a-z]+)?="http://www\.springframework\.org/schema/security"'
3004
+ r'|<security:http\b'
3005
+ r'|<http\s[^>]*use-expressions'
3006
+ r'|spring-security-[2345]'
3007
+ r'|xmlns:security="http://www\.springframework\.org/schema/security")',
3008
+ re.IGNORECASE,
3009
+ )
3010
+ _xml_sec_detected = False
3011
+ for _xml_glob in (
3012
+ "*security*.xml", "*Security*.xml",
3013
+ "*applicationContext*.xml", "*-context.xml", "*Context.xml",
3014
+ "*spring*.xml", "*Spring*.xml",
3015
+ ):
3016
+ for _xf in root.rglob(_xml_glob):
3017
+ if "target/" in str(_xf).replace("\\", "/"):
3018
+ continue
3019
+ try:
3020
+ _xt = _xf.read_text(encoding="utf-8", errors="replace")
3021
+ except OSError:
3022
+ continue
3023
+ if _xml_sec_re.search(_xt):
3024
+ _xml_sec_detected = True
3025
+ break
3026
+ if _xml_sec_detected:
3027
+ break
3028
+ if _xml_sec_detected:
3029
+ _sec_model = ir.get("security_model", "unknown")
3030
+ if _sec_model == "unknown":
3031
+ ir["security_model"] = "xml_or_filter_chain"
3032
+ elif _sec_model in ("annotation_based", "mixed"):
3033
+ ir["security_model"] = "mixed"
3034
+ # Retag route_surface entries that have no security (would become none_detected in CIR)
3035
+ for _r in ir.get("route_surface") or []:
3036
+ _r_sec = _r.get("security_annotations")
3037
+ if _r_sec is None or (isinstance(_r_sec, dict) and _r_sec.get("policy") == "none_detected"):
3038
+ _r["security_annotations"] = {"policy": "xml_or_filter_chain"}
3039
+
2896
3040
  # L-6: inject analysis_meta — files_read, lines_read, symbols_analyzed, token_estimate
2897
3041
  ir["analysis_meta"] = {
2898
3042
  "files_read": _meta_files_read,
@@ -3358,13 +3502,18 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
3358
3502
  if _xml_security_detected:
3359
3503
  break
3360
3504
 
3361
- if _xml_security_detected and security_model == "unknown":
3362
- security_model = "xml_or_filter_chain"
3363
- # Re-tag per-endpoint none_detected xml_or_filter_chain so the output
3364
- # cannot be misread as "endpoint is unprotected".
3505
+ if _xml_security_detected:
3506
+ # Re-tag per-endpoint none_detected → xml_or_filter_chain regardless of security_model.
3507
+ # BUG-7 fix: previously only ran when model == "unknown", causing false-positive SEC-001
3508
+ # when annotation security (@PreAuthorize) coexisted with XML security config.
3365
3509
  for ep in endpoints:
3366
3510
  if ep.get("security", {}).get("policy") == "none_detected":
3367
3511
  ep["security"] = {"policy": "xml_or_filter_chain"}
3512
+ if security_model == "unknown":
3513
+ security_model = "xml_or_filter_chain"
3514
+ elif security_model in ("annotation_based", "mixed"):
3515
+ security_model = "mixed"
3516
+ # filter_based stays filter_based — XML + filter chain is still filter_based
3368
3517
  # Recompute no_security_signal (now counts only truly unknown endpoints)
3369
3518
  no_security_signal = sum(
3370
3519
  1 for e in endpoints
@@ -3395,7 +3544,11 @@ def find_java_files(root: Path, *, max_files: int = 8000, limitations: list[str]
3395
3544
  continue
3396
3545
  parts = rel.split("/")
3397
3546
  # Skip test dirs
3398
- if "/test/" in rel or "/tests/" in rel or rel.startswith("test/"):
3547
+ if (
3548
+ "/src/test/" in rel or rel.startswith("src/test/")
3549
+ or "/src/tests/" in rel or rel.startswith("src/tests/")
3550
+ or rel.startswith("test/") or rel.startswith("tests/")
3551
+ ):
3399
3552
  continue
3400
3553
  # Skip vendor/generated/build dirs
3401
3554
  if any(part in _VENDOR_DIRS for part in parts[:-1]):
@@ -41,6 +41,8 @@ _CALL_SKIP: frozenset[str] = frozenset({"annotated_with", "mapped_to", "containe
41
41
  _BEAN_ANNOTATIONS: frozenset[str] = frozenset({
42
42
  "@Component", "@Service", "@Repository",
43
43
  "@Controller", "@RestController", "@Configuration", "@Bean",
44
+ # JPA persistence annotations — not Spring beans but need stereotype recognition in explain
45
+ "@Entity", "@MappedSuperclass", "@Embeddable",
44
46
  })
45
47
 
46
48
  _GENERIC_PARAM_RE = re.compile(r"<[A-Z][\w,\s<>?]*>")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes