sourcecode 1.31.23__py3-none-any.whl → 1.31.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.23"
3
+ __version__ = "1.31.24"
@@ -280,7 +280,7 @@ class ArchitectureAnalyzer:
280
280
  })
281
281
 
282
282
  # Step 4: bounded context inference
283
- bounded_contexts = self._infer_bounded_contexts(domains, graph)
283
+ bounded_contexts = self._infer_bounded_contexts(domains, graph, sm.file_paths)
284
284
 
285
285
  # Overall confidence — based on domain quality, not raw count
286
286
  confidence: Literal["high", "medium", "low"]
@@ -703,11 +703,78 @@ class ArchitectureAnalyzer:
703
703
  ]
704
704
  return result[:16]
705
705
 
706
@staticmethod
def _maven_module_bounded_contexts(file_paths: list[str]) -> list[BoundedContext]:
    """Priority 0: derive bounded contexts from Maven module directory names.

    A path is attributed to a module when it contains ``/src/main/java/`` or
    ``/src/test/java/``; the module name is the last directory segment in
    front of that marker (``broadleaf-order/src/main/java/...`` →
    ``broadleaf-order``). Paths that begin directly with the marker, i.e.
    have no module directory, are ignored.

    Module names are then cleaned:

    * the longest common prefix shared by all module names is stripped, but
      only up to and including its last ``-`` so a meaningful segment is never
      cut in half (``keycloak-services`` → ``services``);
    * one trailing ``-api``/``-impl``/``-core``/``-base``/``-common``/
      ``-parent``/``-test`` suffix is removed;
    * empty or generic names are dropped.

    Modules whose cleaned names coincide (``order-api`` and ``order-impl``)
    are merged into a single context rather than emitted as duplicates.

    Args:
        file_paths: Repository file paths; both Windows and POSIX separators
            are accepted.

    Returns:
        One ``BoundedContext`` per distinct cleaned name, in order of first
        appearance when iterating raw module names alphabetically, each
        listing at most 20 files. Empty when fewer than 2 distinct module
        directories are found.
    """
    import re as _re
    from os.path import commonprefix as _commonprefix

    # The leading "/" anchors the marker on a path-segment boundary so that
    # e.g. "mysrc/main/java/" is not mistaken for a Maven source root.
    markers = ("/src/main/java/", "/src/test/java/")
    files_by_module: dict[str, list[str]] = {}  # module_name → [files]
    for path in file_paths:
        norm = path.replace("\\", "/")
        for marker in markers:
            idx = norm.find(marker)
            if idx <= 0:
                # Marker absent, or nothing in front of it to name a module.
                continue
            # Last path segment in front of the marker is the module name.
            module_seg = norm[:idx].rstrip("/").rsplit("/", 1)[-1]
            if module_seg:
                files_by_module.setdefault(module_seg, []).append(path)
            break

    if len(files_by_module) < 2:
        return []

    # Strip the common project-name prefix (e.g. "keycloak-", "broadleaf-").
    # rfind() returns -1 when the common prefix has no "-", which makes the
    # slice empty, so nothing is stripped in that case.
    common = _commonprefix(list(files_by_module))
    prefix_to_strip = common[: common.rfind("-") + 1]

    suffix_re = _re.compile(r"-(api|impl|core|base|common|parent|test)$")
    generic_names = _GENERIC_NAMES | {
        "api", "impl", "base", "test", "tests", "main", "java",
        "integration", "parent", "bom", "platform",
    }

    # Merge by cleaned name so "order-api" + "order-impl" yield one "order".
    files_by_context: dict[str, list[str]] = {}
    for raw_name in sorted(files_by_module):
        clean = suffix_re.sub("", raw_name.removeprefix(prefix_to_strip))
        if not clean or clean in generic_names:
            continue
        files_by_context.setdefault(clean, []).extend(files_by_module[raw_name])

    return [
        BoundedContext(
            name=name,
            modules=files[:20],  # cap file list
            confidence="high",
        )
        for name, files in files_by_context.items()
    ]
765
+
706
766
  def _infer_bounded_contexts(
707
767
  self,
708
768
  domains: list[ArchitectureDomain],
709
769
  graph: Optional[ModuleGraph],
770
+ file_paths: list[str] | None = None,
710
771
  ) -> list[BoundedContext]:
772
+ # Priority 0: Maven module names — strong bounded context signal for Java projects
773
+ if file_paths:
774
+ maven_bcs = self._maven_module_bounded_contexts(file_paths)
775
+ if maven_bcs:
776
+ return maven_bcs
777
+
711
778
  # Priority 1: use graph SCCs when available
712
779
  if graph is not None:
713
780
  sccs = self._find_sccs(graph)
sourcecode/cli.py CHANGED
@@ -1960,7 +1960,11 @@ def _serialize_relevant_file(f: Any) -> dict:
1960
1960
  d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
1961
1961
  reason = d.pop("reason", "") or ""
1962
1962
  why = d.pop("why", "") or ""
1963
- d.pop("score", None) # score removed from public output (internal ranking only)
1963
+ # Expose score as a rounded float so agents can rank/filter files deterministically.
1964
+ # Kept as "score" (0.0–1.0 normalized relevance) — higher = more relevant.
1965
+ raw_score = d.pop("score", None)
1966
+ if raw_score is not None:
1967
+ d["score"] = round(float(raw_score), 4)
1964
1968
  explanation = _make_explanation(reason, why)
1965
1969
  if explanation:
1966
1970
  d["explanation"] = explanation
@@ -2147,6 +2151,26 @@ def prepare_context_cmd(
2147
2151
  )
2148
2152
  raise typer.Exit(code=1)
2149
2153
 
2154
+ # Validate --format: only "json" and "github-comment" are valid for prepare-context.
2155
+ # "yaml" is intentionally NOT supported here (use main command for yaml output).
2156
+ # Invalid values must error loudly — silently falling through to JSON is a lie.
2157
+ _PC_FORMAT_CHOICES = ("json", "github-comment")
2158
+ if format is not None and format not in _PC_FORMAT_CHOICES:
2159
+ typer.echo(
2160
+ f"Error: invalid value '{format}' for --format. "
2161
+ f"Valid options: {', '.join(_PC_FORMAT_CHOICES)}.",
2162
+ err=True,
2163
+ )
2164
+ raise typer.Exit(code=2)
2165
+ # github-comment only renders for review-pr; warn and normalize for other tasks.
2166
+ if format == "github-comment" and task != "review-pr":
2167
+ typer.echo(
2168
+ f"[warning] --format github-comment is only supported for the review-pr task. "
2169
+ f"Outputting JSON for '{task}'.",
2170
+ err=True,
2171
+ )
2172
+ format = "json"
2173
+
2150
2174
  target = path.resolve()
2151
2175
  if not target.exists() or not target.is_dir():
2152
2176
  typer.echo(f"Error: '{target}' is not a valid directory.", err=True)
@@ -3169,6 +3193,21 @@ def modernize_cmd(
3169
3193
  subsystems: list = ir.get("subsystems") or []
3170
3194
  reverse_graph: dict = ir.get("reverse_graph") or {}
3171
3195
 
3196
+ # Git churn: commit frequency per file in last 90 days → proxy for volatility
3197
+ from sourcecode.contract_pipeline import _get_git_churn
3198
+ _java_rel_paths = [
3199
+ str(Path(p).relative_to(root)).replace("\\", "/") if Path(p).is_absolute() else p.replace("\\", "/")
3200
+ for p in file_list
3201
+ ]
3202
+ _file_churn: dict[str, int] = _get_git_churn(root, _java_rel_paths)
3203
+
3204
+ # Build fqn → churn mapping via source_file field on graph nodes
3205
+ _fqn_churn: dict[str, int] = {}
3206
+ for _n in graph_nodes:
3207
+ _src = (_n.get("source_file") or "").replace("\\", "/")
3208
+ if _src and _src in _file_churn:
3209
+ _fqn_churn[_n["fqn"]] = _file_churn[_src]
3210
+
3172
3211
  # High-coupling nodes: high in_degree (many dependents = risky to change)
3173
3212
  coupling_nodes = sorted(
3174
3213
  [n for n in graph_nodes if n.get("in_degree", 0) >= 3],
@@ -3183,17 +3222,42 @@ def modernize_cmd(
3183
3222
  key=lambda n: n.get("fqn", ""),
3184
3223
  )[:20]
3185
3224
 
3186
- # Hotspot candidates: high in-degree service/repository nodes
3187
- hotspots = [
3188
- {
3189
- "fqn": n["fqn"],
3190
- "role": n.get("role", "other"),
3191
- "in_degree": n.get("in_degree", 0),
3192
- "out_degree": n.get("out_degree", 0),
3193
- }
3194
- for n in coupling_nodes
3195
- if n.get("role") in ("service", "repository", "controller")
3196
- ][:15]
3225
+ # Hotspot candidates: high in-degree service/repository/controller/entity nodes,
3226
+ # ranked by composite score (in_degree × 2 + 5 × git churn normalized by max churn) for volatility signal.
3227
+ _HOTSPOT_ROLES = frozenset({"service", "repository", "controller", "entity"})
3228
+ _hotspot_candidates = [
3229
+ n for n in coupling_nodes if n.get("role") in _HOTSPOT_ROLES
3230
+ ]
3231
+ # Also include high-coupling nodes with name-based role inference even if
3232
+ # they didn't appear in coupling_nodes (in_degree >= 1 is sufficient here)
3233
+ _seen_hotspot_fqns = {n["fqn"] for n in _hotspot_candidates}
3234
+ for _n in graph_nodes:
3235
+ if (_n.get("fqn") not in _seen_hotspot_fqns
3236
+ and _n.get("role") in _HOTSPOT_ROLES
3237
+ and _n.get("in_degree", 0) >= 1
3238
+ and _fqn_churn.get(_n["fqn"], 0) >= 3):
3239
+ _hotspot_candidates.append(_n)
3240
+ _seen_hotspot_fqns.add(_n["fqn"])
3241
+
3242
+ _max_churn = max(_fqn_churn.values(), default=1)
3243
+ hotspots = sorted(
3244
+ [
3245
+ {
3246
+ "fqn": n["fqn"],
3247
+ "role": n.get("role", "other"),
3248
+ "in_degree": n.get("in_degree", 0),
3249
+ "out_degree": n.get("out_degree", 0),
3250
+ "git_churn_90d": _fqn_churn.get(n["fqn"], 0),
3251
+ "hotspot_score": round(
3252
+ n.get("in_degree", 0) * 2.0
3253
+ + (_fqn_churn.get(n["fqn"], 0) / _max_churn) * 5.0,
3254
+ 2,
3255
+ ),
3256
+ }
3257
+ for n in _hotspot_candidates
3258
+ ],
3259
+ key=lambda h: (-h["hotspot_score"], h["fqn"]),
3260
+ )[:15]
3197
3261
 
3198
3262
  # Cross-module tangles: subsystems with high member count
3199
3263
  tangle_modules = sorted(
@@ -14,6 +14,7 @@ No inference, approximation, or heuristics.
14
14
 
15
15
  from __future__ import annotations
16
16
 
17
+ import random
17
18
  import re
18
19
  import subprocess
19
20
  from collections import deque
@@ -217,6 +218,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
217
218
  "@Component": "component",
218
219
  "@Configuration": "config",
219
220
  "@Bean": "config",
221
+ # JPA / Hibernate
222
+ "@Entity": "entity",
223
+ "@MappedSuperclass": "entity",
224
+ "@Embeddable": "entity",
225
+ "@Table": "entity",
220
226
  # CDI / Jakarta EE
221
227
  "@ApplicationScoped": "service",
222
228
  "@RequestScoped": "service",
@@ -226,6 +232,9 @@ _JAVA_ROLE_MAP: dict[str, str] = {
226
232
  "@Dependent": "component",
227
233
  "@Named": "component",
228
234
  "@Produces": "component",
235
+ "@Stateless": "service",
236
+ "@Stateful": "service",
237
+ "@MessageDriven": "service",
229
238
  # JAX-RS
230
239
  "@Provider": "provider",
231
240
  "@Consumes": "controller",
@@ -233,6 +242,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
233
242
  "@QuarkusMain": "entrypoint",
234
243
  "@QuarkusTest": "test",
235
244
  "@QuarkusIntegrationTest": "test",
245
+ "@RegisterForReflection": "component",
246
+ # Spring Security / AOP
247
+ "@Aspect": "config",
248
+ "@EnableWebSecurity": "config",
249
+ "@EnableMethodSecurity": "config",
236
250
  }
237
251
 
238
252
  # Backward-compatible alias — external callers may reference this name.
@@ -746,6 +760,36 @@ def _java_role(annotations: list[str]) -> str:
746
760
  return "unknown"
747
761
 
748
762
 
763
+ # Name-suffix patterns for role inference when annotations are absent.
764
+ # Ordered: more specific patterns first.
765
+ _JAVA_NAME_ROLE_PATTERNS: list[tuple[re.Pattern, str]] = [
766
+ (re.compile(r"(?:Controller|Resource|Endpoint|Handler|Servlet|Filter|Action)$"), "controller"),
767
+ (re.compile(r"(?:ServiceImpl|ServiceBean|ServiceFacade|Facade)$"), "service"),
768
+ (re.compile(r"(?:Service|Manager|Processor|Coordinator|Orchestrator|UseCase|Interactor)$"), "service"),
769
+ (re.compile(r"(?:RepositoryImpl|DaoImpl|DAOImpl)$"), "repository"),
770
+ (re.compile(r"(?:Repository|Dao|DAO|Store|Persistence|JpaRepository|CrudRepository)$"), "repository"),
771
+ (re.compile(r"(?:Entity|Model|Domain|Vo|ValueObject|Record)$"), "entity"),
772
+ (re.compile(r"(?:Config|Configuration|Configurer|AutoConfiguration|Properties|Settings)$"), "config"),
773
+ (re.compile(r"(?:Factory|Builder|Provider|Supplier|Creator|Generator)$"), "provider"),
774
+ (re.compile(r"(?:Listener|Observer|Handler|EventHandler|MessageListener|Consumer)$"), "component"),
775
+ (re.compile(r"(?:Util|Utils|Helper|Helpers|Converter|Transformer|Mapper|Adapter)$"), "component"),
776
+ (re.compile(r"(?:Exception|Error)$"), "other"),
777
+ (re.compile(r"(?:Test|Tests|Spec|IT|IntegrationTest)$"), "test"),
778
+ ]
779
+
780
+
781
+ def _java_role_from_name(simple_name: str) -> str:
782
+ """Infer role from Java class simple name when annotations don't classify it.
783
+
784
+ Returns 'other' (never 'unknown') — callers use 'unknown' to mean
785
+ 'not classified at all'; 'other' means 'classified but no interesting role'.
786
+ """
787
+ for pattern, role in _JAVA_NAME_ROLE_PATTERNS:
788
+ if pattern.search(simple_name):
789
+ return role
790
+ return "other"
791
+
792
+
749
793
  # Backward-compatible alias used by external callers and serializer.
750
794
  _spring_role = _java_role
751
795
 
@@ -1849,7 +1893,7 @@ def _assemble(
1849
1893
  sorted_rels = sorted(relations, key=lambda e: (e.from_symbol, e.type, e.to_symbol))
1850
1894
  sorted_changed = sorted(changed_symbols, key=lambda c: c.symbol)
1851
1895
 
1852
- # Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic)
1896
+ # Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic + name fallback)
1853
1897
  spring_role_map: dict[str, str] = {}
1854
1898
  for sym in sorted_syms:
1855
1899
  if sym.type in ("class", "interface"):
@@ -1857,6 +1901,10 @@ def _assemble(
1857
1901
  # JAX-RS resource: class-level @Path without a recognized annotation → controller
1858
1902
  if role == "unknown" and "@Path" in sym.annotations:
1859
1903
  role = "controller"
1904
+ # Name-based fallback: when annotations provide no signal, infer from class name
1905
+ if role == "unknown":
1906
+ simple = sym.symbol.split(".")[-1].split("#")[0]
1907
+ role = _java_role_from_name(simple)
1860
1908
  spring_role_map[sym.symbol] = role
1861
1909
 
1862
1910
  # Degree maps (graph-derived)
@@ -2976,12 +3024,19 @@ def compute_blast_radius(
2976
3024
  # KeycloakSession with 2023 importers), deep BFS is O(n^depth) and collapses
2977
3025
  # to 70-91s at depth=4. Cap effective depth to 1 for hub classes so the
2978
3026
  # direct-caller list is still accurate but we skip the catastrophic expansion.
3027
+ # Instead of omitting indirect callers entirely, we do a sampled BFS: pick
3028
+ # _SAMPLE_SIZE random direct callers, run depth-2 BFS from those, then scale
3029
+ # up to estimate total indirect reach.
2979
3030
  _HUB_CALLER_THRESHOLD = 500
3031
+ _HUB_SAMPLE_SIZE = 20
3032
+ _HUB_SAMPLE_DEPTH = 2
2980
3033
  _effective_depth = max_depth
3034
+ _hub_class_guard = False
2981
3035
  for seed in matched_fqns:
2982
3036
  _seed_callers = _all_callers_from_rg(seed, reverse_graph)
2983
3037
  if len(_seed_callers) > _HUB_CALLER_THRESHOLD and max_depth > 1:
2984
3038
  _effective_depth = 1
3039
+ _hub_class_guard = True
2985
3040
  break
2986
3041
 
2987
3042
  for seed in matched_fqns:
@@ -3078,6 +3133,35 @@ def compute_blast_radius(
3078
3133
  indirect_callers.append(caller)
3079
3134
  queue.append((caller, depth + 1))
3080
3135
 
3136
+ # Sampled BFS for hub classes: direct BFS was capped at depth=1, so
3137
+ # indirect_callers is empty. Sample _HUB_SAMPLE_SIZE random direct callers,
3138
+ # run depth-_HUB_SAMPLE_DEPTH BFS from those, and scale up to estimate reach.
3139
+ _indirect_sampled = False
3140
+ _indirect_estimated_count: int | None = None
3141
+ if _hub_class_guard and direct_callers:
3142
+ _n_direct = len(direct_callers)
3143
+ _k = min(_HUB_SAMPLE_SIZE, _n_direct)
3144
+ _sample_seeds = random.sample(direct_callers, _k)
3145
+ _sample_visited: set[str] = set(matched_fqns) | set(direct_callers)
3146
+ _sample_queue: list[tuple[str, int]] = [(c, 1) for c in _sample_seeds]
3147
+ _sample_indirect: list[str] = []
3148
+ while _sample_queue:
3149
+ _snode, _sdepth = _sample_queue.pop(0)
3150
+ if _sdepth >= _HUB_SAMPLE_DEPTH:
3151
+ continue
3152
+ for _scaller in _all_callers_from_rg(_snode, reverse_graph):
3153
+ if _scaller not in _sample_visited:
3154
+ _sample_visited.add(_scaller)
3155
+ all_affected[_scaller] = _sdepth + 1
3156
+ _sample_indirect.append(_scaller)
3157
+ _sample_queue.append((_scaller, _sdepth + 1))
3158
+ if _sample_indirect:
3159
+ indirect_callers = _sample_indirect
3160
+ _indirect_sampled = True
3161
+ # Scale: sample covered _k of _n_direct seeds; extrapolate linearly
3162
+ _scale = _n_direct / _k
3163
+ _indirect_estimated_count = round(len(_sample_indirect) * _scale)
3164
+
3081
3165
  # ── 3. Identify affected endpoints from route_surface ─────────────────────
3082
3166
  affected_classes: set[str] = set(matched_fqns) | set(direct_callers) | set(indirect_callers)
3083
3167
  # Expand to enclosing classes of field/method FQNs in affected set.
@@ -3275,6 +3359,8 @@ def compute_blast_radius(
3275
3359
  confidence_level = "low"
3276
3360
 
3277
3361
  # ── 10. Explanation ───────────────────────────────────────────────────────
3362
+ _bfs_truncated = _effective_depth < max_depth
3363
+
3278
3364
  _parts: list[str] = []
3279
3365
  if n_direct:
3280
3366
  _parts.append(f"{n_direct} direct caller{'s' if n_direct != 1 else ''}")
@@ -3298,6 +3384,22 @@ def compute_blast_radius(
3298
3384
  f"({', '.join(_iface_names)}) — Spring/CDI DI pattern"
3299
3385
  )
3300
3386
 
3387
+ # Transparency: hub-class BFS truncation must appear in explanation so the
3388
+ # text and JSON are semantically identical.
3389
+ if _bfs_truncated:
3390
+ if _indirect_sampled and _indirect_estimated_count is not None:
3391
+ _parts.append(
3392
+ f"indirect callers sampled ({_HUB_SAMPLE_SIZE} of {n_direct} seeds, "
3393
+ f"depth={_HUB_SAMPLE_DEPTH}): {n_indirect} found in sample, "
3394
+ f"~{_indirect_estimated_count} estimated total"
3395
+ )
3396
+ else:
3397
+ _parts.append(
3398
+ f"indirect BFS skipped (hub class: {n_direct} direct callers "
3399
+ f"exceed {_HUB_CALLER_THRESHOLD} threshold; no indirect callers reachable "
3400
+ "from sample — graph may be a terminal sink)"
3401
+ )
3402
+
3301
3403
  if not _parts:
3302
3404
  explanation = f"No callers or dependents found for {target!r}. Low-risk isolated change."
3303
3405
  else:
@@ -3324,10 +3426,13 @@ def compute_blast_radius(
3324
3426
  "security_surface_affected": security_surface_affected,
3325
3427
  "cross_module_impact": cross_module_impact,
3326
3428
  "transactional_boundaries_touched": txn_nodes,
3327
- "depth_reached": max_depth,
3429
+ "depth_reached": _effective_depth, # actual BFS depth used, not the requested max
3430
+ "bfs_truncated": _bfs_truncated,
3328
3431
  "stats": {
3329
3432
  "direct_caller_count": n_direct,
3330
3433
  "indirect_caller_count": n_indirect,
3434
+ "indirect_callers_computed": not _bfs_truncated or _indirect_sampled,
3435
+ "indirect_callers_sampled": _indirect_sampled,
3331
3436
  "endpoints_affected_count": n_ep,
3332
3437
  "transactional_boundaries_count": n_txn,
3333
3438
  "mappers_affected_count": n_mappers,
@@ -3335,6 +3440,14 @@ def compute_blast_radius(
3335
3440
  "security_surface_count": n_sec,
3336
3441
  },
3337
3442
  }
3443
+ if _indirect_sampled and _indirect_estimated_count is not None:
3444
+ out["indirect_callers_estimated_count"] = _indirect_estimated_count
3445
+ out["indirect_callers_sample_note"] = (
3446
+ f"indirect_callers contains a sample (BFS depth={_HUB_SAMPLE_DEPTH} from "
3447
+ f"{min(_HUB_SAMPLE_SIZE, n_direct)} of {n_direct} direct callers). "
3448
+ f"Estimated total indirect reach: ~{_indirect_estimated_count}. "
3449
+ "Actual count may differ; use a lower-fan-in entry point for exact traversal."
3450
+ )
3338
3451
  if _candidates_out:
3339
3452
  out["candidates"] = _candidates_out
3340
3453
  if _iface_bridging:
@@ -3344,6 +3457,21 @@ def compute_blast_radius(
3344
3457
  "(Spring/CDI/Guice). direct_callers includes callers of the implemented "
3345
3458
  "interface(s) — these are the real production dependents."
3346
3459
  )
3460
+ if _bfs_truncated:
3461
+ out["bfs_truncation_reason"] = "hub_class_depth_cap"
3462
+ if _indirect_sampled:
3463
+ out["bfs_truncation_note"] = (
3464
+ f"Full BFS capped at depth=1 (hub class: {n_direct} direct callers "
3465
+ f">{_HUB_CALLER_THRESHOLD}). indirect_callers is a sampled estimate — "
3466
+ f"BFS from {min(_HUB_SAMPLE_SIZE, n_direct)} random seeds at depth={_HUB_SAMPLE_DEPTH}."
3467
+ )
3468
+ else:
3469
+ out["bfs_truncation_note"] = (
3470
+ f"Indirect BFS capped at depth=1: target has {n_direct} direct callers "
3471
+ f"(>{_HUB_CALLER_THRESHOLD} threshold). indirect_callers is empty — "
3472
+ "no indirect callers reachable from sampled seeds (terminal sink or sparse graph). "
3473
+ "Use a lower-fan-in entry point for full transitive traversal."
3474
+ )
3347
3475
  if len(direct_callers) > 30:
3348
3476
  out["direct_callers_note"] = (
3349
3477
  f"Showing 30/{n_direct} direct callers. Use --output to inspect full IR."
sourcecode/serializer.py CHANGED
@@ -1979,7 +1979,7 @@ def agent_view(sm: SourceMap, *, full: bool = False) -> dict[str, Any]:
1979
1979
  result["file_relevance_hint"] = (
1980
1980
  f"Showing top {_fr_limit}/{_total_paths} files by score "
1981
1981
  f"({'--full' if full else 'normal'} mode, bounded for signal quality). "
1982
- f"Use --deep for up to {compute_context_limit('deep', _FR_AGENT_CAP)} files."
1982
+ f"Use --full for up to {compute_context_limit('full', _FR_AGENT_CAP)} files."
1983
1983
  )
1984
1984
 
1985
1985
  # ── 5. Monorepo package roles (when available), capped ───────────────────
sourcecode/summarizer.py CHANGED
@@ -300,6 +300,7 @@ class ProjectSummarizer:
300
300
 
301
301
  # Stack with frameworks — keep brief, skip internal module listings
302
302
  non_tooling_stacks = self._filter_non_tooling_stacks(sm)
303
+ primary = None
303
304
  if non_tooling_stacks:
304
305
  primary = self._select_summary_primary_stack(non_tooling_stacks)
305
306
  frameworks = [fw.name for fw in primary.frameworks[:2]]
@@ -313,6 +314,30 @@ class ProjectSummarizer:
313
314
  if domains:
314
315
  parts.append(f"Domains: {', '.join(domains)}")
315
316
 
317
+ # Quantitative structural suffix for Java projects — adds concrete scale signals
318
+ # that README descriptions omit (class count, transactional boundary count).
319
+ if primary is not None and primary.stack.lower() == "java":
320
+ quant_parts: list[str] = []
321
+ java_files = sum(
322
+ 1 for p in sm.file_paths if p.endswith(".java")
323
+ )
324
+ if java_files >= 50:
325
+ quant_parts.append(f"{java_files:,} Java classes")
326
+ txn_classes: list[str] = []
327
+ for stack in non_tooling_stacks:
328
+ txn_classes.extend(getattr(stack, "transactional_classes", []))
329
+ n_txn = len(set(txn_classes))
330
+ if n_txn > 0:
331
+ quant_parts.append(f"{n_txn} transactional boundaries")
332
+ ep_controllers = [
333
+ ep for ep in sm.entry_points
334
+ if ep.kind in ("controller", "rest_controller", "rest", "endpoint")
335
+ ]
336
+ if ep_controllers:
337
+ quant_parts.append(f"{len(ep_controllers)} controller entry points")
338
+ if quant_parts:
339
+ parts.append(", ".join(quant_parts))
340
+
316
341
  return ". ".join(parts) + "."
317
342
 
318
343
  def _detect_architecture_pattern(self, file_paths: list[str]) -> str | None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.23
3
+ Version: 1.31.24
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **AI-ready change intelligence for Java/Spring enterprise monoliths.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.23-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.24-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -263,7 +263,7 @@ pipx install sourcecode
263
263
 
264
264
  ```bash
265
265
  sourcecode version
266
- # sourcecode 1.31.23
266
+ # sourcecode 1.31.24
267
267
  ```
268
268
 
269
269
  ---
@@ -1,12 +1,12 @@
1
- sourcecode/__init__.py,sha256=u-AGXNvPNCuCZnA6MlEhddkQlAKa1tp7BeXtzkz6TpU,104
1
+ sourcecode/__init__.py,sha256=OFgxQ97Ujgsq98XhK0hoPwDBX_R6E7oO8JgpjanvaHQ,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
- sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
3
+ sourcecode/architecture_analyzer.py,sha256=Ry3aYT9dc7XuLmWLT5IZ93RkCf_P14Qtew0nGPvUl_8,42184
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,50578
6
6
  sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
7
7
  sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
8
8
  sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
9
- sourcecode/cli.py,sha256=juaaKE6QgifwFGSRdIxxB7XoQhp51m_AuG3U7zLNQ4Y,147521
9
+ sourcecode/cli.py,sha256=zykj3wNxSXAdiBIgmn6KWLdrNLHCEUrhv4YL9rlRlUE,150539
10
10
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
11
11
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
12
12
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -32,13 +32,13 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
32
32
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
33
33
  sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
34
34
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
35
- sourcecode/repository_ir.py,sha256=vdgJwzfoY2qpQtzcGX7k0E5jWe9fcuyFdkqnF4K6UBs,141767
35
+ sourcecode/repository_ir.py,sha256=sp6IdcZbFAQjznUthMBu_6Mu5RBxVP72d5Vw0hKnH7o,148437
36
36
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
37
37
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
38
38
  sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
39
39
  sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
40
- sourcecode/serializer.py,sha256=zzhWyaseWSAqSIe77QwJkyehVI_1DJBkGM7KqbDQtXo,123313
41
- sourcecode/summarizer.py,sha256=KAtU2mvXT1GLJJQ50Rup42BFsJXij5hmklCwMvcxVx4,19514
40
+ sourcecode/serializer.py,sha256=7TzN2GLtIP3PIVatoB98_7DQdoAkUNvvNVU7Bz7r_K8,123313
41
+ sourcecode/summarizer.py,sha256=BMHJA0Do4rBnabc1_BxHoETTNb5ew0VqCX_eY3_PdCg,20706
42
42
  sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
43
43
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
44
44
  sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
76
76
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
77
77
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
78
78
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
79
- sourcecode-1.31.23.dist-info/METADATA,sha256=3-xJut96GlWt6KcUJCKPO4trdkPbNuSR9cxUo9O6FlI,31103
80
- sourcecode-1.31.23.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
- sourcecode-1.31.23.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
- sourcecode-1.31.23.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
- sourcecode-1.31.23.dist-info/RECORD,,
79
+ sourcecode-1.31.24.dist-info/METADATA,sha256=y1qV8wDttJuezYPLbBUeZZwzQMWWrtJ8clEuBAchsJ0,31103
80
+ sourcecode-1.31.24.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
+ sourcecode-1.31.24.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
+ sourcecode-1.31.24.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
+ sourcecode-1.31.24.dist-info/RECORD,,