PyPI - sourcecode - Versions diffs - 1.31.22__py3-none-any.whl → 1.31.24__py3-none-any.whl - Mend

sourcecode 1.31.22py3-none-any.whl → 1.31.24py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

sourcecode/__init__.py +1 -1
sourcecode/architecture_analyzer.py +68 -1
sourcecode/cli.py +77 -13
sourcecode/repository_ir.py +154 -3
sourcecode/serializer.py +32 -9
sourcecode/summarizer.py +85 -16
{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/METADATA +3 -3
{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/RECORD +11 -11
{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/WHEEL +0 -0
{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/entry_points.txt +0 -0
{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/licenses/LICENSE +0 -0

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.31.22"
+__version__ = "1.31.24"

sourcecode/architecture_analyzer.py CHANGED Viewed

@@ -280,7 +280,7 @@ class ArchitectureAnalyzer:
                 })
         # Step 4: bounded context inference
-        bounded_contexts = self._infer_bounded_contexts(domains, graph)
+        bounded_contexts = self._infer_bounded_contexts(domains, graph, sm.file_paths)
         # Overall confidence — based on domain quality, not raw count
         confidence: Literal["high", "medium", "low"]
@@ -703,11 +703,78 @@ class ArchitectureAnalyzer:
         ]
         return result[:16]
+    @staticmethod
+    def _maven_module_bounded_contexts(file_paths: list[str]) -> list[BoundedContext]:
+        """Priority 0: extract bounded contexts from Maven module directory names.
+        Maven multi-module projects have structure: <module>/src/main/java/...
+        The module directory name is a strong bounded context signal
+        (e.g. broadleaf-order, keycloak-services → order, services).
+        Strips the common project-name prefix (longest common prefix across all
+        module names, cut back to its last '-') and one trailing technical suffix
+        (-api, -impl, -core, -base, -common, -parent, -test).
+        Modules that reduce to the same name (e.g. order-api and order-impl) are
+        merged into a single bounded context instead of being emitted twice.
+        Args:
+            file_paths: repository file paths; '/' and backslash separators are both accepted.
+        Returns:
+            One BoundedContext per distinct cleaned module name, in sorted raw
+            module-name order, each with its file list capped at 20 entries.
+            Empty list when fewer than 2 distinct module directories are found.
+        """
+        import os.path as _osp
+        import re as _re
+        _MAVEN_SRC = "src/main/java/"
+        _MAVEN_TEST = "src/test/java/"
+        module_names: dict[str, list[str]] = {}  # raw module dir name → [files]
+        for p in file_paths:
+            norm = p.replace("\\", "/")
+            for marker in (_MAVEN_SRC, _MAVEN_TEST):
+                idx = norm.find(marker)
+                if idx > 0:
+                    # Everything before the marker is the module path; its last
+                    # path segment is the module name.
+                    module_seg = norm[:idx].rstrip("/").rsplit("/", 1)[-1]
+                    if module_seg:
+                        module_names.setdefault(module_seg, []).append(p)
+                    break
+        if len(module_names) < 2:
+            return []
+        # Strip common project-name prefix (e.g. "keycloak-", "broadleaf-"):
+        # commonprefix() is the character-wise longest common prefix.
+        common = _osp.commonprefix(list(module_names))
+        # Only strip prefix up to last '-' (avoid stripping into meaningful segment).
+        # rfind() returns -1 when there is no '-', which yields an empty prefix.
+        prefix_to_strip = common[:common.rfind("-") + 1]
+        _GENERIC_EXTENDED = _GENERIC_NAMES | {
+            "api", "impl", "base", "test", "tests", "main", "java",
+            "integration", "parent", "bom", "platform",
+        }
+        # Trailing technical suffixes that do not name a domain.
+        _SUFFIX_RE = _re.compile(r"-(api|impl|core|base|common|parent|test)$")
+        # cleaned name → files; collapses modules such as order-api / order-impl
+        # into one context so the result never contains duplicate names.
+        merged: dict[str, list[str]] = {}
+        for raw_name, files in sorted(module_names.items()):
+            clean = _SUFFIX_RE.sub("", raw_name[len(prefix_to_strip):])
+            if not clean or clean in _GENERIC_EXTENDED:
+                continue
+            merged.setdefault(clean, []).extend(files)
+        return [
+            BoundedContext(
+                name=name,
+                modules=files[:20],  # cap file list
+                confidence="high",
+            )
+            for name, files in merged.items()
+        ]
     def _infer_bounded_contexts(
         self,
         domains: list[ArchitectureDomain],
         graph: Optional[ModuleGraph],
+        file_paths: list[str] | None = None,
     ) -> list[BoundedContext]:
+        # Priority 0: Maven module names — strong bounded context signal for Java projects
+        if file_paths:
+            maven_bcs = self._maven_module_bounded_contexts(file_paths)
+            if maven_bcs:
+                return maven_bcs
         # Priority 1: use graph SCCs when available
         if graph is not None:
             sccs = self._find_sccs(graph)

sourcecode/cli.py CHANGED Viewed

@@ -423,7 +423,7 @@ def main(
             "High-signal summary (typically 1000–3000 tokens depending on repo size): "
             "stacks, entry points, dependency summary, confidence, and gaps. "
             "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
-            "Use --agent for maximum signal or --slim (when available) for minimal token footprint."
+            "Use --agent for maximum signal."
         ),
     ),
     dependencies: bool = typer.Option(
@@ -1960,7 +1960,11 @@ def _serialize_relevant_file(f: Any) -> dict:
     d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
     reason = d.pop("reason", "") or ""
     why = d.pop("why", "") or ""
-    d.pop("score", None)  # score removed from public output (internal ranking only)
+    # Expose score as a rounded float so agents can rank/filter files deterministically.
+    # Kept as "score" (0.0–1.0 normalized relevance) — higher = more relevant.
+    raw_score = d.pop("score", None)
+    if raw_score is not None:
+        d["score"] = round(float(raw_score), 4)
     explanation = _make_explanation(reason, why)
     if explanation:
         d["explanation"] = explanation
@@ -2147,6 +2151,26 @@ def prepare_context_cmd(
         )
         raise typer.Exit(code=1)
+    # Validate --format: only "json" and "github-comment" are valid for prepare-context.
+    # "yaml" is intentionally NOT supported here (use main command for yaml output).
+    # Invalid values must error loudly — silently falling through to JSON is a lie.
+    _PC_FORMAT_CHOICES = ("json", "github-comment")
+    if format is not None and format not in _PC_FORMAT_CHOICES:
+        typer.echo(
+            f"Error: invalid value '{format}' for --format. "
+            f"Valid options: {', '.join(_PC_FORMAT_CHOICES)}.",
+            err=True,
+        )
+        raise typer.Exit(code=2)
+    # github-comment only renders for review-pr; warn and normalize for other tasks.
+    if format == "github-comment" and task != "review-pr":
+        typer.echo(
+            f"[warning] --format github-comment is only supported for the review-pr task. "
+            f"Outputting JSON for '{task}'.",
+            err=True,
+        )
+        format = "json"
     target = path.resolve()
     if not target.exists() or not target.is_dir():
         typer.echo(f"Error: '{target}' is not a valid directory.", err=True)
@@ -3169,6 +3193,21 @@ def modernize_cmd(
     subsystems: list = ir.get("subsystems") or []
     reverse_graph: dict = ir.get("reverse_graph") or {}
+    # Git churn: commit frequency per file in last 90 days → proxy for volatility
+    from sourcecode.contract_pipeline import _get_git_churn
+    _java_rel_paths = [
+        str(Path(p).relative_to(root)).replace("\\", "/") if Path(p).is_absolute() else p.replace("\\", "/")
+        for p in file_list
+    ]
+    _file_churn: dict[str, int] = _get_git_churn(root, _java_rel_paths)
+    # Build fqn → churn mapping via source_file field on graph nodes
+    _fqn_churn: dict[str, int] = {}
+    for _n in graph_nodes:
+        _src = (_n.get("source_file") or "").replace("\\", "/")
+        if _src and _src in _file_churn:
+            _fqn_churn[_n["fqn"]] = _file_churn[_src]
     # High-coupling nodes: high in_degree (many dependents = risky to change)
     coupling_nodes = sorted(
         [n for n in graph_nodes if n.get("in_degree", 0) >= 3],
@@ -3183,17 +3222,42 @@ def modernize_cmd(
         key=lambda n: n.get("fqn", ""),
     )[:20]
-    # Hotspot candidates: high in-degree service/repository nodes
-    hotspots = [
-        {
-            "fqn": n["fqn"],
-            "role": n.get("role", "other"),
-            "in_degree": n.get("in_degree", 0),
-            "out_degree": n.get("out_degree", 0),
-        }
-        for n in coupling_nodes
-        if n.get("role") in ("service", "repository", "controller")
-    ][:15]
+    # Hotspot candidates: high in-degree service/repository/controller/entity nodes,
+    # ranked by composite score (in_degree × 2 + 5 × git churn normalized to the max churn).
+    _HOTSPOT_ROLES = frozenset({"service", "repository", "controller", "entity"})
+    _hotspot_candidates = [
+        n for n in coupling_nodes if n.get("role") in _HOTSPOT_ROLES
+    ]
+    # Also include nodes with a hotspot role (possibly name-inferred) that did not
+    # appear in coupling_nodes, provided they have in_degree >= 1 and git churn >= 3.
+    _seen_hotspot_fqns = {n["fqn"] for n in _hotspot_candidates}
+    for _n in graph_nodes:
+        if (_n.get("fqn") not in _seen_hotspot_fqns
+                and _n.get("role") in _HOTSPOT_ROLES
+                and _n.get("in_degree", 0) >= 1
+                and _fqn_churn.get(_n["fqn"], 0) >= 3):
+            _hotspot_candidates.append(_n)
+            _seen_hotspot_fqns.add(_n["fqn"])
+    _max_churn = max(_fqn_churn.values(), default=1)
+    hotspots = sorted(
+        [
+            {
+                "fqn": n["fqn"],
+                "role": n.get("role", "other"),
+                "in_degree": n.get("in_degree", 0),
+                "out_degree": n.get("out_degree", 0),
+                "git_churn_90d": _fqn_churn.get(n["fqn"], 0),
+                "hotspot_score": round(
+                    n.get("in_degree", 0) * 2.0
+                    + (_fqn_churn.get(n["fqn"], 0) / _max_churn) * 5.0,
+                    2,
+                ),
+            }
+            for n in _hotspot_candidates
+        ],
+        key=lambda h: (-h["hotspot_score"], h["fqn"]),
+    )[:15]
     # Cross-module tangles: subsystems with high member count
     tangle_modules = sorted(

sourcecode/repository_ir.py CHANGED Viewed

@@ -14,6 +14,7 @@ No inference, approximation, or heuristics.
 from __future__ import annotations
+import random
 import re
 import subprocess
 from collections import deque
@@ -217,6 +218,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
     "@Component": "component",
     "@Configuration": "config",
     "@Bean": "config",
+    # JPA / Hibernate
+    "@Entity": "entity",
+    "@MappedSuperclass": "entity",
+    "@Embeddable": "entity",
+    "@Table": "entity",
     # CDI / Jakarta EE
     "@ApplicationScoped": "service",
     "@RequestScoped": "service",
@@ -226,6 +232,9 @@ _JAVA_ROLE_MAP: dict[str, str] = {
     "@Dependent": "component",
     "@Named": "component",
     "@Produces": "component",
+    "@Stateless": "service",
+    "@Stateful": "service",
+    "@MessageDriven": "service",
     # JAX-RS
     "@Provider": "provider",
     "@Consumes": "controller",
@@ -233,6 +242,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
     "@QuarkusMain": "entrypoint",
     "@QuarkusTest": "test",
     "@QuarkusIntegrationTest": "test",
+    "@RegisterForReflection": "component",
+    # Spring Security / AOP
+    "@Aspect": "config",
+    "@EnableWebSecurity": "config",
+    "@EnableMethodSecurity": "config",
 }
 # Backward-compatible alias — external callers may reference this name.
@@ -746,6 +760,36 @@ def _java_role(annotations: list[str]) -> str:
     return "unknown"
+# Name-suffix patterns for role inference when annotations are absent.
+# Ordered: the first matching pattern wins, so a specific suffix must come
+# before any shorter suffix it ends with (e.g. "EventHandler" before "Handler").
+_JAVA_NAME_ROLE_PATTERNS: list[tuple[re.Pattern, str]] = [
+    # Must precede the controller pattern: "...EventHandler" also ends in "Handler".
+    (re.compile(r"(?:EventHandler|MessageListener)$"), "component"),
+    (re.compile(r"(?:Controller|Resource|Endpoint|Handler|Servlet|Filter|Action)$"), "controller"),
+    (re.compile(r"(?:ServiceImpl|ServiceBean|ServiceFacade|Facade)$"), "service"),
+    (re.compile(r"(?:Service|Manager|Processor|Coordinator|Orchestrator|UseCase|Interactor)$"), "service"),
+    (re.compile(r"(?:RepositoryImpl|DaoImpl|DAOImpl)$"), "repository"),
+    (re.compile(r"(?:Repository|Dao|DAO|Store|Persistence|JpaRepository|CrudRepository)$"), "repository"),
+    (re.compile(r"(?:Entity|Model|Domain|Vo|ValueObject|Record)$"), "entity"),
+    (re.compile(r"(?:Config|Configuration|Configurer|AutoConfiguration|Properties|Settings)$"), "config"),
+    (re.compile(r"(?:Factory|Builder|Provider|Supplier|Creator|Generator)$"), "provider"),
+    (re.compile(r"(?:Listener|Observer|Consumer)$"), "component"),
+    (re.compile(r"(?:Util|Utils|Helper|Helpers|Converter|Transformer|Mapper|Adapter)$"), "component"),
+    (re.compile(r"(?:Exception|Error)$"), "other"),
+    (re.compile(r"(?:Test|Tests|Spec|IT|IntegrationTest)$"), "test"),
+]
+def _java_role_from_name(simple_name: str) -> str:
+    """Infer role from Java class simple name when annotations don't classify it.
+    Patterns in _JAVA_NAME_ROLE_PATTERNS are tried in order against the end of
+    the name; the role of the first match is returned.
+    Returns 'other' (never 'unknown') — callers use 'unknown' to mean
+    'not classified at all'; 'other' means 'classified but no interesting role'.
+    """
+    for pattern, role in _JAVA_NAME_ROLE_PATTERNS:
+        if pattern.search(simple_name):
+            return role
+    return "other"
 # Backward-compatible alias used by external callers and serializer.
 _spring_role = _java_role
@@ -1093,7 +1137,18 @@ def _resolve_jaxrs_prefixes(
     for parent_simple, locator_path in locator_map[cls_simple]:
         parent_full = _resolve_jaxrs_prefixes(parent_simple, class_info, locator_map, new_visited)
-        for pp in parent_full:
+        # Skip implementation/unrooted parents: if the parent resolves to only empty
+        # prefixes AND has no class-level @Path annotation, it is a concrete impl class
+        # (e.g. DefaultClientsApi implements ClientsApi) that duplicates a locator method
+        # from its interface. Including it would produce spurious short paths like /{id}
+        # alongside the correctly-resolved full path. The interface version is already
+        # in the locator_map and will produce the correct full path.
+        _parent_has_path_ann = class_info.get(parent_simple, {}).get("has_path_ann", False)
+        _non_empty_parent = [p for p in parent_full if p]
+        if not _non_empty_parent and not _parent_has_path_ann:
+            continue
+        use_parent_paths = _non_empty_parent if _non_empty_parent else parent_full
+        for pp in use_parent_paths:
             for op in own_prefixes:
                 combined = _join_path_segments(pp, locator_path, op)
                 full_prefixes.append(combined)
@@ -1838,7 +1893,7 @@ def _assemble(
     sorted_rels = sorted(relations, key=lambda e: (e.from_symbol, e.type, e.to_symbol))
     sorted_changed = sorted(changed_symbols, key=lambda c: c.symbol)
-    # Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic)
+    # Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic + name fallback)
     spring_role_map: dict[str, str] = {}
     for sym in sorted_syms:
         if sym.type in ("class", "interface"):
@@ -1846,6 +1901,10 @@ def _assemble(
             # JAX-RS resource: class-level @Path without a recognized annotation → controller
             if role == "unknown" and "@Path" in sym.annotations:
                 role = "controller"
+            # Name-based fallback: when annotations provide no signal, infer from class name
+            if role == "unknown":
+                simple = sym.symbol.split(".")[-1].split("#")[0]
+                role = _java_role_from_name(simple)
             spring_role_map[sym.symbol] = role
     # Degree maps (graph-derived)
@@ -2833,6 +2892,18 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
                 entry["required_permission"] = security_info["required_permission"]
         endpoints.append(entry)
+    # Filter out endpoints whose path looks like a Java FQN (e.g. dynamic admin routing
+    # in frameworks like Broadleaf Commerce where @AdminSection registers entity class
+    # FQNs as URL segments). These are not real REST paths — they are resolved at
+    # runtime by the framework. Including them pollutes the endpoint surface with 20+
+    # garbage entries that confuse agents and break endpoint count accuracy.
+    # Pattern: path segment that matches a Java package hierarchy (org.foo.Bar).
+    import re as _re_fqn
+    _FQN_PATH_RE = _re_fqn.compile(
+        r"/(org|com|net|io|edu)\.[a-z][a-z0-9]*\.[a-zA-Z]",
+    )
+    endpoints = [e for e in endpoints if not _FQN_PATH_RE.search(e.get("path", ""))]
     # "no_security_signal" = no recognized security annotation at method OR class level.
     # Note: repos may use framework-level security (e.g. Keycloak itself) with no
     # per-endpoint annotations — this count reflects annotation-based coverage only.
@@ -2953,12 +3024,19 @@ def compute_blast_radius(
     # KeycloakSession with 2023 importers), deep BFS is O(n^depth) and collapses
     # to 70-91s at depth=4.  Cap effective depth to 1 for hub classes so the
     # direct-caller list is still accurate but we skip the catastrophic expansion.
+    # Instead of omitting indirect callers entirely, we do a sampled BFS: pick
+    # _HUB_SAMPLE_SIZE random direct callers, run a depth-_HUB_SAMPLE_DEPTH BFS
+    # from those, then scale up linearly to estimate total indirect reach.
     _HUB_CALLER_THRESHOLD = 500
+    _HUB_SAMPLE_SIZE = 20
+    _HUB_SAMPLE_DEPTH = 2
     _effective_depth = max_depth
+    _hub_class_guard = False
     for seed in matched_fqns:
         _seed_callers = _all_callers_from_rg(seed, reverse_graph)
         if len(_seed_callers) > _HUB_CALLER_THRESHOLD and max_depth > 1:
             _effective_depth = 1
+            _hub_class_guard = True
             break
     for seed in matched_fqns:
@@ -3055,6 +3133,35 @@ def compute_blast_radius(
                 indirect_callers.append(caller)
                 queue.append((caller, depth + 1))
+    # Sampled BFS for hub classes: direct BFS was capped at depth=1, so
+    # indirect_callers is empty.  Sample _HUB_SAMPLE_SIZE random direct callers,
+    # run depth-_HUB_SAMPLE_DEPTH BFS from those, and scale up to estimate reach.
+    _indirect_sampled = False
+    _indirect_estimated_count: int | None = None
+    if _hub_class_guard and direct_callers:
+        _n_direct = len(direct_callers)
+        _k = min(_HUB_SAMPLE_SIZE, _n_direct)
+        _sample_seeds = random.sample(direct_callers, _k)
+        _sample_visited: set[str] = set(matched_fqns) | set(direct_callers)
+        _sample_queue: list[tuple[str, int]] = [(c, 1) for c in _sample_seeds]
+        _sample_indirect: list[str] = []
+        while _sample_queue:
+            _snode, _sdepth = _sample_queue.pop(0)
+            if _sdepth >= _HUB_SAMPLE_DEPTH:
+                continue
+            for _scaller in _all_callers_from_rg(_snode, reverse_graph):
+                if _scaller not in _sample_visited:
+                    _sample_visited.add(_scaller)
+                    all_affected[_scaller] = _sdepth + 1
+                    _sample_indirect.append(_scaller)
+                    _sample_queue.append((_scaller, _sdepth + 1))
+        if _sample_indirect:
+            indirect_callers = _sample_indirect
+            _indirect_sampled = True
+            # Scale: sample covered _k of _n_direct seeds; extrapolate linearly
+            _scale = _n_direct / _k
+            _indirect_estimated_count = round(len(_sample_indirect) * _scale)
     # ── 3. Identify affected endpoints from route_surface ─────────────────────
     affected_classes: set[str] = set(matched_fqns) | set(direct_callers) | set(indirect_callers)
     # Expand to enclosing classes of field/method FQNs in affected set.
@@ -3252,6 +3359,8 @@ def compute_blast_radius(
         confidence_level = "low"
     # ── 10. Explanation ───────────────────────────────────────────────────────
+    _bfs_truncated = _effective_depth < max_depth
     _parts: list[str] = []
     if n_direct:
         _parts.append(f"{n_direct} direct caller{'s' if n_direct != 1 else ''}")
@@ -3275,6 +3384,22 @@ def compute_blast_radius(
             f"({', '.join(_iface_names)}) — Spring/CDI DI pattern"
         )
+    # Transparency: hub-class BFS truncation must appear in explanation so the
+    # text and JSON are semantically identical.
+    if _bfs_truncated:
+        if _indirect_sampled and _indirect_estimated_count is not None:
+            _parts.append(
+                f"indirect callers sampled ({_HUB_SAMPLE_SIZE} of {n_direct} seeds, "
+                f"depth={_HUB_SAMPLE_DEPTH}): {n_indirect} found in sample, "
+                f"~{_indirect_estimated_count} estimated total"
+            )
+        else:
+            _parts.append(
+                f"indirect BFS skipped (hub class: {n_direct} direct callers "
+                f"exceed {_HUB_CALLER_THRESHOLD} threshold; no indirect callers reachable "
+                "from sample — graph may be a terminal sink)"
+            )
     if not _parts:
         explanation = f"No callers or dependents found for {target!r}. Low-risk isolated change."
     else:
@@ -3301,10 +3426,13 @@ def compute_blast_radius(
         "security_surface_affected": security_surface_affected,
         "cross_module_impact": cross_module_impact,
         "transactional_boundaries_touched": txn_nodes,
-        "depth_reached": max_depth,
+        "depth_reached": _effective_depth,  # actual BFS depth used, not the requested max
+        "bfs_truncated": _bfs_truncated,
         "stats": {
             "direct_caller_count": n_direct,
             "indirect_caller_count": n_indirect,
+            "indirect_callers_computed": not _bfs_truncated or _indirect_sampled,
+            "indirect_callers_sampled": _indirect_sampled,
             "endpoints_affected_count": n_ep,
             "transactional_boundaries_count": n_txn,
             "mappers_affected_count": n_mappers,
@@ -3312,6 +3440,14 @@ def compute_blast_radius(
             "security_surface_count": n_sec,
         },
     }
+    if _indirect_sampled and _indirect_estimated_count is not None:
+        out["indirect_callers_estimated_count"] = _indirect_estimated_count
+        out["indirect_callers_sample_note"] = (
+            f"indirect_callers contains a sample (BFS depth={_HUB_SAMPLE_DEPTH} from "
+            f"{min(_HUB_SAMPLE_SIZE, n_direct)} of {n_direct} direct callers). "
+            f"Estimated total indirect reach: ~{_indirect_estimated_count}. "
+            "Actual count may differ; use a lower-fan-in entry point for exact traversal."
+        )
     if _candidates_out:
         out["candidates"] = _candidates_out
     if _iface_bridging:
@@ -3321,6 +3457,21 @@ def compute_blast_radius(
             "(Spring/CDI/Guice). direct_callers includes callers of the implemented "
             "interface(s) — these are the real production dependents."
         )
+    if _bfs_truncated:
+        out["bfs_truncation_reason"] = "hub_class_depth_cap"
+        if _indirect_sampled:
+            out["bfs_truncation_note"] = (
+                f"Full BFS capped at depth=1 (hub class: {n_direct} direct callers "
+                f">{_HUB_CALLER_THRESHOLD}). indirect_callers is a sampled estimate — "
+                f"BFS from {min(_HUB_SAMPLE_SIZE, n_direct)} random seeds at depth={_HUB_SAMPLE_DEPTH}."
+            )
+        else:
+            out["bfs_truncation_note"] = (
+                f"Indirect BFS capped at depth=1: target has {n_direct} direct callers "
+                f"(>{_HUB_CALLER_THRESHOLD} threshold). indirect_callers is empty — "
+                "no indirect callers reachable from sampled seeds (terminal sink or sparse graph). "
+                "Use a lower-fan-in entry point for full transitive traversal."
+            )
     if len(direct_callers) > 30:
         out["direct_callers_note"] = (
             f"Showing 30/{n_direct} direct callers. Use --output to inspect full IR."

sourcecode/serializer.py CHANGED Viewed

@@ -1771,24 +1771,47 @@ def _angular_analysis(sm: "SourceMap") -> "Optional[dict[str, Any]]":
                 if val and val not in route_paths:
                     route_paths.append(val)
-    # Angular version from package.json
+    # Angular version from package.json — check root first, then subdirectories.
+    # In monorepos (Java + Angular), the Angular package.json is in a subdirectory
+    # like frontend/ and not at the repo root. We probe candidate locations.
     angular_version: Optional[str] = None
-    pkg_json = root / "package.json"
-    if pkg_json.exists():
+    def _read_angular_version_from_pkg(pkg_path: Path) -> Optional[str]:
+        """Extract @angular/core version from a package.json file."""
         try:
-            pkg = _json.loads(pkg_json.read_text(encoding="utf-8", errors="replace"))
-            # Use `or {}` so explicit `null` values in package.json don't
-            # raise TypeError when unpacking (BUG-4).
+            pkg = _json.loads(pkg_path.read_text(encoding="utf-8", errors="replace"))
             deps = {
                 **(pkg.get("dependencies") or {}),
                 **(pkg.get("devDependencies") or {}),
                 **(pkg.get("peerDependencies") or {}),
             }
             av = deps.get("@angular/core")
-            if av:
-                angular_version = av.lstrip("^~>=")
+            if av and isinstance(av, str):
+                return av.lstrip("^~>=")
         except Exception:
             pass
+        return None
+    # 1. Try root package.json first (fastest, most common for pure Angular projects)
+    _root_pkg = root / "package.json"
+    if _root_pkg.exists():
+        angular_version = _read_angular_version_from_pkg(_root_pkg)
+    # 2. If not found at root, search subdirectory package.json files.
+    # Limit to ts_files-derived subdirs to avoid scanning the whole repo.
+    if angular_version is None and ts_files:
+        _candidate_dirs: set[str] = set()
+        for ts_rel in ts_files[:200]:  # sample first 200 ts files
+            parts = ts_rel.replace("\\", "/").split("/")
+            if len(parts) >= 2:
+                _candidate_dirs.add(parts[0])  # top-level subdir (e.g. "frontend")
+        for subdir in sorted(_candidate_dirs):
+            _sub_pkg = root / subdir / "package.json"
+            if _sub_pkg.exists():
+                _v = _read_angular_version_from_pkg(_sub_pkg)
+                if _v:
+                    angular_version = _v
+                    break
     # Also check angular.json for entry point
     entry_point: Optional[str] = None
@@ -1956,7 +1979,7 @@ def agent_view(sm: SourceMap, *, full: bool = False) -> dict[str, Any]:
             result["file_relevance_hint"] = (
                 f"Showing top {_fr_limit}/{_total_paths} files by score "
                 f"({'--full' if full else 'normal'} mode, bounded for signal quality). "
-                f"Use --deep for up to {compute_context_limit('deep', _FR_AGENT_CAP)} files."
+                f"Use --full for up to {compute_context_limit('full', _FR_AGENT_CAP)} files."
             )
     # ── 5. Monorepo package roles (when available), capped ───────────────────

sourcecode/summarizer.py CHANGED Viewed

@@ -203,34 +203,78 @@ class ProjectSummarizer:
         __import__("re").IGNORECASE,
     )
+    # Patterns that indicate license notices or user-facing marketing text.
+    # These describe what the product does FOR users or its licensing terms,
+    # not the codebase architecture.
+    _LICENSE_MARKETING_RE = __import__("re").compile(
+        r"\bfair[- ]use\b"                        # Fair Use license
+        r"|\bcommunity edition\b"                 # product tier labels
+        r"|\benterprise edition\b"
+        r"|\bcommercial licen[sc]e\b"
+        r"|\bsource.available\b"
+        r"|\bavailable to companies\b"            # license restriction
+        r"|\bunder \$\d+[MK]\b"                   # revenue threshold
+        r"|\bimportant:\s"                        # WARNING/IMPORTANT caveats
+        r"|\badd authentication to\b"             # user-facing "add X to Y" marketing
+        r"|\bno need to deal with\b"
+        r"|\bwith minimum effort\b"
+        r"|\bsign up\b.*\bevaluation\b"
+        r"|\bcontact us\b.*\bmore information\b",
+        __import__("re").IGNORECASE,
+    )
     def _extract_first_useful_paragraph(self, content: str) -> str | None:
+        """Extract the first paragraph that describes the project architecture, not its license or marketing."""
         import re as _re
         _BADGE_RE = _re.compile(r"^\[?!\[")  # [![badge](...)] or ![img](...)
         _LINK_ONLY_RE = _re.compile(r"^\[.*?\]\(.*?\)$")  # pure link line
-        lines: list[str] = []
+        paragraphs: list[str] = []
+        current_lines: list[str] = []
         in_code_block = False
         for raw_line in content.splitlines():
             line = raw_line.strip()
             if line.startswith("```"):
                 in_code_block = not in_code_block
                 continue
-            if in_code_block or not line or line.startswith(("#", "<!--", ">")):
-                if lines:
-                    break
+            if in_code_block:
                 continue
-            # Skip badge-only lines and pure-link lines — they are metadata, not descriptions
-            if _BADGE_RE.match(line) or (not lines and _LINK_ONLY_RE.match(line)):
+            if not line or line.startswith(("#", "<!--", ">")):
+                if current_lines:
+                    paragraphs.append(" ".join(current_lines).strip())
+                    current_lines = []
                 continue
-            lines.append(line)
-        if not lines:
-            return None
-        paragraph = " ".join(lines).strip()
-        # Reject paragraphs that are startup/setup snippets, not domain descriptions.
-        # Count how many startup signals appear; >1 means the paragraph is instructions.
-        _startup_hits = len(self._STARTUP_RE.findall(paragraph))
-        if _startup_hits >= 2:
-            return None
-        return paragraph
+            if _BADGE_RE.match(line) or _LINK_ONLY_RE.match(line):
+                if current_lines:
+                    paragraphs.append(" ".join(current_lines).strip())
+                    current_lines = []
+                continue
+            current_lines.append(line)
+        if current_lines:
+            paragraphs.append(" ".join(current_lines).strip())
+        _MD_LINK_RE = _re.compile(r"\[.+?\]\(.+?\)")
+        for paragraph in paragraphs[:6]:  # Check up to 6 paragraphs
+            if not paragraph:
+                continue
+            # Reject very short fragments (< 30 chars) — likely just a section title
+            if len(paragraph) < 30:
+                continue
+            # Reject startup/setup snippets
+            _startup_hits = len(self._STARTUP_RE.findall(paragraph))
+            if _startup_hits >= 2:
+                continue
+            # Reject license notices and user-facing marketing text
+            if self._LICENSE_MARKETING_RE.search(paragraph):
+                continue
+            # Reject link-list paragraphs (docs/navigation sections):
+            # if more than 2 markdown links dominate the paragraph, it's a nav section
+            _link_count = len(_MD_LINK_RE.findall(paragraph))
+            if _link_count > 2 and _link_count * 30 > len(paragraph):
+                continue
+            return paragraph
+        return None
     _TYPE_LABELS: dict[str, str] = {
         "cli": "CLI",
@@ -256,6 +300,7 @@ class ProjectSummarizer:
         # Stack with frameworks — keep brief, skip internal module listings
         non_tooling_stacks = self._filter_non_tooling_stacks(sm)
+        primary = None
         if non_tooling_stacks:
             primary = self._select_summary_primary_stack(non_tooling_stacks)
             frameworks = [fw.name for fw in primary.frameworks[:2]]
@@ -269,6 +314,30 @@ class ProjectSummarizer:
         if domains:
             parts.append(f"Domains: {', '.join(domains)}")
+        # Quantitative structural suffix for Java projects — adds concrete scale signals
+        # that README descriptions omit (class count, transactional boundary count).
+        if primary is not None and primary.stack.lower() == "java":
+            quant_parts: list[str] = []
+            java_files = sum(
+                1 for p in sm.file_paths if p.endswith(".java")
+            )
+            if java_files >= 50:
+                quant_parts.append(f"{java_files:,} Java classes")
+            txn_classes: list[str] = []
+            for stack in non_tooling_stacks:
+                txn_classes.extend(getattr(stack, "transactional_classes", []))
+            n_txn = len(set(txn_classes))
+            if n_txn > 0:
+                quant_parts.append(f"{n_txn} transactional boundaries")
+            ep_controllers = [
+                ep for ep in sm.entry_points
+                if ep.kind in ("controller", "rest_controller", "rest", "endpoint")
+            ]
+            if ep_controllers:
+                quant_parts.append(f"{len(ep_controllers)} controller entry points")
+            if quant_parts:
+                parts.append(", ".join(quant_parts))
         return ". ".join(parts) + "."
     def _detect_architecture_pattern(self, file_paths: list[str]) -> str | None:

{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.31.22
+Version: 1.31.24
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
 **AI-ready change intelligence for Java/Spring enterprise monoliths.**
-![Version](https://img.shields.io/badge/version-1.31.22-blue)
+![Version](https://img.shields.io/badge/version-1.31.24-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -263,7 +263,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.31.22
+# sourcecode 1.31.24
 ```
 ---

{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-sourcecode/__init__.py,sha256=Wsav7BZkVmw8XZqjz_WUnhLQyGjtZVwjYnyc_N4sraE,104
+sourcecode/__init__.py,sha256=OFgxQ97Ujgsq98XhK0hoPwDBX_R6E7oO8JgpjanvaHQ,104
 sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
-sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
+sourcecode/architecture_analyzer.py,sha256=Ry3aYT9dc7XuLmWLT5IZ93RkCf_P14Qtew0nGPvUl_8,42184
 sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
 sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,50578
 sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
 sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
 sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
-sourcecode/cli.py,sha256=qMn-4zD8v03dmkn-AZsf2TSplyhjbq9ZPMAcWl_Lrxg,147576
+sourcecode/cli.py,sha256=zykj3wNxSXAdiBIgmn6KWLdrNLHCEUrhv4YL9rlRlUE,150539
 sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
 sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
 sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -32,13 +32,13 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
 sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
 sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
-sourcecode/repository_ir.py,sha256=NooCrMJYqycKSYTEroVWTYR8X83hHaAYKTsgYxvlz-I,140221
+sourcecode/repository_ir.py,sha256=sp6IdcZbFAQjznUthMBu_6Mu5RBxVP72d5Vw0hKnH7o,148437
 sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
 sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
 sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
 sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
-sourcecode/serializer.py,sha256=V8ZV3Y1j4T6rkpO09-PvpVORioWWWbSnOvDjZ2hmQ2U,122144
-sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
+sourcecode/serializer.py,sha256=7TzN2GLtIP3PIVatoB98_7DQdoAkUNvvNVU7Bz7r_K8,123313
+sourcecode/summarizer.py,sha256=BMHJA0Do4rBnabc1_BxHoETTNb5ew0VqCX_eY3_PdCg,20706
 sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
 sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
 sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-1.31.22.dist-info/METADATA,sha256=zjEDrWUQ-08LOjvIfXTDUgQ4UTPkneyr4CFGZc5yaOo,31103
-sourcecode-1.31.22.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-1.31.22.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-1.31.22.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-1.31.22.dist-info/RECORD,,
+sourcecode-1.31.24.dist-info/METADATA,sha256=y1qV8wDttJuezYPLbBUeZZwzQMWWrtJ8clEuBAchsJ0,31103
+sourcecode-1.31.24.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-1.31.24.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-1.31.24.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-1.31.24.dist-info/RECORD,,

{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-1.31.22.dist-info → sourcecode-1.31.24.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 1.31.22__py3-none-any.whl → 1.31.24__py3-none-any.whl

sourcecode 1.31.22__py3-none-any.whl → 1.31.24__py3-none-any.whl