PyPI - sourcecode - Version diff - 1.33.11 (tar.gz) → 1.33.13 (tar.gz) - Mend

sourcecode 1.33.11 (tar.gz) → 1.33.13 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

{sourcecode-1.33.11 → sourcecode-1.33.13}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.33.11
+Version: 1.33.13
 Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
 License-File: LICENSE
 Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
 **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
-![Version](https://img.shields.io/badge/version-1.33.11-blue)
+![Version](https://img.shields.io/badge/version-1.33.12-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---

{sourcecode-1.33.11 → sourcecode-1.33.13}/README.md RENAMED Viewed

@@ -2,7 +2,7 @@
 **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
-![Version](https://img.shields.io/badge/version-1.33.11-blue)
+![Version](https://img.shields.io/badge/version-1.33.12-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---

{sourcecode-1.33.11 → sourcecode-1.33.13}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "1.33.11"
+version = "1.33.13"
 description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.33.11"
+__version__ = "1.33.13"

{sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/cli.py RENAMED Viewed

@@ -1102,6 +1102,9 @@ def main(
             obj = _jm.loads(raw)
             if isinstance(obj, dict):
                 obj["_cache"] = meta
+                # Top-level cache_source for one release — backward compat alias
+                if "cache_source" in meta:
+                    obj["cache_source"] = meta["cache_source"]
                 return _jm.dumps(obj, indent=2, ensure_ascii=False)
         except Exception:
             pass
@@ -2273,6 +2276,9 @@ def _make_explanation(reason: str, why: str) -> str:
 def _serialize_relevant_file(f: Any) -> dict:
     from dataclasses import asdict as _asdict
     d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
+    # Emit 'file' as backward-compat alias for 'path' for one release
+    if "path" in d:
+        d["file"] = d["path"]
     reason = d.pop("reason", "") or ""
     why = d.pop("why", "") or ""
     # Expose score as a rounded float so agents can rank/filter files deterministically.

{sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/output_budget.py RENAMED Viewed

@@ -67,7 +67,7 @@ _TRIM_SCHEDULE: list[tuple[str, str | None, int]] = [
     ("execution_paths",          None,                    0),
     ("dependency_graph_summary", None,                    0),
     # Step 6 — last resort
-    ("relevant_files",           None,                    3),
+    ("relevant_files",           None,                   10),
     ("suspected_areas",          None,                    0),
     ("key_dependencies",         None,                    0),
 ]
@@ -148,7 +148,7 @@ def trim_to_budget(data: dict, budget_bytes: int, *, label: str = "") -> dict:
 # Budget constants (bytes) — used by CLI callers
 BUDGET_COMPACT    = 30_000   # compact/agent main cmd
 BUDGET_AGENT      = 40_000   # agent main cmd (slightly more headroom)
-BUDGET_FIX_BUG   = 100_000  # fix-bug (with or without --symptom)
+BUDGET_FIX_BUG   = 200_000  # fix-bug (with or without --symptom)
 BUDGET_REVIEW_PR  = 100_000  # review-pr
 BUDGET_ONBOARD    = 30_000   # onboard
 BUDGET_EXPLAIN    = 30_000   # explain

{sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/prepare_context.py RENAMED Viewed

@@ -627,6 +627,21 @@ _FRONTEND_SYMPTOM_MAP: dict[str, list[str]] = {
     "trabajador": ["trabajador", "empleado", "worker", "asignacion", "trabajadordao", "trabajadorservice"],
 }
+# Generic words that add noise when used as symptom keywords in large repos.
+# "token" and "user" are too ubiquitous in auth systems to be useful alone.
+_SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
+    "fails", "fail", "failed", "failure",
+    "not", "for", "with", "when", "that", "the", "and", "but",
+    "are", "has", "had", "have", "was", "were",
+    "get", "set", "can", "does", "did", "should", "would", "could",
+    "null", "none", "empty", "invalid", "incorrect", "wrong", "missing",
+    "error", "issue", "problem", "bug",
+    "from", "into", "via", "due", "also", "after", "before",
+    "slow", "fast", "new", "old",
+})
+# Repo-scale threshold: above this file count, use stricter injection logic.
+_LARGE_REPO_THRESHOLD = 500
 MAX_FILES_FAST = 2000  # above this threshold --fast uses git-index-only mode
@@ -1695,7 +1710,7 @@ class TaskContextBuilder:
             _camel_expanded = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _camel_expanded)
             symptom_keywords = [
                 w.lower() for w in _re.split(r"[\s\W]+", _camel_expanded)
-                if len(w) > 2
+                if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
             ]
             if symptom_keywords:
                 # Pre-compile combined keyword pattern for fast content scanning
@@ -1710,6 +1725,7 @@ class TaskContextBuilder:
                 _sx_commits: list[dict] = []
                 _sx_synonyms: list[str] = []
                 _sx_boosts: list[dict] = []
+                _sx_graph_expanded: list[str] = []
                 # Pass 1: surface code notes whose text contains any keyword
                 _note_matched_paths: dict[str, int] = {}  # path → count of matching notes
@@ -1759,14 +1775,27 @@ class TaskContextBuilder:
                     ))
                     _existing_paths.add(_cp)
-                # Pass 4: inject files whose path matches symptom keywords
+                # Scale-awareness: large repos need wider scan and stricter injection.
+                _is_large_repo = len(all_paths) > _LARGE_REPO_THRESHOLD
+                # Pass 4: inject files whose path matches symptom keywords.
+                # CamelCase-expand the filename stem so "OfflineSessionLoader" matches
+                # the keyword "offline" even without an explicit directory separator.
+                _p4_dirs_of_injected: set[str] = set()  # directories of high-score injects
                 for _p in all_paths:
                     if _p in _existing_paths:
                         continue
                     if Path(_p).suffix.lower() not in _ALL_EXTENSIONS:
                         continue
                     _p_lower = _p.lower()
-                    _matching_kws = [kw for kw in symptom_keywords if kw in _p_lower]
+                    # CamelCase-expand the stem and append to the search string so
+                    # "OfflineSessionLoader" → "offline session loader" can match
+                    # individual keyword tokens beyond what substring search finds.
+                    _stem_raw = Path(_p).stem
+                    _stem_exp = _re.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_raw)
+                    _stem_exp = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_exp).lower()
+                    _p_search = _p_lower + " " + _stem_exp
+                    _matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
                     if not _matching_kws:
                         continue
                     _boost = 0.2 * len(_matching_kws)
@@ -1781,6 +1810,8 @@ class TaskContextBuilder:
                     ))
                     _existing_paths.add(_p)
                     _sx_direct_path.append(_p)
+                    if _injected_score >= 0.7:
+                        _p4_dirs_of_injected.add(str(Path(_p).parent))
                 # Pass 4b: grep-based injection for frontend→backend synonym terms.
                 # Runs parallel grep for each backend term to find files not yet in
@@ -1828,13 +1859,46 @@ class TaskContextBuilder:
                         ))
                         _existing_paths_now.add(_gf)
-                # Sort before content scan so top candidates get read first
-                relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
-                _CONTENT_SCAN_LIMIT = 80
+                # Pass 4c: subsystem co-location — inject sibling files from the same
+                # directories as high-score (≥0.7) path-matched files. This catches
+                # architecturally adjacent classes that don't mention symptom keywords
+                # in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
+                # siblings in the same infinispan/ package).
+                if _is_large_repo and _p4_dirs_of_injected:
+                    _coloc_existing = {rf.path for rf in relevant_files}
+                    for _cp in all_paths:
+                        if _cp in _coloc_existing:
+                            continue
+                        if Path(_cp).suffix.lower() not in _src_exts:
+                            continue
+                        if str(Path(_cp).parent) in _p4_dirs_of_injected:
+                            relevant_files.append(RelevantFile(
+                                path=_cp,
+                                role="symptom_match",
+                                score=0.55,
+                                reason="subsystem co-location: same directory as symptom-matched file",
+                                why="directory proximity injection",
+                            ))
+                            _coloc_existing.add(_cp)
+                # Sort before content scan so top candidates get read first.
+                # In large repos: prioritise symptom_match files within each score band
+                # so that subsystem-relevant files are content-scanned before generic
+                # structural files at the same score.
+                if _is_large_repo:
+                    relevant_files = sorted(
+                        relevant_files,
+                        key=lambda rf: (-rf.score, 0 if rf.role == "symptom_match" else 1),
+                    )
+                    _CONTENT_SCAN_LIMIT = 150
+                else:
+                    relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
+                    _CONTENT_SCAN_LIMIT = 80
                 _scan_candidates = relevant_files[:_CONTENT_SCAN_LIMIT]
                 _no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
                 _boosted: list[RelevantFile] = []
+                _scanned_body: dict[str, str] = {}  # cache for graph expansion (Pass 5)
                 for _rf in _scan_candidates:
                     _extra = 0.0
                     _extra_syn = 0.0
@@ -1869,9 +1933,11 @@ class TaskContextBuilder:
                     _body_lower = ""
                     if Path(_rf.path).suffix.lower() in _src_exts:
                         try:
-                            _body_lower = (self.root / _rf.path).read_text(
+                            _raw_body = (self.root / _rf.path).read_text(
                                 encoding="utf-8", errors="replace"
-                            )[:12000].lower()  # ~300 lines avg
+                            )[:12000]  # ~300 lines avg
+                            _scanned_body[_rf.path] = _raw_body  # cache for Pass 5
+                            _body_lower = _raw_body.lower()
                         except OSError:
                             pass
@@ -1905,15 +1971,130 @@ class TaskContextBuilder:
                     elif _extra_syn > 0:
                         _new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
+                    _final_score = round(min(_rf.score + _total_extra, 1.0), 2)
                     _boosted.append(RelevantFile(
                         path=_rf.path,
                         role=_rf.role,
-                        score=round(min(_rf.score + _total_extra, 1.0), 2),
+                        score=_final_score,
                         reason=_new_reason,
                         why=_rf.why,
                     ))
-                relevant_files = sorted(_boosted + _no_scan_candidates, key=lambda rf: -rf.score)
+                # Use total boost as a secondary sort key so symptom-matched files
+                # that were boosted from a lower base score rank above structural
+                # files that coincidentally reach the same capped score of 1.0.
+                # This prevents budget-trimming from discarding the most relevant files.
+                _boost_totals: dict[str, float] = {}
+                for _rf in _scan_candidates:
+                    pass  # populated below
+                _boost_totals = {}
+                for _idx, _rf in enumerate(_scan_candidates):
+                    _b_rf = _boosted[_idx]
+                    _boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
+                relevant_files = sorted(
+                    _boosted + _no_scan_candidates,
+                    key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
+                )
+                # Pass 5: reverse graph expansion from high-score seed nodes.
+                # Identifies which source files in the repo REFERENCE the seed
+                # classes (imports, implements, extends, field declarations).
+                # This is a reverse-import lookup: for seed class "UserProvider",
+                # it finds JpaUserProvider / DefaultUserSessionProvider which import
+                # UserProvider — even though those files don't contain symptom
+                # keywords in their own path.
+                # Seeds include any high-score file (not just symptom_match role)
+                # so that files found by _rank_files class-name matching also expand.
+                if not fast:
+                    import re as _re_gx
+                    _GX_SEED_THRESH = 0.5
+                    _GX_EXPAND_CAP = 30
+                    _GX_HOP_DECAY = 0.6
+                    # Collect seed class names from high-score results
+                    _gx_seed_stems: dict[str, float] = {}  # stem → score
+                    for _gx_rf in relevant_files:
+                        if _gx_rf.score < _GX_SEED_THRESH:
+                            continue
+                        if Path(_gx_rf.path).suffix.lower() not in _src_exts:
+                            continue
+                        _gx_stem = Path(_gx_rf.path).stem
+                        _gx_seed_stems[_gx_stem] = max(
+                            _gx_seed_stems.get(_gx_stem, 0.0), _gx_rf.score
+                        )
+                    if _gx_seed_stems:
+                        # Compile per-stem word-boundary patterns for fast matching
+                        import re as _re_gx2
+                        _gx_patterns: dict[str, Any] = {
+                            stem: _re_gx2.compile(rf'\b{_re_gx2.escape(stem)}\b')
+                            for stem in _gx_seed_stems
+                        }
+                        _gx_existing = {rf.path for rf in relevant_files}
+                        _gx_new: list[RelevantFile] = []
+                        _gx_added: set[str] = set()
+                        # Candidates: non-test source files not yet in results.
+                        # Small repos: scan all; large repos: use pre-scanned content only.
+                        # Test files are excluded (fix-bug focuses on production code).
+                        if _is_large_repo:
+                            _gx_candidates = [
+                                p for p in _scanned_body
+                                if p not in _gx_existing and not self._is_test(p)
+                            ]
+                        else:
+                            _gx_candidates = [
+                                p for p in all_paths
+                                if p not in _gx_existing
+                                and Path(p).suffix.lower() in _src_exts
+                                and not self._is_test(p)
+                            ]
+                        for _gx_cand in _gx_candidates:
+                            if len(_gx_new) >= _GX_EXPAND_CAP:
+                                break
+                            if _gx_cand in _gx_added:
+                                continue
+                            # Use cached content or read fresh (small repos only)
+                            _gx_body = _scanned_body.get(_gx_cand)
+                            if _gx_body is None:
+                                if _is_large_repo:
+                                    continue  # never do fresh reads on large repos in Pass 5
+                                try:
+                                    _gx_body = (self.root / _gx_cand).read_text(
+                                        encoding="utf-8", errors="replace"
+                                    )[:8000]
+                                except OSError:
+                                    continue
+                            # Reverse lookup: does this file reference any seed class?
+                            for _gx_stem, _gx_seed_score in _gx_seed_stems.items():
+                                if _gx_patterns[_gx_stem].search(_gx_body):
+                                    _hop1_score = round(
+                                        min(_gx_seed_score * _GX_HOP_DECAY, 0.85), 2
+                                    )
+                                    _gx_new.append(RelevantFile(
+                                        path=_gx_cand,
+                                        role="symptom_match",
+                                        score=_hop1_score,
+                                        reason=(
+                                            f"graph_expansion: references {_gx_stem} "
+                                            f"(1-hop reverse import)"
+                                        ),
+                                        why=f"graph_expansion: 1 hop from {_gx_stem}",
+                                    ))
+                                    _gx_added.add(_gx_cand)
+                                    _sx_graph_expanded.append(_gx_cand)
+                                    break  # one match per candidate is enough
+                        if _gx_new:
+                            relevant_files = sorted(
+                                relevant_files + _gx_new,
+                                key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
+                            )
                 # Synonym note (only when synonyms actually fired)
                 if _frontend_kws and _sx_synonyms:
@@ -1938,6 +2119,7 @@ class TaskContextBuilder:
                     "content_matches": _sx_content[:10],
                     "commit_matches": _sx_commits[:10],
                     "synonym_matches": _sx_synonyms[:10],
+                    "graph_expansion": _sx_graph_expanded[:10],
                     "boosts": _sx_boosts[:30],
                     "final_boost": round(
                         sum(b["value"] for b in _sx_boosts), 3
@@ -2390,7 +2572,8 @@ class TaskContextBuilder:
                     else:
                         _symptom_class_names.add(_tok)
                 _symptom_tokens = {
-                    w.lower() for w in _re_bug.split(r'[\s\W]+', symptom) if len(w) > 2
+                    w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
+                    if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
                 }
         scored: list[tuple[float, str, RelevantFile]] = []
@@ -2487,9 +2670,16 @@ class TaskContextBuilder:
                             content_boost += 0.8
                             _why_parts.append("exception type in path (+0.8)")
-                # AND-weighted token intersection — multiple matching tokens >> single
+                # AND-weighted token intersection — multiple matching tokens >> single.
+                # CamelCase-expand the filename stem so "OfflineSessionLoader" contributes
+                # "offline", "session", "loader" as individual tokens beyond what the raw
+                # path splitting yields. This lets multi-word symptoms match class names.
                 if _symptom_tokens:
                     _path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
+                    _stem_cc = Path(path).stem
+                    _stem_cc_exp = _re_bug.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_cc)
+                    _stem_cc_exp = _re_bug.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_cc_exp).lower()
+                    _path_parts.update(_stem_cc_exp.split())
                     _intersection = _symptom_tokens & _path_parts
                     _n_match = len(_intersection)
                     if _n_match >= 3:

{sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/repository_ir.py RENAMED Viewed

@@ -888,15 +888,40 @@ def _extract_mapped_paths(source: str, class_fqn: str) -> dict[str, str]:
 # Phase 3 — Symbol relation graph
 # ---------------------------------------------------------------------------
+def _build_same_package_map(symbols: list[SymbolRecord]) -> dict[str, dict[str, str]]:
+    """Build {package: {simple_name: FQN}} map from all class/interface symbols.
+    Used by build_repo_ir to resolve same-package types that need no explicit import.
+    In Java, classes in the same package reference each other without import statements,
+    so import_map is empty for them — this map provides the fallback resolution.
+    """
+    result: dict[str, dict[str, str]] = {}
+    for sym in symbols:
+        if sym.type not in ("class", "interface") or "#" in sym.symbol:
+            continue
+        pkg = sym.symbol.rsplit(".", 1)[0] if "." in sym.symbol else ""
+        simple = sym.symbol.split(".")[-1]
+        result.setdefault(pkg, {})[simple] = sym.symbol
+    return result
 def _build_relations(
     symbols: list[SymbolRecord],
     raw_imports: list[str],
     source: str,
     package: str,
     rel_path: str,
+    same_pkg_types: dict[str, str] | None = None,
 ) -> list[RelationEdge]:
-    """Phase 3: Build directed relation graph for symbols in one file."""
+    """Phase 3: Build directed relation graph for symbols in one file.
+    same_pkg_types: {simple_name → FQN} for classes in the same package.
+    Passed by build_repo_ir after a first pass that collects all symbols.
+    Enables resolving injection targets that share a package with the caller
+    and therefore need no explicit Java import statement.
+    """
     edges: list[RelationEdge] = []
+    _same_pkg: dict[str, str] = same_pkg_types or {}
     import_map: dict[str, str] = {}
     for fqn in raw_imports:
@@ -929,15 +954,27 @@ def _build_relations(
                 ))
         if sym.type == "field":
-            for imp_fqn in sym.imports_used:
+            _inject_ann = next(
+                (a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
+            )
+            _field_targets: set[str] = set(sym.imports_used)
+            # Same-package field injection: imports_used is empty when the field type
+            # shares a package with the declaring class (no import needed in Java).
+            # Extract type from signature ("Type name") and resolve via same_pkg_types.
+            if not _field_targets and _same_pkg:
+                _sig_type = (sym.signature or "").split()[0] if sym.signature else ""
+                _sig_base = re.sub(r'<.*', '', _sig_type).strip()
+                if _sig_base and _sig_base[0].isupper():
+                    _same_fqn = _same_pkg.get(_sig_base)
+                    if _same_fqn and _same_fqn != _enclosing_class(sym_fqn):
+                        _field_targets.add(_same_fqn)
+            for imp_fqn in _field_targets:
                 edges.append(RelationEdge(
                     from_symbol=sym_fqn,
                     to_symbol=imp_fqn,
                     type="injects",
                     confidence="high",
-                    evidence={"type": "annotation", "value": next(
-                        (a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
-                    )},
+                    evidence={"type": "annotation", "value": _inject_ann},
                 ))
     # ── Constructor injection ─────────────────────────────────────────────────
@@ -949,7 +986,7 @@ def _build_relations(
             continue
         for simple_type in sym.param_types:
             base = re.sub(r'<.*', '', simple_type).strip()
-            fqn = import_map.get(base)
+            fqn = import_map.get(base) or _same_pkg.get(base)
             if fqn:
                 edges.append(RelationEdge(
                     from_symbol=sym.symbol,
@@ -982,7 +1019,7 @@ def _build_relations(
                 continue
             _ftype = fld.group("type").strip()
             _base = re.sub(r'<.*', '', _ftype).strip()
-            _fqn = import_map.get(_base)
+            _fqn = import_map.get(_base) or _same_pkg.get(_base)
             if _fqn:
                 edges.append(RelationEdge(
                     from_symbol=sym.symbol,
@@ -2632,24 +2669,38 @@ def build_repo_ir(
     if since:
         _since_changed = _get_git_changed_files(root, since)
+    # Pass 1: extract symbols from all files so we can build the same-package
+    # type map before building relations.  Java classes in the same package
+    # reference each other without import statements, so import_map alone cannot
+    # resolve them — _build_same_package_map provides the cross-file fallback.
+    _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
     for rel_path in sorted(file_paths):
         abs_path = root / rel_path
         try:
             source = abs_path.read_text(encoding="utf-8", errors="replace")
         except OSError:
             continue
+        package, symbols, raw_imports = _extract_symbols(source, rel_path)
+        all_symbols.extend(symbols)
+        _per_file.append((rel_path, source, package, raw_imports, symbols))
+    # Build {package: {simple_name: FQN}} from every class/interface found.
+    _same_pkg_map: dict[str, dict[str, str]] = _build_same_package_map(all_symbols)
+    # Pass 2: build relations with same-package type resolution available.
+    for rel_path, source, package, raw_imports, symbols in _per_file:
+        same_pkg_types = _same_pkg_map.get(package, {})
+        relations = _build_relations(
+            symbols, raw_imports, source, package, rel_path,
+            same_pkg_types=same_pkg_types,
+        )
         old_source: Optional[str] = None
         if since:
-            # Only fetch old content for files known to have changed.
-            # Unchanged files have no diff entries — skip git show entirely.
             _file_changed = _since_changed is None or rel_path in _since_changed
             if _file_changed:
                 old_source = _get_git_old_content(root, rel_path, since)
-        package, symbols, raw_imports = _extract_symbols(source, rel_path)
-        relations = _build_relations(symbols, raw_imports, source, package, rel_path)
         if old_source is not None:
             _, old_symbols, _ = _extract_symbols(old_source, rel_path)
             all_changed.extend(_diff_symbols(old_symbols, symbols))
@@ -2664,7 +2715,6 @@ def build_repo_ir(
                     confidence="high",
                 ))
-        all_symbols.extend(symbols)
         all_relations.extend(relations)
     spring_summary = _build_spring_summary(all_symbols)