sourcecode 1.32.3__py3-none-any.whl → 1.32.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cache.py +109 -9
- sourcecode/cache.tmp_new +772 -0
- sourcecode/cli.py +57 -35
- {sourcecode-1.32.3.dist-info → sourcecode-1.32.4.dist-info}/METADATA +1 -187
- {sourcecode-1.32.3.dist-info → sourcecode-1.32.4.dist-info}/RECORD +9 -8
- {sourcecode-1.32.3.dist-info → sourcecode-1.32.4.dist-info}/WHEEL +0 -0
- {sourcecode-1.32.3.dist-info → sourcecode-1.32.4.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.32.3.dist-info → sourcecode-1.32.4.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cache.py
CHANGED
|
@@ -92,6 +92,8 @@ _CAS_FIELDS: frozenset[str] = frozenset([
|
|
|
92
92
|
_CAS_THRESHOLD: int = 4096
|
|
93
93
|
|
|
94
94
|
_DEFAULT_KEEP_COMMITS: int = 5
|
|
95
|
+
_DEFAULT_MAX_CORES: int = 20
|
|
96
|
+
_DEFAULT_MAX_SIZE_MB: int = 50
|
|
95
97
|
|
|
96
98
|
# Matches "snapshot-<hex_commit>-<hex_flags>.json.gz"
|
|
97
99
|
_SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
|
|
@@ -124,6 +126,58 @@ def cache_dir(repo_root: Path) -> Path:
|
|
|
124
126
|
return base / repo_id(repo_root)
|
|
125
127
|
|
|
126
128
|
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Public API — observability
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
def status(repo_root: Path) -> dict[str, Any]:
    """Return a stats dict describing the current cache state for *repo_root*.

    Keys: ``cache_dir``, ``cores``, ``snapshots``, ``views``, ``cas_blobs``,
    ``total_size_bytes``, ``total_size_mb``.

    When the cache directory does not exist every counter is zero.  A file
    that disappears between being listed and being sized (for instance if
    another process deletes it) is still counted but contributes zero bytes
    instead of raising.
    """
    cache_d = cache_dir(repo_root)
    if not cache_d.exists():
        return {
            "cache_dir": str(cache_d),
            "cores": 0, "snapshots": 0, "views": 0, "cas_blobs": 0,
            "total_size_bytes": 0, "total_size_mb": 0.0,
        }

    cores = list(cache_d.glob("core-*.json.gz"))
    snapshots = list(cache_d.glob("snapshot-*.json.gz"))
    views = list(cache_d.glob("view-*.json.gz"))
    cas_d = _cas_dir(cache_d)
    cas_blobs = list(cas_d.glob("*.gz")) if cas_d.exists() else []

    # stat() directly rather than exists()-then-stat(): the two-step check
    # can still raise if the file is removed in between.
    total_bytes = 0
    for f in (*cores, *snapshots, *views, *cas_blobs):
        try:
            total_bytes += f.stat().st_size
        except OSError:
            continue

    return {
        "cache_dir": str(cache_d),
        "cores": len(cores),
        "snapshots": len(snapshots),
        "views": len(views),
        "cas_blobs": len(cas_blobs),
        "total_size_bytes": total_bytes,
        "total_size_mb": round(total_bytes / (1024 * 1024), 2),
    }
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def clear(repo_root: Path) -> int:
    """Delete all cache files for *repo_root*. Returns the number of files removed.

    Targets the core, snapshot and view files in the cache directory plus
    every ``*.gz`` blob in the CAS sub-directory.  Returns 0 when the cache
    directory does not exist.

    Only paths that are actually gone after the unlink attempt are counted.
    ``_safe_unlink`` wraps the unlink in a ``try`` block, so counting
    unconditionally could report files that are still on disk.
    """
    cache_d = cache_dir(repo_root)
    if not cache_d.exists():
        return 0

    targets = [
        f
        for pattern in ("core-*.json.gz", "snapshot-*.json.gz", "view-*.json.gz")
        for f in cache_d.glob(pattern)
    ]
    cas_d = _cas_dir(cache_d)
    if cas_d.exists():
        targets.extend(cas_d.glob("*.gz"))

    removed = 0
    for f in targets:
        _safe_unlink(f)
        if not f.exists():
            removed += 1
    return removed
|
|
179
|
+
|
|
180
|
+
|
|
127
181
|
# ---------------------------------------------------------------------------
|
|
128
182
|
# Public API — read / write
|
|
129
183
|
# ---------------------------------------------------------------------------
|
|
@@ -193,7 +247,7 @@ def write(
|
|
|
193
247
|
try:
|
|
194
248
|
cache_d.mkdir(parents=True, exist_ok=True)
|
|
195
249
|
payload = _build_envelope(cache_key, content, fmt, layers or {}, cache_d)
|
|
196
|
-
dest
|
|
250
|
+
_atomic_write(dest, payload)
|
|
197
251
|
except Exception:
|
|
198
252
|
return # non-fatal
|
|
199
253
|
|
|
@@ -275,7 +329,7 @@ def write_core(repo_root: Path, core_key: str, core_data: dict[str, Any]) -> str
|
|
|
275
329
|
json.dumps(envelope, ensure_ascii=False).encode("utf-8"),
|
|
276
330
|
compresslevel=6,
|
|
277
331
|
)
|
|
278
|
-
dest
|
|
332
|
+
_atomic_write(dest, payload)
|
|
279
333
|
except Exception:
|
|
280
334
|
pass
|
|
281
335
|
|
|
@@ -327,7 +381,7 @@ def write_view(
|
|
|
327
381
|
try:
|
|
328
382
|
cache_d.mkdir(parents=True, exist_ok=True)
|
|
329
383
|
payload = _build_envelope(view_key, content, fmt, layers or {}, cache_d)
|
|
330
|
-
dest
|
|
384
|
+
_atomic_write(dest, payload)
|
|
331
385
|
except Exception:
|
|
332
386
|
pass
|
|
333
387
|
|
|
@@ -529,12 +583,16 @@ def _cas_restore(
|
|
|
529
583
|
def _gc(cache_d: Path) -> None:
|
|
530
584
|
"""Evict old snapshots/cores/views and sweep orphaned CAS blobs.
|
|
531
585
|
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
586
|
+
Three eviction passes (all non-fatal):
|
|
587
|
+
1. Commit-based: keep only last SOURCECODE_CACHE_KEEP_COMMITS distinct SHAs.
|
|
588
|
+
2. Core-count: keep at most SOURCECODE_CACHE_MAX_CORES core files (LRU).
|
|
589
|
+
3. Size-based: if total cache exceeds SOURCECODE_CACHE_MAX_SIZE_MB, evict
|
|
590
|
+
oldest core+snapshot files until under budget.
|
|
591
|
+
Views and CAS blobs are swept after each pass.
|
|
536
592
|
"""
|
|
537
593
|
keep = int(os.environ.get("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS))
|
|
594
|
+
max_cores = int(os.environ.get("SOURCECODE_CACHE_MAX_CORES", _DEFAULT_MAX_CORES))
|
|
595
|
+
max_size_bytes = int(os.environ.get("SOURCECODE_CACHE_MAX_SIZE_MB", _DEFAULT_MAX_SIZE_MB)) * 1024 * 1024
|
|
538
596
|
|
|
539
597
|
try:
|
|
540
598
|
all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
|
|
@@ -544,7 +602,7 @@ def _gc(cache_d: Path) -> None:
|
|
|
544
602
|
if not all_snapshots and not all_cores and not all_views:
|
|
545
603
|
return
|
|
546
604
|
|
|
547
|
-
#
|
|
605
|
+
# ── Pass 1: commit-based eviction ──────────────────────────────────
|
|
548
606
|
groups: dict[str, list[Path]] = {}
|
|
549
607
|
for f in all_snapshots:
|
|
550
608
|
m = _SNAPSHOT_RE.match(f.name)
|
|
@@ -558,7 +616,6 @@ def _gc(cache_d: Path) -> None:
|
|
|
558
616
|
surviving: list[Path]
|
|
559
617
|
|
|
560
618
|
if keep <= 0 or len(groups) <= keep:
|
|
561
|
-
# No eviction needed — but still sweep views + CAS
|
|
562
619
|
surviving = all_snapshots + all_cores
|
|
563
620
|
else:
|
|
564
621
|
def _newest_mtime(commit: str) -> float:
|
|
@@ -573,7 +630,33 @@ def _gc(cache_d: Path) -> None:
|
|
|
573
630
|
for f in groups[commit]:
|
|
574
631
|
_safe_unlink(f)
|
|
575
632
|
|
|
633
|
+
# ── Pass 2: per-repo core count cap ────────────────────────────────
|
|
634
|
+
if max_cores > 0:
|
|
635
|
+
surviving_cores = [p for p in surviving if p.name.startswith("core-") and p.exists()]
|
|
636
|
+
if len(surviving_cores) > max_cores:
|
|
637
|
+
surviving_cores.sort(key=lambda p: p.stat().st_mtime, reverse=True)
|
|
638
|
+
for evict in surviving_cores[max_cores:]:
|
|
639
|
+
_safe_unlink(evict)
|
|
640
|
+
surviving = [p for p in surviving if p != evict]
|
|
641
|
+
|
|
642
|
+
# ── Pass 3: total size cap ──────────────────────────────────────────
|
|
643
|
+
if max_size_bytes > 0:
|
|
644
|
+
size_candidates = [p for p in surviving if p.exists()]
|
|
645
|
+
total = sum(p.stat().st_size for p in size_candidates if not p.name.startswith("view-"))
|
|
646
|
+
if total > max_size_bytes:
|
|
647
|
+
# Sort oldest-first; evict core+snapshot files until under budget
|
|
648
|
+
size_candidates.sort(key=lambda p: p.stat().st_mtime)
|
|
649
|
+
for evict in size_candidates:
|
|
650
|
+
if evict.name.startswith("view-"):
|
|
651
|
+
continue
|
|
652
|
+
total -= evict.stat().st_size if evict.exists() else 0
|
|
653
|
+
_safe_unlink(evict)
|
|
654
|
+
surviving = [p for p in surviving if p != evict]
|
|
655
|
+
if total <= max_size_bytes:
|
|
656
|
+
break
|
|
657
|
+
|
|
576
658
|
# Prune view files whose core hash is no longer in the surviving set
|
|
659
|
+
all_views = list(cache_d.glob("view-*.json.gz"))
|
|
577
660
|
_gc_views(cache_d, surviving, all_views)
|
|
578
661
|
|
|
579
662
|
# Sweep orphaned CAS blobs (surviving snapshots + view files may ref them)
|
|
@@ -648,6 +731,23 @@ def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
|
|
|
648
731
|
# Utilities
|
|
649
732
|
# ---------------------------------------------------------------------------
|
|
650
733
|
|
|
734
|
+
def _atomic_write(dest: Path, data: bytes) -> None:
    """Write *data* to *dest* atomically via a unique sibling temp file + rename.

    On POSIX, ``Path.replace()`` is a single ``rename(2)`` syscall — the
    destination either has the old content or the new content, never a partial
    write.

    The temp file gets a unique name from ``tempfile.mkstemp`` instead of a
    fixed ``<stem>.tmp``: with a shared name, two writers targeting the same
    *dest* would write into the same temp file and one could rename the
    other's half-written content into place.  The name still ends in
    ``.tmp``, so it stays out of the ``*.json.gz`` / ``*.gz`` glob patterns
    used elsewhere in this module.

    Note: ``mkstemp`` creates the file readable and writable only by the
    owning user, and the rename carries that mode over to *dest*.

    Raises whatever the underlying write or rename raises, after a
    best-effort removal of the temp file.
    """
    import tempfile  # local import: only this helper needs it

    fd, tmp_name = tempfile.mkstemp(
        dir=str(dest.parent), prefix=f"{dest.name}.", suffix=".tmp"
    )
    tmp = Path(tmp_name)
    try:
        # fdopen takes ownership of the descriptor and closes it on exit.
        with os.fdopen(fd, "wb") as fh:
            fh.write(data)
        tmp.replace(dest)
    except Exception:
        _safe_unlink(tmp)
        raise
|
|
749
|
+
|
|
750
|
+
|
|
651
751
|
def _safe_unlink(path: Path) -> None:
|
|
652
752
|
try:
|
|
653
753
|
path.unlink(missing_ok=True)
|
sourcecode/cache.tmp_new
ADDED
|
@@ -0,0 +1,772 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Snapshot cache manager for sourcecode — v2.
|
|
3
|
+
|
|
4
|
+
Cache layout
|
|
5
|
+
------------
|
|
6
|
+
~/.sourcecode/cache/<repo_id>/
|
|
7
|
+
snapshot-<git_sha>-<flags_hash>.json.gz ← versioned envelope
|
|
8
|
+
core-<git_sha>-<flags_hash>.json.gz ← L1 core analysis
|
|
9
|
+
view-<core_hash16>-<view_flags_hash>.json.gz ← L2 derived view
|
|
10
|
+
cas/
|
|
11
|
+
<blob_hash16>.gz ← content-addressed blobs
|
|
12
|
+
|
|
13
|
+
Schema
|
|
14
|
+
------
|
|
15
|
+
Every snapshot file is a gzip-compressed JSON *envelope*:
|
|
16
|
+
|
|
17
|
+
{
|
|
18
|
+
"sv": "2", // schema version — bump to invalidate all
|
|
19
|
+
"key": "abc1234-aabbccdd", // cache key (git_sha + flags_hash)
|
|
20
|
+
"ts": "2026-05-24T22:00:00Z", // write timestamp (ISO-8601 UTC)
|
|
21
|
+
"fmt": "json", // output format: "json" | "yaml"
|
|
22
|
+
"layers": {"heuristic": "...", ...}, // analyzer fingerprints at write time
|
|
23
|
+
// ── content (one of two forms) ──────────────────────────────────────
|
|
24
|
+
"snap": {...}, // inline fields (small) — JSON mode
|
|
25
|
+
"cas": {"file_paths": "<h16>",…} // large fields deduped into CAS store
|
|
26
|
+
// — OR —
|
|
27
|
+
"raw": "<content string>" // YAML or unparseable JSON stored as-is
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
Content-addressed store (CAS)
|
|
31
|
+
-----------------------------
|
|
32
|
+
Large top-level JSON fields (> _CAS_THRESHOLD bytes) are extracted into the
|
|
33
|
+
``cas/`` directory as individual gzip-compressed blobs identified by a 16-char
|
|
34
|
+
SHA-256 hash of their uncompressed bytes. Two snapshots that share an
|
|
35
|
+
identical ``file_paths`` array reference the *same* blob — zero duplication.
|
|
36
|
+
|
|
37
|
+
Eviction / GC
|
|
38
|
+
-------------
|
|
39
|
+
After each snapshot ``write()``, ``_gc()`` keeps snapshots from the last
|
|
40
|
+
``SOURCECODE_CACHE_KEEP_COMMITS`` distinct git commits (default 5, override via
|
|
41
|
+
env var).  A CAS sweep runs in the same pass: blobs unreferenced by any surviving
|
|
42
|
+
snapshot or view file are deleted.
|
|
43
|
+
|
|
44
|
+
Backward compatibility
|
|
45
|
+
----------------------
|
|
46
|
+
v1 files (raw gzip'd content, no envelope) are detected by the absence of an
|
|
47
|
+
``sv`` key in the decompressed JSON, and served transparently. Legacy files
|
|
48
|
+
in ``<repo>/.sourcecode-cache/`` are also checked as a final fallback.
|
|
49
|
+
|
|
50
|
+
Env vars
|
|
51
|
+
--------
|
|
52
|
+
SOURCECODE_CACHE_DIR Override global cache base (default: ~/.sourcecode/cache)
|
|
53
|
+
SOURCECODE_CACHE_KEEP_COMMITS How many git commits to retain (default: 5; 0 = unlimited)
|
|
54
|
+
SOURCECODE_CACHE_MAX_CORES Max L1 core files to retain (default: 20; 0 = unlimited)
|
|
55
|
+
SOURCECODE_CACHE_MAX_SIZE_MB Max total cache size in MB (default: 50; 0 = unlimited)
|
|
56
|
+
"""
|
|
57
|
+
from __future__ import annotations
|
|
58
|
+
|
|
59
|
+
import gzip
|
|
60
|
+
import hashlib
|
|
61
|
+
import json
|
|
62
|
+
import os
|
|
63
|
+
import re
|
|
64
|
+
from datetime import datetime, timezone
|
|
65
|
+
from pathlib import Path
|
|
66
|
+
from typing import Any, Optional
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Version / constants
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
#: Bump this string to invalidate *all* existing cached snapshots.
SCHEMA_VERSION: str = "2"

#: Bump to invalidate all L1 core caches (independent of snapshot version).
CORE_SCHEMA_VERSION: str = "1"

#: Fields eligible for CAS deduplication (applied to top-level JSON dict keys).
_CAS_FIELDS: frozenset[str] = frozenset([
    "file_paths",
    "entry_points",
    "docs",
    "dependencies",
    "graph",
    "semantic_calls",
    "semantic_symbols",
    "architecture",
    "metrics",
    "git_history",
    "env_map",
    "code_notes",
])

#: Serialised size threshold (bytes) above which a field is moved to CAS.
_CAS_THRESHOLD: int = 4096

#: Default number of git commits to retain; the module docstring lists
#: ``SOURCECODE_CACHE_KEEP_COMMITS`` as the override (0 = unlimited).
_DEFAULT_KEEP_COMMITS: int = 5
#: Default cap on retained L1 core files; override documented as
#: ``SOURCECODE_CACHE_MAX_CORES`` (0 = unlimited).
_DEFAULT_MAX_CORES: int = 20
#: Default total cache size budget in MB; override documented as
#: ``SOURCECODE_CACHE_MAX_SIZE_MB`` (0 = unlimited).
_DEFAULT_MAX_SIZE_MB: int = 50

# Matches "snapshot-<hex_commit>-<hex_flags>.json.gz"
_SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")

# Matches "core-<hex_commit>-<hex_analysis>.json.gz"
_CORE_RE = re.compile(r"^core-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")

# Matches "view-<hex_core_hash16>-<hex_view_flags>.json.gz"
_VIEW_RE = re.compile(r"^view-([0-9a-f]{16})-[0-9a-f]+\.json\.gz$")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Public API — location helpers
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def repo_id(repo_root: Path) -> str:
    """Derive a stable identifier for a repository from its resolved path.

    The result is the first 16 hex characters of the SHA-256 digest of the
    resolved path string.
    """
    canonical = str(repo_root.resolve())
    digest = hashlib.sha256(canonical.encode()).hexdigest()
    return digest[:16]
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def cache_dir(repo_root: Path) -> Path:
    """Locate the cache directory belonging to *repo_root*.

    The directory is ``<base>/<repo_id>``, where the base is taken from the
    ``SOURCECODE_CACHE_DIR`` environment variable when it is set and
    non-empty, and is ``~/.sourcecode/cache`` otherwise.
    """
    override = os.environ.get("SOURCECODE_CACHE_DIR", "")
    if override:
        root: Path = Path(override)
    else:
        root = Path.home() / ".sourcecode" / "cache"
    return root / repo_id(repo_root)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
# Public API — observability
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
def status(repo_root: Path) -> dict[str, Any]:
    """Return a dict describing the current cache state for *repo_root*.

    Keys: cache_dir, cores, snapshots, views, cas_blobs,
    total_size_bytes, total_size_mb.

    Returns zeros when the cache directory does not exist.  A file that
    disappears between being listed and being sized (for instance if another
    process deletes it) is still counted but contributes zero bytes instead
    of raising.
    """
    cache_d = cache_dir(repo_root)
    if not cache_d.exists():
        return {
            "cache_dir": str(cache_d),
            "cores": 0,
            "snapshots": 0,
            "views": 0,
            "cas_blobs": 0,
            "total_size_bytes": 0,
            "total_size_mb": 0.0,
        }

    cores = list(cache_d.glob("core-*.json.gz"))
    snapshots = list(cache_d.glob("snapshot-*.json.gz"))
    views = list(cache_d.glob("view-*.json.gz"))
    cas_d = _cas_dir(cache_d)
    cas_blobs = list(cas_d.glob("*.gz")) if cas_d.exists() else []

    # stat() directly rather than exists()-then-stat(): the two-step check
    # can still raise if the file is removed in between.
    total_bytes = 0
    for f in (*cores, *snapshots, *views, *cas_blobs):
        try:
            total_bytes += f.stat().st_size
        except OSError:
            continue

    return {
        "cache_dir": str(cache_d),
        "cores": len(cores),
        "snapshots": len(snapshots),
        "views": len(views),
        "cas_blobs": len(cas_blobs),
        "total_size_bytes": total_bytes,
        "total_size_mb": round(total_bytes / (1024 * 1024), 2),
    }
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def clear(repo_root: Path) -> int:
    """Delete all cache files for *repo_root*. Returns the number of files removed.

    Removes cores, snapshots, views, and CAS blobs.
    Returns 0 when the cache directory does not exist.

    Only paths that are actually gone after the unlink attempt are counted.
    ``_safe_unlink`` is presumably best-effort (its body is not visible
    here), in which case counting unconditionally could report files that
    are still on disk.
    """
    cache_d = cache_dir(repo_root)
    if not cache_d.exists():
        return 0

    targets = [
        f
        for pattern in ("core-*.json.gz", "snapshot-*.json.gz", "view-*.json.gz")
        for f in cache_d.glob(pattern)
    ]
    cas_d = _cas_dir(cache_d)
    if cas_d.exists():
        targets.extend(cas_d.glob("*.gz"))

    removed = 0
    for f in targets:
        _safe_unlink(f)
        if not f.exists():
            removed += 1
    return removed
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
# Public API — read / write
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
def read(repo_root: Path, cache_key: str) -> Optional[str]:
    """Look up the cached snapshot for *cache_key*; ``None`` means a miss.

    Two locations are consulted, in order:

    1. ``snapshot-{cache_key}.json.gz`` inside the per-repo cache directory.
       If that file exists but cannot be turned into content, it is deleted
       and the lookup ends as a miss without consulting the legacy location.
    2. ``.sourcecode-cache/snapshot-{cache_key}.json`` under *repo_root*
       (legacy plain-text file), read as UTF-8.
    """
    store = cache_dir(repo_root)

    # Primary location: gzip file in the global cache.
    primary = store / f"snapshot-{cache_key}.json.gz"
    if primary.exists():
        content: Optional[str]
        try:
            content = _parse_envelope(primary.read_bytes(), store)
        except Exception:
            content = None
        if content is None:
            # Unusable file (corrupted or version mismatch): evict it.
            _safe_unlink(primary)
        return content

    # Fallback: legacy file kept inside the repository itself.
    fallback = repo_root / ".sourcecode-cache" / f"snapshot-{cache_key}.json"
    if not fallback.exists():
        return None
    try:
        return fallback.read_text(encoding="utf-8")
    except Exception:
        return None
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def write(
    repo_root: Path,
    cache_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """Store *content* as ``snapshot-{cache_key}.json.gz`` in the repo cache.

    Args:
        repo_root: Root directory of the analysed repository.
        cache_key: Key identifying this analysis; becomes part of the file
            name.
        content: Rendered output string to persist.
        fmt: Format label forwarded to ``_build_envelope``.
        layers: Optional fingerprint mapping forwarded to
            ``_build_envelope``; an empty dict is used when omitted.

    The write is best-effort: any exception while creating the directory,
    building the envelope or writing the file ends the call silently.
    ``_gc`` runs on the cache directory only after a successful write.
    """
    store = cache_dir(repo_root)
    target = store / f"snapshot-{cache_key}.json.gz"
    try:
        store.mkdir(parents=True, exist_ok=True)
        blob = _build_envelope(cache_key, content, fmt, layers or {}, store)
        _atomic_write(target, blob)
    except Exception:
        return  # non-fatal
    else:
        _gc(store)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# ---------------------------------------------------------------------------
|
|
272
|
+
# Layer 1 — Core Analysis cache
|
|
273
|
+
# ---------------------------------------------------------------------------
|
|
274
|
+
|
|
275
|
+
def read_core(repo_root: Path, core_key: str) -> Optional[tuple[dict[str, Any], str]]:
    """Load the L1 core analysis stored under *core_key*.

    Returns ``(core_dict, core_hash)`` on a hit and ``None`` on a miss.
    ``core_hash`` is the ``"hash"`` value recorded in the envelope.

    An existing file is deleted and reported as a miss when it cannot be
    decompressed or parsed, is not a JSON object, carries a ``"csv"`` value
    other than ``CORE_SCHEMA_VERSION``, or lacks a dict ``"data"`` entry or
    a non-empty ``"hash"``.
    """
    store = cache_dir(repo_root)
    core_file = store / f"core-{core_key}.json.gz"
    if not core_file.exists():
        return None

    try:
        decoded = gzip.decompress(core_file.read_bytes()).decode("utf-8")
        envelope = json.loads(decoded)
    except Exception:
        envelope = None  # handled below like any other unusable payload

    if isinstance(envelope, dict) and envelope.get("csv") == CORE_SCHEMA_VERSION:
        payload = envelope.get("data")
        digest = envelope.get("hash", "")
        if isinstance(payload, dict) and digest:
            return payload, digest

    # Every failure path ends here: evict the file and report a miss.
    _safe_unlink(core_file)
    return None
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def write_core(repo_root: Path, core_key: str, core_data: dict[str, Any]) -> str:
    """Store *core_data* in the L1 cache and return its 16-char content hash.

    The hash is the first 16 hex characters of the SHA-256 of the JSON
    serialisation of *core_data*; it is returned whether or not the file
    write succeeds, since write failures are swallowed.

    The file ``core-{core_key}.json.gz`` holds a gzip-compressed JSON object
    with these keys::

        "csv"   CORE_SCHEMA_VERSION
        "key"   the core_key passed in
        "hash"  the returned content hash
        "ts"    write time from _now_iso()
        "data"  core_data itself
    """
    serialised = json.dumps(core_data, ensure_ascii=False)
    core_hash = hashlib.sha256(serialised.encode()).hexdigest()[:16]

    store = cache_dir(repo_root)
    target = store / f"core-{core_key}.json.gz"
    try:
        store.mkdir(parents=True, exist_ok=True)
        record: dict[str, Any] = {
            "csv": CORE_SCHEMA_VERSION,
            "key": core_key,
            "hash": core_hash,
            "ts": _now_iso(),
            "data": core_data,
        }
        record_bytes = json.dumps(record, ensure_ascii=False).encode("utf-8")
        _atomic_write(target, gzip.compress(record_bytes, compresslevel=6))
    except Exception:
        pass  # best-effort: the hash is still returned

    return core_hash
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# ---------------------------------------------------------------------------
|
|
354
|
+
# Layer 2 — Derived View cache
|
|
355
|
+
# ---------------------------------------------------------------------------
|
|
356
|
+
|
|
357
|
+
def read_view(repo_root: Path, view_key: str) -> Optional[str]:
    """Fetch the rendered view stored as ``view-{view_key}.json.gz``.

    The file is decoded with ``_parse_envelope``, the same routine used for
    snapshot files.  Returns the content string, or ``None`` when the file
    is absent.  A file that exists but yields no content (or raises while
    being read or parsed) is deleted and reported as a miss.
    """
    store = cache_dir(repo_root)
    view_file = store / f"view-{view_key}.json.gz"
    if not view_file.exists():
        return None

    content: Optional[str]
    try:
        content = _parse_envelope(view_file.read_bytes(), store)
    except Exception:
        content = None

    if content is None:
        _safe_unlink(view_file)
    return content
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def write_view(
    repo_root: Path,
    view_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """Store a rendered view string as ``view-{view_key}.json.gz``.

    The payload is produced by ``_build_envelope``, the same builder used
    for snapshot files.  The write is best-effort: any exception is
    swallowed.  Unlike ``write``, this function does not run ``_gc``;
    callers wanting eviction must call ``_gc(cache_dir(repo_root))``
    themselves.
    """
    store = cache_dir(repo_root)
    target = store / f"view-{view_key}.json.gz"
    try:
        store.mkdir(parents=True, exist_ok=True)
        blob = _build_envelope(view_key, content, fmt, layers or {}, store)
        _atomic_write(target, blob)
    except Exception:
        pass
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# ---------------------------------------------------------------------------
|
|
404
|
+
# Envelope (de)serialisation
|
|
405
|
+
# ---------------------------------------------------------------------------
|
|
406
|
+
|
|
407
|
+
def _now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    stamp = datetime.now(tz=timezone.utc)
    return stamp.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def _build_envelope(
    cache_key: str,
    content: str,
    fmt: str,
    layers: dict[str, str],
    cache_d: Path,
) -> bytes:
    """Wrap *content* in a versioned envelope and return it gzip-compressed.

    The envelope always carries ``sv``, ``key``, ``ts``, ``fmt`` and
    ``layers``.  The content itself takes one of two forms:

    * ``snap`` (plus ``cas`` when any field was moved out): used when *fmt*
      is ``"json"`` and *content* parses to a JSON object; the object is
      passed through ``_cas_extract``.
    * ``raw``: the unmodified content string, used for every other case —
      a non-``"json"`` format, JSON that is not an object, or any exception
      during parsing or extraction.
    """
    envelope: dict[str, Any] = {
        "sv": SCHEMA_VERSION,
        "key": cache_key,
        "ts": _now_iso(),
        "fmt": fmt,
        "layers": layers,
    }

    # ``split`` stays None unless the content was successfully split into
    # inline fields and CAS references.
    split: Optional[tuple[dict[str, Any], dict[str, str]]] = None
    if fmt == "json":
        try:
            parsed = json.loads(content)
            if isinstance(parsed, dict):
                inline, cas_refs = _cas_extract(parsed, cache_d)
                split = (inline, cas_refs)
        except Exception:
            split = None

    if split is None:
        envelope["raw"] = content
    else:
        envelope["snap"] = split[0]
        if split[1]:
            envelope["cas"] = split[1]

    encoded = json.dumps(envelope, ensure_ascii=False).encode("utf-8")
    return gzip.compress(encoded, compresslevel=6)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def _parse_envelope(data: bytes, cache_d: Path) -> Optional[str]:
    """Turn the gzip-compressed bytes of a cache file back into content.

    Outcomes, in the order they are checked:

    * *data* is not valid gzip: ``None``.
    * The decompressed bytes are not JSON: they are returned decoded as
      UTF-8 (v1 raw content), or ``None`` if they are not valid UTF-8.
    * The JSON is not an object, or is an object without an ``"sv"`` key:
      the decompressed text is returned as-is (v1 content).
    * ``"sv"`` is present but differs from ``SCHEMA_VERSION``: ``None``.
    * v2 envelope with ``"raw"``: that value is returned.
    * v2 envelope with ``"snap"``: CAS references are resolved through
      ``_cas_restore`` (``None`` if that fails) and the result is
      re-serialised with ``indent=2, ensure_ascii=False``.
    * Anything else: ``None``.
    """
    try:
        text_bytes = gzip.decompress(data)
    except Exception:
        return None

    try:
        envelope = json.loads(text_bytes.decode("utf-8"))
    except Exception:
        # Not JSON at all (e.g. YAML stored by v1): hand back the raw text.
        try:
            return text_bytes.decode("utf-8")
        except Exception:
            return None

    # No envelope marker: v1 file whose body is the content itself.
    if not isinstance(envelope, dict) or "sv" not in envelope:
        return text_bytes.decode("utf-8")

    # Envelope written under a different schema version: treat as a miss.
    if envelope.get("sv") != SCHEMA_VERSION:
        return None

    if "raw" in envelope:
        return envelope["raw"]

    if "snap" not in envelope:
        return None  # malformed envelope

    inline: dict[str, Any] = envelope["snap"]
    cas_refs: dict[str, str] = envelope.get("cas", {})
    restored = _cas_restore(inline, cas_refs, cache_d) if cas_refs else dict(inline)
    if restored is None:
        return None  # CAS miss (blob evicted or corrupted)

    # Same serialisation parameters as the original pipeline output.
    return json.dumps(restored, indent=2, ensure_ascii=False)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# ---------------------------------------------------------------------------
|
|
506
|
+
# CAS store
|
|
507
|
+
# ---------------------------------------------------------------------------
|
|
508
|
+
|
|
509
|
+
def _cas_dir(cache_d: Path) -> Path:
    """Return the ``cas`` sub-directory of the cache directory *cache_d*."""
    return cache_d.joinpath("cas")
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def _cas_path(cache_d: Path, blob_hash: str) -> Path:
    """Return the path of the blob file ``{blob_hash}.gz`` inside the CAS directory."""
    blob_name = f"{blob_hash}.gz"
    return _cas_dir(cache_d) / blob_name
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _cas_store_blob(cache_d: Path, serialised: str) -> str:
    """Store *serialised* (a JSON string) in the CAS.  Idempotent.

    The blob is identified by the first 16 hex characters of the SHA-256 of
    its UTF-8 bytes and stored gzip-compressed.  If a blob file with that
    hash already exists, nothing is written.

    Returns the 16-char hash that identifies the blob.  Errors from creating
    the directory or writing the file propagate to the caller.
    """
    raw = serialised.encode("utf-8")
    blob_hash = hashlib.sha256(raw).hexdigest()[:16]
    path = _cas_path(cache_d, blob_hash)
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        # Publish through the temp-file + rename helper used by the other
        # writers.  An in-place write interrupted midway would leave a
        # truncated blob, and the exists() check above would then stop it
        # from ever being rewritten.
        _atomic_write(path, gzip.compress(raw, compresslevel=6))
    return blob_hash
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _cas_load_blob(cache_d: Path, blob_hash: str) -> Optional[str]:
    """Read, decompress and decode the CAS blob stored under *blob_hash*.

    Returns the blob's text, or ``None`` when the blob file does not exist
    or cannot be read, gunzipped or decoded as UTF-8.
    """
    blob_file = _cas_path(cache_d, blob_hash)
    if blob_file.exists():
        try:
            compressed = blob_file.read_bytes()
            return gzip.decompress(compressed).decode("utf-8")
        except Exception:
            pass  # unreadable blob is reported the same way as a missing one
    return None
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def _cas_extract(
    snap_dict: dict[str, Any],
    cache_d: Path,
) -> tuple[dict[str, Any], dict[str, str]]:
    """Split *snap_dict* into inline fields and CAS references.

    A top-level field is moved to the CAS when its key is in ``_CAS_FIELDS``,
    its value is not ``None``, and its JSON serialisation is larger than
    ``_CAS_THRESHOLD`` bytes of UTF-8. Every other field is kept inline.

    Returns:
        ``(inline, cas_refs)`` where *inline* holds the fields kept as-is and
        *cas_refs* maps each offloaded field name to its blob hash.
    """
    kept_inline: dict[str, Any] = {}
    blob_refs: dict[str, str] = {}

    for name, payload in snap_dict.items():
        candidate = payload is not None and name in _CAS_FIELDS
        if candidate:
            text = json.dumps(payload, ensure_ascii=False)
            too_big = len(text.encode("utf-8")) > _CAS_THRESHOLD
            if too_big:
                blob_refs[name] = _cas_store_blob(cache_d, text)
                continue
        kept_inline[name] = payload

    return kept_inline, blob_refs
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def _cas_restore(
    inline: dict[str, Any],
    cas_refs: dict[str, str],
    cache_d: Path,
) -> Optional[dict[str, Any]]:
    """Rebuild a full snapshot dict from *inline* plus the blobs in *cas_refs*.

    Each referenced blob is loaded and JSON-decoded into a copy of *inline*
    under its field name. If any blob cannot be loaded or parsed the whole
    restore is abandoned and ``None`` is returned (treated as a cache miss).
    """
    rebuilt: dict[str, Any] = dict(inline)
    for name, digest in cas_refs.items():
        text = _cas_load_blob(cache_d, digest)
        if text is None:
            # blob evicted or corrupted → full miss
            return None
        try:
            decoded = json.loads(text)
        except Exception:
            return None
        rebuilt[name] = decoded
    return rebuilt
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
# ---------------------------------------------------------------------------
|
|
593
|
+
# Eviction / GC
|
|
594
|
+
# ---------------------------------------------------------------------------
|
|
595
|
+
|
|
596
|
+
def _gc_env_int(name: str, default: int) -> int:
    """Read integer env var *name*; fall back to *default* if unset or malformed."""
    try:
        return int(os.environ.get(name, default))
    except (TypeError, ValueError):
        return default


def _gc(cache_d: Path) -> None:
    """Evict old snapshots/cores/views and sweep orphaned CAS blobs.

    Three eviction passes run over the ``snapshot-*.json.gz`` and
    ``core-*.json.gz`` files in *cache_d*:

    1. Commit-based: keep only the last SOURCECODE_CACHE_KEEP_COMMITS distinct
       git commits (ranked by the newest mtime within each commit group).
       Files whose names do not match the snapshot/core patterns are never
       evicted by this pass and stay in the surviving set.
    2. Core count cap: if surviving core files exceed SOURCECODE_CACHE_MAX_CORES,
       evict the oldest by mtime until at the cap.
    3. Size cap: if the combined size of surviving snapshot and core files
       exceeds SOURCECODE_CACHE_MAX_SIZE_MB, evict oldest-first until under it.

    A limit of zero or less disables the corresponding pass. A malformed
    environment value falls back to the built-in default instead of raising.

    Views are pruned after all passes (only views traceable to a surviving
    core are kept) and CAS blobs are swept last. Any exception raised while
    collecting is swallowed: GC failure is non-fatal.
    """
    keep = _gc_env_int("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS)
    max_cores = _gc_env_int("SOURCECODE_CACHE_MAX_CORES", _DEFAULT_MAX_CORES)
    max_size_bytes = (
        _gc_env_int("SOURCECODE_CACHE_MAX_SIZE_MB", _DEFAULT_MAX_SIZE_MB)
        * 1024 * 1024
    )

    try:
        all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
        all_cores = list(cache_d.glob("core-*.json.gz"))
        all_views = list(cache_d.glob("view-*.json.gz"))

        if not (all_snapshots or all_cores or all_views):
            return

        # ── Pass 1: commit-based eviction ───────────────────────────────────
        groups: dict[str, list[Path]] = {}
        unmatched: list[Path] = []
        for pattern, files in ((_SNAPSHOT_RE, all_snapshots), (_CORE_RE, all_cores)):
            for f in files:
                m = pattern.match(f.name)
                if m:
                    groups.setdefault(m.group(1), []).append(f)
                else:
                    unmatched.append(f)

        surviving: list[Path]

        if keep <= 0 or len(groups) <= keep:
            surviving = all_snapshots + all_cores
        else:
            def _newest_mtime(commit: str) -> float:
                return max(p.stat().st_mtime for p in groups[commit])

            sorted_commits = sorted(groups, key=_newest_mtime, reverse=True)
            # Unmatched files are not deleted here, so they must remain in
            # the surviving set; otherwise the view/CAS sweeps below would
            # drop data still referenced by files that are left on disk.
            surviving = list(unmatched)
            for commit in sorted_commits[:keep]:
                surviving.extend(groups[commit])
            for commit in sorted_commits[keep:]:
                for f in groups[commit]:
                    _safe_unlink(f)

        # ── Pass 2: core count cap ───────────────────────────────────────────
        if max_cores > 0:
            surviving_cores = [
                p for p in surviving
                if p.name.startswith("core-") and p.exists()
            ]
            if len(surviving_cores) > max_cores:
                surviving_cores.sort(key=lambda p: p.stat().st_mtime, reverse=True)
                excess = surviving_cores[max_cores:]
                for evict in excess:
                    _safe_unlink(evict)
                evicted = set(excess)
                surviving = [p for p in surviving if p not in evicted]

        # ── Pass 3: size cap ─────────────────────────────────────────────────
        if max_size_bytes > 0:
            # ``surviving`` only ever holds snapshot-/core- files, so no
            # view filtering is needed here.
            size_candidates = [p for p in surviving if p.exists()]
            total = sum(p.stat().st_size for p in size_candidates)
            if total > max_size_bytes:
                size_candidates.sort(key=lambda p: p.stat().st_mtime)
                evicted = set()
                for evict in size_candidates:
                    total -= evict.stat().st_size if evict.exists() else 0
                    _safe_unlink(evict)
                    evicted.add(evict)
                    if total <= max_size_bytes:
                        break
                surviving = [p for p in surviving if p not in evicted]

        # ── Prune orphaned views + CAS ───────────────────────────────────────
        all_views = list(cache_d.glob("view-*.json.gz"))
        _gc_views(cache_d, surviving, all_views)
        surviving_with_views = surviving + [v for v in all_views if v.exists()]
        _gc_cas(cache_d, surviving_with_views)

    except Exception:
        pass  # GC failure is non-fatal
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def _gc_views(cache_d: Path, surviving: list[Path], all_views: list[Path]) -> None:
    """Delete view files that cannot be traced to a surviving core.

    The ``hash`` field is read from the envelope of every ``core-`` file in
    *surviving*. A view whose filename matches ``_VIEW_RE`` is deleted when
    its captured core-hash is not among those hashes. Views with
    unrecognisable names are left untouched. A core that cannot be read or
    parsed contributes no hash, so views keyed on it are pruned as well.
    """
    if not all_views:
        return

    # Gather the hashes advertised by the surviving core envelopes.
    live_hashes: set[str] = set()
    core_paths = (p for p in surviving if p.name.startswith("core-"))
    for core_path in core_paths:
        try:
            payload = gzip.decompress(core_path.read_bytes())
            core_hash = json.loads(payload.decode("utf-8")).get("hash", "")
            if core_hash:
                live_hashes.add(core_hash)
        except Exception:
            continue  # unreadable core: nothing to record

    for view_path in all_views:
        match = _VIEW_RE.match(view_path.name)
        if not match:
            continue
        if match.group(1) in live_hashes:
            continue
        _safe_unlink(view_path)
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
    """Delete CAS blobs that no file in *surviving_snapshots* references.

    Each listed file is gunzipped and parsed as JSON; when the result is a
    dict with a ``cas`` entry, that entry's values are collected as live
    blob hashes. Every ``*.gz`` file in the CAS directory whose stem is not
    a live hash is then removed. Files that cannot be read contribute no
    hashes. Any failure during the sweep is swallowed.
    """
    blob_dir = _cas_dir(cache_d)
    if not blob_dir.exists():
        return

    try:
        # Hashes still referenced by at least one surviving envelope.
        live: set[str] = set()
        for envelope_path in surviving_snapshots:
            try:
                text = gzip.decompress(envelope_path.read_bytes()).decode("utf-8")
                envelope = json.loads(text)
                if isinstance(envelope, dict) and "cas" in envelope:
                    live.update(envelope["cas"].values())
            except Exception:
                continue  # unreadable envelope: nothing to record

        # Remove every blob whose hash is not live.
        for blob_file in blob_dir.glob("*.gz"):
            if blob_file.stem in live:
                continue
            _safe_unlink(blob_file)

    except Exception:
        pass  # CAS sweep failure is non-fatal
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
# ---------------------------------------------------------------------------
|
|
754
|
+
# Utilities
|
|
755
|
+
# ---------------------------------------------------------------------------
|
|
756
|
+
|
|
757
|
+
def _atomic_write(dest: Path, data: bytes) -> None:
    """Write *data* to *dest* through a sibling ``.tmp`` file and a rename.

    The bytes are first written to *dest* with its final suffix swapped for
    ``.tmp`` and that file is then moved over *dest*. If either step raises,
    the staging file is removed and the exception is re-raised.
    """
    staging = dest.with_suffix(".tmp")
    try:
        staging.write_bytes(data)
        staging.replace(dest)
    except Exception:
        # Do not leave a half-written staging file behind.
        _safe_unlink(staging)
        raise
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
def _safe_unlink(path: Path) -> None:
    """Delete *path*, ignoring a missing file and any error raised."""
    try:
        path.unlink(missing_ok=True)
    except Exception:
        return
|
sourcecode/cli.py
CHANGED
|
@@ -1018,6 +1018,18 @@ def main(
|
|
|
1018
1018
|
architecture = True # agents need full architectural signal (M4)
|
|
1019
1019
|
graph_modules = True # IC-003: import graph needed for architecture confidence
|
|
1020
1020
|
|
|
1021
|
+
# --compact implicitly enables lightweight analysis passes so that
|
|
1022
|
+
# dependency_summary, env_summary and code_notes_summary are never null.
|
|
1023
|
+
# architecture=True is also enabled so that architecture.confidence is
|
|
1024
|
+
# consistent with --agent (which auto-enables architecture). The
|
|
1025
|
+
# ArchitectureAnalyzer is path-based and adds negligible latency.
|
|
1026
|
+
# NOTE: must happen BEFORE cache key computation so key reflects effective flags.
|
|
1027
|
+
if compact:
|
|
1028
|
+
dependencies = True
|
|
1029
|
+
env_map = True
|
|
1030
|
+
code_notes = True
|
|
1031
|
+
architecture = True
|
|
1032
|
+
|
|
1021
1033
|
# ── Two-layer cache ────────────────────────────────────────────────────────
|
|
1022
1034
|
# L1 (core): (repo, commit, analysis_flags) → pre-computed view data dict
|
|
1023
1035
|
# key = core-<git_sha>-<analysis_hash>.json.gz
|
|
@@ -1065,7 +1077,7 @@ def main(
|
|
|
1065
1077
|
f"dep={dependencies},gm={graph_modules},"
|
|
1066
1078
|
f"docs={docs},fm={full_metrics},sem={semantics},"
|
|
1067
1079
|
f"arch={architecture},gc={git_context},em={env_map},"
|
|
1068
|
-
f"cn={code_notes},
|
|
1080
|
+
f"cn={code_notes},"
|
|
1069
1081
|
f"ex={_excl_key},depth={effective_depth}"
|
|
1070
1082
|
)
|
|
1071
1083
|
_core_h = _hashlib.sha256(_core_flags_str.encode()).hexdigest()[:8]
|
|
@@ -1073,7 +1085,7 @@ def main(
|
|
|
1073
1085
|
|
|
1074
1086
|
# ── View flags: output presentation only (no re-analysis needed) ──
|
|
1075
1087
|
_view_flags_str = (
|
|
1076
|
-
f"c={compact},ag={agent},fmt={format},full={full},"
|
|
1088
|
+
f"c={compact},ag={agent},mode={mode},fmt={format},full={full},"
|
|
1077
1089
|
f"co={changed_only},tree={tree},nt={no_tree},"
|
|
1078
1090
|
f"rb={rank_by},sym={symbol},ep={entrypoints_only},"
|
|
1079
1091
|
f"nr={no_redact},gd={graph_detail},dd={docs_depth},"
|
|
@@ -1235,17 +1247,6 @@ def main(
|
|
|
1235
1247
|
err=True,
|
|
1236
1248
|
)
|
|
1237
1249
|
|
|
1238
|
-
# --compact implicitly enables lightweight analysis passes so that
|
|
1239
|
-
# dependency_summary, env_summary and code_notes_summary are never null.
|
|
1240
|
-
# architecture=True is also enabled so that architecture.confidence is
|
|
1241
|
-
# consistent with --agent (which auto-enables architecture). The
|
|
1242
|
-
# ArchitectureAnalyzer is path-based and adds negligible latency.
|
|
1243
|
-
if compact:
|
|
1244
|
-
dependencies = True
|
|
1245
|
-
env_map = True
|
|
1246
|
-
code_notes = True
|
|
1247
|
-
architecture = True
|
|
1248
|
-
|
|
1249
1250
|
dependency_analyzer = DependencyAnalyzer() if dependencies else None
|
|
1250
1251
|
graph_analyzer = GraphAnalyzer() if graph_modules else None
|
|
1251
1252
|
parsed_graph_edges = (
|
|
@@ -2024,30 +2025,51 @@ def main(
|
|
|
2024
2025
|
# L2 (view): stores the exact rendered string for this flag combination.
|
|
2025
2026
|
#
|
|
2026
2027
|
# GC runs after L2 write to evict old commits and orphaned blobs/views.
|
|
2028
|
+
# Writes happen in a background daemon thread so cold-run latency is not
|
|
2029
|
+
# penalised by gzip encoding + disk I/O. atexit join ensures writes
|
|
2030
|
+
# complete on clean exit without blocking the user-visible response.
|
|
2027
2031
|
if not no_cache and _core_key and not _pipeline_error:
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2032
|
+
import atexit as _atexit
|
|
2033
|
+
import threading as _threading
|
|
2034
|
+
|
|
2035
|
+
# Capture all closure state before handing off to thread
|
|
2036
|
+
_bg_sm = sm
|
|
2037
|
+
_bg_target = target
|
|
2038
|
+
_bg_core_key = _core_key
|
|
2039
|
+
_bg_view_key = _view_key
|
|
2040
|
+
_bg_view_flags_str = _view_flags_str
|
|
2041
|
+
_bg_content = content
|
|
2042
|
+
_bg_format = format
|
|
2043
|
+
_bg_hashlib = _hashlib
|
|
2044
|
+
_bg_cache_mod = _cache_mod
|
|
2045
|
+
|
|
2046
|
+
def _write_cache_async() -> None:
|
|
2047
|
+
try:
|
|
2048
|
+
from sourcecode.serializer import core_view as _core_view_fn
|
|
2049
|
+
_core_dict_write = _core_view_fn(_bg_sm)
|
|
2050
|
+
_written_core_hash = _bg_cache_mod.write_core(
|
|
2051
|
+
_bg_target, _bg_core_key, _core_dict_write
|
|
2045
2052
|
)
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2053
|
+
if _written_core_hash:
|
|
2054
|
+
_vk = _bg_view_key
|
|
2055
|
+
if not _vk:
|
|
2056
|
+
_wvh = _bg_hashlib.sha256(_bg_view_flags_str.encode()).hexdigest()[:8]
|
|
2057
|
+
_vk = f"{_written_core_hash}-{_wvh}"
|
|
2058
|
+
_bg_cache_mod.write_view(
|
|
2059
|
+
_bg_target,
|
|
2060
|
+
_vk,
|
|
2061
|
+
_bg_content,
|
|
2062
|
+
fmt=_bg_format,
|
|
2063
|
+
layers=_compute_analyzer_fingerprints(),
|
|
2064
|
+
)
|
|
2065
|
+
from sourcecode.cache import cache_dir as _cdir, _gc as _run_gc
|
|
2066
|
+
_run_gc(_cdir(_bg_target))
|
|
2067
|
+
except Exception:
|
|
2068
|
+
pass
|
|
2069
|
+
|
|
2070
|
+
_cache_write_thread = _threading.Thread(target=_write_cache_async, daemon=True)
|
|
2071
|
+
_cache_write_thread.start()
|
|
2072
|
+
_atexit.register(_cache_write_thread.join, 5.0)
|
|
2051
2073
|
|
|
2052
2074
|
# Update RIS with aggregated snapshot data (non-fatal side-effect).
|
|
2053
2075
|
if not no_cache and not _pipeline_error and _core_key:
|
|
@@ -1,193 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.32.
|
|
3
|
+
Version: 1.32.4
|
|
4
4
|
Summary: Deterministic codebase context for AI coding agents
|
|
5
|
-
License: Apache License
|
|
6
|
-
Version 2.0, January 2004
|
|
7
|
-
http://www.apache.org/licenses/
|
|
8
|
-
|
|
9
|
-
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
10
|
-
|
|
11
|
-
1. Definitions.
|
|
12
|
-
|
|
13
|
-
"License" shall mean the terms and conditions for use, reproduction,
|
|
14
|
-
and distribution as defined by Sections 1 through 9 of this document.
|
|
15
|
-
|
|
16
|
-
"Licensor" shall mean the copyright owner or entity authorized by
|
|
17
|
-
the copyright owner that is granting the License.
|
|
18
|
-
|
|
19
|
-
"Legal Entity" shall mean the union of the acting entity and all
|
|
20
|
-
other entities that control, are controlled by, or are under common
|
|
21
|
-
control with that entity. For the purposes of this definition,
|
|
22
|
-
"control" means (i) the power, direct or indirect, to cause the
|
|
23
|
-
direction or management of such entity, whether by contract or
|
|
24
|
-
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
25
|
-
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
26
|
-
|
|
27
|
-
"You" (or "Your") shall mean an individual or Legal Entity
|
|
28
|
-
exercising permissions granted by this License.
|
|
29
|
-
|
|
30
|
-
"Source" form shall mean the preferred form for making modifications,
|
|
31
|
-
including but not limited to software source code, documentation
|
|
32
|
-
source, and configuration files.
|
|
33
|
-
|
|
34
|
-
"Object" form shall mean any form resulting from mechanical
|
|
35
|
-
transformation or translation of a Source form, including but
|
|
36
|
-
not limited to compiled object code, generated documentation,
|
|
37
|
-
and conversions to other formats.
|
|
38
|
-
|
|
39
|
-
"Work" shall mean the work of authorship made available under
|
|
40
|
-
the License, as indicated by a copyright notice that is included in
|
|
41
|
-
or attached to the work (an example is provided in the Appendix below).
|
|
42
|
-
|
|
43
|
-
"Derivative Works" shall mean any work, whether in Source or Object
|
|
44
|
-
form, that is based on (or derived from) the Work and for which the
|
|
45
|
-
editorial revisions, annotations, elaborations, or other transformations
|
|
46
|
-
represent, as a whole, an original work of authorship. For the purposes
|
|
47
|
-
of this License, Derivative Works shall not include works that remain
|
|
48
|
-
separable from, or merely link (or bind by name) to the interfaces of,
|
|
49
|
-
the Work and Derivative Works thereof.
|
|
50
|
-
|
|
51
|
-
"Contribution" shall mean, as submitted to the Licensor for inclusion
|
|
52
|
-
in the Work by the copyright owner or by an individual or Legal Entity
|
|
53
|
-
authorized to submit on behalf of the copyright owner. For the purposes
|
|
54
|
-
of this definition, "submit" means any form of electronic, verbal, or
|
|
55
|
-
written communication sent to the Licensor or its representatives,
|
|
56
|
-
including but not limited to communication on electronic mailing lists,
|
|
57
|
-
source code control systems, and issue tracking systems that are managed
|
|
58
|
-
by, or on behalf of, the Licensor for the purpose of discussing and
|
|
59
|
-
improving the Work, but excluding communication that is conspicuously
|
|
60
|
-
marked or designated in writing by the copyright owner as "Not a
|
|
61
|
-
Contribution."
|
|
62
|
-
|
|
63
|
-
"Contributor" shall mean Licensor and any Legal Entity on behalf of
|
|
64
|
-
whom a Contribution has been received by the Licensor and included
|
|
65
|
-
within the Work.
|
|
66
|
-
|
|
67
|
-
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
68
|
-
this License, each Contributor hereby grants to You a perpetual,
|
|
69
|
-
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
70
|
-
copyright license to reproduce, prepare Derivative Works of,
|
|
71
|
-
publicly display, publicly perform, sublicense, and distribute the
|
|
72
|
-
Work and such Derivative Works in Source or Object form.
|
|
73
|
-
|
|
74
|
-
3. Grant of Patent License. Subject to the terms and conditions of
|
|
75
|
-
this License, each Contributor hereby grants to You a perpetual,
|
|
76
|
-
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
77
|
-
(except as stated in this section) patent license to make, have made,
|
|
78
|
-
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
79
|
-
where such license applies only to those patent claims licensable
|
|
80
|
-
by such Contributor that are necessarily infringed by their
|
|
81
|
-
Contribution(s) alone or by the combination of their Contribution(s)
|
|
82
|
-
with the Work to which such Contribution(s) was submitted. If You
|
|
83
|
-
institute patent litigation against any entity (including a cross-claim
|
|
84
|
-
or counterclaim in a lawsuit) alleging that the Work or any Contributor
|
|
85
|
-
Contribution constitutes direct or contributory patent infringement,
|
|
86
|
-
then any patent rights granted to You under this License for that Work
|
|
87
|
-
shall terminate as of the date such litigation is filed.
|
|
88
|
-
|
|
89
|
-
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
-
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
-
modifications, and in Source or Object form, provided that You
|
|
92
|
-
meet the following conditions:
|
|
93
|
-
|
|
94
|
-
(a) You must give any other recipients of the Work or Derivative
|
|
95
|
-
Works a copy of this License; and
|
|
96
|
-
|
|
97
|
-
(b) You must cause any modified files to carry prominent notices
|
|
98
|
-
stating that You changed the files; and
|
|
99
|
-
|
|
100
|
-
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
-
that You distribute, all copyright, patent, trademark, and
|
|
102
|
-
attribution notices from the Source form of the Work,
|
|
103
|
-
excluding those notices that do not pertain to any part of
|
|
104
|
-
the Derivative Works; and
|
|
105
|
-
|
|
106
|
-
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
-
distribution, You must include a readable copy of the
|
|
108
|
-
attribution notices contained within such NOTICE file, in
|
|
109
|
-
at least one of the following places: within a NOTICE text
|
|
110
|
-
file distributed as part of the Derivative Works; within
|
|
111
|
-
the Source form or documentation, if provided along with the
|
|
112
|
-
Derivative Works; or, within a display generated by the
|
|
113
|
-
Derivative Works, if and wherever such third-party notices
|
|
114
|
-
normally appear. The contents of the NOTICE file are for
|
|
115
|
-
informational purposes only and do not modify the License.
|
|
116
|
-
You may add Your own attribution notices within Derivative
|
|
117
|
-
Works that You distribute, alongside or in addition to the
|
|
118
|
-
NOTICE text from the Work, provided that such additional
|
|
119
|
-
attribution notices cannot be construed as modifying the License.
|
|
120
|
-
|
|
121
|
-
You may add Your own license statement for Your modifications and
|
|
122
|
-
may provide additional grant of rights to use, copy, modify, merge,
|
|
123
|
-
publish, distribute, sublicense, and/or sell copies of the
|
|
124
|
-
Derivative Works, as separate terms and conditions for their use,
|
|
125
|
-
reproduction, and distribution, or alongside or as supplement to
|
|
126
|
-
any license terms for such Derivative Works as a whole, provided
|
|
127
|
-
Your use, reproduction, and distribution of the Work otherwise
|
|
128
|
-
complies with the conditions stated in this License.
|
|
129
|
-
|
|
130
|
-
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
-
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
-
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
-
this License, without any additional terms or conditions.
|
|
134
|
-
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
-
the terms of any separate license agreement you may have executed
|
|
136
|
-
with Licensor regarding such Contributions.
|
|
137
|
-
|
|
138
|
-
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
-
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
-
except as required for reasonable and customary use in describing the
|
|
141
|
-
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
-
|
|
143
|
-
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
-
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
-
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
-
implied, including, without limitation, any warranties or conditions
|
|
148
|
-
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
-
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
-
appropriateness of using or reproducing the Work and assume any
|
|
151
|
-
risks associated with Your exercise of permissions under this License.
|
|
152
|
-
|
|
153
|
-
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
-
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
-
unless required by applicable law (such as deliberate and grossly
|
|
156
|
-
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
-
liable to You for damages, including any direct, indirect, special,
|
|
158
|
-
incidental, or exemplary damages of any character arising as a
|
|
159
|
-
result of this License or out of the use or inability to use the
|
|
160
|
-
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
-
work stoppage, computer failure or malfunction, or all other
|
|
162
|
-
commercial damages or losses), even if such Contributor has been
|
|
163
|
-
advised of the possibility of such damages.
|
|
164
|
-
|
|
165
|
-
9. Accepting Warranty or Liability. While redistributing the Work or
|
|
166
|
-
Derivative Works thereof, You may choose to offer, and charge a fee
|
|
167
|
-
for, acceptance of support, warranty, indemnity, or other liability
|
|
168
|
-
obligations and/or rights consistent with this License. However, in
|
|
169
|
-
accepting such obligations, You may offer such obligations only on
|
|
170
|
-
Your own behalf and on Your sole responsibility, not on behalf of
|
|
171
|
-
any other Contributor, and only if You agree to indemnify, defend,
|
|
172
|
-
and hold each Contributor harmless for any liability incurred by,
|
|
173
|
-
or claims asserted against, such Contributor by reason of your
|
|
174
|
-
accepting any warranty or additional liability.
|
|
175
|
-
|
|
176
|
-
END OF TERMS AND CONDITIONS
|
|
177
|
-
|
|
178
|
-
Copyright 2024 sourcecode contributors
|
|
179
|
-
|
|
180
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
|
181
|
-
you may not use this file except in compliance with the License.
|
|
182
|
-
You may obtain a copy of the License at
|
|
183
|
-
|
|
184
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
|
185
|
-
|
|
186
|
-
Unless required by applicable law or agreed to in writing, software
|
|
187
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
|
188
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
189
|
-
See the License for the specific language governing permissions and
|
|
190
|
-
limitations under the License.
|
|
191
5
|
License-File: LICENSE
|
|
192
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
193
7
|
Classifier: Development Status :: 4 - Beta
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=ekW8VutI9sqMXBEN-MSRy8p8fHOhat9jl5sCtrKrygc,103
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=qh749a7ykPtGmQI1MR9y6j8TtL_jBdVYFx9YRsLqOMw,44121
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
5
5
|
sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,50578
|
|
6
|
-
sourcecode/cache.py,sha256=
|
|
6
|
+
sourcecode/cache.py,sha256=dvXt8HsU-SyO0a0UXY1n-wt6F2ozGv9VnKR0XydjxCY,27502
|
|
7
|
+
sourcecode/cache.tmp_new,sha256=-IvV7CojiZjqeKMln1m-lqI0QVA2uFGWmYir4XRFOUk,27970
|
|
7
8
|
sourcecode/canonical_ir.py,sha256=_HM3AUmKSdna9u4dCoU6rpgSA6HdF8gzOKZykIUCNGY,23277
|
|
8
9
|
sourcecode/classifier.py,sha256=2lYoSH3vOTkXZYPU7Go2WIet1-IuNzTWVhc-ULnXtgw,8024
|
|
9
|
-
sourcecode/cli.py,sha256=
|
|
10
|
+
sourcecode/cli.py,sha256=RdJ1F_sjmqTJ6zcv3eY2Tokg9H5dJe6zSuK3ownUGcA,166617
|
|
10
11
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
11
12
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
12
13
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -79,8 +80,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
79
80
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
80
81
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
81
82
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
82
|
-
sourcecode-1.32.
|
|
83
|
-
sourcecode-1.32.
|
|
84
|
-
sourcecode-1.32.
|
|
85
|
-
sourcecode-1.32.
|
|
86
|
-
sourcecode-1.32.
|
|
83
|
+
sourcecode-1.32.4.dist-info/METADATA,sha256=SJ09ABP688xA3bbWRuseGyJyVK-mYQmR7CgodrIjmDE,19120
|
|
84
|
+
sourcecode-1.32.4.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
85
|
+
sourcecode-1.32.4.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
86
|
+
sourcecode-1.32.4.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
87
|
+
sourcecode-1.32.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|