sourcecode 1.32.3__py3-none-any.whl → 1.32.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.32.3"
3
+ __version__ = "1.32.4"
sourcecode/cache.py CHANGED
@@ -92,6 +92,8 @@ _CAS_FIELDS: frozenset[str] = frozenset([
92
92
  _CAS_THRESHOLD: int = 4096
93
93
 
94
94
  _DEFAULT_KEEP_COMMITS: int = 5
95
+ _DEFAULT_MAX_CORES: int = 20
96
+ _DEFAULT_MAX_SIZE_MB: int = 50
95
97
 
96
98
  # Matches "snapshot-<hex_commit>-<hex_flags>.json.gz"
97
99
  _SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
@@ -124,6 +126,58 @@ def cache_dir(repo_root: Path) -> Path:
124
126
  return base / repo_id(repo_root)
125
127
 
126
128
 
129
+ # ---------------------------------------------------------------------------
130
+ # Public API — observability
131
+ # ---------------------------------------------------------------------------
132
+
133
def status(repo_root: Path) -> dict[str, Any]:
    """Return a stats dict describing the current cache state for *repo_root*.

    Keys: ``cache_dir``, ``cores``, ``snapshots``, ``views``, ``cas_blobs``,
    ``total_size_bytes``, ``total_size_mb``.

    All counters are zero when the cache directory does not exist.  Files
    that disappear while being measured are counted but contribute no bytes.
    """
    cache_d = cache_dir(repo_root)
    stats: dict[str, Any] = {
        "cache_dir": str(cache_d),
        "cores": 0,
        "snapshots": 0,
        "views": 0,
        "cas_blobs": 0,
        "total_size_bytes": 0,
        "total_size_mb": 0.0,
    }
    if not cache_d.exists():
        return stats

    cores = list(cache_d.glob("core-*.json.gz"))
    snapshots = list(cache_d.glob("snapshot-*.json.gz"))
    views = list(cache_d.glob("view-*.json.gz"))
    cas_d = _cas_dir(cache_d)
    cas_blobs = list(cas_d.glob("*.gz")) if cas_d.exists() else []

    total_bytes = 0
    for f in (*cores, *snapshots, *views, *cas_blobs):
        try:
            total_bytes += f.stat().st_size
        except OSError:
            # The file was removed between glob() and stat() (e.g. by a GC
            # pass in another process); an exists()-then-stat() check would
            # still race, so just skip it.
            continue

    stats.update(
        cores=len(cores),
        snapshots=len(snapshots),
        views=len(views),
        cas_blobs=len(cas_blobs),
        total_size_bytes=total_bytes,
        total_size_mb=round(total_bytes / (1024 * 1024), 2),
    )
    return stats
161
+
162
+
163
def clear(repo_root: Path) -> int:
    """Delete all cache files for *repo_root*.

    Removes core, snapshot and view files plus the CAS blobs, and returns
    the number of files that are actually gone afterwards.  Returns 0 when
    the cache directory does not exist.
    """
    cache_d = cache_dir(repo_root)
    if not cache_d.exists():
        return 0

    # Collect first, delete second, so the directory is not mutated while
    # glob() is still iterating it.
    targets: list[Path] = []
    for pattern in ("core-*.json.gz", "snapshot-*.json.gz", "view-*.json.gz"):
        targets.extend(cache_d.glob(pattern))
    cas_d = _cas_dir(cache_d)
    if cas_d.exists():
        targets.extend(cas_d.glob("*.gz"))

    removed = 0
    for f in targets:
        _safe_unlink(f)
        # _safe_unlink appears to be best-effort (no error is reported to
        # us), so only count the file when it really disappeared.
        if not f.exists():
            removed += 1
    return removed
179
+
180
+
127
181
  # ---------------------------------------------------------------------------
128
182
  # Public API — read / write
129
183
  # ---------------------------------------------------------------------------
@@ -193,7 +247,7 @@ def write(
193
247
  try:
194
248
  cache_d.mkdir(parents=True, exist_ok=True)
195
249
  payload = _build_envelope(cache_key, content, fmt, layers or {}, cache_d)
196
- dest.write_bytes(payload)
250
+ _atomic_write(dest, payload)
197
251
  except Exception:
198
252
  return # non-fatal
199
253
 
@@ -275,7 +329,7 @@ def write_core(repo_root: Path, core_key: str, core_data: dict[str, Any]) -> str
275
329
  json.dumps(envelope, ensure_ascii=False).encode("utf-8"),
276
330
  compresslevel=6,
277
331
  )
278
- dest.write_bytes(payload)
332
+ _atomic_write(dest, payload)
279
333
  except Exception:
280
334
  pass
281
335
 
@@ -327,7 +381,7 @@ def write_view(
327
381
  try:
328
382
  cache_d.mkdir(parents=True, exist_ok=True)
329
383
  payload = _build_envelope(view_key, content, fmt, layers or {}, cache_d)
330
- dest.write_bytes(payload)
384
+ _atomic_write(dest, payload)
331
385
  except Exception:
332
386
  pass
333
387
 
@@ -529,12 +583,16 @@ def _cas_restore(
529
583
  def _gc(cache_d: Path) -> None:
530
584
  """Evict old snapshots/cores/views and sweep orphaned CAS blobs.
531
585
 
532
- Keeps snapshots and cores from the last ``SOURCECODE_CACHE_KEEP_COMMITS``
533
- distinct git commits (determined by newest mtime within each commit group).
534
- Views are then pruned: a view survives only when its core-hash prefix
535
- matches a core file in the surviving set.
586
+ Three eviction passes (all non-fatal):
587
+ 1. Commit-based: keep only last SOURCECODE_CACHE_KEEP_COMMITS distinct SHAs.
588
+ 2. Core-count: keep at most SOURCECODE_CACHE_MAX_CORES core files (LRU).
589
+ 3. Size-based: if total cache exceeds SOURCECODE_CACHE_MAX_SIZE_MB, evict
590
+ oldest core+snapshot files until under budget.
591
+ Views and CAS blobs are swept after each pass.
536
592
  """
537
593
  keep = int(os.environ.get("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS))
594
+ max_cores = int(os.environ.get("SOURCECODE_CACHE_MAX_CORES", _DEFAULT_MAX_CORES))
595
+ max_size_bytes = int(os.environ.get("SOURCECODE_CACHE_MAX_SIZE_MB", _DEFAULT_MAX_SIZE_MB)) * 1024 * 1024
538
596
 
539
597
  try:
540
598
  all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
@@ -544,7 +602,7 @@ def _gc(cache_d: Path) -> None:
544
602
  if not all_snapshots and not all_cores and not all_views:
545
603
  return
546
604
 
547
- # Group snapshot + core files by commit SHA
605
+ # ── Pass 1: commit-based eviction ──────────────────────────────────
548
606
  groups: dict[str, list[Path]] = {}
549
607
  for f in all_snapshots:
550
608
  m = _SNAPSHOT_RE.match(f.name)
@@ -558,7 +616,6 @@ def _gc(cache_d: Path) -> None:
558
616
  surviving: list[Path]
559
617
 
560
618
  if keep <= 0 or len(groups) <= keep:
561
- # No eviction needed — but still sweep views + CAS
562
619
  surviving = all_snapshots + all_cores
563
620
  else:
564
621
  def _newest_mtime(commit: str) -> float:
@@ -573,7 +630,33 @@ def _gc(cache_d: Path) -> None:
573
630
  for f in groups[commit]:
574
631
  _safe_unlink(f)
575
632
 
633
+ # ── Pass 2: per-repo core count cap ────────────────────────────────
634
+ if max_cores > 0:
635
+ surviving_cores = [p for p in surviving if p.name.startswith("core-") and p.exists()]
636
+ if len(surviving_cores) > max_cores:
637
+ surviving_cores.sort(key=lambda p: p.stat().st_mtime, reverse=True)
638
+ for evict in surviving_cores[max_cores:]:
639
+ _safe_unlink(evict)
640
+ surviving = [p for p in surviving if p != evict]
641
+
642
+ # ── Pass 3: total size cap ──────────────────────────────────────────
643
+ if max_size_bytes > 0:
644
+ size_candidates = [p for p in surviving if p.exists()]
645
+ total = sum(p.stat().st_size for p in size_candidates if not p.name.startswith("view-"))
646
+ if total > max_size_bytes:
647
+ # Sort oldest-first; evict core+snapshot files until under budget
648
+ size_candidates.sort(key=lambda p: p.stat().st_mtime)
649
+ for evict in size_candidates:
650
+ if evict.name.startswith("view-"):
651
+ continue
652
+ total -= evict.stat().st_size if evict.exists() else 0
653
+ _safe_unlink(evict)
654
+ surviving = [p for p in surviving if p != evict]
655
+ if total <= max_size_bytes:
656
+ break
657
+
576
658
  # Prune view files whose core hash is no longer in the surviving set
659
+ all_views = list(cache_d.glob("view-*.json.gz"))
577
660
  _gc_views(cache_d, surviving, all_views)
578
661
 
579
662
  # Sweep orphaned CAS blobs (surviving snapshots + view files may ref them)
@@ -648,6 +731,23 @@ def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
648
731
  # Utilities
649
732
  # ---------------------------------------------------------------------------
650
733
 
734
def _atomic_write(dest: Path, data: bytes) -> None:
    """Write *data* to *dest* atomically via a sibling temp file + rename.

    ``Path.replace()`` swaps the finished temp file into place, so readers
    see either the old content or the new content, never a partial write.
    The temp file always ends in ``.tmp``, which keeps it out of the
    ``*.json.gz`` / ``*.gz`` glob patterns used by the cache reader and GC.

    Raises whatever the underlying write/rename raises, after removing the
    temp file.
    """
    # The temp name must be unique per writer.  A name derived only from
    # *dest* would be shared by two processes writing the same key, and one
    # of them could rename the file while the other is still filling it.
    tmp = dest.with_name(f"{dest.name}.{os.getpid()}.{os.urandom(4).hex()}.tmp")
    try:
        tmp.write_bytes(data)
        tmp.replace(dest)
    except BaseException:
        # Also clean up on KeyboardInterrupt/SystemExit; always re-raised.
        _safe_unlink(tmp)
        raise
749
+
750
+
651
751
  def _safe_unlink(path: Path) -> None:
652
752
  try:
653
753
  path.unlink(missing_ok=True)
@@ -0,0 +1,772 @@
1
+ """
2
+ Snapshot cache manager for sourcecode — v2.
3
+
4
+ Cache layout
5
+ ------------
6
+ ~/.sourcecode/cache/<repo_id>/
7
+ snapshot-<git_sha>-<flags_hash>.json.gz ← versioned envelope
8
+ core-<git_sha>-<flags_hash>.json.gz ← L1 core analysis
9
+ view-<core_hash16>-<view_flags_hash>.json.gz ← L2 derived view
10
+ cas/
11
+ <blob_hash16>.gz ← content-addressed blobs
12
+
13
+ Schema
14
+ ------
15
+ Every snapshot file is a gzip-compressed JSON *envelope*:
16
+
17
+ {
18
+ "sv": "2", // schema version — bump to invalidate all
19
+ "key": "abc1234-aabbccdd", // cache key (git_sha + flags_hash)
20
+ "ts": "2026-05-24T22:00:00Z", // write timestamp (ISO-8601 UTC)
21
+ "fmt": "json", // output format: "json" | "yaml"
22
+ "layers": {"heuristic": "...", ...}, // analyzer fingerprints at write time
23
+ // ── content (one of two forms) ──────────────────────────────────────
24
+ "snap": {...}, // inline fields (small) — JSON mode
25
+ "cas": {"file_paths": "<h16>",…} // large fields deduped into CAS store
26
+ // — OR —
27
+ "raw": "<content string>" // YAML or unparseable JSON stored as-is
28
+ }
29
+
30
+ Content-addressed store (CAS)
31
+ -----------------------------
32
+ Large top-level JSON fields (> _CAS_THRESHOLD bytes) are extracted into the
33
+ ``cas/`` directory as individual gzip-compressed blobs identified by a 16-char
34
+ SHA-256 hash of their uncompressed bytes. Two snapshots that share an
35
+ identical ``file_paths`` array reference the *same* blob — zero duplication.
36
+
37
+ Eviction / GC
38
+ -------------
39
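After each snapshot write, ``_gc()`` keeps snapshots and cores from the last
``SOURCECODE_CACHE_KEEP_COMMITS`` distinct git commits (default 5, override via
env var), then applies the ``SOURCECODE_CACHE_MAX_CORES`` and
``SOURCECODE_CACHE_MAX_SIZE_MB`` caps.  Orphaned views are pruned and a CAS
sweep runs at the end of the same pass: blobs unreferenced by any surviving
snapshot or view are deleted.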
43
+
44
+ Backward compatibility
45
+ ----------------------
46
+ v1 files (raw gzip'd content, no envelope) are detected by the absence of an
47
+ ``sv`` key in the decompressed JSON, and served transparently. Legacy files
48
+ in ``<repo>/.sourcecode-cache/`` are also checked as a final fallback.
49
+
50
+ Env vars
51
+ --------
52
+ SOURCECODE_CACHE_DIR Override global cache base (default: ~/.sourcecode/cache)
53
+ SOURCECODE_CACHE_KEEP_COMMITS How many git commits to retain (default: 5; 0 = unlimited)
54
+ SOURCECODE_CACHE_MAX_CORES Max L1 core files to retain (default: 20; 0 = unlimited)
55
+ SOURCECODE_CACHE_MAX_SIZE_MB Max total cache size in MB (default: 50; 0 = unlimited)
56
+ """
57
+ from __future__ import annotations
58
+
59
+ import gzip
60
+ import hashlib
61
+ import json
62
+ import os
63
+ import re
64
+ from datetime import datetime, timezone
65
+ from pathlib import Path
66
+ from typing import Any, Optional
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # Version / constants
71
+ # ---------------------------------------------------------------------------
72
+
73
#: Snapshot/view envelope schema version.  Envelopes carrying a different
#: ``sv`` value are treated as a cache miss, so bumping this invalidates them.
SCHEMA_VERSION: str = "2"

#: L1 core envelope schema version (``csv``), versioned independently of
#: ``SCHEMA_VERSION``.
CORE_SCHEMA_VERSION: str = "1"

#: Top-level JSON keys that may be moved out of the envelope into the CAS.
_CAS_FIELDS: frozenset[str] = frozenset(
    {
        "file_paths",
        "entry_points",
        "docs",
        "dependencies",
        "graph",
        "semantic_calls",
        "semantic_symbols",
        "architecture",
        "metrics",
        "git_history",
        "env_map",
        "code_notes",
    }
)

#: A CAS-eligible field is extracted only when its serialised form is larger
#: than this many bytes.
_CAS_THRESHOLD: int = 4096

# Defaults for the SOURCECODE_CACHE_KEEP_COMMITS / _MAX_CORES / _MAX_SIZE_MB
# environment variables read by the GC.
_DEFAULT_KEEP_COMMITS: int = 5
_DEFAULT_MAX_CORES: int = 20
_DEFAULT_MAX_SIZE_MB: int = 50

# "snapshot-<hex_commit>-<hex_flags>.json.gz"; group 1 is the commit.
_SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")

# "core-<hex_commit>-<hex_analysis>.json.gz"; group 1 is the commit.
_CORE_RE = re.compile(r"^core-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")

# "view-<hex_core_hash16>-<hex_view_flags>.json.gz"; group 1 is the core hash.
_VIEW_RE = re.compile(r"^view-([0-9a-f]{16})-[0-9a-f]+\.json\.gz$")
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Public API — location helpers
114
+ # ---------------------------------------------------------------------------
115
+
116
+
117
def repo_id(repo_root: Path) -> str:
    """Derive a stable 16-hex-char identifier from the resolved repo path."""
    canonical = str(repo_root.resolve())
    digest = hashlib.sha256(canonical.encode())
    return digest.hexdigest()[:16]
120
+
121
+
122
def cache_dir(repo_root: Path) -> Path:
    """
    Locate the per-repo cache directory, ``<base>/<repo_id>/``.

    ``<base>`` is ``SOURCECODE_CACHE_DIR`` when that variable is set to a
    non-empty value, otherwise ``~/.sourcecode/cache``.
    """
    override = os.environ.get("SOURCECODE_CACHE_DIR", "")
    if override:
        base: Path = Path(override)
    else:
        base = Path.home() / ".sourcecode" / "cache"
    return base / repo_id(repo_root)
131
+
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # Public API — observability
135
+ # ---------------------------------------------------------------------------
136
+
137
def status(repo_root: Path) -> dict[str, Any]:
    """Return a dict describing the current cache state for *repo_root*.

    Keys: cache_dir, cores, snapshots, views, cas_blobs,
    total_size_bytes, total_size_mb.
    Returns zeros when the cache directory does not exist.  Files that
    disappear while being measured are counted but contribute no bytes.
    """
    cache_d = cache_dir(repo_root)
    stats: dict[str, Any] = {
        "cache_dir": str(cache_d),
        "cores": 0,
        "snapshots": 0,
        "views": 0,
        "cas_blobs": 0,
        "total_size_bytes": 0,
        "total_size_mb": 0.0,
    }
    if not cache_d.exists():
        return stats

    cores = list(cache_d.glob("core-*.json.gz"))
    snapshots = list(cache_d.glob("snapshot-*.json.gz"))
    views = list(cache_d.glob("view-*.json.gz"))
    cas_d = _cas_dir(cache_d)
    cas_blobs = list(cas_d.glob("*.gz")) if cas_d.exists() else []

    total_bytes = 0
    for f in (*cores, *snapshots, *views, *cas_blobs):
        try:
            total_bytes += f.stat().st_size
        except OSError:
            # The file was removed between glob() and stat() (e.g. by a GC
            # pass in another process); an exists()-then-stat() check would
            # still race, so just skip it.
            continue

    stats.update(
        cores=len(cores),
        snapshots=len(snapshots),
        views=len(views),
        cas_blobs=len(cas_blobs),
        total_size_bytes=total_bytes,
        total_size_mb=round(total_bytes / (1024 * 1024), 2),
    )
    return stats
171
+
172
+
173
def clear(repo_root: Path) -> int:
    """Delete all cache files for *repo_root*.

    Removes cores, snapshots, views, and CAS blobs, and returns the number
    of files that are actually gone afterwards.  Returns 0 when the cache
    directory does not exist.
    """
    cache_d = cache_dir(repo_root)
    if not cache_d.exists():
        return 0

    # Collect first, delete second, so the directory is not mutated while
    # glob() is still iterating it.
    targets: list[Path] = []
    for pattern in ("core-*.json.gz", "snapshot-*.json.gz", "view-*.json.gz"):
        targets.extend(cache_d.glob(pattern))
    cas_d = _cas_dir(cache_d)
    if cas_d.exists():
        targets.extend(cas_d.glob("*.gz"))

    removed = 0
    for f in targets:
        _safe_unlink(f)
        # _safe_unlink appears to be best-effort (no error is reported to
        # us), so only count the file when it really disappeared.
        if not f.exists():
            removed += 1
    return removed
193
+
194
+
195
+ # ---------------------------------------------------------------------------
196
+ # Public API — read / write
197
+ # ---------------------------------------------------------------------------
198
+
199
def read(repo_root: Path, cache_key: str) -> Optional[str]:
    """
    Look up the cached snapshot for *cache_key*; ``None`` means a miss.

    Lookup order:
      1. ``<cache_dir>/snapshot-<cache_key>.json.gz`` — v2 envelope or v1 raw
      2. ``<repo_root>/.sourcecode-cache/snapshot-<cache_key>.json`` — legacy

    When the global file exists but cannot be served it is deleted and the
    call is a miss; the legacy location is not consulted in that case.
    """
    cache_d = cache_dir(repo_root)

    # ── 1. Global location ─────────────────────────────────────────────────
    primary = cache_d / f"snapshot-{cache_key}.json.gz"
    if primary.exists():
        content: Optional[str]
        try:
            content = _parse_envelope(primary.read_bytes(), cache_d)
        except Exception:
            content = None
        if content is None:
            # Corrupted file or schema version mismatch — evict.
            _safe_unlink(primary)
        return content

    # ── 2. Legacy location ─────────────────────────────────────────────────
    legacy = repo_root / ".sourcecode-cache" / f"snapshot-{cache_key}.json"
    if not legacy.exists():
        return None
    try:
        return legacy.read_text(encoding="utf-8")
    except Exception:
        return None
230
+
231
+
232
def write(
    repo_root: Path,
    cache_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """
    Store *content* as a versioned snapshot envelope, then run the GC.

    Parameters
    ----------
    repo_root : Path
        Root directory of the analysed repository.
    cache_key : str
        ``"{git_sha}-{flags_hash}"`` identifying this analysis.
    content : str
        Final rendered output (JSON or YAML string).
    fmt : str
        ``"json"`` or ``"yaml"``; CAS extraction is only attempted for
        ``"json"``.
    layers : dict[str, str], optional
        Analyzer fingerprints, recorded verbatim in the envelope.

    A failure while building or writing the envelope is swallowed, and the
    GC is skipped in that case.
    """
    cache_d = cache_dir(repo_root)
    target = cache_d / f"snapshot-{cache_key}.json.gz"
    try:
        cache_d.mkdir(parents=True, exist_ok=True)
        _atomic_write(
            target,
            _build_envelope(cache_key, content, fmt, layers or {}, cache_d),
        )
    except Exception:
        return  # non-fatal
    else:
        _gc(cache_d)
269
+
270
+
271
+ # ---------------------------------------------------------------------------
272
+ # Layer 1 — Core Analysis cache
273
+ # ---------------------------------------------------------------------------
274
+
275
def read_core(repo_root: Path, core_key: str) -> Optional[tuple[dict[str, Any], str]]:
    """Load core analysis artifacts from the L1 cache.

    Returns ``(core_dict, core_hash)`` on a hit and ``None`` on a miss.
    ``core_hash`` is the ``hash`` value stored in the envelope (the L2
    view-key prefix).  A file that exists but is unreadable, has the wrong
    ``csv`` schema version, or lacks a dict ``data`` / non-empty ``hash`` is
    deleted and reported as a miss.
    """
    gz_path = cache_dir(repo_root) / f"core-{core_key}.json.gz"
    if not gz_path.exists():
        return None

    try:
        envelope = json.loads(gzip.decompress(gz_path.read_bytes()).decode("utf-8"))
    except Exception:
        envelope = None

    core_data = None
    core_hash = ""
    if isinstance(envelope, dict) and envelope.get("csv") == CORE_SCHEMA_VERSION:
        core_data = envelope.get("data")
        core_hash = envelope.get("hash", "")

    if isinstance(core_data, dict) and core_hash:
        return core_data, core_hash

    # Anything else is unusable — evict so the next write starts clean.
    _safe_unlink(gz_path)
    return None
308
+
309
+
310
def write_core(repo_root: Path, core_key: str, core_data: dict[str, Any]) -> str:
    """Store a core analysis dict in the L1 cache.

    Returns the first 16 hex chars of the SHA-256 of the core JSON (the L2
    key prefix).  The hash is returned even when the file could not be
    written: write failures are swallowed.

    File layout::

        <cache_dir>/core-<core_key>.json.gz

    Envelope schema::

        { "csv":  "1",        // CORE_SCHEMA_VERSION
          "key":  "...",      // core_key passed in
          "hash": "<h16>",    // SHA-256[:16] of core JSON
          "ts":   "...",      // ISO-8601 UTC write time
          "data": {...} }     // the core dict itself
    """
    core_hash = hashlib.sha256(
        json.dumps(core_data, ensure_ascii=False).encode()
    ).hexdigest()[:16]

    cache_d = cache_dir(repo_root)
    target = cache_d / f"core-{core_key}.json.gz"
    try:
        cache_d.mkdir(parents=True, exist_ok=True)
        record: dict[str, Any] = {
            "csv": CORE_SCHEMA_VERSION,
            "key": core_key,
            "hash": core_hash,
            "ts": _now_iso(),
            "data": core_data,
        }
        encoded = json.dumps(record, ensure_ascii=False).encode("utf-8")
        _atomic_write(target, gzip.compress(encoded, compresslevel=6))
    except Exception:
        pass  # best-effort

    return core_hash
351
+
352
+
353
+ # ---------------------------------------------------------------------------
354
+ # Layer 2 — Derived View cache
355
+ # ---------------------------------------------------------------------------
356
+
357
def read_view(repo_root: Path, view_key: str) -> Optional[str]:
    """Load a rendered view string from the L2 cache.

    Views live in ``view-{view_key}.json.gz`` and use the same envelope+CAS
    format as snapshot files.  Returns the content string, or ``None`` on a
    miss.  A file that exists but cannot be served is deleted.
    """
    cache_d = cache_dir(repo_root)
    view_path = cache_d / f"view-{view_key}.json.gz"
    if not view_path.exists():
        return None

    content: Optional[str]
    try:
        content = _parse_envelope(view_path.read_bytes(), cache_d)
    except Exception:
        content = None

    if content is None:
        _safe_unlink(view_path)
    return content
376
+
377
+
378
def write_view(
    repo_root: Path,
    view_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """Store a rendered view string in the L2 cache as ``view-{view_key}.json.gz``.

    The envelope is built by the same helper as snapshot envelopes, so
    CAS-eligible fields are deduplicated the same way.  Failures are
    swallowed.  GC is **not** run here — callers that want eviction should
    invoke ``_gc(cache_dir(repo_root))`` themselves.
    """
    cache_d = cache_dir(repo_root)
    target = cache_d / f"view-{view_key}.json.gz"
    try:
        cache_d.mkdir(parents=True, exist_ok=True)
        _atomic_write(
            target,
            _build_envelope(view_key, content, fmt, layers or {}, cache_d),
        )
    except Exception:
        pass  # best-effort
401
+
402
+
403
+ # ---------------------------------------------------------------------------
404
+ # Envelope (de)serialisation
405
+ # ---------------------------------------------------------------------------
406
+
407
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = datetime.now(timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
409
+
410
+
411
def _build_envelope(
    cache_key: str,
    content: str,
    fmt: str,
    layers: dict[str, str],
    cache_d: Path,
) -> bytes:
    """Build a versioned envelope and return it as gzip-compressed JSON bytes.

    For ``fmt == "json"`` content that parses to a dict, large CAS-eligible
    fields are moved into the CAS store and the rest is stored inline under
    ``snap``.  When at least one field was moved, the original top-level key
    order is recorded under ``order`` so that the reader can rebuild the
    document in its original order.  Everything else (YAML, non-dict JSON,
    unparseable JSON, or a failed CAS extraction) is stored verbatim under
    ``raw``.
    """
    envelope: dict[str, Any] = {
        "sv": SCHEMA_VERSION,
        "key": cache_key,
        "ts": _now_iso(),
        "fmt": fmt,
        "layers": layers,
    }

    if fmt == "json":
        # Try to parse and extract large fields into CAS
        try:
            snap_dict = json.loads(content)
            if isinstance(snap_dict, dict):
                inline, cas_refs = _cas_extract(snap_dict, cache_d)
                envelope["snap"] = inline
                if cas_refs:
                    envelope["cas"] = cas_refs
                    # Extraction splits the keys into two dicts, which loses
                    # their relative order; remember it for the reader.
                    envelope["order"] = list(snap_dict)
            else:
                # JSON array or primitive — store as-is
                envelope["raw"] = content
        except Exception:
            envelope["raw"] = content
    else:
        # YAML or unknown format — store raw string
        envelope["raw"] = content

    return gzip.compress(
        json.dumps(envelope, ensure_ascii=False).encode("utf-8"),
        compresslevel=6,
    )


def _parse_envelope(data: bytes, cache_d: Path) -> Optional[str]:
    """
    Decompress *data*, parse the envelope, resolve CAS refs, return content.

    Returns ``None`` on schema version mismatch, CAS miss, or parse failure.
    v1 files (no envelope wrapper) are detected and served transparently.
    Envelopes written without an ``order`` field are still accepted; their
    CAS-restored fields simply follow the inline ones.
    """
    try:
        raw_bytes = gzip.decompress(data)
        text = raw_bytes.decode("utf-8")
    except Exception:
        return None

    # ── v1 detection ────────────────────────────────────────────────────────
    # v1 stored the content string directly (gzip'd UTF-8), not an envelope.
    # Anything that is not a JSON object carrying an "sv" key is v1 content.
    try:
        envelope = json.loads(text)
    except Exception:
        return text  # not JSON at all (e.g. YAML v1) — return as-is

    if not isinstance(envelope, dict) or "sv" not in envelope:
        return text  # v1 JSON snapshot
    if envelope["sv"] != SCHEMA_VERSION:
        return None  # envelope from another schema version → miss

    # ── v2 envelope ─────────────────────────────────────────────────────────
    if "raw" in envelope:
        return envelope["raw"]

    if "snap" in envelope:
        inline: dict[str, Any] = envelope["snap"]
        cas_refs: dict[str, str] = envelope.get("cas", {})
        if cas_refs:
            restored = _cas_restore(inline, cas_refs, cache_d)
            if restored is None:
                return None  # CAS miss (blob evicted or corrupted)
            order = envelope.get("order")
            if isinstance(order, list):
                # Put the CAS-restored fields back where they originally
                # were; keys missing from `order` keep their current place
                # at the end.
                ordered: dict[str, Any] = {
                    k: restored[k]
                    for k in order
                    if isinstance(k, str) and k in restored
                }
                for k, v in restored.items():
                    ordered.setdefault(k, v)
                restored = ordered
        else:
            restored = dict(inline)
        # Re-serialise; this assumes the pipeline renders JSON with
        # indent=2 and ensure_ascii=False (per the original implementation).
        return json.dumps(restored, indent=2, ensure_ascii=False)

    return None  # malformed envelope
503
+
504
+
505
+ # ---------------------------------------------------------------------------
506
+ # CAS store
507
+ # ---------------------------------------------------------------------------
508
+
509
def _cas_dir(cache_d: Path) -> Path:
    """Return the ``cas`` subdirectory of the per-repo cache directory."""
    return cache_d.joinpath("cas")
511
+
512
+
513
def _cas_path(cache_d: Path, blob_hash: str) -> Path:
    """Return the path of the CAS blob file ``<blob_hash>.gz``."""
    blob_name = f"{blob_hash}.gz"
    return _cas_dir(cache_d) / blob_name
515
+
516
+
517
def _cas_store_blob(cache_d: Path, serialised: str) -> str:
    """
    Store *serialised* (a JSON string) in the CAS.  Idempotent.

    Returns the 16-char SHA-256 hex hash (of the UTF-8 bytes) that
    identifies the blob.  An existing blob file is never rewritten.
    """
    raw = serialised.encode("utf-8")
    blob_hash = hashlib.sha256(raw).hexdigest()[:16]
    path = _cas_path(cache_d, blob_hash)
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        # Write atomically: because of the exists() short-circuit above, a
        # blob left truncated by an interrupted plain write would never be
        # repaired and every envelope referencing it would miss.
        _atomic_write(path, gzip.compress(raw, compresslevel=6))
    return blob_hash
530
+
531
+
532
def _cas_load_blob(cache_d: Path, blob_hash: str) -> Optional[str]:
    """Load the JSON string stored under *blob_hash*.

    Returns ``None`` when the blob file is absent or cannot be read,
    decompressed or decoded.
    """
    blob_path = _cas_path(cache_d, blob_hash)
    if blob_path.exists():
        try:
            compressed = blob_path.read_bytes()
            return gzip.decompress(compressed).decode("utf-8")
        except Exception:
            pass
    return None
541
+
542
+
543
def _cas_extract(
    snap_dict: dict[str, Any],
    cache_d: Path,
) -> tuple[dict[str, Any], dict[str, str]]:
    """
    Split the top-level fields of *snap_dict* into inline and CAS-backed ones.

    A field is moved to the CAS when its key is in ``_CAS_FIELDS``, its value
    is not ``None``, and its JSON serialisation exceeds ``_CAS_THRESHOLD``
    bytes.  Returns ``(inline, cas_refs)`` where ``cas_refs`` maps each moved
    key to its blob hash and ``inline`` holds every other field.
    """
    inline: dict[str, Any] = {}
    cas_refs: dict[str, str] = {}

    for name, payload in snap_dict.items():
        blob_ref: Optional[str] = None
        if payload is not None and name in _CAS_FIELDS:
            text = json.dumps(payload, ensure_ascii=False)
            if len(text.encode("utf-8")) > _CAS_THRESHOLD:
                blob_ref = _cas_store_blob(cache_d, text)

        if blob_ref is None:
            inline[name] = payload
        else:
            cas_refs[name] = blob_ref

    return inline, cas_refs
568
+
569
+
570
def _cas_restore(
    inline: dict[str, Any],
    cas_refs: dict[str, str],
    cache_d: Path,
) -> Optional[dict[str, Any]]:
    """
    Rebuild a full snapshot dict from *inline* plus the blobs in *cas_refs*.

    Works on a copy of *inline*.  Returns ``None`` as soon as one blob is
    missing or does not parse as JSON (callers treat that as a cache miss).
    """
    rebuilt: dict[str, Any] = dict(inline)
    for field_name, digest in cas_refs.items():
        blob_text = _cas_load_blob(cache_d, digest)
        if blob_text is None:
            return None  # blob evicted or corrupted → full miss
        try:
            value = json.loads(blob_text)
        except Exception:
            return None
        rebuilt[field_name] = value
    return rebuilt
590
+
591
+
592
+ # ---------------------------------------------------------------------------
593
+ # Eviction / GC
594
+ # ---------------------------------------------------------------------------
595
+
596
def _env_int(name: str, default: int) -> int:
    """Read integer env var *name*; fall back to *default* if unset or malformed."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


def _gc(cache_d: Path) -> None:
    """Evict old snapshots/cores/views and sweep orphaned CAS blobs.

    Three eviction passes:
      1. Commit-based: keep only the last SOURCECODE_CACHE_KEEP_COMMITS distinct
         git commits (by newest mtime within each commit group).  Files whose
         names do not match the snapshot/core patterns are never evicted here.
      2. Core count cap: if surviving core files exceed SOURCECODE_CACHE_MAX_CORES,
         evict oldest by mtime until at the cap.
      3. Size cap: if the combined size of the surviving core/snapshot files
         exceeds SOURCECODE_CACHE_MAX_SIZE_MB, evict oldest first until under it.
    Views are pruned after all passes, then CAS blobs are swept.

    A value <= 0 disables the corresponding pass; an unparseable env var falls
    back to the default.  Never raises: any failure aborts the GC silently.
    """
    # Parsed via _env_int so that a malformed variable cannot raise out of
    # the otherwise best-effort write path that calls us.
    keep = _env_int("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS)
    max_cores = _env_int("SOURCECODE_CACHE_MAX_CORES", _DEFAULT_MAX_CORES)
    max_size_bytes = (
        _env_int("SOURCECODE_CACHE_MAX_SIZE_MB", _DEFAULT_MAX_SIZE_MB) * 1024 * 1024
    )

    try:
        all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
        all_cores = list(cache_d.glob("core-*.json.gz"))
        all_views = list(cache_d.glob("view-*.json.gz"))

        if not (all_snapshots or all_cores or all_views):
            return

        # ── Pass 1: commit-based eviction ───────────────────────────────────
        groups: dict[str, list[Path]] = {}
        ungrouped: list[Path] = []
        for pattern, files in ((_SNAPSHOT_RE, all_snapshots), (_CORE_RE, all_cores)):
            for f in files:
                m = pattern.match(f.name)
                if m:
                    groups.setdefault(m.group(1), []).append(f)
                else:
                    ungrouped.append(f)

        surviving: list[Path]
        if keep <= 0 or len(groups) <= keep:
            surviving = all_snapshots + all_cores
        else:
            def _newest_mtime(commit: str) -> float:
                return max(p.stat().st_mtime for p in groups[commit])

            sorted_commits = sorted(groups, key=_newest_mtime, reverse=True)
            # Unmatched files stay on disk, so they must stay in `surviving`
            # too — otherwise the sweeps below would delete the views and CAS
            # blobs they still depend on.
            surviving = list(ungrouped)
            for commit in sorted_commits[:keep]:
                surviving.extend(groups[commit])
            for commit in sorted_commits[keep:]:
                for f in groups[commit]:
                    _safe_unlink(f)

        # ── Pass 2: core count cap ───────────────────────────────────────────
        if max_cores > 0:
            surviving_cores = [
                p for p in surviving
                if p.name.startswith("core-") and p.exists()
            ]
            if len(surviving_cores) > max_cores:
                surviving_cores.sort(key=lambda p: p.stat().st_mtime, reverse=True)
                evicted = set(surviving_cores[max_cores:])
                for p in evicted:
                    _safe_unlink(p)
                surviving = [p for p in surviving if p not in evicted]

        # ── Pass 3: size cap ─────────────────────────────────────────────────
        # NOTE(review): only core/snapshot files are measured here; view files
        # and CAS blobs do not count towards the budget — confirm that this
        # is the intended meaning of SOURCECODE_CACHE_MAX_SIZE_MB.
        if max_size_bytes > 0:
            sizes = {p: p.stat().st_size for p in surviving if p.exists()}
            total = sum(sizes.values())
            if total > max_size_bytes:
                evicted = set()
                # Oldest first.
                for p in sorted(sizes, key=lambda q: q.stat().st_mtime):
                    _safe_unlink(p)
                    evicted.add(p)
                    total -= sizes[p]
                    if total <= max_size_bytes:
                        break
                surviving = [p for p in surviving if p not in evicted]

        # ── Prune orphaned views + CAS ───────────────────────────────────────
        all_views = list(cache_d.glob("view-*.json.gz"))
        _gc_views(cache_d, surviving, all_views)
        surviving_with_views = surviving + [v for v in all_views if v.exists()]
        _gc_cas(cache_d, surviving_with_views)

    except Exception:
        pass  # GC failure is non-fatal
691
+
692
+
693
def _gc_views(cache_d: Path, surviving: list[Path], all_views: list[Path]) -> None:
    """Delete view files not traceable to a surviving core.

    Reads the ``hash`` field from every surviving ``core-*`` envelope, then
    deletes each view whose filename prefix (group 1 of ``_VIEW_RE``) is
    absent from that set. View files whose names do not match ``_VIEW_RE``
    are left untouched.

    If a surviving core is present but cannot be decoded, its hash is unknown
    and any view might belong to it, so no view is deleted in this run.
    Pruning resumes once that core is repaired or evicted.

    Args:
        cache_d: Cache directory. Not read here; kept for signature parity
            with the other GC helpers.
        surviving: Files that survived eviction. Only entries whose name
            starts with ``core-`` are inspected.
        all_views: Candidate view files to prune.
    """
    if not all_views:
        return

    # Collect live core hashes from the surviving core-*.json.gz files.
    live_hashes: set[str] = set()
    for path in surviving:
        if not path.name.startswith("core-"):
            continue
        try:
            env = json.loads(gzip.decompress(path.read_bytes()).decode("utf-8"))
        except FileNotFoundError:
            # The core vanished since it was listed: it has no views to protect.
            continue
        except Exception:
            # Unreadable core: its hash is unknown, so deleting any view could
            # orphan a live core. Be conservative and prune nothing this run.
            return
        if not isinstance(env, dict):
            # Malformed envelope: same reasoning as an unreadable core.
            return
        core_hash = env.get("hash", "")
        if isinstance(core_hash, str) and core_hash:
            live_hashes.add(core_hash)

    for vp in all_views:
        m = _VIEW_RE.match(vp.name)
        if m and m.group(1) not in live_hashes:
            _safe_unlink(vp)
720
+
721
+
722
def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
    """Delete CAS blobs not referenced by any file in *surviving_snapshots*.

    Each surviving file is decoded as a gzip-compressed JSON envelope; the
    values of its ``cas`` dict, when present, are collected as live blob
    hashes. Every ``*.gz`` blob in the CAS directory whose stem is not in
    that set is deleted.

    If any surviving file exists but cannot be decoded, or its ``cas`` field
    is not a dict, the set of live blobs is incomplete. The sweep is then
    skipped entirely rather than risk deleting blobs that are still
    referenced.

    Args:
        cache_d: Cache directory whose CAS sub-directory is swept.
        surviving_snapshots: Envelope files whose ``cas`` references must be
            preserved.
    """
    cas_d = _cas_dir(cache_d)
    if not cas_d.exists():
        return

    try:
        # Collect every blob hash referenced by a surviving envelope.
        referenced: set[str] = set()
        for snap_path in surviving_snapshots:
            try:
                raw = gzip.decompress(snap_path.read_bytes())
                env = json.loads(raw.decode("utf-8"))
            except FileNotFoundError:
                # The file vanished since it was listed: nothing to protect.
                continue
            except Exception:
                # Unreadable envelope: its blobs are unknown, so keep them all.
                return
            if not isinstance(env, dict) or "cas" not in env:
                continue
            cas_refs = env["cas"]
            if not isinstance(cas_refs, dict):
                # Malformed reference table: treat it like an unreadable file.
                return
            # Blob stems are strings; values of any other type can never
            # match one and must not abort the collection.
            referenced.update(v for v in cas_refs.values() if isinstance(v, str))

        # Delete blobs not referenced by any surviving envelope.
        for blob in cas_d.glob("*.gz"):
            if blob.stem not in referenced:
                _safe_unlink(blob)

    except Exception:
        pass  # CAS sweep failure is non-fatal
751
+
752
+
753
+ # ---------------------------------------------------------------------------
754
+ # Utilities
755
+ # ---------------------------------------------------------------------------
756
+
757
+ def _atomic_write(dest: Path, data: bytes) -> None:
758
+ """Write *data* to *dest* atomically via a sibling .tmp file + rename."""
759
+ tmp = dest.with_suffix(".tmp")
760
+ try:
761
+ tmp.write_bytes(data)
762
+ tmp.replace(dest)
763
+ except Exception:
764
+ _safe_unlink(tmp)
765
+ raise
766
+
767
+
768
+ def _safe_unlink(path: Path) -> None:
769
+ try:
770
+ path.unlink(missing_ok=True)
771
+ except Exception:
772
+ pass
sourcecode/cli.py CHANGED
@@ -1018,6 +1018,18 @@ def main(
1018
1018
  architecture = True # agents need full architectural signal (M4)
1019
1019
  graph_modules = True # IC-003: import graph needed for architecture confidence
1020
1020
 
1021
+ # --compact implicitly enables lightweight analysis passes so that
1022
+ # dependency_summary, env_summary and code_notes_summary are never null.
1023
+ # architecture=True is also enabled so that architecture.confidence is
1024
+ # consistent with --agent (which auto-enables architecture). The
1025
+ # ArchitectureAnalyzer is path-based and adds negligible latency.
1026
+ # NOTE: must happen BEFORE cache key computation so key reflects effective flags.
1027
+ if compact:
1028
+ dependencies = True
1029
+ env_map = True
1030
+ code_notes = True
1031
+ architecture = True
1032
+
1021
1033
  # ── Two-layer cache ────────────────────────────────────────────────────────
1022
1034
  # L1 (core): (repo, commit, analysis_flags) → pre-computed view data dict
1023
1035
  # key = core-<git_sha>-<analysis_hash>.json.gz
@@ -1065,7 +1077,7 @@ def main(
1065
1077
  f"dep={dependencies},gm={graph_modules},"
1066
1078
  f"docs={docs},fm={full_metrics},sem={semantics},"
1067
1079
  f"arch={architecture},gc={git_context},em={env_map},"
1068
- f"cn={code_notes},mode={mode},"
1080
+ f"cn={code_notes},"
1069
1081
  f"ex={_excl_key},depth={effective_depth}"
1070
1082
  )
1071
1083
  _core_h = _hashlib.sha256(_core_flags_str.encode()).hexdigest()[:8]
@@ -1073,7 +1085,7 @@ def main(
1073
1085
 
1074
1086
  # ── View flags: output presentation only (no re-analysis needed) ──
1075
1087
  _view_flags_str = (
1076
- f"c={compact},ag={agent},fmt={format},full={full},"
1088
+ f"c={compact},ag={agent},mode={mode},fmt={format},full={full},"
1077
1089
  f"co={changed_only},tree={tree},nt={no_tree},"
1078
1090
  f"rb={rank_by},sym={symbol},ep={entrypoints_only},"
1079
1091
  f"nr={no_redact},gd={graph_detail},dd={docs_depth},"
@@ -1235,17 +1247,6 @@ def main(
1235
1247
  err=True,
1236
1248
  )
1237
1249
 
1238
- # --compact implicitly enables lightweight analysis passes so that
1239
- # dependency_summary, env_summary and code_notes_summary are never null.
1240
- # architecture=True is also enabled so that architecture.confidence is
1241
- # consistent with --agent (which auto-enables architecture). The
1242
- # ArchitectureAnalyzer is path-based and adds negligible latency.
1243
- if compact:
1244
- dependencies = True
1245
- env_map = True
1246
- code_notes = True
1247
- architecture = True
1248
-
1249
1250
  dependency_analyzer = DependencyAnalyzer() if dependencies else None
1250
1251
  graph_analyzer = GraphAnalyzer() if graph_modules else None
1251
1252
  parsed_graph_edges = (
@@ -2024,30 +2025,51 @@ def main(
2024
2025
  # L2 (view): stores the exact rendered string for this flag combination.
2025
2026
  #
2026
2027
  # GC runs after L2 write to evict old commits and orphaned blobs/views.
2028
+ # Writes happen in a background daemon thread so cold-run latency is not
2029
+ # penalised by gzip encoding + disk I/O. atexit join ensures writes
2030
+ # complete on clean exit without blocking the user-visible response.
2027
2031
  if not no_cache and _core_key and not _pipeline_error:
2028
- try:
2029
- from sourcecode.serializer import core_view as _core_view_fn
2030
- _core_dict_write = _core_view_fn(sm)
2031
- _written_core_hash = _cache_mod.write_core(target, _core_key, _core_dict_write)
2032
-
2033
- # Compute view key using the just-written core hash
2034
- if _written_core_hash:
2035
- if not _view_key:
2036
- # _view_key not set (L1 was also a miss); compute it now
2037
- _wvh = _hashlib.sha256(_view_flags_str.encode()).hexdigest()[:8]
2038
- _view_key = f"{_written_core_hash}-{_wvh}"
2039
- _cache_mod.write_view(
2040
- target,
2041
- _view_key,
2042
- content,
2043
- fmt=format,
2044
- layers=_compute_analyzer_fingerprints(),
2032
+ import atexit as _atexit
2033
+ import threading as _threading
2034
+
2035
+ # Capture all closure state before handing off to thread
2036
+ _bg_sm = sm
2037
+ _bg_target = target
2038
+ _bg_core_key = _core_key
2039
+ _bg_view_key = _view_key
2040
+ _bg_view_flags_str = _view_flags_str
2041
+ _bg_content = content
2042
+ _bg_format = format
2043
+ _bg_hashlib = _hashlib
2044
+ _bg_cache_mod = _cache_mod
2045
+
2046
+ def _write_cache_async() -> None:
2047
+ try:
2048
+ from sourcecode.serializer import core_view as _core_view_fn
2049
+ _core_dict_write = _core_view_fn(_bg_sm)
2050
+ _written_core_hash = _bg_cache_mod.write_core(
2051
+ _bg_target, _bg_core_key, _core_dict_write
2045
2052
  )
2046
- # Trigger GC (evict old commits + orphaned views + CAS blobs)
2047
- from sourcecode.cache import cache_dir as _cdir, _gc as _run_gc
2048
- _run_gc(_cdir(target))
2049
- except Exception:
2050
- pass # non-fatal: cache write failure
2053
+ if _written_core_hash:
2054
+ _vk = _bg_view_key
2055
+ if not _vk:
2056
+ _wvh = _bg_hashlib.sha256(_bg_view_flags_str.encode()).hexdigest()[:8]
2057
+ _vk = f"{_written_core_hash}-{_wvh}"
2058
+ _bg_cache_mod.write_view(
2059
+ _bg_target,
2060
+ _vk,
2061
+ _bg_content,
2062
+ fmt=_bg_format,
2063
+ layers=_compute_analyzer_fingerprints(),
2064
+ )
2065
+ from sourcecode.cache import cache_dir as _cdir, _gc as _run_gc
2066
+ _run_gc(_cdir(_bg_target))
2067
+ except Exception:
2068
+ pass
2069
+
2070
+ _cache_write_thread = _threading.Thread(target=_write_cache_async, daemon=True)
2071
+ _cache_write_thread.start()
2072
+ _atexit.register(_cache_write_thread.join, 5.0)
2051
2073
 
2052
2074
  # Update RIS with aggregated snapshot data (non-fatal side-effect).
2053
2075
  if not no_cache and not _pipeline_error and _core_key:
@@ -1,193 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.32.3
3
+ Version: 1.32.4
4
4
  Summary: Deterministic codebase context for AI coding agents
5
- License: Apache License
6
- Version 2.0, January 2004
7
- http://www.apache.org/licenses/
8
-
9
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
10
-
11
- 1. Definitions.
12
-
13
- "License" shall mean the terms and conditions for use, reproduction,
14
- and distribution as defined by Sections 1 through 9 of this document.
15
-
16
- "Licensor" shall mean the copyright owner or entity authorized by
17
- the copyright owner that is granting the License.
18
-
19
- "Legal Entity" shall mean the union of the acting entity and all
20
- other entities that control, are controlled by, or are under common
21
- control with that entity. For the purposes of this definition,
22
- "control" means (i) the power, direct or indirect, to cause the
23
- direction or management of such entity, whether by contract or
24
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
25
- outstanding shares, or (iii) beneficial ownership of such entity.
26
-
27
- "You" (or "Your") shall mean an individual or Legal Entity
28
- exercising permissions granted by this License.
29
-
30
- "Source" form shall mean the preferred form for making modifications,
31
- including but not limited to software source code, documentation
32
- source, and configuration files.
33
-
34
- "Object" form shall mean any form resulting from mechanical
35
- transformation or translation of a Source form, including but
36
- not limited to compiled object code, generated documentation,
37
- and conversions to other formats.
38
-
39
- "Work" shall mean the work of authorship made available under
40
- the License, as indicated by a copyright notice that is included in
41
- or attached to the work (an example is provided in the Appendix below).
42
-
43
- "Derivative Works" shall mean any work, whether in Source or Object
44
- form, that is based on (or derived from) the Work and for which the
45
- editorial revisions, annotations, elaborations, or other transformations
46
- represent, as a whole, an original work of authorship. For the purposes
47
- of this License, Derivative Works shall not include works that remain
48
- separable from, or merely link (or bind by name) to the interfaces of,
49
- the Work and Derivative Works thereof.
50
-
51
- "Contribution" shall mean, as submitted to the Licensor for inclusion
52
- in the Work by the copyright owner or by an individual or Legal Entity
53
- authorized to submit on behalf of the copyright owner. For the purposes
54
- of this definition, "submit" means any form of electronic, verbal, or
55
- written communication sent to the Licensor or its representatives,
56
- including but not limited to communication on electronic mailing lists,
57
- source code control systems, and issue tracking systems that are managed
58
- by, or on behalf of, the Licensor for the purpose of discussing and
59
- improving the Work, but excluding communication that is conspicuously
60
- marked or designated in writing by the copyright owner as "Not a
61
- Contribution."
62
-
63
- "Contributor" shall mean Licensor and any Legal Entity on behalf of
64
- whom a Contribution has been received by the Licensor and included
65
- within the Work.
66
-
67
- 2. Grant of Copyright License. Subject to the terms and conditions of
68
- this License, each Contributor hereby grants to You a perpetual,
69
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
- copyright license to reproduce, prepare Derivative Works of,
71
- publicly display, publicly perform, sublicense, and distribute the
72
- Work and such Derivative Works in Source or Object form.
73
-
74
- 3. Grant of Patent License. Subject to the terms and conditions of
75
- this License, each Contributor hereby grants to You a perpetual,
76
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
- (except as stated in this section) patent license to make, have made,
78
- use, offer to sell, sell, import, and otherwise transfer the Work,
79
- where such license applies only to those patent claims licensable
80
- by such Contributor that are necessarily infringed by their
81
- Contribution(s) alone or by the combination of their Contribution(s)
82
- with the Work to which such Contribution(s) was submitted. If You
83
- institute patent litigation against any entity (including a cross-claim
84
- or counterclaim in a lawsuit) alleging that the Work or any Contributor
85
- Contribution constitutes direct or contributory patent infringement,
86
- then any patent rights granted to You under this License for that Work
87
- shall terminate as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or Derivative
95
- Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, You must include a readable copy of the
108
- attribution notices contained within such NOTICE file, in
109
- at least one of the following places: within a NOTICE text
110
- file distributed as part of the Derivative Works; within
111
- the Source form or documentation, if provided along with the
112
- Derivative Works; or, within a display generated by the
113
- Derivative Works, if and wherever such third-party notices
114
- normally appear. The contents of the NOTICE file are for
115
- informational purposes only and do not modify the License.
116
- You may add Your own attribution notices within Derivative
117
- Works that You distribute, alongside or in addition to the
118
- NOTICE text from the Work, provided that such additional
119
- attribution notices cannot be construed as modifying the License.
120
-
121
- You may add Your own license statement for Your modifications and
122
- may provide additional grant of rights to use, copy, modify, merge,
123
- publish, distribute, sublicense, and/or sell copies of the
124
- Derivative Works, as separate terms and conditions for their use,
125
- reproduction, and distribution, or alongside or as supplement to
126
- any license terms for such Derivative Works as a whole, provided
127
- Your use, reproduction, and distribution of the Work otherwise
128
- complies with the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or reproducing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or exemplary damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or all other
162
- commercial damages or losses), even if such Contributor has been
163
- advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Liability. While redistributing the Work or
166
- Derivative Works thereof, You may choose to offer, and charge a fee
167
- for, acceptance of support, warranty, indemnity, or other liability
168
- obligations and/or rights consistent with this License. However, in
169
- accepting such obligations, You may offer such obligations only on
170
- Your own behalf and on Your sole responsibility, not on behalf of
171
- any other Contributor, and only if You agree to indemnify, defend,
172
- and hold each Contributor harmless for any liability incurred by,
173
- or claims asserted against, such Contributor by reason of your
174
- accepting any warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- Copyright 2024 sourcecode contributors
179
-
180
- Licensed under the Apache License, Version 2.0 (the "License");
181
- you may not use this file except in compliance with the License.
182
- You may obtain a copy of the License at
183
-
184
- http://www.apache.org/licenses/LICENSE-2.0
185
-
186
- Unless required by applicable law or agreed to in writing, software
187
- distributed under the License is distributed on an "AS IS" BASIS,
188
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
- See the License for the specific language governing permissions and
190
- limitations under the License.
191
5
  License-File: LICENSE
192
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
193
7
  Classifier: Development Status :: 4 - Beta
@@ -1,12 +1,13 @@
1
- sourcecode/__init__.py,sha256=5Ey4P8Jp1XkENrolOFwJQ6v_ucSixtebokTZVV3MgA8,103
1
+ sourcecode/__init__.py,sha256=ekW8VutI9sqMXBEN-MSRy8p8fHOhat9jl5sCtrKrygc,103
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=qh749a7ykPtGmQI1MR9y6j8TtL_jBdVYFx9YRsLqOMw,44121
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,50578
6
- sourcecode/cache.py,sha256=pBrPdpPrOgpXHHQO670U3aUfVf5N3A3obsTKgiZtN4I,23030
6
+ sourcecode/cache.py,sha256=dvXt8HsU-SyO0a0UXY1n-wt6F2ozGv9VnKR0XydjxCY,27502
7
+ sourcecode/cache.tmp_new,sha256=-IvV7CojiZjqeKMln1m-lqI0QVA2uFGWmYir4XRFOUk,27970
7
8
  sourcecode/canonical_ir.py,sha256=_HM3AUmKSdna9u4dCoU6rpgSA6HdF8gzOKZykIUCNGY,23277
8
9
  sourcecode/classifier.py,sha256=2lYoSH3vOTkXZYPU7Go2WIet1-IuNzTWVhc-ULnXtgw,8024
9
- sourcecode/cli.py,sha256=l3PJ9pK6JQ_JUxYSD3xnpvCIvR9kvOvbEEvHvqfij9A,165743
10
+ sourcecode/cli.py,sha256=RdJ1F_sjmqTJ6zcv3eY2Tokg9H5dJe6zSuK3ownUGcA,166617
10
11
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
11
12
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
12
13
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -79,8 +80,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
79
80
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
80
81
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
81
82
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
82
- sourcecode-1.32.3.dist-info/METADATA,sha256=joMm6IGlPuAM0S0Sv9P_P5KORZ6Dletqfhv3GMQByO0,31100
83
- sourcecode-1.32.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
84
- sourcecode-1.32.3.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
85
- sourcecode-1.32.3.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
86
- sourcecode-1.32.3.dist-info/RECORD,,
83
+ sourcecode-1.32.4.dist-info/METADATA,sha256=SJ09ABP688xA3bbWRuseGyJyVK-mYQmR7CgodrIjmDE,19120
84
+ sourcecode-1.32.4.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
85
+ sourcecode-1.32.4.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
86
+ sourcecode-1.32.4.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
87
+ sourcecode-1.32.4.dist-info/RECORD,,