superlinear 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. apps/__init__.py +4 -0
  2. apps/cli/__init__.py +8 -0
  3. apps/cli/bm25_rag.py +471 -0
  4. apps/cli/chat_repl.py +1497 -0
  5. apps/cli/client.py +195 -0
  6. apps/cli/docs_repl.py +2275 -0
  7. apps/cli/light_rag.py +729 -0
  8. apps/cli/local_snapshots.py +139 -0
  9. apps/cli/locks.py +214 -0
  10. apps/cli/main.py +457 -0
  11. apps/cli/output.py +32 -0
  12. apps/cli/server_cmds.py +516 -0
  13. apps/cli/session_cmds.py +491 -0
  14. apps/cli/snapshot_cmds.py +303 -0
  15. apps/cli/state.py +265 -0
  16. apps/server/__init__.py +4 -0
  17. apps/server/app.py +1363 -0
  18. apps/server/main.py +313 -0
  19. superlinear/__init__.py +114 -0
  20. superlinear/_version.py +3 -0
  21. superlinear/engine/__init__.py +10 -0
  22. superlinear/engine/adapters/__init__.py +12 -0
  23. superlinear/engine/adapters/base.py +91 -0
  24. superlinear/engine/adapters/superlinear.py +1233 -0
  25. superlinear/engine/chat_engine.py +1173 -0
  26. superlinear/engine/chat_types.py +130 -0
  27. superlinear/engine/registry.py +51 -0
  28. superlinear/engine/repetition.py +203 -0
  29. superlinear/engine/session_snapshots.py +451 -0
  30. superlinear/engine/tool_parser.py +83 -0
  31. superlinear/engine/types.py +42 -0
  32. superlinear/kernels/__init__.py +2 -0
  33. superlinear/kernels/common/__init__.py +21 -0
  34. superlinear/kernels/common/adjustment.py +106 -0
  35. superlinear/kernels/common/power.py +154 -0
  36. superlinear/kernels/superlinear/__init__.py +10 -0
  37. superlinear/kernels/superlinear/attention/__init__.py +78 -0
  38. superlinear/kernels/superlinear/attention/_prefill.py +940 -0
  39. superlinear/kernels/superlinear/attention/_sliding_window.py +1167 -0
  40. superlinear/kernels/superlinear/attention/api.py +433 -0
  41. superlinear/kernels/superlinear/search/__init__.py +33 -0
  42. superlinear/kernels/superlinear/search/_reference.py +204 -0
  43. superlinear/kernels/superlinear/search/_triton.py +488 -0
  44. superlinear/kernels/superlinear/search/_triton_gqa.py +534 -0
  45. superlinear/kernels/superlinear/search/api.py +200 -0
  46. superlinear/kernels/superlinear/span/__init__.py +41 -0
  47. superlinear/kernels/superlinear/span/_triton_bucketed_gqa.py +1461 -0
  48. superlinear/kernels/superlinear/span/_triton_forward.py +22 -0
  49. superlinear/kernels/superlinear/span/_triton_gqa.py +1226 -0
  50. superlinear/kernels/superlinear/span/_triton_impl.py +928 -0
  51. superlinear/kernels/superlinear/span/_triton_precomputed_sw.py +460 -0
  52. superlinear/kernels/superlinear/span/_triton_precomputed_sw_gqa.py +598 -0
  53. superlinear/kernels/superlinear/span/api.py +296 -0
  54. superlinear/kernels/superlinear/span/masks.py +187 -0
  55. superlinear/py.typed +0 -0
  56. superlinear/runtime.py +71 -0
  57. superlinear-0.1.0.dist-info/METADATA +469 -0
  58. superlinear-0.1.0.dist-info/RECORD +62 -0
  59. superlinear-0.1.0.dist-info/WHEEL +5 -0
  60. superlinear-0.1.0.dist-info/entry_points.txt +2 -0
  61. superlinear-0.1.0.dist-info/licenses/LICENSE +202 -0
  62. superlinear-0.1.0.dist-info/top_level.txt +2 -0
apps/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # Superlinear applications (server, CLI)
2
+ #
3
+ # These are runnable entrypoints that depend on the core superlinear package.
4
+ # They can have heavier dependencies (FastAPI, Typer, etc.).
apps/cli/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ # Superlinear CLI tool
2
+ #
3
+ # Provides commands for:
4
+ # - serve: Start the inference server
5
+ # - chat: Interactive chat with a model
6
+ # - complete: Run a single completion
7
+ #
8
+ # Hides HTTP details from the user.
apps/cli/bm25_rag.py ADDED
@@ -0,0 +1,471 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import json
5
+ import re
6
+ import time
7
+ from dataclasses import dataclass, replace
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from apps.cli.light_rag import split_paragraphs, tokenize_query_terms, tokenize_rag_text
12
+
13
+
14
+ _QUOTEY_RE = re.compile(
15
+ r"\b(quote|verbatim|exact|substring|sentence|sentences|fragment|fragments|no\s+ellipses)\b",
16
+ re.IGNORECASE,
17
+ )
18
+
19
+
20
+ def _looks_like_quote_task(question: str) -> bool:
21
+ return bool(_QUOTEY_RE.search(question or ""))
22
+
23
+
24
+ def _split_into_sentences(text: str) -> list[str]:
25
+ t = (text or "").replace("\r", "")
26
+ if not t:
27
+ return []
28
+
29
+ out: list[str] = []
30
+ start = 0
31
+ i = 0
32
+ n = len(t)
33
+ while i < n:
34
+ ch = t[i]
35
+ if ch == "\n":
36
+ seg = t[start:i].strip()
37
+ if seg:
38
+ out.append(seg)
39
+ start = i + 1
40
+ elif ch in {".", "!", "?"}:
41
+ end = i + 1
42
+ seg = t[start:end].strip()
43
+ if seg:
44
+ out.append(seg)
45
+ j = end
46
+ while j < n and t[j].isspace() and t[j] != "\n":
47
+ j += 1
48
+ start = j
49
+ i = j - 1
50
+ i += 1
51
+
52
+ tail = t[start:].strip()
53
+ if tail:
54
+ out.append(tail)
55
+ return out
56
+
57
+
58
+ def _truncate_text(text: str, max_chars: int) -> str:
59
+ if max_chars <= 0:
60
+ return ""
61
+ if len(text) <= max_chars:
62
+ return text
63
+ if max_chars == 1:
64
+ return text[:1]
65
+ return text[: max_chars - 1].rstrip() + "…"
66
+
67
+
68
def _select_sentence_snippet(text: str, *, terms: list[str], max_chars: int) -> str:
    """Pick the sentence of *text* that best matches *terms*, clipped to *max_chars*.

    Falls back to plain truncation when no sentences can be extracted. A
    chosen sentence that is still too long is clipped WITHOUT an ellipsis so
    that models asked for "verbatim" quotes do not copy the ellipsis into
    their answer.
    """
    if max_chars <= 0:
        return ""
    if len(text) <= max_chars:
        return text

    sentences = _split_into_sentences(text)
    if not sentences:
        return _truncate_text(text, max_chars)

    # Rank sentences by number of matching terms; ties prefer the shorter one.
    candidates: list[tuple[int, int, str]] = []
    for sentence in sentences:
        lowered = sentence.lower()
        hits = sum(1 for term in terms if term and term in lowered)
        if hits > 0:
            candidates.append((hits, -len(sentence), sentence))

    chosen = (max(candidates)[2] if candidates else sentences[0]).strip()
    if len(chosen) <= max_chars:
        return chosen

    # Clip with no ellipsis (see docstring).
    return chosen[:max_chars].rstrip()
94
+
95
+
96
+ def _coerce_int(v: Any, *, default: int, min_v: int, max_v: int) -> int:
97
+ try:
98
+ n = int(v)
99
+ except Exception:
100
+ return default
101
+ if n < min_v:
102
+ return min_v
103
+ if n > max_v:
104
+ return max_v
105
+ return n
106
+
107
+
108
@dataclass(frozen=True)
class Bm25RagConfig:
    """Tuning knobs for BM25 retrieval over the registered sources."""

    enabled: bool = True  # master switch for BM25 retrieval
    k_sources: int = 5  # max sources included in the excerpts message
    total_chars: int = 12000  # overall character budget across all excerpts
    per_source_chars: int = 2600  # character budget per source
    debug: bool = False  # emit diagnostic lines

    k_paragraphs: int = 40  # top-scoring paragraphs considered before grouping
    max_terms: int = 32  # query terms kept for scoring
    max_paragraphs_per_source: int = 8  # paragraphs kept per selected source
    max_paragraph_chars: int = 1200  # clip limit per paragraph

    def sanitized(self) -> "Bm25RagConfig":
        """Return a copy with every numeric field coerced and clamped to a sane range."""
        clamped = {
            "k_sources": _coerce_int(self.k_sources, default=5, min_v=1, max_v=50),
            "total_chars": _coerce_int(self.total_chars, default=12000, min_v=200, max_v=200000),
            "per_source_chars": _coerce_int(self.per_source_chars, default=2600, min_v=50, max_v=50000),
            "k_paragraphs": _coerce_int(self.k_paragraphs, default=40, min_v=1, max_v=1000),
            "max_terms": _coerce_int(self.max_terms, default=32, min_v=1, max_v=256),
            "max_paragraphs_per_source": _coerce_int(
                self.max_paragraphs_per_source, default=8, min_v=1, max_v=64
            ),
            "max_paragraph_chars": _coerce_int(self.max_paragraph_chars, default=1200, min_v=50, max_v=20000),
        }
        return replace(self, **clamped)
134
+
135
+
136
@dataclass(frozen=True)
class _Paragraph:
    """One paragraph of a source document, addressable by (path, paragraph_index)."""

    path: str  # filesystem path of the originating source file
    paragraph_index: int  # 0-based index within that file's paragraph list
    text: str  # raw paragraph text
141
+
142
+
143
class Bm25RagRetriever:
    """Paragraph-level BM25 retriever over registered source files.

    Lazily imports ``rank_bm25`` (the feature degrades gracefully when the
    package is missing), caches one index keyed by the (path, sha256) set of
    the sources, and renders a "Retrieved excerpts" message with [SOURCE]
    blocks under per-source and total character budgets.
    """

    def __init__(self) -> None:
        # Lazy import state: resolved BM25Okapi class, or the error string
        # explaining why it could not be imported (only one is ever set).
        self._bm25_cls: type | None = None
        self._bm25_import_error: str | None = None
        # Cache key of the currently built index (see _sources_key).
        self._index_key: str | None = None

        # Index contents: paragraphs, their token lists (parallel lists),
        # per-path display metadata, the BM25 instance, and build timing.
        self._paragraphs: list[_Paragraph] = []
        self._paragraph_tokens: list[list[str]] = []
        self._source_meta: dict[str, dict[str, Any]] = {}
        self._bm25: Any | None = None
        self._last_build_ms: int | None = None

    def is_available(self) -> bool:
        """Return True when rank_bm25 could be imported."""
        return self._get_bm25_cls() is not None

    def last_build_stats(self) -> dict[str, Any]:
        """Return counts and build time of the most recent index build."""
        return {
            "sources": len(self._source_meta),
            "paragraphs": len(self._paragraphs),
            "build_ms": self._last_build_ms,
        }

    def clear_index(self) -> None:
        """Drop the cached index and all derived state (import state is kept)."""
        self._index_key = None
        self._paragraphs = []
        self._paragraph_tokens = []
        self._source_meta = {}
        self._bm25 = None
        self._last_build_ms = None

    def _get_bm25_cls(self) -> type | None:
        """Import and memoize rank_bm25.BM25Okapi; record the failure reason once."""
        if self._bm25_cls is not None:
            return self._bm25_cls
        # A previous attempt failed; don't retry on every call.
        if self._bm25_import_error is not None:
            return None

        try:
            mod = importlib.import_module("rank_bm25")
            cls = getattr(mod, "BM25Okapi", None)
            if cls is None:
                self._bm25_import_error = "rank_bm25.BM25Okapi not found"
                return None
            self._bm25_cls = cls
            return cls
        except Exception as exc:
            self._bm25_import_error = str(exc)
            return None

    def _sources_key(self, sources: list[dict[str, Any]]) -> str:
        """Build a stable cache key for *sources* so index rebuilds are skipped."""
        # Use (path, sha256) when available so we can detect content changes across /add.
        # Sort for stability.
        items: list[tuple[str, str]] = []
        for s in sources:
            if not isinstance(s, dict):
                continue
            path = s.get("path")
            if not isinstance(path, str) or not path:
                continue
            sha = s.get("sha256")
            items.append((path, sha if isinstance(sha, str) else ""))
        items.sort()
        return json.dumps({"v": 1, "sources": items}, ensure_ascii=False, sort_keys=True)

    def ensure_index(self, *, sources: list[dict[str, Any]], debug: bool = False) -> list[str]:
        """Build (or reuse) the BM25 index for *sources*; return debug lines.

        Unreadable or binary files are skipped and reported in the debug
        output rather than failing the whole build.
        """
        dbg: list[str] = []
        bm25_cls = self._get_bm25_cls()
        if bm25_cls is None:
            if debug:
                hint = (
                    "bm25: unavailable (install `rank-bm25` to enable BM25 retrieval)"
                    if self._bm25_import_error is None
                    else f"bm25: unavailable ({self._bm25_import_error})"
                )
                dbg.append(hint)
            self.clear_index()
            return dbg

        # Fast path: sources unchanged since the last build.
        key = self._sources_key(sources)
        if self._index_key == key and self._bm25 is not None:
            return dbg

        t0 = time.perf_counter()

        paragraphs: list[_Paragraph] = []
        paragraph_tokens: list[list[str]] = []
        source_meta: dict[str, dict[str, Any]] = {}

        skipped: list[str] = []

        # Deduplicate by path; keep the last metadata entry for a path.
        seen_paths: set[str] = set()
        unique_sources: list[dict[str, Any]] = []
        for s in reversed(sources):
            if not isinstance(s, dict):
                continue
            path = s.get("path")
            if not isinstance(path, str) or not path:
                continue
            if path in seen_paths:
                continue
            seen_paths.add(path)
            unique_sources.append(s)
        unique_sources.reverse()

        for s in unique_sources:
            path = s.get("path")
            if not isinstance(path, str) or not path:
                continue

            # Collect optional display metadata used later in [SOURCE ...] attrs.
            title = s.get("title")
            src = s.get("source")
            url = s.get("url")
            meta: dict[str, Any] = {"path": path}
            if isinstance(title, str) and title.strip():
                meta["title"] = title.strip()
            if isinstance(src, str) and src.strip():
                meta["source"] = src.strip()
            if isinstance(url, str) and url.strip():
                meta["url"] = url.strip()
            source_meta[path] = meta

            try:
                data = Path(path).read_bytes()
                # NUL byte is a cheap heuristic for "binary file" — skip it.
                if b"\x00" in data:
                    raise ValueError("refusing to read binary file (NUL byte found)")
                text = data.decode("utf-8", errors="replace")
            except Exception as exc:
                skipped.append(f"{path}: {exc}")
                continue

            # Tokenize per paragraph; paragraphs with no tokens are dropped so
            # the two parallel lists stay aligned with the BM25 corpus.
            for p_idx, para in enumerate(split_paragraphs(text)):
                tokens = tokenize_rag_text(para)
                if not tokens:
                    continue
                paragraphs.append(_Paragraph(path=path, paragraph_index=p_idx, text=para))
                paragraph_tokens.append(tokens)

        if not paragraphs:
            # Record the key anyway so an empty corpus is not rebuilt every call.
            self._index_key = key
            self._paragraphs = []
            self._paragraph_tokens = []
            self._source_meta = source_meta
            self._bm25 = None
            self._last_build_ms = int((time.perf_counter() - t0) * 1000)
            if debug:
                dbg.append(
                    f"bm25: index empty (sources={len(source_meta)} paragraphs=0 build_ms={self._last_build_ms})"
                )
                # NOTE(review): original indentation was lost in transit; the
                # skipped report is assumed to be debug-gated here, matching
                # the "index built" branch below — confirm against upstream.
                if skipped:
                    dbg.append("bm25: skipped (read errors):")
                    dbg.extend([f" - {s}" for s in skipped[:20]])
            return dbg

        bm25 = bm25_cls(paragraph_tokens)

        # Publish the freshly built index atomically at the end.
        self._index_key = key
        self._paragraphs = paragraphs
        self._paragraph_tokens = paragraph_tokens
        self._source_meta = source_meta
        self._bm25 = bm25
        self._last_build_ms = int((time.perf_counter() - t0) * 1000)

        if debug:
            dbg.append(
                f"bm25: index built (sources={len(source_meta)} paragraphs={len(paragraphs)} build_ms={self._last_build_ms})"
            )
            if skipped:
                dbg.append("bm25: skipped (read errors):")
                dbg.extend([f" - {s}" for s in skipped[:20]])

        return dbg

    def build_retrieved_excerpts_message(
        self,
        *,
        question: str,
        sources: list[dict[str, Any]],
        config: Bm25RagConfig,
    ) -> tuple[str | None, list[str]]:
        """Render a "Retrieved excerpts" message for *question* over *sources*.

        Returns ``(message, debug_lines)``; the message is ``None`` whenever
        retrieval is disabled, unavailable, or produced nothing usable, so
        the caller can fall back to other strategies.
        """
        cfg = config.sanitized()
        if not cfg.enabled:
            return None, []

        terms = tokenize_query_terms(question, max_terms=cfg.max_terms)
        if not terms:
            return None, []

        debug_lines: list[str] = []
        debug_lines.extend(self.ensure_index(sources=sources, debug=cfg.debug))
        if self._bm25 is None or not self._paragraphs:
            return None, debug_lines

        # Quote-style questions get sentence-level snippets (no ellipsis).
        quote_task = _looks_like_quote_task(question)

        try:
            scores_raw = self._bm25.get_scores(terms)
        except Exception as exc:
            if cfg.debug:
                debug_lines.append(f"bm25: scoring failed ({exc}); falling back")
            return None, debug_lines

        # get_scores may return a numpy array or other sequence; normalize to a list.
        try:
            scores = list(scores_raw)
        except Exception:
            scores = [scores_raw[i] for i in range(len(self._paragraphs))]

        # Keep only positive-scoring paragraphs as (score, paragraph_id).
        scored: list[tuple[float, int]] = []
        for i, s in enumerate(scores[: len(self._paragraphs)]):
            try:
                f = float(s)
            except Exception:
                continue
            if f <= 0:
                continue
            scored.append((f, i))

        if not scored:
            if cfg.debug:
                debug_lines.append(f"bm25: terms={terms!r}")
                debug_lines.append("bm25: no positive-scoring paragraphs")
            return None, debug_lines

        # Highest score first; paragraph id breaks ties deterministically.
        scored.sort(key=lambda x: (-x[0], x[1]))
        top_para = scored[: cfg.k_paragraphs]

        # Group the top paragraphs by their source path.
        by_path: dict[str, list[tuple[float, int]]] = {}
        for score, pid in top_para:
            path = self._paragraphs[pid].path
            by_path.setdefault(path, []).append((score, pid))

        # Rank sources by the sum of their paragraphs' scores.
        source_scored: list[tuple[float, str]] = []
        for path, items in by_path.items():
            agg = float(sum(score for score, _ in items))
            source_scored.append((agg, path))
        source_scored.sort(key=lambda x: (-x[0], x[1]))

        selected_sources = source_scored[: cfg.k_sources]
        if not selected_sources:
            return None, debug_lines

        if cfg.debug:
            debug_lines.append(f"bm25: terms={terms!r}")
            debug_lines.append(
                f"bm25: selected_sources={len(selected_sources)} from_paths={len(by_path)} top_paragraphs={len(top_para)}"
            )

        # Character budgets: total across all sources, plus a per-source cap.
        total_remaining = int(cfg.total_chars)
        blocks: list[str] = [
            "Retrieved excerpts (hints for where to look - verify against your full memory of the documents):",
            "",
        ]

        included = 0
        for agg, path in selected_sources:
            if total_remaining <= 0:
                break

            per_remaining = min(int(cfg.per_source_chars), total_remaining)
            if per_remaining <= 0:
                break

            # Take this source's best paragraphs, then re-sort them into
            # document order so the excerpt reads naturally.
            items = by_path.get(path, [])
            items.sort(key=lambda x: (-x[0], x[1]))
            items = items[: cfg.max_paragraphs_per_source]
            items.sort(key=lambda x: self._paragraphs[x[1]].paragraph_index)

            parts: list[str] = []
            used = 0
            for score, pid in items:
                para = self._paragraphs[pid].text.strip()
                if not para:
                    continue

                # Budget accounting includes the blank-line separator.
                sep = "\n\n" if parts else ""
                avail = per_remaining - used - len(sep)
                if avail <= 0:
                    break

                clip_limit = min(int(cfg.max_paragraph_chars), avail)
                if quote_task:
                    clipped = _select_sentence_snippet(para, terms=terms, max_chars=clip_limit)
                else:
                    clipped = _truncate_text(para, clip_limit)
                if not clipped:
                    break

                parts.append(sep + clipped)
                used += len(sep) + len(clipped)
                if used >= per_remaining:
                    break

            excerpt = "".join(parts).strip()
            if not excerpt:
                continue

            included += 1
            total_remaining -= used

            # Render JSON-quoted attributes for the [SOURCE ...] header.
            meta = self._source_meta.get(path, {"path": path})
            attrs: list[str] = []
            attrs.append(f"path={json.dumps(path, ensure_ascii=False)}")

            title = meta.get("title")
            if isinstance(title, str) and title.strip():
                attrs.append(f"title={json.dumps(title.strip(), ensure_ascii=False)}")
            src = meta.get("source")
            if isinstance(src, str) and src.strip():
                attrs.append(f"source={json.dumps(src.strip(), ensure_ascii=False)}")
            url = meta.get("url")
            if isinstance(url, str) and url.strip():
                attrs.append(f"url={json.dumps(url.strip(), ensure_ascii=False)}")

            blocks.append(f"[SOURCE {' '.join(attrs)}]")
            blocks.append(excerpt)
            blocks.append("[/SOURCE]")
            blocks.append("")

            if cfg.debug:
                top_scores = [f"{score:.3f}" for score, _ in sorted(items, reverse=True)[:3]]
                debug_lines.append(
                    f"bm25: + {path} agg={agg:.3f} paras={len(items)} chars={used} top_scores={top_scores}"
                )

        if included == 0:
            return None, debug_lines

        msg = "\n".join(blocks).rstrip() + "\n"
        return msg, debug_lines
471
+