spooling 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spooling/__init__.py +2 -0
- spooling/agent.py +213 -0
- spooling/classifiers.py +147 -0
- spooling/cli.py +522 -0
- spooling/cloud.py +768 -0
- spooling/config.py +44 -0
- spooling/db.py +21 -0
- spooling/embeddings.py +60 -0
- spooling/evals.py +611 -0
- spooling/experiments.py +407 -0
- spooling/ingest.py +496 -0
- spooling/mcp_server.py +312 -0
- spooling/parser.py +614 -0
- spooling/pricing.py +307 -0
- spooling/providers/__init__.py +46 -0
- spooling/providers/antigravity.py +312 -0
- spooling/providers/base.py +166 -0
- spooling/providers/codex.py +230 -0
- spooling/providers/copilot.py +294 -0
- spooling/providers/cortex_code.py +234 -0
- spooling/providers/cursor.py +307 -0
- spooling/providers/gemini.py +476 -0
- spooling/providers/github.py +241 -0
- spooling/providers/gitlab.py +186 -0
- spooling/providers/kiro.py +240 -0
- spooling/providers/opencode.py +282 -0
- spooling/providers/session_file.py +36 -0
- spooling/providers/windsurf.py +355 -0
- spooling/redact.py +284 -0
- spooling/remote_otel.py +257 -0
- spooling/sdk.py +364 -0
- spooling/search.py +68 -0
- spooling/server.py +1291 -0
- spooling/stats.py +180 -0
- spooling/subscription_pricing.py +131 -0
- spooling/tracing.py +451 -0
- spooling/watcher.py +125 -0
- spooling-0.1.1.dist-info/METADATA +28 -0
- spooling-0.1.1.dist-info/RECORD +43 -0
- spooling-0.1.1.dist-info/WHEEL +5 -0
- spooling-0.1.1.dist-info/entry_points.txt +2 -0
- spooling-0.1.1.dist-info/licenses/LICENSE +21 -0
- spooling-0.1.1.dist-info/top_level.txt +1 -0
spooling/config.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Configuration for Spooling."""
|
|
2
|
+
|
|
3
|
+
import os
from pathlib import Path
from urllib.parse import quote

# Legacy session data directory (JSONL-format sessions)
SESSIONS_DIR = Path.home() / ".sessions"
SESSIONS_PROJECTS_DIR = SESSIONS_DIR / "projects"

# Snowflake Cortex Code data directory. Sessions live in
# ~/.snowflake/cortex/conversations/<uuid>.history.jsonl with a sidecar
# <uuid>.json carrying title, working_directory, git info, and timestamps.
CORTEX_DIR = Path.home() / ".snowflake" / "cortex"
CORTEX_CONVERSATIONS_DIR = CORTEX_DIR / "conversations"

# opencode (sst/opencode) data directory. Single SQLite DB at
# ~/.local/share/opencode/opencode.db with session/message/part tables
# (Drizzle-managed). Parts carry the Vercel AI SDK UIMessage payload.
OPENCODE_DIR = Path.home() / ".local" / "share" / "opencode"
OPENCODE_DB = OPENCODE_DIR / "opencode.db"

# Database (each setting can be overridden through a SPOOLING_DB_* env var)
DB_HOST = os.getenv("SPOOLING_DB_HOST", "localhost")
DB_PORT = int(os.getenv("SPOOLING_DB_PORT", "5434"))
DB_NAME = os.getenv("SPOOLING_DB_NAME", "spooling")
DB_USER = os.getenv("SPOOLING_DB_USER", "spooling")
DB_PASSWORD = os.getenv("SPOOLING_DB_PASSWORD", "spooling")


def build_database_url(user: str, password: str, host: str, port: int, name: str) -> str:
    """Assemble a ``postgresql://`` connection URI from its parts.

    The user, password and database name are percent-encoded so that
    characters with a special meaning inside a URI (``@``, ``:``, ``/``,
    ``?``, ``#``, ...) cannot corrupt the URI structure. The host is
    inserted verbatim so bracketed IPv6 literals keep working.

    Args:
        user: Database role name.
        password: Password for ``user``.
        host: Host name or address of the server.
        port: TCP port of the server.
        name: Database name.

    Returns:
        The connection URI as a string.
    """
    safe_user = quote(user, safe="")
    safe_password = quote(password, safe="")
    safe_name = quote(name, safe="")
    return f"postgresql://{safe_user}:{safe_password}@{host}:{port}/{safe_name}"


DATABASE_URL = build_database_url(DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME)

# Embeddings
EMBEDDING_MODEL = os.getenv("SPOOLING_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
EMBEDDING_DIM = 384
CHUNK_SIZE = 500  # chars per chunk for embedding

# Server
UI_HOST = os.getenv("SPOOLING_UI_HOST", "127.0.0.1")
UI_PORT = int(os.getenv("SPOOLING_UI_PORT", "3001"))

# Token estimation (rough heuristic: ~4 chars per token)
CHARS_PER_TOKEN = 4

# Default model pricing per 1M tokens (input, output)
DEFAULT_PRICING = (3.0, 15.0)
|
spooling/db.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Database connection management."""
|
|
2
|
+
|
|
3
|
+
import psycopg
|
|
4
|
+
from psycopg.rows import dict_row
|
|
5
|
+
|
|
6
|
+
from spooling.config import DATABASE_URL
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_connection():
    """Open a new psycopg connection to the configured database.

    The connection targets ``DATABASE_URL`` and is created with
    ``dict_row`` as its row factory. The caller is responsible for
    closing it (for example by using it as a context manager).
    """
    connection = psycopg.connect(DATABASE_URL, row_factory=dict_row)
    return connection
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def check_db():
    """Report whether the database answers a trivial query.

    Returns:
        True when a connection can be opened and ``SELECT 1`` executes;
        False if any exception is raised along the way.
    """
    try:
        with get_connection() as probe:
            probe.execute("SELECT 1")
    except Exception:
        # Best-effort health probe: every failure counts as "unreachable".
        return False
    else:
        return True
|
spooling/embeddings.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Local embeddings via sentence-transformers."""
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
|
|
5
|
+
from spooling.config import EMBEDDING_MODEL, CHUNK_SIZE
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@lru_cache(maxsize=1)
def _get_model():
    """Return the embedding model, constructing it on first use.

    ``lru_cache`` keeps the constructed model, so later calls reuse the
    same instance instead of building a new one.
    """
    # Imported inside the function so that importing this module does not
    # import sentence-transformers.
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer(EMBEDDING_MODEL)
    return model
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts.

    Args:
        texts: The strings to embed.

    Returns:
        One vector per input text, in input order. Vectors are requested
        with ``normalize_embeddings=True``. An empty input yields an
        empty list.
    """
    if not texts:
        # Nothing to encode: avoid loading the embedding model at all.
        return []

    model = _get_model()
    embeddings = model.encode(texts, show_progress_bar=False, normalize_embeddings=True)
    # Convert each row to a plain list of Python floats.
    return [e.tolist() for e in embeddings]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def embed_text(text: str) -> list[float]:
    """Embed one text by running it through ``embed_texts`` as a batch of one."""
    vectors = embed_texts([text])
    return vectors[0]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def chunk_text(text: str, chunk_size: int = CHUNK_SIZE) -> list[str]:
    """Split text into chunks of at most ``chunk_size`` characters.

    Paragraphs (separated by a blank line) are packed together while they
    fit. A paragraph that is too long on its own is re-packed word by
    word, and a single word that is still too long is cut into
    ``chunk_size``-sized pieces so no chunk ever exceeds the limit.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk.

    Returns:
        The chunks in document order. Text that already fits is returned
        unchanged as a single chunk; blank text yields an empty list.
        Chunks produced by splitting are stripped of surrounding
        whitespace.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    if len(text) <= chunk_size:
        return [text] if text.strip() else []

    chunks: list[str] = []
    current = ""

    # Split on paragraph boundaries first, then by size
    for para in text.split("\n\n"):
        # +2 accounts for the "\n\n" separator used when joining paragraphs.
        if len(current) + len(para) + 2 <= chunk_size:
            current = f"{current}\n\n{para}" if current else para
            continue

        if current.strip():
            chunks.append(current.strip())

        if len(para) <= chunk_size:
            current = para
            continue

        # A single paragraph exceeds chunk_size: re-pack it word by word.
        current = ""
        for word in para.split():
            if len(word) > chunk_size:
                # A whitespace-free token longer than the limit (long URL,
                # encoded blob, ...) cannot be packed; cut it into pieces.
                if current:
                    chunks.append(current)
                pieces = [
                    word[start:start + chunk_size]
                    for start in range(0, len(word), chunk_size)
                ]
                chunks.extend(pieces[:-1])
                # The tail piece may still share a chunk with later words.
                current = pieces[-1]
            elif len(current) + len(word) + 1 <= chunk_size:
                # +1 accounts for the joining space.
                current = f"{current} {word}" if current else word
            else:
                if current.strip():
                    chunks.append(current.strip())
                current = word

    if current.strip():
        chunks.append(current.strip())

    return chunks
|