spooling 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spooling/config.py ADDED
@@ -0,0 +1,44 @@
1
+ """Configuration for Spooling."""
2
+
3
import os
from pathlib import Path
from urllib.parse import quote

# Legacy session data directory (JSONL-format sessions)
SESSIONS_DIR = Path.home() / ".sessions"
SESSIONS_PROJECTS_DIR = SESSIONS_DIR / "projects"

# Snowflake Cortex Code data directory. Sessions live in
# ~/.snowflake/cortex/conversations/<uuid>.history.jsonl with a sidecar
# <uuid>.json carrying title, working_directory, git info, and timestamps.
CORTEX_DIR = Path.home() / ".snowflake" / "cortex"
CORTEX_CONVERSATIONS_DIR = CORTEX_DIR / "conversations"

# opencode (sst/opencode) data directory. Single SQLite DB at
# ~/.local/share/opencode/opencode.db with session/message/part tables
# (Drizzle-managed). Parts carry the Vercel AI SDK UIMessage payload.
OPENCODE_DIR = Path.home() / ".local" / "share" / "opencode"
OPENCODE_DB = OPENCODE_DIR / "opencode.db"

# Database. Every setting can be overridden through a SPOOLING_DB_* environment
# variable; the values are read once, when this module is imported.
DB_HOST = os.getenv("SPOOLING_DB_HOST", "localhost")
DB_PORT = int(os.getenv("SPOOLING_DB_PORT", "5434"))
DB_NAME = os.getenv("SPOOLING_DB_NAME", "spooling")
DB_USER = os.getenv("SPOOLING_DB_USER", "spooling")
DB_PASSWORD = os.getenv("SPOOLING_DB_PASSWORD", "spooling")


def build_database_url(user: str, password: str, host: str, port: int, name: str) -> str:
    """Assemble a ``postgresql://`` connection URI from its components.

    The user, password and database name are percent-encoded so that
    characters with a structural meaning in a URI (``@``, ``:``, ``/``,
    ``#``, ...) cannot corrupt the result. The host is inserted verbatim.

    Args:
        user: Database role name (unencoded).
        password: Password for ``user`` (unencoded).
        host: Host name or address, used as given.
        port: TCP port number.
        name: Database name (unencoded).

    Returns:
        The connection URI as a string.
    """
    # safe="" also encodes "/", which quote() leaves untouched by default.
    credentials = f"{quote(user, safe='')}:{quote(password, safe='')}"
    return f"postgresql://{credentials}@{host}:{port}/{quote(name, safe='')}"


DATABASE_URL = build_database_url(DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME)

# Embeddings
EMBEDDING_MODEL = os.getenv("SPOOLING_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
EMBEDDING_DIM = 384
CHUNK_SIZE = 500  # chars per chunk for embedding

# Server
UI_HOST = os.getenv("SPOOLING_UI_HOST", "127.0.0.1")
UI_PORT = int(os.getenv("SPOOLING_UI_PORT", "3001"))

# Token estimation (rough heuristic: ~4 chars per token)
CHARS_PER_TOKEN = 4

# Default model pricing per 1M tokens (input, output)
DEFAULT_PRICING = (3.0, 15.0)
spooling/db.py ADDED
@@ -0,0 +1,21 @@
1
+ """Database connection management."""
2
+
3
+ import psycopg
4
+ from psycopg.rows import dict_row
5
+
6
+ from spooling.config import DATABASE_URL
7
+
8
+
9
def get_connection():
    """Open and return a new psycopg connection to ``DATABASE_URL``.

    The connection is created with the ``dict_row`` row factory. The caller
    owns the returned connection and is responsible for closing it.
    """
    connection = psycopg.connect(DATABASE_URL, row_factory=dict_row)
    return connection
12
+
13
+
14
def check_db():
    """Report whether a trivial query can be run against the database.

    Returns:
        ``True`` when a connection opens and ``SELECT 1`` executes,
        ``False`` when any exception is raised along the way.
    """
    try:
        with get_connection() as probe:
            probe.execute("SELECT 1")
    except Exception:
        # Deliberately broad: this is a yes/no health probe, so every
        # failure is reported as "not reachable" instead of propagating.
        return False
    else:
        return True
spooling/embeddings.py ADDED
@@ -0,0 +1,60 @@
1
+ """Local embeddings via sentence-transformers."""
2
+
3
+ from functools import lru_cache
4
+
5
+ from spooling.config import EMBEDDING_MODEL, CHUNK_SIZE
6
+
7
+
8
@lru_cache(maxsize=1)
def _get_model():
    """Return the SentenceTransformer for ``EMBEDDING_MODEL``.

    The result is memoised by ``lru_cache``, so the model is constructed on
    the first call and the same instance is returned afterwards.
    """
    # Imported here rather than at module level so the dependency is only
    # loaded when a model is first requested.
    import sentence_transformers

    return sentence_transformers.SentenceTransformer(EMBEDDING_MODEL)
13
+
14
+
15
def embed_texts(texts: list[str]) -> list[list[float]]:
    """Encode a batch of strings into embedding vectors.

    Args:
        texts: The strings to embed.

    Returns:
        One vector per encoded row, each converted to a plain list via
        ``tolist()``. Encoding is requested with ``normalize_embeddings=True``
        and the progress bar disabled.
    """
    encoder = _get_model()
    vectors = encoder.encode(
        texts,
        show_progress_bar=False,
        normalize_embeddings=True,
    )
    return [row.tolist() for row in vectors]
20
+
21
+
22
def embed_text(text: str) -> list[float]:
    """Embed one string by delegating to :func:`embed_texts`.

    Args:
        text: The string to embed.

    Returns:
        The first (and only) vector of the one-element batch.
    """
    vectors = embed_texts([text])
    return vectors[0]
25
+
26
+
27
def chunk_text(text: str, chunk_size: int = CHUNK_SIZE) -> list[str]:
    """Split text into chunks of at most ``chunk_size`` characters.

    Text that already fits is returned as a single, unmodified chunk (or no
    chunk at all when it is blank). Longer text is split on blank lines
    (``"\\n\\n"``) and consecutive paragraphs are packed together while they
    fit. A paragraph that is too long on its own is repacked word by word,
    and a single word that is still too long is cut into ``chunk_size``-sized
    pieces so that no chunk ever exceeds the limit.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk; must be >= 1.

    Returns:
        The chunks in document order. Chunks produced by splitting are
        stripped of surrounding whitespace and are never empty.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    if len(text) <= chunk_size:
        return [text] if text.strip() else []

    chunks: list[str] = []
    current = ""

    # Split on paragraph boundaries first, then by size.
    for para in text.split("\n\n"):
        # "+ 2" accounts for the "\n\n" separator that joins paragraphs.
        if len(current) + len(para) + 2 <= chunk_size:
            current = f"{current}\n\n{para}" if current else para
            continue

        if current.strip():
            chunks.append(current.strip())

        if len(para) <= chunk_size:
            current = para
            continue

        # The paragraph alone exceeds chunk_size: repack it word by word.
        current = ""
        for word in para.split():
            # "+ 1" accounts for the single space that joins words.
            if len(current) + len(word) + 1 <= chunk_size:
                current = f"{current} {word}" if current else word
                continue

            if current:
                chunks.append(current)

            # A word longer than chunk_size (a URL, a hash, minified data)
            # cannot fit in any chunk, so emit full-size slices and carry
            # the remainder forward as the start of the next chunk.
            remainder = word
            while len(remainder) > chunk_size:
                chunks.append(remainder[:chunk_size])
                remainder = remainder[chunk_size:]
            current = remainder

    if current.strip():
        chunks.append(current.strip())

    return chunks