spooling 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spooling/__init__.py +2 -0
- spooling/agent.py +213 -0
- spooling/classifiers.py +147 -0
- spooling/cli.py +522 -0
- spooling/cloud.py +768 -0
- spooling/config.py +44 -0
- spooling/db.py +21 -0
- spooling/embeddings.py +60 -0
- spooling/evals.py +611 -0
- spooling/experiments.py +407 -0
- spooling/ingest.py +496 -0
- spooling/mcp_server.py +312 -0
- spooling/parser.py +614 -0
- spooling/pricing.py +307 -0
- spooling/providers/__init__.py +46 -0
- spooling/providers/antigravity.py +312 -0
- spooling/providers/base.py +166 -0
- spooling/providers/codex.py +230 -0
- spooling/providers/copilot.py +294 -0
- spooling/providers/cortex_code.py +234 -0
- spooling/providers/cursor.py +307 -0
- spooling/providers/gemini.py +476 -0
- spooling/providers/github.py +241 -0
- spooling/providers/gitlab.py +186 -0
- spooling/providers/kiro.py +240 -0
- spooling/providers/opencode.py +282 -0
- spooling/providers/session_file.py +36 -0
- spooling/providers/windsurf.py +355 -0
- spooling/redact.py +284 -0
- spooling/remote_otel.py +257 -0
- spooling/sdk.py +364 -0
- spooling/search.py +68 -0
- spooling/server.py +1291 -0
- spooling/stats.py +180 -0
- spooling/subscription_pricing.py +131 -0
- spooling/tracing.py +451 -0
- spooling/watcher.py +125 -0
- spooling-0.1.1.dist-info/METADATA +28 -0
- spooling-0.1.1.dist-info/RECORD +43 -0
- spooling-0.1.1.dist-info/WHEEL +5 -0
- spooling-0.1.1.dist-info/entry_points.txt +2 -0
- spooling-0.1.1.dist-info/licenses/LICENSE +21 -0
- spooling-0.1.1.dist-info/top_level.txt +1 -0
spooling/__init__.py
ADDED
spooling/agent.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Chat agent that can talk to your Spooling data.
|
|
2
|
+
|
|
3
|
+
Supports Ollama (free/local) and Anthropic API (bring your own key).
|
|
4
|
+
Uses RAG - retrieves relevant session context from pgvector before answering.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
from spooling.search import search as semantic_search
|
|
14
|
+
from spooling.stats import get_overview, get_daily_stats, get_session_detail
|
|
15
|
+
from spooling.db import get_connection
|
|
16
|
+
|
|
17
|
+
SYSTEM_PROMPT = """You are Spooling Assistant, an AI that helps users understand their coding session history.
|
|
18
|
+
You have access to the user's session data from AI coding tools (Codex, Cursor, etc.).
|
|
19
|
+
|
|
20
|
+
When answering questions:
|
|
21
|
+
- Be concise and specific. Reference actual session data, projects, and timestamps.
|
|
22
|
+
- Every session has a UUID session ID (e.g. 8cb6f9d2-0214-4a4c-b731-d6d9c7914836). Always include the full session ID when referencing sessions.
|
|
23
|
+
- If you don't have enough context, say so rather than guessing.
|
|
24
|
+
- Format costs as dollars, tokens with commas, and dates in a readable format.
|
|
25
|
+
- When listing sessions or results, keep it scannable - use short descriptions.
|
|
26
|
+
|
|
27
|
+
You're given relevant context from the user's session history below. Use it to answer their question."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _build_context(query: str, results: list[dict] | None = None) -> str:
    """Build the RAG context block that is appended to the system prompt.

    The context has up to five sections: semantic-search hits for `query`,
    an overall usage summary, the top projects, the most recent sessions and
    per-day stats for the last 7 days.

    Args:
        query: The user's question, used for the semantic search.
        results: Semantic-search hits already fetched for `query`. When
            omitted (None) the search is run here with ``limit=6``; passing
            the hits in avoids running the same search twice.

    Returns:
        The sections joined with newlines.
    """
    parts = []

    # 1. Semantic search for relevant session chunks
    if results is None:
        results = semantic_search(query, limit=6)
    if results:
        parts.append("## Relevant Session Context")
        for r in results:
            ts = r["timestamp"][:16] if r["timestamp"] else "unknown"
            parts.append(
                f"- [session_id: {r['session_id']}] [{r['role']}] ({r['project']} | {ts} | {r['similarity']:.0%} match): "
                f"{(r['content'] or '')[:300]}"
            )

    # 2. Overview stats
    overview = get_overview()
    s = overview["summary"]
    total_tokens = (s.get("total_input_tokens", 0) or 0) + (s.get("total_output_tokens", 0) or 0)
    parts.append("\n## Usage Summary")
    parts.append(
        f"- Total sessions: {s.get('total_sessions', 0)}\n"
        f"- Total messages: {s.get('total_messages', 0)}\n"
        f"- Total tool calls: {s.get('total_tool_calls', 0)}\n"
        f"- Est. tokens: {total_tokens:,}\n"
        # `or 0` guards against a None cost, matching the per-project line below.
        f"- Est. cost: ${float(s.get('total_cost_usd') or 0):.2f}"
    )

    # 3. Projects
    if overview["projects"]:
        parts.append("\n## Projects")
        for p in overview["projects"][:8]:
            parts.append(f"- {p['project']}: {p['sessions']} sessions, {p['messages']} msgs, ${float(p['cost'] or 0):.2f}")

    # 4. Recent sessions (with session IDs)
    if overview["recent_sessions"]:
        parts.append("\n## Recent Sessions")
        for r in overview["recent_sessions"][:6]:
            ts = r["started_at"].strftime("%m/%d %H:%M") if r["started_at"] else ""
            # A session without a title must not break the whole context.
            parts.append(f"- [session_id: {r['id']}] [{ts}] {r['project']}: {(r['title'] or '')[:60]} ({r['message_count']} msgs)")

    # 5. Daily stats (last 7 days)
    daily = get_daily_stats(days=7)
    if daily:
        parts.append("\n## Last 7 Days")
        for d in daily:
            parts.append(f"- {d['day']}: {d['sessions']} sessions, {d['messages']} msgs, ${float(d['cost'] or 0):.2f}")

    return "\n".join(parts)


def _get_config() -> dict:
    """Load the agent config from the `providers` row with id 'spooling-agent'.

    This is best-effort: any failure (database unreachable, malformed JSON,
    a stored value that is not a JSON object) yields an empty dict so callers
    fall back to environment variables and built-in defaults.

    Returns:
        The stored config dict, or ``{}`` when none is available.
    """
    try:
        conn = get_connection()
        try:
            row = conn.execute(
                "SELECT config FROM providers WHERE id = 'spooling-agent'"
            ).fetchone()
        finally:
            # Close the connection even when the query raises.
            conn.close()
        if row and row["config"]:
            config = row["config"] if isinstance(row["config"], dict) else json.loads(row["config"])
            # Callers use `.get`, so anything other than a dict is unusable.
            if isinstance(config, dict):
                return config
    except Exception:
        # Deliberately broad: the config is optional. Leave a trace for debugging.
        import logging

        logging.getLogger(__name__).debug("Could not load agent config", exc_info=True)
    return {}


def _get_provider(config: dict | None = None) -> str:
    """Determine which LLM provider to use.

    Precedence: the `provider` value in the agent config, then "anthropic"
    when the ANTHROPIC_API_KEY environment variable is set, else "ollama".

    Args:
        config: An already-loaded agent config. When omitted it is loaded
            with `_get_config()`.

    Returns:
        The provider name.
    """
    if config is None:
        config = _get_config()
    provider = config.get("provider", "")
    if provider:
        return provider
    # Check env vars
    if os.getenv("ANTHROPIC_API_KEY"):
        return "anthropic"
    return "ollama"


async def chat(messages: list[dict], provider: str | None = None) -> dict:
    """Send messages to the configured LLM.

    The most recent user message drives a semantic search; the hits are fed
    into the system prompt and also returned, so the UI can show what the
    agent actually had in context when answering.

    Args:
        messages: Chat history as dicts with `role` and `content` keys.
        provider: Provider override. "anthropic" selects the Anthropic API;
            any other value uses Ollama. When omitted the provider comes
            from `_get_provider`.

    Returns:
        A dict with the assistant `response` text and the retrieved
        `sources` (one summary dict per search hit).
    """
    # Load the config once and share it with the provider lookup.
    config = _get_config()
    prov = provider or _get_provider(config)

    # The latest user turn is the retrieval query.
    user_msg = ""
    for m in reversed(messages):
        if m.get("role") == "user":
            user_msg = m.get("content") or ""
            break

    # Search once and reuse the hits for both the prompt and the response,
    # so the reported sources are exactly what the model saw.
    sources = semantic_search(user_msg, limit=6) if user_msg else []
    context = _build_context(user_msg, results=sources) if user_msg else ""

    system = SYSTEM_PROMPT
    if context:
        system += f"\n\n---\n\n{context}"

    if prov == "anthropic":
        response = await _chat_anthropic(system, messages, config)
    else:
        response = await _chat_ollama(system, messages, config)

    return {
        "response": response,
        "sources": [
            {
                "session_id": s["session_id"],
                "project": s.get("project"),
                "role": s.get("role"),
                "timestamp": s.get("timestamp"),
                "similarity": s.get("similarity"),
                "title": s.get("title"),
                "excerpt": (s.get("content") or "")[:200],
            }
            for s in sources
        ],
    }
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def _chat_anthropic(system: str, messages: list[dict], config: dict) -> str:
    """Chat using the Anthropic API.

    Args:
        system: The full system prompt (base prompt plus retrieved context).
        messages: Chat history as dicts with `role` and `content` keys.
        config: Agent config. `anthropic_api_key` takes precedence over the
            ANTHROPIC_API_KEY environment variable; `model` selects the model.

    Returns:
        The assistant's reply text, or a user-facing hint when no API key is
        configured or the reply contains no text.
    """
    api_key = config.get("anthropic_api_key") or os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        return "No Anthropic API key configured. Add one in Settings or set ANTHROPIC_API_KEY env var."

    # Imported lazily, and only once a key is known, so the SDK is required
    # only when this provider is actually used.
    import anthropic

    # The fallback must be an Anthropic model id; the Ollama default
    # ("gemma3:4b") is rejected by the Anthropic API.
    # NOTE(review): confirm this alias against the current Anthropic model list.
    model = config.get("model") or "claude-haiku-4-5"

    # Use the async client: the synchronous one would block the event loop
    # for the whole duration of the request.
    async with anthropic.AsyncAnthropic(api_key=api_key) as client:
        response = await client.messages.create(
            model=model,
            max_tokens=1024,
            system=system,
            messages=[{"role": m["role"], "content": m["content"]} for m in messages],
        )

    # Collect every text block rather than assuming the first block exists
    # and is text.
    text = "".join(getattr(part, "text", None) or "" for part in response.content or [])
    return text or "No response from Anthropic."
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
async def _chat_ollama(system: str, messages: list[dict], config: dict) -> str:
    """Chat using a local Ollama server.

    Args:
        system: The full system prompt (base prompt plus retrieved context).
        messages: Chat history as dicts with `role` and `content` keys.
        config: Agent config. `ollama_url` is the server base URL (default
            http://localhost:11434) and `model` the model name (default
            gemma3:4b).

    Returns:
        The assistant's reply text. Connection failures, timeouts and HTTP
        errors are returned as user-facing messages rather than raised.
    """
    timeout_s = 180
    # `or` also covers an empty/None stored value; the trailing slash is
    # dropped so the request path never becomes "//api/chat".
    base_url = (config.get("ollama_url") or "http://localhost:11434").rstrip("/")
    model = config.get("model") or "gemma3:4b"

    ollama_messages = [
        {"role": "system", "content": system},
        *({"role": m["role"], "content": m["content"]} for m in messages),
    ]

    async with httpx.AsyncClient(timeout=timeout_s) as client:
        try:
            resp = await client.post(
                f"{base_url}/api/chat",
                json={
                    "model": model,
                    "messages": ollama_messages,
                    "stream": False,
                    "options": {
                        "num_ctx": 8192,
                        "num_predict": 1024,
                        "temperature": 0.3,
                    },
                },
            )
            resp.raise_for_status()
            data = resp.json()
            # `message` may be missing or null in the payload.
            return (data.get("message") or {}).get("content", "No response from Ollama.")
        except httpx.ConnectError:
            return (
                f"Cannot connect to Ollama at {base_url}. "
                f"Make sure Ollama is running (`ollama serve`) and you've pulled a model (`ollama pull {model}`).\n\n"
                "Or switch to Anthropic in Settings with your API key."
            )
        except httpx.TimeoutException:
            # Report timeouts the same way as the other failures in this
            # function instead of letting them escape as exceptions.
            return (
                f"Ollama at {base_url} did not respond within {timeout_s} seconds. "
                "The model may still be loading; try again or use a smaller model."
            )
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return (
                    f"Model '{model}' not found in Ollama. "
                    f"Pull it with: `ollama pull {model}`"
                )
            return f"Ollama error: {e.response.text}"
|
spooling/classifiers.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Classify tool spans into vendor + category.
|
|
2
|
+
|
|
3
|
+
`vendor` = which service/product the tool talks to (linear, github, slack,
|
|
4
|
+
anthropic, filesystem, shell...). `category` = what kind of thing the tool
|
|
5
|
+
does (issue-tracker, vcs, chat, docs, web, filesystem, shell, planning,
|
|
6
|
+
llm, search, exec).
|
|
7
|
+
|
|
8
|
+
Classification is prefix-based, biased toward MCP tool-name conventions
|
|
9
|
+
(`mcp__<server>__<action>`). Unknown
|
|
10
|
+
names fall through to ("unknown", "other"). Callers can add custom entries
|
|
11
|
+
via `register_classifier` at import time.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class ToolClass:
    """Immutable (vendor, category) classification of a tool."""

    # Service or product the tool talks to, e.g. "github" or "filesystem".
    vendor: str
    # Kind of work the tool does, e.g. "vcs" or "shell".
    category: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Known MCP vendors, keyed by the lowercase vendor name (each key doubles as
# the ToolClass vendor). `classify` scans this table in insertion order, so
# the order of the pairs below is kept exactly as it was.
_MCP_VENDORS: dict[str, ToolClass] = {
    name: ToolClass(name, kind)
    for name, kind in (
        ("linear", "issue-tracker"),
        ("github", "vcs"),
        ("gitlab", "vcs"),
        ("bitbucket", "vcs"),
        ("slack", "chat"),
        ("discord", "chat"),
        ("teams", "chat"),
        ("notion", "docs"),
        ("confluence", "docs"),
        ("jira", "issue-tracker"),
        ("atlassian", "issue-tracker"),
        ("stripe", "payments"),
        ("paypal", "payments"),
        ("vercel", "hosting"),
        ("netlify", "hosting"),
        ("cloudflare", "hosting"),
        ("aws", "cloud"),
        ("gcp", "cloud"),
        ("azure", "cloud"),
        ("snowflake", "database"),
        ("bigquery", "database"),
        ("postgres", "database"),
        ("mysql", "database"),
        ("mongodb", "database"),
        ("redis", "database"),
        ("supabase", "database"),
        ("neon", "database"),
        ("sentry", "observability"),
        ("datadog", "observability"),
        ("grafana", "observability"),
        ("honeycomb", "observability"),
        ("openai", "llm"),
        ("anthropic", "llm"),
        ("gmail", "email"),
        ("calendar", "calendar"),
        ("drive", "storage"),
        ("dropbox", "storage"),
        ("figma", "design"),
        ("shopify", "commerce"),
    )
}
|
|
66
|
+
|
|
67
|
+
# Built-in tools → local-execution vendors.
# Every built-in uses one label for both vendor and category, so the table is
# expanded from (label, tool names) groups listed in the original key order.
_BUILTIN: dict[str, ToolClass] = {
    tool: ToolClass(label, label)
    for label, tools in (
        # Filesystem
        ("filesystem", ("Read", "Write", "Edit", "NotebookEdit", "Glob")),
        # Shell
        ("shell", ("Bash", "exec")),
        # Search
        ("search", ("Grep", "ToolSearch")),
        # Web
        ("web", ("WebFetch", "WebSearch")),
        # Agents / planning
        ("agent", ("Task", "Agent")),
        (
            "planning",
            ("TaskCreate", "TaskUpdate", "TaskList", "TaskGet", "TaskStop", "TaskOutput"),
        ),
        # Process / monitor
        ("shell", ("Monitor",)),
        # Git / GitHub via gh CLI typically appears as Bash(gh ...); leave alone.
        # Notebook
        ("filesystem", ("Jupyter",)),
    )
    for tool in tools
}
|
|
99
|
+
|
|
100
|
+
# Fallback classification returned by `classify` when nothing else matches.
UNKNOWN = ToolClass("unknown", "other")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def register_classifier(tool_name: str, vendor: str, category: str) -> None:
    """Add or replace the built-in classification for one exact tool name.

    Args:
        tool_name: Tool name used as the key in the built-in table.
        vendor: Vendor label to report for this tool.
        category: Category label to report for this tool.
    """
    entry = ToolClass(vendor=vendor, category=category)
    _BUILTIN[tool_name] = entry
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def register_mcp_vendor(prefix: str, vendor: str, category: str) -> None:
    """Register or override classification for an MCP vendor prefix.

    Args:
        prefix: Vendor fragment to look for in a tool's server name. It is
            stored in lowercase because `classify` compares it against the
            lower-cased tool name; a mixed-case key would never match.
        vendor: Vendor label to report for matching tools.
        category: Category label to report for matching tools.
    """
    _MCP_VENDORS[prefix.lower()] = ToolClass(vendor, category)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def classify(tool_name: str | None) -> ToolClass:
    """Return the (vendor, category) classification for a tool name.

    Resolution order:
      1. MCP names (`mcp__<server>__<action>` or `mcp_<server>_<action>`):
         the first token of the server name is matched against the vendor
         table; an unmatched server yields ``ToolClass(<token>, "mcp")``.
      2. Exact match in the built-in table.
      3. Case-insensitive match in the built-in table.
      4. A vendor name followed by one of ``. - : _``
         (e.g. "linear.create_issue", "slack-send").

    Args:
        tool_name: The tool name to classify; may be None or empty.

    Returns:
        The matching ToolClass, or `UNKNOWN` when nothing matches or the
        input is empty or not a string. Never raises.
    """
    # A non-string name cannot be classified; keep the "never raises" promise.
    if not tool_name or not isinstance(tool_name, str):
        return UNKNOWN

    # MCP: mcp__<vendor>__<action> or mcp_<vendor>_<action>
    low = tool_name.lower()
    if low.startswith("mcp_"):
        # Strip only the leading marker. Splitting on "mcp__" wherever it
        # occurs would cut e.g. "mcp_github_mcp__x" down to "x".
        rest = low[5:] if low.startswith("mcp__") else low[4:]
        # Next segment up to next __ or _ is the server name, which we match
        # against our vendor table as a substring.
        head = rest.split("__", 1)[0].split("_", 1)[0]
        for prefix, cls in _MCP_VENDORS.items():
            if prefix in head:
                return cls
        return ToolClass(head or "mcp", "mcp")

    # Exact match on builtins
    cls = _BUILTIN.get(tool_name)
    if cls is not None:
        return cls

    # Case-insensitive builtin fallback
    for k, v in _BUILTIN.items():
        if k.lower() == low:
            return v

    # Heuristic: plain vendor prefix like "linear.create_issue" or "slack-send"
    for sep in (".", "-", ":", "_"):
        if sep in tool_name:
            head = tool_name.split(sep, 1)[0].lower()
            if head in _MCP_VENDORS:
                return _MCP_VENDORS[head]

    return UNKNOWN
|