spooling 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spooling/__init__.py ADDED
@@ -0,0 +1,2 @@
1
"""Spooling - local session tracker for AI coding assistants."""

from importlib.metadata import PackageNotFoundError, version

try:
    # Read the version from the installed distribution so this attribute
    # cannot drift from the published package version again (the 0.1.1 wheel
    # still reported "0.1.0" here).
    __version__ = version("spooling")
except PackageNotFoundError:
    # Not installed (e.g. running straight from a source checkout).
    __version__ = "0.1.1"
spooling/agent.py ADDED
@@ -0,0 +1,213 @@
1
+ """Chat agent that can talk to your Spooling data.
2
+
3
+ Supports Ollama (free/local) and Anthropic API (bring your own key).
4
+ Uses RAG - retrieves relevant session context from pgvector before answering.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ from datetime import datetime, timezone
10
+
11
+ import httpx
12
+
13
+ from spooling.search import search as semantic_search
14
+ from spooling.stats import get_overview, get_daily_stats, get_session_detail
15
+ from spooling.db import get_connection
16
+
17
# Base system prompt for the chat agent. `chat` appends the retrieved session
# context after a "---" separator before sending it to the model.
SYSTEM_PROMPT = "\n".join(
    (
        "You are Spooling Assistant, an AI that helps users understand their coding session history.",
        "You have access to the user's session data from AI coding tools (Codex, Cursor, etc.).",
        "",
        "When answering questions:",
        "- Be concise and specific. Reference actual session data, projects, and timestamps.",
        "- Every session has a UUID session ID (e.g. 8cb6f9d2-0214-4a4c-b731-d6d9c7914836). Always include the full session ID when referencing sessions.",
        "- If you don't have enough context, say so rather than guessing.",
        "- Format costs as dollars, tokens with commas, and dates in a readable format.",
        "- When listing sessions or results, keep it scannable - use short descriptions.",
        "",
        "You're given relevant context from the user's session history below. Use it to answer their question.",
    )
)
28
+
29
+
30
def _build_context(query: str) -> str:
    """Build RAG context from session data relevant to the query.

    The context is a markdown-style text made of up to five sections:
    semantic-search hits for ``query``, an overall usage summary, the top
    projects, the most recent sessions, and per-day stats for the last 7 days.

    Args:
        query: The user's question; used as the semantic-search query.

    Returns:
        The sections joined with newlines, ready to append to the system
        prompt.
    """
    parts: list[str] = []

    # 1. Semantic search for relevant session chunks
    results = semantic_search(query, limit=6)
    if results:
        parts.append("## Relevant Session Context")
        for r in results:
            # str() keeps the slice valid if the timestamp is not a plain
            # string; None/empty values render as "unknown".
            ts = str(r["timestamp"])[:16] if r["timestamp"] else "unknown"
            similarity = float(r["similarity"] or 0)
            excerpt = (r["content"] or "")[:300]
            parts.append(
                f"- [session_id: {r['session_id']}] [{r['role']}] ({r['project']} | {ts} | {similarity:.0%} match): "
                f"{excerpt}"
            )

    # 2. Overview stats
    overview = get_overview()
    s = overview["summary"]
    # Aggregates may be present but None (e.g. no rows yet), so every numeric
    # field is coerced with `or 0` rather than relying on the .get default.
    total_tokens = (s.get("total_input_tokens", 0) or 0) + (s.get("total_output_tokens", 0) or 0)
    parts.append("\n## Usage Summary")
    parts.append(
        f"- Total sessions: {s.get('total_sessions') or 0}\n"
        f"- Total messages: {s.get('total_messages') or 0}\n"
        f"- Total tool calls: {s.get('total_tool_calls') or 0}\n"
        f"- Est. tokens: {total_tokens:,}\n"
        f"- Est. cost: ${float(s.get('total_cost_usd') or 0):.2f}"
    )

    # 3. Projects
    if overview["projects"]:
        parts.append("\n## Projects")
        for p in overview["projects"][:8]:
            parts.append(
                f"- {p['project']}: {p['sessions']} sessions, {p['messages']} msgs, ${float(p['cost'] or 0):.2f}"
            )

    # 4. Recent sessions (with session IDs)
    if overview["recent_sessions"]:
        parts.append("\n## Recent Sessions")
        for r in overview["recent_sessions"][:6]:
            ts = r["started_at"].strftime("%m/%d %H:%M") if r["started_at"] else ""
            title = (r["title"] or "")[:60]  # untitled sessions must not crash the slice
            parts.append(
                f"- [session_id: {r['id']}] [{ts}] {r['project']}: {title} ({r['message_count']} msgs)"
            )

    # 5. Daily stats (last 7 days)
    daily = get_daily_stats(days=7)
    if daily:
        parts.append("\n## Last 7 Days")
        for d in daily:
            # Same None-tolerant cost handling as the project rows above.
            parts.append(
                f"- {d['day']}: {d['sessions']} sessions, {d['messages']} msgs, ${float(d['cost'] or 0):.2f}"
            )

    return "\n".join(parts)
79
+
80
+
81
def _get_config() -> dict:
    """Load agent config from the database.

    Reads the ``config`` column of the ``providers`` row whose id is
    ``'spooling-agent'``. The value may already be a dict or may be a JSON
    string, in which case it is decoded.

    Returns:
        The config dict, or ``{}`` when the row is missing, the stored value
        is not a JSON object, or the lookup fails for any reason.
    """
    import logging

    try:
        conn = get_connection()
        try:
            row = conn.execute(
                "SELECT config FROM providers WHERE id = 'spooling-agent'"
            ).fetchone()
        finally:
            # Close even when the query raises, so the connection never leaks.
            conn.close()
        if row and row["config"]:
            raw = row["config"]
            config = raw if isinstance(raw, dict) else json.loads(raw)
            # Callers use config.get(...), so anything but a dict is unusable.
            if isinstance(config, dict):
                return config
    except Exception:
        # Deliberately best-effort: the agent falls back to env vars/defaults
        # when the database is unavailable. Log instead of hiding the cause.
        logging.getLogger(__name__).debug("Could not load agent config", exc_info=True)
    return {}
94
+
95
+
96
def _get_provider() -> str:
    """Pick the LLM provider name.

    Order of precedence: a non-empty ``provider`` value in the stored agent
    config, then ``"anthropic"`` if the ``ANTHROPIC_API_KEY`` environment
    variable is set, otherwise ``"ollama"``.
    """
    configured = _get_config().get("provider", "")
    if configured:
        return configured
    # No explicit choice stored: infer it from the environment.
    return "anthropic" if os.getenv("ANTHROPIC_API_KEY") else "ollama"
106
+
107
+
108
async def chat(messages: list[dict], provider: str | None = None) -> dict:
    """Answer the conversation with the configured LLM, grounded in session data.

    The most recent user message is used to retrieve session chunks and to
    build the context appended to the system prompt.

    Args:
        messages: Conversation turns, each a dict with ``role`` and ``content``.
        provider: Provider name to force; when falsy the provider is resolved
            by ``_get_provider()``. Anything other than ``"anthropic"`` is
            routed to Ollama.

    Returns:
        A dict with the assistant ``response`` and the retrieved ``sources``
        (the RAG chunks for the query), so the UI can show what the agent had
        in context when answering.
    """
    config = _get_config()
    prov = provider or _get_provider()

    # Latest user turn drives retrieval; "" when the history has none.
    user_msg = next(
        (m["content"] for m in reversed(messages) if m["role"] == "user"),
        "",
    )

    if user_msg:
        sources = semantic_search(user_msg, limit=6)
        # NOTE: _build_context runs its own semantic search for the same query.
        context = _build_context(user_msg)
    else:
        sources, context = [], ""

    system = f"{SYSTEM_PROMPT}\n\n---\n\n{context}" if context else SYSTEM_PROMPT

    backend = _chat_anthropic if prov == "anthropic" else _chat_ollama
    response = await backend(system, messages, config)

    shaped_sources = [
        {
            "session_id": hit["session_id"],
            "project": hit.get("project"),
            "role": hit.get("role"),
            "timestamp": hit.get("timestamp"),
            "similarity": hit.get("similarity"),
            "title": hit.get("title"),
            "excerpt": (hit.get("content") or "")[:200],
        }
        for hit in sources
    ]
    return {"response": response, "sources": shaped_sources}
151
+
152
+
153
async def _chat_anthropic(system: str, messages: list[dict], config: dict) -> str:
    """Chat using the Anthropic API.

    Args:
        system: Full system prompt (base prompt plus retrieved context).
        messages: Conversation turns; only ``role`` and ``content`` are sent.
        config: Agent config; reads ``anthropic_api_key`` and ``model``.

    Returns:
        The assistant's text, or a human-readable message when no API key is
        configured or the reply contains no text.
    """
    api_key = config.get("anthropic_api_key") or os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        return "No Anthropic API key configured. Add one in Settings or set ANTHROPIC_API_KEY env var."

    # Imported only once a key exists, so the hint above is still returned
    # when the optional `anthropic` package is not installed.
    import anthropic

    # The previous default ("gemma3:4b") is an Ollama model id that the
    # Anthropic API rejects. NOTE(review): confirm the preferred default
    # Claude model id for this project.
    model = config.get("model") or "claude-3-5-haiku-latest"

    # Async client: the synchronous one would block the event loop for the
    # whole request inside this coroutine.
    async with anthropic.AsyncAnthropic(api_key=api_key) as client:
        response = await client.messages.create(
            model=model,
            max_tokens=1024,
            system=system,
            messages=[{"role": m["role"], "content": m["content"]} for m in messages],
        )

    # Join every text block instead of assuming content[0] exists and is text.
    text = "".join(
        block.text for block in response.content if getattr(block, "type", None) == "text"
    )
    return text or "No response from Anthropic."
172
+
173
+
174
async def _chat_ollama(system: str, messages: list[dict], config: dict) -> str:
    """Chat using a local Ollama server.

    Args:
        system: Full system prompt, sent as the leading ``system`` message.
        messages: Conversation turns; only ``role`` and ``content`` are sent.
        config: Agent config; reads ``ollama_url`` and ``model``.

    Returns:
        The assistant's text, or a human-readable troubleshooting message when
        Ollama is unreachable, times out, or returns an HTTP error.
    """
    # Tolerate None/empty config values and a trailing slash in the URL
    # (which would otherwise produce ".../​/api/chat"-style double slashes).
    base_url = (config.get("ollama_url") or "http://localhost:11434").rstrip("/")
    model = config.get("model") or "gemma3:4b"
    timeout_s = 180

    ollama_messages = [
        {"role": "system", "content": system},
        *({"role": m["role"], "content": m["content"]} for m in messages),
    ]

    async with httpx.AsyncClient(timeout=timeout_s) as client:
        try:
            resp = await client.post(
                f"{base_url}/api/chat",
                json={
                    "model": model,
                    "messages": ollama_messages,
                    "stream": False,
                    "options": {
                        "num_ctx": 8192,
                        "num_predict": 1024,
                        "temperature": 0.3,
                    },
                },
            )
            resp.raise_for_status()
            data = resp.json()
            # `message` may be missing or null; treat empty content as no reply.
            return (data.get("message") or {}).get("content") or "No response from Ollama."
        except httpx.ConnectError:
            return (
                f"Cannot connect to Ollama at {base_url}. "
                f"Make sure Ollama is running (`ollama serve`) and you've pulled a model (`ollama pull {model}`).\n\n"
                "Or switch to Anthropic in Settings with your API key."
            )
        except httpx.TimeoutException:
            # Connect/read timeouts are not ConnectError subclasses, so they
            # previously escaped as unhandled exceptions.
            return (
                f"Ollama at {base_url} did not respond within {timeout_s} seconds. "
                "The model may still be loading or the prompt may be too large; try again."
            )
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return (
                    f"Model '{model}' not found in Ollama. "
                    f"Pull it with: `ollama pull {model}`"
                )
            return f"Ollama error: {e.response.text}"
@@ -0,0 +1,147 @@
1
+ """Classify tool spans into vendor + category.
2
+
3
+ `vendor` = which service/product the tool talks to (linear, github, slack,
4
+ anthropic, filesystem, shell...). `category` = what kind of thing the tool
5
+ does (issue-tracker, vcs, chat, docs, web, filesystem, shell, planning,
6
+ llm, search, exec).
7
+
8
+ Classification is prefix-based, biased toward MCP tool-name conventions
9
+ (`mcp__<server>__<action>`). Unknown
10
+ names fall through to ("unknown", "other"). Callers can add custom entries
11
+ via `register_classifier` at import time.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass
17
+
18
+
19
@dataclass(frozen=True)
class ToolClass:
    """Immutable classification result for a tool name."""

    # Service/product the tool talks to, e.g. "linear", "github", "shell".
    vendor: str
    # Kind of work the tool does, e.g. "issue-tracker", "vcs", "filesystem".
    category: str
23
+
24
+
25
# MCP server-name fragment -> classification. Each key doubles as the vendor
# name. Insertion order is significant: `classify` returns the first entry
# that matches, so the pairs below keep their original order.
_MCP_VENDORS: dict[str, ToolClass] = {
    vendor: ToolClass(vendor, category)
    for vendor, category in (
        ("linear", "issue-tracker"),
        ("github", "vcs"),
        ("gitlab", "vcs"),
        ("bitbucket", "vcs"),
        ("slack", "chat"),
        ("discord", "chat"),
        ("teams", "chat"),
        ("notion", "docs"),
        ("confluence", "docs"),
        ("jira", "issue-tracker"),
        ("atlassian", "issue-tracker"),
        ("stripe", "payments"),
        ("paypal", "payments"),
        ("vercel", "hosting"),
        ("netlify", "hosting"),
        ("cloudflare", "hosting"),
        ("aws", "cloud"),
        ("gcp", "cloud"),
        ("azure", "cloud"),
        ("snowflake", "database"),
        ("bigquery", "database"),
        ("postgres", "database"),
        ("mysql", "database"),
        ("mongodb", "database"),
        ("redis", "database"),
        ("supabase", "database"),
        ("neon", "database"),
        ("sentry", "observability"),
        ("datadog", "observability"),
        ("grafana", "observability"),
        ("honeycomb", "observability"),
        ("openai", "llm"),
        ("anthropic", "llm"),
        ("gmail", "email"),
        ("calendar", "calendar"),
        ("drive", "storage"),
        ("dropbox", "storage"),
        ("figma", "design"),
        ("shopify", "commerce"),
    )
}
66
+
67
# Built-in tools → local-execution vendors. For every built-in the vendor and
# the category are the same word, so the table is declared as ordered
# (kind, tool names) groups; the original insertion order is preserved.
# Git / GitHub via gh CLI typically appears as Bash(gh ...); leave alone.
_BUILTIN: dict[str, ToolClass] = {
    name: ToolClass(kind, kind)
    for kind, names in (
        # Filesystem
        ("filesystem", ("Read", "Write", "Edit", "NotebookEdit", "Glob")),
        # Shell
        ("shell", ("Bash", "exec")),
        # Search
        ("search", ("Grep", "ToolSearch")),
        # Web
        ("web", ("WebFetch", "WebSearch")),
        # Agents / planning
        ("agent", ("Task", "Agent")),
        (
            "planning",
            ("TaskCreate", "TaskUpdate", "TaskList", "TaskGet", "TaskStop", "TaskOutput"),
        ),
        # Process / monitor
        ("shell", ("Monitor",)),
        # Notebook
        ("filesystem", ("Jupyter",)),
    )
    for name in names
}
99
+
100
# Fallback classification returned when a tool name matches nothing.
UNKNOWN = ToolClass(vendor="unknown", category="other")
101
+
102
+
103
def register_classifier(tool_name: str, vendor: str, category: str) -> None:
    """Add or replace the classification stored for one exact tool name.

    The entry is written into the module-level built-in table, so it affects
    every later `classify` lookup of that name in this process.
    """
    entry = ToolClass(vendor=vendor, category=category)
    _BUILTIN.update({tool_name: entry})
106
+
107
+
108
def register_mcp_vendor(prefix: str, vendor: str, category: str) -> None:
    """Add or replace the classification stored for an MCP vendor prefix.

    The entry is written into the module-level MCP vendor table; a new prefix
    is appended after the existing entries.
    """
    entry = ToolClass(vendor=vendor, category=category)
    _MCP_VENDORS.update({prefix: entry})
111
+
112
+
113
def classify(tool_name: str | None) -> ToolClass:
    """Return the `ToolClass` (vendor + category) for a tool name. Never raises.

    Lookup order:

    1. An exact match in the built-in table, so names registered through
       `register_classifier` always win, including MCP-style names.
    2. MCP names (`mcp__<server>__<action>` or `mcp_<server>_<action>`): the
       first underscore-delimited token of the server name is matched against
       the MCP vendor table, exact key first, then as a substring. Unmatched
       MCP tools get ``ToolClass(<token or "mcp">, "mcp")``.
    3. A case-insensitive match in the built-in table.
    4. A vendor name before the first ``.``, ``-``, ``:`` or ``_``
       (e.g. ``linear.create_issue``, ``slack-send``).

    Args:
        tool_name: Raw tool name from a span; may be None or empty.

    Returns:
        The matching classification, or `UNKNOWN` when nothing matches or the
        input is empty or not a string.
    """
    if not tool_name or not isinstance(tool_name, str):
        return UNKNOWN

    # Exact match on builtins / explicit registrations comes first; otherwise
    # a name registered with an "mcp_" prefix could never be reached.
    exact = _BUILTIN.get(tool_name)
    if exact is not None:
        return exact

    low = tool_name.lower()

    # MCP: mcp__<vendor>__<action> or mcp_<vendor>_<action>
    if low.startswith("mcp_"):
        # Strip the marker only where it actually is: at the start. Searching
        # for "mcp__" anywhere mis-split names such as "mcp_github_mcp__list".
        rest = low[len("mcp__"):] if low.startswith("mcp__") else low[len("mcp_"):]
        # Server name up to the next "__", then its first "_"-delimited token
        # (so "linear_server" → "linear").
        head = rest.split("__", 1)[0].split("_", 1)[0]
        # Prefer an exact vendor key so a more specific registered prefix is
        # not shadowed by an earlier entry that merely occurs inside it.
        direct = _MCP_VENDORS.get(head)
        if direct is not None:
            return direct
        for prefix, cls in _MCP_VENDORS.items():
            if prefix and prefix in head:
                return cls
        return ToolClass(head or "mcp", "mcp")

    # Case-insensitive builtin fallback
    for name, cls in _BUILTIN.items():
        if name.lower() == low:
            return cls

    # Heuristic: plain vendor prefix like "linear.create_issue" or "slack-send"
    for sep in (".", "-", ":", "_"):
        if sep in low:
            head = low.split(sep, 1)[0]
            if head in _MCP_VENDORS:
                return _MCP_VENDORS[head]

    return UNKNOWN