timelog-extract 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- collectors/__init__.py +1 -0
- collectors/ai_logs.py +149 -0
- collectors/chrome.py +219 -0
- collectors/cursor.py +130 -0
- collectors/github.py +169 -0
- collectors/mail.py +109 -0
- collectors/timelog.py +135 -0
- core/__init__.py +1 -0
- core/analytics.py +59 -0
- core/cli.py +38 -0
- core/cli_app.py +13 -0
- core/cli_doctor_sources_projects.py +407 -0
- core/cli_global_timelog_setup.py +53 -0
- core/cli_options.py +85 -0
- core/cli_prompts.py +68 -0
- core/cli_report_status.py +236 -0
- core/collector_registry.py +89 -0
- core/config.py +105 -0
- core/domain.py +65 -0
- core/engine_api.py +81 -0
- core/events.py +52 -0
- core/global_timelog_setup_lib.py +477 -0
- core/pipeline.py +129 -0
- core/report_aggregate.py +67 -0
- core/report_cli.py +157 -0
- core/report_runtime.py +226 -0
- core/report_service.py +367 -0
- core/runtime_collectors.py +177 -0
- core/screen_time.py +79 -0
- core/sources.py +31 -0
- core/truth_payload.py +171 -0
- outputs/__init__.py +1 -0
- outputs/gittan_banner.py +31 -0
- outputs/html_timeline.py +174 -0
- outputs/narrative.py +124 -0
- outputs/pdf.py +244 -0
- outputs/terminal.py +304 -0
- outputs/terminal_theme.py +23 -0
- scripts/__init__.py +1 -0
- scripts/check_file_lengths.py +61 -0
- scripts/eval_accuracy.py +150 -0
- scripts/friend_trial.py +63 -0
- scripts/manual_matrix_automation.py +229 -0
- scripts/run_engine_report.py +79 -0
- scripts/run_golden_eval.py +197 -0
- timelog_extract-0.2.3.dist-info/METADATA +855 -0
- timelog_extract-0.2.3.dist-info/RECORD +52 -0
- timelog_extract-0.2.3.dist-info/WHEEL +5 -0
- timelog_extract-0.2.3.dist-info/entry_points.txt +4 -0
- timelog_extract-0.2.3.dist-info/licenses/LICENSE +676 -0
- timelog_extract-0.2.3.dist-info/top_level.txt +5 -0
- timelog_extract.py +24 -0
collectors/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Collectors package for source-specific event extraction."""
|
collectors/ai_logs.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _read_jsonl_timestamps(jsonl_file, dt_from, dt_to):
    """Extract timestamped entries from a JSON Lines log file.

    Every non-empty line is parsed as JSON.  A line is skipped when it is
    not valid JSON, is not a JSON object, carries no parseable timestamp
    under ``timestamp`` / ``ts`` / ``created_at`` / ``time``, or falls
    outside ``dt_from <= ts <= dt_to``.

    Args:
        jsonl_file: Path of the ``.jsonl`` file to read.
        dt_from: Inclusive lower bound of the time window.
        dt_to: Inclusive upper bound of the time window.

    Returns:
        A list of ``(timestamp, detail, obj)`` tuples in file order, where
        ``detail`` is a short single-line summary (at most 70 characters,
        ``"log"`` when empty) and ``obj`` is the decoded JSON object.
        An unreadable file yields an empty list (best effort).
    """
    results = []
    try:
        with open(jsonl_file, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON that is not an object (list, string, number)
                # has no fields to inspect.
                if not isinstance(obj, dict):
                    continue

                ts_raw = obj.get("timestamp") or obj.get("ts") or obj.get("created_at") or obj.get("time")
                if ts_raw is None:
                    continue
                try:
                    if isinstance(ts_raw, (int, float)):
                        # Values above 1e11 are treated as milliseconds.
                        divisor = 1000 if ts_raw > 1e11 else 1
                        ts = datetime.fromtimestamp(ts_raw / divisor, tz=timezone.utc)
                    else:
                        ts = datetime.fromisoformat(str(ts_raw).replace("Z", "+00:00"))
                except (ValueError, OSError, OverflowError):
                    # OverflowError: numeric timestamp beyond the platform range.
                    continue

                # An ISO string without an offset parses as naive; comparing
                # it with aware bounds would raise TypeError.  Treat it as
                # UTC, matching the numeric branch.
                if ts.tzinfo is None and getattr(dt_from, "tzinfo", None) is not None:
                    ts = ts.replace(tzinfo=timezone.utc)

                if not (dt_from <= ts <= dt_to):
                    continue

                msg = obj.get("message", {})
                if isinstance(msg, dict):
                    content = msg.get("content", "")
                    if isinstance(content, list):
                        # Parts may carry a null or non-string "text".
                        content = " ".join(
                            str(c.get("text") or "") for c in content if isinstance(c, dict)
                        )
                    detail = str(content)[:70].replace("\n", " ")
                elif isinstance(msg, str):
                    # Keep the summary on one line, like the dict branch.
                    detail = msg[:70].replace("\n", " ")
                else:
                    detail = str(obj.get("type", ""))[:70]

                results.append((ts, detail or "log", obj))
    except OSError:
        # Includes PermissionError; unreadable logs are simply skipped.
        pass
    return results
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def collect_claude_code(profiles, dt_from, dt_to, home, classify_project, make_event):
    """Build "Claude Code CLI" events from ``~/.claude/projects/*/*.jsonl``.

    Each sub-directory of ``home/.claude/projects`` is scanned for JSONL
    logs.  Every in-range log entry becomes one event; the project is
    classified from the lower-cased directory name plus the entry detail.

    Returns:
        The list of events produced by ``make_event`` (empty when the
        projects directory does not exist).
    """
    root = home / ".claude" / "projects"
    events = []
    if not root.exists():
        return events

    def _project_logs():
        # Yield (lower-cased directory name, log path) for every project log.
        for entry in root.iterdir():
            if entry.is_dir():
                prefix = entry.name.lower()
                for log_path in entry.glob("*.jsonl"):
                    yield prefix, log_path

    for prefix, log_path in _project_logs():
        for when, summary, _obj in _read_jsonl_timestamps(log_path, dt_from, dt_to):
            label = classify_project(f"{prefix} {summary}", profiles)
            events.append(make_event("Claude Code CLI", when, summary, label))
    return events
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def collect_claude_desktop(profiles, dt_from, dt_to, home, classify_project, make_event):
    """Build "Claude Desktop" events from local agent-mode session logs.

    Recursively scans
    ``home/Library/Application Support/Claude/local-agent-mode-sessions``
    for ``*.jsonl`` files; every in-range entry becomes one event whose
    project is classified from the entry detail alone.

    Returns:
        The list of events produced by ``make_event`` (empty when the
        sessions directory does not exist).
    """
    root = home / "Library" / "Application Support" / "Claude" / "local-agent-mode-sessions"
    if not root.exists():
        return []

    events = []
    for log_path in root.glob("**/*.jsonl"):
        entries = _read_jsonl_timestamps(log_path, dt_from, dt_to)
        events.extend(
            make_event("Claude Desktop", when, summary, classify_project(summary, profiles))
            for when, summary, _obj in entries
        )
    return events
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def collect_gemini_cli(profiles, dt_from, dt_to, home, classify_project, make_event):
    """Build "Gemini CLI" events from ``~/.gemini/tmp/*/chats/session-*.json``.

    Each session file is expected to be a JSON object with a ``messages``
    list.  Every message with an in-range ``timestamp`` becomes one event.
    The project is classified from the lower-cased name of the directory
    two levels above the session file plus the message detail.

    Unreadable, undecodable or malformed files and messages are skipped
    rather than aborting the whole collection.

    Returns:
        The list of events produced by ``make_event``.
    """
    results = []
    base_dir = home / ".gemini" / "tmp"
    if not base_dir.exists():
        return results
    for chat_file in base_dir.glob("*/chats/session-*.json"):
        proj_name = chat_file.parent.parent.name.lower()
        try:
            data = json.loads(chat_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        messages = data.get("messages")
        if not isinstance(messages, list):
            continue
        for msg in messages:
            if not isinstance(msg, dict):
                continue
            ts_raw = msg.get("timestamp")
            if not ts_raw:
                continue
            try:
                ts = datetime.fromisoformat(str(ts_raw).replace("Z", "+00:00"))
            except ValueError:
                continue
            # A timestamp without an offset parses as naive; treat it as UTC
            # so the comparison with aware bounds cannot raise TypeError.
            if ts.tzinfo is None and getattr(dt_from, "tzinfo", None) is not None:
                ts = ts.replace(tzinfo=timezone.utc)
            if not (dt_from <= ts <= dt_to):
                continue
            content = msg.get("content", "")
            if isinstance(content, list):
                content = " ".join(
                    str(c.get("text") or "") for c in content if isinstance(c, dict)
                )
            detail = str(content)[:70].replace("\n", " ")
            role = msg.get("type", "")
            project = classify_project(f"{proj_name} {detail}", profiles)
            results.append(
                make_event("Gemini CLI", ts, f"[{role}] {detail}" if detail else "Gemini CLI", project)
            )
    return results
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def collect_codex_ide(
    profiles,
    dt_from,
    dt_to,
    codex_session_index: Path,
    classify_project,
    make_event,
):
    """Build "Codex IDE" events from a JSON Lines session index file.

    Each line is expected to be a JSON object with ``updated_at`` (ISO
    timestamp), and optionally ``thread_name`` and ``id``.  Lines that are
    blank, not a JSON object, lack a parseable ``updated_at`` or fall
    outside ``dt_from <= ts <= dt_to`` are skipped.

    Args:
        codex_session_index: Path of the index file; a missing or unreadable
            file yields an empty list.

    Returns:
        The list of events produced by ``make_event``; the project is
        classified from the thread name.
    """
    if not codex_session_index.is_file():
        return []
    results = []
    try:
        # errors="replace" keeps one undecodable byte from discarding the
        # whole index.
        text = codex_session_index.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(obj, dict):
            continue
        ts_raw = obj.get("updated_at")
        if not ts_raw:
            continue
        try:
            ts = datetime.fromisoformat(str(ts_raw).replace("Z", "+00:00"))
        except ValueError:
            continue
        # A timestamp without an offset parses as naive; treat it as UTC so
        # the comparison with aware bounds cannot raise TypeError.
        if ts.tzinfo is None and getattr(dt_from, "tzinfo", None) is not None:
            ts = ts.replace(tzinfo=timezone.utc)
        if not (dt_from <= ts <= dt_to):
            continue
        thread = str(obj.get("thread_name") or "").strip() or "session"
        # Short, dash-free prefix of the session id for display only.
        sid = str(obj.get("id") or "").replace("-", "")[:10]
        detail = f"{thread[:65]} — id {sid}…" if sid else thread[:70]
        project = classify_project(thread, profiles)
        results.append(make_event("Codex IDE", ts, detail, project))
    return results
|
collectors/chrome.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import sqlite3
|
|
6
|
+
import tempfile
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Callable, Dict
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _like_escape(value: str) -> str:
    r"""Return *value* with SQLite LIKE metacharacters escaped.

    A backslash is placed in front of every ``\``, ``%`` and ``_`` so the
    text matches literally.  The predicate using the result must declare
    the backslash as its escape character (``... LIKE ? ESCAPE '\'``).
    """
    pieces = []
    for ch in value:
        if ch in ("\\", "%", "_"):
            pieces.append("\\")
        pieces.append(ch)
    return "".join(pieces)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def chrome_history_path(home):
    """Return the path of the default Chrome profile's ``History`` file under *home*."""
    location = home
    for part in ("Library", "Application Support", "Google", "Chrome", "Default", "History"):
        location = location / part
    return location
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def query_chrome(history_path, where_clause, dt_from_cu, dt_to_cu, params=()):
    """Query visits from a temporary copy of a Chrome ``History`` database.

    The database is copied to a temporary file before it is opened and the
    copy is removed afterwards.

    Args:
        history_path: Path of the ``History`` SQLite file.
        where_clause: SQL boolean expression AND-ed to the time filter.  It
            is interpolated into the statement verbatim, so it must be
            built by trusted code and use ``?`` placeholders for values.
        dt_from_cu: Inclusive lower bound for ``visits.visit_time``.
        dt_to_cu: Inclusive upper bound for ``visits.visit_time``.
        params: Values bound to the placeholders in *where_clause*.

    Returns:
        A list of ``(visit_time, url, title)`` rows ordered by visit time.
        Returns ``[]`` when the file is missing; on any error a warning is
        printed and ``[]`` is returned (best effort).
    """
    if not history_path.exists():
        return []

    tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    tmp.close()
    rows = []
    try:
        shutil.copy2(history_path, tmp.name)
        conn = sqlite3.connect(tmp.name)
        try:
            rows = conn.execute(
                f"""
                SELECT v.visit_time, u.url, u.title
                FROM visits v
                JOIN urls u ON v.url = u.id
                WHERE v.visit_time BETWEEN ? AND ?
                AND ({where_clause})
                ORDER BY v.visit_time
                """,
                (dt_from_cu, dt_to_cu, *params),
            ).fetchall()
        finally:
            # Close even when the query fails, so the handle is not leaked
            # and the temporary file can be deleted.
            conn.close()
    except Exception as exc:
        print(f" [Warning] Chrome history: {exc}")
    finally:
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
    return rows
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def chrome_time_range(dt_from, dt_to, epoch_delta_us):
    """Convert a datetime window to a pair of Chrome-scale timestamps.

    Each bound is converted to UTC, expressed as whole microseconds since
    the Unix epoch, and shifted by *epoch_delta_us*.

    Returns:
        ``(from_value, to_value)`` as integers.
    """

    def _to_chrome_us(moment):
        unix_seconds = moment.astimezone(timezone.utc).timestamp()
        return int(unix_seconds * 1_000_000) + epoch_delta_us

    lower = _to_chrome_us(dt_from)
    upper = _to_chrome_us(dt_to)
    return lower, upper
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def chrome_ts(visit_time_cu, epoch_delta_us):
    """Convert a Chrome-scale microsecond timestamp to an aware UTC datetime.

    *epoch_delta_us* is subtracted first to obtain microseconds since the
    Unix epoch (the inverse of the shift applied by ``chrome_time_range``).
    """
    unix_us = visit_time_cu - epoch_delta_us
    unix_seconds = unix_us / 1_000_000
    return datetime.fromtimestamp(unix_seconds, tz=timezone.utc)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def normalize_chrome_url(url):
    """Reduce a URL to a lower-cased ``host/path`` key.

    Scheme, query string and fragment are dropped, an empty path becomes
    ``/`` and trailing slashes are removed from longer paths.  A falsy
    *url* yields ``""``.  If parsing fails, the first 200 characters of the
    URL, lower-cased, are returned instead.
    """
    if not url:
        return ""
    try:
        parts = urlparse(url)
        route = parts.path if parts.path else "/"
        if route.endswith("/") and len(route) > 1:
            route = route.rstrip("/")
        host = parts.netloc.lower()
        return f"{host}{route.lower()}"
    except Exception:
        fallback = url or ""
        return fallback[:200].lower()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def thin_chrome_visit_rows(rows, collapse_minutes, epoch_delta_us):
    """Drop repeat visits to the same normalized URL within a time window.

    Rows are processed in the given order.  A row is kept when its URL
    normalizes to an empty key, when the key has not been kept before, or
    when at least ``collapse_minutes`` minutes have passed since the last
    kept row with the same key.

    Args:
        rows: Sequence of ``(visit_time_cu, url, title)`` tuples.
        collapse_minutes: Window size; ``<= 0`` disables thinning.
        epoch_delta_us: Offset passed through to ``chrome_ts``.

    Returns:
        The kept rows as a new list, or *rows* itself when thinning is
        disabled or there is nothing to thin.
    """
    if collapse_minutes <= 0 or not rows:
        return rows

    min_gap_s = collapse_minutes * 60
    last_kept_at = {}
    kept = []
    for visit_time_cu, url, title in rows:
        when = chrome_ts(visit_time_cu, epoch_delta_us)
        key = normalize_chrome_url(url)
        if key:
            earlier = last_kept_at.get(key)
            if earlier is not None and (when - earlier).total_seconds() < min_gap_s:
                continue  # Too soon after the last kept visit to this URL.
            last_kept_at[key] = when
        kept.append((visit_time_cu, url, title))
    return kept
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def collect_claude_ai_urls(
    profiles,
    dt_from,
    dt_to,
    home,
    epoch_delta_us,
    uncategorized,
    make_event: Callable,
):
    """Build "Claude.ai (web)" events from Chrome visits to tracked URLs.

    Only profile ``tracked_urls`` entries that mention ``claude.ai`` are
    considered.  Chrome history is queried for visits whose URL contains
    any of them; each visit becomes one event.

    Project attribution picks the longest tracked URL contained in the
    visited URL, compared case-insensitively.  SQLite ``LIKE`` is
    case-insensitive for ASCII, so a case-sensitive comparison here could
    leave a row the query matched without a project.  Visits that match
    nothing are attributed to *uncategorized*.

    Returns:
        The list of events produced by ``make_event``; ``[]`` when no
        profile tracks a claude.ai URL.
    """
    url_map: Dict[str, str] = {}
    for profile in profiles:
        for url in profile["tracked_urls"]:
            if "claude.ai" not in str(url).lower():
                continue
            url_map[url] = profile["name"]
    if not url_map:
        return []

    clauses = " OR ".join(["u.url LIKE ? ESCAPE '\\'" for _ in url_map])
    clause_params = tuple(f"%{_like_escape(url)}%" for url in url_map)
    dt_from_cu, dt_to_cu = chrome_time_range(dt_from, dt_to, epoch_delta_us)
    history_path = chrome_history_path(home)
    rows = query_chrome(history_path, clauses, dt_from_cu, dt_to_cu, clause_params)

    # Most specific (longest) tracked URL first; sorted() is stable, so
    # equal-length entries keep their profile order.
    ranked = sorted(
        ((str(tracked).lower(), name) for tracked, name in url_map.items()),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )

    results = []
    for visit_time_cu, url, title in rows:
        ts = chrome_ts(visit_time_cu, epoch_delta_us)
        chat_id = url.split("/chat/")[-1].split("?")[0][:12] if "/chat/" in url else url[-20:]
        url_lc = url.lower()
        project = next(
            (name for tracked, name in ranked if tracked in url_lc),
            uncategorized,
        )
        detail = f"chat/{chat_id}… — {(title or '')[:40]}"
        results.append(make_event("Claude.ai (web)", ts, detail, project))
    return results
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def collect_gemini_web_urls(
    profiles,
    dt_from,
    dt_to,
    home,
    epoch_delta_us,
    uncategorized,
    make_event: Callable,
):
    """Build "Gemini (web)" events from Chrome visits to tracked URLs.

    Only profile ``tracked_urls`` entries that mention
    ``gemini.google.com`` are considered.  Chrome history is queried for
    visits whose URL contains any of them; each visit becomes one event.

    Project attribution picks the longest tracked URL contained in the
    visited URL.  The comparison is case-insensitive because SQLite
    ``LIKE`` is case-insensitive for ASCII; a case-sensitive check here
    could leave a row the query matched without a project.  Visits with no
    match, or whose matched project name is empty, fall back to
    *uncategorized*.

    Returns:
        The list of events produced by ``make_event``; ``[]`` when no
        profile tracks a gemini.google.com URL.
    """
    url_map: Dict[str, str] = {}
    for profile in profiles:
        for url in profile["tracked_urls"]:
            if "gemini.google.com" not in str(url).lower():
                continue
            url_map[url] = profile["name"]
    if not url_map:
        return []

    clauses = " OR ".join(["u.url LIKE ? ESCAPE '\\'" for _ in url_map])
    clause_params = tuple(f"%{_like_escape(url)}%" for url in url_map)
    dt_from_cu, dt_to_cu = chrome_time_range(dt_from, dt_to, epoch_delta_us)
    history_path = chrome_history_path(home)
    rows = query_chrome(history_path, clauses, dt_from_cu, dt_to_cu, clause_params)

    # Lower-case the tracked URLs once instead of once per row.
    tracked_lc = [(str(tracked).lower(), name) for tracked, name in url_map.items()]

    results = []
    for visit_time_cu, url, title in rows:
        ts = chrome_ts(visit_time_cu, epoch_delta_us)
        url_lc = url.lower()
        match = None
        best_len = -1
        for tracked, name in tracked_lc:
            # Strict ">" keeps the first entry on equal lengths.
            if tracked in url_lc and len(tracked) > best_len:
                match = name
                best_len = len(tracked)
        project = match or uncategorized
        chat_id = url.split("/app/")[-1].split("?")[0][:20] if "/app/" in url else url[-24:]
        detail = f"gemini/app/{chat_id}… — {(title or '')[:40]}"
        results.append(make_event("Gemini (web)", ts, detail, project))
    return results
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def collect_chrome(
    profiles,
    dt_from,
    dt_to,
    collapse_minutes,
    home,
    epoch_delta_us,
    classify_project: Callable,
    make_event: Callable,
):
    """Build "Chrome" events from history visits matching profile keywords.

    Keywords are the lower-cased, de-duplicated ``match_terms`` and
    ``name`` of every profile.  A visit is selected when its URL or title
    contains any keyword; visits to claude.ai and gemini.google.com are
    excluded by the query.  Repeat visits are thinned with
    ``thin_chrome_visit_rows`` before events are built.

    Returns:
        The list of events produced by ``make_event``; ``[]`` when the
        profiles yield no keywords.
    """
    all_keywords = sorted(
        {
            kw.lower()
            for profile in profiles
            # list(...) lets match_terms be any iterable (e.g. a tuple).
            for kw in (list(profile["match_terms"] or []) + [profile["name"]])
            if kw
        }
    )
    if not all_keywords:
        return []

    dt_from_cu, dt_to_cu = chrome_time_range(dt_from, dt_to, epoch_delta_us)
    kw_clauses = " OR ".join(
        ["(LOWER(u.url) LIKE ? ESCAPE '\\' OR LOWER(u.title) LIKE ? ESCAPE '\\')" for _ in all_keywords]
    )
    # Two placeholders per keyword: one for the URL, one for the title.
    kw_params = tuple(p for kw in all_keywords for p in (f"%{_like_escape(kw)}%", f"%{_like_escape(kw)}%"))
    where_clause = f"({kw_clauses}) AND u.url NOT LIKE ? AND u.url NOT LIKE ?"
    clause_params = (*kw_params, "%claude.ai%", "%gemini.google.com%")
    history_path = chrome_history_path(home)
    rows = query_chrome(history_path, where_clause, dt_from_cu, dt_to_cu, clause_params)
    rows = thin_chrome_visit_rows(rows, collapse_minutes, epoch_delta_us)
    results = []
    for visit_time_cu, url, title in rows:
        ts = chrome_ts(visit_time_cu, epoch_delta_us)
        detail = (title or url)[:70]
        # A NULL title must not put the literal text "None" into the
        # string used for project classification.
        project = classify_project(f"{url} {title or ''}", profiles)
        results.append(make_event("Chrome", ts, detail, project))
    return results
|
collectors/cursor.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from urllib.parse import unquote, urlparse
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_cursor_workspaces(home: Path) -> dict:
    """Map Cursor workspace-storage ids to their folder or workspace paths.

    Reads every ``workspace.json`` one level below
    ``home/Library/Application Support/Cursor/User/workspaceStorage``.  The
    id is the name of the containing directory; the path comes from the
    ``folder`` key (or ``workspace`` when ``folder`` is missing or empty).
    ``file:`` URIs are reduced to their percent-decoded path; any other
    value is stored unchanged.

    Files that cannot be read or decoded, are not a JSON object, or carry
    no string path are skipped.

    Returns:
        ``{workspace_id: path}``; empty when the storage directory does not
        exist.
    """
    storage_dir = home / "Library" / "Application Support" / "Cursor" / "User" / "workspaceStorage"
    workspace_map = {}
    if not storage_dir.exists():
        return workspace_map
    for workspace_json in storage_dir.glob("*/workspace.json"):
        workspace_id = workspace_json.parent.name
        try:
            data = json.loads(workspace_json.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        raw_uri = data.get("folder") or data.get("workspace")
        if not raw_uri or not isinstance(raw_uri, str):
            continue
        try:
            parsed = urlparse(raw_uri)
        except ValueError:
            # urlparse rejects some malformed authorities; keep the raw text.
            workspace_map[workspace_id] = raw_uri
            continue
        path = unquote(parsed.path) if parsed.scheme == "file" else raw_uri
        workspace_map[workspace_id] = path
    return workspace_map
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def collect_cursor(profiles, dt_from, dt_to, home, local_tz, classify_project, make_event):
    """Build "Cursor" events from Cursor's ``*.log`` files.

    Recursively scans ``home/Library/Application Support/Cursor/logs``.  A
    log line becomes an event when it starts with a recognised timestamp
    inside ``dt_from <= ts <= dt_to`` and a workspace path can be derived
    from it.  The path comes either from a 32-hex-digit workspace id
    resolved through ``load_cursor_workspaces`` or from a literal
    ``/Users/...`` path in the line.

    Args:
        local_tz: Time zone attached to timestamps that carry no offset.

    Returns:
        The list of events produced by ``make_event``; ``[]`` when the logs
        directory does not exist.  Unreadable log files are skipped.
    """
    logs_dir = home / "Library" / "Application Support" / "Cursor" / "logs"
    if not logs_dir.exists():
        return []
    # Loaded only after the early exit, so no workspace scan happens when
    # there are no logs to read.
    workspace_map = load_cursor_workspaces(home)

    results = []
    ts_pattern = re.compile(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")
    ts_iso_bracket_pattern = re.compile(r"^\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?)(Z|[+-]\d{2}:\d{2})?\]")
    workspace_id_pattern = re.compile(r"workspaceStorage/([0-9a-f]{32})|old id ([0-9a-f]{32})-")
    workspace_path_pattern = re.compile(r"(/Users/[^\"'\s]+)")

    def _parse_cursor_log_ts(line: str):
        """Return the aware timestamp at the start of *line*, or ``None``."""
        m = ts_pattern.match(line)
        if m:
            try:
                return datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S").replace(tzinfo=local_tz)
            except ValueError:
                return None
        m = ts_iso_bracket_pattern.match(line)
        if m:
            iso = (m.group(1) + (m.group(2) or "")).replace("Z", "+00:00")
            try:
                parsed = datetime.fromisoformat(iso)
            except ValueError:
                return None
            # The offset group is optional; without it the value is naive
            # and comparing it with aware bounds would raise TypeError.
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=local_tz)
            return parsed
        return None

    for log_file in logs_dir.glob("**/*.log"):
        try:
            with open(log_file, encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    ts = _parse_cursor_log_ts(line)
                    if not ts or not (dt_from <= ts <= dt_to):
                        continue
                    workspace_path = None
                    m_id = workspace_id_pattern.search(line)
                    if m_id and workspace_map:
                        workspace_id = m_id.group(1) or m_id.group(2)
                        workspace_path = workspace_map.get(workspace_id)
                    if not workspace_path:
                        m_path = workspace_path_pattern.search(line)
                        if m_path:
                            workspace_path = m_path.group(1)
                    if not workspace_path:
                        continue
                    project = classify_project(f"{workspace_path} {line}", profiles)
                    detail = f"{Path(workspace_path).name} — {line.strip()[:90]}"
                    results.append(make_event("Cursor", ts, detail, project))
        except OSError:
            continue
    return results
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def collect_cursor_checkpoints(
    profiles,
    dt_from,
    dt_to,
    checkpoints_dir: Path,
    home: Path,
    classify_project,
    make_event,
    source_name: str,
):
    """Build events from Cursor checkpoint ``metadata.json`` files.

    Reads every ``metadata.json`` one level below *checkpoints_dir*.  A
    checkpoint becomes an event when its
    ``startTrackingDateUnixMilliseconds`` lies inside
    ``dt_from <= ts <= dt_to`` and at least one path is known for it.  The
    path comes from a ``requestFiles[].fsPath`` entry or from the
    ``workspaceId`` resolved through ``load_cursor_workspaces``.

    Malformed metadata (undecodable, not an object, non-numeric timestamp,
    non-object file entries) is skipped rather than aborting the scan.

    Args:
        checkpoints_dir: Directory holding one sub-directory per checkpoint.
        home: Home directory used to resolve workspace ids.
        source_name: Source label passed to ``make_event``.

    Returns:
        The list of events produced by ``make_event``; ``[]`` when
        *checkpoints_dir* is not a directory.
    """
    if not checkpoints_dir.is_dir():
        return []
    # Resolved on first use: many runs never meet a workspaceId in range.
    workspace_map = None
    results = []
    for meta_path in checkpoints_dir.glob("*/metadata.json"):
        try:
            data = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        ms = data.get("startTrackingDateUnixMilliseconds")
        if ms is None:
            continue
        try:
            ts = datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
        except (OSError, ValueError, OverflowError, TypeError):
            # TypeError: the field is present but not a number.
            continue
        if not (dt_from <= ts <= dt_to):
            continue
        paths = []
        request_files = data.get("requestFiles")
        if isinstance(request_files, list):
            for rf in request_files:
                p = rf.get("fsPath") if isinstance(rf, dict) else None
                if p:
                    paths.append(str(p))
        wid = data.get("workspaceId")
        if wid and isinstance(wid, str):
            if workspace_map is None:
                workspace_map = load_cursor_workspaces(home)
            mapped = workspace_map.get(wid)
            if mapped:
                paths.append(str(mapped))
        hay = " ".join(paths)
        if not hay:
            continue
        project = classify_project(hay, profiles)
        # First dash-separated segment of the request id, at most 8 chars.
        agent_id = str(data.get("agentRequestId", "")).split("-")[0][:8]
        label = Path(paths[0]).name if paths else "checkpoint"
        detail = f"checkpoint {agent_id}… — {label}"
        results.append(make_event(source_name, ts, detail, project))
    return results
|
collectors/github.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""GitHub public activity via REST API (optional; requires username)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
9
|
+
from urllib.error import HTTPError, URLError
|
|
10
|
+
from urllib.request import Request, urlopen
|
|
11
|
+
|
|
12
|
+
from core.cli_options import package_version
|
|
13
|
+
from core.sources import GITHUB_SOURCE
|
|
14
|
+
|
|
15
|
+
# User-Agent header sent with every GitHub API request: tool name, installed
# package version and the project URL.
USER_AGENT = f"timelog-extract/{package_version()} (+https://github.com/mbjorke/timelog-extract)"

# Paging limits for the public-events endpoint: events requested per page and
# the maximum number of pages fetched in one run.
PER_PAGE = 100
MAX_PAGES = 10
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def resolve_github_username(args: Any) -> str:
    """Return the GitHub username to use, or ``""`` when none is configured.

    A non-blank ``args.github_user`` (CLI ``--github-user``) wins; otherwise
    the first non-empty of the ``GITHUB_USER`` and ``GITHUB_LOGIN``
    environment variables is used.  The result is always stripped.
    """
    cli_value = getattr(args, "github_user", None)
    if cli_value:
        trimmed = str(cli_value).strip()
        if trimmed:
            return trimmed
    for variable in ("GITHUB_USER", "GITHUB_LOGIN"):
        from_env = os.environ.get(variable)
        if from_env:
            return from_env.strip()
    return ""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def github_source_enabled(args: Any) -> tuple[bool, Optional[str]]:
    """Decide whether the GitHub source should run.

    Reads ``args.github_source`` (default ``"auto"``).  ``"off"`` always
    disables the source; any other mode enables it only when a username can
    be resolved.

    Returns:
        ``(True, None)`` when enabled, otherwise ``(False, reason)`` with a
        human-readable explanation.
    """
    mode = getattr(args, "github_source", "auto")
    if mode == "off":
        return False, "GitHub source disabled via --github-source off"

    if resolve_github_username(args):
        return True, None

    # No username available: pick the message that fits the requested mode.
    if mode == "on":
        reason = "GitHub on but no username (use --github-user or GITHUB_USER)"
    elif mode == "auto":
        reason = "no GitHub username (set --github-user or GITHUB_USER for this source)"
    else:
        reason = "no GitHub username"
    return False, reason
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_github_ts(created_at: str) -> datetime:
    """Parse an ISO-8601 timestamp, accepting a trailing ``Z`` as UTC.

    Raises:
        ValueError: If the text is not a valid ISO-8601 timestamp.
    """
    iso_text = f"{created_at[:-1]}+00:00" if created_at.endswith("Z") else created_at
    return datetime.fromisoformat(iso_text)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _detail_for_event(ev: Dict[str, Any]) -> str:
    """Return a one-line, human-readable summary of a GitHub event.

    Dedicated formats exist for push, pull-request, issue, create, delete,
    release, fork and watch events; any other type is rendered as
    ``"<type> (<repo>)"``.  Missing or null fields fall back to neutral
    placeholders so the function never raises on sparse payloads.

    Args:
        ev: One decoded event object from the GitHub events API.
    """
    et = ev.get("type") or "unknown"
    repo = (ev.get("repo") or {}).get("name") or "unknown/repo"
    payload = ev.get("payload") or {}

    if et == "PushEvent":
        commits = payload.get("commits") or []
        # Fall back to the reported size when the commit list is absent.
        n = len(commits) if commits else payload.get("size") or 0
        ref = (payload.get("ref") or "").split("/")[-1] or "default"
        return f"push to {repo} ({n} commits, ref {ref})"

    if et == "PullRequestEvent":
        pr = payload.get("pull_request") or {}
        title = (pr.get("title") or "").strip() or "(no title)"
        action = payload.get("action") or "?"
        num = pr.get("number", "")
        return f"PR #{num} {action}: {title} ({repo})"

    if et == "IssuesEvent":
        issue = payload.get("issue") or {}
        title = (issue.get("title") or "").strip() or "(no title)"
        action = payload.get("action") or "?"
        num = issue.get("number", "")
        return f"issue #{num} {action}: {title} ({repo})"

    if et == "CreateEvent":
        ref = payload.get("ref") or ""
        desc = payload.get("description") or ""
        rt = payload.get("ref_type") or "ref"
        extra = f" {desc}" if desc else ""
        return f"created {rt} {ref} in {repo}{extra}"

    if et == "DeleteEvent":
        ref = payload.get("ref") or ""
        # "or" (not a .get default) so a null ref_type does not print
        # "None", matching the CreateEvent branch.
        rt = payload.get("ref_type") or "ref"
        return f"deleted {rt} {ref} in {repo}"

    if et == "ReleaseEvent":
        rel = payload.get("release") or {}
        tag = rel.get("tag_name") or rel.get("name") or "release"
        return f"release {tag} ({repo})"

    if et == "ForkEvent":
        fork = (payload.get("forkee") or {}).get("full_name") or "fork"
        return f"forked {repo} → {fork}"

    if et == "WatchEvent":
        return f"starred {repo}"

    return f"{et} ({repo})"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def collect_public_events(
    profiles: List[Dict[str, Any]],
    dt_from: datetime,
    dt_to: datetime,
    *,
    username: str,
    token: Optional[str],
    classify_project: Callable[..., str],
    make_event: Callable[..., Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Fetch `/users/{username}/events/public` (newest first; GitHub retains ~300 recent events).

    Events outside the API window will not appear — sparse for old ranges.

    Pages are requested until an event older than ``dt_from`` is seen, a
    short or empty page is returned, or ``MAX_PAGES`` is reached.  Naive
    bounds are interpreted as UTC.

    Args:
        profiles: Project profiles passed through to ``classify_project``.
        dt_from: Inclusive lower bound of the time window.
        dt_to: Inclusive upper bound of the time window.
        username: GitHub login; an empty value returns ``[]`` without any
            network access.
        token: Optional token sent as a ``Bearer`` authorization header.
        classify_project: Called with ``(haystack, profiles)``.
        make_event: Called with ``(source, timestamp, detail, project)``.

    Returns:
        The events produced by ``make_event``, in API order.

    Raises:
        RuntimeError: On an HTTP or network error, or when the API returns
            a body that is not a JSON list.
    """
    if not username:
        return []

    # Local import: keeps this block self-contained without touching the
    # module's import section.
    from urllib.parse import quote

    results: List[Dict[str, Any]] = []
    # Normalize bounds to aware UTC for comparison
    if dt_from.tzinfo is None:
        dt_from = dt_from.replace(tzinfo=timezone.utc)
    if dt_to.tzinfo is None:
        dt_to = dt_to.replace(tzinfo=timezone.utc)
    dt_from_utc = dt_from.astimezone(timezone.utc)
    dt_to_utc = dt_to.astimezone(timezone.utc)

    # The username comes from the CLI or environment; quote it so it cannot
    # change the request path or query string.
    user_path = quote(username, safe="")

    for page in range(1, MAX_PAGES + 1):
        url = f"https://api.github.com/users/{user_path}/events/public?per_page={PER_PAGE}&page={page}"
        req = Request(url, headers={"User-Agent": USER_AGENT, "Accept": "application/vnd.github+json"})
        if token:
            req.add_header("Authorization", f"Bearer {token}")
        try:
            with urlopen(req, timeout=30) as resp:
                raw = resp.read().decode("utf-8")
        except HTTPError as exc:
            if exc.code == 422 and page > 1:
                # NOTE(review): GitHub appears to answer 422 once paging
                # goes past the events it retains; confirm against the API
                # docs.  Keep what was already collected instead of failing.
                break
            raise RuntimeError(f"GitHub API HTTP {exc.code}: {exc.reason}") from exc
        except URLError as exc:
            raise RuntimeError(f"GitHub API network error: {exc.reason}") from exc

        batch = json.loads(raw)
        if not batch:
            break
        if not isinstance(batch, list):
            raise RuntimeError("GitHub API returned an unexpected (non-list) response")

        stop_paging = False
        for ev in batch:
            if not isinstance(ev, dict):
                continue
            created = ev.get("created_at")
            if not created:
                continue
            try:
                ts = _parse_github_ts(created)
            except (AttributeError, TypeError, ValueError):
                # Skip an event with an unusable timestamp rather than
                # discarding the whole run.
                continue
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=timezone.utc)
            if ts > dt_to_utc:
                continue
            if ts < dt_from_utc:
                # Events arrive newest first, so everything after this one
                # is older than the window.
                stop_paging = True
                break
            detail = _detail_for_event(ev)
            repo = (ev.get("repo") or {}).get("name") or ""
            haystack = f"{repo} {detail}"
            project = classify_project(haystack, profiles)
            results.append(make_event(GITHUB_SOURCE, ts, detail, project))

        if stop_paging:
            break
        if len(batch) < PER_PAGE:
            break

    return results
|