timelog-extract 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. collectors/__init__.py +1 -0
  2. collectors/ai_logs.py +149 -0
  3. collectors/chrome.py +219 -0
  4. collectors/cursor.py +130 -0
  5. collectors/github.py +169 -0
  6. collectors/mail.py +109 -0
  7. collectors/timelog.py +135 -0
  8. core/__init__.py +1 -0
  9. core/analytics.py +59 -0
  10. core/cli.py +38 -0
  11. core/cli_app.py +13 -0
  12. core/cli_doctor_sources_projects.py +407 -0
  13. core/cli_global_timelog_setup.py +53 -0
  14. core/cli_options.py +85 -0
  15. core/cli_prompts.py +68 -0
  16. core/cli_report_status.py +236 -0
  17. core/collector_registry.py +89 -0
  18. core/config.py +105 -0
  19. core/domain.py +65 -0
  20. core/engine_api.py +81 -0
  21. core/events.py +52 -0
  22. core/global_timelog_setup_lib.py +477 -0
  23. core/pipeline.py +129 -0
  24. core/report_aggregate.py +67 -0
  25. core/report_cli.py +157 -0
  26. core/report_runtime.py +226 -0
  27. core/report_service.py +367 -0
  28. core/runtime_collectors.py +177 -0
  29. core/screen_time.py +79 -0
  30. core/sources.py +31 -0
  31. core/truth_payload.py +171 -0
  32. outputs/__init__.py +1 -0
  33. outputs/gittan_banner.py +31 -0
  34. outputs/html_timeline.py +174 -0
  35. outputs/narrative.py +124 -0
  36. outputs/pdf.py +244 -0
  37. outputs/terminal.py +304 -0
  38. outputs/terminal_theme.py +23 -0
  39. scripts/__init__.py +1 -0
  40. scripts/check_file_lengths.py +61 -0
  41. scripts/eval_accuracy.py +150 -0
  42. scripts/friend_trial.py +63 -0
  43. scripts/manual_matrix_automation.py +229 -0
  44. scripts/run_engine_report.py +79 -0
  45. scripts/run_golden_eval.py +197 -0
  46. timelog_extract-0.2.3.dist-info/METADATA +855 -0
  47. timelog_extract-0.2.3.dist-info/RECORD +52 -0
  48. timelog_extract-0.2.3.dist-info/WHEEL +5 -0
  49. timelog_extract-0.2.3.dist-info/entry_points.txt +4 -0
  50. timelog_extract-0.2.3.dist-info/licenses/LICENSE +676 -0
  51. timelog_extract-0.2.3.dist-info/top_level.txt +5 -0
  52. timelog_extract.py +24 -0
collectors/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Collectors package for source-specific event extraction."""
collectors/ai_logs.py ADDED
@@ -0,0 +1,149 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+
7
+
8
def _read_jsonl_timestamps(jsonl_file, dt_from, dt_to):
    """Return timestamped records from a JSON Lines file that fall in a range.

    Each non-empty line is parsed as JSON. Lines that are not valid JSON, are
    not JSON objects, carry no timestamp, or carry an unparsable timestamp are
    skipped. The timestamp is taken from the first truthy key among
    ``timestamp``, ``ts``, ``created_at`` and ``time``.

    Args:
        jsonl_file: Path of the file to read.
        dt_from: Inclusive lower bound.
        dt_to: Inclusive upper bound.

    Returns:
        A list of ``(timestamp, detail, obj)`` tuples, where ``detail`` is a
        single-line summary of at most 70 characters (``"log"`` when empty)
        and ``obj`` is the parsed JSON object. An unreadable file yields the
        records collected so far (usually an empty list).
    """
    results = []
    try:
        with open(jsonl_file, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line may still be a list, string or number.
                if not isinstance(obj, dict):
                    continue

                ts_raw = obj.get("timestamp") or obj.get("ts") or obj.get("created_at") or obj.get("time")
                if ts_raw is None:
                    continue
                try:
                    if isinstance(ts_raw, (int, float)):
                        # Values above 1e11 are treated as milliseconds.
                        divisor = 1000 if ts_raw > 1e11 else 1
                        ts = datetime.fromtimestamp(ts_raw / divisor, tz=timezone.utc)
                    else:
                        ts = datetime.fromisoformat(str(ts_raw).replace("Z", "+00:00"))
                except (ValueError, OSError, OverflowError):
                    continue

                # A naive timestamp cannot be compared with aware bounds.
                # NOTE(review): naive values are assumed to be UTC, matching
                # the numeric branch above - confirm against the log producers.
                if ts.tzinfo is None and dt_from.tzinfo is not None:
                    ts = ts.replace(tzinfo=timezone.utc)

                if not (dt_from <= ts <= dt_to):
                    continue

                msg = obj.get("message", {})
                if isinstance(msg, dict):
                    content = msg.get("content", "")
                    if isinstance(content, list):
                        # Coerce each part to text so null/non-string parts
                        # cannot break the join.
                        content = " ".join(
                            str(c.get("text") or "") for c in content if isinstance(c, dict)
                        )
                    detail = str(content)[:70].replace("\n", " ")
                elif isinstance(msg, str):
                    detail = msg[:70].replace("\n", " ")
                else:
                    detail = str(obj.get("type", ""))[:70]

                results.append((ts, detail or "log", obj))
    except OSError:
        # Best effort: an unreadable file contributes nothing further.
        pass
    return results
51
+
52
+
53
def collect_claude_code(profiles, dt_from, dt_to, home, classify_project, make_event):
    """Build "Claude Code CLI" events from ``~/.claude/projects/*/*.jsonl``.

    Every in-range record found by ``_read_jsonl_timestamps`` becomes one
    event. The project is classified from the lower-cased project directory
    name followed by the record's detail text.

    Returns:
        The list of events produced by ``make_event``; empty when the
        projects directory does not exist.
    """
    events = []
    root = home / ".claude" / "projects"
    if not root.exists():
        return events
    for entry in (child for child in root.iterdir() if child.is_dir()):
        folder = entry.name.lower()
        for log_path in entry.glob("*.jsonl"):
            for stamp, summary, _raw in _read_jsonl_timestamps(log_path, dt_from, dt_to):
                owner = classify_project(f"{folder} {summary}", profiles)
                events.append(make_event("Claude Code CLI", stamp, summary, owner))
    return events
67
+
68
+
69
def collect_claude_desktop(profiles, dt_from, dt_to, home, classify_project, make_event):
    """Build "Claude Desktop" events from local agent-mode session logs.

    Recursively scans ``Library/Application Support/Claude/
    local-agent-mode-sessions`` under *home* for ``*.jsonl`` files and turns
    each in-range record into an event, classified from its detail text.

    Returns:
        The list of events produced by ``make_event``; empty when the
        sessions directory does not exist.
    """
    events = []
    root = home / "Library" / "Application Support" / "Claude" / "local-agent-mode-sessions"
    if not root.exists():
        return events
    for log_path in root.glob("**/*.jsonl"):
        for stamp, summary, _raw in _read_jsonl_timestamps(log_path, dt_from, dt_to):
            owner = classify_project(summary, profiles)
            events.append(make_event("Claude Desktop", stamp, summary, owner))
    return events
79
+
80
+
81
def collect_gemini_cli(profiles, dt_from, dt_to, home, classify_project, make_event):
    """Build "Gemini CLI" events from ``~/.gemini/tmp/*/chats/session-*.json``.

    Each session file is expected to be a JSON object with a ``messages``
    list. Files that cannot be read, decoded or parsed, and entries that are
    not objects or lack a usable ``timestamp``, are skipped instead of
    aborting the whole collection.

    Args:
        profiles: Project profiles forwarded to ``classify_project``.
        dt_from: Inclusive lower bound.
        dt_to: Inclusive upper bound.
        home: Home directory (a ``Path``).
        classify_project: Callable ``(haystack, profiles) -> project``.
        make_event: Callable ``(source, ts, detail, project) -> event``.

    Returns:
        The list of events produced by ``make_event``.
    """
    results = []
    base_dir = home / ".gemini" / "tmp"
    if not base_dir.exists():
        return results
    for chat_file in base_dir.glob("*/chats/session-*.json"):
        proj_name = chat_file.parent.parent.name.lower()
        try:
            data = json.loads(chat_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        messages = data.get("messages")
        if not isinstance(messages, list):
            continue
        for msg in messages:
            if not isinstance(msg, dict):
                continue
            ts_raw = msg.get("timestamp")
            if not ts_raw:
                continue
            try:
                ts = datetime.fromisoformat(str(ts_raw).replace("Z", "+00:00"))
            except ValueError:
                continue
            # A naive timestamp cannot be compared with aware bounds.
            # NOTE(review): naive values are assumed to be UTC - confirm.
            if ts.tzinfo is None and dt_from.tzinfo is not None:
                ts = ts.replace(tzinfo=timezone.utc)
            if not (dt_from <= ts <= dt_to):
                continue
            content = msg.get("content", "")
            if isinstance(content, list):
                content = " ".join(
                    str(c.get("text") or "") for c in content if isinstance(c, dict)
                )
            detail = str(content)[:70].replace("\n", " ")
            role = msg.get("type", "")
            project = classify_project(f"{proj_name} {detail}", profiles)
            results.append(
                make_event("Gemini CLI", ts, f"[{role}] {detail}" if detail else "Gemini CLI", project)
            )
    return results
110
+
111
+
112
def collect_codex_ide(
    profiles,
    dt_from,
    dt_to,
    codex_session_index: Path,
    classify_project,
    make_event,
):
    """Build "Codex IDE" events from a JSON Lines session index file.

    Each line is expected to be a JSON object with ``updated_at`` and
    optionally ``thread_name`` and ``id``. Lines that are blank, invalid, not
    objects, or without a parsable ``updated_at`` are skipped.

    Args:
        profiles: Project profiles forwarded to ``classify_project``.
        dt_from: Inclusive lower bound.
        dt_to: Inclusive upper bound.
        codex_session_index: Path of the index file.
        classify_project: Callable ``(haystack, profiles) -> project``.
        make_event: Callable ``(source, ts, detail, project) -> event``.

    Returns:
        The list of events produced by ``make_event``; empty when the index
        is missing, unreadable or not valid UTF-8.
    """
    if not codex_session_index.is_file():
        return []
    results = []
    try:
        text = codex_session_index.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        # A valid JSON line may still be a list, string or number.
        if not isinstance(obj, dict):
            continue
        ts_raw = obj.get("updated_at")
        if not ts_raw:
            continue
        try:
            ts = datetime.fromisoformat(str(ts_raw).replace("Z", "+00:00"))
        except ValueError:
            continue
        # A naive timestamp cannot be compared with aware bounds.
        # NOTE(review): naive values are assumed to be UTC - confirm.
        if ts.tzinfo is None and dt_from.tzinfo is not None:
            ts = ts.replace(tzinfo=timezone.utc)
        if not (dt_from <= ts <= dt_to):
            continue
        thread = str(obj.get("thread_name") or "").strip() or "session"
        # Short id: hyphens removed, first 10 characters.
        sid = str(obj.get("id") or "").replace("-", "")[:10]
        detail = f"{thread[:65]} — id {sid}…" if sid else thread[:70]
        project = classify_project(thread, profiles)
        results.append(make_event("Codex IDE", ts, detail, project))
    return results
collectors/chrome.py ADDED
@@ -0,0 +1,219 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ import sqlite3
6
+ import tempfile
7
+ from datetime import datetime, timezone
8
+ from typing import Callable, Dict
9
+ from urllib.parse import urlparse
10
+
11
+
12
def _like_escape(value: str) -> str:
    """Return *value* with SQLite LIKE metacharacters made literal.

    A backslash is placed before every backslash, ``%`` and ``_``. The
    predicate using the result must declare backslash as its escape
    character (ESCAPE '\\\\').
    """
    special = ("\\", "%", "_")
    return "".join("\\" + ch if ch in special else ch for ch in value)
19
+
20
+
21
def chrome_history_path(home):
    """Return ``Library/Application Support/Google/Chrome/Default/History`` under *home*."""
    location = home
    for part in ("Library", "Application Support", "Google", "Chrome", "Default", "History"):
        location = location / part
    return location
23
+
24
+
25
def query_chrome(history_path, where_clause, dt_from_cu, dt_to_cu, params=()):
    """Query visits from a copy of a Chrome history database.

    The database is copied to a temporary file before it is opened, and the
    copy is removed afterwards. Any failure while copying or querying is
    reported as a printed warning and yields an empty result (best effort).

    Args:
        history_path: Path of the Chrome ``History`` SQLite file.
        where_clause: SQL fragment AND-ed with the time-range filter. It is
            interpolated into the statement, so it must come from trusted
            code; variable values belong in *params* as ``?`` placeholders.
        dt_from_cu: Inclusive lower bound, in the database's ``visit_time`` units.
        dt_to_cu: Inclusive upper bound, in the same units.
        params: Values bound to the placeholders in *where_clause*.

    Returns:
        A list of ``(visit_time, url, title)`` rows ordered by visit time.
    """
    if not history_path.exists():
        return []

    tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    tmp.close()
    rows = []
    conn = None
    try:
        shutil.copy2(history_path, tmp.name)
        conn = sqlite3.connect(tmp.name)
        cursor = conn.cursor()
        cursor.execute(
            f"""
            SELECT v.visit_time, u.url, u.title
            FROM visits v
            JOIN urls u ON v.url = u.id
            WHERE v.visit_time BETWEEN ? AND ?
            AND ({where_clause})
            ORDER BY v.visit_time
            """,
            (dt_from_cu, dt_to_cu, *params),
        )
        rows = cursor.fetchall()
    except Exception as exc:
        # Deliberately broad: a broken history file must not abort the report.
        print(f" [Warning] Chrome history: {exc}")
    finally:
        # Close on every path; an open handle can block removal of the copy.
        if conn is not None:
            conn.close()
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
    return rows
57
+
58
+
59
def chrome_time_range(dt_from, dt_to, epoch_delta_us):
    """Convert two datetimes to microsecond counts shifted by *epoch_delta_us*.

    Each bound is converted to UTC, expressed as whole microseconds since the
    Unix epoch, and offset by *epoch_delta_us*.

    Returns:
        A ``(from, to)`` tuple of integers.
    """

    def _shifted_micros(moment):
        return int(moment.astimezone(timezone.utc).timestamp() * 1_000_000) + epoch_delta_us

    lower = _shifted_micros(dt_from)
    upper = _shifted_micros(dt_to)
    return (lower, upper)
64
+
65
+
66
def chrome_ts(visit_time_cu, epoch_delta_us):
    """Convert a shifted microsecond count back to an aware UTC datetime.

    *epoch_delta_us* is subtracted from *visit_time_cu* and the remainder is
    read as microseconds since the Unix epoch.
    """
    unix_seconds = (visit_time_cu - epoch_delta_us) / 1_000_000
    return datetime.fromtimestamp(unix_seconds, tz=timezone.utc)
70
+
71
+
72
def normalize_chrome_url(url):
    """Reduce a URL to a lower-cased ``host/path`` key.

    Scheme, query string and fragment are dropped. An empty path becomes
    ``/`` and trailing slashes are removed from longer paths. A falsy *url*
    yields ``""``. If parsing fails, the first 200 characters of the URL,
    lower-cased, are returned instead.
    """
    if not url:
        return ""
    try:
        pieces = urlparse(url)
        route = pieces.path if pieces.path else "/"
        if len(route) > 1 and route.endswith("/"):
            route = route.rstrip("/")
        host = pieces.netloc.lower()
        return f"{host}{route.lower()}"
    except Exception:
        fallback = url or ""
        return fallback[:200].lower()
83
+
84
+
85
def thin_chrome_visit_rows(rows, collapse_minutes, epoch_delta_us):
    """Drop repeat visits to the same normalized URL within a time window.

    Rows are processed in the given order. A row is kept when its normalized
    URL has not been kept before, or when at least *collapse_minutes* have
    passed since the last kept row for that URL. Rows whose URL normalizes to
    an empty string are always kept. With a non-positive window or no rows,
    *rows* is returned unchanged.

    Returns:
        A list of ``(visit_time, url, title)`` tuples.
    """
    if collapse_minutes <= 0 or not rows:
        return rows
    window_seconds = collapse_minutes * 60
    last_kept = {}
    kept = []
    for visit_time_cu, url, title in rows:
        moment = chrome_ts(visit_time_cu, epoch_delta_us)
        key = normalize_chrome_url(url)
        if key:
            earlier = last_kept.get(key)
            if earlier is not None and (moment - earlier).total_seconds() < window_seconds:
                continue
            last_kept[key] = moment
        kept.append((visit_time_cu, url, title))
    return kept
103
+
104
+
105
def collect_claude_ai_urls(
    profiles,
    dt_from,
    dt_to,
    home,
    epoch_delta_us,
    uncategorized,
    make_event: Callable,
):
    """Build "Claude.ai (web)" events from Chrome visits to tracked URLs.

    Only tracked URLs containing ``claude.ai`` are considered. When a visited
    URL contains several tracked URLs, the longest (most specific) one decides
    the project, as in ``collect_gemini_web_urls``.

    Args:
        profiles: Profiles with ``name`` and ``tracked_urls`` entries.
        dt_from: Lower bound of the reporting range.
        dt_to: Upper bound of the reporting range.
        home: Home directory used to locate the Chrome history file.
        epoch_delta_us: Microsecond offset between Chrome's and Unix time.
        uncategorized: Project used when no tracked URL matches.
        make_event: Callable ``(source, ts, detail, project) -> event``.

    Returns:
        The list of events produced by ``make_event``; empty when no profile
        tracks a claude.ai URL.
    """
    url_map: Dict[str, str] = {}
    for profile in profiles:
        for url in profile["tracked_urls"]:
            if "claude.ai" not in str(url).lower():
                continue
            url_map[url] = profile["name"]
    if not url_map:
        return []

    clauses = " OR ".join(["u.url LIKE ? ESCAPE '\\'" for _ in url_map])
    clause_params = tuple(f"%{_like_escape(url)}%" for url in url_map)
    dt_from_cu, dt_to_cu = chrome_time_range(dt_from, dt_to, epoch_delta_us)
    history_path = chrome_history_path(home)
    rows = query_chrome(history_path, clauses, dt_from_cu, dt_to_cu, clause_params)

    results = []
    for visit_time_cu, url, title in rows:
        ts = chrome_ts(visit_time_cu, epoch_delta_us)
        chat_id = url.split("/chat/")[-1].split("?")[0][:12] if "/chat/" in url else url[-20:]
        # The most specific tracked URL wins, so a broad entry such as the
        # site root cannot shadow a per-chat entry of another profile.
        matching = [tracked_url for tracked_url in url_map if tracked_url in url]
        project = url_map[max(matching, key=len)] if matching else uncategorized
        detail = f"chat/{chat_id}… — {(title or '')[:40]}"
        results.append(make_event("Claude.ai (web)", ts, detail, project))
    return results
140
+
141
+
142
def collect_gemini_web_urls(
    profiles,
    dt_from,
    dt_to,
    home,
    epoch_delta_us,
    uncategorized,
    make_event: Callable,
):
    """Build "Gemini (web)" events from Chrome visits to tracked URLs.

    Only tracked URLs containing ``gemini.google.com`` are considered. The
    longest tracked URL contained in a visited URL decides the project;
    *uncategorized* is used when nothing matches or the matching profile
    name is empty.

    Returns:
        The list of events produced by ``make_event``; empty when no profile
        tracks a Gemini URL.
    """
    url_map: Dict[str, str] = {
        url: profile["name"]
        for profile in profiles
        for url in profile["tracked_urls"]
        if "gemini.google.com" in str(url).lower()
    }
    if not url_map:
        return []

    placeholders = ["u.url LIKE ? ESCAPE '\\'"] * len(url_map)
    clauses = " OR ".join(placeholders)
    clause_params = tuple(f"%{_like_escape(tracked)}%" for tracked in url_map)
    lower_cu, upper_cu = chrome_time_range(dt_from, dt_to, epoch_delta_us)
    rows = query_chrome(chrome_history_path(home), clauses, lower_cu, upper_cu, clause_params)

    events = []
    for visit_time_cu, url, title in rows:
        ts = chrome_ts(visit_time_cu, epoch_delta_us)
        hits = [tracked for tracked in url_map if tracked in url]
        # max() returns the first of equally long candidates.
        owner = url_map[max(hits, key=len)] if hits else None
        project = owner or uncategorized
        if "/app/" in url:
            chat_id = url.split("/app/")[-1].split("?")[0][:20]
        else:
            chat_id = url[-24:]
        detail = f"gemini/app/{chat_id}… — {(title or '')[:40]}"
        events.append(make_event("Gemini (web)", ts, detail, project))
    return events
180
+
181
+
182
def collect_chrome(
    profiles,
    dt_from,
    dt_to,
    collapse_minutes,
    home,
    epoch_delta_us,
    classify_project: Callable,
    make_event: Callable,
):
    """Build "Chrome" events from history visits that match profile keywords.

    Keywords are the lower-cased ``match_terms`` and ``name`` of every
    profile. A visit qualifies when its URL or title contains a keyword.
    URLs containing ``claude.ai`` or ``gemini.google.com`` are excluded.
    Repeat visits are thinned with ``thin_chrome_visit_rows`` before events
    are built.

    Args:
        profiles: Profiles with ``name`` and ``match_terms`` entries.
        dt_from: Lower bound of the reporting range.
        dt_to: Upper bound of the reporting range.
        collapse_minutes: Window for collapsing repeat visits to one URL.
        home: Home directory used to locate the Chrome history file.
        epoch_delta_us: Microsecond offset between Chrome's and Unix time.
        classify_project: Callable ``(haystack, profiles) -> project``.
        make_event: Callable ``(source, ts, detail, project) -> event``.

    Returns:
        The list of events produced by ``make_event``; empty when the
        profiles provide no keywords.
    """
    all_keywords = sorted(
        {
            kw.lower()
            for profile in profiles
            for kw in (profile["match_terms"] + [profile["name"]])
            if kw
        }
    )
    if not all_keywords:
        return []

    dt_from_cu, dt_to_cu = chrome_time_range(dt_from, dt_to, epoch_delta_us)
    kw_clauses = " OR ".join(
        ["(LOWER(u.url) LIKE ? ESCAPE '\\' OR LOWER(u.title) LIKE ? ESCAPE '\\')" for _ in all_keywords]
    )
    kw_params = tuple(p for kw in all_keywords for p in (f"%{_like_escape(kw)}%", f"%{_like_escape(kw)}%"))
    where_clause = f"({kw_clauses}) AND u.url NOT LIKE ? AND u.url NOT LIKE ?"
    clause_params = (*kw_params, "%claude.ai%", "%gemini.google.com%")
    history_path = chrome_history_path(home)
    rows = query_chrome(history_path, where_clause, dt_from_cu, dt_to_cu, clause_params)
    rows = thin_chrome_visit_rows(rows, collapse_minutes, epoch_delta_us)
    results = []
    for visit_time_cu, url, title in rows:
        ts = chrome_ts(visit_time_cu, epoch_delta_us)
        detail = (title or url)[:70]
        # A NULL title must not reach the classifier as the word "None".
        project = classify_project(f"{url} {title or ''}", profiles)
        results.append(make_event("Chrome", ts, detail, project))
    return results
collectors/cursor.py ADDED
@@ -0,0 +1,130 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from urllib.parse import unquote, urlparse
8
+
9
+
10
def load_cursor_workspaces(home: Path):
    """Map Cursor workspace-storage ids to their folder or workspace paths.

    Reads every ``workspace.json`` under ``Library/Application Support/
    Cursor/User/workspaceStorage/<id>/`` and takes its ``folder`` (or,
    failing that, ``workspace``) value. ``file:`` URIs are converted to
    percent-decoded paths; any other value is stored unchanged.

    Files that cannot be read, decoded or parsed, are not JSON objects, or
    hold a non-string location are skipped.

    Returns:
        A dict from workspace id (the directory name) to path or URI string;
        empty when the storage directory does not exist.
    """
    storage_dir = home / "Library" / "Application Support" / "Cursor" / "User" / "workspaceStorage"
    workspace_map = {}
    if not storage_dir.exists():
        return workspace_map
    for workspace_json in storage_dir.glob("*/workspace.json"):
        workspace_id = workspace_json.parent.name
        try:
            data = json.loads(workspace_json.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        raw_uri = data.get("folder") or data.get("workspace")
        if not raw_uri or not isinstance(raw_uri, str):
            continue
        parsed = urlparse(raw_uri)
        path = unquote(parsed.path) if parsed.scheme == "file" else raw_uri
        workspace_map[workspace_id] = path
    return workspace_map
28
+
29
+
30
def collect_cursor(profiles, dt_from, dt_to, home, local_tz, classify_project, make_event):
    """Build "Cursor" events from Cursor's log files.

    Every ``*.log`` file under ``Library/Application Support/Cursor/logs`` is
    scanned line by line. A line becomes an event when it starts with a
    recognised timestamp inside the range and a workspace can be derived from
    it, either through a workspace-storage id known to
    ``load_cursor_workspaces`` or through a ``/Users/...`` path in the line.

    Two timestamp layouts are recognised: ``YYYY-MM-DD HH:MM:SS`` and a
    bracketed ISO form ``[YYYY-MM-DDTHH:MM:SS(.fff)(Z|+hh:mm)]``. Timestamps
    without an offset are interpreted in *local_tz*.

    Args:
        profiles: Project profiles forwarded to ``classify_project``.
        dt_from: Inclusive lower bound.
        dt_to: Inclusive upper bound.
        home: Home directory (a ``Path``).
        local_tz: tzinfo applied to timestamps that carry no offset.
        classify_project: Callable ``(haystack, profiles) -> project``.
        make_event: Callable ``(source, ts, detail, project) -> event``.

    Returns:
        The list of events produced by ``make_event``; empty when the logs
        directory does not exist.
    """
    logs_dir = home / "Library" / "Application Support" / "Cursor" / "logs"
    if not logs_dir.exists():
        return []
    # Loaded only once logs are known to exist, to avoid a pointless scan.
    workspace_map = load_cursor_workspaces(home)

    results = []
    ts_pattern = re.compile(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")
    ts_iso_bracket_pattern = re.compile(r"^\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?)(Z|[+-]\d{2}:\d{2})?\]")
    workspace_id_pattern = re.compile(r"workspaceStorage/([0-9a-f]{32})|old id ([0-9a-f]{32})-")
    workspace_path_pattern = re.compile(r"(/Users/[^\"'\s]+)")

    def _parse_cursor_log_ts(line: str):
        """Return the line's leading timestamp, or None if absent or invalid."""
        m = ts_pattern.match(line)
        if m:
            try:
                return datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S").replace(tzinfo=local_tz)
            except ValueError:
                return None
        m = ts_iso_bracket_pattern.match(line)
        if m:
            iso = (m.group(1) + (m.group(2) or "")).replace("Z", "+00:00")
            try:
                parsed = datetime.fromisoformat(iso)
            except ValueError:
                return None
            # The offset group is optional. Without it the value is naive and
            # cannot be compared with aware bounds, so apply local_tz as the
            # plain-timestamp branch above does.
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=local_tz)
            return parsed
        return None

    for log_file in logs_dir.glob("**/*.log"):
        try:
            with open(log_file, encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    ts = _parse_cursor_log_ts(line)
                    if not ts or not (dt_from <= ts <= dt_to):
                        continue
                    workspace_path = None
                    m_id = workspace_id_pattern.search(line)
                    if m_id and workspace_map:
                        workspace_id = m_id.group(1) or m_id.group(2)
                        workspace_path = workspace_map.get(workspace_id)
                    if not workspace_path:
                        # Fall back to the first /Users/... path in the line.
                        m_path = workspace_path_pattern.search(line)
                        if m_path:
                            workspace_path = m_path.group(1)
                    if not workspace_path:
                        continue
                    project = classify_project(f"{workspace_path} {line}", profiles)
                    detail = f"{Path(workspace_path).name} — {line.strip()[:90]}"
                    results.append(make_event("Cursor", ts, detail, project))
        except OSError:
            continue
    return results
82
+
83
+
84
def collect_cursor_checkpoints(
    profiles,
    dt_from,
    dt_to,
    checkpoints_dir: Path,
    home: Path,
    classify_project,
    make_event,
    source_name: str,
):
    """Build events from Cursor checkpoint ``metadata.json`` files.

    Each ``<checkpoints_dir>/*/metadata.json`` with an in-range
    ``startTrackingDateUnixMilliseconds`` becomes one event. The project is
    classified from the ``fsPath`` of its ``requestFiles`` plus the path
    mapped from its ``workspaceId`` (if any). Checkpoints without any such
    path are skipped, as are files that are unreadable, malformed or of an
    unexpected shape.

    Args:
        profiles: Project profiles forwarded to ``classify_project``.
        dt_from: Inclusive lower bound.
        dt_to: Inclusive upper bound.
        checkpoints_dir: Directory holding one sub-directory per checkpoint.
        home: Home directory, used to load the Cursor workspace map.
        classify_project: Callable ``(haystack, profiles) -> project``.
        make_event: Callable ``(source, ts, detail, project) -> event``.
        source_name: Source label passed to ``make_event``.

    Returns:
        The list of events produced by ``make_event``; empty when
        *checkpoints_dir* is not a directory.
    """
    if not checkpoints_dir.is_dir():
        return []
    workspace_map = load_cursor_workspaces(home)
    results = []
    for meta_path in checkpoints_dir.glob("*/metadata.json"):
        try:
            data = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        ms = data.get("startTrackingDateUnixMilliseconds")
        if ms is None:
            continue
        try:
            ts = datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
        except (OSError, ValueError, OverflowError, TypeError):
            # TypeError: the field held a non-numeric value such as a string.
            continue
        if not (dt_from <= ts <= dt_to):
            continue
        request_files = data.get("requestFiles")
        if not isinstance(request_files, list):
            request_files = []
        paths = []
        for rf in request_files:
            if not isinstance(rf, dict):
                continue
            p = rf.get("fsPath")
            if p:
                paths.append(str(p))
        wid = data.get("workspaceId")
        if wid and isinstance(wid, str):
            mapped = workspace_map.get(wid)
            if mapped:
                paths.append(str(mapped))
        hay = " ".join(paths)
        if not hay:
            continue
        project = classify_project(hay, profiles)
        # Short id: first hyphen-separated segment, at most 8 characters.
        agent_id = str(data.get("agentRequestId", "")).split("-")[0][:8]
        label = Path(paths[0]).name if paths else "checkpoint"
        detail = f"checkpoint {agent_id}… — {label}"
        results.append(make_event(source_name, ts, detail, project))
    return results
collectors/github.py ADDED
@@ -0,0 +1,169 @@
1
+ """GitHub public activity via REST API (optional; requires username)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Callable, Dict, List, Optional
9
+ from urllib.error import HTTPError, URLError
10
+ from urllib.request import Request, urlopen
11
+
12
+ from core.cli_options import package_version
13
+ from core.sources import GITHUB_SOURCE
14
+
15
# User-Agent header value sent with every GitHub API request; it embeds the
# version reported by package_version().
USER_AGENT = "timelog-extract/{} (+https://github.com/mbjorke/timelog-extract)".format(
    package_version()
)
# Page size requested from the events endpoint.
PER_PAGE = 100
# Upper bound on the number of pages requested per collection run.
MAX_PAGES = 10
21
+
22
+
23
def resolve_github_username(args: Any) -> str:
    """Return the GitHub username, preferring the CLI value over the environment.

    A non-blank ``args.github_user`` wins. Otherwise ``GITHUB_USER`` and then
    ``GITHUB_LOGIN`` are consulted. The result is stripped of surrounding
    whitespace and is ``""`` when nothing is configured.
    """
    cli_value = getattr(args, "github_user", None)
    if cli_value:
        trimmed = str(cli_value).strip()
        if trimmed:
            return trimmed
    from_env = os.environ.get("GITHUB_USER") or os.environ.get("GITHUB_LOGIN") or ""
    return from_env.strip()
29
+
30
+
31
def github_source_enabled(args: Any) -> tuple[bool, Optional[str]]:
    """Decide whether the GitHub source should run.

    ``args.github_source`` (default ``"auto"``) selects the mode. ``"off"``
    always disables the source. In every other mode the source is enabled
    only when a username can be resolved.

    Returns:
        ``(True, None)`` when enabled, otherwise ``(False, reason)``.
    """
    mode = getattr(args, "github_source", "auto")
    if mode == "off":
        return False, "GitHub source disabled via --github-source off"
    if resolve_github_username(args):
        return True, None
    # No username: pick the explanation that fits the requested mode.
    if mode == "on":
        reason = "GitHub on but no username (use --github-user or GITHUB_USER)"
    elif mode == "auto":
        reason = "no GitHub username (set --github-user or GITHUB_USER for this source)"
    else:
        reason = "no GitHub username"
    return False, reason
44
+
45
+
46
def _parse_github_ts(created_at: str) -> datetime:
    """Parse an ISO-8601 timestamp, reading a trailing ``Z`` as ``+00:00``."""
    iso_text = f"{created_at[:-1]}+00:00" if created_at.endswith("Z") else created_at
    return datetime.fromisoformat(iso_text)
50
+
51
+
52
def _detail_for_event(ev: Dict[str, Any]) -> str:
    """Return a one-line description of a GitHub event object.

    Push, pull-request, issue, create, delete, release, fork and watch events
    get a tailored sentence. Any other type is rendered as ``"<type> (<repo>)"``.
    Missing fields fall back to placeholders instead of raising.
    """
    kind = ev.get("type") or "unknown"
    repo = (ev.get("repo") or {}).get("name") or "unknown/repo"
    payload = ev.get("payload") or {}

    if kind == "PushEvent":
        commits = payload.get("commits") or []
        # Prefer the listed commits; otherwise use the reported size, if any.
        count = len(commits) if commits else (payload.get("size") or 0)
        branch = (payload.get("ref") or "").split("/")[-1] or "default"
        return f"push to {repo} ({count} commits, ref {branch})"

    if kind in ("PullRequestEvent", "IssuesEvent"):
        # Both carry a titled, numbered subject plus an action verb.
        if kind == "PullRequestEvent":
            label, subject = "PR", payload.get("pull_request") or {}
        else:
            label, subject = "issue", payload.get("issue") or {}
        title = (subject.get("title") or "").strip() or "(no title)"
        action = payload.get("action") or "?"
        number = subject.get("number", "")
        return f"{label} #{number} {action}: {title} ({repo})"

    if kind == "CreateEvent":
        ref = payload.get("ref") or ""
        description = payload.get("description") or ""
        ref_type = payload.get("ref_type") or "ref"
        suffix = f" {description}" if description else ""
        return f"created {ref_type} {ref} in {repo}{suffix}"

    if kind == "DeleteEvent":
        ref = payload.get("ref") or ""
        ref_type = payload.get("ref_type", "ref")
        return f"deleted {ref_type} {ref} in {repo}"

    if kind == "ReleaseEvent":
        release = payload.get("release") or {}
        tag = release.get("tag_name") or release.get("name") or "release"
        return f"release {tag} ({repo})"

    if kind == "ForkEvent":
        target = (payload.get("forkee") or {}).get("full_name") or "fork"
        return f"forked {repo} → {target}"

    if kind == "WatchEvent":
        return f"starred {repo}"

    return f"{kind} ({repo})"
101
+
102
+
103
def collect_public_events(
    profiles: List[Dict[str, Any]],
    dt_from: datetime,
    dt_to: datetime,
    *,
    username: str,
    token: Optional[str],
    classify_project: Callable[..., str],
    make_event: Callable[..., Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Fetch `/users/{username}/events/public` (newest first; GitHub retains ~300 recent events).

    Events outside the API window will not appear — sparse for old ranges.

    Paging stops at the first event older than *dt_from*, at a short or empty
    page, after ``MAX_PAGES`` pages, or when GitHub rejects a later page with
    HTTP 422; events gathered up to that point are kept.

    Args:
        profiles: Project profiles forwarded to ``classify_project``.
        dt_from: Lower bound; naive values are taken as UTC.
        dt_to: Upper bound; naive values are taken as UTC.
        username: GitHub login; an empty value returns ``[]`` without any request.
        token: Optional token sent as a ``Bearer`` authorization header.
        classify_project: Callable ``(haystack, profiles) -> project``.
        make_event: Callable ``(source, ts, detail, project) -> event``.

    Returns:
        The list of events produced by ``make_event``.

    Raises:
        RuntimeError: On HTTP or network errors, or when the response is not
            a JSON list.
    """
    if not username:
        return []

    # Local import: this block cannot extend the module's import section.
    from urllib.parse import quote

    results: List[Dict[str, Any]] = []
    # Normalize bounds to aware UTC for comparison
    if dt_from.tzinfo is None:
        dt_from = dt_from.replace(tzinfo=timezone.utc)
    if dt_to.tzinfo is None:
        dt_to = dt_to.replace(tzinfo=timezone.utc)
    dt_from_utc = dt_from.astimezone(timezone.utc)
    dt_to_utc = dt_to.astimezone(timezone.utc)

    # The username comes from CLI/env input; quote it so it stays one path segment.
    safe_user = quote(str(username), safe="")

    for page in range(1, MAX_PAGES + 1):
        url = f"https://api.github.com/users/{safe_user}/events/public?per_page={PER_PAGE}&page={page}"
        req = Request(url, headers={"User-Agent": USER_AGENT, "Accept": "application/vnd.github+json"})
        if token:
            req.add_header("Authorization", f"Bearer {token}")
        try:
            with urlopen(req, timeout=30) as resp:
                body = resp.read()
        except HTTPError as exc:
            if exc.code == 422 and page > 1:
                # GitHub answers 422 once paging exceeds its event window;
                # that marks the end of the data, not a failure.
                break
            raise RuntimeError(f"GitHub API HTTP {exc.code}: {exc.reason}") from exc
        except URLError as exc:
            raise RuntimeError(f"GitHub API network error: {exc.reason}") from exc

        try:
            batch = json.loads(body.decode("utf-8"))
        except ValueError as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            raise RuntimeError(f"GitHub API returned an unreadable response: {exc}") from exc
        if not batch:
            break
        if not isinstance(batch, list):
            raise RuntimeError("GitHub API returned an unexpected payload (expected a list of events)")

        stop_paging = False
        for ev in batch:
            if not isinstance(ev, dict):
                continue
            created = ev.get("created_at")
            if not created:
                continue
            try:
                ts = _parse_github_ts(created)
            except (ValueError, TypeError, AttributeError):
                # One malformed timestamp must not discard the whole run.
                continue
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=timezone.utc)
            if ts > dt_to_utc:
                continue
            if ts < dt_from_utc:
                # Events arrive newest first, so everything after is older still.
                stop_paging = True
                break
            detail = _detail_for_event(ev)
            repo = (ev.get("repo") or {}).get("name") or ""
            haystack = f"{repo} {detail}"
            project = classify_project(haystack, profiles)
            results.append(make_event(GITHUB_SOURCE, ts, detail, project))

        if stop_paging:
            break
        if len(batch) < PER_PAGE:
            break

    return results