taskclf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. taskclf/README.md +20 -0
  2. taskclf/__init__.py +2 -0
  3. taskclf/adapters/README.md +41 -0
  4. taskclf/adapters/__init__.py +0 -0
  5. taskclf/adapters/activitywatch/__init__.py +0 -0
  6. taskclf/adapters/activitywatch/client.py +296 -0
  7. taskclf/adapters/activitywatch/mapping.py +178 -0
  8. taskclf/adapters/activitywatch/types.py +48 -0
  9. taskclf/adapters/input/__init__.py +0 -0
  10. taskclf/adapters/input/linux.py +0 -0
  11. taskclf/adapters/input/macos.py +1 -0
  12. taskclf/adapters/input/windows.py +0 -0
  13. taskclf/cli/README.md +54 -0
  14. taskclf/cli/__init__.py +0 -0
  15. taskclf/cli/main.py +2169 -0
  16. taskclf/core/README.md +22 -0
  17. taskclf/core/__init__.py +0 -0
  18. taskclf/core/defaults.py +84 -0
  19. taskclf/core/drift.py +331 -0
  20. taskclf/core/hashing.py +42 -0
  21. taskclf/core/logging.py +88 -0
  22. taskclf/core/metrics.py +395 -0
  23. taskclf/core/model_io.py +220 -0
  24. taskclf/core/schema.py +163 -0
  25. taskclf/core/store.py +51 -0
  26. taskclf/core/telemetry.py +237 -0
  27. taskclf/core/time.py +67 -0
  28. taskclf/core/types.py +244 -0
  29. taskclf/core/validation.py +314 -0
  30. taskclf/features/README.md +36 -0
  31. taskclf/features/__init__.py +0 -0
  32. taskclf/features/build.py +439 -0
  33. taskclf/features/domain.py +138 -0
  34. taskclf/features/dynamics.py +140 -0
  35. taskclf/features/sessions.py +69 -0
  36. taskclf/features/text.py +56 -0
  37. taskclf/features/windows.py +75 -0
  38. taskclf/infer/README.md +38 -0
  39. taskclf/infer/__init__.py +0 -0
  40. taskclf/infer/baseline.py +199 -0
  41. taskclf/infer/batch.py +289 -0
  42. taskclf/infer/calibration.py +326 -0
  43. taskclf/infer/monitor.py +328 -0
  44. taskclf/infer/online.py +515 -0
  45. taskclf/infer/prediction.py +44 -0
  46. taskclf/infer/resolve.py +161 -0
  47. taskclf/infer/smooth.py +253 -0
  48. taskclf/infer/taxonomy.py +341 -0
  49. taskclf/labels/README.md +28 -0
  50. taskclf/labels/__init__.py +0 -0
  51. taskclf/labels/projection.py +99 -0
  52. taskclf/labels/queue.py +247 -0
  53. taskclf/labels/store.py +254 -0
  54. taskclf/labels/weak_rules.py +1 -0
  55. taskclf/model_registry.py +760 -0
  56. taskclf/report/README.md +18 -0
  57. taskclf/report/__init__.py +0 -0
  58. taskclf/report/daily.py +144 -0
  59. taskclf/report/export.py +125 -0
  60. taskclf/train/README.md +31 -0
  61. taskclf/train/__init__.py +0 -0
  62. taskclf/train/build_dataset.py +172 -0
  63. taskclf/train/calibrate.py +267 -0
  64. taskclf/train/dataset.py +98 -0
  65. taskclf/train/evaluate.py +394 -0
  66. taskclf/train/lgbm.py +231 -0
  67. taskclf/train/retrain.py +650 -0
  68. taskclf/ui/__init__.py +0 -0
  69. taskclf/ui/labeling.py +310 -0
  70. taskclf/ui/tray.py +503 -0
  71. taskclf-0.1.0.dist-info/METADATA +281 -0
  72. taskclf-0.1.0.dist-info/RECORD +74 -0
  73. taskclf-0.1.0.dist-info/WHEEL +4 -0
  74. taskclf-0.1.0.dist-info/entry_points.txt +3 -0
taskclf/README.md ADDED
@@ -0,0 +1,20 @@
1
+ # src/taskclf/
2
+
3
+ Main package.
4
+
5
+ ## Design principles
6
+ - **Adapters** isolate unstable platform/tool integrations.
7
+ - **Core** defines contracts, validation, and data/model IO.
8
+ - **Pipelines** compose pure transforms into repeatable runs.
9
+ - **CLI** is the stable interface for humans and automation.
10
+
11
+ ## Subpackages
12
+ - `core/` — schemas, validation, storage primitives, model IO, metrics, drift detection, telemetry
13
+ - `adapters/` — ActivityWatch + input collectors
14
+ - `features/` — feature computation (event -> bucketed features, rolling windows, sessions)
15
+ - `labels/` — label span formats, import/export, projection onto feature windows, active labeling queue, weak label rules
16
+ - `train/` — dataset construction, splits, training, evaluation, calibration, retraining pipeline
17
+ - `infer/` — batch and online inference, rule-based baseline, smoothing, calibration, taxonomy mapping, drift monitoring
18
+ - `report/` — daily summaries and exports (JSON/CSV/Parquet)
19
+ - `cli/` — Typer entrypoint and commands
20
+ - `ui/` — labeling UI (Streamlit)
taskclf/__init__.py ADDED
@@ -0,0 +1,2 @@
1
def main() -> None:
    """Print the package greeting to standard output."""
    greeting = "Hello from taskclf!"
    print(greeting)
@@ -0,0 +1,41 @@
1
+ # adapters/
2
+
3
+ Integrations with external tools or OS APIs.
4
+
5
+ ## Subpackages
6
+
7
+ ### `activitywatch/`
8
+ - `types.py` -- `AWEvent` (window events) and `AWInputEvent` (keyboard/mouse
9
+ aggregate counts from `aw-watcher-input`).
10
+ - `mapping.py` -- App-name normalization, browser/editor/terminal classification,
11
+ and semantic `app_category` assignment (browser, editor, terminal, chat, email,
12
+ meeting, docs, design, devtools, media, file_manager, utilities, project_mgmt,
13
+ other).
14
+ - `client.py` -- AW JSON export parser and REST API client. Supports both
15
+ `currentwindow` (window watcher) and `os.hid.input` (input watcher) bucket
16
+ types for file-based and REST-based ingestion.
17
+
18
+ ### `input/`
19
+ - Optional OS-specific input aggregators (aggregate counts only; not yet implemented).
20
+ The `aw-watcher-input` integration in `activitywatch/` covers the same
21
+ signals when ActivityWatch is running.
22
+
23
+ ## Invariants
24
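+ - Adapters must output normalized window events satisfying the `core.types.Event` protocol (supplementary input events such as `AWInputEvent` are a separate type; see "Event Protocol" below).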
25
+ - Raw window titles are never persisted -- they are replaced with salted hashes.
26
+ - App names are mapped to reverse-domain identifiers via the known-app registry.
27
+ - Input events carry only aggregate counts (presses, clicks, movement, scroll)
28
+ -- never individual key identities.
29
+ - Keep adapter-specific quirks out of `core/`.
30
+ - Adapters should be swappable without changing feature or model code.
31
+
32
+ ## Event Protocol
33
+ The `Event` protocol (`core.types.Event`) defines the minimal attribute set
34
+ that any adapter event must expose. `AWEvent` satisfies this protocol
35
+ structurally (no inheritance required). New adapters should likewise expose
36
+ `timestamp`, `duration_seconds`, `app_id`, `window_title_hash`, `is_browser`,
37
+ `is_editor`, `is_terminal`, and `app_category`.
38
+
39
+ `AWInputEvent` is a separate type that does not implement the `Event` protocol
40
+ -- it feeds into the feature builder as a supplementary data source via the
41
+ `input_events` parameter of `build_features_from_aw_events()`.
File without changes
File without changes
@@ -0,0 +1,296 @@
1
+ """ActivityWatch data access: JSON export parsing and REST API client.
2
+
3
+ Provides two data-ingestion paths:
4
+
5
+ * **File-based** -- :func:`parse_aw_export` reads an AW JSON export
6
+ (the format produced by *Export all buckets as JSON* in the AW web UI
7
+ or ``GET /api/0/export``).
8
+ * **REST-based** -- :func:`fetch_aw_events` queries a running
9
+ ``aw-server`` instance for events in a time range.
10
+
11
+ Both paths normalize application names via
12
+ :func:`~taskclf.adapters.activitywatch.mapping.normalize_app` and
13
+ replace raw window titles with salted hashes so that no sensitive text
14
+ is ever persisted.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import urllib.request
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+ from typing import Any
25
+
26
+ from taskclf.adapters.activitywatch.mapping import normalize_app
27
+ from taskclf.adapters.activitywatch.types import AWEvent, AWInputEvent
28
+ from taskclf.core.defaults import DEFAULT_AW_TIMEOUT_SECONDS
29
+ from taskclf.core.hashing import salted_hash
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ _CURRENTWINDOW_TYPE = "currentwindow"
34
+ _INPUT_TYPE = "os.hid.input"
35
+
36
+
37
def _parse_timestamp(raw: str) -> datetime:
    """Parse an ISO-8601 timestamp from AW into a naive-UTC datetime.

    Timezone-aware inputs are converted to UTC and then stripped of their
    ``tzinfo``; naive inputs are returned unchanged.

    Args:
        raw: ISO-8601 timestamp string, optionally ending in ``Z`` or an
            explicit UTC offset.

    Returns:
        A naive :class:`~datetime.datetime`.

    Raises:
        ValueError: If *raw* is not a timestamp ``datetime.fromisoformat``
            can parse.
    """
    text = raw
    # ``datetime.fromisoformat`` only accepts the "Z" designator from
    # Python 3.11 onwards, so spell it as an explicit offset first.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    ts = datetime.fromisoformat(text)
    if ts.tzinfo is not None:
        ts = ts.astimezone(timezone.utc).replace(tzinfo=None)
    return ts
43
+
44
+
45
def _raw_event_to_aw_event(raw: dict[str, Any], *, title_salt: str) -> AWEvent:
    """Build a normalized :class:`AWEvent` from one raw AW window event dict.

    The application name (default ``"unknown"``) is passed through
    ``normalize_app``; the window title (default ``""``) is only ever handed
    to ``salted_hash`` and is not stored on the result.

    Args:
        raw: Raw event dict; must contain ``"timestamp"``, may contain
            ``"duration"`` and ``"data"``.
        title_salt: Salt forwarded to ``salted_hash``.

    Returns:
        The normalized event.

    Raises:
        KeyError: If *raw* has no ``"timestamp"`` key.
    """
    payload = raw.get("data", {})

    app_id, is_browser, is_editor, is_terminal, app_category = normalize_app(
        payload.get("app", "unknown")
    )
    hashed_title = salted_hash(payload.get("title", ""), salt=title_salt)

    started = _parse_timestamp(raw["timestamp"])
    duration = float(raw.get("duration", 0))

    return AWEvent(
        timestamp=started,
        duration_seconds=duration,
        app_id=app_id,
        window_title_hash=hashed_title,
        is_browser=is_browser,
        is_editor=is_editor,
        is_terminal=is_terminal,
        app_category=app_category,
    )
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # File-based ingestion
68
+ # ---------------------------------------------------------------------------
69
+
70
+
71
def parse_aw_export(path: Path, *, title_salt: str) -> list[AWEvent]:
    """Load window events from an ActivityWatch JSON export.

    Only buckets whose ``type`` is ``currentwindow`` are read; every other
    bucket is skipped with a debug log line.  Each raw event is converted by
    :func:`_raw_event_to_aw_event`, which normalizes the application name
    and hashes the window title with *title_salt*.

    The document may wrap its buckets in a top-level ``"buckets"`` key, or be
    the bucket mapping itself.

    Args:
        path: Location of the AW export JSON file.
        title_salt: Salt forwarded to the window-title hash.

    Returns:
        All converted :class:`AWEvent` instances, ordered by timestamp.

    Raises:
        FileNotFoundError: If *path* does not exist.
        KeyError: If an event lacks the ``"timestamp"`` key.
    """
    text = path.read_text(encoding="utf-8")
    document = json.loads(text)
    bucket_map: dict[str, Any] = document.get("buckets", document)

    collected: list[AWEvent] = []
    for bucket_id, bucket in bucket_map.items():
        kind = bucket.get("type", "")
        if kind == _CURRENTWINDOW_TYPE:
            raw_events = bucket.get("events", [])
            logger.info(
                "Processing bucket %s (%d events)", bucket_id, len(raw_events)
            )
            collected.extend(
                _raw_event_to_aw_event(item, title_salt=title_salt)
                for item in raw_events
            )
        else:
            logger.debug("Skipping bucket %s (type=%s)", bucket_id, kind)

    return sorted(collected, key=lambda event: event.timestamp)
110
+
111
+
112
def _raw_to_input_event(raw: dict[str, Any]) -> AWInputEvent:
    """Build an :class:`AWInputEvent` from one raw AW input event dict.

    Missing counters default to ``0`` and every counter is coerced with
    ``int``.  The camelCase payload keys (``deltaX``, ``scrollY``, ...) are
    mapped onto the model's snake_case fields.

    Args:
        raw: Raw event dict; must contain ``"timestamp"``, may contain
            ``"duration"`` and ``"data"``.

    Returns:
        The converted input event.

    Raises:
        KeyError: If *raw* has no ``"timestamp"`` key.
    """
    payload = raw.get("data", {})
    started = _parse_timestamp(raw["timestamp"])
    duration = float(raw.get("duration", 0))

    # (model field, AW payload key) pairs, in the model's field order.
    field_to_key = (
        ("presses", "presses"),
        ("clicks", "clicks"),
        ("delta_x", "deltaX"),
        ("delta_y", "deltaY"),
        ("scroll_x", "scrollX"),
        ("scroll_y", "scrollY"),
    )
    counters = {
        field: int(payload.get(source_key, 0))
        for field, source_key in field_to_key
    }
    return AWInputEvent(timestamp=started, duration_seconds=duration, **counters)
125
+
126
+
127
def parse_aw_input_export(path: Path) -> list[AWInputEvent]:
    """Load ``aw-watcher-input`` events from an ActivityWatch JSON export.

    Only buckets whose ``type`` is ``os.hid.input`` are read; all other
    buckets are silently ignored.  The document may wrap its buckets in a
    top-level ``"buckets"`` key, or be the bucket mapping itself.

    Args:
        path: Location of the AW export JSON file.

    Returns:
        All converted :class:`AWInputEvent` instances, ordered by timestamp.
        The list is empty when the export has no ``os.hid.input`` bucket.
    """
    text = path.read_text(encoding="utf-8")
    document = json.loads(text)
    bucket_map: dict[str, Any] = document.get("buckets", document)

    collected: list[AWInputEvent] = []
    for bucket_id, bucket in bucket_map.items():
        if bucket.get("type", "") == _INPUT_TYPE:
            raw_events = bucket.get("events", [])
            logger.info(
                "Processing input bucket %s (%d events)",
                bucket_id,
                len(raw_events),
            )
            collected.extend(_raw_to_input_event(item) for item in raw_events)

    return sorted(collected, key=lambda event: event.timestamp)
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # REST API helpers
164
+ # ---------------------------------------------------------------------------
165
+
166
+
167
def _api_get(url: str) -> Any:
    """Fetch *url* with an ``Accept: application/json`` header and decode it.

    The request uses ``DEFAULT_AW_TIMEOUT_SECONDS`` as its timeout and the
    response body is decoded as UTF-8 before JSON parsing.

    Args:
        url: Fully-formed URL to request.

    Returns:
        The parsed JSON document.
    """
    request = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(
        request, timeout=DEFAULT_AW_TIMEOUT_SECONDS
    ) as response:
        body = response.read()
    return json.loads(body.decode("utf-8"))
172
+
173
+
174
def list_aw_buckets(host: str) -> dict[str, dict]:
    """Return the bucket listing of a running AW server.

    Args:
        host: Base URL of the AW server (e.g. ``"http://localhost:5600"``);
            trailing slashes are ignored.

    Returns:
        Mapping of bucket ID to that bucket's metadata, as returned by
        ``GET /api/0/buckets/``.
    """
    base = host.rstrip("/")
    return _api_get(f"{base}/api/0/buckets/")
185
+
186
+
187
def find_window_bucket_id(host: str) -> str:
    """Locate the window-watcher bucket on *host*.

    Args:
        host: Base URL of the AW server.

    Returns:
        ID of the first listed bucket whose ``type`` is ``currentwindow``.

    Raises:
        ValueError: If the server lists no ``currentwindow`` bucket.
    """
    buckets = list_aw_buckets(host)
    found = next(
        (
            bucket_id
            for bucket_id, meta in buckets.items()
            if meta.get("type") == _CURRENTWINDOW_TYPE
        ),
        None,
    )
    if found is None:
        raise ValueError(
            f"No bucket with type={_CURRENTWINDOW_TYPE!r} found on {host}. "
            f"Available: {list(buckets.keys())}"
        )
    return found
207
+
208
+
209
def find_input_bucket_id(host: str) -> str | None:
    """Locate the input-watcher bucket on *host*, if there is one.

    A missing bucket is reported as ``None`` rather than an exception, in
    contrast to :func:`find_window_bucket_id`.

    Args:
        host: Base URL of the AW server.

    Returns:
        ID of the first listed bucket whose ``type`` is ``os.hid.input``, or
        ``None`` when no such bucket is listed.
    """
    candidates = (
        bucket_id
        for bucket_id, meta in list_aw_buckets(host).items()
        if meta.get("type") == _INPUT_TYPE
    )
    return next(candidates, None)
227
+
228
+
229
def fetch_aw_events(
    host: str,
    bucket_id: str,
    start: datetime,
    end: datetime,
    *,
    title_salt: str,
) -> list[AWEvent]:
    """Fetch events from the AW REST API for a time range.

    Naive *start*/*end* values are sent with a ``Z`` suffix; timezone-aware
    values are sent with their own offset.  The bucket ID and both query
    parameters are percent-encoded so that characters such as the ``+`` of a
    ``+00:00`` offset are not decoded as a space by the server.

    Args:
        host: Base URL of the AW server (e.g. ``"http://localhost:5600"``).
        bucket_id: Bucket to query (e.g. ``"aw-watcher-window_myhostname"``).
        start: Inclusive start of the query window (UTC).
        end: Exclusive end of the query window (UTC).
        title_salt: Salt used for hashing window titles.

    Returns:
        Sorted (by timestamp) list of :class:`AWEvent` instances.
    """
    # Local import keeps this block self-contained; the module itself only
    # imports ``urllib.request`` at the top of the file.
    from urllib.parse import quote, urlencode

    query = urlencode(
        {
            name: (
                moment.isoformat() + "Z"
                if moment.tzinfo is None
                else moment.isoformat()
            )
            for name, moment in (("start", start), ("end", end))
        }
    )
    url = (
        f"{host.rstrip('/')}/api/0/buckets/{quote(bucket_id, safe='')}/events"
        f"?{query}"
    )
    raw_events: list[dict] = _api_get(url)

    events = [
        _raw_event_to_aw_event(e, title_salt=title_salt) for e in raw_events
    ]
    events.sort(key=lambda e: e.timestamp)
    return events
264
+
265
+
266
def fetch_aw_input_events(
    host: str,
    bucket_id: str,
    start: datetime,
    end: datetime,
) -> list[AWInputEvent]:
    """Fetch input events from the AW REST API for a time range.

    Naive *start*/*end* values are sent with a ``Z`` suffix; timezone-aware
    values are sent with their own offset.  The bucket ID and both query
    parameters are percent-encoded so that characters such as the ``+`` of a
    ``+00:00`` offset are not decoded as a space by the server.

    Args:
        host: Base URL of the AW server.
        bucket_id: Input bucket to query (e.g.
            ``"aw-watcher-input_myhostname"``).
        start: Inclusive start of the query window (UTC).
        end: Exclusive end of the query window (UTC).

    Returns:
        Sorted (by timestamp) list of :class:`AWInputEvent` instances.
    """
    # Local import keeps this block self-contained; the module itself only
    # imports ``urllib.request`` at the top of the file.
    from urllib.parse import quote, urlencode

    query = urlencode(
        {
            name: (
                moment.isoformat() + "Z"
                if moment.tzinfo is None
                else moment.isoformat()
            )
            for name, moment in (("start", start), ("end", end))
        }
    )
    url = (
        f"{host.rstrip('/')}/api/0/buckets/{quote(bucket_id, safe='')}/events"
        f"?{query}"
    )
    raw_events: list[dict] = _api_get(url)

    events = [_raw_to_input_event(e) for e in raw_events]
    events.sort(key=lambda e: e.timestamp)
    return events
@@ -0,0 +1,178 @@
1
+ """App-name normalization, classification, and category assignment.
2
+
3
+ ActivityWatch reports the foreground application as a human-readable
4
+ name (e.g. ``"Firefox"``, ``"Code"``). This module maps those names
5
+ to reverse-domain identifiers, boolean flags, and a semantic category
6
+ consumed by :class:`~taskclf.core.types.FeatureRow`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Final
12
+
13
+ AppInfo = tuple[str, bool, bool, bool, str]
14
+ # (app_id, is_browser, is_editor, is_terminal, app_category)
15
+
16
+ # ---- known application registry ------------------------------------------------
17
+ # (reverse_domain_id, is_browser, is_editor, is_terminal, app_category)
18
+
19
def _rows(
    category: str,
    ids_by_name: dict[str, str],
    *,
    is_browser: bool = False,
    is_editor: bool = False,
    is_terminal: bool = False,
) -> dict[str, AppInfo]:
    """Expand ``{lowercase name: app_id}`` into registry rows.

    Every row in one table shares the same flags and category, so only the
    name-to-identifier mapping is spelled out per table.  Insertion order of
    *ids_by_name* is preserved.
    """
    return {
        name: (app_id, is_browser, is_editor, is_terminal, category)
        for name, app_id in ids_by_name.items()
    }


_BROWSERS: Final[dict[str, AppInfo]] = _rows(
    "browser",
    {
        "firefox": "org.mozilla.firefox",
        "google chrome": "com.google.Chrome",
        "google-chrome": "com.google.Chrome",
        "chrome": "com.google.Chrome",
        "chromium": "org.chromium.Chromium",
        "chromium-browser": "org.chromium.Chromium",
        "safari": "com.apple.Safari",
        "arc": "company.thebrowser.Browser",
        "brave browser": "com.brave.Browser",
        "brave-browser": "com.brave.Browser",
        "microsoft edge": "com.microsoft.edgemac",
        "msedge": "com.microsoft.edgemac",
        "vivaldi": "com.vivaldi.Vivaldi",
        "opera": "com.operasoftware.Opera",
        "zen browser": "io.github.nicothin.zen",
    },
    is_browser=True,
)

_EDITORS: Final[dict[str, AppInfo]] = _rows(
    "editor",
    {
        "code": "com.microsoft.VSCode",
        "visual studio code": "com.microsoft.VSCode",
        "code - insiders": "com.microsoft.VSCodeInsiders",
        "cursor": "com.todesktop.cursor",
        "intellij idea": "com.jetbrains.intellij",
        "idea": "com.jetbrains.intellij",
        "pycharm": "com.jetbrains.pycharm",
        "webstorm": "com.jetbrains.webstorm",
        "goland": "com.jetbrains.goland",
        "clion": "com.jetbrains.clion",
        "rustrover": "com.jetbrains.rustrover",
        "sublime text": "com.sublimetext.4",
        "sublime_text": "com.sublimetext.4",
        "vim": "org.vim.Vim",
        "gvim": "org.vim.Vim",
        "neovim": "io.neovim.nvim",
        "nvim": "io.neovim.nvim",
        "emacs": "org.gnu.emacs",
        "xcode": "com.apple.dt.Xcode",
        "android studio": "com.google.android.studio",
        "zed": "dev.zed.Zed",
    },
    is_editor=True,
)

_TERMINALS: Final[dict[str, AppInfo]] = _rows(
    "terminal",
    {
        "terminal": "com.apple.Terminal",
        "iterm2": "com.googlecode.iterm2",
        "iterm": "com.googlecode.iterm2",
        "alacritty": "org.alacritty",
        "kitty": "net.kovidgoyal.kitty",
        "wezterm": "org.wezfurlong.wezterm",
        "wezterm-gui": "org.wezfurlong.wezterm",
        "gnome-terminal": "org.gnome.Terminal",
        "konsole": "org.kde.konsole",
        "hyper": "co.zeit.hyper",
        "ghostty": "com.mitchellh.ghostty",
        "rio": "io.raphamorim.rio",
        "warp": "dev.warp.Warp",
        "tabby": "org.tabby.Tabby",
    },
    is_terminal=True,
)

_EMAIL: Final[dict[str, AppInfo]] = _rows(
    "email",
    {
        "mail": "com.apple.mail",
        "thunderbird": "org.mozilla.thunderbird",
        "outlook": "com.microsoft.Outlook",
    },
)

_CHAT: Final[dict[str, AppInfo]] = _rows(
    "chat",
    {
        "slack": "com.tinyspeck.slackmacgap",
        "discord": "com.discordapp.Discord",
        "microsoft teams": "com.microsoft.teams2",
        "teams": "com.microsoft.teams2",
        "messages": "com.apple.MobileSMS",
    },
)

_MEETING: Final[dict[str, AppInfo]] = _rows(
    "meeting",
    {
        "zoom": "us.zoom.xos",
        "zoom.us": "us.zoom.xos",
        "facetime": "com.apple.FaceTime",
    },
)

_DOCS: Final[dict[str, AppInfo]] = _rows(
    "docs",
    {
        "notes": "com.apple.Notes",
        "obsidian": "md.obsidian",
        "notion": "notion.id",
    },
)

_DESIGN: Final[dict[str, AppInfo]] = _rows(
    "design",
    {"figma": "com.figma.Desktop"},
)

_DEVTOOLS: Final[dict[str, AppInfo]] = _rows(
    "devtools",
    {
        "postman": "com.postmanlabs.mac",
        "docker desktop": "com.docker.docker",
    },
)

_MEDIA: Final[dict[str, AppInfo]] = _rows(
    "media",
    {"spotify": "com.spotify.client"},
)

_FILE_MANAGER: Final[dict[str, AppInfo]] = _rows(
    "file_manager",
    {
        "finder": "com.apple.finder",
        "nautilus": "org.gnome.Nautilus",
        "files": "org.gnome.Nautilus",
    },
)

_UTILITIES: Final[dict[str, AppInfo]] = _rows(
    "utilities",
    {
        "preview": "com.apple.Preview",
        "system preferences": "com.apple.systempreferences",
        "system settings": "com.apple.systempreferences",
        "activity monitor": "com.apple.ActivityMonitor",
        "1password": "com.1password.1password",
        "bitwarden": "com.bitwarden.desktop",
    },
)

_PROJECT_MGMT: Final[dict[str, AppInfo]] = _rows(
    "project_mgmt",
    {"linear": "com.linear"},
)
135
+
136
# Single lookup table consulted by ``normalize_app``: the union of every
# per-category table, merged left to right (a later table would win on a
# duplicate key).
KNOWN_APPS: Final[dict[str, AppInfo]] = (
    _BROWSERS
    | _EDITORS
    | _TERMINALS
    | _EMAIL
    | _CHAT
    | _MEETING
    | _DOCS
    | _DESIGN
    | _DEVTOOLS
    | _MEDIA
    | _FILE_MANAGER
    | _UTILITIES
    | _PROJECT_MGMT
)

# Every category string a registry row can carry, plus the "other" fallback
# that ``normalize_app`` assigns to unknown applications.
APP_CATEGORIES: Final[frozenset[str]] = frozenset(
    (
        "browser",
        "editor",
        "terminal",
        "email",
        "chat",
        "meeting",
        "docs",
        "design",
        "devtools",
        "media",
        "file_manager",
        "utilities",
        "project_mgmt",
        "other",
    )
)
157
+
158
+
159
def normalize_app(app_name: str) -> AppInfo:
    """Map an AW application name to a reverse-domain ID, flags, and category.

    Performs a case-insensitive, whitespace-trimmed lookup in
    :data:`KNOWN_APPS`.  When the exact name is unknown and it ends in
    ``.exe``, the lookup is retried without that suffix so that executable
    style names (e.g. ``"chrome.exe"``, ``"msedge.exe"``) resolve to the same
    entry as ``"chrome"`` / ``"msedge"``.

    Unknown applications fall back to ``"unknown.<sanitized_name>"`` with all
    flags set to ``False`` and category ``"other"``.  The sanitized name is
    the lower-cased input with spaces and ``/`` replaced by ``_``; an
    ``.exe`` suffix is kept there so fallback identifiers stay unchanged.

    Args:
        app_name: Application name as reported by ActivityWatch
            (e.g. ``"Firefox"``).

    Returns:
        A ``(app_id, is_browser, is_editor, is_terminal, app_category)``
        tuple.
    """
    key = app_name.strip().lower()
    # NOTE(review): assumes Windows watchers report the process executable
    # name ("chrome.exe") -- confirm against real exports.
    for candidate in (key, key.removesuffix(".exe")):
        info = KNOWN_APPS.get(candidate)
        if info is not None:
            return info
    sanitized = key.replace(" ", "_").replace("/", "_")
    return (f"unknown.{sanitized}", False, False, False, "other")
@@ -0,0 +1,48 @@
1
+ """Privacy-safe normalized event types for ActivityWatch data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class AWEvent(BaseModel, frozen=True):
    """Immutable, privacy-scrubbed record of one ActivityWatch window event.

    The model carries a normalized ``app_id`` (produced by
    :func:`~taskclf.adapters.activitywatch.mapping.normalize_app`) instead of
    the raw application name, and a salted hash (produced by
    :func:`~taskclf.core.hashing.salted_hash`) instead of the raw window
    title.  There is no field that holds the original title.
    """

    timestamp: datetime = Field(
        description="Event start (UTC).",
    )
    duration_seconds: float = Field(
        description="Duration in seconds.",
        ge=0,
    )
    app_id: str = Field(
        description="Reverse-domain app identifier.",
    )
    window_title_hash: str = Field(
        description="Salted SHA-256 of the window title.",
    )
    is_browser: bool = Field(
        description="True if the app is a web browser.",
    )
    is_editor: bool = Field(
        description="True if the app is a code editor.",
    )
    is_terminal: bool = Field(
        description="True if the app is a terminal emulator.",
    )
    app_category: str = Field(
        description="Semantic app category (e.g. 'editor', 'chat').",
    )
28
+
29
+
30
class AWInputEvent(BaseModel, frozen=True):
    """Immutable record of aggregated keyboard/mouse activity for one interval.

    Populated from ``aw-watcher-input`` events.  Every field other than the
    timestamp and duration is a non-negative aggregate count; the model has
    no field that could hold an individual key identity.

    The upstream AW payload keys ``deltaX``/``deltaY`` and
    ``scrollX``/``scrollY`` are exposed here under snake_case names.
    """

    timestamp: datetime = Field(
        description="Interval start (UTC).",
    )
    duration_seconds: float = Field(
        description="Duration in seconds.",
        ge=0,
    )
    presses: int = Field(
        description="Key presses in this interval.",
        ge=0,
    )
    clicks: int = Field(
        description="Mouse clicks in this interval.",
        ge=0,
    )
    delta_x: int = Field(
        description="Absolute horizontal mouse movement (px).",
        ge=0,
    )
    delta_y: int = Field(
        description="Absolute vertical mouse movement (px).",
        ge=0,
    )
    scroll_x: int = Field(
        description="Absolute horizontal scroll delta.",
        ge=0,
    )
    scroll_y: int = Field(
        description="Absolute vertical scroll delta.",
        ge=0,
    )
File without changes
File without changes
@@ -0,0 +1 @@
1
+ # Optional: macOS HID events aggregator (placeholder; not yet implemented).
File without changes
taskclf/cli/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # cli/
2
+
3
+ Human-facing command surface area (stable).
4
+
5
+ ## Command Groups
6
+
7
+ ### `ingest`
8
+ - `aw` — ingest ActivityWatch JSON export into privacy-safe events
9
+
10
+ ### `features`
11
+ - `build` — build per-minute feature rows for a date
12
+
13
+ ### `labels`
14
+ - `import` — import label spans from CSV
15
+ - `add-block` — create a manual label block for a time range
16
+ - `show-queue` — show pending items in the active labeling queue
17
+ - `project` — project label blocks onto feature windows
18
+
19
+ ### `train`
20
+ - `build-dataset` — build training dataset (join features + labels, split)
21
+ - `lgbm` — train a LightGBM multiclass model
22
+ - `evaluate` — run full evaluation (metrics, calibration, acceptance checks)
23
+ - `tune-reject` — sweep reject thresholds and recommend best
24
+ - `calibrate` — fit per-user probability calibrators
25
+ - `retrain` — run full retrain pipeline (train, evaluate, gate-check, promote)
26
+ - `check-retrain` — check if retraining or calibrator update is due
27
+
28
+ ### `taxonomy`
29
+ - `validate` — validate a taxonomy YAML file
30
+ - `show` — display taxonomy mapping as a Rich table
31
+ - `init` — generate default taxonomy YAML
32
+
33
+ ### `infer`
34
+ - `batch` — batch inference (predict, smooth, segmentize)
35
+ - `online` — online inference loop (poll ActivityWatch)
36
+ - `baseline` — rule-based baseline inference (no ML model)
37
+ - `compare` — compare baseline vs ML model on labeled data
38
+
39
+ ### `report`
40
+ - `daily` — generate daily report from segments JSON
41
+
42
+ ### `monitor`
43
+ - `drift-check` — run drift detection comparing reference vs current
44
+ - `telemetry` — compute telemetry snapshot and append to store
45
+ - `show` — display recent telemetry snapshots
46
+
47
+ ## Responsibilities
48
+ - Typer app entrypoint
49
+ - Commands call pipeline functions (thin wrapper)
50
+ - Provide consistent flags and defaults (via `core.defaults`)
51
+
52
+ ## Invariants
53
+ - CLI should remain backward compatible whenever possible.
54
+ - Keep business logic out of CLI; delegate to packages.
File without changes