systemu 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. sharing_on/__init__.py +3 -0
  2. sharing_on/__main__.py +6 -0
  3. sharing_on/analyzer/__init__.py +1 -0
  4. sharing_on/analyzer/generator.py +332 -0
  5. sharing_on/analyzer/intent_extractor.py +292 -0
  6. sharing_on/analyzer/step_detector.py +318 -0
  7. sharing_on/analyzer/unifier.py +268 -0
  8. sharing_on/cli.py +936 -0
  9. sharing_on/collectors/__init__.py +1 -0
  10. sharing_on/collectors/base.py +85 -0
  11. sharing_on/collectors/clipboard.py +204 -0
  12. sharing_on/collectors/filesystem.py +236 -0
  13. sharing_on/collectors/input_hook.py +178 -0
  14. sharing_on/collectors/introspectors/__init__.py +22 -0
  15. sharing_on/collectors/introspectors/base.py +80 -0
  16. sharing_on/collectors/introspectors/linux.py +34 -0
  17. sharing_on/collectors/introspectors/macos.py +34 -0
  18. sharing_on/collectors/introspectors/windows.py +114 -0
  19. sharing_on/collectors/process.py +123 -0
  20. sharing_on/collectors/screen.py +89 -0
  21. sharing_on/collectors/web_extension.py +135 -0
  22. sharing_on/collectors/window.py +205 -0
  23. sharing_on/config.py +276 -0
  24. sharing_on/events/__init__.py +1 -0
  25. sharing_on/events/models.py +136 -0
  26. sharing_on/events/store.py +187 -0
  27. sharing_on/output/__init__.py +1 -0
  28. sharing_on/output/markdown.py +321 -0
  29. sharing_on/platform_info.py +160 -0
  30. sharing_on/redactor.py +100 -0
  31. sharing_on/session.py +267 -0
  32. systemu/__init__.py +2 -0
  33. systemu/abstractions/__init__.py +22 -0
  34. systemu/abstractions/approval_gate.py +63 -0
  35. systemu/abstractions/event_broker.py +102 -0
  36. systemu/abstractions/task_queue.py +56 -0
  37. systemu/abstractions/vault.py +125 -0
  38. systemu/approval/__init__.py +6 -0
  39. systemu/approval/notification_gate.py +60 -0
  40. systemu/approval/sqlite_approval_gate.py +285 -0
  41. systemu/core/__init__.py +1 -0
  42. systemu/core/llm_router.py +501 -0
  43. systemu/core/memory_types.py +236 -0
  44. systemu/core/models.py +462 -0
  45. systemu/core/utils.py +45 -0
  46. systemu/elder/__init__.py +5 -0
  47. systemu/elder/memory.py +125 -0
  48. systemu/events/__init__.py +6 -0
  49. systemu/events/memory_event_broker.py +96 -0
  50. systemu/events/sqlite_event_broker.py +410 -0
  51. systemu/interface/__init__.py +1 -0
  52. systemu/interface/cli_commands.py +1300 -0
  53. systemu/interface/components/__init__.py +11 -0
  54. systemu/interface/components/learning_curves.py +79 -0
  55. systemu/interface/components/memory_status.py +130 -0
  56. systemu/interface/components/pending_deps.py +211 -0
  57. systemu/interface/components/pending_tools.py +81 -0
  58. systemu/interface/components/skills_snapshot.py +87 -0
  59. systemu/interface/components/workflow_pipeline.py +145 -0
  60. systemu/interface/dashboard.py +710 -0
  61. systemu/interface/dashboard_state.py +619 -0
  62. systemu/interface/event_bus.py +424 -0
  63. systemu/interface/jobs.py +235 -0
  64. systemu/interface/notifications.py +334 -0
  65. systemu/interface/pages/__init__.py +3 -0
  66. systemu/interface/pages/activities.py +406 -0
  67. systemu/interface/pages/army.py +405 -0
  68. systemu/interface/pages/chat_page.py +192 -0
  69. systemu/interface/pages/evolutions.py +182 -0
  70. systemu/interface/pages/flywheel_page.py +363 -0
  71. systemu/interface/pages/memory_consolidation_page.py +422 -0
  72. systemu/interface/pages/notifications_page.py +361 -0
  73. systemu/interface/pages/overview.py +330 -0
  74. systemu/interface/pages/recover.py +133 -0
  75. systemu/interface/pages/scrolls.py +288 -0
  76. systemu/interface/pages/settings.py +159 -0
  77. systemu/interface/pages/shadow_memory_page.py +164 -0
  78. systemu/interface/pages/skills_page.py +232 -0
  79. systemu/interface/pages/systemu_chat.py +703 -0
  80. systemu/interface/pages/tools.py +1085 -0
  81. systemu/interface/pages/workflow_detail.py +370 -0
  82. systemu/interface/pages/workshop.py +621 -0
  83. systemu/interface/ui_helpers.py +58 -0
  84. systemu/llm/__init__.py +1 -0
  85. systemu/llm/providers/__init__.py +53 -0
  86. systemu/llm/providers/anthropic.py +57 -0
  87. systemu/llm/providers/base.py +40 -0
  88. systemu/llm/providers/google.py +36 -0
  89. systemu/llm/providers/ollama.py +39 -0
  90. systemu/llm/providers/openai.py +35 -0
  91. systemu/llm/providers/openrouter.py +50 -0
  92. systemu/messaging/__init__.py +30 -0
  93. systemu/messaging/event_pusher.py +201 -0
  94. systemu/messaging/gateway.py +208 -0
  95. systemu/messaging/handlers.py +182 -0
  96. systemu/messaging/telegram_gateway.py +261 -0
  97. systemu/migrations/__init__.py +1 -0
  98. systemu/migrations/json_to_db.py +256 -0
  99. systemu/pipelines/__init__.py +1 -0
  100. systemu/pipelines/activity_extractor.py +627 -0
  101. systemu/pipelines/cross_shadow_patterns.py +215 -0
  102. systemu/pipelines/direct_task.py +282 -0
  103. systemu/pipelines/evolution_engine.py +466 -0
  104. systemu/pipelines/evolution_policy.py +132 -0
  105. systemu/pipelines/memory_consolidator.py +167 -0
  106. systemu/pipelines/refinery.py +468 -0
  107. systemu/pipelines/scroll_refiner.py +610 -0
  108. systemu/pipelines/scroll_remediator.py +294 -0
  109. systemu/pipelines/scroll_validator.py +278 -0
  110. systemu/pipelines/shadow_decision.py +642 -0
  111. systemu/pipelines/skill_exporter.py +75 -0
  112. systemu/pipelines/skill_recalibrator.py +341 -0
  113. systemu/pipelines/skill_validator.py +195 -0
  114. systemu/pipelines/tool_dry_run.py +460 -0
  115. systemu/pipelines/tool_forge.py +566 -0
  116. systemu/pipelines/tool_inadequacy_diagnosis.py +214 -0
  117. systemu/pipelines/tool_recalibrator.py +631 -0
  118. systemu/pipelines/tool_service.py +143 -0
  119. systemu/pipelines/workshop_module.py +136 -0
  120. systemu/queue/__init__.py +5 -0
  121. systemu/queue/huey_app.py +352 -0
  122. systemu/queue/huey_task_queue.py +184 -0
  123. systemu/queue/protocol.py +134 -0
  124. systemu/queue/redis_priority_queue.py +432 -0
  125. systemu/queue/sqlite_priority_queue.py +307 -0
  126. systemu/queue/thread_task_queue.py +57 -0
  127. systemu/recovery/__init__.py +1 -0
  128. systemu/recovery/classifier.py +27 -0
  129. systemu/recovery/engine.py +177 -0
  130. systemu/recovery/links.py +17 -0
  131. systemu/runtime/__init__.py +1 -0
  132. systemu/runtime/affinity_log.py +232 -0
  133. systemu/runtime/backend/__init__.py +76 -0
  134. systemu/runtime/backend/docker.py +115 -0
  135. systemu/runtime/backend/local.py +179 -0
  136. systemu/runtime/backend/protocol.py +109 -0
  137. systemu/runtime/backend/ssh.py +52 -0
  138. systemu/runtime/backend/wsl.py +50 -0
  139. systemu/runtime/context_builder.py +468 -0
  140. systemu/runtime/dep_approvals.py +403 -0
  141. systemu/runtime/dep_conflicts.py +193 -0
  142. systemu/runtime/dependency_installer.py +521 -0
  143. systemu/runtime/execution_mind.py +561 -0
  144. systemu/runtime/execution_snapshot.py +284 -0
  145. systemu/runtime/failure_classifier.py +316 -0
  146. systemu/runtime/failure_telemetry.py +274 -0
  147. systemu/runtime/inadequacy_tracker.py +202 -0
  148. systemu/runtime/interpreter_check.py +277 -0
  149. systemu/runtime/memory_backends/__init__.py +20 -0
  150. systemu/runtime/memory_backends/base.py +26 -0
  151. systemu/runtime/memory_backends/filesystem.py +51 -0
  152. systemu/runtime/memory_backends/mem0.py +66 -0
  153. systemu/runtime/memory_consolidator.py +183 -0
  154. systemu/runtime/memory_invalidator.py +213 -0
  155. systemu/runtime/memory_recall.py +84 -0
  156. systemu/runtime/metrics_tracker.py +195 -0
  157. systemu/runtime/rejection_store.py +273 -0
  158. systemu/runtime/shadow_metrics.py +263 -0
  159. systemu/runtime/shadow_runtime.py +2013 -0
  160. systemu/runtime/specialty_suggester.py +221 -0
  161. systemu/runtime/supervisor.py +1375 -0
  162. systemu/runtime/supervisor_cost_ledger.py +257 -0
  163. systemu/runtime/tool_metrics.py +285 -0
  164. systemu/runtime/tool_registry.py +522 -0
  165. systemu/runtime/tool_sandbox.py +242 -0
  166. systemu/runtime/workflow_tracker.py +410 -0
  167. systemu/scheduler/__init__.py +1 -0
  168. systemu/scheduler/daemon.py +487 -0
  169. systemu/scheduler/jobs.py +891 -0
  170. systemu/storage/__init__.py +6 -0
  171. systemu/storage/file_vault.py +196 -0
  172. systemu/storage/parallel_vault.py +304 -0
  173. systemu/storage/skill_migrator.py +122 -0
  174. systemu/storage/sqlite/__init__.py +5 -0
  175. systemu/storage/sqlite/models.py +360 -0
  176. systemu/storage/sqlite/vault.py +1534 -0
  177. systemu/vault/__init__.py +1 -0
  178. systemu/vault/factory.py +132 -0
  179. systemu/vault/tools/implementations/api_call_get.py +66 -0
  180. systemu/vault/tools/implementations/browser_navigate.py +69 -0
  181. systemu/vault/tools/implementations/calculate_rsi.py +86 -0
  182. systemu/vault/tools/implementations/calculate_sma.py +50 -0
  183. systemu/vault/tools/implementations/clipboard_read.py +20 -0
  184. systemu/vault/tools/implementations/clipboard_write.py +22 -0
  185. systemu/vault/tools/implementations/close_application.py +107 -0
  186. systemu/vault/tools/implementations/compress_files.py +48 -0
  187. systemu/vault/tools/implementations/create_excel_sheet.py +49 -0
  188. systemu/vault/tools/implementations/create_word_doc.py +160 -0
  189. systemu/vault/tools/implementations/detect_language_from_extension.py +72 -0
  190. systemu/vault/tools/implementations/download_file.py +46 -0
  191. systemu/vault/tools/implementations/extract_archive.py +60 -0
  192. systemu/vault/tools/implementations/fetch_docker_hub_metadata.py +63 -0
  193. systemu/vault/tools/implementations/fetch_github_org_data.py +58 -0
  194. systemu/vault/tools/implementations/fetch_github_pr_files.py +80 -0
  195. systemu/vault/tools/implementations/fetch_github_prs.py +51 -0
  196. systemu/vault/tools/implementations/fetch_html.py +32 -0
  197. systemu/vault/tools/implementations/fetch_json.py +34 -0
  198. systemu/vault/tools/implementations/fetch_nse_stock_data.py +153 -0
  199. systemu/vault/tools/implementations/fetch_reddit_posts.py +56 -0
  200. systemu/vault/tools/implementations/file_append.py +32 -0
  201. systemu/vault/tools/implementations/file_copy.py +44 -0
  202. systemu/vault/tools/implementations/file_delete.py +33 -0
  203. systemu/vault/tools/implementations/file_list_dir.py +45 -0
  204. systemu/vault/tools/implementations/file_read.py +36 -0
  205. systemu/vault/tools/implementations/file_scan_directory.py +54 -0
  206. systemu/vault/tools/implementations/file_write.py +36 -0
  207. systemu/vault/tools/implementations/format_date.py +28 -0
  208. systemu/vault/tools/implementations/generate_pr_review_markdown.py +106 -0
  209. systemu/vault/tools/implementations/github_get_commit.py +49 -0
  210. systemu/vault/tools/implementations/github_get_workflow_run.py +39 -0
  211. systemu/vault/tools/implementations/github_list_workflow_runs.py +61 -0
  212. systemu/vault/tools/implementations/image_resize.py +69 -0
  213. systemu/vault/tools/implementations/keyboard_shortcut.py +96 -0
  214. systemu/vault/tools/implementations/launch_application.py +103 -0
  215. systemu/vault/tools/implementations/mouse_click.py +118 -0
  216. systemu/vault/tools/implementations/mouse_drag.py +70 -0
  217. systemu/vault/tools/implementations/notify_desktop.py +32 -0
  218. systemu/vault/tools/implementations/parse_diff_statistics.py +81 -0
  219. systemu/vault/tools/implementations/parse_json.py +49 -0
  220. systemu/vault/tools/implementations/read_excel_sheet.py +64 -0
  221. systemu/vault/tools/implementations/read_word_doc.py +35 -0
  222. systemu/vault/tools/implementations/run_cli_command.py +49 -0
  223. systemu/vault/tools/implementations/run_command.py +54 -0
  224. systemu/vault/tools/implementations/search_emails.py +67 -0
  225. systemu/vault/tools/implementations/send_email.py +78 -0
  226. systemu/vault/tools/implementations/take_screenshot.py +53 -0
  227. systemu/vault/tools/implementations/type_text.py +44 -0
  228. systemu/vault/tools/implementations/web_extract_table.py +120 -0
  229. systemu/vault/tools/implementations/web_extract_text.py +61 -0
  230. systemu/vault/tools/implementations/web_screenshot.py +77 -0
  231. systemu/vault/tools/implementations/web_search.py +128 -0
  232. systemu/vault/tools/implementations/write_csv_file.py +45 -0
  233. systemu/vault/tools/implementations/write_markdown_file.py +41 -0
  234. systemu/vault/tools/implementations/write_text_file.py +37 -0
  235. systemu/vault/vault.py +1095 -0
  236. systemu/worker.py +223 -0
  237. systemu-0.7.0.dist-info/METADATA +749 -0
  238. systemu-0.7.0.dist-info/RECORD +242 -0
  239. systemu-0.7.0.dist-info/WHEEL +5 -0
  240. systemu-0.7.0.dist-info/entry_points.txt +2 -0
  241. systemu-0.7.0.dist-info/licenses/LICENSE +21 -0
  242. systemu-0.7.0.dist-info/top_level.txt +2 -0
sharing_on/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """sharing_on — Record computer activity, generate step-by-step instructions."""
2
+
3
+ __version__ = "0.7.0"
sharing_on/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running as `python -m sharing_on`."""
2
+
3
+ from sharing_on.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1 @@
1
+ """Analyzer subpackage — step detection and instruction generation."""
@@ -0,0 +1,332 @@
1
+ """LLM-powered instruction generator — converts detected steps into
2
+ human-readable, step-by-step instructions using OpenRouter API.
3
+
4
+ Uses the OpenAI-compatible client library with OpenRouter's base URL.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from typing import List, Optional
12
+
13
+ from openai import OpenAI
14
+
15
+ from sharing_on.analyzer.step_detector import Step
16
+ from sharing_on.events.models import EventAction, EventCategory
17
+ from sharing_on.redactor import redact
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ SYSTEM_PROMPT = """You are an expert Standard Operating Procedure (SOP) writer.
23
+ Your job is to analyze captured computer activity and produce a clear, narrative
24
+ document that another person can follow to reproduce the exact same task.
25
+
26
+ ## Your Approach
27
+
28
+ 1. **Infer the Overall Intent**: Before writing any steps, analyze ALL the captured
29
+ events holistically. Determine WHAT the user was trying to accomplish at a high
30
+ level (e.g., "Create a daily financial market summary report" or "Deploy a
31
+ microservice to production"). State this intent clearly at the top of the document
32
+ as a one-paragraph executive summary.
33
+
34
+ 2. **Identify Subtasks**: Break the full activity into logical subtasks or phases
35
+ (e.g., "Phase 1: Research — Gather market data", "Phase 2: Documentation —
36
+ Compile findings into a report"). Each subtask should have a clear heading and
37
+ a brief description of its purpose before the numbered steps.
38
+
39
+ 3. **Write Narrative Steps**: Each step should read like a clear instruction from
40
+ a knowledgeable colleague, not a robotic log. Use natural language.
41
+ - BAD: "Clicked 'Save' (ButtonControl) in Save As dialog"
42
+ - GOOD: "Save the file by clicking the **Save** button in the Save As dialog."
43
+
44
+ ## Rules
45
+
46
+ 1. Write each step as a clear, action-oriented instruction
47
+ 2. Include exact commands, URLs, and file paths when detected
48
+ 3. Mention which application was used at the start of each step (e.g., "**In Google Chrome**, ...")
49
+ 4. Be specific but concise — assume the reader is technically competent but unfamiliar with this task
50
+ 5. If clipboard content was pasted, mention what was pasted and where
51
+ 6. Format file changes as markdown diff blocks
52
+ 7. Do NOT invent steps that weren't captured — only document what actually happened
53
+ 8. Group closely related sub-actions (like repeated formatting clicks) into a single step and describe the intent
54
+ 9. When a UI element was clicked, describe it by its visible label or name, NOT by coordinates
55
+ 10. If a URL was navigated to, include the full URL
56
+ 11. When a repeated action is noted (e.g., "clicked 15 times"), describe the intent
57
+ (e.g., "Reduced the font size to approximately 10pt by clicking the decrease button repeatedly")
58
+ 12. If an input field was changed, mention which field and what value was entered
59
+ 13. If the user switched between apps to copy/reference information, describe the workflow
60
+ (e.g., "Switch to the browser tab showing NSE India to note the closing Nifty value, then return to the Google Doc to enter it")
61
+
62
+ ## Output Format
63
+
64
+ ```markdown
65
+ # [Task Title — inferred from activity]
66
+
67
+ ## Overview
68
+ [1-2 paragraph executive summary of what was accomplished and why]
69
+
70
+ ## Subtask 1: [Phase Name]
71
+ [Brief description of this phase's purpose]
72
+
73
+ 1. **[App Name]** — [Clear narrative instruction]
74
+ 2. ...
75
+
76
+ ## Subtask 2: [Phase Name]
77
+ ...
78
+
79
+ ## Result
80
+ [Brief description of the final outcome — e.g., what file was created, what was deployed, etc.]
81
+ ```
82
+
83
+ Return ONLY the Markdown document. Do not add any preamble or explanation outside the document."""
84
+
85
+
86
def generate_instructions(
    steps: List[Step],
    session_name: str,
    platform_info: str,
    duration_seconds: float,
    api_key: str,
    base_url: str = "https://openrouter.ai/api/v1",
    model: str = "openai/gpt-4o-mini",
    intent: Optional["IntentExtraction"] = None,  # noqa: F821 — fwd ref
) -> str:
    """Send captured steps to the LLM and get back formatted instructions.

    The prompt is passed through ``redact`` before it leaves the process.
    Any failure of the LLM call — including an empty completion — falls
    back to ``_generate_fallback_instructions`` so the caller always gets a
    non-empty document for a non-empty step list.

    Args:
        steps: List of detected steps with their events.
        session_name: Name of the capture session.
        platform_info: Platform description string.
        duration_seconds: Total session duration.
        api_key: OpenRouter API key.
        base_url: OpenRouter API base URL.
        model: LLM model identifier.
        intent: (v0.6.0-a) optional pre-extracted intent. When present and
            its ``is_usable`` attribute is truthy, the LLM is told the
            user's actual outcome up-front and instructed to anchor the
            narrative on that intent rather than re-inferring it from the
            click sequence.

    Returns:
        Markdown-formatted step-by-step instructions, a fixed placeholder
        string when ``steps`` is empty, or the raw-step fallback document
        when the LLM call fails or returns nothing.
    """
    if not steps:
        return "_No activity was captured during this session._"

    # Build the structured step data for the LLM.
    step_descriptions = [_format_step_for_llm(step) for step in steps]

    # When intent is pre-extracted, surface it explicitly so the narrative
    # LLM doesn't have to re-derive it from clicks (which is the whole
    # reason the click-mirroring failure mode exists).
    intent_block = ""
    if intent is not None and getattr(intent, "is_usable", False):
        intent_block = (
            "## Pre-Inferred User Intent\n\n"
            f"- **Intent:** {intent.intent}\n"
            f"- **Expected outcome:** {intent.expected_outcome}\n"
            f"- **Success signal:** {intent.success_signal}\n\n"
            "Anchor your narrative on this stated intent. The captured steps "
            "below describe HOW the user happened to do it; your job is to "
            "narrate them in a way that serves the stated intent, not to "
            "re-derive intent from the click sequence.\n\n"
            "---\n\n"
        )

    user_prompt = f"""Task Name: {session_name}
Platform: {platform_info}
Total Duration: {duration_seconds:.0f} seconds
Number of Steps Detected: {len(steps)}

{intent_block}Below are the captured steps with their raw events. Convert these into clear,
reproducible instructions.

---

{chr(10).join(step_descriptions)}
"""

    # Redact PII before sending to LLM.
    user_prompt = redact(user_prompt)

    try:
        client = OpenAI(
            api_key=api_key,
            base_url=base_url,
        )

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,  # Low creativity, high accuracy
            max_tokens=4000,
            top_p=0.9,
        )

        raw_content = response.choices[0].message.content or ""
        logger.info(
            "Generated instructions: %s chars, tokens used: %s",
            len(raw_content),
            response.usage.total_tokens if response.usage else "unknown",
        )
        instructions = raw_content.strip()
    except Exception as e:
        logger.error("LLM instruction generation failed: %s", e)
        # Fallback: generate basic instructions without LLM.
        return _generate_fallback_instructions(steps, session_name)

    if not instructions:
        # The API call succeeded but produced no text; an empty document is
        # useless to the caller, so treat it like any other failure.
        logger.warning("LLM returned an empty response; using fallback instructions")
        return _generate_fallback_instructions(steps, session_name)

    return instructions
184
+
185
+
186
+ def _format_step_for_llm(step: Step) -> str:
187
+ """Format a single step's events into a readable description for the LLM."""
188
+ lines = []
189
+ lines.append(f"### Step {step.step_number}")
190
+
191
+ if step.label:
192
+ lines.append(f"**User Label:** {step.label}")
193
+
194
+ if step.primary_app:
195
+ lines.append(f"**Primary Application:** {step.primary_app}")
196
+
197
+ if step.start_time:
198
+ lines.append(f"**Time:** {step.start_time.strftime('%H:%M:%S')}")
199
+ lines.append(f"**Duration:** {step.duration_seconds:.1f}s")
200
+
201
+ lines.append("")
202
+ lines.append("**Events:**")
203
+
204
+ # Emit relevant events (skip screenshots — they're referenced separately)
205
+ for event in step.events:
206
+ if event.category == EventCategory.SCREEN:
207
+ continue
208
+
209
+ if event.action == EventAction.WINDOW_FOCUS:
210
+ app = event.application or "Unknown"
211
+ title = event.window_title or ""
212
+ lines.append(f"- Switched to **{app}**: {title}")
213
+
214
+ elif event.action == EventAction.FILE_CREATED:
215
+ lines.append(f"- Created file: `{event.file_path}`")
216
+
217
+ elif event.action == EventAction.FILE_MODIFIED:
218
+ lines.append(f"- Modified file: `{event.file_path}`")
219
+ diff = event.data.get("diff")
220
+ if diff:
221
+ # Truncate very long diffs for the LLM
222
+ if len(diff) > 2000:
223
+ diff = diff[:2000] + "\n... (truncated)"
224
+ lines.append(f" ```diff\n{diff}\n ```")
225
+
226
+ elif event.action == EventAction.FILE_DELETED:
227
+ lines.append(f"- Deleted file: `{event.file_path}`")
228
+
229
+ elif event.action == EventAction.FILE_MOVED:
230
+ dest = event.data.get("dest_path", "unknown")
231
+ lines.append(f"- Moved file: `{event.file_path}` → `{dest}`")
232
+
233
+ elif event.action == EventAction.PROCESS_STARTED:
234
+ cmdline = event.data.get("cmdline", event.process_name or "")
235
+ lines.append(f"- Ran command: `{cmdline}`")
236
+
237
+ elif event.action == EventAction.PROCESS_ENDED:
238
+ lines.append(f"- Process ended: {event.process_name}")
239
+
240
+ elif event.action == EventAction.CLIPBOARD_CHANGE:
241
+ preview = event.data.get("preview", "")
242
+ content_type = event.data.get("content_type", "text")
243
+ if content_type == "command":
244
+ lines.append(f"- Copied command: `{preview}`")
245
+ elif content_type == "code":
246
+ lines.append(f"- Copied code snippet: `{preview[:100]}`")
247
+ elif content_type == "url":
248
+ lines.append(f"- Copied URL: `{preview}`")
249
+ else:
250
+ lines.append(f"- Copied to clipboard: {preview[:100]}")
251
+
252
+ elif event.action == EventAction.STEP_MARKER:
253
+ label = event.data.get("label", "")
254
+ key_name = event.data.get("key", "")
255
+ if key_name:
256
+ lines.append(f"- Pressed key: **{key_name}**")
257
+ elif label:
258
+ lines.append(f"- User note: {label}")
259
+
260
+ elif event.action == EventAction.MOUSE_CLICK:
261
+ app = event.application or "Unknown"
262
+ el_name = event.data.get("element_name", "")
263
+ ctrl_type = event.data.get("control_type", "")
264
+ xpath = event.data.get("element_xpath", "")
265
+ url = event.data.get("url", "")
266
+ el_text = event.data.get("element_text", "")
267
+ value = event.data.get("value", "")
268
+ repeat = event.data.get("repeat_count", 1)
269
+
270
+ # Build a clear, semantic description
271
+ desc_parts = []
272
+ if el_name and el_name != "Unknown":
273
+ desc_parts.append(f"**{el_name}**")
274
+ elif el_text:
275
+ desc_parts.append(f"**{el_text}**")
276
+
277
+ if ctrl_type and ctrl_type != "Unknown":
278
+ desc_parts.append(f"({ctrl_type})")
279
+
280
+ if url:
281
+ desc_parts.append(f"on page `{url}`")
282
+
283
+ if value:
284
+ desc_parts.append(f"[value: `{value}`]")
285
+
286
+ desc = " ".join(desc_parts) if desc_parts else "an element"
287
+
288
+ if repeat and repeat > 1:
289
+ lines.append(f"- Clicked {desc} **{repeat} times** in **{app}**")
290
+ else:
291
+ lines.append(f"- Clicked {desc} in **{app}**")
292
+
293
+ elif event.action == EventAction.KEY_PRESS:
294
+ el_text = event.data.get("element_text", "")
295
+ value = event.data.get("value", "")
296
+ url = event.data.get("url", "")
297
+ if value:
298
+ lines.append(f"- Typed `{value}` into a field")
299
+ if url:
300
+ lines.append(f" on page `{url}`")
301
+ elif el_text:
302
+ lines.append(f"- Interacted with input: {el_text}")
303
+
304
+ lines.append("")
305
+ return "\n".join(lines)
306
+
307
+
308
+ def _generate_fallback_instructions(steps: List[Step], session_name: str) -> str:
309
+ """Generate basic instructions without LLM (fallback if API fails)."""
310
+ lines = [
311
+ f"# {session_name}",
312
+ "",
313
+ "_Note: LLM generation failed. Showing raw captured steps._",
314
+ "",
315
+ ]
316
+
317
+ for step in steps:
318
+ lines.append(f"## Step {step.step_number}")
319
+ if step.label:
320
+ lines.append(f"_{step.label}_")
321
+ if step.primary_app:
322
+ lines.append(f"**Application:** {step.primary_app}")
323
+ lines.append("")
324
+
325
+ for event in step.events:
326
+ if event.category == EventCategory.SCREEN:
327
+ continue
328
+ lines.append(f"- {event.summary}")
329
+
330
+ lines.append("")
331
+
332
+ return "\n".join(lines)
@@ -0,0 +1,292 @@
1
+ """Intent extractor (v0.6.0-a, Stage 1).
2
+
3
+ Pre-pass that runs BEFORE the narrative generator (`generator.py`). Reads
4
+ the raw events + detected steps and emits a structured ``intent.json``
5
+ artifact at the session root.
6
+
7
+ The point: today the downstream pipeline (Scroll → Activity → Tools →
8
+ Shadow) infers the user's intent from a click-by-click narrative. When
9
+ the user's captured workflow uses one app to achieve an outcome that
10
+ could be achieved better another way (e.g., Snipping Tool + Word to
11
+ "document weather" — actually just wants weather data documented), the
12
+ downstream LLMs faithfully reproduce the means and miss the end.
13
+
14
+ This extractor decouples intent from means. Output schema::
15
+
16
+ {
17
+ "intent": "<outcome, one line, no app/GUI names>",
18
+ "expected_outcome": "<concrete success description>",
19
+ "success_signal": "<observable proof of completion>",
20
+ "abstracted_steps": ["<outcome-described step>", ...],
21
+ "confidence": "high" | "medium" | "low"
22
+ }
23
+
24
+ Best-effort throughout — when the extractor fails or returns
25
+ ``confidence == "low"``, callers fall back to today's narrative-only
26
+ behaviour (no operator card; this stage is read-only background work).
27
+ """
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import logging
32
+ from collections import Counter
33
+ from dataclasses import asdict, dataclass, field
34
+ from pathlib import Path
35
+ from typing import Any, Dict, List, Optional
36
+
37
+ from openai import OpenAI
38
+
39
+ from sharing_on.analyzer.step_detector import Step
40
+ from sharing_on.events.models import EventAction, EventCategory
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
+ # ─────────────────────────────────────────────────────────────────────────────
46
+
47
@dataclass
class IntentExtraction:
    """Result of the intent-extraction pass over a capture session.

    All fields default to empty / ``"low"`` so a failed extraction can be
    represented by setting only ``confidence`` and ``error``.
    """

    intent: str = ""
    expected_outcome: str = ""
    success_signal: str = ""
    abstracted_steps: List[str] = field(default_factory=list)
    confidence: str = "low"  # high | medium | low
    error: Optional[str] = None

    @property
    def is_usable(self) -> bool:
        """Whether downstream stages should prefer this over the narrative.

        Requires a confidence of exactly ``"high"`` or ``"medium"`` and an
        intent string that is non-empty after stripping whitespace.
        """
        if self.confidence not in ("high", "medium"):
            return False
        return bool(self.intent.strip())
62
+
63
+
64
+ # ─────────────────────────────────────────────────────────────────────────────
65
+ # Public API
66
+
67
def extract_intent(
    *,
    steps: List[Step],
    events: List[Any],
    session_name: str,
    platform_info: str,
    api_key: str,
    base_url: str = "https://openrouter.ai/api/v1",
    model: str = "google/gemini-2.0-flash-exp:free",
) -> IntentExtraction:
    """Run the intent-extraction LLM pass.

    Builds a compact JSON payload from the session (event summary plus
    abstracted step descriptions), redacts it, and asks the model for a
    JSON object that ``_parse_response`` turns into an ``IntentExtraction``.

    Args:
        steps: Detected steps for the session.
        events: Raw captured events for the session.
        session_name: Name of the capture session.
        platform_info: Platform description string.
        api_key: API key for the OpenAI-compatible endpoint.
        base_url: Base URL of the OpenAI-compatible endpoint.
        model: LLM model identifier.

    Returns:
        A structured ``IntentExtraction``. Failures while building the
        payload, loading the prompt, or calling the LLM are returned as
        ``confidence="low"`` with ``error`` populated so the caller can
        fall back gracefully.
    """
    if not steps and not events:
        return IntentExtraction(
            confidence="low",
            error="no steps or events to analyse",
        )

    try:
        # Imported locally: this module does not import the redactor at
        # file level. The narrative generator redacts its prompt before
        # sending it to the LLM; this payload carries the same kind of data
        # (paths, URLs, session name), so it gets the same treatment.
        from sharing_on.redactor import redact

        payload = {
            "session_name": session_name,
            "platform": platform_info,
            "event_summary": _summarise_events(events, steps),
            "abstracted_step_descriptions": _abstract_step_titles(steps),
        }
        user_content = redact(json.dumps(payload, default=str))
    except Exception as exc:
        # Guarded so an odd event object cannot break the documented
        # never-raises behaviour.
        logger.warning("[IntentExtractor] could not build payload: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))

    try:
        prompt = _load_prompt()
    except Exception as exc:
        logger.warning("[IntentExtractor] could not load prompt: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))

    try:
        client = OpenAI(api_key=api_key, base_url=base_url)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": user_content},
            ],
            temperature=0.1,
            max_tokens=1024,
            response_format={"type": "json_object"},
        )
        # An absent message body is handed on as an empty JSON object.
        raw = response.choices[0].message.content or "{}"
    except Exception as exc:
        logger.warning("[IntentExtractor] LLM call failed: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))

    return _parse_response(raw)
123
+
124
+
125
def write_intent_json(intent: IntentExtraction, session_dir: Path) -> Path:
    """Write ``intent`` as indented JSON to ``<session_dir>/intent.json``.

    Best-effort: any exception while serialising or writing is logged with
    its traceback and swallowed. The target path is returned either way, so
    a returned path does not guarantee the file exists.
    """
    destination = Path(session_dir).joinpath("intent.json")
    try:
        payload = asdict(intent)
        if intent.is_usable:
            # Usable extractions omit the transient error field — operators
            # reading the file want signal, not noise. Failed or
            # low-confidence extractions keep it so the failure is visible.
            payload = {key: val for key, val in payload.items() if key != "error"}
        serialized = json.dumps(payload, indent=2)
        destination.write_text(serialized, encoding="utf-8")
        logger.info(
            "[IntentExtractor] wrote %s (confidence=%s)",
            destination, intent.confidence,
        )
    except Exception:
        logger.exception("[IntentExtractor] failed to write intent.json")
    return destination
143
+
144
+
145
def read_intent_json(session_dir: Path) -> Optional[IntentExtraction]:
    """Load a previously-written ``<session_dir>/intent.json``.

    Text fields are coerced to ``str`` and capped (500 characters each);
    ``abstracted_steps`` is capped at 12 entries of 200 characters.

    Returns:
        The reconstructed ``IntentExtraction``, or ``None`` when the file is
        absent, is not a JSON object, or cannot be read or parsed.
    """
    target = Path(session_dir) / "intent.json"
    if not target.exists():
        return None
    try:
        data = json.loads(target.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            return None

        # Only a real JSON array counts as a step list. Iterating a string
        # would yield single characters and a dict would yield its keys.
        raw_steps = data.get("abstracted_steps")
        if not isinstance(raw_steps, list):
            raw_steps = []

        # Keep the field's declared Optional[str] type even if the file
        # holds some other JSON value.
        raw_error = data.get("error")

        return IntentExtraction(
            intent=str(data.get("intent", ""))[:500],
            expected_outcome=str(data.get("expected_outcome", ""))[:500],
            success_signal=str(data.get("success_signal", ""))[:500],
            abstracted_steps=[str(s)[:200] for s in raw_steps[:12]],
            confidence=str(data.get("confidence", "low")),
            error=None if raw_error is None else str(raw_error),
        )
    except Exception:
        logger.debug("[IntentExtractor] read failed", exc_info=True)
        return None
166
+
167
+
168
+ # ─────────────────────────────────────────────────────────────────────────────
169
+ # Internals
170
+
171
def _load_prompt() -> str:
    """Return the UTF-8 text of ``prompts/extract_intent.md`` beside this module."""
    module_dir = Path(__file__).resolve().parent
    prompt_file = module_dir.joinpath("prompts", "extract_intent.md")
    with prompt_file.open(encoding="utf-8") as handle:
        return handle.read()
175
+
176
+
177
def _summarise_events(events: List[Any], steps: List[Step]) -> Dict[str, Any]:
    """Condense a session's events into a small statistics dict for the LLM.

    Attributes are read with ``getattr`` so events lacking a field are
    simply skipped for that statistic. The result is bounded: at most 8
    application names (most frequent first) and at most 10 distinct entries
    in each file/URL list.

    Args:
        events: Recorded events; ``None`` or empty yields zeroed statistics.
        steps: Detected steps; only their count is reported.

    Returns:
        A dict with the keys ``applications_used``, ``files_created``,
        ``files_modified``, ``urls_visited``, ``clipboard_actions``,
        ``step_count`` and ``total_events``.
    """
    event_list = events or []
    app_hits: Counter = Counter()
    created: List[str] = []
    modified: List[str] = []
    visited: List[str] = []
    clipboard_events = 0

    for event in event_list:
        app_name = getattr(event, "application", None)
        if app_name:
            app_hits[app_name] += 1

        # Route file events to the matching list; other actions are ignored.
        action = getattr(event, "action", None)
        bucket = None
        if action == EventAction.FILE_CREATED:
            bucket = created
        elif action == EventAction.FILE_MODIFIED:
            bucket = modified
        if bucket is not None:
            path = getattr(event, "file_path", None)
            if path:
                bucket.append(str(path))

        if getattr(event, "category", None) == EventCategory.CLIPBOARD:
            clipboard_events += 1

        # A URL may be a direct attribute or live inside the ``data`` mapping.
        link = getattr(event, "url", None)
        if not link:
            extra = getattr(event, "data", {}) or {}
            link = extra.get("url")
        if link:
            visited.append(str(link))

    summary: Dict[str, Any] = {}
    summary["applications_used"] = [name for name, _count in app_hits.most_common(8)]
    summary["files_created"] = _dedup_cap(created, cap=10)
    summary["files_modified"] = _dedup_cap(modified, cap=10)
    summary["urls_visited"] = _dedup_cap(visited, cap=10)
    summary["clipboard_actions"] = clipboard_events
    summary["step_count"] = len(steps or [])
    summary["total_events"] = len(event_list)
    return summary
220
+
221
+
222
def _abstract_step_titles(steps: List[Step]) -> List[str]:
    """Render one short ``"Step N: ..."`` line for each of the first 20 steps.

    A step with a truthy ``label`` is described by that label (truncated to
    140 characters). Otherwise the line lists the non-zero counts found in
    the step's ``event_summary`` for file, clipboard, process and window
    events, or ``"activity observed"`` when there are none. Application
    names are not included.
    """
    # (event_summary key, wording appended after the count), in output order.
    breakdown = (
        ("file", "file ops"),
        ("clipboard", "clipboard ops"),
        ("process", "process ops"),
        ("window", "window switches"),
    )

    titles: List[str] = []
    for step in (steps or [])[:20]:
        label = getattr(step, "label", None)
        if label:
            body = str(label)[:140]
        else:
            summary = getattr(step, "event_summary", None) or {}
            fragments = [
                f"{summary[key]} {wording}"
                for key, wording in breakdown
                if summary.get(key)
            ]
            body = ", ".join(fragments) or "activity observed"
        titles.append(f"Step {step.step_number}: {body}")
    return titles
250
+
251
+
252
def _parse_response(raw: str) -> IntentExtraction:
    """Parse the model's reply into an ``IntentExtraction``, tolerantly.

    A surrounding Markdown code fence is stripped before decoding. Text
    fields are coerced to ``str`` and truncated (500 characters for
    ``intent``, ``expected_outcome`` and ``success_signal``; 200 characters
    per step, at most 12 steps). JSON ``null`` is treated as "missing"
    instead of being stringified to ``"None"``, and a malformed
    ``abstracted_steps`` value never raises.

    Args:
        raw: Raw message content returned by the model (may be empty).

    Returns:
        The parsed extraction. When the text is not valid JSON, or is valid
        JSON but not an object, a ``confidence="low"`` extraction carrying
        an ``error`` description is returned instead.
    """
    text = (raw or "").strip()
    if text.startswith("```"):
        lines = text.split("\n")[1:]  # drop the opening fence line
        if lines and lines[-1].startswith("```"):
            lines = lines[:-1]  # drop the closing fence line
        text = "\n".join(lines).strip()

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        logger.warning("[IntentExtractor] could not parse model output: %s", exc)
        return IntentExtraction(confidence="low", error=f"parse_error: {exc}")

    if not isinstance(data, dict):
        return IntentExtraction(confidence="low", error="response was not a JSON object")

    def _text(key: str, limit: int) -> str:
        # ``null`` must not become the literal string "None".
        value = data.get(key)
        return "" if value is None else str(value)[:limit]

    raw_steps = data.get("abstracted_steps")
    if isinstance(raw_steps, str):
        # A bare string is one step; iterating it would yield characters.
        raw_steps = [raw_steps] if raw_steps.strip() else []
    elif not isinstance(raw_steps, (list, tuple)):
        # Numbers, objects and null are not usable step lists.
        raw_steps = []
    steps = [str(s)[:200] for s in raw_steps if s is not None][:12]

    confidence = data.get("confidence")
    return IntentExtraction(
        intent=_text("intent", 500),
        expected_outcome=_text("expected_outcome", 500),
        success_signal=_text("success_signal", 500),
        abstracted_steps=steps,
        confidence="low" if confidence is None else str(confidence).strip().lower(),
    )
281
+
282
+
283
def _dedup_cap(items: List[str], *, cap: int) -> List[str]:
    """Return the first ``cap`` distinct entries of ``items``, in original order.

    Args:
        items: Values to de-duplicate; ``None`` is treated as empty.
        cap: Maximum number of entries to return. Zero or a negative value
            yields an empty list.

    Returns:
        A new list holding each distinct value once, keeping the position
        of its first occurrence, truncated to ``cap`` entries.
    """
    # Checking the cap only after appending would let ``cap=0`` return one item.
    if cap <= 0:
        return []

    seen = set()
    out: List[str] = []
    for item in items or []:
        if item in seen:
            continue
        seen.add(item)
        out.append(item)
        if len(out) >= cap:
            break  # stop early; the remaining items cannot be included
    return out