systemu 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sharing_on/__init__.py +3 -0
- sharing_on/__main__.py +6 -0
- sharing_on/analyzer/__init__.py +1 -0
- sharing_on/analyzer/generator.py +332 -0
- sharing_on/analyzer/intent_extractor.py +292 -0
- sharing_on/analyzer/step_detector.py +318 -0
- sharing_on/analyzer/unifier.py +268 -0
- sharing_on/cli.py +936 -0
- sharing_on/collectors/__init__.py +1 -0
- sharing_on/collectors/base.py +85 -0
- sharing_on/collectors/clipboard.py +204 -0
- sharing_on/collectors/filesystem.py +236 -0
- sharing_on/collectors/input_hook.py +178 -0
- sharing_on/collectors/introspectors/__init__.py +22 -0
- sharing_on/collectors/introspectors/base.py +80 -0
- sharing_on/collectors/introspectors/linux.py +34 -0
- sharing_on/collectors/introspectors/macos.py +34 -0
- sharing_on/collectors/introspectors/windows.py +114 -0
- sharing_on/collectors/process.py +123 -0
- sharing_on/collectors/screen.py +89 -0
- sharing_on/collectors/web_extension.py +135 -0
- sharing_on/collectors/window.py +205 -0
- sharing_on/config.py +276 -0
- sharing_on/events/__init__.py +1 -0
- sharing_on/events/models.py +136 -0
- sharing_on/events/store.py +187 -0
- sharing_on/output/__init__.py +1 -0
- sharing_on/output/markdown.py +321 -0
- sharing_on/platform_info.py +160 -0
- sharing_on/redactor.py +100 -0
- sharing_on/session.py +267 -0
- systemu/__init__.py +2 -0
- systemu/abstractions/__init__.py +22 -0
- systemu/abstractions/approval_gate.py +63 -0
- systemu/abstractions/event_broker.py +102 -0
- systemu/abstractions/task_queue.py +56 -0
- systemu/abstractions/vault.py +125 -0
- systemu/approval/__init__.py +6 -0
- systemu/approval/notification_gate.py +60 -0
- systemu/approval/sqlite_approval_gate.py +285 -0
- systemu/core/__init__.py +1 -0
- systemu/core/llm_router.py +501 -0
- systemu/core/memory_types.py +236 -0
- systemu/core/models.py +462 -0
- systemu/core/utils.py +45 -0
- systemu/elder/__init__.py +5 -0
- systemu/elder/memory.py +125 -0
- systemu/events/__init__.py +6 -0
- systemu/events/memory_event_broker.py +96 -0
- systemu/events/sqlite_event_broker.py +410 -0
- systemu/interface/__init__.py +1 -0
- systemu/interface/cli_commands.py +1300 -0
- systemu/interface/components/__init__.py +11 -0
- systemu/interface/components/learning_curves.py +79 -0
- systemu/interface/components/memory_status.py +130 -0
- systemu/interface/components/pending_deps.py +211 -0
- systemu/interface/components/pending_tools.py +81 -0
- systemu/interface/components/skills_snapshot.py +87 -0
- systemu/interface/components/workflow_pipeline.py +145 -0
- systemu/interface/dashboard.py +710 -0
- systemu/interface/dashboard_state.py +619 -0
- systemu/interface/event_bus.py +424 -0
- systemu/interface/jobs.py +235 -0
- systemu/interface/notifications.py +334 -0
- systemu/interface/pages/__init__.py +3 -0
- systemu/interface/pages/activities.py +406 -0
- systemu/interface/pages/army.py +405 -0
- systemu/interface/pages/chat_page.py +192 -0
- systemu/interface/pages/evolutions.py +182 -0
- systemu/interface/pages/flywheel_page.py +363 -0
- systemu/interface/pages/memory_consolidation_page.py +422 -0
- systemu/interface/pages/notifications_page.py +361 -0
- systemu/interface/pages/overview.py +330 -0
- systemu/interface/pages/recover.py +133 -0
- systemu/interface/pages/scrolls.py +288 -0
- systemu/interface/pages/settings.py +159 -0
- systemu/interface/pages/shadow_memory_page.py +164 -0
- systemu/interface/pages/skills_page.py +232 -0
- systemu/interface/pages/systemu_chat.py +703 -0
- systemu/interface/pages/tools.py +1085 -0
- systemu/interface/pages/workflow_detail.py +370 -0
- systemu/interface/pages/workshop.py +621 -0
- systemu/interface/ui_helpers.py +58 -0
- systemu/llm/__init__.py +1 -0
- systemu/llm/providers/__init__.py +53 -0
- systemu/llm/providers/anthropic.py +57 -0
- systemu/llm/providers/base.py +40 -0
- systemu/llm/providers/google.py +36 -0
- systemu/llm/providers/ollama.py +39 -0
- systemu/llm/providers/openai.py +35 -0
- systemu/llm/providers/openrouter.py +50 -0
- systemu/messaging/__init__.py +30 -0
- systemu/messaging/event_pusher.py +201 -0
- systemu/messaging/gateway.py +208 -0
- systemu/messaging/handlers.py +182 -0
- systemu/messaging/telegram_gateway.py +261 -0
- systemu/migrations/__init__.py +1 -0
- systemu/migrations/json_to_db.py +256 -0
- systemu/pipelines/__init__.py +1 -0
- systemu/pipelines/activity_extractor.py +627 -0
- systemu/pipelines/cross_shadow_patterns.py +215 -0
- systemu/pipelines/direct_task.py +282 -0
- systemu/pipelines/evolution_engine.py +466 -0
- systemu/pipelines/evolution_policy.py +132 -0
- systemu/pipelines/memory_consolidator.py +167 -0
- systemu/pipelines/refinery.py +468 -0
- systemu/pipelines/scroll_refiner.py +610 -0
- systemu/pipelines/scroll_remediator.py +294 -0
- systemu/pipelines/scroll_validator.py +278 -0
- systemu/pipelines/shadow_decision.py +642 -0
- systemu/pipelines/skill_exporter.py +75 -0
- systemu/pipelines/skill_recalibrator.py +341 -0
- systemu/pipelines/skill_validator.py +195 -0
- systemu/pipelines/tool_dry_run.py +460 -0
- systemu/pipelines/tool_forge.py +566 -0
- systemu/pipelines/tool_inadequacy_diagnosis.py +214 -0
- systemu/pipelines/tool_recalibrator.py +631 -0
- systemu/pipelines/tool_service.py +143 -0
- systemu/pipelines/workshop_module.py +136 -0
- systemu/queue/__init__.py +5 -0
- systemu/queue/huey_app.py +352 -0
- systemu/queue/huey_task_queue.py +184 -0
- systemu/queue/protocol.py +134 -0
- systemu/queue/redis_priority_queue.py +432 -0
- systemu/queue/sqlite_priority_queue.py +307 -0
- systemu/queue/thread_task_queue.py +57 -0
- systemu/recovery/__init__.py +1 -0
- systemu/recovery/classifier.py +27 -0
- systemu/recovery/engine.py +177 -0
- systemu/recovery/links.py +17 -0
- systemu/runtime/__init__.py +1 -0
- systemu/runtime/affinity_log.py +232 -0
- systemu/runtime/backend/__init__.py +76 -0
- systemu/runtime/backend/docker.py +115 -0
- systemu/runtime/backend/local.py +179 -0
- systemu/runtime/backend/protocol.py +109 -0
- systemu/runtime/backend/ssh.py +52 -0
- systemu/runtime/backend/wsl.py +50 -0
- systemu/runtime/context_builder.py +468 -0
- systemu/runtime/dep_approvals.py +403 -0
- systemu/runtime/dep_conflicts.py +193 -0
- systemu/runtime/dependency_installer.py +521 -0
- systemu/runtime/execution_mind.py +561 -0
- systemu/runtime/execution_snapshot.py +284 -0
- systemu/runtime/failure_classifier.py +316 -0
- systemu/runtime/failure_telemetry.py +274 -0
- systemu/runtime/inadequacy_tracker.py +202 -0
- systemu/runtime/interpreter_check.py +277 -0
- systemu/runtime/memory_backends/__init__.py +20 -0
- systemu/runtime/memory_backends/base.py +26 -0
- systemu/runtime/memory_backends/filesystem.py +51 -0
- systemu/runtime/memory_backends/mem0.py +66 -0
- systemu/runtime/memory_consolidator.py +183 -0
- systemu/runtime/memory_invalidator.py +213 -0
- systemu/runtime/memory_recall.py +84 -0
- systemu/runtime/metrics_tracker.py +195 -0
- systemu/runtime/rejection_store.py +273 -0
- systemu/runtime/shadow_metrics.py +263 -0
- systemu/runtime/shadow_runtime.py +2013 -0
- systemu/runtime/specialty_suggester.py +221 -0
- systemu/runtime/supervisor.py +1375 -0
- systemu/runtime/supervisor_cost_ledger.py +257 -0
- systemu/runtime/tool_metrics.py +285 -0
- systemu/runtime/tool_registry.py +522 -0
- systemu/runtime/tool_sandbox.py +242 -0
- systemu/runtime/workflow_tracker.py +410 -0
- systemu/scheduler/__init__.py +1 -0
- systemu/scheduler/daemon.py +487 -0
- systemu/scheduler/jobs.py +891 -0
- systemu/storage/__init__.py +6 -0
- systemu/storage/file_vault.py +196 -0
- systemu/storage/parallel_vault.py +304 -0
- systemu/storage/skill_migrator.py +122 -0
- systemu/storage/sqlite/__init__.py +5 -0
- systemu/storage/sqlite/models.py +360 -0
- systemu/storage/sqlite/vault.py +1534 -0
- systemu/vault/__init__.py +1 -0
- systemu/vault/factory.py +132 -0
- systemu/vault/tools/implementations/api_call_get.py +66 -0
- systemu/vault/tools/implementations/browser_navigate.py +69 -0
- systemu/vault/tools/implementations/calculate_rsi.py +86 -0
- systemu/vault/tools/implementations/calculate_sma.py +50 -0
- systemu/vault/tools/implementations/clipboard_read.py +20 -0
- systemu/vault/tools/implementations/clipboard_write.py +22 -0
- systemu/vault/tools/implementations/close_application.py +107 -0
- systemu/vault/tools/implementations/compress_files.py +48 -0
- systemu/vault/tools/implementations/create_excel_sheet.py +49 -0
- systemu/vault/tools/implementations/create_word_doc.py +160 -0
- systemu/vault/tools/implementations/detect_language_from_extension.py +72 -0
- systemu/vault/tools/implementations/download_file.py +46 -0
- systemu/vault/tools/implementations/extract_archive.py +60 -0
- systemu/vault/tools/implementations/fetch_docker_hub_metadata.py +63 -0
- systemu/vault/tools/implementations/fetch_github_org_data.py +58 -0
- systemu/vault/tools/implementations/fetch_github_pr_files.py +80 -0
- systemu/vault/tools/implementations/fetch_github_prs.py +51 -0
- systemu/vault/tools/implementations/fetch_html.py +32 -0
- systemu/vault/tools/implementations/fetch_json.py +34 -0
- systemu/vault/tools/implementations/fetch_nse_stock_data.py +153 -0
- systemu/vault/tools/implementations/fetch_reddit_posts.py +56 -0
- systemu/vault/tools/implementations/file_append.py +32 -0
- systemu/vault/tools/implementations/file_copy.py +44 -0
- systemu/vault/tools/implementations/file_delete.py +33 -0
- systemu/vault/tools/implementations/file_list_dir.py +45 -0
- systemu/vault/tools/implementations/file_read.py +36 -0
- systemu/vault/tools/implementations/file_scan_directory.py +54 -0
- systemu/vault/tools/implementations/file_write.py +36 -0
- systemu/vault/tools/implementations/format_date.py +28 -0
- systemu/vault/tools/implementations/generate_pr_review_markdown.py +106 -0
- systemu/vault/tools/implementations/github_get_commit.py +49 -0
- systemu/vault/tools/implementations/github_get_workflow_run.py +39 -0
- systemu/vault/tools/implementations/github_list_workflow_runs.py +61 -0
- systemu/vault/tools/implementations/image_resize.py +69 -0
- systemu/vault/tools/implementations/keyboard_shortcut.py +96 -0
- systemu/vault/tools/implementations/launch_application.py +103 -0
- systemu/vault/tools/implementations/mouse_click.py +118 -0
- systemu/vault/tools/implementations/mouse_drag.py +70 -0
- systemu/vault/tools/implementations/notify_desktop.py +32 -0
- systemu/vault/tools/implementations/parse_diff_statistics.py +81 -0
- systemu/vault/tools/implementations/parse_json.py +49 -0
- systemu/vault/tools/implementations/read_excel_sheet.py +64 -0
- systemu/vault/tools/implementations/read_word_doc.py +35 -0
- systemu/vault/tools/implementations/run_cli_command.py +49 -0
- systemu/vault/tools/implementations/run_command.py +54 -0
- systemu/vault/tools/implementations/search_emails.py +67 -0
- systemu/vault/tools/implementations/send_email.py +78 -0
- systemu/vault/tools/implementations/take_screenshot.py +53 -0
- systemu/vault/tools/implementations/type_text.py +44 -0
- systemu/vault/tools/implementations/web_extract_table.py +120 -0
- systemu/vault/tools/implementations/web_extract_text.py +61 -0
- systemu/vault/tools/implementations/web_screenshot.py +77 -0
- systemu/vault/tools/implementations/web_search.py +128 -0
- systemu/vault/tools/implementations/write_csv_file.py +45 -0
- systemu/vault/tools/implementations/write_markdown_file.py +41 -0
- systemu/vault/tools/implementations/write_text_file.py +37 -0
- systemu/vault/vault.py +1095 -0
- systemu/worker.py +223 -0
- systemu-0.7.0.dist-info/METADATA +749 -0
- systemu-0.7.0.dist-info/RECORD +242 -0
- systemu-0.7.0.dist-info/WHEEL +5 -0
- systemu-0.7.0.dist-info/entry_points.txt +2 -0
- systemu-0.7.0.dist-info/licenses/LICENSE +21 -0
- systemu-0.7.0.dist-info/top_level.txt +2 -0
sharing_on/__init__.py
ADDED
sharing_on/__main__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Analyzer subpackage — step detection and instruction generation."""
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""LLM-powered instruction generator — converts detected steps into
|
|
2
|
+
human-readable, step-by-step instructions using OpenRouter API.
|
|
3
|
+
|
|
4
|
+
Uses the OpenAI-compatible client library with OpenRouter's base URL.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from typing import List, Optional
|
|
12
|
+
|
|
13
|
+
from openai import OpenAI
|
|
14
|
+
|
|
15
|
+
from sharing_on.analyzer.step_detector import Step
|
|
16
|
+
from sharing_on.events.models import EventAction, EventCategory
|
|
17
|
+
from sharing_on.redactor import redact
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
SYSTEM_PROMPT = """You are an expert Standard Operating Procedure (SOP) writer.
|
|
23
|
+
Your job is to analyze captured computer activity and produce a clear, narrative
|
|
24
|
+
document that another person can follow to reproduce the exact same task.
|
|
25
|
+
|
|
26
|
+
## Your Approach
|
|
27
|
+
|
|
28
|
+
1. **Infer the Overall Intent**: Before writing any steps, analyze ALL the captured
|
|
29
|
+
events holistically. Determine WHAT the user was trying to accomplish at a high
|
|
30
|
+
level (e.g., "Create a daily financial market summary report" or "Deploy a
|
|
31
|
+
microservice to production"). State this intent clearly at the top of the document
|
|
32
|
+
as a one-paragraph executive summary.
|
|
33
|
+
|
|
34
|
+
2. **Identify Subtasks**: Break the full activity into logical subtasks or phases
|
|
35
|
+
(e.g., "Phase 1: Research — Gather market data", "Phase 2: Documentation —
|
|
36
|
+
Compile findings into a report"). Each subtask should have a clear heading and
|
|
37
|
+
a brief description of its purpose before the numbered steps.
|
|
38
|
+
|
|
39
|
+
3. **Write Narrative Steps**: Each step should read like a clear instruction from
|
|
40
|
+
a knowledgeable colleague, not a robotic log. Use natural language.
|
|
41
|
+
- BAD: "Clicked 'Save' (ButtonControl) in Save As dialog"
|
|
42
|
+
- GOOD: "Save the file by clicking the **Save** button in the Save As dialog."
|
|
43
|
+
|
|
44
|
+
## Rules
|
|
45
|
+
|
|
46
|
+
1. Write each step as a clear, action-oriented instruction
|
|
47
|
+
2. Include exact commands, URLs, and file paths when detected
|
|
48
|
+
3. Mention which application was used at the start of each step (e.g., "**In Google Chrome**, ...")
|
|
49
|
+
4. Be specific but concise — assume the reader is technically competent but unfamiliar with this task
|
|
50
|
+
5. If clipboard content was pasted, mention what was pasted and where
|
|
51
|
+
6. Format file changes as markdown diff blocks
|
|
52
|
+
7. Do NOT invent steps that weren't captured — only document what actually happened
|
|
53
|
+
8. Group closely related sub-actions (like repeated formatting clicks) into a single step and describe the intent
|
|
54
|
+
9. When a UI element was clicked, describe it by its visible label or name, NOT by coordinates
|
|
55
|
+
10. If a URL was navigated to, include the full URL
|
|
56
|
+
11. When a repeated action is noted (e.g., "clicked 15 times"), describe the intent
|
|
57
|
+
(e.g., "Reduced the font size to approximately 10pt by clicking the decrease button repeatedly")
|
|
58
|
+
12. If an input field was changed, mention which field and what value was entered
|
|
59
|
+
13. If the user switched between apps to copy/reference information, describe the workflow
|
|
60
|
+
(e.g., "Switch to the browser tab showing NSE India to note the closing Nifty value, then return to the Google Doc to enter it")
|
|
61
|
+
|
|
62
|
+
## Output Format
|
|
63
|
+
|
|
64
|
+
```markdown
|
|
65
|
+
# [Task Title — inferred from activity]
|
|
66
|
+
|
|
67
|
+
## Overview
|
|
68
|
+
[1-2 paragraph executive summary of what was accomplished and why]
|
|
69
|
+
|
|
70
|
+
## Subtask 1: [Phase Name]
|
|
71
|
+
[Brief description of this phase's purpose]
|
|
72
|
+
|
|
73
|
+
1. **[App Name]** — [Clear narrative instruction]
|
|
74
|
+
2. ...
|
|
75
|
+
|
|
76
|
+
## Subtask 2: [Phase Name]
|
|
77
|
+
...
|
|
78
|
+
|
|
79
|
+
## Result
|
|
80
|
+
[Brief description of the final outcome — e.g., what file was created, what was deployed, etc.]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Return ONLY the Markdown document. Do not add any preamble or explanation outside the document."""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def generate_instructions(
    steps: List[Step],
    session_name: str,
    platform_info: str,
    duration_seconds: float,
    api_key: str,
    base_url: str = "https://openrouter.ai/api/v1",
    model: str = "openai/gpt-4o-mini",
    intent: Optional["IntentExtraction"] = None,  # noqa: F821 — fwd ref
) -> str:
    """Send captured steps to the LLM and get back formatted instructions.

    Args:
        steps: List of detected steps with their events.
        session_name: Name of the capture session.
        platform_info: Platform description string.
        duration_seconds: Total session duration.
        api_key: OpenRouter API key.
        base_url: OpenRouter API base URL.
        model: LLM model identifier.
        intent: (v0.6.0-a) optional pre-extracted intent. When present and
            its ``is_usable`` attribute is truthy, the LLM is told the
            user's actual outcome up-front and instructed to anchor the
            narrative on that intent rather than re-inferring it from the
            click sequence.

    Returns:
        Markdown-formatted step-by-step instructions. When ``steps`` is
        empty a fixed "no activity" notice is returned without calling the
        LLM. When the LLM call raises, or returns an empty completion, the
        raw-step fallback document is returned instead.
    """
    if not steps:
        return "_No activity was captured during this session._"

    # Build the structured step data for the LLM.
    step_descriptions = [_format_step_for_llm(step) for step in steps]
    steps_text = "\n".join(step_descriptions)

    # When intent is pre-extracted, surface it explicitly so the narrative
    # LLM doesn't have to re-derive it from clicks (which is the whole reason
    # the click-mirroring failure mode exists).
    intent_block = ""
    if intent is not None and getattr(intent, "is_usable", False):
        intent_block = (
            "## Pre-Inferred User Intent\n\n"
            f"- **Intent:** {intent.intent}\n"
            f"- **Expected outcome:** {intent.expected_outcome}\n"
            f"- **Success signal:** {intent.success_signal}\n\n"
            "Anchor your narrative on this stated intent. The captured steps "
            "below describe HOW the user happened to do it; your job is to "
            "narrate them in a way that serves the stated intent, not to "
            "re-derive intent from the click sequence.\n\n"
            "---\n\n"
        )

    user_prompt = f"""Task Name: {session_name}
Platform: {platform_info}
Total Duration: {duration_seconds:.0f} seconds
Number of Steps Detected: {len(steps)}

{intent_block}Below are the captured steps with their raw events. Convert these into clear,
reproducible instructions.

---

{steps_text}
"""

    # Redact PII before sending to LLM. This deliberately stays outside the
    # try block below: a redaction failure propagates to the caller rather
    # than being treated as an LLM failure.
    user_prompt = redact(user_prompt)

    try:
        client = OpenAI(
            api_key=api_key,
            base_url=base_url,
        )

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,  # Low creativity, high accuracy
            max_tokens=4000,
            top_p=0.9,
        )

        instructions = (response.choices[0].message.content or "").strip()
        tokens_used = response.usage.total_tokens if response.usage else "unknown"
        logger.info(
            "Generated instructions: %d chars, tokens used: %s",
            len(instructions),
            tokens_used,
        )
    except Exception as e:
        # Best-effort boundary: any client/API failure degrades to the
        # LLM-free document. The traceback is kept in the log for diagnosis.
        logger.exception("LLM instruction generation failed: %s", e)
        return _generate_fallback_instructions(steps, session_name)

    if not instructions:
        # An empty completion would otherwise be returned as an empty
        # document; the raw captured steps are more useful than nothing.
        logger.warning("LLM returned an empty completion; using fallback instructions")
        return _generate_fallback_instructions(steps, session_name)

    return instructions
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _format_step_for_llm(step: Step) -> str:
|
|
187
|
+
"""Format a single step's events into a readable description for the LLM."""
|
|
188
|
+
lines = []
|
|
189
|
+
lines.append(f"### Step {step.step_number}")
|
|
190
|
+
|
|
191
|
+
if step.label:
|
|
192
|
+
lines.append(f"**User Label:** {step.label}")
|
|
193
|
+
|
|
194
|
+
if step.primary_app:
|
|
195
|
+
lines.append(f"**Primary Application:** {step.primary_app}")
|
|
196
|
+
|
|
197
|
+
if step.start_time:
|
|
198
|
+
lines.append(f"**Time:** {step.start_time.strftime('%H:%M:%S')}")
|
|
199
|
+
lines.append(f"**Duration:** {step.duration_seconds:.1f}s")
|
|
200
|
+
|
|
201
|
+
lines.append("")
|
|
202
|
+
lines.append("**Events:**")
|
|
203
|
+
|
|
204
|
+
# Emit relevant events (skip screenshots — they're referenced separately)
|
|
205
|
+
for event in step.events:
|
|
206
|
+
if event.category == EventCategory.SCREEN:
|
|
207
|
+
continue
|
|
208
|
+
|
|
209
|
+
if event.action == EventAction.WINDOW_FOCUS:
|
|
210
|
+
app = event.application or "Unknown"
|
|
211
|
+
title = event.window_title or ""
|
|
212
|
+
lines.append(f"- Switched to **{app}**: {title}")
|
|
213
|
+
|
|
214
|
+
elif event.action == EventAction.FILE_CREATED:
|
|
215
|
+
lines.append(f"- Created file: `{event.file_path}`")
|
|
216
|
+
|
|
217
|
+
elif event.action == EventAction.FILE_MODIFIED:
|
|
218
|
+
lines.append(f"- Modified file: `{event.file_path}`")
|
|
219
|
+
diff = event.data.get("diff")
|
|
220
|
+
if diff:
|
|
221
|
+
# Truncate very long diffs for the LLM
|
|
222
|
+
if len(diff) > 2000:
|
|
223
|
+
diff = diff[:2000] + "\n... (truncated)"
|
|
224
|
+
lines.append(f" ```diff\n{diff}\n ```")
|
|
225
|
+
|
|
226
|
+
elif event.action == EventAction.FILE_DELETED:
|
|
227
|
+
lines.append(f"- Deleted file: `{event.file_path}`")
|
|
228
|
+
|
|
229
|
+
elif event.action == EventAction.FILE_MOVED:
|
|
230
|
+
dest = event.data.get("dest_path", "unknown")
|
|
231
|
+
lines.append(f"- Moved file: `{event.file_path}` → `{dest}`")
|
|
232
|
+
|
|
233
|
+
elif event.action == EventAction.PROCESS_STARTED:
|
|
234
|
+
cmdline = event.data.get("cmdline", event.process_name or "")
|
|
235
|
+
lines.append(f"- Ran command: `{cmdline}`")
|
|
236
|
+
|
|
237
|
+
elif event.action == EventAction.PROCESS_ENDED:
|
|
238
|
+
lines.append(f"- Process ended: {event.process_name}")
|
|
239
|
+
|
|
240
|
+
elif event.action == EventAction.CLIPBOARD_CHANGE:
|
|
241
|
+
preview = event.data.get("preview", "")
|
|
242
|
+
content_type = event.data.get("content_type", "text")
|
|
243
|
+
if content_type == "command":
|
|
244
|
+
lines.append(f"- Copied command: `{preview}`")
|
|
245
|
+
elif content_type == "code":
|
|
246
|
+
lines.append(f"- Copied code snippet: `{preview[:100]}`")
|
|
247
|
+
elif content_type == "url":
|
|
248
|
+
lines.append(f"- Copied URL: `{preview}`")
|
|
249
|
+
else:
|
|
250
|
+
lines.append(f"- Copied to clipboard: {preview[:100]}")
|
|
251
|
+
|
|
252
|
+
elif event.action == EventAction.STEP_MARKER:
|
|
253
|
+
label = event.data.get("label", "")
|
|
254
|
+
key_name = event.data.get("key", "")
|
|
255
|
+
if key_name:
|
|
256
|
+
lines.append(f"- Pressed key: **{key_name}**")
|
|
257
|
+
elif label:
|
|
258
|
+
lines.append(f"- User note: {label}")
|
|
259
|
+
|
|
260
|
+
elif event.action == EventAction.MOUSE_CLICK:
|
|
261
|
+
app = event.application or "Unknown"
|
|
262
|
+
el_name = event.data.get("element_name", "")
|
|
263
|
+
ctrl_type = event.data.get("control_type", "")
|
|
264
|
+
xpath = event.data.get("element_xpath", "")
|
|
265
|
+
url = event.data.get("url", "")
|
|
266
|
+
el_text = event.data.get("element_text", "")
|
|
267
|
+
value = event.data.get("value", "")
|
|
268
|
+
repeat = event.data.get("repeat_count", 1)
|
|
269
|
+
|
|
270
|
+
# Build a clear, semantic description
|
|
271
|
+
desc_parts = []
|
|
272
|
+
if el_name and el_name != "Unknown":
|
|
273
|
+
desc_parts.append(f"**{el_name}**")
|
|
274
|
+
elif el_text:
|
|
275
|
+
desc_parts.append(f"**{el_text}**")
|
|
276
|
+
|
|
277
|
+
if ctrl_type and ctrl_type != "Unknown":
|
|
278
|
+
desc_parts.append(f"({ctrl_type})")
|
|
279
|
+
|
|
280
|
+
if url:
|
|
281
|
+
desc_parts.append(f"on page `{url}`")
|
|
282
|
+
|
|
283
|
+
if value:
|
|
284
|
+
desc_parts.append(f"[value: `{value}`]")
|
|
285
|
+
|
|
286
|
+
desc = " ".join(desc_parts) if desc_parts else "an element"
|
|
287
|
+
|
|
288
|
+
if repeat and repeat > 1:
|
|
289
|
+
lines.append(f"- Clicked {desc} **{repeat} times** in **{app}**")
|
|
290
|
+
else:
|
|
291
|
+
lines.append(f"- Clicked {desc} in **{app}**")
|
|
292
|
+
|
|
293
|
+
elif event.action == EventAction.KEY_PRESS:
|
|
294
|
+
el_text = event.data.get("element_text", "")
|
|
295
|
+
value = event.data.get("value", "")
|
|
296
|
+
url = event.data.get("url", "")
|
|
297
|
+
if value:
|
|
298
|
+
lines.append(f"- Typed `{value}` into a field")
|
|
299
|
+
if url:
|
|
300
|
+
lines.append(f" on page `{url}`")
|
|
301
|
+
elif el_text:
|
|
302
|
+
lines.append(f"- Interacted with input: {el_text}")
|
|
303
|
+
|
|
304
|
+
lines.append("")
|
|
305
|
+
return "\n".join(lines)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _generate_fallback_instructions(steps: List[Step], session_name: str) -> str:
|
|
309
|
+
"""Generate basic instructions without LLM (fallback if API fails)."""
|
|
310
|
+
lines = [
|
|
311
|
+
f"# {session_name}",
|
|
312
|
+
"",
|
|
313
|
+
"_Note: LLM generation failed. Showing raw captured steps._",
|
|
314
|
+
"",
|
|
315
|
+
]
|
|
316
|
+
|
|
317
|
+
for step in steps:
|
|
318
|
+
lines.append(f"## Step {step.step_number}")
|
|
319
|
+
if step.label:
|
|
320
|
+
lines.append(f"_{step.label}_")
|
|
321
|
+
if step.primary_app:
|
|
322
|
+
lines.append(f"**Application:** {step.primary_app}")
|
|
323
|
+
lines.append("")
|
|
324
|
+
|
|
325
|
+
for event in step.events:
|
|
326
|
+
if event.category == EventCategory.SCREEN:
|
|
327
|
+
continue
|
|
328
|
+
lines.append(f"- {event.summary}")
|
|
329
|
+
|
|
330
|
+
lines.append("")
|
|
331
|
+
|
|
332
|
+
return "\n".join(lines)
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""Intent extractor (v0.6.0-a, Stage 1).
|
|
2
|
+
|
|
3
|
+
Pre-pass that runs BEFORE the narrative generator (`generator.py`). Reads
|
|
4
|
+
the raw events + detected steps and emits a structured ``intent.json``
|
|
5
|
+
artifact at the session root.
|
|
6
|
+
|
|
7
|
+
The point: today the downstream pipeline (Scroll → Activity → Tools →
|
|
8
|
+
Shadow) infers the user's intent from a click-by-click narrative. When
|
|
9
|
+
the user's captured workflow uses one app to achieve an outcome that
|
|
10
|
+
could be achieved better another way (e.g., Snipping Tool + Word to
|
|
11
|
+
"document weather" — actually just wants weather data documented), the
|
|
12
|
+
downstream LLMs faithfully reproduce the means and miss the end.
|
|
13
|
+
|
|
14
|
+
This extractor decouples intent from means. Output schema::
|
|
15
|
+
|
|
16
|
+
{
|
|
17
|
+
"intent": "<outcome, one line, no app/GUI names>",
|
|
18
|
+
"expected_outcome": "<concrete success description>",
|
|
19
|
+
"success_signal": "<observable proof of completion>",
|
|
20
|
+
"abstracted_steps": ["<outcome-described step>", ...],
|
|
21
|
+
"confidence": "high" | "medium" | "low"
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
Best-effort throughout — when the extractor fails or returns
|
|
25
|
+
``confidence == "low"``, callers fall back to today's narrative-only
|
|
26
|
+
behaviour (no operator card; this stage is read-only background work).
|
|
27
|
+
"""
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import logging
|
|
32
|
+
from collections import Counter
|
|
33
|
+
from dataclasses import asdict, dataclass, field
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any, Dict, List, Optional
|
|
36
|
+
|
|
37
|
+
from openai import OpenAI
|
|
38
|
+
|
|
39
|
+
from sharing_on.analyzer.step_detector import Step
|
|
40
|
+
from sharing_on.events.models import EventAction, EventCategory
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
@dataclass
class IntentExtraction:
    """Structured description of what the user was trying to achieve.

    Holds the fields of the ``intent.json`` artifact plus an optional
    ``error`` explaining why an extraction could not be produced.
    """

    # One-line statement of the outcome the user wanted.
    intent: str = ""
    # Concrete description of what success looks like.
    expected_outcome: str = ""
    # Observable proof that the task completed.
    success_signal: str = ""
    # Outcome-described steps.
    abstracted_steps: List[str] = field(default_factory=list)
    # One of: high | medium | low
    confidence: str = "low"
    # Failure description, when there is one.
    error: Optional[str] = None

    @property
    def is_usable(self) -> bool:
        """Whether the downstream pipeline should prefer this over narrative.

        True only for ``high``/``medium`` confidence with a non-blank intent.
        """
        if self.confidence not in ("high", "medium"):
            return False
        return bool(self.intent.strip())
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
65
|
+
# Public API
|
|
66
|
+
|
|
67
|
+
def extract_intent(
    *,
    steps: List[Step],
    events: List[Any],
    session_name: str,
    platform_info: str,
    api_key: str,
    base_url: str = "https://openrouter.ai/api/v1",
    model: str = "google/gemini-2.0-flash-exp:free",
) -> IntentExtraction:
    """Run the intent-extraction LLM pass.

    Args:
        steps: Detected steps of the capture session.
        events: Raw captured events.
        session_name: Name of the capture session.
        platform_info: Platform description string.
        api_key: API key passed to the OpenAI-compatible client.
        base_url: Base URL of the OpenAI-compatible endpoint.
        model: LLM model identifier.

    Returns:
        A structured ``IntentExtraction``. Never raises — every stage
        (summarising the session, loading the prompt, calling the LLM,
        parsing the reply) is guarded, and a failure in any of them is
        returned as ``confidence="low"`` with ``error`` populated so the
        caller can fall back gracefully.
    """
    if not steps and not events:
        return IntentExtraction(
            confidence="low",
            error="no steps or events to analyse",
        )

    # The summarising helpers live elsewhere in this module; they are guarded
    # here so that an unexpected event shape cannot break the "never raises"
    # contract.
    try:
        payload = {
            "session_name": session_name,
            "platform": platform_info,
            "event_summary": _summarise_events(events, steps),
            "abstracted_step_descriptions": _abstract_step_titles(steps),
        }
    except Exception as exc:
        logger.warning("[IntentExtractor] could not summarise session: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))

    try:
        prompt = _load_prompt()
    except Exception as exc:
        logger.warning("[IntentExtractor] could not load prompt: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))

    try:
        client = OpenAI(api_key=api_key, base_url=base_url)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": prompt},
                # default=str stringifies any value json cannot encode natively.
                {"role": "user", "content": json.dumps(payload, default=str)},
            ],
            temperature=0.1,
            max_tokens=1024,
            response_format={"type": "json_object"},
        )
        # An empty completion is handed to the parser as an empty JSON object.
        raw = response.choices[0].message.content or "{}"
    except Exception as exc:
        logger.warning("[IntentExtractor] LLM call failed: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))

    try:
        return _parse_response(raw)
    except Exception as exc:
        logger.warning("[IntentExtractor] could not parse LLM response: %s", exc)
        return IntentExtraction(confidence="low", error=str(exc))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def write_intent_json(intent: IntentExtraction, session_dir: Path) -> Path:
    """Serialise ``intent`` to ``<session_dir>/intent.json`` on a best-effort basis.

    Any exception raised while serialising or writing is logged (with
    traceback) and swallowed, so the returned path is where the file was
    meant to go -- it is not proof that the file exists.

    Returns:
        The path of ``intent.json`` inside ``session_dir``.
    """
    destination = Path(session_dir) / "intent.json"
    try:
        record = asdict(intent)
        if intent.is_usable:
            # A usable extraction has nothing worth reporting in ``error``;
            # unusable ones keep it so the failure mode stays visible on disk.
            record.pop("error", None)
        serialised = json.dumps(record, indent=2)
        destination.write_text(serialised, encoding="utf-8")
        logger.info(
            "[IntentExtractor] wrote %s (confidence=%s)",
            destination,
            intent.confidence,
        )
    except Exception:
        logger.exception("[IntentExtractor] failed to write intent.json")
    return destination
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def read_intent_json(session_dir: Path) -> Optional[IntentExtraction]:
    """Rebuild an ``IntentExtraction`` from ``<session_dir>/intent.json``.

    Returns ``None`` when the file does not exist, when its top-level JSON
    value is not an object, or when reading/parsing/constructing fails for
    any reason (the failure is logged at debug level with a traceback).

    Text fields are coerced with ``str`` and cut to 500 characters; each
    abstracted step is cut to 200 characters and at most 12 are kept.
    """
    source = Path(session_dir) / "intent.json"
    if not source.exists():
        return None

    try:
        loaded = json.loads(source.read_text(encoding="utf-8"))
        if not isinstance(loaded, dict):
            return None

        text_fields = {
            key: str(loaded.get(key, ""))[:500]
            for key in ("intent", "expected_outcome", "success_signal")
        }
        step_lines = [
            str(item)[:200] for item in (loaded.get("abstracted_steps") or [])
        ]
        return IntentExtraction(
            abstracted_steps=step_lines[:12],
            confidence=str(loaded.get("confidence", "low")),
            # ``error`` is passed through untouched; it is absent (None) for
            # files written from usable extractions.
            error=loaded.get("error"),
            **text_fields,
        )
    except Exception:
        logger.debug("[IntentExtractor] read failed", exc_info=True)
        return None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
169
|
+
# Internals
|
|
170
|
+
|
|
171
|
+
def _load_prompt() -> str:
    """Return the UTF-8 text of ``prompts/extract_intent.md`` beside this module.

    Any error from reading the file (e.g. it is missing) propagates to the
    caller.
    """
    module_dir = Path(__file__).resolve().parent
    prompt_file = module_dir.joinpath("prompts", "extract_intent.md")
    return prompt_file.read_text(encoding="utf-8")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _summarise_events(events: List[Any], steps: List[Step]) -> Dict[str, Any]:
    """Condense raw events into a small statistics dict for the LLM payload.

    One pass over ``events`` collects:

    * how often each truthy ``application`` value occurs,
    * the ``file_path`` of file-created and file-modified events,
    * any URL carried by the event (``url`` attribute, else ``data["url"]``),
    * the number of clipboard-category events.

    The lists are de-duplicated and capped (8 applications, most frequent
    first; 10 entries for each file/URL list) to keep token cost bounded.
    ``None`` is accepted for either argument and treated as empty.
    """
    event_list = events or []

    app_hits: Counter = Counter()
    created: List[str] = []
    modified: List[str] = []
    links: List[str] = []
    clipboard_hits = 0

    for event in event_list:
        app_name = getattr(event, "application", None)
        if app_name:
            app_hits[app_name] += 1

        # Route file events to the matching bucket; other actions have none.
        action = getattr(event, "action", None)
        if action == EventAction.FILE_CREATED:
            bucket = created
        elif action == EventAction.FILE_MODIFIED:
            bucket = modified
        else:
            bucket = None
        if bucket is not None:
            path = getattr(event, "file_path", None)
            if path:
                bucket.append(str(path))

        if getattr(event, "category", None) == EventCategory.CLIPBOARD:
            clipboard_hits += 1

        # Prefer a direct ``url`` attribute; otherwise look inside ``data``.
        link = getattr(event, "url", None)
        if not link:
            extra = getattr(event, "data", {}) or {}
            link = extra.get("url")
        if link:
            links.append(str(link))

    most_used_apps = [name for name, _ in app_hits.most_common(8)]

    return {
        "applications_used": most_used_apps,
        "files_created": _dedup_cap(created, cap=10),
        "files_modified": _dedup_cap(modified, cap=10),
        "urls_visited": _dedup_cap(links, cap=10),
        "clipboard_actions": clipboard_hits,
        "step_count": len(steps or []),
        "total_events": len(event_list),
    }
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _abstract_step_titles(steps: List[Step]) -> List[str]:
|
|
223
|
+
"""Produce one short line per detected step. We deliberately keep app
|
|
224
|
+
hints out where possible — the prompt will further abstract them."""
|
|
225
|
+
out: List[str] = []
|
|
226
|
+
for s in (steps or [])[:20]:
|
|
227
|
+
# Prefer user-provided label, else fall back to primary_app + brief
|
|
228
|
+
# event-type breakdown. Capped per step.
|
|
229
|
+
if getattr(s, "label", None):
|
|
230
|
+
out.append(f"Step {s.step_number}: {str(s.label)[:140]}")
|
|
231
|
+
continue
|
|
232
|
+
|
|
233
|
+
counts = getattr(s, "event_summary", None) or {}
|
|
234
|
+
parts: List[str] = []
|
|
235
|
+
if counts.get("file"):
|
|
236
|
+
parts.append(f"{counts['file']} file ops")
|
|
237
|
+
if counts.get("clipboard"):
|
|
238
|
+
parts.append(f"{counts['clipboard']} clipboard ops")
|
|
239
|
+
if counts.get("process"):
|
|
240
|
+
parts.append(f"{counts['process']} process ops")
|
|
241
|
+
if counts.get("window"):
|
|
242
|
+
parts.append(f"{counts['window']} window switches")
|
|
243
|
+
|
|
244
|
+
out.append(
|
|
245
|
+
f"Step {s.step_number}: "
|
|
246
|
+
+ (", ".join(parts) if parts else "activity observed")
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
return out
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _parse_response(raw: str) -> IntentExtraction:
    """Turn the model's raw reply into an ``IntentExtraction``.

    The parser is deliberately tolerant, because its input is untrusted
    model output:

    * a surrounding Markdown code fence is stripped before decoding;
    * output that is not valid JSON, or whose top-level value is not an
      object, yields a ``confidence="low"`` result with ``error`` set;
    * a missing or ``null`` text field becomes ``""`` (not the string
      ``"None"``), and a missing or ``null`` confidence becomes ``"low"``;
    * ``abstracted_steps`` is used only when it is a JSON array -- any other
      value (number, boolean, string, object) is treated as "no steps"
      instead of raising or being split into single characters.

    Text fields are cut to 500 characters, each step to 200 characters, and
    at most 12 steps are kept.  Confidence is lower-cased.
    """
    text = (raw or "").strip()
    if text.startswith("```"):
        fenced = text.split("\n")
        # The first line is the opening fence (possibly "```json").
        fenced = fenced[1:]
        if fenced and fenced[-1].startswith("```"):
            fenced = fenced[:-1]
        text = "\n".join(fenced).strip()

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        logger.warning("[IntentExtractor] could not parse model output: %s", exc)
        return IntentExtraction(confidence="low", error=f"parse_error: {exc}")

    if not isinstance(data, dict):
        return IntentExtraction(confidence="low", error="response was not a JSON object")

    def _text(key: str, default: str = "") -> str:
        # JSON null decodes to None; str(None) would leak the text "None".
        value = data.get(key)
        return default if value is None else str(value)

    steps = data.get("abstracted_steps")
    if not isinstance(steps, list):
        # Iterating a number raises TypeError and iterating a string yields
        # characters; neither is a usable step list.
        steps = []

    return IntentExtraction(
        intent=_text("intent")[:500],
        expected_outcome=_text("expected_outcome")[:500],
        success_signal=_text("success_signal")[:500],
        abstracted_steps=[str(s)[:200] for s in steps][:12],
        confidence=_text("confidence", "low").lower(),
    )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _dedup_cap(items: List[str], *, cap: int) -> List[str]:
|
|
284
|
+
seen, out = set(), []
|
|
285
|
+
for x in items:
|
|
286
|
+
if x in seen:
|
|
287
|
+
continue
|
|
288
|
+
seen.add(x)
|
|
289
|
+
out.append(x)
|
|
290
|
+
if len(out) >= cap:
|
|
291
|
+
break
|
|
292
|
+
return out
|