terx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- terx/__init__.py +10 -0
- terx/agent/__init__.py +3 -0
- terx/agent/healer.py +67 -0
- terx/cache/__init__.py +9 -0
- terx/cache/cache.py +598 -0
- terx/cdp/__init__.py +4 -0
- terx/cdp/bridge.py +195 -0
- terx/cdp/session.py +181 -0
- terx/dom/__init__.py +3 -0
- terx/dom/extractor.py +250 -0
- terx/server/__init__.py +3 -0
- terx/server/mcp.py +430 -0
- terx/vision/__init__.py +10 -0
- terx/vision/ssim.py +30 -0
- terx-0.1.0.dist-info/METADATA +359 -0
- terx-0.1.0.dist-info/RECORD +18 -0
- terx-0.1.0.dist-info/WHEEL +4 -0
- terx-0.1.0.dist-info/entry_points.txt +3 -0
terx/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TERX — Memory layer for browser agents.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from terx.cdp.bridge import CDPBridge
|
|
6
|
+
from terx.cdp.session import BrowserSession
|
|
7
|
+
from terx.cache.cache import MuscleMemorycache
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
__all__ = ["CDPBridge", "BrowserSession", "MuscleMemorycache"]
|
terx/agent/__init__.py
ADDED
terx/agent/healer.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
import litellm
|
|
8
|
+
HAS_LITELLM = True
|
|
9
|
+
except ImportError:
|
|
10
|
+
HAS_LITELLM = False
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class SelfHealer:
    """
    LLM-powered self-healing fallback for broken CDP replays.

    Uses litellm to reason about the DOM changes and propose new CDP parameters.
    """

    def __init__(self, model_name: str = "gpt-4o"):
        # Model identifier handed to litellm.acompletion().
        self.model_name = model_name

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """
        Return *content* without a surrounding Markdown code fence.

        Handles leading/trailing whitespace, an optional ``json`` language
        tag, and a missing closing fence. Text that does not start with a
        fence is returned stripped but otherwise untouched.
        """
        text = content.strip()
        if not text.startswith("```"):
            return text
        text = text[3:]
        if text[:4].lower() == "json":
            text = text[4:]
        # Only drop a closing fence that is really there; slicing blindly
        # would cut the last characters off an unterminated answer.
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    async def heal_command(self, failed_method: str, old_params: dict, current_dom: list, task_desc: str) -> Optional[dict]:
        """
        Ask the LLM for replacement parameters for a failed CDP command.

        Args:
            failed_method: Name of the CDP method that failed during replay.
            old_params: Parameters that were recorded for that method.
            current_dom: Elements of the current DOM snapshot; each element is
                read for ``id``, ``role``, ``label`` and ``backend_dom_id``.
            task_desc: Description of the task being replayed.

        Returns:
            The new parameter dict, or ``None`` when litellm is not installed,
            no API key is configured, the request fails, or the answer is not
            a JSON object.
        """
        if not HAS_LITELLM:
            logger.warning("litellm is not installed. Self-healing is disabled.")
            return None

        if not os.environ.get("OPENAI_API_KEY") and not os.environ.get("ANTHROPIC_API_KEY"):
            logger.warning("No API key found for self-healing (needs OPENAI_API_KEY or ANTHROPIC_API_KEY).")
            return None

        dom_json = json.dumps(
            [
                {"id": el.id, "role": el.role, "label": el.label, "backendDOMNodeId": el.backend_dom_id}
                for el in current_dom
            ],
            indent=2,
        )

        prompt = f"""
You are an autonomous browser agent memory layer. A previously recorded sequence failed during replay due to DOM drift.

Task Context: {task_desc}
Failed CDP Method: {failed_method}
Old Parameters: {json.dumps(old_params)}

Current DOM State (Interactable Elements):
{dom_json}

Your job is to find the correct new parameters for this CDP method.
If it was a click or focus on a specific element, identify the new element's backendNodeId from the current DOM based on the role and label that best matches the intent.

Return ONLY valid JSON with the new parameters. Do not include markdown blocks.
"""

        try:
            response = await litellm.acompletion(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"},
            )
            content = response.choices[0].message.content
            if not content:
                logger.error("Self-healing prediction failed: %s", "empty response")
                return None

            # Clean up markdown if present
            parsed = json.loads(self._strip_code_fence(content))
        except Exception as e:
            # Best-effort fallback: any failure means "could not heal".
            logger.error("Self-healing prediction failed: %s", e)
            return None

        if not isinstance(parsed, dict):
            # CDP parameters must be an object; a list or scalar is unusable.
            logger.error("Self-healing prediction failed: %s", "response is not a JSON object")
            return None
        return parsed
|
terx/cache/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from terx.cache.cache import (
|
|
2
|
+
MuscleMemorycache, CDPCommand, CachedSequence,
|
|
3
|
+
ReplayCostLedger, CacheReplayError, session_for
|
|
4
|
+
)
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"MuscleMemorycache", "CDPCommand", "CachedSequence",
|
|
8
|
+
"ReplayCostLedger", "CacheReplayError", "session_for"
|
|
9
|
+
]
|
terx/cache/cache.py
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Muscle Memory Cache — the core of TERX.
|
|
3
|
+
|
|
4
|
+
Records successful CDP action sequences keyed by (domain, structural_hash, task).
|
|
5
|
+
On cache hit: replays raw CDP commands directly — zero LLM tokens.
|
|
6
|
+
On cache miss: lets the agent reason normally, then caches the result.
|
|
7
|
+
|
|
8
|
+
Writes sessions in .vcr format (compatible with Agent VCR).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import hashlib
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import sqlite3
|
|
18
|
+
import time
|
|
19
|
+
from dataclasses import asdict, dataclass
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
from urllib.parse import urlparse
|
|
24
|
+
|
|
25
|
+
from terx.cdp.bridge import CDPBridge
|
|
26
|
+
from terx.dom.extractor import DOMExtractor, DOMSnapshot, hash_similarity
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
# Cache hit threshold: default for MuscleMemorycache.similarity_threshold.
# lookup() accepts a stored role sequence when its similarity score is
# greater than or equal to this value.
SIMILARITY_THRESHOLD = 0.85
# replay() logs a visual-drift warning when the SSIM score is below this value.
SSIM_THRESHOLD = 0.85
# NOTE(review): not referenced elsewhere in this module; the constructor
# default for vcr_dir is the literal ".vcr". Confirm whether it is still needed.
VCR_DIR = Path(".vcr")
# Directory where baseline screenshots are stored, one PNG per structural hash.
SCREENSHOT_DIR = Path(".terx/screenshots")

# CDP methods that are auto-recorded on a cache miss; during replay each of
# them is preceded by a wait for the page to finish loading.
MUTATING_CDP_METHODS = {
    "Page.navigate",
    "Input.dispatchMouseEvent",
    "Input.dispatchKeyEvent",
    "Input.insertText",
    "DOM.focus",
    "Runtime.evaluate",
    "Runtime.callFunctionOn",
}

# Methods used internally by TERX that should NOT be recorded
# NOTE(review): this set is not consulted anywhere in this module; recording
# is filtered through MUTATING_CDP_METHODS only.
_INTERNAL_METHODS = {
    "Accessibility.getFullAXTree",
    "Page.captureScreenshot",
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class CDPCommand:
    """A single recorded CDP command."""
    # CDP method name, e.g. "Page.navigate".
    method: str
    # Parameters that were sent with the command.
    params: dict
    # Result captured alongside the command when it was recorded.
    result: dict
    # Latency reported for the command, in milliseconds.
    latency_ms: float
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class CachedSequence:
    """A cached action sequence for one successful task."""
    # Domain the sequence is stored under (part of the lookup key).
    domain: str
    # Structural hash of the page the sequence was recorded on.
    structural_hash: str
    # Fixed-length key derived from the task description by _task_key().
    task_key: str
    # Task description as it was stored with the sequence.
    task_description: str
    # Recorded commands, in the order they are replayed.
    commands: list[CDPCommand]
    # Number of replays counted through increment_hit().
    hit_count: int
    # ISO-8601 UTC timestamp written when the row was first stored.
    created_at: str
    # ISO-8601 UTC timestamp of the most recent store or hit.
    last_used: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class ReplayCostLedger:
    """Summary of one run: whether the cache was hit and what a replay saved."""
    task_description: str
    hit: bool
    commands_replayed: int
    estimated_llm_calls_saved: int
    latency_ms: float
    run_number: int

    def __str__(self) -> str:
        """Render the one-line summary for a hit or a miss."""
        # Guard clause: a miss has a fixed, short form.
        if not self.hit:
            return f"🔍 Cache MISS · run #{self.run_number} (learning...)"

        segments = [
            f"💾 Cache HIT · {self.commands_replayed} commands",
            f"{self.latency_ms:.0f}ms",
            f"~{self.estimated_llm_calls_saved} LLM calls saved",
            f"run #{self.run_number}",
        ]
        return " · ".join(segments)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class MuscleMemorycache:
    """
    The TERX muscle memory cache.

    Stores successful CDP command sequences in a SQLite database and writes
    each session as a ``.vcr`` JSONL file.

    Usage:
        cache = MuscleMemorycache()

        # Wrap the agent call
        async with cache.session(bridge, task="login to salesforce") as ctx:
            if ctx.hit:
                # Cached path — replay CDP commands directly
                await ctx.replay()
            else:
                # New path — run your agent normally
                await my_agent.run(task)
                # TERX records the CDP stream automatically

        print(ctx.ledger)
        cache.close()
    """

    def __init__(
        self,
        db_path: str | Path = ".terx/cache.db",
        vcr_dir: str | Path = ".vcr",
        similarity_threshold: float = SIMILARITY_THRESHOLD,
    ) -> None:
        """
        Args:
            db_path: Location of the SQLite database file.
            vcr_dir: Directory that receives the ``.vcr`` session files.
            similarity_threshold: Minimum similarity score lookup() accepts.
        """
        self.db_path = Path(db_path)
        self.vcr_dir = Path(vcr_dir)
        self.similarity_threshold = similarity_threshold
        # Opened lazily by _ensure_db() so construction does no I/O.
        self._db: sqlite3.Connection | None = None

    # ------------------------------------------------------------------ #
    #  Setup                                                               #
    # ------------------------------------------------------------------ #

    def _ensure_db(self) -> sqlite3.Connection:
        """Open the database on first use, create the schema, and return the connection."""
        if self._db is not None:
            return self._db
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        db = sqlite3.connect(self.db_path, check_same_thread=False)
        db.execute("PRAGMA journal_mode=WAL")
        db.execute("""
            CREATE TABLE IF NOT EXISTS sequences (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                domain TEXT NOT NULL,
                structural_hash TEXT NOT NULL,
                task_key TEXT NOT NULL,
                task_description TEXT NOT NULL,
                role_sequence TEXT NOT NULL DEFAULT '',
                commands_json TEXT NOT NULL,
                hit_count INTEGER NOT NULL DEFAULT 0,
                created_at TEXT NOT NULL,
                last_used TEXT NOT NULL,
                UNIQUE(domain, structural_hash, task_key)
            )
        """)
        db.execute(
            "CREATE INDEX IF NOT EXISTS idx_domain_task ON sequences(domain, task_key)"
        )
        db.commit()
        self._db = db
        return db

    def close(self) -> None:
        """Close the database connection if it is open; a later call reopens it."""
        if self._db is not None:
            self._db.close()
            self._db = None

    def session(
        self,
        bridge: CDPBridge,
        task: str,
        session_id: str | None = None,
    ) -> RecordingContext:
        """Create a RecordingContext bound to this cache (same as session_for)."""
        return RecordingContext(
            cache=self,
            bridge=bridge,
            task=task,
            session_id=session_id,
        )

    # ------------------------------------------------------------------ #
    #  Core cache operations                                               #
    # ------------------------------------------------------------------ #

    def lookup(
        self, domain: str, role_sequence: str, task_description: str
    ) -> CachedSequence | None:
        """
        Find a cached sequence for the given domain + DOM structure + task.

        Candidates are the rows stored for ``domain`` and the task key derived
        from ``task_description``. Each candidate's stored role sequence is
        scored against ``role_sequence`` with hash_similarity(); the highest
        score that reaches ``similarity_threshold`` wins, the earliest row
        winning ties.

        Returns:
            The best matching CachedSequence, or ``None`` when nothing
            reaches the threshold.
        """
        db = self._ensure_db()
        task_key = _task_key(task_description)
        rows = db.execute(
            "SELECT structural_hash, task_description, commands_json, "
            "hit_count, created_at, last_used, role_sequence, task_key "
            "FROM sequences WHERE domain = ? AND task_key = ?",
            (domain, task_key)
        ).fetchall()

        best_match: tuple[float, Any] | None = None
        for row in rows:
            cached_role_seq = row[6]
            sim = hash_similarity(role_sequence, cached_role_seq)
            if sim >= self.similarity_threshold:
                if best_match is None or sim > best_match[0]:
                    best_match = (sim, row)

        if best_match is None:
            return None

        _, row = best_match
        commands = [CDPCommand(**c) for c in json.loads(row[2])]
        return CachedSequence(
            domain=domain,
            structural_hash=row[0],
            task_key=row[7],
            task_description=row[1],
            commands=commands,
            hit_count=row[3],
            created_at=row[4],
            last_used=row[5],
        )

    def store(
        self,
        domain: str,
        structural_hash: str,
        role_sequence: str,
        task_description: str,
        commands: list[CDPCommand],
    ) -> None:
        """
        Persist a successful action sequence.

        An existing row with the same (domain, structural_hash, task key) is
        updated in place; its hit_count and created_at are kept.
        """
        db = self._ensure_db()
        now = datetime.now(timezone.utc).isoformat()
        commands_json = json.dumps([asdict(c) for c in commands])
        task_key = _task_key(task_description)

        db.execute(
            """
            INSERT INTO sequences
                (domain, structural_hash, task_key, task_description,
                 role_sequence, commands_json, hit_count, created_at, last_used)
            VALUES (?, ?, ?, ?, ?, ?, 0, ?, ?)
            ON CONFLICT(domain, structural_hash, task_key) DO UPDATE SET
                commands_json = excluded.commands_json,
                task_description = excluded.task_description,
                role_sequence = excluded.role_sequence,
                last_used = excluded.last_used
            """,
            (domain, structural_hash, task_key, task_description,
             role_sequence, commands_json, now, now),
        )
        db.commit()
        logger.info(
            "Cached %d commands for domain=%s task=%s hash=%.8s",
            len(commands), domain, task_key, structural_hash
        )

    def increment_hit(self, domain: str, structural_hash: str, task_key: str) -> None:
        """Increment the hit counter for a cached sequence and refresh last_used."""
        db = self._ensure_db()
        now = datetime.now(timezone.utc).isoformat()
        db.execute(
            "UPDATE sequences SET hit_count = hit_count + 1, last_used = ? "
            "WHERE domain = ? AND structural_hash = ? AND task_key = ?",
            (now, domain, structural_hash, task_key),
        )
        db.commit()

    def invalidate(self, domain: str) -> int:
        """Remove all cached sequences for a domain. Returns rows deleted."""
        db = self._ensure_db()
        cursor = db.execute(
            "DELETE FROM sequences WHERE domain = ?", (domain,)
        )
        db.commit()
        return cursor.rowcount

    def stats(self) -> dict:
        """Return cache statistics: total_sequences, total_hits, domains."""
        db = self._ensure_db()
        total = db.execute("SELECT COUNT(*) FROM sequences").fetchone()[0]
        # SUM() over an empty table yields NULL, hence the "or 0".
        hits = db.execute("SELECT SUM(hit_count) FROM sequences").fetchone()[0] or 0
        domains = db.execute(
            "SELECT COUNT(DISTINCT domain) FROM sequences"
        ).fetchone()[0]
        return {"total_sequences": total, "total_hits": hits, "domains": domains}

    # ------------------------------------------------------------------ #
    #  VCR-format writer (compatible with Agent VCR)                       #
    # ------------------------------------------------------------------ #

    def write_vcr(
        self,
        session_id: str,
        task_description: str,
        commands: list[CDPCommand],
        domain: str,
        was_cache_hit: bool,
    ) -> Path:
        """
        Write a browser session in .vcr JSONL format.
        Compatible with Agent VCR's VCRPlayer.

        Format:
            {"type": "session", "data": {...}}
            {"type": "frame",   "data": {...}}   ← one per CDP command

        Returns:
            Path of the file written, ``<vcr_dir>/<session_id>.vcr``. An
            existing file with that name is overwritten.
        """
        self.vcr_dir.mkdir(parents=True, exist_ok=True)
        vcr_path = self.vcr_dir / f"{session_id}.vcr"

        # Explicit encoding keeps the file identical across platforms.
        with vcr_path.open("w", encoding="utf-8") as f:
            # Session header
            session_record = {
                "type": "session",
                "data": {
                    "session_id": session_id,
                    "created_at": datetime.now(timezone.utc).isoformat(),
                    "agent_type": "browser",
                    "tool": "terx",
                    "task": task_description,
                    "domain": domain,
                    "cache_hit": was_cache_hit,
                    "tags": ["browser", "terx", "cdp"],
                }
            }
            f.write(json.dumps(session_record) + "\n")

            # One frame per CDP command
            for i, cmd in enumerate(commands):
                frame = {
                    "type": "frame",
                    "data": {
                        "node_name": cmd.method.replace(".", "_").lower(),
                        "input_state": {
                            "cdp_method": cmd.method,
                            "cdp_params": cmd.params,
                            "frame_index": i,
                        },
                        "output_state": {
                            "cdp_result": cmd.result,
                        },
                        "metadata": {
                            "latency_ms": cmd.latency_ms,
                            "cache_hit": was_cache_hit,
                            "cdp_method": cmd.method,
                        }
                    }
                }
                f.write(json.dumps(frame) + "\n")

        logger.info("Wrote .vcr session → %s (%d frames)", vcr_path, len(commands))
        return vcr_path
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# ------------------------------------------------------------------ #
|
|
337
|
+
# Recording context manager #
|
|
338
|
+
# ------------------------------------------------------------------ #
|
|
339
|
+
|
|
340
|
+
class RecordingContext:
    """
    Context returned by session_for().

    On enter it snapshots the DOM, looks the task up in the cache and, on a
    miss, starts recording the mutating CDP commands sent through the bridge.
    On a clean exit of a miss it stores the recording, saves a baseline
    screenshot, writes a .vcr file and publishes a MISS ledger. On a hit the
    caller is expected to call replay().
    """

    def __init__(
        self,
        cache: MuscleMemorycache,
        bridge: CDPBridge,
        task: str,
        session_id: str | None = None,
    ) -> None:
        self._cache = cache
        self._bridge = bridge
        self._task = task
        self._session_id = session_id or f"browser_session_{int(time.time())}"
        self._snapshot: DOMSnapshot | None = None
        self._domain: str = "unknown"
        self._cached_seq: CachedSequence | None = None
        self._run_number: int = 1
        self._recorded_commands: list[CDPCommand] = []
        # Set by replay() on a hit and by __aexit__ on a successful miss.
        self.ledger: ReplayCostLedger | None = None

    @property
    def hit(self) -> bool:
        """True when a cached sequence was found on enter."""
        return self._cached_seq is not None

    @staticmethod
    def _next_run_number(db: sqlite3.Connection, domain: str, task_key: str) -> int:
        """
        Return the 1-based number of the run that is about to start.

        Earlier runs are counted as one per stored sequence plus one per
        recorded hit. SUM() is NULL when no row matches, so it is coalesced
        on its own before the addition.
        """
        row = db.execute(
            "SELECT COALESCE(SUM(hit_count), 0) + COUNT(*) + 1 "
            "FROM sequences WHERE domain = ? AND task_key = ?",
            (domain, task_key),
        ).fetchone()
        return row[0]

    async def _wait_for_load(self, timeout: float = 2.0) -> None:
        """
        Poll document.readyState until it is "complete" or *timeout* elapses.

        Returns silently in both cases; a page that never finishes loading
        only delays the replay.
        """
        # Monotonic clock: a wall-clock adjustment must not stretch or cut the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                # Use _ws directly to avoid triggering recorders for internal checks
                self._bridge._id_counter += 1
                cmd_id = self._bridge._id_counter
                future = asyncio.get_running_loop().create_future()
                # NOTE(review): if the probe times out this entry stays in
                # _pending; whether the bridge cleans it up is not visible here.
                self._bridge._pending[cmd_id] = future
                await self._bridge._ws.send(json.dumps({
                    "id": cmd_id,
                    "method": "Runtime.evaluate",
                    "params": {"expression": "document.readyState"}
                }))
                res = await asyncio.wait_for(future, timeout=1.0)
                state = res.get("result", {}).get("value")
                if state == "complete":
                    return
            except Exception as exc:
                # Best effort: a failed probe is retried until the deadline.
                logger.debug("readyState probe failed: %s", exc)
            await asyncio.sleep(0.05)

    async def _heal_and_resend(self, cmd: CDPCommand) -> bool:
        """Ask SelfHealer for new parameters and resend *cmd*; True on success."""
        from terx.agent.healer import SelfHealer
        healer = SelfHealer()
        extractor = DOMExtractor()
        current_snapshot = await extractor.snapshot(self._bridge)

        new_params = await healer.heal_command(
            failed_method=cmd.method,
            old_params=cmd.params,
            current_dom=current_snapshot.elements,
            task_desc=self._task
        )
        if not new_params:
            return False

        logger.info("Self-healing generated new params: %s", new_params)
        try:
            await self._bridge.send(cmd.method, new_params)
        except Exception as e2:
            logger.error("Healed parameters failed: %s", e2)
            return False
        return True

    async def _visual_audit(self) -> None:
        """Compare a fresh screenshot with the stored baseline and log the SSIM score."""
        SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)
        screenshot_path = SCREENSHOT_DIR / f"{self._cached_seq.structural_hash}.png"

        if not screenshot_path.exists():
            return
        try:
            import base64 as _b64
            result = await self._bridge.send("Page.captureScreenshot", {"format": "png"})
            new_screenshot = _b64.b64decode(result.get("data", ""))
            old_screenshot = screenshot_path.read_bytes()

            from terx.vision.ssim import compute_ssim
            ssim_score = compute_ssim(old_screenshot, new_screenshot)
            logger.info("Visual Audit SSIM Score: %.3f", ssim_score)

            if ssim_score < SSIM_THRESHOLD:
                logger.warning("SSIM drift detected (%.3f < %.3f)! UI changed significantly.", ssim_score, SSIM_THRESHOLD)
                # We still count it as a hit, but warn the agent.
        except Exception as e:
            logger.warning("Failed to run SSIM visual audit: %s", e)

    async def replay(self) -> None:
        """
        Replay the cached CDP command sequence directly. Zero LLM calls.

        A command that fails is retried once with parameters proposed by
        SelfHealer. After the last command a visual audit is run, the hit is
        counted, the ledger is set and a .vcr file is written.

        Raises:
            RuntimeError: there is no cached sequence (cache miss).
            CacheReplayError: a command failed and could not be healed.
        """
        if self._cached_seq is None:
            raise RuntimeError("No cached sequence to replay (cache miss)")

        t0 = time.perf_counter()
        for cmd in self._cached_seq.commands:
            if cmd.method in MUTATING_CDP_METHODS:
                await self._wait_for_load()
            try:
                await self._bridge.send(cmd.method, cmd.params)
            except Exception as exc:
                logger.warning(
                    "Replay failed at %s: %s — attempting self-healing",
                    cmd.method, exc
                )
                if await self._heal_and_resend(cmd):
                    continue  # Successfully healed
                raise CacheReplayError(cmd.method) from exc

        # --- Visual Audit (SSIM) ---
        await self._visual_audit()

        latency = (time.perf_counter() - t0) * 1000
        self._cache.increment_hit(
            self._domain,
            self._cached_seq.structural_hash,
            self._cached_seq.task_key,
        )

        self.ledger = ReplayCostLedger(
            task_description=self._task,
            hit=True,
            commands_replayed=len(self._cached_seq.commands),
            estimated_llm_calls_saved=len(self._cached_seq.commands),
            latency_ms=latency,
            run_number=self._run_number,
        )

        # Still write a .vcr file for the replay (for audit trail)
        self._cache.write_vcr(
            session_id=self._session_id,
            task_description=self._task,
            commands=self._cached_seq.commands,
            domain=self._domain,
            was_cache_hit=True,
        )

    def record_command(self, cmd: CDPCommand) -> None:
        """Manually record a command (legacy). Transparent proxy now auto-records."""
        self._recorded_commands.append(cmd)

    def _auto_record(self, method: str, params: dict, result: dict, latency: float) -> None:
        """Transparent interceptor: auto-captures mutating commands sent through the bridge."""
        if method in MUTATING_CDP_METHODS:
            cmd = CDPCommand(method=method, params=params, result=result, latency_ms=latency)
            self._recorded_commands.append(cmd)

    async def __aenter__(self) -> "RecordingContext":
        """Snapshot the page, look up the cache and start recording on a miss."""
        # Capture DOM snapshot asynchronously on enter
        extractor = DOMExtractor()
        self._snapshot = await extractor.snapshot(self._bridge)
        self._domain = urlparse(self._snapshot.url).netloc or "unknown"

        # Update session_id with domain if using default
        if self._session_id.startswith("browser_session_"):
            self._session_id = f"browser_{self._domain}_{self._session_id.split('_')[-1]}"

        # Lookup cached sequence
        self._cached_seq = self._cache.lookup(self._domain, self._snapshot.role_sequence, self._task)

        # Calculate run number
        db = self._cache._ensure_db()
        self._run_number = self._next_run_number(db, self._domain, _task_key(self._task))

        if not self.hit:
            self._bridge.add_recorder(self._auto_record)
        return self

    async def __aexit__(self, exc_type: Any, *_: Any) -> None:
        """
        Stop recording and, after a clean miss, persist what was recorded.

        Exceptions are never suppressed. A run that raised is not cached.
        A MISS ledger is published after every clean miss, including one in
        which no mutating command was recorded.
        """
        if self.hit:
            return  # replay() owns the ledger and the .vcr file on a hit

        self._bridge.remove_recorder(self._auto_record)

        if exc_type is not None:
            return  # Don't cache failed runs

        if self._recorded_commands:
            # Cache the new sequence
            self._cache.store(
                domain=self._domain,
                structural_hash=self._snapshot.structural_hash,
                role_sequence=self._snapshot.role_sequence,
                task_description=self._task,
                commands=self._recorded_commands,
            )

            # Save visual baseline for future SSIM checks
            try:
                import base64 as _b64
                result = await self._bridge.send("Page.captureScreenshot", {"format": "png"})
                screenshot_bytes = _b64.b64decode(result.get("data", ""))
                SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)
                (SCREENSHOT_DIR / f"{self._snapshot.structural_hash}.png").write_bytes(screenshot_bytes)
            except Exception as e:
                logger.warning("Failed to save baseline screenshot for SSIM: %s", e)

            # Write .vcr file
            self._cache.write_vcr(
                session_id=self._session_id,
                task_description=self._task,
                commands=self._recorded_commands,
                domain=self._domain,
                was_cache_hit=False,
            )

        self.ledger = ReplayCostLedger(
            task_description=self._task,
            hit=False,
            commands_replayed=0,
            estimated_llm_calls_saved=0,
            latency_ms=0,
            run_number=self._run_number,
        )
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def session_for(
    cache: MuscleMemorycache,
    bridge: CDPBridge,
    task: str,
    session_id: str | None = None,
) -> RecordingContext:
    """
    Build the RecordingContext that wraps one task on the current page.

    The returned object is an async context manager; nothing is read from
    the browser or the cache until it is entered.

    Example:
        async with session_for(cache, bridge, "login to salesforce") as ctx:
            if ctx.hit:
                await ctx.replay()
            else:
                await bridge.send("Page.navigate", {"url": "..."})
        print(ctx.ledger)
    """
    # Positional order matches RecordingContext.__init__.
    context = RecordingContext(cache, bridge, task, session_id)
    return context
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def _task_key(task_description: str) -> str:
|
|
585
|
+
"""
|
|
586
|
+
Normalize task description into a cache key.
|
|
587
|
+
Lowercase, strip whitespace, hash to fixed length.
|
|
588
|
+
Two tasks that mean the same thing should produce the same key.
|
|
589
|
+
"""
|
|
590
|
+
normalized = task_description.lower().strip()
|
|
591
|
+
return hashlib.md5(normalized.encode()).hexdigest()[:16]
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
class CacheReplayError(Exception):
    """Signals that a cached CDP command could not be replayed (DOM drift)."""

    def __init__(self, failed_method: str) -> None:
        """Record the failing CDP method name and build the message from it."""
        message = f"Replay failed at CDP method: {failed_method}"
        super().__init__(message)
        # Exposed so callers can see which command broke the replay.
        self.failed_method = failed_method
|
terx/cdp/__init__.py
ADDED