spider-browser 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spider_browser-0.1.0/.gitignore +22 -0
- spider_browser-0.1.0/PKG-INFO +12 -0
- spider_browser-0.1.0/pyproject.toml +27 -0
- spider_browser-0.1.0/spider_browser/__init__.py +41 -0
- spider_browser-0.1.0/spider_browser/ai/__init__.py +0 -0
- spider_browser-0.1.0/spider_browser/ai/act.py +41 -0
- spider_browser-0.1.0/spider_browser/ai/agent.py +440 -0
- spider_browser-0.1.0/spider_browser/ai/extract.py +69 -0
- spider_browser-0.1.0/spider_browser/ai/llm_provider.py +64 -0
- spider_browser-0.1.0/spider_browser/ai/observe.py +107 -0
- spider_browser-0.1.0/spider_browser/ai/prompts.py +133 -0
- spider_browser-0.1.0/spider_browser/ai/providers/__init__.py +0 -0
- spider_browser-0.1.0/spider_browser/ai/providers/anthropic_provider.py +92 -0
- spider_browser-0.1.0/spider_browser/ai/providers/openai_provider.py +55 -0
- spider_browser-0.1.0/spider_browser/events/__init__.py +0 -0
- spider_browser-0.1.0/spider_browser/events/emitter.py +40 -0
- spider_browser-0.1.0/spider_browser/events/types.py +5 -0
- spider_browser-0.1.0/spider_browser/page.py +611 -0
- spider_browser-0.1.0/spider_browser/protocol/__init__.py +0 -0
- spider_browser-0.1.0/spider_browser/protocol/bidi_session.py +194 -0
- spider_browser-0.1.0/spider_browser/protocol/cdp_session.py +353 -0
- spider_browser-0.1.0/spider_browser/protocol/protocol_adapter.py +286 -0
- spider_browser-0.1.0/spider_browser/protocol/transport.py +304 -0
- spider_browser-0.1.0/spider_browser/protocol/types.py +27 -0
- spider_browser-0.1.0/spider_browser/retry/__init__.py +0 -0
- spider_browser-0.1.0/spider_browser/retry/browser_selector.py +66 -0
- spider_browser-0.1.0/spider_browser/retry/failure_tracker.py +73 -0
- spider_browser-0.1.0/spider_browser/retry/retry_engine.py +419 -0
- spider_browser-0.1.0/spider_browser/spider_browser.py +278 -0
- spider_browser-0.1.0/spider_browser/utils/__init__.py +0 -0
- spider_browser-0.1.0/spider_browser/utils/dom.py +55 -0
- spider_browser-0.1.0/spider_browser/utils/errors.py +75 -0
- spider_browser-0.1.0/spider_browser/utils/html.py +10 -0
- spider_browser-0.1.0/spider_browser/utils/logger.py +22 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
node_modules/
|
|
2
|
+
dist/
|
|
3
|
+
*.tsbuildinfo
|
|
4
|
+
.env
|
|
5
|
+
.env.*
|
|
6
|
+
|
|
7
|
+
# Python
|
|
8
|
+
__pycache__/
|
|
9
|
+
*.pyc
|
|
10
|
+
*.pyo
|
|
11
|
+
*.egg-info/
|
|
12
|
+
build/
|
|
13
|
+
*.egg
|
|
14
|
+
|
|
15
|
+
# Rust
|
|
16
|
+
target/
|
|
17
|
+
|
|
18
|
+
# Test data (synced to spider-browser-dataset)
|
|
19
|
+
stealth-*.csv
|
|
20
|
+
stealth-*.json
|
|
21
|
+
automation-showcase*.csv
|
|
22
|
+
.claude/
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spider-browser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Browser automation client for Spider's pre-warmed browser fleet with smart retry and browser switching
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: httpx>=0.27.0
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Requires-Dist: websockets>=12.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "spider-browser"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Browser automation client for Spider's pre-warmed browser fleet with smart retry and browser switching"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"websockets>=12.0",
|
|
13
|
+
"pydantic>=2.0",
|
|
14
|
+
"httpx>=0.27.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.optional-dependencies]
|
|
18
|
+
dev = [
|
|
19
|
+
"pytest>=8.0",
|
|
20
|
+
"pytest-asyncio>=0.23",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[tool.hatch.build.targets.sdist]
|
|
24
|
+
include = ["spider_browser/", "LICENSE"]
|
|
25
|
+
|
|
26
|
+
[tool.hatch.build.targets.wheel]
|
|
27
|
+
packages = ["spider_browser"]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""spider-browser — Python browser automation client for Spider's pre-warmed browser fleet."""
|
|
2
|
+
|
|
3
|
+
from .spider_browser import SpiderBrowser, SpiderBrowserOptions
|
|
4
|
+
from .page import SpiderPage
|
|
5
|
+
from .events.types import BrowserType
|
|
6
|
+
from .events.emitter import SpiderEventEmitter
|
|
7
|
+
from .utils.errors import (
|
|
8
|
+
SpiderError,
|
|
9
|
+
ConnectionError as SpiderConnectionError,
|
|
10
|
+
AuthError,
|
|
11
|
+
RateLimitError,
|
|
12
|
+
BlockedError,
|
|
13
|
+
BackendUnavailableError,
|
|
14
|
+
TimeoutError as SpiderTimeoutError,
|
|
15
|
+
ProtocolError,
|
|
16
|
+
LLMError,
|
|
17
|
+
)
|
|
18
|
+
from .ai.llm_provider import LLMConfig, create_provider
|
|
19
|
+
from .ai.agent import Agent, AgentOptions, AgentResult
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"SpiderBrowser",
|
|
23
|
+
"SpiderBrowserOptions",
|
|
24
|
+
"SpiderPage",
|
|
25
|
+
"BrowserType",
|
|
26
|
+
"SpiderEventEmitter",
|
|
27
|
+
"SpiderError",
|
|
28
|
+
"SpiderConnectionError",
|
|
29
|
+
"AuthError",
|
|
30
|
+
"RateLimitError",
|
|
31
|
+
"BlockedError",
|
|
32
|
+
"BackendUnavailableError",
|
|
33
|
+
"SpiderTimeoutError",
|
|
34
|
+
"ProtocolError",
|
|
35
|
+
"LLMError",
|
|
36
|
+
"LLMConfig",
|
|
37
|
+
"create_provider",
|
|
38
|
+
"Agent",
|
|
39
|
+
"AgentOptions",
|
|
40
|
+
"AgentResult",
|
|
41
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""act() — single action from natural language."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
from ..protocol.protocol_adapter import ProtocolAdapter
|
|
8
|
+
from .llm_provider import LLMProvider
|
|
9
|
+
from .prompts import SYSTEM_PROMPT, build_user_message
|
|
10
|
+
from .agent import execute_action
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
async def act(
    adapter: ProtocolAdapter,
    llm: LLMProvider,
    instruction: str,
) -> None:
    """
    Execute a single action from natural language.

    Captures the screenshot, HTML, URL and title of the current page
    concurrently, sends them to the LLM together with the instruction, and
    then runs each step of the returned plan in order via ``execute_action``,
    pausing 0.2 s after every step.

    Args:
        adapter: Protocol adapter used to inspect and drive the page.
        llm: Provider whose ``chat_json`` returns the parsed plan.
        instruction: Natural-language description of the action to perform.

    Raises:
        Exception: Nothing is caught here; errors from the adapter, the LLM
            provider or ``execute_action`` propagate to the caller.
    """
    screenshot, html, url, title = await asyncio.gather(
        adapter.capture_screenshot(),
        adapter.get_html(),
        adapter.evaluate("window.location.href"),
        adapter.evaluate("document.title"),
    )

    context = f"Task: {instruction}\nPAGE TITLE: {title}"

    plan = await llm.chat_json([
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_message(str(url), str(html), str(screenshot), context)},
    ])

    # The plan is model output: "steps" may be missing, null, or not a list.
    # Treat anything other than a list as "nothing to do" rather than failing
    # (or iterating a string/dict) in the loop below.
    steps = plan.get("steps") if isinstance(plan, dict) else None
    if not isinstance(steps, list):
        steps = []

    for step in steps:
        await execute_action(adapter, step)
        await asyncio.sleep(0.2)
|
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
"""Autonomous multi-step agent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from ..protocol.protocol_adapter import ProtocolAdapter
|
|
11
|
+
from ..events.emitter import SpiderEventEmitter
|
|
12
|
+
from ..utils.errors import TimeoutError
|
|
13
|
+
from ..utils.logger import logger
|
|
14
|
+
from .llm_provider import LLMProvider
|
|
15
|
+
from .prompts import SYSTEM_PROMPT, build_user_message
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class AgentOptions:
    """Tunable settings accepted by ``Agent``."""

    # Upper bound on the number of agent loop iterations.
    max_rounds: int = 30
    # Pause between rounds, in milliseconds (the agent converts it to seconds).
    step_delay_ms: int = 1500
    # Optional instruction text kept by the agent at construction time.
    instruction: Optional[str] = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class AgentResult:
    """Outcome of one ``Agent.execute`` run."""

    # True when the plan reported completion; False otherwise.
    done: bool
    # Number of rounds reported for the run.
    rounds: int
    # Most recent non-null "extracted" payload returned in a plan, if any.
    extracted: Any = None
    # "label" taken from the most recent plan.
    label: str = ""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Agent:
    """
    Autonomous multi-step agent.

    Uses the same action vocabulary and system prompt as Spider's
    server-side captcha solver.

    Loop: screenshot -> HTML -> LLM -> parse plan -> execute actions -> repeat.
    """

    def __init__(
        self,
        adapter: ProtocolAdapter,
        llm: LLMProvider,
        emitter: SpiderEventEmitter,
        options: Optional[AgentOptions] = None,
    ) -> None:
        """
        Store collaborators and unpack the options.

        Args:
            adapter: Protocol adapter used to inspect and drive the page.
            llm: Provider whose ``chat_json`` returns the parsed plan.
            emitter: Receives ``agent.step``, ``agent.done`` and
                ``agent.error`` events.
            options: Loop settings; defaults to ``AgentOptions()``.
        """
        self._adapter = adapter
        self._llm = llm
        self._emitter = emitter
        opts = options or AgentOptions()
        self._max_rounds = opts.max_rounds
        self._step_delay_s = opts.step_delay_ms / 1000.0
        # NOTE(review): stored but never read by this class; execute() only
        # uses the instruction passed to it. Confirm whether that is intended.
        self._instruction = opts.instruction

    async def execute(self, instruction: str) -> AgentResult:
        """
        Execute the agent loop until done or max rounds reached.

        Each round captures a screenshot and the HTML, asks the LLM for a
        plan, and executes the plan's steps. The run ends when a plan sets
        ``done``, when the round budget is exhausted, or when the screenshot
        or HTML capture fails.

        Args:
            instruction: Task description included in every LLM request.

        Returns:
            ``AgentResult`` with ``done=True`` when a plan reported
            completion. Otherwise ``done=False`` and ``rounds`` is either the
            round budget (exhausted) or the 1-based round in which page
            capture failed.
        """
        extracted: Any = None
        last_label = ""
        # Set when the loop is abandoned early, so the final event and result
        # describe what actually happened instead of "max rounds exceeded".
        failure: Optional[str] = None
        rounds_run = 0

        await asyncio.sleep(0.5)

        for round_num in range(self._max_rounds):
            rounds_run = round_num + 1

            # 1. Screenshot
            try:
                screenshot = await self._adapter.capture_screenshot()
            except Exception as err:
                logger.warning(f"agent: screenshot failed round {round_num}: {err}")
                failure = f"screenshot failed: {err}"
                break

            # 2. HTML
            try:
                html = await self._adapter.get_html()
            except Exception as err:
                logger.warning(f"agent: get HTML failed round {round_num}: {err}")
                failure = f"get HTML failed: {err}"
                break

            # 3. URL and title (best effort: fall back to placeholders)
            try:
                url = str(await self._adapter.evaluate("window.location.href"))
            except Exception:
                url = "unknown"
            try:
                title = str(await self._adapter.evaluate("document.title"))
            except Exception:
                title = ""

            # 4. Call LLM
            context = f"Round {round_num + 1}/{self._max_rounds}. Task: {instruction}\nPAGE TITLE: {title}"

            try:
                plan = await self._llm.chat_json([
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": build_user_message(url, html, screenshot, context)},
                ])
            except Exception as err:
                logger.warning(f"agent: LLM call failed round {round_num}: {err}")
                await asyncio.sleep(2.0)
                continue

            if not isinstance(plan, dict):
                await asyncio.sleep(self._step_delay_s)
                continue

            last_label = plan.get("label", "")
            if plan.get("extracted") is not None:
                extracted = plan["extracted"]

            # The plan is model output: "steps" may be null or not a list.
            # Normalise it so len() and the loop below cannot raise.
            steps = plan.get("steps")
            if not isinstance(steps, list):
                steps = []
            logger.info(f"agent: round {round_num + 1} label={last_label} done={plan.get('done')} steps={len(steps)}")

            self._emitter.emit("agent.step", {
                "round": round_num + 1,
                "label": last_label,
                "stepsCount": len(steps),
            })

            # 5. Check if done
            if plan.get("done"):
                self._emitter.emit("agent.done", {"rounds": round_num + 1, "result": extracted})
                return AgentResult(done=True, rounds=round_num + 1, extracted=extracted, label=last_label)

            if not steps:
                logger.info("agent: no steps, retrying")
                await asyncio.sleep(self._step_delay_s)
                continue

            # 6. Execute steps; a failing step abandons the rest of this plan
            for i, action in enumerate(steps):
                try:
                    await execute_action(self._adapter, action)
                except Exception as err:
                    logger.warning(f"agent: action failed round {round_num} step {i}: {err}")
                    break
                await asyncio.sleep(0.2)

            # 7. Wait for page settle
            await asyncio.sleep(self._step_delay_s)

        if failure is None:
            # The loop ran to completion without a plan reporting done.
            logger.warning("agent: max rounds exceeded")
            failure = "max rounds exceeded"
            rounds_run = self._max_rounds

        self._emitter.emit("agent.error", {"error": failure, "round": rounds_run})
        return AgentResult(done=False, rounds=rounds_run, extracted=extracted, label=last_label)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# -------------------------------------------------------------------
|
|
152
|
+
# Action executor — mirrors agent.rs execute_action()
|
|
153
|
+
# -------------------------------------------------------------------
|
|
154
|
+
|
|
155
|
+
async def execute_action(adapter: ProtocolAdapter, action: Dict[str, Any]) -> None:
    """
    Execute a single agent action via the protocol adapter.

    *action* is a mapping whose key names the action and whose value carries
    its argument(s), e.g. ``{"Click": "#submit"}`` or
    ``{"ClickPoint": {"x": 10, "y": 20}}``. Keys are tested in a fixed order
    and the first one present wins. Unrecognised actions, and values that are
    not mappings at all, are logged and ignored.

    Args:
        adapter: Protocol adapter used to drive the page.
        action: The action to perform.

    Raises:
        RuntimeError: From ``_get_element_center`` when a selector-based
            pointer action matches no element.
        TimeoutError: From ``_wait_for_element`` when a waited-for selector
            never appears.
        KeyError: When an action payload lacks a required field.
    """
    if not isinstance(action, dict):
        # Plans are model output. A bare string would otherwise be
        # substring-matched by the `in` tests below and then fail on indexing.
        logger.warning(f"agent: unknown action: {json.dumps(action, default=str)[:100]}")
        return

    # Click actions
    if "Click" in action:
        pos = await _get_element_center(adapter, action["Click"])
        await adapter.click_point(pos["x"], pos["y"])
        return

    if "ClickAll" in action:
        selector = action["ClickAll"]
        # Collect the centre of every match first, then click them one by one.
        points = await adapter.evaluate(f"""
            (function() {{
                const els = document.querySelectorAll({json.dumps(selector)});
                return Array.from(els).map(el => {{
                    const r = el.getBoundingClientRect();
                    return {{ x: r.x + r.width / 2, y: r.y + r.height / 2 }};
                }});
            }})()
        """)
        if isinstance(points, list):
            for pt in points:
                if isinstance(pt, dict):
                    await adapter.click_point(pt["x"], pt["y"])
                    await asyncio.sleep(0.1)
        return

    if "ClickPoint" in action:
        pt = action["ClickPoint"]
        await adapter.click_point(pt["x"], pt["y"])
        return

    if "ClickHold" in action:
        data = action["ClickHold"]
        pos = await _get_element_center(adapter, data["selector"])
        await adapter.click_hold_point(pos["x"], pos["y"], data.get("hold_ms", 500))
        return

    if "ClickHoldPoint" in action:
        pt = action["ClickHoldPoint"]
        await adapter.click_hold_point(pt["x"], pt["y"], pt.get("hold_ms", 500))
        return

    if "DoubleClick" in action:
        pos = await _get_element_center(adapter, action["DoubleClick"])
        await adapter.double_click_point(pos["x"], pos["y"])
        return

    if "DoubleClickPoint" in action:
        pt = action["DoubleClickPoint"]
        await adapter.double_click_point(pt["x"], pt["y"])
        return

    if "RightClick" in action:
        pos = await _get_element_center(adapter, action["RightClick"])
        await adapter.right_click_point(pos["x"], pos["y"])
        return

    if "RightClickPoint" in action:
        pt = action["RightClickPoint"]
        await adapter.right_click_point(pt["x"], pt["y"])
        return

    if "WaitForAndClick" in action:
        selector = action["WaitForAndClick"]
        await _wait_for_element(adapter, selector, 5000)
        pos = await _get_element_center(adapter, selector)
        await adapter.click_point(pos["x"], pos["y"])
        return

    # Drag actions
    if "ClickDrag" in action:
        data = action["ClickDrag"]
        f = await _get_element_center(adapter, data["from"])
        t = await _get_element_center(adapter, data["to"])
        await adapter.drag_point(f["x"], f["y"], t["x"], t["y"])
        return

    if "ClickDragPoint" in action:
        pt = action["ClickDragPoint"]
        await adapter.drag_point(pt["from_x"], pt["from_y"], pt["to_x"], pt["to_y"])
        return

    # Input actions
    if "Type" in action:
        await adapter.insert_text(action["Type"]["value"])
        return

    if "Fill" in action:
        data = action["Fill"]
        selector = data["selector"]
        value = data["value"]
        # Focus and empty the field before typing into it.
        await adapter.evaluate(f"""
            (function() {{
                const el = document.querySelector({json.dumps(selector)});
                if (el) {{ el.focus(); el.value = ''; }}
            }})()
        """)
        try:
            pos = await _get_element_center(adapter, selector)
            await adapter.click_point(pos["x"], pos["y"])
        except Exception:
            # Best effort: the element was already focused via script above,
            # so a failed click does not stop the text from being inserted.
            pass
        await adapter.insert_text(value)
        # Fire the events a page would normally see after user input.
        await adapter.evaluate(f"""
            (function() {{
                const el = document.querySelector({json.dumps(selector)});
                if (el) {{
                    el.dispatchEvent(new Event('input', {{ bubbles: true }}));
                    el.dispatchEvent(new Event('change', {{ bubbles: true }}));
                }}
            }})()
        """)
        return

    if "Clear" in action:
        selector = action["Clear"]
        # Guard against a missing element, as Fill/Select/Focus/Blur do,
        # instead of dereferencing a null querySelector() result in the page.
        await adapter.evaluate(f"""
            (function() {{
                const el = document.querySelector({json.dumps(selector)});
                if (el) {{ el.value = ''; }}
            }})()
        """)
        return

    if "Press" in action:
        await adapter.press_key(action["Press"])
        return

    if "KeyDown" in action:
        await adapter.key_down(action["KeyDown"])
        return

    if "KeyUp" in action:
        await adapter.key_up(action["KeyUp"])
        return

    # Select & Focus
    if "Select" in action:
        data = action["Select"]
        selector = data["selector"]
        value = data["value"]
        await adapter.evaluate(f"""
            (function() {{
                const el = document.querySelector({json.dumps(selector)});
                if (el) {{
                    el.value = {json.dumps(value)};
                    el.dispatchEvent(new Event('change', {{ bubbles: true }}));
                }}
            }})()
        """)
        return

    if "Focus" in action:
        await adapter.evaluate(f"document.querySelector({json.dumps(action['Focus'])})?.focus()")
        return

    if "Blur" in action:
        await adapter.evaluate(f"document.querySelector({json.dumps(action['Blur'])})?.blur()")
        return

    if "Hover" in action:
        pos = await _get_element_center(adapter, action["Hover"])
        await adapter.hover_point(pos["x"], pos["y"])
        return

    if "HoverPoint" in action:
        pt = action["HoverPoint"]
        await adapter.hover_point(pt["x"], pt["y"])
        return

    # Scroll actions
    if "ScrollY" in action:
        await adapter.evaluate(f"window.scrollBy(0, {action['ScrollY']})")
        return

    if "ScrollX" in action:
        await adapter.evaluate(f"window.scrollBy({action['ScrollX']}, 0)")
        return

    if "ScrollTo" in action:
        # Accepts either a bare selector or {"selector": ...}.
        selector = action["ScrollTo"]["selector"] if isinstance(action["ScrollTo"], dict) else action["ScrollTo"]
        await adapter.evaluate(
            f"document.querySelector({json.dumps(selector)})?.scrollIntoView({{ behavior: 'smooth', block: 'center' }})"
        )
        return

    if "ScrollToPoint" in action:
        pt = action["ScrollToPoint"]
        await adapter.evaluate(f"window.scrollTo({pt['x']}, {pt['y']})")
        return

    if "InfiniteScroll" in action:
        count = action["InfiniteScroll"]
        # Jump to the bottom `count` times, pausing between jumps.
        for _ in range(count):
            await adapter.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            await asyncio.sleep(0.5)
        return

    # Wait actions
    if "Wait" in action:
        await asyncio.sleep(action["Wait"] / 1000.0)
        return

    if "WaitFor" in action:
        await _wait_for_element(adapter, action["WaitFor"], 5000)
        return

    if "WaitForWithTimeout" in action:
        data = action["WaitForWithTimeout"]
        await _wait_for_element(adapter, data["selector"], data.get("timeout", 5000))
        return

    if "WaitForNavigation" in action:
        # Implemented as a fixed one-second pause.
        await asyncio.sleep(1.0)
        return

    if "WaitForDom" in action:
        # Implemented as a plain sleep for the requested (or default) timeout.
        timeout = action["WaitForDom"].get("timeout", 5000) if isinstance(action["WaitForDom"], dict) else 5000
        await asyncio.sleep(timeout / 1000.0)
        return

    # Navigation actions
    if "Navigate" in action:
        await adapter.navigate(action["Navigate"])
        return

    if "GoBack" in action:
        await adapter.evaluate("window.history.back()")
        return

    if "GoForward" in action:
        await adapter.evaluate("window.history.forward()")
        return

    if "Reload" in action:
        await adapter.evaluate("window.location.reload()")
        return

    # Viewport
    if "SetViewport" in action:
        data = action["SetViewport"]
        await adapter.set_viewport(
            data["width"],
            data["height"],
            data.get("device_scale_factor", 2.0),
            data.get("mobile", False),
        )
        return

    # JavaScript
    if "Evaluate" in action:
        # NOTE(review): runs model-supplied JavaScript verbatim in the page.
        await adapter.evaluate(action["Evaluate"])
        return

    # Screenshot (no-op — handled by agent loop)
    if "Screenshot" in action:
        return

    logger.warning(f"agent: unknown action: {json.dumps(action)[:100]}")
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# -------------------------------------------------------------------
|
|
413
|
+
# Helpers
|
|
414
|
+
# -------------------------------------------------------------------
|
|
415
|
+
|
|
416
|
+
async def _get_element_center(adapter: ProtocolAdapter, selector: str) -> Dict[str, float]:
    """
    Scroll the first match of *selector* into view and return its centre.

    Returns:
        The mapping produced in the page, with ``x`` and ``y`` set to the
        centre of the element's bounding rectangle.

    Raises:
        RuntimeError: When the evaluation yields nothing usable, i.e. the
            selector matched no element.
    """
    script = f"""
        (function() {{
            const el = document.querySelector({json.dumps(selector)});
            if (!el) return null;
            el.scrollIntoView({{ block: 'center', behavior: 'instant' }});
            const r = el.getBoundingClientRect();
            return {{ x: r.x + r.width / 2, y: r.y + r.height / 2 }};
        }})()
    """
    center = await adapter.evaluate(script)
    if center and isinstance(center, dict):
        return center
    raise RuntimeError(f"Element not found: {selector}")
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
async def _wait_for_element(adapter: ProtocolAdapter, selector: str, timeout_ms: int) -> None:
    """
    Poll the page until *selector* matches an element or the timeout elapses.

    The page is always checked at least once, even for a zero or very small
    timeout, and the time spent inside ``adapter.evaluate`` counts against the
    timeout.

    Args:
        adapter: Protocol adapter used to evaluate the presence check.
        selector: CSS selector to look for.
        timeout_ms: Maximum time to wait, in milliseconds.

    Raises:
        TimeoutError: When no matching element appears before the deadline.
    """
    interval = 0.1
    check_js = f"!!document.querySelector({json.dumps(selector)})"

    # Use a deadline on the loop's monotonic clock instead of a fixed number
    # of iterations, so slow evaluate() calls cannot stretch the wait and a
    # sub-interval timeout still gets one check.
    clock = asyncio.get_running_loop().time
    deadline = clock() + timeout_ms / 1000.0

    while True:
        if await adapter.evaluate(check_js):
            return
        remaining = deadline - clock()
        if remaining <= 0:
            break
        await asyncio.sleep(min(interval, remaining))

    raise TimeoutError(f"Timeout waiting for element: {selector}")
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""extract() — structured data extraction from pages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
from typing import Any, Dict, Optional, Type
|
|
8
|
+
|
|
9
|
+
from ..protocol.protocol_adapter import ProtocolAdapter
|
|
10
|
+
from ..utils.html import truncate_html
|
|
11
|
+
from .llm_provider import LLMProvider
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def extract(
    adapter: ProtocolAdapter,
    llm: LLMProvider,
    instruction: str,
    schema: Optional[Any] = None,
) -> Any:
    """
    Extract structured data from the page.

    Gathers a screenshot, the HTML (truncated to 12000 via ``truncate_html``),
    the URL and the title, then asks the LLM for a JSON object answering
    *instruction*.

    Args:
        adapter: Protocol adapter used to inspect the page.
        llm: Provider whose ``chat_json`` returns the parsed JSON.
        instruction: What to extract.
        schema: Optional model. If it exposes ``model_json_schema`` its JSON
            schema is added to the prompt; if it exposes ``model_validate``
            the LLM result is passed through it.

    Returns:
        ``schema.model_validate(result)`` when *schema* supports it, otherwise
        the raw value returned by the LLM.
    """
    screenshot, html, url, title = await asyncio.gather(
        adapter.capture_screenshot(),
        adapter.get_html(),
        adapter.evaluate("window.location.href"),
        adapter.evaluate("document.title"),
    )

    truncated_html = truncate_html(str(html), 12000)

    # Start from the generic hint whenever a schema was given, and upgrade to
    # the concrete JSON schema only if it can be produced without error.
    schema_desc = ""
    if schema is not None:
        schema_desc = "\n\nReturn a JSON object matching the expected structure."
        try:
            # Pydantic v2 model
            if hasattr(schema, "model_json_schema"):
                rendered = json.dumps(schema.model_json_schema(), indent=2)
                schema_desc = f"\n\nReturn data matching this JSON schema:\n{rendered}"
        except Exception:
            # Keep the generic hint if the schema cannot be rendered.
            pass

    system_prompt = (
        "You are a data extraction agent. Given a webpage screenshot and HTML, "
        f"extract the requested information as JSON.{schema_desc}\n\n"
        "Return ONLY a valid JSON object. No prose, no markdown."
    )

    user_text = f"URL: {url}\nTitle: {title}\nInstruction: {instruction}\n\nHTML (truncated):\n{truncated_html}"

    system_message = {"role": "system", "content": system_prompt}
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": user_text},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{screenshot}"}},
        ],
    }
    result = await llm.chat_json([system_message, user_message])

    # Validate with Pydantic if schema provided
    if schema is None or not hasattr(schema, "model_validate"):
        return result
    return schema.model_validate(result)
|