texas-grocery-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- texas_grocery_mcp/__init__.py +3 -0
- texas_grocery_mcp/auth/__init__.py +5 -0
- texas_grocery_mcp/auth/browser_refresh.py +1629 -0
- texas_grocery_mcp/auth/credentials.py +337 -0
- texas_grocery_mcp/auth/session.py +767 -0
- texas_grocery_mcp/clients/__init__.py +5 -0
- texas_grocery_mcp/clients/graphql.py +2400 -0
- texas_grocery_mcp/models/__init__.py +54 -0
- texas_grocery_mcp/models/cart.py +60 -0
- texas_grocery_mcp/models/coupon.py +44 -0
- texas_grocery_mcp/models/errors.py +43 -0
- texas_grocery_mcp/models/health.py +41 -0
- texas_grocery_mcp/models/product.py +274 -0
- texas_grocery_mcp/models/store.py +77 -0
- texas_grocery_mcp/observability/__init__.py +6 -0
- texas_grocery_mcp/observability/health.py +141 -0
- texas_grocery_mcp/observability/logging.py +73 -0
- texas_grocery_mcp/reliability/__init__.py +23 -0
- texas_grocery_mcp/reliability/cache.py +116 -0
- texas_grocery_mcp/reliability/circuit_breaker.py +138 -0
- texas_grocery_mcp/reliability/retry.py +96 -0
- texas_grocery_mcp/reliability/throttle.py +113 -0
- texas_grocery_mcp/server.py +211 -0
- texas_grocery_mcp/services/__init__.py +5 -0
- texas_grocery_mcp/services/geocoding.py +227 -0
- texas_grocery_mcp/state.py +166 -0
- texas_grocery_mcp/tools/__init__.py +5 -0
- texas_grocery_mcp/tools/cart.py +821 -0
- texas_grocery_mcp/tools/coupon.py +381 -0
- texas_grocery_mcp/tools/product.py +437 -0
- texas_grocery_mcp/tools/session.py +486 -0
- texas_grocery_mcp/tools/store.py +353 -0
- texas_grocery_mcp/utils/__init__.py +5 -0
- texas_grocery_mcp/utils/config.py +146 -0
- texas_grocery_mcp/utils/secure_file.py +123 -0
- texas_grocery_mcp-0.1.0.dist-info/METADATA +296 -0
- texas_grocery_mcp-0.1.0.dist-info/RECORD +40 -0
- texas_grocery_mcp-0.1.0.dist-info/WHEEL +4 -0
- texas_grocery_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- texas_grocery_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1629 @@
|
|
|
1
|
+
"""Browser-based session refresh using embedded Playwright.
|
|
2
|
+
|
|
3
|
+
This module provides fast session refresh (~10-15 seconds) by embedding
|
|
4
|
+
Playwright directly, eliminating the orchestration overhead of the
|
|
5
|
+
Playwright MCP approach (~4 minutes).
|
|
6
|
+
|
|
7
|
+
Requires optional dependency: pip install texas-grocery-mcp[browser]
|
|
8
|
+
After install, run: playwright install chromium
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import glob
|
|
13
|
+
import os
|
|
14
|
+
import time
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Literal, TypedDict
|
|
17
|
+
|
|
18
|
+
import structlog
|
|
19
|
+
|
|
20
|
+
logger = structlog.get_logger()
|
|
21
|
+
|
|
22
|
+
# Playwright is an optional dependency (the "browser" extra). Record whether the
# import worked so callers can raise an actionable error instead of ImportError.
try:
    from playwright.async_api import async_playwright

    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    # Keep the name defined so references elsewhere in the module still resolve.
    async_playwright = None  # type: ignore[assignment]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class PlaywrightNotInstalledError(Exception):
    """Raised when playwright is not installed."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BrowserRefreshError(Exception):
    """Raised when browser refresh fails."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class LoginRequiredError(Exception):
    """Raised when HEB requires full login (not just token refresh)."""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Lock to prevent concurrent refreshes: refresh_session_with_browser() holds it
# for the whole refresh so two calls never drive a browser at the same time.
_refresh_lock = asyncio.Lock()
|
|
52
|
+
|
|
53
|
+
class PendingLoginState(TypedDict, total=False):
|
|
54
|
+
"""State for an interactive login/refresh flow that spans tool calls."""
|
|
55
|
+
|
|
56
|
+
flow: Literal["auto_login", "manual_login", "unknown"]
|
|
57
|
+
stage: str
|
|
58
|
+
start_time: float
|
|
59
|
+
auth_path: Path
|
|
60
|
+
|
|
61
|
+
# Playwright objects (kept open between calls)
|
|
62
|
+
playwright: Any
|
|
63
|
+
browser: Any
|
|
64
|
+
context: Any
|
|
65
|
+
page: Any
|
|
66
|
+
|
|
67
|
+
# Optional (auto-login) fields
|
|
68
|
+
email: str
|
|
69
|
+
password: str
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Module-level state to track a pending interactive login/refresh flow
|
|
73
|
+
_pending_login_state: PendingLoginState | None = None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def is_playwright_available() -> bool:
    """Check if Playwright is installed and available.

    Returns:
        The module-level ``PLAYWRIGHT_AVAILABLE`` flag.
    """
    return PLAYWRIGHT_AVAILABLE
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _detect_security_challenge_html(html: str) -> bool:
    """Detect if HTML content is a WAF/captcha challenge page.

    HEB uses Incapsula/Imperva and other anti-bot measures that sometimes
    return interstitials instead of the real site.

    IMPORTANT: This function must NOT trigger on normal HEB pages.
    - "reese84" appears on ALL HEB pages (bot detection script) - NOT a challenge
    - "incapsula" may appear in normal page headers - need context

    True challenge pages are interstitials with minimal content and specific phrases.

    Decision order (matching is case-insensitive substring search):
      1. Three or more "normal page" markers -> not a challenge, whatever else
         the page contains.
      2. Any strong challenge phrase -> challenge.
      3. Otherwise a weak marker counts only when the page is under 5000 chars.

    Args:
        html: Raw page HTML.

    Returns:
        True if the page looks like a security interstitial, False otherwise.
    """
    html_lower = html.lower()

    # Signs of a real HEB page (branding, shopping features, page chrome).
    normal_page_indicators = (
        "heb.com",  # Site branding
        "add to cart",  # Shopping functionality
        "my cart",  # Cart link
        "my account",  # Account link
        "curbside",  # HEB service
        "delivery",  # HEB service
        "weekly ad",  # HEB feature
        "shop now",  # Call to action
        "products",  # Product content
        "<nav",  # Navigation element
        "<header",  # Header element
        "data-testid",  # React test IDs (HEB uses React)
    )

    # Multiple signs of a real page win over any challenge marker.
    normal_indicator_count = sum(1 for ind in normal_page_indicators if ind in html_lower)
    if normal_indicator_count >= 3:
        return False

    # Phrases expected only on true interstitial block pages.
    strong_challenge_indicators = (
        "please verify you are a human",
        "enable javascript and cookies",
        "request unsuccessful",
        "sorry, you have been blocked",
        "access denied",
        "checking your browser",
        "please wait while we verify",
        "just a moment",  # Cloudflare-style challenge
        "ray id:",  # Cloudflare block page
        "performance & security by",  # Cloudflare footer
        "why have i been blocked",
        "this website is using a security service",
    )
    if any(indicator in html_lower for indicator in strong_challenge_indicators):
        return True

    # Interstitials are sparse; weak markers only count on short documents.
    is_minimal_content = len(html) < 5000
    weak_challenge_indicators = (
        "_incapsula_resource",  # Incapsula resource loading
        "challenge-platform",  # Challenge platform marker
        "cf-browser-verification",  # Cloudflare verification
    )
    return is_minimal_content and any(
        indicator in html_lower for indicator in weak_challenge_indicators
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
async def _detect_security_challenge(page: Any) -> bool:
    """Detect security challenge in the current page.

    Args:
        page: Playwright page object.

    Returns:
        The verdict of ``_detect_security_challenge_html`` on the page HTML, or
        False if the content could not be read (detection is best-effort).
    """
    try:
        content = await page.content()
        return _detect_security_challenge_html(content)
    except Exception as e:
        # Best-effort: an unreadable page is treated as "no challenge", but the
        # reason is logged rather than silently dropped.
        logger.debug("Security challenge detection failed", error=str(e))
        return False
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
async def _detect_login_form(page: Any) -> bool:
    """Check whether a login form is present (email/password fields).

    Args:
        page: Playwright page object.

    Returns:
        True as soon as any email or password selector matches an element,
        False if none match.
    """
    selectors = (
        'input[name="email"]',
        'input[type="email"]',
        "#email",
        'input[placeholder*="email" i]',
        'input[name="password"]',
        'input[type="password"]',
        "#password",
    )
    for selector in selectors:
        try:
            el = await page.query_selector(selector)
        except Exception:
            # One failing selector must not stop the others from being tried.
            continue
        if el:
            return True
    return False
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
async def _take_login_screenshot(page: Any, action: str) -> str | None:
    """Take screenshot of current page and return path.

    The file is written to ``/tmp/heb-login-<action>-<unix seconds>.png``,
    which is the naming scheme ``_cleanup_old_screenshots`` globs for.

    Args:
        page: Playwright page object
        action: Type of action (e.g., "captcha", "2fa")

    Returns:
        Path to screenshot file, or None if failed
    """
    # NOTE(review): one-second resolution means two screenshots for the same
    # action within a second share a path; the file name is also predictable
    # in a shared temp directory.
    timestamp = int(time.time())
    path = f"/tmp/heb-login-{action}-{timestamp}.png"
    try:
        await page.screenshot(path=path, full_page=True)
    except Exception as e:
        # A screenshot is a convenience for the human; never fail the flow on it.
        logger.warning("Screenshot failed", error=str(e), action=action)
        return None
    logger.info("Screenshot saved", path=path, action=action)
    return path
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _cleanup_old_screenshots(max_age_seconds: int = 3600) -> int:
    """Delete old login screenshots older than max_age_seconds.

    Only files matching ``/tmp/heb-login-*.png`` are considered. A file that
    cannot be inspected or removed is skipped with a debug log.

    Args:
        max_age_seconds: Maximum age in seconds (default 1 hour)

    Returns:
        Number of files deleted
    """
    deleted = 0
    now = time.time()

    for filepath in glob.glob("/tmp/heb-login-*.png"):
        try:
            if now - os.path.getmtime(filepath) > max_age_seconds:
                os.remove(filepath)
                deleted += 1
                logger.debug("Deleted old screenshot", path=filepath)
        except OSError as e:
            # Covers a file vanishing between glob and stat/remove.
            logger.debug("Could not delete screenshot", path=filepath, error=str(e))

    return deleted
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Strong references to fire-and-forget cleanup tasks. The event loop only keeps
# weak references to tasks, so an unreferenced task may be garbage-collected
# before it finishes.
_browser_cleanup_tasks: set[Any] = set()


def clear_pending_login() -> None:
    """Clear any pending login state and close the browser.

    Call this to reset state if login flow needs to be restarted.

    Browser shutdown is scheduled on the running event loop and is not awaited
    here. If no event loop is running, the shutdown cannot be scheduled; a
    warning is logged and the state is still cleared.
    """
    global _pending_login_state
    if _pending_login_state:
        try:
            playwright = _pending_login_state.get("playwright")
            browser = _pending_login_state.get("browser")
            if playwright or browser:
                try:
                    loop = asyncio.get_running_loop()
                except RuntimeError:
                    # Check before creating the coroutine, so nothing is left
                    # un-awaited when this is called outside a loop.
                    logger.warning(
                        "No running event loop; pending browser could not be closed"
                    )
                else:
                    # Full cleanup (browser + playwright) in the event loop
                    task = loop.create_task(_cleanup_browser(playwright, browser))
                    _browser_cleanup_tasks.add(task)
                    task.add_done_callback(_browser_cleanup_tasks.discard)
        except Exception as e:
            logger.debug("Error closing pending browser", error=str(e))
    _pending_login_state = None
    logger.info("Pending login state cleared")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
async def _check_authenticated(context: Any) -> bool:
    """Check if session has authentication cookies.

    Only the presence of a cookie named ``sat`` or ``DYN_USER_ID`` is checked;
    cookie values and expiry are not inspected.

    Args:
        context: Playwright browser context.

    Returns:
        True if at least one auth cookie is present.
    """
    cookies = await context.cookies()
    # HEB uses 'sat' or 'DYN_USER_ID' cookies for authenticated sessions
    auth_cookie_names = {"sat", "DYN_USER_ID"}
    # .get() so a cookie entry without a "name" key is ignored, not a KeyError.
    return any(c.get("name") in auth_cookie_names for c in cookies)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Chromium flags shared by the headless and interactive launches.
_REFRESH_CHROMIUM_ARGS = (
    "--disable-blink-features=AutomationControlled",
    "--no-first-run",
    "--no-default-browser-check",
    "--disable-infobars",
)

# Desktop Chrome user agent presented to HEB in both modes.
_REFRESH_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)


def _refresh_timeout_types() -> tuple[type[BaseException], ...]:
    """Return the exception types that signal a navigation timeout."""
    try:
        # Playwright's TimeoutError derives from playwright's own Error class,
        # not the builtin TimeoutError, so it must be named explicitly.
        from playwright.async_api import TimeoutError as PlaywrightTimeoutError
    except ImportError:
        return (TimeoutError,)
    return (TimeoutError, PlaywrightTimeoutError)


def _refresh_failure(
    exc: Exception,
    start_time: float,
    timeout_types: tuple[type[BaseException], ...],
) -> BrowserRefreshError:
    """Log a refresh failure and return the BrowserRefreshError to raise for it."""
    elapsed = time.monotonic() - start_time
    if isinstance(exc, timeout_types):
        logger.error("Browser refresh timed out", elapsed_seconds=elapsed)
        return BrowserRefreshError(
            f"Browser navigation timed out after {elapsed:.1f}s. "
            "Check your internet connection and try again."
        )
    logger.error("Browser refresh failed", error=str(exc), elapsed_seconds=round(elapsed, 1))
    if isinstance(exc, BrowserRefreshError):
        # Already the right type with a specific message; do not wrap it again.
        return exc
    return BrowserRefreshError(f"Browser refresh failed: {exc}")


async def _refresh_launch_browser(playwright: Any, *, headless: bool) -> Any:
    """Launch Chromium with the flags used for every session refresh."""
    logger.info("Launching browser for session refresh", headless=headless)
    return await playwright.chromium.launch(
        headless=headless,
        args=list(_REFRESH_CHROMIUM_ARGS),
    )


async def _refresh_open_page(browser: Any, auth_path: Path) -> tuple[Any, Any]:
    """Create a context seeded from auth_path (when it exists) and open a page."""
    storage_state = str(auth_path) if auth_path.exists() else None
    if storage_state:
        logger.info(
            "Loading existing auth state for smart refresh",
            auth_path=str(auth_path),
        )
    context = await browser.new_context(
        user_agent=_REFRESH_USER_AGENT,
        storage_state=storage_state,
    )
    page = await context.new_page()
    return context, page


async def _refresh_goto_home(page: Any, timeout: int) -> None:
    """Load the HEB homepage, raising BrowserRefreshError on an HTTP status >= 400."""
    logger.info("Navigating to HEB.com...")
    response = await page.goto(
        "https://www.heb.com",
        wait_until="load",
        timeout=timeout,
    )
    if response and response.status >= 400:
        raise BrowserRefreshError(f"HEB.com returned HTTP status {response.status}")


async def _refresh_persist_state(context: Any, page: Any, auth_path: Path) -> tuple[int, int]:
    """Wait for token generation, save storage state, return (cookies, localStorage) counts."""
    logger.info("Waiting for reese84 token generation...")
    await page.wait_for_timeout(5000)

    logger.info("Saving session state", auth_path=str(auth_path))
    auth_path.parent.mkdir(parents=True, exist_ok=True)
    await context.storage_state(path=str(auth_path))

    # Ensure secure permissions on auth file
    from texas_grocery_mcp.utils.secure_file import ensure_secure_permissions

    ensure_secure_permissions(auth_path)

    cookies = await context.cookies()
    local_storage_count = await page.evaluate("() => window.localStorage.length")
    return len(cookies), local_storage_count


def _refresh_success_result(
    auth_path: Path,
    start_time: float,
    cookies_count: int,
    local_storage_count: int,
) -> dict[str, Any]:
    """Log and build the success payload returned to the caller."""
    elapsed = time.monotonic() - start_time
    logger.info(
        "Session refreshed successfully",
        elapsed_seconds=round(elapsed, 1),
        cookies_count=cookies_count,
        local_storage_count=local_storage_count,
    )
    return {
        "success": True,
        "status": "success",
        "message": f"Session refreshed successfully in {elapsed:.1f}s",
        "elapsed_seconds": round(elapsed, 1),
        "auth_path": str(auth_path),
        "cookies_count": cookies_count,
        "local_storage_count": local_storage_count,
    }


async def _refresh_hand_off(
    action: Literal["login", "captcha", "2fa", "waf"],
    banner: str | None,
    *,
    playwright: Any,
    browser: Any,
    context: Any,
    page: Any,
    auth_path: Path,
    start_time: float,
) -> dict[str, Any]:
    """Screenshot the page, park the open browser in module state, and build the response."""
    global _pending_login_state
    if banner is not None:
        await _inject_status_banner(page, banner, is_waiting=True)
    screenshot_path = await _take_login_screenshot(page, action)

    # The browser stays open; the next call resumes this flow from the state.
    _pending_login_state = PendingLoginState(
        flow="manual_login",
        stage="manual_login",
        start_time=start_time,
        auth_path=auth_path,
        playwright=playwright,
        browser=browser,
        context=context,
        page=page,
    )
    return _build_human_action_response(action, screenshot_path)


async def _refresh_headless(auth_path: Path, timeout: int, start_time: float) -> dict[str, Any]:
    """Headless refresh: save fresh state, or raise when human interaction is needed."""
    timeout_types = _refresh_timeout_types()
    try:
        async with async_playwright() as p:
            browser = await _refresh_launch_browser(p, headless=True)
            try:
                context, page = await _refresh_open_page(browser, auth_path)
                await _refresh_goto_home(page, timeout)

                # Fail fast if we're on a security interstitial.
                if await _detect_security_challenge(page) or await _detect_captcha(page):
                    raise BrowserRefreshError(
                        "Security challenge detected in headless mode. "
                        "Run session_refresh(headless=False) to complete it."
                    )

                if not await _check_authenticated(context):
                    raise LoginRequiredError(
                        "HEB requires login. Your session has expired.\n"
                        "Run session_refresh(headless=False) to login manually."
                    )

                cookies_count, local_storage_count = await _refresh_persist_state(
                    context, page, auth_path
                )
            finally:
                # Close on every path, success or failure.
                await browser.close()

        return _refresh_success_result(
            auth_path, start_time, cookies_count, local_storage_count
        )
    except (PlaywrightNotInstalledError, LoginRequiredError):
        raise
    except Exception as e:
        failure = _refresh_failure(e, start_time, timeout_types)
        if failure is e:
            raise
        raise failure from e


async def _refresh_interactive(auth_path: Path, timeout: int, start_time: float) -> dict[str, Any]:
    """Visible-browser refresh: finish if already authenticated, otherwise hand off."""
    timeout_types = _refresh_timeout_types()
    playwright: Any | None = None
    browser: Any | None = None
    try:
        # Started without a context manager so the browser can outlive this call.
        playwright = await async_playwright().start()
        browser = await _refresh_launch_browser(playwright, headless=False)
        context, page = await _refresh_open_page(browser, auth_path)

        # Step 1: Try homepage refresh (may succeed without login)
        await _refresh_goto_home(page, timeout)

        handles = {
            "playwright": playwright,
            "browser": browser,
            "context": context,
            "page": page,
            "auth_path": auth_path,
            "start_time": start_time,
        }

        # If we hit a security challenge, hand off immediately.
        if await _detect_security_challenge(page):
            return await _refresh_hand_off(
                "waf",
                (
                    "Security check detected. Complete it in this browser, "
                    "then tell your agent 'done'."
                ),
                **handles,
            )

        if await _detect_captcha(page):
            return await _refresh_hand_off(
                "captcha",
                "CAPTCHA detected. Solve it in this browser, then tell your agent 'done'.",
                **handles,
            )

        # If already authenticated, just refresh and save.
        if await _check_authenticated(context):
            logger.info("Already authenticated - refreshing session tokens")
            cookies_count, local_storage_count = await _refresh_persist_state(
                context, page, auth_path
            )
            await _cleanup_browser(playwright, browser)
            return _refresh_success_result(
                auth_path, start_time, cookies_count, local_storage_count
            )

        # Step 2: Not authenticated - open login and hand off immediately.
        logger.info("Not authenticated - opening login page for user")
        await page.goto(
            "https://www.heb.com/my-account/login",
            wait_until="load",
            timeout=timeout,
        )
        await _inject_status_banner(
            page,
            "Please log in to HEB in this browser, then tell your agent 'done'.",
            is_waiting=True,
        )

        # Detect blockers on the login page and hand off with a screenshot.
        action: Literal["login", "captcha", "2fa", "waf"] = "login"
        if await _detect_security_challenge(page):
            action = "waf"
        elif await _detect_captcha(page):
            action = "captcha"
        elif await _detect_2fa(page):
            action = "2fa"
        elif not await _detect_login_form(page):
            # Sometimes HEB changes login flow or returns an error page. Treat as WAF/error.
            action = "waf"

        # The banner is already injected above, so none is passed here.
        return await _refresh_hand_off(action, None, **handles)

    except Exception as e:
        failure = _refresh_failure(e, start_time, timeout_types)
        # Nothing was handed off, so do not leave a browser window behind.
        await _cleanup_browser(playwright, browser)
        if failure is e:
            raise
        raise failure from e


async def refresh_session_with_browser(
    auth_path: Path,
    headless: bool = True,
    timeout: int = 30000,
    login_timeout: int = 300000,  # 5 minutes for manual login
) -> dict[str, Any]:
    """Refresh HEB session using embedded Playwright.

    This is the FAST method (10-15 seconds) that runs Playwright directly
    instead of orchestrating it through MCP tool calls.

    SMART REFRESH LOGIC:
    - Loads existing auth.json cookies into browser before navigating
    - This allows headless refresh to work even when reese84 token expired
    - Only requires manual login when session cookies are truly expired
    - Visiting HEB.com regenerates the reese84 bot detection token

    If an interactive flow is already pending, it is resumed instead of
    starting a new browser, whatever ``headless`` is. In non-headless mode the
    call never blocks on the user: it returns a "human action" response and
    keeps the browser open for the next call.

    Args:
        auth_path: Path to save auth.json (cookies + localStorage)
        headless: Run browser in headless mode (default True).
                  Set to False if you need to complete a manual login.
        timeout: Navigation timeout in milliseconds (default 30000)
        login_timeout: Deprecated (non-headless mode returns immediately for human handoff).

    Returns:
        dict with success status, message, and timing info:
        {
            "success": True,
            "status": "success",
            "message": "Session refreshed successfully in 12.3s",
            "elapsed_seconds": 12.3,
            "auth_path": "/path/to/auth.json",
            "cookies_count": 25,
            "local_storage_count": 5
        }
        or, in non-headless mode, whatever _build_human_action_response()
        returns when the user must act.

    Raises:
        PlaywrightNotInstalledError: If playwright is not installed
        BrowserRefreshError: If browser operation fails (including timeouts and
            a security challenge in headless mode)
        LoginRequiredError: If HEB requires full login (headless mode only)
    """
    if not PLAYWRIGHT_AVAILABLE:
        raise PlaywrightNotInstalledError(
            "Playwright not installed. Install with:\n"
            "  pip install texas-grocery-mcp[browser]\n"
            "  playwright install chromium"
        )

    assert async_playwright is not None

    # Use lock to prevent concurrent refresh attempts
    async with _refresh_lock:
        _cleanup_old_screenshots()

        # If we already have an interactive flow in progress, resume it instead
        # of starting a new browser (prevents "stuck" calls and duplicate windows).
        if _pending_login_state:
            return await _resume_pending_login(auth_path)

        start_time = time.monotonic()

        # Headless mode: refresh tokens quickly, but cannot handle human interaction.
        if headless:
            return await _refresh_headless(auth_path, timeout, start_time)

        # Non-headless mode: NEVER block waiting for login. Start an interactive
        # flow, take a screenshot, and return control to the agent/user immediately.
        return await _refresh_interactive(auth_path, timeout, start_time)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
# CAPTCHA detection selectors: CSS selectors tried in order by _detect_captcha().
CAPTCHA_SELECTORS = [
    'iframe[src*="recaptcha"]',
    "#g-recaptcha",
    ".g-recaptcha",
    'iframe[src*="hcaptcha"]',
    "[data-hcaptcha-sitekey]",
    "[data-friendly-captcha]",
    'iframe[src*="captcha"]',
]

# 2FA detection patterns: lower-case phrases searched for in the page HTML.
TWO_FA_INDICATORS = [
    "verification code",
    "one-time code",
    "we sent a code",
    "enter the code",
    "security code",
    "verify your identity",
]
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
class AutoLoginError(Exception):
    """Raised when auto-login fails."""
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
class CaptchaRequiredError(Exception):
|
|
631
|
+
"""Raised when CAPTCHA needs human solving."""
|
|
632
|
+
|
|
633
|
+
def __init__(
|
|
634
|
+
self,
|
|
635
|
+
message: str,
|
|
636
|
+
browser: Any | None = None,
|
|
637
|
+
page: Any | None = None,
|
|
638
|
+
context: Any | None = None,
|
|
639
|
+
):
|
|
640
|
+
super().__init__(message)
|
|
641
|
+
self.browser = browser
|
|
642
|
+
self.page = page
|
|
643
|
+
self.context = context
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
class TwoFactorRequiredError(Exception):
|
|
647
|
+
"""Raised when 2FA verification is needed."""
|
|
648
|
+
|
|
649
|
+
def __init__(
|
|
650
|
+
self,
|
|
651
|
+
message: str,
|
|
652
|
+
browser: Any | None = None,
|
|
653
|
+
page: Any | None = None,
|
|
654
|
+
context: Any | None = None,
|
|
655
|
+
):
|
|
656
|
+
super().__init__(message)
|
|
657
|
+
self.browser = browser
|
|
658
|
+
self.page = page
|
|
659
|
+
self.context = context
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
async def _detect_captcha(page: Any) -> bool:
    """Check if CAPTCHA is present on page.

    Two passes are made: first each selector in ``CAPTCHA_SELECTORS`` is
    queried, then the page HTML is searched for the word "captcha" together
    with "solve" or "verify". Errors in either pass are logged at debug level
    and treated as "not found".

    Args:
        page: Playwright page object.

    Returns:
        True if CAPTCHA detected, False otherwise
    """
    for selector in CAPTCHA_SELECTORS:
        try:
            element = await page.query_selector(selector)
        except Exception as e:
            # Best-effort: keep trying the remaining selectors.
            logger.debug("CAPTCHA selector check failed", selector=selector, error=str(e))
            continue
        if element:
            logger.info("CAPTCHA detected", selector=selector)
            return True

    # Also check page content for CAPTCHA-related text
    try:
        content_lower = (await page.content()).lower()
    except Exception as e:
        logger.debug("CAPTCHA content check failed", error=str(e))
        return False

    if "captcha" in content_lower and ("solve" in content_lower or "verify" in content_lower):
        logger.info("CAPTCHA detected via page content")
        return True

    return False
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
async def _detect_2fa(page: Any) -> bool:
    """Check if 2FA verification is required.

    The page HTML is searched for any phrase in ``TWO_FA_INDICATORS``; if none
    is found, an ``input[maxlength="6"]`` element also counts as a 2FA prompt.
    Any error is logged as a warning and treated as "not detected".

    Args:
        page: Playwright page object.

    Returns:
        True if 2FA prompt detected, False otherwise
    """
    try:
        content_lower = (await page.content()).lower()

        matched = next((ind for ind in TWO_FA_INDICATORS if ind in content_lower), None)
        if matched is not None:
            logger.info("2FA detected", indicator=matched)
            return True

        # Check for 6-digit code input field
        code_input = await page.query_selector('input[maxlength="6"]')
        if code_input:
            logger.info("2FA detected via 6-digit input field")
            return True

    except Exception as e:
        logger.warning("Error checking for 2FA", error=str(e))

    return False
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
async def _verify_login_success(page: Any, context: Any) -> bool:
    """Verify that login completed successfully.

    Checks:
    - URL path points at a profile/account page (and is not a login page)
    - Presence of auth cookies
    - "Hi, [Name]" in page content

    Login counts as successful when auth cookies are present AND either the
    URL or the greeting check passes.

    Args:
        page: Playwright page object.
        context: Playwright browser context.

    Returns:
        True if login appears successful; False otherwise, including when any
        check raises.
    """
    from urllib.parse import urlsplit

    try:
        # Check URL. Only the path is matched: the login page lives at
        # /my-account/login, and a redirect target in the query string may
        # also contain "/my-account", so matching the whole URL would report
        # success while still on the login page.
        current_path = urlsplit(page.url).path
        success_url_patterns = ("/my-account", "/profile", "/account")
        url_indicates_success = "/login" not in current_path and any(
            pattern in current_path for pattern in success_url_patterns
        )

        # Check auth cookies
        has_auth_cookies = await _check_authenticated(context)

        # Check for user greeting
        try:
            content_lower = (await page.content()).lower()
            has_greeting = "hi," in content_lower or "hello," in content_lower
        except Exception:
            # Unreadable content counts as "no greeting", not as a failure.
            has_greeting = False

        # Success if we have auth cookies AND (URL or greeting)
        is_success = has_auth_cookies and (url_indicates_success or has_greeting)

        logger.debug(
            "Login success check",
            url_indicates_success=url_indicates_success,
            has_auth_cookies=has_auth_cookies,
            has_greeting=has_greeting,
            is_success=is_success,
        )

        return is_success

    except Exception as e:
        logger.warning("Error verifying login success", error=str(e))
        return False
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
async def _auto_login_first_match(
    page: Any,
    selectors: list[str],
    action: Any = None,
) -> str | None:
    """Return the first selector in ``selectors`` that matches on ``page``.

    When ``action`` is given it is awaited with the matched element. A
    selector whose lookup or action raises is skipped and the next one is
    tried. Returns None when no selector worked.
    """
    for selector in selectors:
        try:
            element = await page.query_selector(selector)
            if element:
                if action is not None:
                    await action(element)
                return selector
        except Exception:
            continue
    return None


async def _auto_login_hand_off(
    session: dict[str, Any],
    start_time: float,
    *,
    action: str,
    stage: str,
    log_message: str,
    banner_message: str,
) -> dict[str, Any]:
    """Park the open browser in ``_pending_login_state`` and ask for human help."""
    global _pending_login_state

    page = session["page"]
    logger.info(log_message)
    await _inject_status_banner(page, banner_message, is_waiting=True)
    screenshot_path = await _take_login_screenshot(page, action)

    # Store state for resume; the browser objects stay alive between calls.
    _pending_login_state = {
        "flow": "auto_login",
        **session,
        "stage": stage,
        "start_time": start_time,
    }

    return _build_human_action_response(action, screenshot_path)


async def _auto_login_form_missing_response(
    page: Any,
    playwright: Any,
    browser: Any,
) -> dict[str, Any]:
    """Build the failure response for a login form that never appeared."""
    current_url = page.url
    page_title = await page.title()
    lowered_title = page_title.lower()

    # Check for common error indicators
    is_error_page = (
        "error" in current_url.lower()
        or "error" in lowered_title
        or "something went wrong" in lowered_title
        or "page not found" in lowered_title
    )

    # Take screenshot to show what happened
    screenshot_path = await _take_login_screenshot(page, "error")

    if is_error_page:
        logger.error(
            "HEB returned an error page",
            url=current_url,
            title=page_title,
        )
        await _cleanup_browser(playwright, browser)
        return {
            "status": "failed",
            "message": f"HEB returned an error page: {page_title}",
            "error": "heb_error_page",
            "screenshot_path": screenshot_path,
            "url": current_url,
            "suggestion": "HEB.com may be having issues. Try again in a few minutes.",
        }

    logger.error(
        "Login form not found",
        url=current_url,
        title=page_title,
    )
    await _cleanup_browser(playwright, browser)
    return {
        "status": "failed",
        "message": f"Could not find login form. Page: {page_title}",
        "error": "login_form_not_found",
        "screenshot_path": screenshot_path,
        "url": current_url,
        "suggestion": (
            "Check the screenshot to see what page loaded. "
            "HEB may have changed their login flow."
        ),
    }


async def auto_login_with_credentials(
    auth_path: Path,
    email: str,
    password: str,
    headless: bool = False,  # Default to visible for human handoff
    timeout: int = 30000,
    login_timeout: int = 300000,
) -> dict[str, Any]:
    """Perform automatic login using stored credentials.

    This implements "Smart Semi-Automated Login with Human Handoff":
    1. Navigate to login page
    2. Fill email and password automatically
    3. Click Continue/Submit
    4. If CAPTCHA detected: take screenshot, return immediately for human solving
    5. If 2FA detected: take screenshot, return immediately for code entry
    6. Call again after human action to continue
    7. Verify success and save session

    The browser stays open between calls when waiting for human action.
    Call session_refresh() again after solving CAPTCHA/2FA to continue.

    Args:
        auth_path: Path to save auth.json
        email: HEB account email
        password: HEB account password
        headless: Run browser without visible window (default False for handoff)
        timeout: Navigation timeout in milliseconds
        login_timeout: Max time for entire login process. NOTE: accepted for
            interface compatibility but not currently enforced by this function.

    Returns:
        dict with status and next action:
        - {"status": "success", ...} - Login completed
        - {"status": "human_action_required", "action": "captcha", ...} - Solve CAPTCHA
        - {"status": "human_action_required", "action": "2fa", ...} - Enter 2FA code
        - {"status": "failed", ...} - Login failed

    Raises:
        PlaywrightNotInstalledError: If Playwright is not available.
    """
    if not PLAYWRIGHT_AVAILABLE:
        raise PlaywrightNotInstalledError(
            "Playwright not installed. Install with:\n"
            " pip install texas-grocery-mcp[browser]\n"
            " playwright install chromium"
        )

    # Cleanup old screenshots on each call
    _cleanup_old_screenshots()

    async with _refresh_lock:
        start_time = time.monotonic()

        # Check if we have a pending login to resume
        if _pending_login_state:
            return await _resume_pending_login(auth_path)

        # Start fresh login
        playwright = None
        browser = None

        try:
            playwright = await async_playwright().start()

            # Launch browser (visible by default for human handoff)
            logger.info("Launching browser for auto-login", headless=headless)
            browser = await playwright.chromium.launch(
                headless=headless,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--no-first-run",
                    "--no-default-browser-check",
                    "--disable-infobars",
                ],
            )

            context = await browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0.0.0 Safari/537.36"
                ),
            )

            page = await context.new_page()

            # Navigate to HEB login page (must use /my-account/login, not /login)
            logger.info("Navigating to HEB login page...")
            await page.goto(
                "https://www.heb.com/my-account/login",
                wait_until="load",
                timeout=timeout,
            )

            # Wait for actual login form to appear (email field exists),
            # polling once per second.
            logger.info("Waiting for login form to load...")
            login_form_loaded = False
            form_wait_start = time.monotonic()
            max_form_wait = 30000  # 30 seconds max

            while True:
                await page.wait_for_timeout(1000)

                form_selector = await _auto_login_first_match(
                    page,
                    ['input[name="email"]', 'input[type="email"]', '#email'],
                )
                if form_selector:
                    login_form_loaded = True
                    logger.info("Login form loaded", selector=form_selector)
                    break

                # Only report a timeout when the form is genuinely missing.
                if (time.monotonic() - form_wait_start) * 1000 >= max_form_wait:
                    logger.warning("Login form not found after waiting", url=page.url)
                    break

            # If login form never loaded, check if we're on an error page
            if not login_form_loaded:
                return await _auto_login_form_missing_response(page, playwright, browser)

            # Everything a later call needs in order to resume this login.
            session = {
                "playwright": playwright,
                "browser": browser,
                "context": context,
                "page": page,
                "auth_path": auth_path,
                "email": email,
                "password": password,
            }
            captcha_banner = "CAPTCHA detected! Please solve it, then tell the AI 'done'."

            # Check for CAPTCHA before filling form
            await _inject_status_banner(page, "Checking for CAPTCHA...")
            await page.wait_for_timeout(1000)

            if await _detect_captcha(page):
                return await _auto_login_hand_off(
                    session,
                    start_time,
                    action="captcha",
                    stage="pre_credentials",
                    log_message="CAPTCHA detected on login page - returning for human action",
                    banner_message=captcha_banner,
                )

            # Fill credentials
            await _inject_status_banner(page, "Filling credentials automatically...")
            await page.wait_for_timeout(500)

            # Fill email
            email_selector = await _auto_login_first_match(
                page,
                [
                    'input[name="email"]',
                    'input[type="email"]',
                    "#email",
                    'input[placeholder*="email" i]',
                ],
                action=lambda field: field.fill(email),
            )
            if email_selector is None:
                await _cleanup_browser(playwright, browser)
                return {
                    "status": "failed",
                    "message": "Could not find email field on login page",
                    "error": "selector_not_found",
                }
            logger.debug("Filled email field", selector=email_selector)

            # Fill password
            password_selector = await _auto_login_first_match(
                page,
                ['input[name="password"]', 'input[type="password"]', '#password'],
                action=lambda field: field.fill(password),
            )
            if password_selector is None:
                await _cleanup_browser(playwright, browser)
                return {
                    "status": "failed",
                    "message": "Could not find password field on login page",
                    "error": "selector_not_found",
                }
            logger.debug("Filled password field", selector=password_selector)

            # Click Continue button (a missing button is not an error; the
            # flow simply moves on to the Submit step).
            await _inject_status_banner(page, "Clicking Continue...")
            await page.wait_for_timeout(500)

            continue_selector = await _auto_login_first_match(
                page,
                [
                    'button:has-text("Continue")',
                    'button[type="submit"]:has-text("Continue")',
                    'input[type="submit"][value*="Continue" i]',
                ],
                action=lambda button: button.click(),
            )
            if continue_selector:
                logger.debug("Clicked Continue button", selector=continue_selector)

            await page.wait_for_timeout(2000)

            # Check for CAPTCHA after Continue
            if await _detect_captcha(page):
                return await _auto_login_hand_off(
                    session,
                    start_time,
                    action="captcha",
                    stage="post_continue",
                    log_message="CAPTCHA detected after Continue - returning for human action",
                    banner_message=captcha_banner,
                )

            # Click Submit button
            await _inject_status_banner(page, "Clicking Submit...")
            await page.wait_for_timeout(500)

            submit_selector = await _auto_login_first_match(
                page,
                [
                    'button:has-text("Submit")',
                    'button:has-text("Sign in")',
                    'button:has-text("Log in")',
                    'button[type="submit"]',
                ],
                action=lambda button: button.click(),
            )
            if submit_selector:
                logger.debug("Clicked Submit button", selector=submit_selector)

            await page.wait_for_timeout(3000)

            # Check for CAPTCHA after Submit
            if await _detect_captcha(page):
                return await _auto_login_hand_off(
                    session,
                    start_time,
                    action="captcha",
                    stage="post_submit",
                    log_message="CAPTCHA detected after Submit - returning for human action",
                    banner_message=captcha_banner,
                )

            # Check for 2FA
            if await _detect_2fa(page):
                return await _auto_login_hand_off(
                    session,
                    start_time,
                    action="2fa",
                    stage="2fa",
                    log_message="2FA detected - returning for human action",
                    banner_message="Verification code required! Enter the code sent to your email.",
                )

            # Check for login errors
            for selector in ['.error-message', '.alert-danger', '[role="alert"]', '.login-error']:
                try:
                    error_el = await page.query_selector(selector)
                    if error_el:
                        error_text = await error_el.text_content()
                        if error_text and error_text.strip():
                            await _cleanup_browser(playwright, browser)
                            return {
                                "status": "failed",
                                "message": f"Login failed: {error_text.strip()}",
                                "error": "invalid_credentials",
                                "suggestion": (
                                    "Update your credentials with session_save_credentials()."
                                ),
                            }
                except Exception:
                    continue

            # Verify login success; if the first check fails, wait and check
            # once more before giving up.
            for attempt in range(2):
                if attempt:
                    await page.wait_for_timeout(3000)
                if await _verify_login_success(page, context):
                    return await _complete_login(
                        playwright,
                        browser,
                        context,
                        page,
                        auth_path,
                        start_time,
                    )

            # Unknown state
            await _cleanup_browser(playwright, browser)
            return {
                "status": "failed",
                "message": (
                    "Login result unclear. Please try manual login with "
                    "session_refresh(headless=False)"
                ),
                "error": "unknown_state",
            }

        except PlaywrightNotInstalledError:
            raise
        except TimeoutError:
            elapsed = time.monotonic() - start_time
            await _cleanup_browser(playwright, browser)
            return {
                "status": "failed",
                "message": f"Login timed out after {elapsed:.1f}s",
                "error": "timeout",
            }
        except Exception as e:
            elapsed = time.monotonic() - start_time
            logger.error("Auto-login failed", error=str(e), elapsed_seconds=round(elapsed, 1))
            await _cleanup_browser(playwright, browser)
            return {
                "status": "failed",
                "message": f"Auto-login failed: {e}",
                "error": "exception",
            }
|
|
1203
|
+
|
|
1204
|
+
|
|
1205
|
+
def _build_human_action_response(action: str, screenshot_path: str | None) -> dict[str, Any]:
|
|
1206
|
+
"""Build standardized response for human action required."""
|
|
1207
|
+
action_messages = {
|
|
1208
|
+
"captcha": "CAPTCHA detected. Please solve it in the browser window.",
|
|
1209
|
+
"2fa": "Verification code required. Check your email and enter the code in the browser.",
|
|
1210
|
+
"login": "Login required. Please log in to your HEB account in the browser window.",
|
|
1211
|
+
"waf": "Security check detected. Please complete it in the browser window.",
|
|
1212
|
+
}
|
|
1213
|
+
|
|
1214
|
+
action_instructions = {
|
|
1215
|
+
"captcha": [
|
|
1216
|
+
"1. Look at the browser window that opened",
|
|
1217
|
+
"2. Solve the CAPTCHA challenge shown",
|
|
1218
|
+
"3. After solving, tell me 'done' and I'll continue the login",
|
|
1219
|
+
],
|
|
1220
|
+
"2fa": [
|
|
1221
|
+
"1. Check your email for a verification code from HEB",
|
|
1222
|
+
"2. Enter the code in the browser window",
|
|
1223
|
+
"3. After entering, tell me 'done' and I'll continue the login",
|
|
1224
|
+
],
|
|
1225
|
+
"login": [
|
|
1226
|
+
"1. Look at the browser window that opened",
|
|
1227
|
+
"2. Log in to your HEB account",
|
|
1228
|
+
"3. Complete any prompts (CAPTCHA/2FA) if they appear",
|
|
1229
|
+
"4. After you're logged in, tell me 'done' and I'll save the session",
|
|
1230
|
+
],
|
|
1231
|
+
"waf": [
|
|
1232
|
+
"1. Look at the browser window that opened",
|
|
1233
|
+
"2. Complete any security check shown (CAPTCHA, 'verify you are human', etc.)",
|
|
1234
|
+
"3. If it's a hard block, try refreshing or switching networks/VPN settings",
|
|
1235
|
+
"4. After the page is unblocked, tell me 'done' and I'll continue",
|
|
1236
|
+
],
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
response = {
|
|
1240
|
+
"status": "human_action_required",
|
|
1241
|
+
"success": False,
|
|
1242
|
+
"action": action,
|
|
1243
|
+
"message": action_messages.get(action, f"{action} required"),
|
|
1244
|
+
"screenshot_path": screenshot_path,
|
|
1245
|
+
"instructions": action_instructions.get(action, ["Complete the action in the browser"]),
|
|
1246
|
+
"next_step": "Call session_refresh() again after completing the action",
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
if screenshot_path:
|
|
1250
|
+
response["screenshot_info"] = (
|
|
1251
|
+
f"Screenshot saved to: {screenshot_path} - "
|
|
1252
|
+
"You can view this to see what's shown in the browser."
|
|
1253
|
+
)
|
|
1254
|
+
else:
|
|
1255
|
+
response["screenshot_error"] = "Could not capture screenshot"
|
|
1256
|
+
|
|
1257
|
+
return response
|
|
1258
|
+
|
|
1259
|
+
|
|
1260
|
+
async def _resume_first_match(page: Any, selectors: list[str], action: Any) -> bool:
    """Await ``action`` on the first element matched by ``selectors``.

    A selector whose lookup or action raises is skipped and the next one is
    tried. Returns True if some selector matched and its action completed.
    """
    for selector in selectors:
        try:
            element = await page.query_selector(selector)
            if element:
                await action(element)
                return True
        except Exception:
            continue
    return False


async def _resume_pending_login(auth_path: Path) -> dict[str, Any]:
    """Resume a pending login after human action (CAPTCHA/2FA solved).

    Reads the browser objects and the stage stored in ``_pending_login_state``
    and either hands off to the human again (a challenge is still showing),
    continues the automated steps that remain for the stored stage, or
    finishes via ``_complete_login``.

    Args:
        auth_path: Fallback path for auth.json; the path stored in the
            pending state takes precedence when present.

    Returns:
        A status dict: "human_action_required", "success" (from
        ``_complete_login``) or "failed".
    """
    global _pending_login_state

    if not _pending_login_state:
        return {
            "status": "failed",
            "message": "No pending login to resume",
            "error": "no_pending_state",
        }

    playwright = _pending_login_state.get("playwright")
    browser = _pending_login_state.get("browser")
    context = _pending_login_state.get("context")
    page = _pending_login_state.get("page")
    stage = _pending_login_state.get("stage", "unknown")
    flow = _pending_login_state.get("flow", "unknown")
    start_time = float(_pending_login_state.get("start_time", time.monotonic()))
    saved_auth_path = _pending_login_state.get("auth_path", auth_path)
    email = _pending_login_state.get("email")
    password = _pending_login_state.get("password")

    logger.info("Resuming pending login", stage=stage, flow=flow)

    try:
        # Check if browser is still open
        if not browser or not page:
            _pending_login_state = None
            # Release whatever is left (e.g. a started Playwright driver)
            # instead of only dropping the references to it.
            await _cleanup_browser(playwright, browser)
            return {
                "status": "failed",
                "message": "Browser was closed. Please start login again.",
                "error": "browser_closed",
            }

        # Manual login flow: never click/fill, just hand off until auth cookies appear.
        if stage == "manual_login" or flow == "manual_login":
            # If a security check is still present, keep handing off.
            if await _detect_security_challenge(page):
                screenshot_path = await _take_login_screenshot(page, "waf")
                return _build_human_action_response("waf", screenshot_path)

            if await _detect_captcha(page):
                screenshot_path = await _take_login_screenshot(page, "captcha")
                return _build_human_action_response("captcha", screenshot_path)

            if await _detect_2fa(page):
                screenshot_path = await _take_login_screenshot(page, "2fa")
                return _build_human_action_response("2fa", screenshot_path)

            # If authenticated, save session and cleanup.
            if await _check_authenticated(context):
                _pending_login_state = None
                return await _complete_login(
                    playwright,
                    browser,
                    context,
                    page,
                    saved_auth_path,
                    start_time,
                )

            # Not authenticated yet; keep handing off (don't block).
            screenshot_path = await _take_login_screenshot(page, "login")
            return _build_human_action_response("login", screenshot_path)

        # Check current page state
        # If CAPTCHA is still present, return again for human action
        if await _detect_captcha(page):
            logger.info("CAPTCHA still detected - waiting for human")
            screenshot_path = await _take_login_screenshot(page, "captcha")
            return _build_human_action_response("captcha", screenshot_path)

        # If 2FA is still present, return again for human action
        if await _detect_2fa(page):
            logger.info("2FA still detected - waiting for human")
            screenshot_path = await _take_login_screenshot(page, "2fa")
            return _build_human_action_response("2fa", screenshot_path)

        # If we ended up on a WAF/security page, hand off.
        if await _detect_security_challenge(page):
            screenshot_path = await _take_login_screenshot(page, "waf")
            return _build_human_action_response("waf", screenshot_path)

        # CAPTCHA/2FA appears to be solved, continue the flow based on stage
        await _inject_status_banner(page, "Continuing login...")

        if stage == "pre_credentials":
            # Need to fill credentials and continue
            await page.wait_for_timeout(1000)

            # Fill email (best-effort: a missing field does not abort the resume)
            await _resume_first_match(
                page,
                ['input[name="email"]', 'input[type="email"]', '#email'],
                lambda field: field.fill(email),
            )

            # Fill password (best-effort, as above)
            await _resume_first_match(
                page,
                ['input[name="password"]', 'input[type="password"]', '#password'],
                lambda field: field.fill(password),
            )

            # Click Continue
            await page.wait_for_timeout(500)
            await _resume_first_match(
                page,
                ['button:has-text("Continue")', 'button[type="submit"]'],
                lambda button: button.click(),
            )

            await page.wait_for_timeout(2000)

            # Check for CAPTCHA after Continue
            if await _detect_captcha(page):
                screenshot_path = await _take_login_screenshot(page, "captcha")
                _pending_login_state["stage"] = "post_continue"
                return _build_human_action_response("captcha", screenshot_path)

        if stage in ["pre_credentials", "post_continue"]:
            # Click Submit
            await _inject_status_banner(page, "Clicking Submit...")
            await page.wait_for_timeout(500)

            await _resume_first_match(
                page,
                [
                    'button:has-text("Submit")',
                    'button:has-text("Sign in")',
                    'button[type="submit"]',
                ],
                lambda button: button.click(),
            )

            await page.wait_for_timeout(3000)

            # Check for CAPTCHA after Submit
            if await _detect_captcha(page):
                screenshot_path = await _take_login_screenshot(page, "captcha")
                _pending_login_state["stage"] = "post_submit"
                return _build_human_action_response("captcha", screenshot_path)

        # Check for 2FA
        if await _detect_2fa(page):
            screenshot_path = await _take_login_screenshot(page, "2fa")
            _pending_login_state["stage"] = "2fa"
            return _build_human_action_response("2fa", screenshot_path)

        # Check for login success; if the first check fails, wait and check
        # once more.
        for attempt in range(2):
            if attempt:
                await page.wait_for_timeout(3000)
            if await _verify_login_success(page, context):
                _pending_login_state = None
                return await _complete_login(
                    playwright,
                    browser,
                    context,
                    page,
                    saved_auth_path,
                    start_time,
                )

        # Still not logged in - check for errors
        for selector in ['.error-message', '.alert-danger', '[role="alert"]']:
            try:
                error_el = await page.query_selector(selector)
                if error_el:
                    error_text = await error_el.text_content()
                    if error_text and error_text.strip():
                        _pending_login_state = None
                        await _cleanup_browser(playwright, browser)
                        return {
                            "status": "failed",
                            "message": f"Login failed: {error_text.strip()}",
                            "error": "invalid_credentials",
                        }
            except Exception:
                continue

        # Unknown state
        _pending_login_state = None
        await _cleanup_browser(playwright, browser)
        return {
            "status": "failed",
            "message": "Login result unclear after human action. Please try again.",
            "error": "unknown_state",
        }

    except Exception as e:
        logger.error("Error resuming pending login", error=str(e))
        _pending_login_state = None
        await _cleanup_browser(playwright, browser)
        return {
            "status": "failed",
            "message": f"Error resuming login: {e}",
            "error": "exception",
        }
|
|
1483
|
+
|
|
1484
|
+
|
|
1485
|
+
async def _complete_login(
    playwright: Any,
    browser: Any,
    context: Any,
    page: Any,
    auth_path: Path,
    start_time: float,
) -> dict[str, Any]:
    """Complete the login process - save session and cleanup.

    Args:
        playwright: Playwright instance to stop afterwards.
        browser: Browser to close afterwards.
        context: Browser context whose storage state is written to disk.
        page: Page used for the final homepage visit and localStorage count.
        auth_path: Destination file for the saved session (parents created).
        start_time: ``time.monotonic()`` value taken when the login began.

    Returns:
        A "success" dict with timing and cookie/localStorage counts, or a
        "failed" dict with error "save_failed" if saving the session raised.
        ``local_storage_count`` is 0 when the count could not be read.
    """
    global _pending_login_state

    try:
        # Give the site a moment to finalize cookies/localStorage (reese84, etc.)
        await page.wait_for_timeout(2000)

        # Best-effort: visit homepage to trigger bot token/localStorage generation.
        # If this hits a security interstitial, don't block; we'll still save state.
        try:
            await page.goto("https://www.heb.com", wait_until="load", timeout=30000)
            await page.wait_for_timeout(3000)
        except Exception as e:
            logger.debug("Homepage visit failed; saving session anyway", error=str(e))

        # Save session
        auth_path.parent.mkdir(parents=True, exist_ok=True)
        await context.storage_state(path=str(auth_path))

        # Ensure secure permissions on auth file
        from texas_grocery_mcp.utils.secure_file import ensure_secure_permissions

        ensure_secure_permissions(auth_path)

        cookies = await context.cookies()

        # The localStorage count is purely informational. The session file is
        # already on disk at this point, so a failed read must not turn a
        # successful login into a reported failure.
        try:
            local_storage_count = await page.evaluate("() => window.localStorage.length")
        except Exception as e:
            logger.warning("Could not read localStorage count", error=str(e))
            local_storage_count = 0

        elapsed = time.monotonic() - start_time

        logger.info(
            "Login/session save successful",
            elapsed_seconds=round(elapsed, 1),
            cookies_count=len(cookies),
        )

        # Cleanup
        _pending_login_state = None
        await _cleanup_browser(playwright, browser)

        return {
            "status": "success",
            "success": True,
            "message": f"Logged in successfully in {elapsed:.1f}s",
            "elapsed_seconds": round(elapsed, 1),
            "auth_path": str(auth_path),
            "cookies_count": len(cookies),
            "local_storage_count": local_storage_count,
        }

    except Exception as e:
        logger.error("Error completing login", error=str(e))
        _pending_login_state = None
        await _cleanup_browser(playwright, browser)
        return {
            "status": "failed",
            "message": f"Error saving session: {e}",
            "error": "save_failed",
        }
|
|
1551
|
+
|
|
1552
|
+
|
|
1553
|
+
async def _cleanup_browser(playwright: Any, browser: Any) -> None:
    """Tear down the browser and Playwright instances, ignoring failures.

    Also clears ``_pending_login_state``, so no later call tries to resume a
    login whose browser is gone.

    Args:
        playwright: Playwright instance to stop, or a falsy value to skip.
        browser: Browser to close, or a falsy value to skip.
    """
    global _pending_login_state
    _pending_login_state = None

    # Browser first, then the Playwright instance. Each step is independent:
    # a failure in one must not prevent the other from running.
    for resource, shutdown_method in ((browser, "close"), (playwright, "stop")):
        try:
            if resource:
                await getattr(resource, shutdown_method)()
        except Exception:
            # Best-effort teardown; nothing useful can be done on failure.
            continue
|
|
1569
|
+
|
|
1570
|
+
|
|
1571
|
+
|
|
1572
|
+
async def _inject_status_banner(
    page: Any,
    message: str,
    is_waiting: bool = False,
) -> None:
    """Inject or update a status banner on the page.

    On first use, a fixed banner element (id ``mcp-auto-login-banner``) is
    prepended to ``document.body`` together with a ``pulse`` keyframes style;
    later calls only update its content and animation. Failures are logged at
    debug level and never raised, since the banner is purely informational.

    The indicator, message, and waiting flag are handed to the page as an
    evaluate argument rather than spliced into the script text, so messages
    containing backticks, ``${``, quotes, or backslashes cannot break or
    alter the injected JavaScript.

    Args:
        page: Playwright page
        message: Status message to display. It is assigned via ``innerHTML``,
            so any markup in it is rendered as HTML.
        is_waiting: If True, show pulsing indicator
    """
    # Plain (non-f) string: braces are literal JavaScript. The keyframes rule
    # is joined with '+' because JavaScript, unlike Python, does not
    # concatenate adjacent string literals.
    script = """
    ({ indicator, message, isWaiting }) => {
        let banner = document.getElementById('mcp-auto-login-banner');
        if (!banner) {
            banner = document.createElement('div');
            banner.id = 'mcp-auto-login-banner';
            banner.style.cssText = `
                position: fixed;
                top: 0;
                left: 0;
                right: 0;
                background: linear-gradient(135deg, #e31837 0%, #c41230 100%);
                color: white;
                padding: 16px 20px;
                font-family: -apple-system, BlinkMacSystemFont,
                    'Segoe UI', Roboto, sans-serif;
                font-size: 16px;
                font-weight: 500;
                text-align: center;
                z-index: 999999;
                box-shadow: 0 4px 12px rgba(0,0,0,0.3);
            `;
            document.body.prepend(banner);
            document.body.style.marginTop = '60px';

            const style = document.createElement('style');
            style.textContent = (
                '@keyframes pulse { 0%, 100% { opacity: 1; } ' +
                '50% { opacity: 0.5; } }'
            );
            document.head.appendChild(style);
        }
        banner.innerHTML = indicator + ' ' + message;
        if (isWaiting) {
            banner.style.animation = 'pulse 1.5s infinite';
        } else {
            banner.style.animation = 'none';
        }
    }
    """
    try:
        indicator = "⏳" if is_waiting else "🤖"
        await page.evaluate(
            script,
            {
                "indicator": indicator,
                "message": message,
                "isWaiting": bool(is_waiting),
            },
        )
    except Exception as e:
        logger.debug("Could not inject status banner", error=str(e))
|