texas-grocery-mcp 0.1.0 (texas_grocery_mcp-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- texas_grocery_mcp/__init__.py +3 -0
- texas_grocery_mcp/auth/__init__.py +5 -0
- texas_grocery_mcp/auth/browser_refresh.py +1629 -0
- texas_grocery_mcp/auth/credentials.py +337 -0
- texas_grocery_mcp/auth/session.py +767 -0
- texas_grocery_mcp/clients/__init__.py +5 -0
- texas_grocery_mcp/clients/graphql.py +2400 -0
- texas_grocery_mcp/models/__init__.py +54 -0
- texas_grocery_mcp/models/cart.py +60 -0
- texas_grocery_mcp/models/coupon.py +44 -0
- texas_grocery_mcp/models/errors.py +43 -0
- texas_grocery_mcp/models/health.py +41 -0
- texas_grocery_mcp/models/product.py +274 -0
- texas_grocery_mcp/models/store.py +77 -0
- texas_grocery_mcp/observability/__init__.py +6 -0
- texas_grocery_mcp/observability/health.py +141 -0
- texas_grocery_mcp/observability/logging.py +73 -0
- texas_grocery_mcp/reliability/__init__.py +23 -0
- texas_grocery_mcp/reliability/cache.py +116 -0
- texas_grocery_mcp/reliability/circuit_breaker.py +138 -0
- texas_grocery_mcp/reliability/retry.py +96 -0
- texas_grocery_mcp/reliability/throttle.py +113 -0
- texas_grocery_mcp/server.py +211 -0
- texas_grocery_mcp/services/__init__.py +5 -0
- texas_grocery_mcp/services/geocoding.py +227 -0
- texas_grocery_mcp/state.py +166 -0
- texas_grocery_mcp/tools/__init__.py +5 -0
- texas_grocery_mcp/tools/cart.py +821 -0
- texas_grocery_mcp/tools/coupon.py +381 -0
- texas_grocery_mcp/tools/product.py +437 -0
- texas_grocery_mcp/tools/session.py +486 -0
- texas_grocery_mcp/tools/store.py +353 -0
- texas_grocery_mcp/utils/__init__.py +5 -0
- texas_grocery_mcp/utils/config.py +146 -0
- texas_grocery_mcp/utils/secure_file.py +123 -0
- texas_grocery_mcp-0.1.0.dist-info/METADATA +296 -0
- texas_grocery_mcp-0.1.0.dist-info/RECORD +40 -0
- texas_grocery_mcp-0.1.0.dist-info/WHEEL +4 -0
- texas_grocery_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- texas_grocery_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
texas_grocery_mcp/clients/graphql.py
@@ -0,0 +1,2400 @@
"""HEB API client using persisted queries and Next.js data endpoints.

Supports both unauthenticated (typeahead) and authenticated (full product search)
modes. Authenticated mode uses browser session cookies for faster API access.
"""

import json
import re
from typing import Any, cast

import httpx
import structlog

from texas_grocery_mcp.auth.session import get_httpx_cookies, is_authenticated
from texas_grocery_mcp.models import (
    Coupon,
    CouponCategory,
    CouponSearchResult,
    GeocodedLocation,
    NutrientInfo,
    Product,
    ProductDetails,
    ProductSearchAttempt,
    ProductSearchResult,
    SearchAttempt,
    Store,
    StoreSearchResult,
)
from texas_grocery_mcp.reliability import (
    CircuitBreaker,
    RetryConfig,
    ThrottleConfig,
    Throttler,
    TTLCache,
    with_retry,
)
from texas_grocery_mcp.services.geocoding import GeocodingResult, GeocodingService
from texas_grocery_mcp.utils.config import get_settings

logger = structlog.get_logger()


class GraphQLError(Exception):
    """Raised when GraphQL returns errors."""

    def __init__(self, errors: list[dict[str, Any]]):
        self.errors = errors
        messages = [e.get("message", "Unknown error") for e in errors]
        super().__init__(f"GraphQL error: {'; '.join(messages)}")


class PersistedQueryNotFoundError(Exception):
    """Raised when a persisted query hash is not found on the server."""

    pass


# Persisted Query Hashes (discovered via reverse engineering)
# These may change when HEB deploys new code
PERSISTED_QUERIES = {
    "ShopNavigation": "0e669423cef683226cb8eb295664619c8e0f95945734e0a458095f51ee89efb3",
    "alertEntryPoint": "3e3ccd248652e8fce4674d0c5f3f30f2ddc63da277bfa0ff36ea9420e5dffd5e",
    "cartEstimated": "7b033abaf2caa80bc49541e51d2b89e3cc6a316e37c4bd576d9b5c498a51e9c5",
    "typeaheadContent": "1ed956c0f10efcfc375321f33c40964bc236fff1397a4e86b7b53cb3b18ad329",
    "cartItemV2": "ade8ec1365c185244d42f9cc4c13997fec4b633ac3c38ff39558df92b210c6d0",
    "StoreSearch": "e01fa39e66c3a2c7881322bc48af6a5af97d49b1442d433f2d09d273de2db4b6",
    "CouponClip": "88b18ac22cee98372428d9a91d759ffb5e919026ee61c747f9f88d11336b846b",
    # Store change mutation - changes the active pickup store
    "SelectPickupFulfillment": "8fa3c683ee37ad1bab9ce22b99bd34315b2a89cfc56208d63ba9efc0c49a6323",
}

# Well-known HEB stores (fallback for store search)
KNOWN_STORES = {
    "737": Store(
        store_id="737",
        name="The Heights H-E-B",
        address="2300 N. SHEPHERD DR., HOUSTON, TX 77008",
        phone="(713) 802-9090",
        latitude=29.8028,
        longitude=-95.4103,
    ),
    "579": Store(
        store_id="579",
        name="Buffalo Speedway H-E-B",
        address="5601 S BRAESWOOD BLVD, HOUSTON, TX 77096",
        phone="(713) 432-1400",
        latitude=29.6916,
        longitude=-95.4587,
    ),
    "150": Store(
        store_id="150",
        name="Montrose H-E-B",
        address="1701 W ALABAMA ST, HOUSTON, TX 77098",
        phone="(713) 523-4481",
        latitude=29.7419,
        longitude=-95.3979,
    ),
}

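For context, a persisted GraphQL query is sent as an ordinary POST whose body carries only the operation name, the variables, and a SHA-256 hash in place of the query text; the server looks the query up by hash. A minimal sketch of such a payload (editor's illustration, not part of the package; the variables shown are invented, the hash is the StoreSearch entry above):

# Illustrative only: Apollo-style persisted-query payload shape.
payload = {
    "operationName": "StoreSearch",
    "variables": {"address": "77008", "radius": 25},
    "extensions": {
        "persistedQuery": {
            "version": 1,
            "sha256Hash": PERSISTED_QUERIES["StoreSearch"],
        }
    },
}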
class HEBGraphQLClient:
    """Client for HEB's API using persisted queries and Next.js data endpoints.

    Supports two modes:
    - Unauthenticated: Basic operations like typeahead (always available)
    - Authenticated: Full product search and cart operations (requires cookies)
    """

    # Standard headers for browser-like requests
    _BROWSER_HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Origin": "https://www.heb.com",
        "Referer": "https://www.heb.com/",
    }

    def __init__(self, base_url: str | None = None):
        settings = get_settings()
        self.base_url = base_url or settings.heb_graphql_url
        self.circuit_breaker = CircuitBreaker("heb_api")
        self._client: httpx.AsyncClient | None = None
        self._auth_client: httpx.AsyncClient | None = None
        self._build_id: str | None = None

        # Initialize throttlers for rate limiting
        self._ssr_throttler = Throttler(
            ThrottleConfig(
                max_concurrent=settings.max_concurrent_ssr_searches,
                min_delay_ms=settings.min_ssr_delay_ms,
                jitter_ms=settings.ssr_jitter_ms,
                enabled=settings.throttling_enabled,
            ),
            name="ssr",
        )
        self._graphql_throttler = Throttler(
            ThrottleConfig(
                max_concurrent=settings.max_concurrent_graphql,
                min_delay_ms=settings.min_graphql_delay_ms,
                jitter_ms=settings.graphql_jitter_ms,
                enabled=settings.throttling_enabled,
            ),
            name="graphql",
        )

        # Initialize cache for product details (24-hour TTL)
        self._product_details_cache: TTLCache[ProductDetails] = TTLCache(
            ttl_hours=24,
            max_size=500,  # Cache up to 500 products
        )

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create basic HTTP client (no auth cookies)."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                timeout=httpx.Timeout(30.0),
                headers={
                    "Content-Type": "application/json",
                    "Accept": "application/json",
                    **self._BROWSER_HEADERS,
                },
                follow_redirects=True,
            )
        return self._client

    async def _get_authenticated_client(self) -> httpx.AsyncClient | None:
        """Get HTTP client with authentication cookies.

        Returns:
            Authenticated client if cookies available, None otherwise
        """
        if not is_authenticated():
            return None

        # Always recreate to get fresh cookies
        if self._auth_client:
            await self._auth_client.aclose()

        cookies = get_httpx_cookies()
        if not cookies:
            return None

        self._auth_client = httpx.AsyncClient(
            timeout=httpx.Timeout(30.0),
            headers=self._BROWSER_HEADERS,
            cookies=cookies,
            follow_redirects=True,
        )

        logger.debug("Created authenticated client", cookie_count=len(cookies))
        return self._auth_client

    async def close(self) -> None:
        """Close HTTP clients."""
        if self._client:
            await self._client.aclose()
            self._client = None
        if self._auth_client:
            await self._auth_client.aclose()
            self._auth_client = None

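A minimal lifecycle sketch for callers of this client (editor's illustration, not part of the package; it assumes an asyncio entry point and that settings and session cookies are already configured):

import asyncio

async def main() -> None:
    client = HEBGraphQLClient()
    try:
        result = await client.search_stores("77008", radius_miles=10)
        print(result.count)
    finally:
        # close() disposes both the plain and the authenticated httpx clients
        await client.close()

asyncio.run(main())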
    async def _get_build_id(self) -> str:
        """Extract Next.js build ID from HEB homepage.

        The build ID is required for accessing _next/data endpoints.
        It changes with each deployment.

        Uses authenticated client when available to bypass WAF challenges.
        """
        if self._build_id:
            return self._build_id

        # Prefer authenticated client to bypass WAF/security challenges
        client = await self._get_authenticated_client()
        if not client:
            client = await self._get_client()

        response = await client.get("https://www.heb.com")
        response.raise_for_status()

        # Check for security challenge
        if self._detect_security_challenge(response.text):
            logger.warning("Security challenge detected when fetching build ID")
            raise RuntimeError(
                "Security challenge blocked build ID extraction. Try session_refresh."
            )

        # Look for build ID in the HTML
        # Pattern: /_next/static/{buildId}/_buildManifest.js
        match = re.search(r'/_next/static/([a-zA-Z0-9_-]+)/_buildManifest\.js', response.text)
        if match:
            self._build_id = match.group(1)
            logger.info("Extracted Next.js build ID", build_id=self._build_id)
            return self._build_id

        # Fallback: try to find it in data-nscript tags
        match = re.search(r'"buildId":"([a-zA-Z0-9_-]+)"', response.text)
        if match:
            self._build_id = match.group(1)
            logger.info("Extracted Next.js build ID from JSON", build_id=self._build_id)
            return self._build_id

        # Log the response for debugging
        logger.error(
            "Could not extract build ID",
            response_length=len(response.text),
            response_preview=response.text[:500] if response.text else "empty",
        )
        raise RuntimeError("Could not extract Next.js build ID from HEB homepage")

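The two regexes above can be exercised against a toy HTML fragment; the build ID value below is invented purely for illustration:

import re

# Invented sample of the asset path a Next.js page embeds.
sample_html = '<script src="/_next/static/AbC123_xyz/_buildManifest.js"></script>'
match = re.search(r'/_next/static/([a-zA-Z0-9_-]+)/_buildManifest\.js', sample_html)
assert match is not None and match.group(1) == "AbC123_xyz"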
    @with_retry(config=RetryConfig(max_attempts=3, base_delay=1.0))
    async def _execute_persisted_query(
        self,
        operation_name: str,
        variables: dict[str, Any],
    ) -> dict[str, Any]:
        """Execute a persisted GraphQL query.

        Args:
            operation_name: The name of the persisted operation
            variables: Query variables

        Returns:
            Response data

        Raises:
            GraphQLError: If GraphQL returns errors
            PersistedQueryNotFoundError: If the hash is not recognized
            CircuitBreakerOpenError: If circuit is open
        """
        async with self._graphql_throttler:
            self.circuit_breaker.check()

            if operation_name not in PERSISTED_QUERIES:
                raise ValueError(f"Unknown operation: {operation_name}")

            client = await self._get_client()

            payload = {
                "operationName": operation_name,
                "variables": variables,
                "extensions": {
                    "persistedQuery": {
                        "version": 1,
                        "sha256Hash": PERSISTED_QUERIES[operation_name],
                    }
                },
            }

            try:
                response = await client.post(self.base_url, json=payload)
                response.raise_for_status()

                data: Any = response.json()

                # Check for persisted query errors
                if "errors" in data:
                    for error in data["errors"]:
                        if "PersistedQueryNotFound" in str(error):
                            raise PersistedQueryNotFoundError(
                                f"Persisted query hash for '{operation_name}' is no longer valid"
                            )

                    raise GraphQLError(data["errors"])

                self.circuit_breaker.record_success()

                if isinstance(data, dict):
                    payload_data = data.get("data")
                    if isinstance(payload_data, dict):
                        return cast(dict[str, Any], payload_data)
                return {}

            except (httpx.HTTPError, GraphQLError) as e:
                self.circuit_breaker.record_failure()
                logger.error(
                    "Persisted query failed",
                    operation=operation_name,
                    error=str(e),
                )
                raise

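When the server does not recognize a hash, Apollo-style endpoints typically return an error whose message contains "PersistedQueryNotFound", which is the substring the loop above checks. A toy example of that branch (the response payload is invented for illustration):

# Invented example of an error payload from a persisted-query endpoint.
data = {"errors": [{"message": "PersistedQueryNotFound"}]}
stale_hash = any("PersistedQueryNotFound" in str(err) for err in data.get("errors", []))
print(stale_hash)  # True -> the client raises PersistedQueryNotFoundError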
    @with_retry(config=RetryConfig(max_attempts=3, base_delay=1.0))
    async def _fetch_nextjs_data(
        self,
        path: str,
        params: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """Fetch data from Next.js _next/data endpoint.

        Args:
            path: The page path (e.g., "search" for /search)
            params: Query parameters

        Returns:
            Page props data
        """
        self.circuit_breaker.check()

        build_id = await self._get_build_id()
        client = await self._get_client()

        url = f"https://www.heb.com/_next/data/{build_id}/en/{path}.json"

        try:
            response = await client.get(url, params=params)
            response.raise_for_status()

            data: Any = response.json()
            self.circuit_breaker.record_success()

            # Next.js data is wrapped in pageProps
            if not isinstance(data, dict):
                return {}

            page_props = data.get("pageProps")
            if isinstance(page_props, dict):
                return cast(dict[str, Any], page_props)
            return cast(dict[str, Any], data)

        except httpx.HTTPError as e:
            self.circuit_breaker.record_failure()
            logger.error(
                "Next.js data fetch failed",
                path=path,
                error=str(e),
            )
            raise

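For example, with a made-up build ID the data endpoint for the search page is assembled like this (editor's sketch; the real value comes from _get_build_id()):

build_id = "AbC123_xyz"  # invented for illustration
path = "search"
url = f"https://www.heb.com/_next/data/{build_id}/en/{path}.json"
# -> https://www.heb.com/_next/data/AbC123_xyz/en/search.json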
    async def search_stores(
        self,
        address: str,
        radius_miles: int = 25,
    ) -> StoreSearchResult:
        """Search for HEB stores near an address.

        Uses geocoding to handle informal location queries (neighborhoods,
        landmarks) and tries multiple query variations against HEB's API.

        Args:
            address: Address, zip code, neighborhood, or landmark to search near
            radius_miles: Search radius in miles

        Returns:
            StoreSearchResult with stores, geocoded location, and search feedback
        """
        logger.info(
            "Searching for stores",
            address=address,
            radius_miles=radius_miles,
        )

        attempts: list[SearchAttempt] = []
        geocoded: GeocodedLocation | None = None
        geocoding_result: GeocodingResult | None = None

        # Step 1: Geocode the address
        geocoding_service = GeocodingService()
        try:
            geocoding_result = await geocoding_service.geocode(address)
            if geocoding_result:
                geocoded = GeocodedLocation(
                    latitude=geocoding_result.latitude,
                    longitude=geocoding_result.longitude,
                    display_name=geocoding_result.display_name,
                )
                logger.info(
                    "Geocoding successful",
                    address=address,
                    lat=geocoding_result.latitude,
                    lon=geocoding_result.longitude,
                )
        except Exception as e:
            logger.warning("Geocoding failed", address=address, error=str(e))
        finally:
            await geocoding_service.close()

        # Step 2: Generate query variations
        if geocoding_result:
            query_variations = geocoding_result.get_query_variations(address)
        else:
            # Geocoding failed - just try the original query
            query_variations = [address]

        # Step 3: Try each query variation until we get results
        stores: list[Store] = []
        for query in query_variations:
            try:
                result_stores = await self._execute_store_search(query, radius_miles)
                attempts.append(SearchAttempt(
                    query=query,
                    result="success" if result_stores else "no_stores",
                ))

                if result_stores:
                    stores = result_stores
                    logger.info(
                        "Store search successful",
                        query=query,
                        result_count=len(stores),
                    )
                    break

            except Exception as e:
                logger.warning(
                    "Store search query failed",
                    query=query,
                    error=str(e),
                )
                attempts.append(SearchAttempt(query=query, result="error"))
                continue

        # Step 4: Calculate distances from geocoded point and sort
        if stores and geocoding_result:
            for store in stores:
                if store.latitude is not None and store.longitude is not None:
                    store.distance_miles = GeocodingService.haversine_miles(
                        geocoding_result.latitude,
                        geocoding_result.longitude,
                        store.latitude,
                        store.longitude,
                    )
            # Sort by calculated distance
            stores.sort(
                key=lambda s: (
                    s.distance_miles
                    if s.distance_miles is not None
                    else float("inf")
                )
            )

        # Step 5: Build response with feedback
        error: str | None = None
        suggestions: list[str] = []

        if not stores:
            if not geocoding_result:
                error = f"Couldn't locate '{address}'. Try a zip code or street address."
                suggestions = [
                    "Use a Texas zip code (e.g., 77007)",
                    "Try a specific street address",
                ]
            else:
                location = geocoded.display_name if geocoded else address
                error = (
                    f"No HEB stores found within {radius_miles} miles of {location}."
                )
                suggestions = [
                    "HEB operates primarily in Texas",
                    "Try increasing the search radius",
                    "Verify this is a Texas location",
                ]

        return StoreSearchResult(
            stores=stores,
            count=len(stores),
            search_address=address,
            geocoded=geocoded,
            attempts=attempts,
            error=error,
            suggestions=suggestions,
        )

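Step 4 relies on GeocodingService.haversine_miles; the standard great-circle formula it presumably implements looks like this (editor's sketch under that assumption, not the package's own code):

import math

def haversine_miles(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance between two points, in miles."""
    r = 3958.8  # mean Earth radius in miles
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlmb = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dlmb / 2) ** 2
    return 2 * r * math.asin(math.sqrt(a))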
    async def _execute_store_search(
        self,
        query: str,
        radius_miles: int,
    ) -> list[Store]:
        """Execute a single store search query against HEB's API.

        Args:
            query: Search query (zip, city/state, or address)
            radius_miles: Search radius in miles

        Returns:
            List of stores (may be empty)
        """
        data = await self._execute_persisted_query(
            "StoreSearch",
            {
                "address": query,
                "radius": radius_miles,
                "fulfillmentChannels": [],
                "includeEcommInactive": False,
                "retailFormatCodes": ["P", "NP"],
            },
        )

        stores = []
        # API returns data in searchStoresByAddress.stores (not storeSearch)
        store_search_data = data.get("searchStoresByAddress", {}) or data.get("storeSearch", {})
        store_list = store_search_data.get("stores", [])

        for store_result in store_list:
            try:
                store = self._parse_store_result(store_result)
                if store:
                    stores.append(store)
            except Exception as e:
                logger.debug("Failed to parse store data", error=str(e))
                continue

        return stores

    def _parse_store_result(self, store_result: dict[str, Any]) -> Store | None:
        """Parse store result from searchStoresByAddress response.

        The API returns results with distanceMiles at top level and
        store details nested in a 'store' object.

        Args:
            store_result: Store result dict from GraphQL response

        Returns:
            Store object or None if parsing fails
        """
        # Distance is at the top level
        distance = store_result.get("distanceMiles")

        # Store details are nested
        store_data = store_result.get("store", store_result)

        store_id = store_data.get("storeNumber") or store_data.get("id")
        if not store_id:
            return None

        name = store_data.get("name", "")

        # Build address from components (new format uses streetAddress/locality/region)
        address_obj = store_data.get("address", {})
        address_parts = []

        street = address_obj.get("streetAddress") or store_data.get("address1", "")
        if street:
            address_parts.append(street)

        city = address_obj.get("locality") or store_data.get("city", "")
        state = address_obj.get("region") or store_data.get("state", "")
        postal_code = address_obj.get("postalCode") or store_data.get("postalCode", "")

        if city and state:
            address_parts.append(f"{city}, {state} {postal_code}".strip())

        address = ", ".join(address_parts) if address_parts else ""

        # Extract coordinates
        latitude = store_data.get("latitude")
        longitude = store_data.get("longitude")

        # Extract fulfillment channels to determine curbside/delivery support
        # API returns data in storeFulfillments array with objects like {"name": "CURBSIDE_PICKUP"}
        store_fulfillments = store_data.get("storeFulfillments", None)
        if store_fulfillments is not None:
            # Build list of fulfillment channel names
            fulfillment_names = [
                f.get("name", "")
                for f in store_fulfillments
                if isinstance(f, dict)
            ]
            # Curbside = any fulfillment containing "CURBSIDE" (CURBSIDE_PICKUP, CURBSIDE_DELIVERY)
            supports_curbside = any("CURBSIDE" in name for name in fulfillment_names)
            # Delivery = ALCOHOL_DELIVERY or DELIVERY channel
            supports_delivery = any(
                "DELIVERY" in name and "CURBSIDE" not in name
                for name in fulfillment_names
            )
        else:
            # Legacy format: check fulfillmentChannels array of strings
            fulfillment_channels = store_data.get("fulfillmentChannels", None)
            if fulfillment_channels is not None:
                supports_curbside = (
                    "PICKUP" in fulfillment_channels
                    or "CURBSIDE" in fulfillment_channels
                )
                supports_delivery = "DELIVERY" in fulfillment_channels
            else:
                # No fulfillment data - default to True for curbside (most stores support it)
                supports_curbside = True
                supports_delivery = False

        return Store(
            store_id=str(store_id),
            name=name,
            address=address,
            phone=store_data.get("phone", ""),
            latitude=float(latitude) if latitude else None,
            longitude=float(longitude) if longitude else None,
            distance_miles=float(distance) if distance else None,
            supports_curbside=supports_curbside,
            supports_delivery=supports_delivery,
        )

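A trimmed example of the input shape this parser expects, with invented values (the field names are taken from the code above):

sample_result = {
    "distanceMiles": 1.2,
    "store": {
        "storeNumber": "737",
        "name": "The Heights H-E-B",
        "address": {
            "streetAddress": "2300 N. Shepherd Dr.",
            "locality": "Houston",
            "region": "TX",
            "postalCode": "77008",
        },
        "latitude": 29.8028,
        "longitude": -95.4103,
        "storeFulfillments": [{"name": "CURBSIDE_PICKUP"}, {"name": "DELIVERY"}],
    },
}
# _parse_store_result(sample_result) would yield a Store with
# supports_curbside=True and supports_delivery=True.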
    def _parse_store_data(self, store_data: dict[str, Any]) -> Store | None:
        """Parse store data from legacy StoreSearch response format.

        Args:
            store_data: Store dict from GraphQL response

        Returns:
            Store object or None if parsing fails
        """
        store_id = store_data.get("id") or store_data.get("storeNumber")
        if not store_id:
            return None

        name = store_data.get("name", "")

        # Build address from components
        address_obj = store_data.get("address") or {}
        address_parts = []
        address1 = store_data.get("address1") or address_obj.get("address1", "")
        if address1:
            address_parts.append(address1)

        city = store_data.get("city") or address_obj.get("city", "")
        state = store_data.get("state") or address_obj.get("state", "")
        postal_code = store_data.get("postalCode") or address_obj.get("postalCode", "")

        if city and state:
            address_parts.append(f"{city}, {state} {postal_code}".strip())

        address = ", ".join(address_parts) if address_parts else ""

        # Extract coordinates
        latitude = store_data.get("latitude") or store_data.get("location", {}).get("latitude")
        longitude = store_data.get("longitude") or store_data.get("location", {}).get("longitude")

        # Extract distance if available
        distance = store_data.get("distance") or store_data.get("distanceFromSearchLocation")

        return Store(
            store_id=str(store_id),
            name=name,
            address=address,
            phone=store_data.get("phone", ""),
            latitude=float(latitude) if latitude else None,
            longitude=float(longitude) if longitude else None,
            distance_miles=float(distance) if distance else None,
        )

    def _generate_query_variations(self, query: str) -> list[str]:
        """Generate query variations to improve search results.

        HEB's search is sensitive to exact query wording. This generates
        variations to try when the original query returns no results.

        Args:
            query: Original search query

        Returns:
            List of query variations to try (original query first)
        """
        variations = [query]  # Always try original first
        query_lower = query.lower()

        # Expand common abbreviations
        expanded = query
        abbreviations = {
            "ny ": "new york ",
            "NY ": "New York ",
            "heb ": "H-E-B ",
            "HEB ": "H-E-B ",
        }
        for abbrev, full in abbreviations.items():
            if abbrev.lower() in query_lower:
                expanded = query.replace(abbrev.strip(), full.strip())
                if expanded != query:
                    variations.append(expanded)
                    break

        # Add "Meal Simple" prefix for meal-related queries
        meal_keywords = ["steak", "chicken", "salmon", "pork", "beef", "shrimp",
                         "asparagus", "potato", "meatloaf", "alfredo", "enchilada",
                         "jambalaya", "bowl", "dinner", "entree"]
        if (
            any(kw in query_lower for kw in meal_keywords)
            and "meal simple" not in query_lower
        ):
            variations.append(f"Meal Simple {query}")

        # Add "H-E-B" prefix if not present
        if "h-e-b" not in query_lower and "heb" not in query_lower:
            variations.append(f"H-E-B {query}")

        # Remove duplicates while preserving order
        seen = set()
        unique_variations = []
        for v in variations:
            v_lower = v.lower()
            if v_lower not in seen:
                seen.add(v_lower)
                unique_variations.append(v)

        return unique_variations

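Tracing the logic above on a sample query (editor's illustration; "client" stands for a constructed HEBGraphQLClient):

variations = client._generate_query_variations("ribeye steak")
# "steak" triggers the "Meal Simple" variant and the absence of "heb" adds the
# "H-E-B" prefixed variant, so the traced result is:
assert variations == ["ribeye steak", "Meal Simple ribeye steak", "H-E-B ribeye steak"]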
    def _detect_security_challenge(self, html: str) -> bool:
        """Detect if response is a WAF/captcha security challenge page.

        HEB uses Incapsula (Imperva) WAF which may return challenge pages
        instead of actual content when bot detection is triggered.

        Args:
            html: Response HTML content

        Returns:
            True if response appears to be a security challenge
        """
        challenge_indicators = [
            "incapsula",
            "reese84",
            "_Incapsula_Resource",
            "challenge-platform",
            "cf-browser-verification",
            "captcha",
            "blocked",
            "access denied",
            "please verify you are a human",
            "enable javascript and cookies",
        ]
        html_lower = html.lower()
        return any(indicator in html_lower for indicator in challenge_indicators)

    def _determine_fallback_reason(
        self,
        was_authenticated: bool,
        security_challenge: bool,
        attempts: list[ProductSearchAttempt],
    ) -> str:
        """Determine human-readable reason for fallback to typeahead.

        Args:
            was_authenticated: Whether auth cookies were available
            security_challenge: Whether a security challenge was detected
            attempts: List of search attempts made

        Returns:
            Human-readable explanation of why fallback was used
        """
        if not was_authenticated:
            return "No authentication cookies available"
        if security_challenge:
            return (
                "Security challenge (WAF/captcha) blocked API requests. "
                "Use session_refresh (Playwright) to refresh the session."
            )
        if all(a.result == "empty" for a in attempts if a.method in ("ssr", "typeahead_as_ssr")):
            return "All SSR queries returned empty results - product may not exist"
        if all(a.result == "error" for a in attempts if a.method in ("ssr", "typeahead_as_ssr")):
            return "All SSR queries failed with errors"
        return "SSR search unsuccessful"

    def _get_session_refresh_instructions(self) -> list[str]:
        """Get Playwright instructions for refreshing the session.

        When session tokens are stale, use Playwright to refresh
        the bot detection tokens before retrying API calls.

        Returns:
            Step-by-step instructions for session refresh
        """
        settings = get_settings()
        return [
            "Session refresh required. Run these Playwright commands:",
            "",
            "1. browser_navigate('https://www.heb.com')",
            "",
            "2. browser_wait_for({ time: 3 })  # Wait for bot detection to initialize",
            "",
            "3. browser_type('[data-qe-id=\"headerSearchInput\"]', 'test')",
            "",
            "4. browser_press_key('Enter')",
            "",
            "5. browser_wait_for({ selector: '[data-qe-id=\"productCard\"]', timeout: 10000 })",
            "",
            (
                "6. browser_run_code with: await page.context().storageState({ path: '"
                f"{settings.auth_state_path}"
                "' })"
            ),
            "",
            "Then retry your search.",
        ]

    def _get_playwright_search_instructions(self, query: str, store_id: str) -> list[str]:
        """Get instructions for using Playwright MCP to perform the search.

        When security challenges block httpx requests, Playwright can
        bypass them because it runs in a real browser.

        Args:
            query: Original search query
            store_id: Store ID for context

        Returns:
            Step-by-step instructions for Playwright-based search
        """
        encoded_query = query.replace(" ", "+")
        return [
            "Use Playwright MCP to search (bypasses bot detection):",
            "",
            f"1. browser_navigate('https://www.heb.com/search?q={encoded_query}')",
            "",
            "2. Wait for results to load:",
            "   browser_wait_for({ selector: '[data-qe-id=\"productCard\"]', timeout: 10000 })",
            "",
            "3. Take a snapshot to see the results:",
            "   browser_snapshot()",
            "",
            "4. Extract product data (optional - run in browser):",
            "   browser_run_code with:",
            "   ```javascript",
            "   const products = [...document.querySelectorAll('[data-qe-id=\"productCard\"]')]",
            "     .slice(0, 20)",
            "     .map(card => ({",
            (
                "       name: card.querySelector('[data-qe-id=\"productTitle\"]')"
                "?.textContent?.trim(),"
            ),
            (
                "       price: card.querySelector('[data-qe-id=\"productPrice\"]')"
                "?.textContent?.trim(),"
            ),
            "       sku: card.dataset.productId || card.querySelector('[data-sku]')?.dataset?.sku,",
            "     }));",
            "   return JSON.stringify(products, null, 2);",
            "   ```",
            "",
            "5. After browsing, save refreshed session cookies:",
            (
                "   browser_run_code with: await page.context().storageState({ path: "
                "'~/.texas-grocery-mcp/auth.json' })"
            ),
        ]

    async def search_products(
        self,
        query: str,
        store_id: str,
        limit: int = 20,
    ) -> ProductSearchResult:
        """Search for products at a store.

        Tries authenticated search first (fast, full data), falls back to
        typeahead suggestions if no auth cookies available. When authenticated
        search returns no results, tries query variations before falling back.

        Args:
            query: Search query
            store_id: Store ID for inventory/pricing
            limit: Maximum results to return

        Returns:
            ProductSearchResult with products and diagnostic metadata
        """

        attempts: list[ProductSearchAttempt] = []
        security_challenge_detected = False
        search_url = f"https://www.heb.com/search?q={query.replace(' ', '+')}"

        # Try authenticated search first
        auth_client = await self._get_authenticated_client()
        if auth_client:
            # Generate query variations to try
            query_variations = self._generate_query_variations(query)

            for variation in query_variations:
                try:
                    products, was_challenge = await self._search_products_ssr(
                        auth_client, variation, store_id, limit
                    )

                    if was_challenge:
                        security_challenge_detected = True
                        attempts.append(ProductSearchAttempt(
                            query=variation,
                            method="ssr",
                            result="security_challenge",
                        ))
                        logger.error(
                            (
                                "Security challenge detected - stopping search attempts, "
                                "session refresh required"
                            ),
                            query=variation,
                        )
                        # Fail-fast: don't waste more queries, session needs refresh
                        break

                    if products:
                        attempts.append(ProductSearchAttempt(
                            query=variation,
                            method="ssr",
                            result="success",
                        ))
                        logger.info(
                            "SSR search successful",
                            original_query=query,
                            effective_query=variation,
                            result_count=len(products),
                        )
                        return ProductSearchResult(
                            products=products,
                            count=len(products),
                            query=query,
                            store_id=store_id,
                            data_source="ssr",
                            authenticated=True,
                            attempts=attempts,
                            search_url=search_url,
                        )
                    else:
                        attempts.append(ProductSearchAttempt(
                            query=variation,
                            method="ssr",
                            result="empty",
                        ))

                except Exception as e:
                    attempts.append(ProductSearchAttempt(
                        query=variation,
                        method="ssr",
                        result="error",
                        error_detail=str(e),
                    ))
                    logger.warning(
                        "Authenticated search failed for variation",
                        query=variation,
                        error=str(e),
                    )
                    continue

            # If all variations failed, try using typeahead suggestions as queries
            # Skip this if security challenge was detected - no point in trying more SSR requests
            if not security_challenge_detected:
                try:
                    suggestions = await self.get_typeahead(query)
                    if suggestions:
                        for suggestion in suggestions[:2]:  # Try top 2 suggestions
                            try:
                                products, was_challenge = await self._search_products_ssr(
                                    auth_client, suggestion, store_id, limit
                                )

                                if was_challenge:
                                    security_challenge_detected = True
                                    attempts.append(ProductSearchAttempt(
                                        query=suggestion,
                                        method="typeahead_as_ssr",
                                        result="security_challenge",
                                    ))
                                    # Fail-fast: don't try more suggestions
                                    break

                                if products:
                                    attempts.append(ProductSearchAttempt(
                                        query=suggestion,
                                        method="typeahead_as_ssr",
                                        result="success",
                                    ))
                                    logger.info(
                                        "SSR search successful via typeahead suggestion",
                                        original_query=query,
                                        suggestion_used=suggestion,
                                        result_count=len(products),
                                    )
                                    return ProductSearchResult(
                                        products=products,
                                        count=len(products),
                                        query=query,
                                        store_id=store_id,
                                        data_source="ssr",
                                        authenticated=True,
                                        attempts=attempts,
                                        search_url=search_url,
                                    )
                                else:
                                    attempts.append(ProductSearchAttempt(
                                        query=suggestion,
                                        method="typeahead_as_ssr",
                                        result="empty",
                                    ))

                            except Exception as e:
                                attempts.append(ProductSearchAttempt(
                                    query=suggestion,
                                    method="typeahead_as_ssr",
                                    result="error",
                                    error_detail=str(e),
                                ))
                                continue
                except Exception as e:
                    logger.debug("Typeahead-guided search failed", error=str(e))

        # Fallback to typeahead suggestions only
        fallback_reason = self._determine_fallback_reason(
            was_authenticated=auth_client is not None,
            security_challenge=security_challenge_detected,
            attempts=attempts,
        )

        logger.info(
            "Product search using typeahead fallback",
            query=query,
            store_id=store_id,
            fallback_reason=fallback_reason,
            security_challenge=security_challenge_detected,
        )

        # Get Playwright instructions if security challenge was detected
        playwright_instructions = None
        if security_challenge_detected:
            playwright_instructions = self._get_playwright_search_instructions(query, store_id)

        try:
            suggestions = await self.get_typeahead(query)
        except Exception as e:
            logger.error("Product search failed", query=query, error=str(e))
            return ProductSearchResult(
                products=[],
                count=0,
                query=query,
                store_id=store_id,
                data_source="typeahead_suggestions",
                authenticated=auth_client is not None,
                fallback_reason=fallback_reason,
                security_challenge_detected=security_challenge_detected,
                attempts=attempts,
                search_url=search_url,
                playwright_fallback_available=security_challenge_detected,
                playwright_instructions=playwright_instructions,
            )

        # Return suggestions as placeholder products
        products = []
        for suggestion in suggestions[:limit]:
            product = Product(
                sku=f"suggestion-{suggestion.lower().replace(' ', '-')}",
                name=suggestion,
                price=0.0,  # Price unavailable via typeahead
                available=True,
                brand=None,
                size=None,
                price_per_unit=None,
                image_url=None,
                aisle=None,
                on_sale=False,
                original_price=None,
            )
            products.append(product)
            attempts.append(ProductSearchAttempt(
                query=suggestion,
                method="typeahead",
                result="success",
            ))

        return ProductSearchResult(
            products=products,
            count=len(products),
            query=query,
            store_id=store_id,
            data_source="typeahead_suggestions",
            authenticated=auth_client is not None,
            fallback_reason=fallback_reason,
            security_challenge_detected=security_challenge_detected,
            attempts=attempts,
            search_url=search_url,
            playwright_fallback_available=security_challenge_detected,
            playwright_instructions=playwright_instructions,
        )

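A usage sketch showing how a caller can tell which path produced the results (editor's illustration; assumes an asyncio context and an existing client instance):

result = await client.search_products("greek yogurt", store_id="737", limit=10)
if result.data_source == "ssr":
    for p in result.products:
        print(p.name, p.price)
else:
    # Typeahead fallback: names only, price is 0.0 and fallback_reason explains why.
    print(result.fallback_reason, [p.name for p in result.products])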
    # ========================================================================
    # Product Details
    # ========================================================================

    async def get_product_details(
        self,
        product_id: str,
        store_id: str | None = None,
    ) -> ProductDetails | None:
        """Get comprehensive details for a single product.

        Fetches the product detail page via SSR and extracts full product
        information including ingredients, nutrition, warnings, and instructions.

        Results are cached for 24 hours to reduce API calls since product
        details rarely change.

        Args:
            product_id: The product ID (e.g., '127074')
            store_id: Optional store ID (uses session's store if not provided)

        Returns:
            ProductDetails with full product information, or None if not found
        """

        # Check cache first
        cache_key = f"{product_id}:{store_id or 'default'}"
        cached = self._product_details_cache.get(cache_key)
        if cached:
            logger.info(
                "Product details cache hit",
                product_id=product_id,
                name=cached.name,
            )
            return cached

        # Pre-fetch build ID before getting auth client
        # (prevents client lifecycle issues since _get_build_id may create a client)
        await self._get_build_id()

        auth_client = await self._get_authenticated_client()
        if not auth_client:
            logger.warning("No authenticated client for product details")
            # Try with unauthenticated client as fallback
            auth_client = await self._get_client()

        try:
            details = await self._get_product_details_ssr(auth_client, product_id)
            if details:
                # Cache the result
                self._product_details_cache.set(cache_key, details)
                logger.info(
                    "Product details fetched and cached",
                    product_id=product_id,
                    name=details.name,
                )
            return details
        except Exception as e:
            logger.error(
                "Failed to get product details",
                product_id=product_id,
                error=str(e),
            )
            return None

    def get_product_details_cache_stats(self) -> dict[str, Any]:
        """Get statistics about the product details cache.

        Returns:
            Dict with cache stats (size, valid_entries, ttl_hours, etc.)
        """
        return self._product_details_cache.stats()

    def clear_product_details_cache(self) -> None:
        """Clear the product details cache."""
        self._product_details_cache.clear()

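The cache key combines product and store, so the same product fetched for two different stores occupies two entries; a sketch of the key scheme used above (editor's illustration):

def make_cache_key(product_id: str, store_id: str | None) -> str:
    # Same scheme as get_product_details above; entries expire after ttl_hours=24.
    return f"{product_id}:{store_id or 'default'}"

assert make_cache_key("127074", "737") == "127074:737"
assert make_cache_key("127074", None) == "127074:default"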
    @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
    async def _get_product_details_ssr(
        self,
        client: httpx.AsyncClient,
        product_id: str,
    ) -> ProductDetails | None:
        """Fetch product details via SSR data endpoint.

        Args:
            client: HTTP client (authenticated preferred)
            product_id: Product ID to fetch

        Returns:
            ProductDetails or None if not found/error
        """

        async with self._ssr_throttler:
            self.circuit_breaker.check()

            # Get build ID for SSR endpoint
            build_id = await self._get_build_id()

            url = f"https://www.heb.com/_next/data/{build_id}/en/product-detail/{product_id}.json"
            logger.debug("Fetching product details SSR", url=url, product_id=product_id)

            try:
                response = await client.get(url)

                # 404 means product doesn't exist
                if response.status_code == 404:
                    logger.info("Product not found", product_id=product_id)
                    return None

                response.raise_for_status()

                # Check for security challenge
                if response.headers.get(
                    "content-type", ""
                ).startswith("text/html") and self._detect_security_challenge(
                    response.text
                ):
                    logger.warning(
                        "Security challenge detected in product details response",
                        product_id=product_id,
                    )
                    return None

                data = response.json()

                # Try standard Next.js SSR structure first
                product_data = data.get("pageProps", {}).get("product")

                # Fallback to props wrapper if needed
                if not product_data:
                    product_data = data.get("props", {}).get("pageProps", {}).get("product")

                if not product_data:
                    page_props_keys = (
                        list(data.get("pageProps", {}).keys())
                        if "pageProps" in data
                        else None
                    )
                    logger.warning(
                        "No product data in response",
                        product_id=product_id,
                        response_keys=list(data.keys()),
                        pageProps_keys=page_props_keys,
                    )
                    return None

                self.circuit_breaker.record_success()
                return self._parse_product_details(product_data)

            except httpx.HTTPStatusError as e:
                logger.error(
                    "HTTP error fetching product details",
                    product_id=product_id,
                    status=e.response.status_code,
                )
                self.circuit_breaker.record_failure()
                return None
            except Exception as e:
                logger.error(
                    "Error fetching product details",
                    product_id=product_id,
                    error=str(e),
                )
                self.circuit_breaker.record_failure()
                raise

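The detail endpoint returns Next.js page props; a heavily trimmed sketch of the JSON the parser below consumes (field names are taken from the parsing code, all values are invented):

page_props = {
    "product": {
        "id": "127074",
        "fullDisplayName": "H-E-B Whole Milk, 1 gal",
        "brand": {"name": "H-E-B", "isOwnBrand": True},
        "SKUs": [{
            "id": "127074",
            "customerFriendlySize": "1 gal",
            "contextPrices": [{
                "context": "CURBSIDE",
                "isOnSale": False,
                "listPrice": {"amount": 3.52},
                "salePrice": {"amount": 3.52},
                "unitListPrice": {"formattedAmount": "$0.03", "unit": "fl oz"},
            }],
        }],
        "inventory": {"inventoryState": "IN_STOCK"},
    }
}
# For this entry the parser would derive price=3.52, on_sale=False,
# price_per_unit="$0.03 / fl oz", available=True.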
    def _parse_product_details(self, data: dict[str, Any]) -> ProductDetails:
        """Parse product detail JSON into ProductDetails model.

        Args:
            data: Raw product data from __NEXT_DATA__ pageProps.product

        Returns:
            Parsed ProductDetails model
        """
        from texas_grocery_mcp.models.product import (
            ExtendedNutrition,
            ProductDetails,
        )

        # Extract basic info
        product_id = str(data.get("id", ""))
        name = data.get("fullDisplayName", "")

        # Extract SKU info (use first SKU)
        skus = data.get("SKUs", [])
        sku_data = skus[0] if skus else {}
        sku = str(sku_data.get("id", ""))
        upc = sku_data.get("twelveDigitUPC")
        size = sku_data.get("customerFriendlySize")

        # Extract brand
        brand_info = data.get("brand", {})
        brand = brand_info.get("name") if brand_info else None
        is_own_brand = brand_info.get("isOwnBrand", False) if brand_info else False

        # Extract pricing from context prices
        price = 0.0
        price_online = None
        on_sale = False
        is_price_cut = False
        price_per_unit = None

        context_prices = sku_data.get("contextPrices", [])
        for cp in context_prices:
            context = cp.get("context", "")
            list_price = cp.get("listPrice", {}).get("amount", 0.0)
            sale_price = cp.get("salePrice", {}).get("amount", list_price)
            unit_price = cp.get("unitListPrice", {})

            if context == "CURBSIDE":
                price = sale_price if cp.get("isOnSale") else list_price
                on_sale = cp.get("isOnSale", False)
                is_price_cut = cp.get("isPriceCut", False)
                if unit_price:
                    formatted_amount = unit_price.get("formattedAmount", "")
                    unit = unit_price.get("unit", "")
                    price_per_unit = f"{formatted_amount} / {unit}"
            elif context == "ONLINE":
                price_online = sale_price if cp.get("isOnSale") else list_price

        # Extract availability
        inventory = data.get("inventory", {})
        available = inventory.get("inventoryState") == "IN_STOCK"

        # Extract availability channels
        availability_channels = sku_data.get("productAvailability", [])

        # Extract ingredients (string, not list)
        ingredients = data.get("ingredientStatement")

        # Extract safety warning
        safety_warning = data.get("safetyWarning")

        # Extract instructions
        instructions = data.get("preparationInstructions")

        # Extract dietary attributes from lifestyles
        lifestyles = data.get("lifestyles", [])
        dietary_attributes = [
            lifestyle.get("formattedName", "")
            for lifestyle in lifestyles
            if lifestyle.get("formattedName")
        ]

        # Extract nutrition labels
        nutrition = None
        nutrition_labels = data.get("nutritionLabels", [])
        if nutrition_labels:
            nl = nutrition_labels[0]
            nutrients = self._parse_nutrients(nl.get("nutrients", []))
            vitamins = self._parse_nutrients(nl.get("vitaminsAndMinerals", []))

            nutrition = ExtendedNutrition(
                serving_size=nl.get("servingSize"),
                servings_per_container=nl.get("servingsPerContainer"),
                calories=nl.get("calories"),
                label_modifier=nl.get("labelModifier"),
                nutrients=nutrients,
                vitamins_and_minerals=vitamins,
            )

        # Extract category path from breadcrumbs
        breadcrumbs = data.get("breadcrumbs", [])
        category_path = [b.get("title", "") for b in breadcrumbs if b.get("title")]
        # Remove "H-E-B" from path if present (it's always first)
        if category_path and category_path[0] == "H-E-B":
            category_path = category_path[1:]

        # Extract images
        image_url = None
        product_images = data.get("productImageUrls", [])
        if product_images:
            # Prefer MEDIUM size
            for img in product_images:
                if img.get("size") == "MEDIUM":
                    image_url = img.get("url")
                    break
            if not image_url and product_images:
                image_url = product_images[0].get("url")

        images = data.get("carouselImageUrls", [])

        # Extract location
        location = None
        product_location = data.get("productLocation", {})
        if product_location:
            location = product_location.get("location")

        # Extract store ID
        store_id = data.get("storeId")

        # Extract SNAP eligibility
        is_snap_eligible = data.get("isEbtSnapProduct", False)

        # Extract product URL
        product_url = data.get("productPageURL")

        # Extract description
        description = data.get("productDescription")

        return ProductDetails(
            product_id=product_id,
            sku=sku,
            upc=upc,
            name=name,
            description=description,
            brand=brand,
            is_own_brand=is_own_brand,
            price=price,
            price_online=price_online,
            on_sale=on_sale,
            is_price_cut=is_price_cut,
            available=available,
            price_per_unit=price_per_unit,
            size=size,
            ingredients=ingredients,
            safety_warning=safety_warning,
            instructions=instructions,
            dietary_attributes=dietary_attributes,
            nutrition=nutrition,
            category_path=category_path,
            image_url=image_url,
            images=images,
            location=location,
            store_id=store_id,
            availability_channels=availability_channels,
            is_snap_eligible=is_snap_eligible,
            product_url=product_url,
        )

1447
|
+
+    def _parse_nutrients(self, nutrients_data: list[dict[str, Any]]) -> list[NutrientInfo]:
+        """Parse nutrients list with nested sub_items.
+
+        Args:
+            nutrients_data: List of nutrient dicts from API
+
+        Returns:
+            List of NutrientInfo models
+        """
+        from texas_grocery_mcp.models.product import NutrientInfo
+
+        result = []
+        for n in nutrients_data:
+            sub_items = None
+            if n.get("subItems"):
+                sub_items = self._parse_nutrients(n["subItems"])
+
+            result.append(NutrientInfo(
+                title=n.get("title", ""),
+                unit=n.get("unit", ""),
+                percentage=n.get("percentage"),
+                font_modifier=n.get("fontModifier"),
+                sub_items=sub_items,
+            ))
+        return result
+
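For reference, the nutrient payload consumed by _parse_nutrients above nests the same shape one level down under subItems. A minimal sketch of that recursion on a made-up payload; the field names come from the parser above, while the sample values and the standalone parse helper are illustrative only and not part of the package:

    # Illustrative input: keys match what _parse_nutrients reads; values are made up.
    sample_nutrients = [
        {
            "title": "Total Fat",
            "unit": "8 g",
            "percentage": 10,
            "fontModifier": "BOLD",
            "subItems": [
                {"title": "Saturated Fat", "unit": "1 g", "percentage": 5},
            ],
        },
    ]

    def parse(nutrients: list[dict]) -> list[dict]:
        # Same shape-walk as _parse_nutrients, returning plain dicts
        # instead of NutrientInfo models so the sketch stays self-contained.
        return [
            {
                "title": n.get("title", ""),
                "unit": n.get("unit", ""),
                "percentage": n.get("percentage"),
                "sub_items": parse(n["subItems"]) if n.get("subItems") else None,
            }
            for n in nutrients
        ]

    print(parse(sample_nutrients))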
+    @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
+    async def _search_products_ssr(
+        self,
+        client: httpx.AsyncClient,
+        query: str,
+        store_id: str,
+        limit: int = 20,
+    ) -> tuple[list[Product], bool]:
+        """Search products using authenticated SSR page fetch.
+
+        Fetches the search results page HTML and extracts product data
+        from the embedded __NEXT_DATA__ JSON.
+
+        Args:
+            client: Authenticated httpx client with cookies
+            query: Search query
+            store_id: Store ID (used for context)
+            limit: Maximum results to return
+
+        Returns:
+            Tuple of (products list, security_challenge_detected)
+        """
+        async with self._ssr_throttler:
+            self.circuit_breaker.check()
+
+            url = f"https://www.heb.com/search?q={query.replace(' ', '+')}"
+            logger.debug("Fetching SSR search results", url=url)
+
+            try:
+                response = await client.get(url)
+                response.raise_for_status()
+
+                # Check for security challenge before parsing
+                if self._detect_security_challenge(response.text):
+                    logger.warning(
+                        "Security challenge detected in SSR response",
+                        query=query,
+                        response_length=len(response.text),
+                    )
+                    self.circuit_breaker.record_failure()
+                    return [], True
+
+                # Extract __NEXT_DATA__ JSON from HTML
+                match = re.search(
+                    r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+                    response.text,
+                    re.DOTALL,
+                )
+
+                if not match:
+                    logger.warning(
+                        "No __NEXT_DATA__ found in response",
+                        query=query,
+                        response_length=len(response.text),
+                    )
+                    return [], False
+
+                next_data = json.loads(match.group(1))
+                products = self._parse_ssr_products(next_data, limit)
+
+                self.circuit_breaker.record_success()
+                logger.info(
+                    "SSR product search successful",
+                    query=query,
+                    result_count=len(products),
+                )
+
+                return products, False
+
+            except httpx.HTTPError as e:
+                self.circuit_breaker.record_failure()
+                logger.error("SSR search request failed", query=query, error=str(e))
+                raise
+
+    def _parse_ssr_products(self, next_data: dict[str, Any], limit: int = 20) -> list[Product]:
+        """Parse products from Next.js SSR data.
+
+        Extracts product data from the searchGridV2 component in the
+        page props layout.
+
+        Args:
+            next_data: Parsed __NEXT_DATA__ JSON
+            limit: Maximum products to return
+
+        Returns:
+            List of Product objects
+        """
+        products: list[Product] = []
+
+        try:
+            # Navigate to search grid items
+            layout = next_data.get("props", {}).get("pageProps", {}).get("layout", {})
+            visual_components = layout.get("visualComponents", [])
+
+            # Find searchGridV2 component
+            search_grid = None
+            for component in visual_components:
+                if component.get("type") == "searchGridV2":
+                    search_grid = component
+                    break
+
+            if not search_grid:
+                logger.debug("No searchGridV2 component found")
+                return []
+
+            items = search_grid.get("items", [])
+
+            for item in items[:limit]:
+                try:
+                    product = self._parse_ssr_product_item(item)
+                    if product:
+                        products.append(product)
+                except Exception as e:
+                    logger.debug("Failed to parse product item", error=str(e))
+                    continue
+
+        except Exception as e:
+            logger.error("Failed to parse SSR products", error=str(e))
+
+        return products
+
+    def _parse_ssr_product_item(self, item: dict[str, Any]) -> Product | None:
+        """Parse a single product item from SSR data.
+
+        Args:
+            item: Product item dict from searchGridV2.items
+
+        Returns:
+            Product object or None if parsing fails
+        """
+        if item.get("__typename") != "Product":
+            return None
+
+        # Extract basic info
+        product_id = item.get("id", "")
+        display_name = item.get("fullDisplayName") or item.get("displayName", "")
+
+        # Extract brand
+        brand_info = item.get("brand", {})
+        brand = brand_info.get("name") if brand_info else None
+
+        # Extract SKU and pricing
+        skus = item.get("SKUs", [])
+        sku_data = skus[0] if skus else {}
+        sku_id = sku_data.get("id", "")
+        size = sku_data.get("customerFriendlySize", "")
+
+        # Get pricing (prefer CURBSIDE context, fallback to ONLINE)
+        price = 0.0
+        price_per_unit = None
+        on_sale = False
+        original_price = None
+
+        context_prices = sku_data.get("contextPrices", [])
+        for ctx_price in context_prices:
+            context = ctx_price.get("context", "")
+            if context in ("CURBSIDE", "CURBSIDE_PICKUP", "ONLINE"):
+                list_price = ctx_price.get("listPrice", {})
+                sale_price = ctx_price.get("salePrice", {})
+                unit_price = ctx_price.get("unitListPrice", {})
+
+                price = sale_price.get("amount", 0.0) or list_price.get("amount", 0.0)
+
+                if unit_price:
+                    unit_amount = unit_price.get("amount", 0.0)
+                    unit_type = unit_price.get("unit", "")
+                    if unit_amount and unit_type:
+                        price_per_unit = f"${unit_amount:.2f}/{unit_type}"
+
+                on_sale = ctx_price.get("isOnSale", False) or ctx_price.get("isPriceCut", False)
+                if on_sale:
+                    original_price = list_price.get("amount")
+
+                break  # Use first matching context
+
+        # Extract inventory
+        inventory = item.get("inventory", {})
+        inventory_state = inventory.get("inventoryState", "")
+        available = inventory_state == "IN_STOCK"
+
+        # Extract image URL
+        images = item.get("productImageUrls", [])
+        image_url = None
+        for img in images:
+            if img.get("size") == "MEDIUM":
+                image_url = img.get("url")
+                break
+        if not image_url and images:
+            image_url = images[0].get("url")
+
+        # Extract aisle/location
+        location = item.get("productLocation", {})
+        aisle = location.get("location") if location else None
+
+        # Extract coupon flag
+        has_coupon = item.get("showCouponFlag", False)
+
+        return Product(
+            sku=sku_id or product_id,
+            product_id=product_id,  # Store product ID separately for cart operations
+            name=display_name,
+            price=price,
+            available=available,
+            brand=brand,
+            size=size,
+            price_per_unit=price_per_unit,
+            image_url=image_url,
+            aisle=aisle,
+            on_sale=on_sale,
+            original_price=original_price,
+            has_coupon=has_coupon,
+        )
+
+    async def get_categories(self) -> list[dict[str, Any]]:
+        """Get shop navigation categories.
+
+        Returns:
+            List of category dictionaries with id, name, href, and subcategories
+        """
+        try:
+            data = await self._execute_persisted_query("ShopNavigation", {})
+            categories = data.get("shopNavigation", [])
+            return [
+                {
+                    "id": cat.get("id"),
+                    "name": cat.get("displayName"),
+                    "href": cat.get("href"),
+                    "subcategories": [
+                        {"id": sub.get("id"), "name": sub.get("displayName")}
+                        for sub in cat.get("subCategories", [])
+                    ],
+                }
+                for cat in categories
+            ]
+        except Exception as e:
+            logger.error("Failed to get categories", error=str(e))
+            return []
+
+    async def get_typeahead(self, term: str) -> list[str]:
+        """Get search suggestions for a term.
+
+        Args:
+            term: Partial search term
+
+        Returns:
+            List of suggested search terms
+        """
+        try:
+            data = await self._execute_persisted_query(
+                "typeaheadContent",
+                {"term": term, "searchMode": "MAIN_SEARCH"},
+            )
+
+            suggestions = []
+            content = data.get("typeaheadContent", {})
+            vertical_stack = content.get("verticalStack", [])
+
+            for section in vertical_stack:
+                typename = section.get("__typename", "")
+                if "SuggestedSearches" in typename:
+                    suggestions.extend(section.get("terms", []))
+                elif "TrendingSearches" in typename:
+                    suggestions.extend(section.get("trendingSearches", []))
+
+            return suggestions
+
+        except Exception as e:
+            logger.error("Typeahead failed", term=term, error=str(e))
+            return []
+
+    async def add_to_cart(
+        self,
+        product_id: str,
+        sku_id: str,
+        quantity: int = 1,
+    ) -> dict[str, Any]:
+        """Add an item to the cart using authenticated GraphQL.
+
+        Requires authentication cookies to be available.
+
+        Args:
+            product_id: The product ID
+            sku_id: The SKU ID
+            quantity: Number to add
+
+        Returns:
+            Cart response data or error dict if not authenticated
+        """
+        auth_client = await self._get_authenticated_client()
+        if not auth_client:
+            return {"error": True, "code": "NOT_AUTHENTICATED", "message": "Login required"}
+
+        return await self._execute_persisted_query_with_client(
+            auth_client,
+            "cartItemV2",
+            {
+                "userIsLoggedIn": True,
+                "productId": product_id,
+                "skuId": sku_id,
+                "quantity": quantity,
+            },
+        )
+
+    async def get_cart(self) -> dict[str, Any]:
+        """Get current cart contents using authenticated GraphQL.
+
+        Requires authentication cookies to be available.
+
+        Returns:
+            Cart data or error dict if not authenticated
+        """
+        auth_client = await self._get_authenticated_client()
+        if not auth_client:
+            return {"error": True, "code": "NOT_AUTHENTICATED", "message": "Login required"}
+
+        return await self._execute_persisted_query_with_client(
+            auth_client,
+            "cartEstimated",
+            {"userIsLoggedIn": True},
+        )
+
+    @with_retry(config=RetryConfig(max_attempts=3, base_delay=1.0))
+    async def _execute_persisted_query_with_client(
+        self,
+        client: httpx.AsyncClient,
+        operation_name: str,
+        variables: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Execute a persisted GraphQL query with a specific client.
+
+        Args:
+            client: httpx client to use (may have cookies)
+            operation_name: The name of the persisted operation
+            variables: Query variables
+
+        Returns:
+            Response data
+        """
+        self.circuit_breaker.check()
+
+        if operation_name not in PERSISTED_QUERIES:
+            raise ValueError(f"Unknown operation: {operation_name}")
+
+        payload = {
+            "operationName": operation_name,
+            "variables": variables,
+            "extensions": {
+                "persistedQuery": {
+                    "version": 1,
+                    "sha256Hash": PERSISTED_QUERIES[operation_name],
+                }
+            },
+        }
+
+        try:
+            response = await client.post(
+                self.base_url,
+                json=payload,
+                headers={"Content-Type": "application/json", "Accept": "application/json"},
+            )
+            response.raise_for_status()
+
+            data: Any = response.json()
+
+            if "errors" in data:
+                for error in data["errors"]:
+                    if "PersistedQueryNotFound" in str(error):
+                        raise PersistedQueryNotFoundError(
+                            f"Persisted query hash for '{operation_name}' is no longer valid"
+                        )
+                raise GraphQLError(data["errors"])
+
+            self.circuit_breaker.record_success()
+
+            if isinstance(data, dict):
+                payload_data = data.get("data")
+                if isinstance(payload_data, dict):
+                    return cast(dict[str, Any], payload_data)
+            return {}
+
+        except (httpx.HTTPError, GraphQLError) as e:
+            self.circuit_breaker.record_failure()
+            logger.error(
+                "Persisted query with client failed",
+                operation=operation_name,
+                error=str(e),
+            )
+            raise
+
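The request body assembled in _execute_persisted_query_with_client follows the persisted-query convention: the operation is identified by name and SHA-256 hash rather than by a full query document. A sketch of one such payload, using the cartEstimated operation and variables shown above; the hash string is a placeholder, since the real hashes live in PERSISTED_QUERIES:

    # Structure mirrors the payload dict built above; the hash value is not a real one.
    example_payload = {
        "operationName": "cartEstimated",
        "variables": {"userIsLoggedIn": True},
        "extensions": {
            "persistedQuery": {
                "version": 1,
                "sha256Hash": "<sha256-of-the-registered-query>",
            }
        },
    }

If the server no longer recognizes a hash, the "PersistedQueryNotFound" error path above converts that into a PersistedQueryNotFoundError so callers can distinguish a stale hash from an ordinary GraphQL failure.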
+    def get_status(self) -> dict[str, Any]:
+        """Get client status for health checks."""
+        return {
+            "circuit_breaker": self.circuit_breaker.get_status(),
+            "build_id": self._build_id,
+            "known_stores": len(KNOWN_STORES),
+        }
+
+    # ===================
+    # Coupon Methods
+    # ===================
+
+    async def get_coupons(
+        self,
+        category_id: int | None = None,
+        search_query: str | None = None,
+        limit: int = 60,
+    ) -> CouponSearchResult:
+        """Fetch available coupons.
+
+        Coupons are loaded via SSR from the all-coupons page.
+
+        Args:
+            category_id: Filter by category ID (e.g., 490021 for Health & beauty)
+            search_query: Search coupons by keyword
+            limit: Maximum coupons to return (max 60 per page)
+
+        Returns:
+            CouponSearchResult with coupons and metadata
+        """
+        auth_client = await self._get_authenticated_client()
+        if not auth_client:
+            logger.warning("Coupon fetch requires authentication for full data")
+            return CouponSearchResult(
+                coupons=[],
+                count=0,
+                total=0,
+                categories=[],
+            )
+
+        try:
+            return await self._fetch_coupons_ssr(
+                auth_client,
+                category_id=category_id,
+                search_query=search_query,
+                limit=limit,
+            )
+        except Exception as e:
+            logger.error("Failed to fetch coupons", error=str(e))
+            return CouponSearchResult(
+                coupons=[],
+                count=0,
+                total=0,
+                categories=[],
+            )
+
+    @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
+    async def _fetch_coupons_ssr(
+        self,
+        client: httpx.AsyncClient,
+        category_id: int | None = None,
+        search_query: str | None = None,
+        limit: int = 60,
+    ) -> CouponSearchResult:
+        """Fetch coupons via SSR page.
+
+        Args:
+            client: Authenticated httpx client
+            category_id: Filter by category
+            search_query: Search term
+            limit: Max results
+
+        Returns:
+            CouponSearchResult with parsed coupon data
+        """
+        self.circuit_breaker.check()
+
+        # Build URL with query params
+        url = "https://www.heb.com/digital-coupon/coupon-selection/all-coupons"
+        params = {}
+
+        if search_query:
+            params["searchTerm"] = search_query
+        if category_id:
+            params["productCategories"] = str(category_id)
+
+        logger.debug("Fetching coupons SSR", url=url, params=params)
+
+        try:
+            response = await client.get(url, params=params if params else None)
+            response.raise_for_status()
+
+            # Extract __NEXT_DATA__ JSON from HTML
+            match = re.search(
+                r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+                response.text,
+                re.DOTALL,
+            )
+
+            if not match:
+                logger.warning("No __NEXT_DATA__ found in coupon response")
+                return CouponSearchResult(coupons=[], count=0, total=0, categories=[])
+
+            next_data = json.loads(match.group(1))
+            result = self._parse_coupon_ssr_data(next_data, limit)
+
+            self.circuit_breaker.record_success()
+            logger.info(
+                "Coupon fetch successful",
+                count=result.count,
+                total=result.total,
+            )
+
+            return result
+
+        except httpx.HTTPError as e:
+            self.circuit_breaker.record_failure()
+            logger.error("Coupon SSR fetch failed", error=str(e))
+            raise
+
+    def _parse_coupon_ssr_data(
+        self,
+        next_data: dict[str, Any],
+        limit: int = 60,
+    ) -> CouponSearchResult:
+        """Parse coupon data from SSR __NEXT_DATA__.
+
+        Args:
+            next_data: Parsed __NEXT_DATA__ JSON
+            limit: Max coupons to return
+
+        Returns:
+            CouponSearchResult with coupons and categories
+        """
+        page_props = next_data.get("props", {}).get("pageProps", {})
+
+        # Parse coupon data
+        coupon_data = page_props.get("couponData", [])
+        coupons: list[Coupon] = []
+
+        for item in coupon_data[:limit]:
+            try:
+                coupon = self._parse_coupon_item(item)
+                if coupon:
+                    coupons.append(coupon)
+            except Exception as e:
+                logger.debug("Failed to parse coupon", error=str(e))
+                continue
+
+        # Parse pagination
+        pagination = page_props.get("pagination", {})
+        total = pagination.get("totalCoupons", len(coupons))
+
+        # Parse categories from filters
+        categories: list[CouponCategory] = []
+        filters_info = page_props.get("filtersInfo", {})
+        filter_counts = filters_info.get("filterCounts", {})
+        product_categories = filter_counts.get("productCategories", [])
+
+        for cat in product_categories:
+            try:
+                categories.append(CouponCategory(
+                    id=cat.get("option", 0),
+                    name=cat.get("displayName", ""),
+                    count=cat.get("count", 0),
+                ))
+            except Exception:
+                continue
+
+        return CouponSearchResult(
+            coupons=coupons,
+            count=len(coupons),
+            total=total,
+            categories=categories,
+        )
+
+    def _parse_coupon_item(self, item: dict[str, Any]) -> Coupon | None:
+        """Parse a single coupon from SSR data.
+
+        Args:
+            item: Coupon dict from couponData array
+
+        Returns:
+            Coupon object or None if parsing fails
+        """
+        coupon_id = item.get("id")
+        if not coupon_id:
+            return None
+
+        # Parse expiration date
+        exp_date = item.get("expirationDate")
+        expires_display = None
+        if exp_date:
+            # Convert YYYY-MM-DD to more readable format
+            try:
+                from datetime import datetime
+                dt = datetime.strptime(exp_date, "%Y-%m-%d")
+                expires_display = dt.strftime("%m/%d/%Y")
+            except Exception:
+                expires_display = exp_date
+
+        # Determine if digital only
+        print_statuses = item.get("printStatuses", [])
+        digital_only = "PAPERLESS" in print_statuses and "PRINTED" not in print_statuses
+
+        # Parse usage limit
+        redemption_limit = item.get("redemptionLimit")
+        usage_limit = f"Limit {redemption_limit}" if redemption_limit else "Unlimited use"
+
+        return Coupon(
+            coupon_id=coupon_id,
+            headline=item.get("shortDescription", ""),
+            description=item.get("description", ""),
+            expires=exp_date,
+            expires_display=expires_display,
+            image_url=item.get("imageUrl"),
+            coupon_type=item.get("type", "NORMAL"),
+            clipped=item.get("clippedStatus") == "CLIPPED",
+            redeemable=item.get("redemptionStatus") == "REDEEMABLE",
+            usage_limit=usage_limit,
+            digital_only=digital_only,
+        )
+
+    async def clip_coupon(self, coupon_id: int) -> dict[str, Any]:
+        """Clip a coupon to the user's account.
+
+        Args:
+            coupon_id: The coupon ID to clip
+
+        Returns:
+            Result dict with success/error status
+        """
+        auth_client = await self._get_authenticated_client()
+        if not auth_client:
+            return {
+                "error": True,
+                "code": "NOT_AUTHENTICATED",
+                "message": "Login required to clip coupons",
+            }
+
+        try:
+            result = await self._execute_persisted_query_with_client(
+                auth_client,
+                "CouponClip",
+                {
+                    "userIsLoggedIn": True,
+                    "id": coupon_id,
+                },
+            )
+
+            # Check if the mutation succeeded
+            clip_result = result.get("clipCoupon", {})
+            if clip_result:
+                return {
+                    "success": True,
+                    "coupon_id": coupon_id,
+                    "message": "Coupon clipped successfully!",
+                }
+            else:
+                return {
+                    "success": True,
+                    "coupon_id": coupon_id,
+                    "message": "Coupon clipped.",
+                }
+
+        except GraphQLError as e:
+            error_msg = str(e)
+            if "already clipped" in error_msg.lower():
+                return {
+                    "error": True,
+                    "code": "ALREADY_CLIPPED",
+                    "message": "This coupon is already clipped to your account.",
+                    "coupon_id": coupon_id,
+                }
+            logger.error("Failed to clip coupon", coupon_id=coupon_id, error=error_msg)
+            return {
+                "error": True,
+                "code": "CLIP_FAILED",
+                "message": f"Failed to clip coupon: {error_msg}",
+                "coupon_id": coupon_id,
+            }
+        except Exception as e:
+            logger.error("Failed to clip coupon", coupon_id=coupon_id, error=str(e))
+            return {
+                "error": True,
+                "code": "CLIP_FAILED",
+                "message": f"Failed to clip coupon: {e!s}",
+                "coupon_id": coupon_id,
+            }
+
+    async def get_clipped_coupons(self, limit: int = 60) -> CouponSearchResult:
+        """Get the user's clipped coupons.
+
+        Fetches clipped coupons via SSR from the clipped-coupons page.
+
+        Args:
+            limit: Maximum coupons to return
+
+        Returns:
+            CouponSearchResult with clipped coupons
+        """
+        auth_client = await self._get_authenticated_client()
+        if not auth_client:
+            logger.warning("Clipped coupons require authentication")
+            return CouponSearchResult(
+                coupons=[],
+                count=0,
+                total=0,
+                categories=[],
+            )
+
+        try:
+            return await self._fetch_clipped_coupons_ssr(auth_client, limit)
+        except Exception as e:
+            logger.error("Failed to fetch clipped coupons", error=str(e))
+            return CouponSearchResult(
+                coupons=[],
+                count=0,
+                total=0,
+                categories=[],
+            )
+
+    async def select_store(
+        self, store_id: str, ignore_conflicts: bool = False
+    ) -> dict[str, Any]:
+        """Change the active store via GraphQL mutation with verification.
+
+        This calls the SelectPickupFulfillment mutation which changes
+        the user's active store on HEB's backend, then verifies the
+        change actually took effect by checking the cart's store.
+
+        Args:
+            store_id: The store ID to switch to
+            ignore_conflicts: If True, force store change even if cart has
+                conflicts (items unavailable, price changes). Default False.
+
+        Returns:
+            Result dict with:
+            - success: True only if store actually changed (verified)
+            - error: True if store change failed or couldn't be verified
+            - code: Error code for programmatic handling
+            - verified: True if change was verified via get_cart()
+        """
+        auth_client = await self._get_authenticated_client()
+        if not auth_client:
+            return {
+                "error": True,
+                "code": "NOT_AUTHENTICATED",
+                "message": "Login required to change stores",
+            }
+
+        try:
+            # The mutation expects storeId as both string and int in different fields
+            result = await self._execute_persisted_query_with_client(
+                auth_client,
+                "SelectPickupFulfillment",
+                {
+                    "fulfillmentType": "PICKUP",
+                    "pickupStoreId": store_id,
+                    "ignoreCartConflicts": ignore_conflicts,
+                    "storeId": int(store_id),
+                    "userIsLoggedIn": True,
+                },
+            )
+
+            fulfillment_data = result.get("selectPickupFulfillment", {})
+            logger.debug(
+                "SelectPickupFulfillment response",
+                store_id=store_id,
+                response=fulfillment_data,
+            )
+
+            # VERIFY: Check if store actually changed by fetching cart
+            # This is the key fix - don't trust the mutation response alone
+            cart = await self.get_cart()
+            if cart.get("error"):
+                logger.warning(
+                    "Could not verify store change - cart fetch failed",
+                    store_id=store_id,
+                    cart_error=cart,
+                )
+                return {
+                    "error": True,
+                    "code": "VERIFICATION_FAILED",
+                    "message": "Store change could not be verified - cart fetch failed",
+                    "store_id": store_id,
+                    "mutation_response": fulfillment_data,
+                }
+
+            # Extract actual store from cart response
+            # Cart structure: cartV2.fulfillment.store.id
+            cart_v2 = cart.get("cartV2") or cart.get("cart") or {}
+            fulfillment = cart_v2.get("fulfillment") or {}
+            store_info = fulfillment.get("store") or {}
+            actual_store_id = str(store_info.get("id", ""))
+
+            # Also check pickupStore as alternative location
+            if not actual_store_id:
+                pickup_store = fulfillment.get("pickupStore") or {}
+                actual_store_id = str(pickup_store.get("id", ""))
+
+            # If still no store found, check top-level storeId
+            if not actual_store_id:
+                actual_store_id = str(cart_v2.get("storeId", ""))
+
+            logger.debug(
+                "Store verification",
+                requested=store_id,
+                actual=actual_store_id,
+                cart_fulfillment=fulfillment,
+            )
+
+            # Compare requested vs actual
+            if actual_store_id == store_id:
+                logger.info(
+                    "Store change verified successful",
+                    store_id=store_id,
+                    verified=True,
+                )
+                return {
+                    "success": True,
+                    "store_id": store_id,
+                    "message": f"Store changed to {store_id}",
+                    "verified": True,
+                }
+            else:
+                # Store didn't change - likely cart conflict
+                logger.warning(
+                    "Store change verification failed",
+                    requested=store_id,
+                    actual=actual_store_id,
+                    ignore_conflicts=ignore_conflicts,
+                )
+
+                # Determine likely cause
+                if not ignore_conflicts:
+                    return {
+                        "error": True,
+                        "code": "CART_CONFLICT",
+                        "message": (
+                            f"Store change not applied - your cart may have items "
+                            "unavailable at the new store. Current store is still "
+                            f"{actual_store_id}."
+                        ),
+                        "expected_store": store_id,
+                        "actual_store": actual_store_id,
+                        "suggestion": "Try with ignore_conflicts=True to force the change, "
+                        "or clear your cart first.",
+                    }
+                else:
+                    return {
+                        "error": True,
+                        "code": "VERIFICATION_FAILED",
+                        "message": (
+                            f"Store change not applied even with ignore_conflicts=True. "
+                            f"Current store is still {actual_store_id}."
+                        ),
+                        "expected_store": store_id,
+                        "actual_store": actual_store_id,
+                    }
+
+        except GraphQLError as e:
+            error_msg = str(e)
+            logger.error("Failed to change store", store_id=store_id, error=error_msg)
+            return {
+                "error": True,
+                "code": "STORE_CHANGE_FAILED",
+                "message": f"Failed to change store: {error_msg}",
+                "store_id": store_id,
+            }
+        except ValueError as e:
+            # Invalid store_id format (can't convert to int)
+            logger.error("Invalid store ID format", store_id=store_id, error=str(e))
+            return {
+                "error": True,
+                "code": "INVALID_STORE_ID",
+                "message": f"Invalid store ID format: {store_id}",
+                "store_id": store_id,
+            }
+        except Exception as e:
+            logger.error("Failed to change store", store_id=store_id, error=str(e))
+            return {
+                "error": True,
+                "code": "STORE_CHANGE_FAILED",
+                "message": f"Failed to change store: {e!s}",
+                "store_id": store_id,
+            }
+
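A minimal caller-side sketch of the verification contract that select_store returns above, assuming an already-constructed client object (construction is not shown here) and an asyncio entry point; the error codes checked below are the ones produced by the method, while the store ID is a made-up placeholder:

    import asyncio

    async def switch_store(client, store_id: str) -> None:
        # First attempt without forcing; retry with ignore_conflicts=True on CART_CONFLICT.
        result = await client.select_store(store_id)
        if result.get("code") == "CART_CONFLICT":
            result = await client.select_store(store_id, ignore_conflicts=True)
        if result.get("success") and result.get("verified"):
            print(f"Active store is now {result['store_id']}")
        else:
            print(f"Store change failed: {result.get('message')}")

    # asyncio.run(switch_store(client, "790"))  # "790" is a hypothetical store ID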
+    @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
+    async def _fetch_clipped_coupons_ssr(
+        self,
+        client: httpx.AsyncClient,
+        limit: int = 60,
+    ) -> CouponSearchResult:
+        """Fetch clipped coupons via SSR page.
+
+        Args:
+            client: Authenticated httpx client
+            limit: Max results
+
+        Returns:
+            CouponSearchResult with clipped coupon data
+        """
+        self.circuit_breaker.check()
+
+        url = "https://www.heb.com/digital-coupon/clipped-coupons"
+        logger.debug("Fetching clipped coupons SSR", url=url)
+
+        try:
+            response = await client.get(url)
+            response.raise_for_status()
+
+            # Extract __NEXT_DATA__ JSON from HTML
+            match = re.search(
+                r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+                response.text,
+                re.DOTALL,
+            )
+
+            if not match:
+                logger.warning("No __NEXT_DATA__ found in clipped coupons response")
+                return CouponSearchResult(coupons=[], count=0, total=0, categories=[])
+
+            next_data = json.loads(match.group(1))
+            result = self._parse_coupon_ssr_data(next_data, limit)
+
+            self.circuit_breaker.record_success()
+            logger.info(
+                "Clipped coupons fetch successful",
+                count=result.count,
+                total=result.total,
+            )
+
+            return result
+
+        except httpx.HTTPError as e:
+            self.circuit_breaker.record_failure()
+            logger.error("Clipped coupons SSR fetch failed", error=str(e))
+            raise
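The three SSR fetchers above (_search_products_ssr, _fetch_coupons_ssr, _fetch_clipped_coupons_ssr) all recover page state the same way: fetch the rendered HTML, pull the embedded __NEXT_DATA__ script, and parse it as JSON. A condensed sketch of that shared step as a standalone helper, assuming only httpx; the helper itself is illustrative and not part of the package:

    import json
    import re

    import httpx

    NEXT_DATA_RE = re.compile(
        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
        re.DOTALL,
    )

    async def fetch_next_data(client: httpx.AsyncClient, url: str) -> dict | None:
        # Fetch a Next.js-rendered page and return its embedded __NEXT_DATA__ payload,
        # or None when the script tag is absent (e.g., a challenge or error page).
        response = await client.get(url)
        response.raise_for_status()
        match = NEXT_DATA_RE.search(response.text)
        return json.loads(match.group(1)) if match else None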