texas-grocery-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. texas_grocery_mcp/__init__.py +3 -0
  2. texas_grocery_mcp/auth/__init__.py +5 -0
  3. texas_grocery_mcp/auth/browser_refresh.py +1629 -0
  4. texas_grocery_mcp/auth/credentials.py +337 -0
  5. texas_grocery_mcp/auth/session.py +767 -0
  6. texas_grocery_mcp/clients/__init__.py +5 -0
  7. texas_grocery_mcp/clients/graphql.py +2400 -0
  8. texas_grocery_mcp/models/__init__.py +54 -0
  9. texas_grocery_mcp/models/cart.py +60 -0
  10. texas_grocery_mcp/models/coupon.py +44 -0
  11. texas_grocery_mcp/models/errors.py +43 -0
  12. texas_grocery_mcp/models/health.py +41 -0
  13. texas_grocery_mcp/models/product.py +274 -0
  14. texas_grocery_mcp/models/store.py +77 -0
  15. texas_grocery_mcp/observability/__init__.py +6 -0
  16. texas_grocery_mcp/observability/health.py +141 -0
  17. texas_grocery_mcp/observability/logging.py +73 -0
  18. texas_grocery_mcp/reliability/__init__.py +23 -0
  19. texas_grocery_mcp/reliability/cache.py +116 -0
  20. texas_grocery_mcp/reliability/circuit_breaker.py +138 -0
  21. texas_grocery_mcp/reliability/retry.py +96 -0
  22. texas_grocery_mcp/reliability/throttle.py +113 -0
  23. texas_grocery_mcp/server.py +211 -0
  24. texas_grocery_mcp/services/__init__.py +5 -0
  25. texas_grocery_mcp/services/geocoding.py +227 -0
  26. texas_grocery_mcp/state.py +166 -0
  27. texas_grocery_mcp/tools/__init__.py +5 -0
  28. texas_grocery_mcp/tools/cart.py +821 -0
  29. texas_grocery_mcp/tools/coupon.py +381 -0
  30. texas_grocery_mcp/tools/product.py +437 -0
  31. texas_grocery_mcp/tools/session.py +486 -0
  32. texas_grocery_mcp/tools/store.py +353 -0
  33. texas_grocery_mcp/utils/__init__.py +5 -0
  34. texas_grocery_mcp/utils/config.py +146 -0
  35. texas_grocery_mcp/utils/secure_file.py +123 -0
  36. texas_grocery_mcp-0.1.0.dist-info/METADATA +296 -0
  37. texas_grocery_mcp-0.1.0.dist-info/RECORD +40 -0
  38. texas_grocery_mcp-0.1.0.dist-info/WHEEL +4 -0
  39. texas_grocery_mcp-0.1.0.dist-info/entry_points.txt +2 -0
  40. texas_grocery_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,2400 @@
+ """HEB API client using persisted queries and Next.js data endpoints.
+
+ Supports both unauthenticated (typeahead) and authenticated (full product search)
+ modes. Authenticated mode uses browser session cookies for faster API access.
+ """
+
+ import json
+ import re
+ from typing import Any, cast
+
+ import httpx
+ import structlog
+
+ from texas_grocery_mcp.auth.session import get_httpx_cookies, is_authenticated
+ from texas_grocery_mcp.models import (
+     Coupon,
+     CouponCategory,
+     CouponSearchResult,
+     GeocodedLocation,
+     NutrientInfo,
+     Product,
+     ProductDetails,
+     ProductSearchAttempt,
+     ProductSearchResult,
+     SearchAttempt,
+     Store,
+     StoreSearchResult,
+ )
+ from texas_grocery_mcp.reliability import (
+     CircuitBreaker,
+     RetryConfig,
+     ThrottleConfig,
+     Throttler,
+     TTLCache,
+     with_retry,
+ )
+ from texas_grocery_mcp.services.geocoding import GeocodingResult, GeocodingService
+ from texas_grocery_mcp.utils.config import get_settings
+
+ logger = structlog.get_logger()
+
+
+ class GraphQLError(Exception):
+     """Raised when GraphQL returns errors."""
+
+     def __init__(self, errors: list[dict[str, Any]]):
+         self.errors = errors
+         messages = [e.get("message", "Unknown error") for e in errors]
+         super().__init__(f"GraphQL error: {'; '.join(messages)}")
+
+
+ class PersistedQueryNotFoundError(Exception):
+     """Raised when a persisted query hash is not found on the server."""
+
+
+ # Persisted Query Hashes (discovered via reverse engineering)
+ # These may change when HEB deploys new code
+ PERSISTED_QUERIES = {
+     "ShopNavigation": "0e669423cef683226cb8eb295664619c8e0f95945734e0a458095f51ee89efb3",
+     "alertEntryPoint": "3e3ccd248652e8fce4674d0c5f3f30f2ddc63da277bfa0ff36ea9420e5dffd5e",
+     "cartEstimated": "7b033abaf2caa80bc49541e51d2b89e3cc6a316e37c4bd576d9b5c498a51e9c5",
+     "typeaheadContent": "1ed956c0f10efcfc375321f33c40964bc236fff1397a4e86b7b53cb3b18ad329",
+     "cartItemV2": "ade8ec1365c185244d42f9cc4c13997fec4b633ac3c38ff39558df92b210c6d0",
+     "StoreSearch": "e01fa39e66c3a2c7881322bc48af6a5af97d49b1442d433f2d09d273de2db4b6",
+     "CouponClip": "88b18ac22cee98372428d9a91d759ffb5e919026ee61c747f9f88d11336b846b",
+     # Store change mutation - changes the active pickup store
+     "SelectPickupFulfillment": "8fa3c683ee37ad1bab9ce22b99bd34315b2a89cfc56208d63ba9efc0c49a6323",
+ }
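+
+ # For reference, a persisted-query request body (as assembled in
+ # _execute_persisted_query below) looks roughly like:
+ #   {"operationName": "StoreSearch", "variables": {...},
+ #    "extensions": {"persistedQuery": {"version": 1, "sha256Hash": "<hash>"}}}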
+
+ # Well-known HEB stores (fallback for store search)
+ KNOWN_STORES = {
+     "737": Store(
+         store_id="737",
+         name="The Heights H-E-B",
+         address="2300 N. SHEPHERD DR., HOUSTON, TX 77008",
+         phone="(713) 802-9090",
+         latitude=29.8028,
+         longitude=-95.4103,
+     ),
+     "579": Store(
+         store_id="579",
+         name="Buffalo Speedway H-E-B",
+         address="5601 S BRAESWOOD BLVD, HOUSTON, TX 77096",
+         phone="(713) 432-1400",
+         latitude=29.6916,
+         longitude=-95.4587,
+     ),
+     "150": Store(
+         store_id="150",
+         name="Montrose H-E-B",
+         address="1701 W ALABAMA ST, HOUSTON, TX 77098",
+         phone="(713) 523-4481",
+         latitude=29.7419,
+         longitude=-95.3979,
+     ),
+ }
+
+
+ class HEBGraphQLClient:
+     """Client for HEB's API using persisted queries and Next.js data endpoints.
+
+     Supports two modes:
+     - Unauthenticated: Basic operations like typeahead (always available)
+     - Authenticated: Full product search and cart operations (requires cookies)
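+
+     Example (illustrative sketch, not from the packaged code; assumes an
+     async context and configured settings):
+
+         client = HEBGraphQLClient()
+         try:
+             stores = await client.search_stores("77008")
+             products = await client.search_products("tortillas", store_id="737")
+         finally:
+             await client.close()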
+     """
+
+     # Standard headers for browser-like requests
+     _BROWSER_HEADERS = {
+         "User-Agent": (
+             "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+             "AppleWebKit/537.36 (KHTML, like Gecko) "
+             "Chrome/120.0.0.0 Safari/537.36"
+         ),
+         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+         "Accept-Language": "en-US,en;q=0.5",
+         "Origin": "https://www.heb.com",
+         "Referer": "https://www.heb.com/",
+     }
+
+     def __init__(self, base_url: str | None = None):
+         settings = get_settings()
+         self.base_url = base_url or settings.heb_graphql_url
+         self.circuit_breaker = CircuitBreaker("heb_api")
+         self._client: httpx.AsyncClient | None = None
+         self._auth_client: httpx.AsyncClient | None = None
+         self._build_id: str | None = None
+
+         # Initialize throttlers for rate limiting
+         self._ssr_throttler = Throttler(
+             ThrottleConfig(
+                 max_concurrent=settings.max_concurrent_ssr_searches,
+                 min_delay_ms=settings.min_ssr_delay_ms,
+                 jitter_ms=settings.ssr_jitter_ms,
+                 enabled=settings.throttling_enabled,
+             ),
+             name="ssr",
+         )
+         self._graphql_throttler = Throttler(
+             ThrottleConfig(
+                 max_concurrent=settings.max_concurrent_graphql,
+                 min_delay_ms=settings.min_graphql_delay_ms,
+                 jitter_ms=settings.graphql_jitter_ms,
+                 enabled=settings.throttling_enabled,
+             ),
+             name="graphql",
+         )
+
+         # Initialize cache for product details (24-hour TTL)
+         self._product_details_cache: TTLCache[ProductDetails] = TTLCache(
+             ttl_hours=24,
+             max_size=500,  # Cache up to 500 products
+         )
+
+     async def _get_client(self) -> httpx.AsyncClient:
+         """Get or create basic HTTP client (no auth cookies)."""
+         if self._client is None:
+             self._client = httpx.AsyncClient(
+                 timeout=httpx.Timeout(30.0),
+                 headers={
+                     "Content-Type": "application/json",
+                     "Accept": "application/json",
+                     **self._BROWSER_HEADERS,
+                 },
+                 follow_redirects=True,
+             )
+         return self._client
+
+     async def _get_authenticated_client(self) -> httpx.AsyncClient | None:
+         """Get HTTP client with authentication cookies.
+
+         Returns:
+             Authenticated client if cookies available, None otherwise
+         """
+         if not is_authenticated():
+             return None
+
+         # Always recreate to get fresh cookies
+         if self._auth_client:
+             await self._auth_client.aclose()
+
+         cookies = get_httpx_cookies()
+         if not cookies:
+             return None
+
+         self._auth_client = httpx.AsyncClient(
+             timeout=httpx.Timeout(30.0),
+             headers=self._BROWSER_HEADERS,
+             cookies=cookies,
+             follow_redirects=True,
+         )
+
+         logger.debug("Created authenticated client", cookie_count=len(cookies))
+         return self._auth_client
+
+     async def close(self) -> None:
+         """Close HTTP clients."""
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+         if self._auth_client:
+             await self._auth_client.aclose()
+             self._auth_client = None
+
+     async def _get_build_id(self) -> str:
+         """Extract Next.js build ID from HEB homepage.
+
+         The build ID is required for accessing _next/data endpoints.
+         It changes with each deployment.
+
+         Uses authenticated client when available to bypass WAF challenges.
+         """
+         if self._build_id:
+             return self._build_id
+
+         # Prefer authenticated client to bypass WAF/security challenges
+         client = await self._get_authenticated_client()
+         if not client:
+             client = await self._get_client()
+
+         response = await client.get("https://www.heb.com")
+         response.raise_for_status()
+
+         # Check for security challenge
+         if self._detect_security_challenge(response.text):
+             logger.warning("Security challenge detected when fetching build ID")
+             raise RuntimeError(
+                 "Security challenge blocked build ID extraction. Try session_refresh."
+             )
+
+         # Look for build ID in the HTML
+         # Pattern: /_next/static/{buildId}/_buildManifest.js
+         match = re.search(r'/_next/static/([a-zA-Z0-9_-]+)/_buildManifest\.js', response.text)
+         if match:
+             self._build_id = match.group(1)
+             logger.info("Extracted Next.js build ID", build_id=self._build_id)
+             return self._build_id
+
+         # Fallback: try to find it in data-nscript tags
+         match = re.search(r'"buildId":"([a-zA-Z0-9_-]+)"', response.text)
+         if match:
+             self._build_id = match.group(1)
+             logger.info("Extracted Next.js build ID from JSON", build_id=self._build_id)
+             return self._build_id
+
+         # Log the response for debugging
+         logger.error(
+             "Could not extract build ID",
+             response_length=len(response.text),
+             response_preview=response.text[:500] if response.text else "empty",
+         )
+         raise RuntimeError("Could not extract Next.js build ID from HEB homepage")
+
+     @with_retry(config=RetryConfig(max_attempts=3, base_delay=1.0))
+     async def _execute_persisted_query(
+         self,
+         operation_name: str,
+         variables: dict[str, Any],
+     ) -> dict[str, Any]:
+         """Execute a persisted GraphQL query.
+
+         Args:
+             operation_name: The name of the persisted operation
+             variables: Query variables
+
+         Returns:
+             Response data
+
+         Raises:
+             GraphQLError: If GraphQL returns errors
+             PersistedQueryNotFoundError: If the hash is not recognized
+             CircuitBreakerOpenError: If circuit is open
+         """
+         async with self._graphql_throttler:
+             self.circuit_breaker.check()
+
+             if operation_name not in PERSISTED_QUERIES:
+                 raise ValueError(f"Unknown operation: {operation_name}")
+
+             client = await self._get_client()
+
+             payload = {
+                 "operationName": operation_name,
+                 "variables": variables,
+                 "extensions": {
+                     "persistedQuery": {
+                         "version": 1,
+                         "sha256Hash": PERSISTED_QUERIES[operation_name],
+                     }
+                 },
+             }
+
+             try:
+                 response = await client.post(self.base_url, json=payload)
+                 response.raise_for_status()
+
+                 data: Any = response.json()
+
+                 # Check for persisted query errors
+                 if "errors" in data:
+                     for error in data["errors"]:
+                         if "PersistedQueryNotFound" in str(error):
+                             raise PersistedQueryNotFoundError(
+                                 f"Persisted query hash for '{operation_name}' is no longer valid"
+                             )
+
+                     raise GraphQLError(data["errors"])
+
+                 self.circuit_breaker.record_success()
+
+                 if isinstance(data, dict):
+                     payload_data = data.get("data")
+                     if isinstance(payload_data, dict):
+                         return cast(dict[str, Any], payload_data)
+                 return {}
+
+             except (httpx.HTTPError, GraphQLError) as e:
+                 self.circuit_breaker.record_failure()
+                 logger.error(
+                     "Persisted query failed",
+                     operation=operation_name,
+                     error=str(e),
+                 )
+                 raise
+
+     @with_retry(config=RetryConfig(max_attempts=3, base_delay=1.0))
+     async def _fetch_nextjs_data(
+         self,
+         path: str,
+         params: dict[str, str] | None = None,
+     ) -> dict[str, Any]:
+         """Fetch data from Next.js _next/data endpoint.
+
+         Args:
+             path: The page path (e.g., "search" for /search)
+             params: Query parameters
+
+         Returns:
+             Page props data
+         """
+         self.circuit_breaker.check()
+
+         build_id = await self._get_build_id()
+         client = await self._get_client()
+
+         url = f"https://www.heb.com/_next/data/{build_id}/en/{path}.json"
+
+         try:
+             response = await client.get(url, params=params)
+             response.raise_for_status()
+
+             data: Any = response.json()
+             self.circuit_breaker.record_success()
+
+             # Next.js data is wrapped in pageProps
+             if not isinstance(data, dict):
+                 return {}
+
+             page_props = data.get("pageProps")
+             if isinstance(page_props, dict):
+                 return cast(dict[str, Any], page_props)
+             return cast(dict[str, Any], data)
+
+         except httpx.HTTPError as e:
+             self.circuit_breaker.record_failure()
+             logger.error(
+                 "Next.js data fetch failed",
+                 path=path,
+                 error=str(e),
+             )
+             raise
+
+     async def search_stores(
+         self,
+         address: str,
+         radius_miles: int = 25,
+     ) -> StoreSearchResult:
+         """Search for HEB stores near an address.
+
+         Uses geocoding to handle informal location queries (neighborhoods,
+         landmarks) and tries multiple query variations against HEB's API.
+
+         Args:
+             address: Address, zip code, neighborhood, or landmark to search near
+             radius_miles: Search radius in miles
+
+         Returns:
+             StoreSearchResult with stores, geocoded location, and search feedback
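+
+         Example (illustrative):
+
+             result = await client.search_stores("Houston Heights", radius_miles=10)
+             if result.error:
+                 print(result.error, result.suggestions)
+             else:
+                 print(result.stores[0].name, result.stores[0].distance_miles)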
+         """
+         logger.info(
+             "Searching for stores",
+             address=address,
+             radius_miles=radius_miles,
+         )
+
+         attempts: list[SearchAttempt] = []
+         geocoded: GeocodedLocation | None = None
+         geocoding_result: GeocodingResult | None = None
+
+         # Step 1: Geocode the address
+         geocoding_service = GeocodingService()
+         try:
+             geocoding_result = await geocoding_service.geocode(address)
+             if geocoding_result:
+                 geocoded = GeocodedLocation(
+                     latitude=geocoding_result.latitude,
+                     longitude=geocoding_result.longitude,
+                     display_name=geocoding_result.display_name,
+                 )
+                 logger.info(
+                     "Geocoding successful",
+                     address=address,
+                     lat=geocoding_result.latitude,
+                     lon=geocoding_result.longitude,
+                 )
+         except Exception as e:
+             logger.warning("Geocoding failed", address=address, error=str(e))
+         finally:
+             await geocoding_service.close()
+
+         # Step 2: Generate query variations
+         if geocoding_result:
+             query_variations = geocoding_result.get_query_variations(address)
+         else:
+             # Geocoding failed - just try the original query
+             query_variations = [address]
+
+         # Step 3: Try each query variation until we get results
+         stores: list[Store] = []
+         for query in query_variations:
+             try:
+                 result_stores = await self._execute_store_search(query, radius_miles)
+                 attempts.append(SearchAttempt(
+                     query=query,
+                     result="success" if result_stores else "no_stores",
+                 ))
+
+                 if result_stores:
+                     stores = result_stores
+                     logger.info(
+                         "Store search successful",
+                         query=query,
+                         result_count=len(stores),
+                     )
+                     break
+
+             except Exception as e:
+                 logger.warning(
+                     "Store search query failed",
+                     query=query,
+                     error=str(e),
+                 )
+                 attempts.append(SearchAttempt(query=query, result="error"))
+                 continue
+
+         # Step 4: Calculate distances from geocoded point and sort
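+         # (GeocodingService.haversine_miles is assumed to implement the
+         # standard great-circle formula with Earth radius R ≈ 3959 mi:
+         #   d = 2R * asin(sqrt(sin^2(Δφ/2) + cos(φ1) * cos(φ2) * sin^2(Δλ/2))))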
+         if stores and geocoding_result:
+             for store in stores:
+                 if store.latitude is not None and store.longitude is not None:
+                     store.distance_miles = GeocodingService.haversine_miles(
+                         geocoding_result.latitude,
+                         geocoding_result.longitude,
+                         store.latitude,
+                         store.longitude,
+                     )
+             # Sort by calculated distance
+             stores.sort(
+                 key=lambda s: (
+                     s.distance_miles
+                     if s.distance_miles is not None
+                     else float("inf")
+                 )
+             )
+
+         # Step 5: Build response with feedback
+         error: str | None = None
+         suggestions: list[str] = []
+
+         if not stores:
+             if not geocoding_result:
+                 error = f"Couldn't locate '{address}'. Try a zip code or street address."
+                 suggestions = [
+                     "Use a Texas zip code (e.g., 77007)",
+                     "Try a specific street address",
+                 ]
+             else:
+                 location = geocoded.display_name if geocoded else address
+                 error = (
+                     f"No HEB stores found within {radius_miles} miles of {location}."
+                 )
+                 suggestions = [
+                     "HEB operates primarily in Texas",
+                     "Try increasing the search radius",
+                     "Verify this is a Texas location",
+                 ]
+
+         return StoreSearchResult(
+             stores=stores,
+             count=len(stores),
+             search_address=address,
+             geocoded=geocoded,
+             attempts=attempts,
+             error=error,
+             suggestions=suggestions,
+         )
+
+     async def _execute_store_search(
+         self,
+         query: str,
+         radius_miles: int,
+     ) -> list[Store]:
+         """Execute a single store search query against HEB's API.
+
+         Args:
+             query: Search query (zip, city/state, or address)
+             radius_miles: Search radius in miles
+
+         Returns:
+             List of stores (may be empty)
+         """
+         data = await self._execute_persisted_query(
+             "StoreSearch",
+             {
+                 "address": query,
+                 "radius": radius_miles,
+                 "fulfillmentChannels": [],
+                 "includeEcommInactive": False,
+                 "retailFormatCodes": ["P", "NP"],
+             },
+         )
+
+         stores = []
+         # API returns data in searchStoresByAddress.stores (not storeSearch)
+         store_search_data = data.get("searchStoresByAddress", {}) or data.get("storeSearch", {})
+         store_list = store_search_data.get("stores", [])
+
+         for store_result in store_list:
+             try:
+                 store = self._parse_store_result(store_result)
+                 if store:
+                     stores.append(store)
+             except Exception as e:
+                 logger.debug("Failed to parse store data", error=str(e))
+                 continue
+
+         return stores
+
+     def _parse_store_result(self, store_result: dict[str, Any]) -> Store | None:
+         """Parse store result from searchStoresByAddress response.
+
+         The API returns results with distanceMiles at top level and
+         store details nested in a 'store' object.
+
+         Args:
+             store_result: Store result dict from GraphQL response
+
+         Returns:
+             Store object or None if parsing fails
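+
+         Expected input shape (inferred from the parsing below; illustrative):
+
+             {"distanceMiles": 1.2,
+              "store": {"storeNumber": "737", "name": "...",
+                        "address": {"streetAddress": "...", "locality": "...",
+                                    "region": "TX", "postalCode": "77008"},
+                        "storeFulfillments": [{"name": "CURBSIDE_PICKUP"}]}}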
+         """
+         # Distance is at the top level
+         distance = store_result.get("distanceMiles")
+
+         # Store details are nested
+         store_data = store_result.get("store", store_result)
+
+         store_id = store_data.get("storeNumber") or store_data.get("id")
+         if not store_id:
+             return None
+
+         name = store_data.get("name", "")
+
+         # Build address from components (new format uses streetAddress/locality/region)
+         address_obj = store_data.get("address", {})
+         address_parts = []
+
+         street = address_obj.get("streetAddress") or store_data.get("address1", "")
+         if street:
+             address_parts.append(street)
+
+         city = address_obj.get("locality") or store_data.get("city", "")
+         state = address_obj.get("region") or store_data.get("state", "")
+         postal_code = address_obj.get("postalCode") or store_data.get("postalCode", "")
+
+         if city and state:
+             address_parts.append(f"{city}, {state} {postal_code}".strip())
+
+         address = ", ".join(address_parts) if address_parts else ""
+
+         # Extract coordinates
+         latitude = store_data.get("latitude")
+         longitude = store_data.get("longitude")
+
+         # Extract fulfillment channels to determine curbside/delivery support
+         # API returns data in storeFulfillments array with objects like {"name": "CURBSIDE_PICKUP"}
+         store_fulfillments = store_data.get("storeFulfillments", None)
+         if store_fulfillments is not None:
+             # Build list of fulfillment channel names
+             fulfillment_names = [
+                 f.get("name", "")
+                 for f in store_fulfillments
+                 if isinstance(f, dict)
+             ]
+             # Curbside = any fulfillment containing "CURBSIDE" (CURBSIDE_PICKUP, CURBSIDE_DELIVERY)
+             supports_curbside = any("CURBSIDE" in name for name in fulfillment_names)
+             # Delivery = ALCOHOL_DELIVERY or DELIVERY channel
+             supports_delivery = any(
+                 "DELIVERY" in name and "CURBSIDE" not in name
+                 for name in fulfillment_names
+             )
+         else:
+             # Legacy format: check fulfillmentChannels array of strings
+             fulfillment_channels = store_data.get("fulfillmentChannels", None)
+             if fulfillment_channels is not None:
+                 supports_curbside = (
+                     "PICKUP" in fulfillment_channels
+                     or "CURBSIDE" in fulfillment_channels
+                 )
+                 supports_delivery = "DELIVERY" in fulfillment_channels
+             else:
+                 # No fulfillment data - default to True for curbside (most stores support it)
+                 supports_curbside = True
+                 supports_delivery = False
+
+         return Store(
+             store_id=str(store_id),
+             name=name,
+             address=address,
+             phone=store_data.get("phone", ""),
+             latitude=float(latitude) if latitude else None,
+             longitude=float(longitude) if longitude else None,
+             distance_miles=float(distance) if distance else None,
+             supports_curbside=supports_curbside,
+             supports_delivery=supports_delivery,
+         )
+
+     def _parse_store_data(self, store_data: dict[str, Any]) -> Store | None:
+         """Parse store data from legacy StoreSearch response format.
+
+         Args:
+             store_data: Store dict from GraphQL response
+
+         Returns:
+             Store object or None if parsing fails
+         """
+         store_id = store_data.get("id") or store_data.get("storeNumber")
+         if not store_id:
+             return None
+
+         name = store_data.get("name", "")
+
+         # Build address from components
+         address_obj = store_data.get("address") or {}
+         address_parts = []
+         address1 = store_data.get("address1") or address_obj.get("address1", "")
+         if address1:
+             address_parts.append(address1)
+
+         city = store_data.get("city") or address_obj.get("city", "")
+         state = store_data.get("state") or address_obj.get("state", "")
+         postal_code = store_data.get("postalCode") or address_obj.get("postalCode", "")
+
+         if city and state:
+             address_parts.append(f"{city}, {state} {postal_code}".strip())
+
+         address = ", ".join(address_parts) if address_parts else ""
+
+         # Extract coordinates
+         latitude = store_data.get("latitude") or store_data.get("location", {}).get("latitude")
+         longitude = store_data.get("longitude") or store_data.get("location", {}).get("longitude")
+
+         # Extract distance if available
+         distance = store_data.get("distance") or store_data.get("distanceFromSearchLocation")
+
+         return Store(
+             store_id=str(store_id),
+             name=name,
+             address=address,
+             phone=store_data.get("phone", ""),
+             latitude=float(latitude) if latitude else None,
+             longitude=float(longitude) if longitude else None,
+             distance_miles=float(distance) if distance else None,
+         )
+
+     def _generate_query_variations(self, query: str) -> list[str]:
+         """Generate query variations to improve search results.
+
+         HEB's search is sensitive to exact query wording. This generates
+         variations to try when the original query returns no results.
+
+         Args:
+             query: Original search query
+
+         Returns:
+             List of query variations to try (original query first)
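+
+         Example (traced from the logic below; illustrative):
+
+             _generate_query_variations("heb ribeye steak")
+             # -> ["heb ribeye steak", "H-E-B ribeye steak",
+             #     "Meal Simple heb ribeye steak"]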
+         """
+         variations = [query]  # Always try original first
+         query_lower = query.lower()
+
+         # Expand common abbreviations
+         expanded = query
+         abbreviations = {
+             "ny ": "new york ",
+             "NY ": "New York ",
+             "heb ": "H-E-B ",
+             "HEB ": "H-E-B ",
+         }
+         for abbrev, full in abbreviations.items():
+             if abbrev.lower() in query_lower:
+                 expanded = query.replace(abbrev.strip(), full.strip())
+                 if expanded != query:
+                     variations.append(expanded)
+                 break
+
+         # Add "Meal Simple" prefix for meal-related queries
+         meal_keywords = ["steak", "chicken", "salmon", "pork", "beef", "shrimp",
+                          "asparagus", "potato", "meatloaf", "alfredo", "enchilada",
+                          "jambalaya", "bowl", "dinner", "entree"]
+         if (
+             any(kw in query_lower for kw in meal_keywords)
+             and "meal simple" not in query_lower
+         ):
+             variations.append(f"Meal Simple {query}")
+
+         # Add "H-E-B" prefix if not present
+         if "h-e-b" not in query_lower and "heb" not in query_lower:
+             variations.append(f"H-E-B {query}")
+
+         # Remove duplicates while preserving order
+         seen = set()
+         unique_variations = []
+         for v in variations:
+             v_lower = v.lower()
+             if v_lower not in seen:
+                 seen.add(v_lower)
+                 unique_variations.append(v)
+
+         return unique_variations
+
+     def _detect_security_challenge(self, html: str) -> bool:
+         """Detect if response is a WAF/captcha security challenge page.
+
+         HEB uses Incapsula (Imperva) WAF which may return challenge pages
+         instead of actual content when bot detection is triggered.
+
+         Args:
+             html: Response HTML content
+
+         Returns:
+             True if response appears to be a security challenge
+         """
+         challenge_indicators = [
+             "incapsula",
+             "reese84",
+             "_Incapsula_Resource",
+             "challenge-platform",
+             "cf-browser-verification",
+             "captcha",
+             "blocked",
+             "access denied",
+             "please verify you are a human",
+             "enable javascript and cookies",
+         ]
+         html_lower = html.lower()
+         return any(indicator in html_lower for indicator in challenge_indicators)
+
+     def _determine_fallback_reason(
+         self,
+         was_authenticated: bool,
+         security_challenge: bool,
+         attempts: list[ProductSearchAttempt],
+     ) -> str:
+         """Determine human-readable reason for fallback to typeahead.
+
+         Args:
+             was_authenticated: Whether auth cookies were available
+             security_challenge: Whether a security challenge was detected
+             attempts: List of search attempts made
+
+         Returns:
+             Human-readable explanation of why fallback was used
+         """
+         if not was_authenticated:
+             return "No authentication cookies available"
+         if security_challenge:
+             return (
+                 "Security challenge (WAF/captcha) blocked API requests. "
+                 "Use session_refresh (Playwright) to refresh the session."
+             )
+         if all(a.result == "empty" for a in attempts if a.method in ("ssr", "typeahead_as_ssr")):
+             return "All SSR queries returned empty results - product may not exist"
+         if all(a.result == "error" for a in attempts if a.method in ("ssr", "typeahead_as_ssr")):
+             return "All SSR queries failed with errors"
+         return "SSR search unsuccessful"
+
+     def _get_session_refresh_instructions(self) -> list[str]:
+         """Get Playwright instructions for refreshing the session.
+
+         When session tokens are stale, use Playwright to refresh
+         the bot detection tokens before retrying API calls.
+
+         Returns:
+             Step-by-step instructions for session refresh
+         """
+         settings = get_settings()
+         return [
+             "Session refresh required. Run these Playwright commands:",
+             "",
+             "1. browser_navigate('https://www.heb.com')",
+             "",
+             "2. browser_wait_for({ time: 3 })  # Wait for bot detection to initialize",
+             "",
+             "3. browser_type('[data-qe-id=\"headerSearchInput\"]', 'test')",
+             "",
+             "4. browser_press_key('Enter')",
+             "",
+             "5. browser_wait_for({ selector: '[data-qe-id=\"productCard\"]', timeout: 10000 })",
+             "",
+             (
+                 "6. browser_run_code with: await page.context().storageState({ path: '"
+                 f"{settings.auth_state_path}"
+                 "' })"
+             ),
+             "",
+             "Then retry your search.",
+         ]
+
+     def _get_playwright_search_instructions(self, query: str, store_id: str) -> list[str]:
+         """Get instructions for using Playwright MCP to perform the search.
+
+         When security challenges block httpx requests, Playwright can
+         bypass them because it runs in a real browser.
+
+         Args:
+             query: Original search query
+             store_id: Store ID for context
+
+         Returns:
+             Step-by-step instructions for Playwright-based search
+         """
+         encoded_query = query.replace(" ", "+")
+         return [
+             "Use Playwright MCP to search (bypasses bot detection):",
+             "",
+             f"1. browser_navigate('https://www.heb.com/search?q={encoded_query}')",
+             "",
+             "2. Wait for results to load:",
+             "   browser_wait_for({ selector: '[data-qe-id=\"productCard\"]', timeout: 10000 })",
+             "",
+             "3. Take a snapshot to see the results:",
+             "   browser_snapshot()",
+             "",
+             "4. Extract product data (optional - run in browser):",
+             "   browser_run_code with:",
+             "   ```javascript",
+             "   const products = [...document.querySelectorAll('[data-qe-id=\"productCard\"]')]",
+             "       .slice(0, 20)",
+             "       .map(card => ({",
+             (
+                 "         name: card.querySelector('[data-qe-id=\"productTitle\"]')"
+                 "?.textContent?.trim(),"
+             ),
+             (
+                 "         price: card.querySelector('[data-qe-id=\"productPrice\"]')"
+                 "?.textContent?.trim(),"
+             ),
+             "         sku: card.dataset.productId || card.querySelector('[data-sku]')?.dataset?.sku,",
+             "       }));",
+             "   return JSON.stringify(products, null, 2);",
+             "   ```",
+             "",
+             "5. After browsing, save refreshed session cookies:",
+             (
+                 "   browser_run_code with: await page.context().storageState({ path: "
+                 "'~/.texas-grocery-mcp/auth.json' })"
+             ),
+         ]
+
+     async def search_products(
+         self,
+         query: str,
+         store_id: str,
+         limit: int = 20,
+     ) -> ProductSearchResult:
+         """Search for products at a store.
+
+         Tries authenticated search first (fast, full data), falls back to
+         typeahead suggestions if no auth cookies available. When authenticated
+         search returns no results, tries query variations before falling back.
+
+         Args:
+             query: Search query
+             store_id: Store ID for inventory/pricing
+             limit: Maximum results to return
+
+         Returns:
+             ProductSearchResult with products and diagnostic metadata
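+
+         Example (illustrative):
+
+             result = await client.search_products("tortillas", store_id="737")
+             if result.data_source == "ssr":
+                 ...  # full product data from the search page
+             else:
+                 ...  # typeahead fallback; see result.fallback_reason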
+         """
+
+         attempts: list[ProductSearchAttempt] = []
+         security_challenge_detected = False
+         search_url = f"https://www.heb.com/search?q={query.replace(' ', '+')}"
+
+         # Try authenticated search first
+         auth_client = await self._get_authenticated_client()
+         if auth_client:
+             # Generate query variations to try
+             query_variations = self._generate_query_variations(query)
+
+             for variation in query_variations:
+                 try:
+                     products, was_challenge = await self._search_products_ssr(
+                         auth_client, variation, store_id, limit
+                     )
+
+                     if was_challenge:
+                         security_challenge_detected = True
+                         attempts.append(ProductSearchAttempt(
+                             query=variation,
+                             method="ssr",
+                             result="security_challenge",
+                         ))
+                         logger.error(
+                             (
+                                 "Security challenge detected - stopping search attempts, "
+                                 "session refresh required"
+                             ),
+                             query=variation,
+                         )
+                         # Fail-fast: don't waste more queries, session needs refresh
+                         break
+
+                     if products:
+                         attempts.append(ProductSearchAttempt(
+                             query=variation,
+                             method="ssr",
+                             result="success",
+                         ))
+                         logger.info(
+                             "SSR search successful",
+                             original_query=query,
+                             effective_query=variation,
+                             result_count=len(products),
+                         )
+                         return ProductSearchResult(
+                             products=products,
+                             count=len(products),
+                             query=query,
+                             store_id=store_id,
+                             data_source="ssr",
+                             authenticated=True,
+                             attempts=attempts,
+                             search_url=search_url,
+                         )
+                     else:
+                         attempts.append(ProductSearchAttempt(
+                             query=variation,
+                             method="ssr",
+                             result="empty",
+                         ))
+
+                 except Exception as e:
+                     attempts.append(ProductSearchAttempt(
+                         query=variation,
+                         method="ssr",
+                         result="error",
+                         error_detail=str(e),
+                     ))
+                     logger.warning(
+                         "Authenticated search failed for variation",
+                         query=variation,
+                         error=str(e),
+                     )
+                     continue
+
+             # If all variations failed, try using typeahead suggestions as queries
+             # Skip this if security challenge was detected - no point in trying more SSR requests
+             if not security_challenge_detected:
+                 try:
+                     suggestions = await self.get_typeahead(query)
+                     if suggestions:
+                         for suggestion in suggestions[:2]:  # Try top 2 suggestions
+                             try:
+                                 products, was_challenge = await self._search_products_ssr(
+                                     auth_client, suggestion, store_id, limit
+                                 )
+
+                                 if was_challenge:
+                                     security_challenge_detected = True
+                                     attempts.append(ProductSearchAttempt(
+                                         query=suggestion,
+                                         method="typeahead_as_ssr",
+                                         result="security_challenge",
+                                     ))
+                                     # Fail-fast: don't try more suggestions
+                                     break
+
+                                 if products:
+                                     attempts.append(ProductSearchAttempt(
+                                         query=suggestion,
+                                         method="typeahead_as_ssr",
+                                         result="success",
+                                     ))
+                                     logger.info(
+                                         "SSR search successful via typeahead suggestion",
+                                         original_query=query,
+                                         suggestion_used=suggestion,
+                                         result_count=len(products),
+                                     )
+                                     return ProductSearchResult(
+                                         products=products,
+                                         count=len(products),
+                                         query=query,
+                                         store_id=store_id,
+                                         data_source="ssr",
+                                         authenticated=True,
+                                         attempts=attempts,
+                                         search_url=search_url,
+                                     )
+                                 else:
+                                     attempts.append(ProductSearchAttempt(
+                                         query=suggestion,
+                                         method="typeahead_as_ssr",
+                                         result="empty",
+                                     ))
+
+                             except Exception as e:
+                                 attempts.append(ProductSearchAttempt(
+                                     query=suggestion,
+                                     method="typeahead_as_ssr",
+                                     result="error",
+                                     error_detail=str(e),
+                                 ))
+                                 continue
+                 except Exception as e:
+                     logger.debug("Typeahead-guided search failed", error=str(e))
+
+         # Fallback to typeahead suggestions only
+         fallback_reason = self._determine_fallback_reason(
+             was_authenticated=auth_client is not None,
+             security_challenge=security_challenge_detected,
+             attempts=attempts,
+         )
+
+         logger.info(
+             "Product search using typeahead fallback",
+             query=query,
+             store_id=store_id,
+             fallback_reason=fallback_reason,
+             security_challenge=security_challenge_detected,
+         )
+
+         # Get Playwright instructions if security challenge was detected
+         playwright_instructions = None
+         if security_challenge_detected:
+             playwright_instructions = self._get_playwright_search_instructions(query, store_id)
+
+         try:
+             suggestions = await self.get_typeahead(query)
+         except Exception as e:
+             logger.error("Product search failed", query=query, error=str(e))
+             return ProductSearchResult(
+                 products=[],
+                 count=0,
+                 query=query,
+                 store_id=store_id,
+                 data_source="typeahead_suggestions",
+                 authenticated=auth_client is not None,
+                 fallback_reason=fallback_reason,
+                 security_challenge_detected=security_challenge_detected,
+                 attempts=attempts,
+                 search_url=search_url,
+                 playwright_fallback_available=security_challenge_detected,
+                 playwright_instructions=playwright_instructions,
+             )
+
+         # Return suggestions as placeholder products
+         products = []
+         for suggestion in suggestions[:limit]:
+             product = Product(
+                 sku=f"suggestion-{suggestion.lower().replace(' ', '-')}",
+                 name=suggestion,
+                 price=0.0,  # Price unavailable via typeahead
+                 available=True,
+                 brand=None,
+                 size=None,
+                 price_per_unit=None,
+                 image_url=None,
+                 aisle=None,
+                 on_sale=False,
+                 original_price=None,
+             )
+             products.append(product)
+             attempts.append(ProductSearchAttempt(
+                 query=suggestion,
+                 method="typeahead",
+                 result="success",
+             ))
+
+         return ProductSearchResult(
+             products=products,
+             count=len(products),
+             query=query,
+             store_id=store_id,
+             data_source="typeahead_suggestions",
+             authenticated=auth_client is not None,
+             fallback_reason=fallback_reason,
+             security_challenge_detected=security_challenge_detected,
+             attempts=attempts,
+             search_url=search_url,
+             playwright_fallback_available=security_challenge_detected,
+             playwright_instructions=playwright_instructions,
+         )
+
+     # ========================================================================
+     # Product Details
+     # ========================================================================
+
+     async def get_product_details(
+         self,
+         product_id: str,
+         store_id: str | None = None,
+     ) -> ProductDetails | None:
+         """Get comprehensive details for a single product.
+
+         Fetches the product detail page via SSR and extracts full product
+         information including ingredients, nutrition, warnings, and instructions.
+
+         Results are cached for 24 hours to reduce API calls since product
+         details rarely change.
+
+         Args:
+             product_id: The product ID (e.g., '127074')
+             store_id: Optional store ID (uses session's store if not provided)
+
+         Returns:
+             ProductDetails with full product information, or None if not found
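+
+         Example (illustrative):
+
+             details = await client.get_product_details("127074", store_id="737")
+             if details:
+                 print(details.name, details.price, details.ingredients)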
+         """
+
+         # Check cache first
+         cache_key = f"{product_id}:{store_id or 'default'}"
+         cached = self._product_details_cache.get(cache_key)
+         if cached:
+             logger.info(
+                 "Product details cache hit",
+                 product_id=product_id,
+                 name=cached.name,
+             )
+             return cached
+
+         # Pre-fetch build ID before getting auth client
+         # (prevents client lifecycle issues since _get_build_id may create a client)
+         await self._get_build_id()
+
+         auth_client = await self._get_authenticated_client()
+         if not auth_client:
+             logger.warning("No authenticated client for product details")
+             # Try with unauthenticated client as fallback
+             auth_client = await self._get_client()
+
+         try:
+             details = await self._get_product_details_ssr(auth_client, product_id)
+             if details:
+                 # Cache the result
+                 self._product_details_cache.set(cache_key, details)
+                 logger.info(
+                     "Product details fetched and cached",
+                     product_id=product_id,
+                     name=details.name,
+                 )
+             return details
+         except Exception as e:
+             logger.error(
+                 "Failed to get product details",
+                 product_id=product_id,
+                 error=str(e),
+             )
+             return None
+
+     def get_product_details_cache_stats(self) -> dict[str, Any]:
+         """Get statistics about the product details cache.
+
+         Returns:
+             Dict with cache stats (size, valid_entries, ttl_hours, etc.)
+         """
+         return self._product_details_cache.stats()
+
+     def clear_product_details_cache(self) -> None:
+         """Clear the product details cache."""
+         self._product_details_cache.clear()
+
+     @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
+     async def _get_product_details_ssr(
+         self,
+         client: httpx.AsyncClient,
+         product_id: str,
+     ) -> ProductDetails | None:
+         """Fetch product details via SSR data endpoint.
+
+         Args:
+             client: HTTP client (authenticated preferred)
+             product_id: Product ID to fetch
+
+         Returns:
+             ProductDetails or None if not found/error
+         """
+         async with self._ssr_throttler:
+             self.circuit_breaker.check()
+
+             # Get build ID for SSR endpoint
+             build_id = await self._get_build_id()
+
+             url = f"https://www.heb.com/_next/data/{build_id}/en/product-detail/{product_id}.json"
+             logger.debug("Fetching product details SSR", url=url, product_id=product_id)
+
+             try:
+                 response = await client.get(url)
+
+                 # 404 means product doesn't exist
+                 if response.status_code == 404:
+                     logger.info("Product not found", product_id=product_id)
+                     return None
+
+                 response.raise_for_status()
+
+                 # Check for security challenge
+                 if response.headers.get(
+                     "content-type", ""
+                 ).startswith("text/html") and self._detect_security_challenge(
+                     response.text
+                 ):
+                     logger.warning(
+                         "Security challenge detected in product details response",
+                         product_id=product_id,
+                     )
+                     return None
+
+                 data = response.json()
+
+                 # Try standard Next.js SSR structure first
+                 product_data = data.get("pageProps", {}).get("product")
+
+                 # Fallback to props wrapper if needed
+                 if not product_data:
+                     product_data = data.get("props", {}).get("pageProps", {}).get("product")
+
+                 if not product_data:
+                     page_props_keys = (
+                         list(data.get("pageProps", {}).keys())
+                         if "pageProps" in data
+                         else None
+                     )
+                     logger.warning(
+                         "No product data in response",
+                         product_id=product_id,
+                         response_keys=list(data.keys()),
+                         pageProps_keys=page_props_keys,
+                     )
+                     return None
+
+                 self.circuit_breaker.record_success()
+                 return self._parse_product_details(product_data)
+
+             except httpx.HTTPStatusError as e:
+                 logger.error(
+                     "HTTP error fetching product details",
+                     product_id=product_id,
+                     status=e.response.status_code,
+                 )
+                 self.circuit_breaker.record_failure()
+                 return None
+             except Exception as e:
+                 logger.error(
+                     "Error fetching product details",
+                     product_id=product_id,
+                     error=str(e),
+                 )
+                 self.circuit_breaker.record_failure()
+                 raise
+
+     def _parse_product_details(self, data: dict[str, Any]) -> ProductDetails:
+         """Parse product detail JSON into ProductDetails model.
+
+         Args:
+             data: Raw product data from __NEXT_DATA__ pageProps.product
+
+         Returns:
+             Parsed ProductDetails model
+         """
+         from texas_grocery_mcp.models.product import (
+             ExtendedNutrition,
+             ProductDetails,
+         )
+
+         # Extract basic info
+         product_id = str(data.get("id", ""))
+         name = data.get("fullDisplayName", "")
+
+         # Extract SKU info (use first SKU)
+         skus = data.get("SKUs", [])
+         sku_data = skus[0] if skus else {}
+         sku = str(sku_data.get("id", ""))
+         upc = sku_data.get("twelveDigitUPC")
+         size = sku_data.get("customerFriendlySize")
+
+         # Extract brand
+         brand_info = data.get("brand", {})
+         brand = brand_info.get("name") if brand_info else None
+         is_own_brand = brand_info.get("isOwnBrand", False) if brand_info else False
+
+         # Extract pricing from context prices
+         price = 0.0
+         price_online = None
+         on_sale = False
+         is_price_cut = False
+         price_per_unit = None
+
+         context_prices = sku_data.get("contextPrices", [])
+         for cp in context_prices:
+             context = cp.get("context", "")
+             list_price = cp.get("listPrice", {}).get("amount", 0.0)
+             sale_price = cp.get("salePrice", {}).get("amount", list_price)
+             unit_price = cp.get("unitListPrice", {})
+
+             if context == "CURBSIDE":
+                 price = sale_price if cp.get("isOnSale") else list_price
+                 on_sale = cp.get("isOnSale", False)
+                 is_price_cut = cp.get("isPriceCut", False)
+                 if unit_price:
+                     formatted_amount = unit_price.get("formattedAmount", "")
+                     unit = unit_price.get("unit", "")
+                     price_per_unit = f"{formatted_amount} / {unit}"
+             elif context == "ONLINE":
+                 price_online = sale_price if cp.get("isOnSale") else list_price
+
+         # Extract availability
+         inventory = data.get("inventory", {})
+         available = inventory.get("inventoryState") == "IN_STOCK"
+
+         # Extract availability channels
+         availability_channels = sku_data.get("productAvailability", [])
+
+         # Extract ingredients (string, not list)
+         ingredients = data.get("ingredientStatement")
+
+         # Extract safety warning
+         safety_warning = data.get("safetyWarning")
+
+         # Extract instructions
+         instructions = data.get("preparationInstructions")
+
+         # Extract dietary attributes from lifestyles
+         lifestyles = data.get("lifestyles", [])
+         dietary_attributes = [
+             lifestyle.get("formattedName", "")
+             for lifestyle in lifestyles
+             if lifestyle.get("formattedName")
+         ]
+
+         # Extract nutrition labels
+         nutrition = None
+         nutrition_labels = data.get("nutritionLabels", [])
+         if nutrition_labels:
+             nl = nutrition_labels[0]
+             nutrients = self._parse_nutrients(nl.get("nutrients", []))
+             vitamins = self._parse_nutrients(nl.get("vitaminsAndMinerals", []))
+
+             nutrition = ExtendedNutrition(
+                 serving_size=nl.get("servingSize"),
+                 servings_per_container=nl.get("servingsPerContainer"),
+                 calories=nl.get("calories"),
+                 label_modifier=nl.get("labelModifier"),
+                 nutrients=nutrients,
+                 vitamins_and_minerals=vitamins,
+             )
+
+         # Extract category path from breadcrumbs
+         breadcrumbs = data.get("breadcrumbs", [])
+         category_path = [b.get("title", "") for b in breadcrumbs if b.get("title")]
+         # Remove "H-E-B" from path if present (it's always first)
+         if category_path and category_path[0] == "H-E-B":
+             category_path = category_path[1:]
+
+         # Extract images
+         image_url = None
+         product_images = data.get("productImageUrls", [])
+         if product_images:
+             # Prefer MEDIUM size
+             for img in product_images:
+                 if img.get("size") == "MEDIUM":
+                     image_url = img.get("url")
+                     break
+             if not image_url and product_images:
+                 image_url = product_images[0].get("url")
+
+         images = data.get("carouselImageUrls", [])
+
+         # Extract location
+         location = None
+         product_location = data.get("productLocation", {})
+         if product_location:
+             location = product_location.get("location")
+
+         # Extract store ID
+         store_id = data.get("storeId")
+
+         # Extract SNAP eligibility
+         is_snap_eligible = data.get("isEbtSnapProduct", False)
+
+         # Extract product URL
+         product_url = data.get("productPageURL")
+
+         # Extract description
+         description = data.get("productDescription")
+
+         return ProductDetails(
+             product_id=product_id,
+             sku=sku,
+             upc=upc,
+             name=name,
+             description=description,
+             brand=brand,
+             is_own_brand=is_own_brand,
+             price=price,
+             price_online=price_online,
+             on_sale=on_sale,
+             is_price_cut=is_price_cut,
+             available=available,
+             price_per_unit=price_per_unit,
+             size=size,
+             ingredients=ingredients,
+             safety_warning=safety_warning,
+             instructions=instructions,
+             dietary_attributes=dietary_attributes,
+             nutrition=nutrition,
+             category_path=category_path,
+             image_url=image_url,
+             images=images,
+             location=location,
+             store_id=store_id,
+             availability_channels=availability_channels,
+             is_snap_eligible=is_snap_eligible,
+             product_url=product_url,
+         )
+
+     def _parse_nutrients(self, nutrients_data: list[dict[str, Any]]) -> list[NutrientInfo]:
+         """Parse nutrients list with nested sub_items.
+
+         Args:
+             nutrients_data: List of nutrient dicts from API
+
+         Returns:
+             List of NutrientInfo models
+         """
+         from texas_grocery_mcp.models.product import NutrientInfo
+
+         result = []
+         for n in nutrients_data:
+             sub_items = None
+             if n.get("subItems"):
+                 sub_items = self._parse_nutrients(n["subItems"])
+
+             result.append(NutrientInfo(
+                 title=n.get("title", ""),
+                 unit=n.get("unit", ""),
+                 percentage=n.get("percentage"),
+                 font_modifier=n.get("fontModifier"),
+                 sub_items=sub_items,
+             ))
+         return result
+
+     @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
+     async def _search_products_ssr(
+         self,
+         client: httpx.AsyncClient,
+         query: str,
+         store_id: str,
+         limit: int = 20,
+     ) -> tuple[list[Product], bool]:
+         """Search products using authenticated SSR page fetch.
+
+         Fetches the search results page HTML and extracts product data
+         from the embedded __NEXT_DATA__ JSON.
+
+         Args:
+             client: Authenticated httpx client with cookies
+             query: Search query
+             store_id: Store ID (used for context)
+             limit: Maximum results to return
+
+         Returns:
+             Tuple of (products list, security_challenge_detected)
+         """
+         async with self._ssr_throttler:
+             self.circuit_breaker.check()
+
+             url = f"https://www.heb.com/search?q={query.replace(' ', '+')}"
+             logger.debug("Fetching SSR search results", url=url)
+
+             try:
+                 response = await client.get(url)
+                 response.raise_for_status()
+
+                 # Check for security challenge before parsing
+                 if self._detect_security_challenge(response.text):
+                     logger.warning(
+                         "Security challenge detected in SSR response",
+                         query=query,
+                         response_length=len(response.text),
+                     )
+                     self.circuit_breaker.record_failure()
+                     return [], True
+
+                 # Extract __NEXT_DATA__ JSON from HTML
+                 match = re.search(
+                     r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+                     response.text,
+                     re.DOTALL,
+                 )
+
+                 if not match:
+                     logger.warning(
+                         "No __NEXT_DATA__ found in response",
+                         query=query,
+                         response_length=len(response.text),
+                     )
+                     return [], False
+
+                 next_data = json.loads(match.group(1))
+                 products = self._parse_ssr_products(next_data, limit)
+
+                 self.circuit_breaker.record_success()
+                 logger.info(
+                     "SSR product search successful",
+                     query=query,
+                     result_count=len(products),
+                 )
+
+                 return products, False
+
+             except httpx.HTTPError as e:
+                 self.circuit_breaker.record_failure()
+                 logger.error("SSR search request failed", query=query, error=str(e))
+                 raise
+
+     def _parse_ssr_products(self, next_data: dict[str, Any], limit: int = 20) -> list[Product]:
+         """Parse products from Next.js SSR data.
+
+         Extracts product data from the searchGridV2 component in the
+         page props layout.
+
+         Args:
+             next_data: Parsed __NEXT_DATA__ JSON
+             limit: Maximum products to return
+
+         Returns:
+             List of Product objects
+         """
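+         # Assumed document shape (inferred from the traversal below):
+         #   {"props": {"pageProps": {"layout": {"visualComponents": [
+         #       {"type": "searchGridV2", "items": [<product item>, ...]}, ...]}}}}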
1560
+ products: list[Product] = []
1561
+
1562
+ try:
1563
+ # Navigate to search grid items
1564
+ layout = next_data.get("props", {}).get("pageProps", {}).get("layout", {})
1565
+ visual_components = layout.get("visualComponents", [])
1566
+
1567
+ # Find searchGridV2 component
1568
+ search_grid = None
1569
+ for component in visual_components:
1570
+ if component.get("type") == "searchGridV2":
1571
+ search_grid = component
1572
+ break
1573
+
1574
+ if not search_grid:
1575
+ logger.debug("No searchGridV2 component found")
1576
+ return []
1577
+
1578
+ items = search_grid.get("items", [])
1579
+
1580
+ for item in items[:limit]:
1581
+ try:
1582
+ product = self._parse_ssr_product_item(item)
1583
+ if product:
1584
+ products.append(product)
1585
+ except Exception as e:
1586
+ logger.debug("Failed to parse product item", error=str(e))
1587
+ continue
1588
+
1589
+ except Exception as e:
1590
+ logger.error("Failed to parse SSR products", error=str(e))
1591
+
1592
+ return products
1593
+
1594
+ def _parse_ssr_product_item(self, item: dict[str, Any]) -> Product | None:
1595
+ """Parse a single product item from SSR data.
1596
+
1597
+ Args:
1598
+ item: Product item dict from searchGridV2.items
1599
+
1600
+ Returns:
1601
+ Product object or None if parsing fails
1602
+ """
1603
+ if item.get("__typename") != "Product":
1604
+ return None
1605
+
1606
+ # Extract basic info
1607
+ product_id = item.get("id", "")
1608
+ display_name = item.get("fullDisplayName") or item.get("displayName", "")
1609
+
1610
+ # Extract brand
1611
+ brand_info = item.get("brand", {})
1612
+ brand = brand_info.get("name") if brand_info else None
1613
+
1614
+ # Extract SKU and pricing
1615
+ skus = item.get("SKUs", [])
1616
+ sku_data = skus[0] if skus else {}
1617
+ sku_id = sku_data.get("id", "")
1618
+ size = sku_data.get("customerFriendlySize", "")
1619
+
1620
+ # Get pricing (prefer CURBSIDE context, fallback to ONLINE)
1621
+ price = 0.0
1622
+ price_per_unit = None
1623
+ on_sale = False
1624
+ original_price = None
1625
+
1626
+ context_prices = sku_data.get("contextPrices", [])
1627
+ for ctx_price in context_prices:
1628
+ context = ctx_price.get("context", "")
1629
+ if context in ("CURBSIDE", "CURBSIDE_PICKUP", "ONLINE"):
1630
+ list_price = ctx_price.get("listPrice", {})
1631
+ sale_price = ctx_price.get("salePrice", {})
1632
+ unit_price = ctx_price.get("unitListPrice", {})
1633
+
1634
+ price = sale_price.get("amount", 0.0) or list_price.get("amount", 0.0)
1635
+
1636
+ if unit_price:
1637
+ unit_amount = unit_price.get("amount", 0.0)
1638
+ unit_type = unit_price.get("unit", "")
1639
+ if unit_amount and unit_type:
1640
+ price_per_unit = f"${unit_amount:.2f}/{unit_type}"
1641
+
1642
+ on_sale = ctx_price.get("isOnSale", False) or ctx_price.get("isPriceCut", False)
1643
+ if on_sale:
1644
+ original_price = list_price.get("amount")
1645
+
1646
+ break # Use first matching context
1647
+
1648
+ # Extract inventory
1649
+ inventory = item.get("inventory", {})
1650
+ inventory_state = inventory.get("inventoryState", "")
1651
+ available = inventory_state == "IN_STOCK"
1652
+
1653
+ # Extract image URL
1654
+ images = item.get("productImageUrls", [])
1655
+ image_url = None
1656
+ for img in images:
1657
+ if img.get("size") == "MEDIUM":
1658
+ image_url = img.get("url")
1659
+ break
1660
+ if not image_url and images:
1661
+ image_url = images[0].get("url")
1662
+
1663
+ # Extract aisle/location
1664
+ location = item.get("productLocation", {})
1665
+ aisle = location.get("location") if location else None
1666
+
1667
+ # Extract coupon flag
1668
+ has_coupon = item.get("showCouponFlag", False)
1669
+
1670
+ return Product(
1671
+ sku=sku_id or product_id,
1672
+ product_id=product_id, # Store product ID separately for cart operations
1673
+ name=display_name,
1674
+ price=price,
1675
+ available=available,
1676
+ brand=brand,
1677
+ size=size,
1678
+ price_per_unit=price_per_unit,
1679
+ image_url=image_url,
1680
+ aisle=aisle,
1681
+ on_sale=on_sale,
1682
+ original_price=original_price,
1683
+ has_coupon=has_coupon,
1684
+ )
1685
+
1686
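+ # Illustrative sketch, not part of the client: the rough shape of a
+ # searchGridV2 item that _parse_ssr_product_item consumes. All values are
+ # hypothetical; real SSR payloads carry many more fields.
+ #
+ #   item = {
+ #       "__typename": "Product",
+ #       "id": "123456",
+ #       "fullDisplayName": "H-E-B Whole Milk, 1 gal",
+ #       "brand": {"name": "H-E-B"},
+ #       "SKUs": [{"id": "sku-1", "customerFriendlySize": "1 gal",
+ #                 "contextPrices": [{"context": "CURBSIDE",
+ #                                    "listPrice": {"amount": 3.49},
+ #                                    "salePrice": {"amount": 2.99},
+ #                                    "unitListPrice": {"amount": 2.99, "unit": "gal"},
+ #                                    "isOnSale": True}]}],
+ #       "inventory": {"inventoryState": "IN_STOCK"},
+ #   }
+ #   # _parse_ssr_product_item(item) -> Product(price=2.99, on_sale=True, ...)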
+ async def get_categories(self) -> list[dict[str, Any]]:
1687
+ """Get shop navigation categories.
1688
+
1689
+ Returns:
1690
+ List of category dictionaries with id, name, href, and subcategories
1691
+ """
1692
+ try:
1693
+ data = await self._execute_persisted_query("ShopNavigation", {})
1694
+ categories = data.get("shopNavigation", [])
1695
+ return [
1696
+ {
1697
+ "id": cat.get("id"),
1698
+ "name": cat.get("displayName"),
1699
+ "href": cat.get("href"),
1700
+ "subcategories": [
1701
+ {"id": sub.get("id"), "name": sub.get("displayName")}
1702
+ for sub in cat.get("subCategories", [])
1703
+ ],
1704
+ }
1705
+ for cat in categories
1706
+ ]
1707
+ except Exception as e:
1708
+ logger.error("Failed to get categories", error=str(e))
1709
+ return []
1710
+
1711
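+ # Illustrative return shape (hypothetical IDs and names):
+ #   [{"id": "490001", "name": "Fruit & vegetables", "href": "/category/...",
+ #     "subcategories": [{"id": "490002", "name": "Fresh fruit"}]}]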
+ async def get_typeahead(self, term: str) -> list[str]:
1712
+ """Get search suggestions for a term.
1713
+
1714
+ Args:
1715
+ term: Partial search term
1716
+
1717
+ Returns:
1718
+ List of suggested search terms
1719
+ """
1720
+ try:
1721
+ data = await self._execute_persisted_query(
1722
+ "typeaheadContent",
1723
+ {"term": term, "searchMode": "MAIN_SEARCH"},
1724
+ )
1725
+
1726
+ suggestions: list[str] = []
1727
+ content = data.get("typeaheadContent", {})
1728
+ vertical_stack = content.get("verticalStack", [])
1729
+
1730
+ for section in vertical_stack:
1731
+ typename = section.get("__typename", "")
1732
+ if "SuggestedSearches" in typename:
1733
+ suggestions.extend(section.get("terms", []))
1734
+ elif "TrendingSearches" in typename:
1735
+ suggestions.extend(section.get("trendingSearches", []))
1736
+
1737
+ return suggestions
1738
+
1739
+ except Exception as e:
1740
+ logger.error("Typeahead failed", term=term, error=str(e))
1741
+ return []
1742
+
1743
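+ # Hedged usage sketch, assuming `client` is an initialized instance:
+ #   suggestions = await client.get_typeahead("tort")
+ #   # e.g. ["tortillas", "tortilla chips", ...] depending on HEB's live data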
+ async def add_to_cart(
1744
+ self,
1745
+ product_id: str,
1746
+ sku_id: str,
1747
+ quantity: int = 1,
1748
+ ) -> dict[str, Any]:
1749
+ """Add an item to the cart using authenticated GraphQL.
1750
+
1751
+ Requires authentication cookies to be available.
1752
+
1753
+ Args:
1754
+ product_id: The product ID
1755
+ sku_id: The SKU ID
1756
+ quantity: Number to add
1757
+
1758
+ Returns:
1759
+ Cart response data or error dict if not authenticated
1760
+ """
1761
+ auth_client = await self._get_authenticated_client()
1762
+ if not auth_client:
1763
+ return {"error": True, "code": "NOT_AUTHENTICATED", "message": "Login required"}
1764
+
1765
+ return await self._execute_persisted_query_with_client(
1766
+ auth_client,
1767
+ "cartItemV2",
1768
+ {
1769
+ "userIsLoggedIn": True,
1770
+ "productId": product_id,
1771
+ "skuId": sku_id,
1772
+ "quantity": quantity,
1773
+ },
1774
+ )
1775
+
1776
+ async def get_cart(self) -> dict[str, Any]:
1777
+ """Get current cart contents using authenticated GraphQL.
1778
+
1779
+ Requires authentication cookies to be available.
1780
+
1781
+ Returns:
1782
+ Cart data or error dict if not authenticated
1783
+ """
1784
+ auth_client = await self._get_authenticated_client()
1785
+ if not auth_client:
1786
+ return {"error": True, "code": "NOT_AUTHENTICATED", "message": "Login required"}
1787
+
1788
+ return await self._execute_persisted_query_with_client(
1789
+ auth_client,
1790
+ "cartEstimated",
1791
+ {"userIsLoggedIn": True},
1792
+ )
1793
+
1794
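+ # Hedged end-to-end sketch, assuming an authenticated session and that
+ # `client` is an instance of this class (IDs are hypothetical):
+ #   result = await client.add_to_cart(product_id="123456", sku_id="sku-1")
+ #   if result.get("code") == "NOT_AUTHENTICATED":
+ #       ...  # prompt the user to log in first
+ #   cart = await client.get_cart()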
+ @with_retry(config=RetryConfig(max_attempts=3, base_delay=1.0))
1795
+ async def _execute_persisted_query_with_client(
1796
+ self,
1797
+ client: httpx.AsyncClient,
1798
+ operation_name: str,
1799
+ variables: dict[str, Any],
1800
+ ) -> dict[str, Any]:
1801
+ """Execute a persisted GraphQL query with a specific client.
1802
+
1803
+ Args:
1804
+ client: httpx client to use (may have cookies)
1805
+ operation_name: The name of the persisted operation
1806
+ variables: Query variables
1807
+
1808
+ Returns:
1809
+ Response data
1810
+ """
1811
+ self.circuit_breaker.check()
1812
+
1813
+ if operation_name not in PERSISTED_QUERIES:
1814
+ raise ValueError(f"Unknown operation: {operation_name}")
1815
+
1816
+ payload = {
1817
+ "operationName": operation_name,
1818
+ "variables": variables,
1819
+ "extensions": {
1820
+ "persistedQuery": {
1821
+ "version": 1,
1822
+ "sha256Hash": PERSISTED_QUERIES[operation_name],
1823
+ }
1824
+ },
1825
+ }
1826
+
1827
+ try:
1828
+ response = await client.post(
1829
+ self.base_url,
1830
+ json=payload,
1831
+ headers={"Content-Type": "application/json", "Accept": "application/json"},
1832
+ )
1833
+ response.raise_for_status()
1834
+
1835
+ data: Any = response.json()
1836
+
1837
+ if "errors" in data:
1838
+ for error in data["errors"]:
1839
+ if "PersistedQueryNotFound" in str(error):
1840
+ raise PersistedQueryNotFoundError(
1841
+ f"Persisted query hash for '{operation_name}' is no longer valid"
1842
+ )
1843
+ raise GraphQLError(data["errors"])
1844
+
1845
+ self.circuit_breaker.record_success()
1846
+
1847
+ if isinstance(data, dict):
1848
+ payload_data = data.get("data")
1849
+ if isinstance(payload_data, dict):
1850
+ return cast(dict[str, Any], payload_data)
1851
+ return {}
1852
+
1853
+ except (httpx.HTTPError, GraphQLError) as e:
1854
+ self.circuit_breaker.record_failure()
1855
+ logger.error(
1856
+ "Persisted query with client failed",
1857
+ operation=operation_name,
1858
+ error=str(e),
1859
+ )
1860
+ raise
1861
+
1862
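+ # For reference, the wire payload built above serializes roughly as
+ # (hash value hypothetical):
+ #   {"operationName": "cartEstimated",
+ #    "variables": {"userIsLoggedIn": true},
+ #    "extensions": {"persistedQuery": {"version": 1, "sha256Hash": "ab12..."}}}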
+ def get_status(self) -> dict[str, Any]:
1863
+ """Get client status for health checks."""
1864
+ return {
1865
+ "circuit_breaker": self.circuit_breaker.get_status(),
1866
+ "build_id": self._build_id,
1867
+ "known_stores": len(KNOWN_STORES),
1868
+ }
1869
+
1870
+ # ===================
1871
+ # Coupon Methods
1872
+ # ===================
1873
+
1874
+ async def get_coupons(
1875
+ self,
1876
+ category_id: int | None = None,
1877
+ search_query: str | None = None,
1878
+ limit: int = 60,
1879
+ ) -> CouponSearchResult:
1880
+ """Fetch available coupons.
1881
+
1882
+ Coupons are loaded via SSR from the all-coupons page.
1883
+
1884
+ Args:
1885
+ category_id: Filter by category ID (e.g., 490021 for Health & beauty)
1886
+ search_query: Search coupons by keyword
1887
+ limit: Maximum coupons to return (max 60 per page)
1888
+
1889
+ Returns:
1890
+ CouponSearchResult with coupons and metadata (empty when not authenticated)
1891
+ """
1892
+ auth_client = await self._get_authenticated_client()
1893
+ if not auth_client:
1894
+ logger.warning("Coupon fetch requires authentication for full data")
1895
+ return CouponSearchResult(
1896
+ coupons=[],
1897
+ count=0,
1898
+ total=0,
1899
+ categories=[],
1900
+ )
1901
+
1902
+ try:
1903
+ return await self._fetch_coupons_ssr(
1904
+ auth_client,
1905
+ category_id=category_id,
1906
+ search_query=search_query,
1907
+ limit=limit,
1908
+ )
1909
+ except Exception as e:
1910
+ logger.error("Failed to fetch coupons", error=str(e))
1911
+ return CouponSearchResult(
1912
+ coupons=[],
1913
+ count=0,
1914
+ total=0,
1915
+ categories=[],
1916
+ )
1917
+
1918
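+ # Hedged usage sketch (category ID taken from the docstring example above):
+ #   result = await client.get_coupons(category_id=490021, limit=20)
+ #   for coupon in result.coupons:
+ #       print(coupon.headline, coupon.expires_display)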
+ @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
1919
+ async def _fetch_coupons_ssr(
1920
+ self,
1921
+ client: httpx.AsyncClient,
1922
+ category_id: int | None = None,
1923
+ search_query: str | None = None,
1924
+ limit: int = 60,
1925
+ ) -> CouponSearchResult:
1926
+ """Fetch coupons via SSR page.
1927
+
1928
+ Args:
1929
+ client: Authenticated httpx client
1930
+ category_id: Filter by category
1931
+ search_query: Search term
1932
+ limit: Max results
1933
+
1934
+ Returns:
1935
+ CouponSearchResult with parsed coupon data
1936
+ """
1937
+ self.circuit_breaker.check()
1938
+
1939
+ # Build URL with query params
1940
+ url = "https://www.heb.com/digital-coupon/coupon-selection/all-coupons"
1941
+ params: dict[str, str] = {}
1942
+
1943
+ if search_query:
1944
+ params["searchTerm"] = search_query
1945
+ if category_id:
1946
+ params["productCategories"] = str(category_id)
1947
+
1948
+ logger.debug("Fetching coupons SSR", url=url, params=params)
1949
+
1950
+ try:
1951
+ response = await client.get(url, params=params if params else None)
1952
+ response.raise_for_status()
1953
+
1954
+ # Extract __NEXT_DATA__ JSON from HTML
1955
+ match = re.search(
1956
+ r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
1957
+ response.text,
1958
+ re.DOTALL,
1959
+ )
1960
+
1961
+ if not match:
1962
+ logger.warning("No __NEXT_DATA__ found in coupon response")
1963
+ return CouponSearchResult(coupons=[], count=0, total=0, categories=[])
1964
+
1965
+ next_data = json.loads(match.group(1))
1966
+ result = self._parse_coupon_ssr_data(next_data, limit)
1967
+
1968
+ self.circuit_breaker.record_success()
1969
+ logger.info(
1970
+ "Coupon fetch successful",
1971
+ count=result.count,
1972
+ total=result.total,
1973
+ )
1974
+
1975
+ return result
1976
+
1977
+ except httpx.HTTPError as e:
1978
+ self.circuit_breaker.record_failure()
1979
+ logger.error("Coupon SSR fetch failed", error=str(e))
1980
+ raise
1981
+
1982
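+ # Minimal sketch of the __NEXT_DATA__ extraction above, using the module's
+ # `re` and `json` imports (the HTML string is a stand-in for a real page):
+ #   html = '<script id="__NEXT_DATA__" type="application/json">{"props": {}}</script>'
+ #   m = re.search(
+ #       r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+ #       html, re.DOTALL,
+ #   )
+ #   data = json.loads(m.group(1)) if m else {}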
+ def _parse_coupon_ssr_data(
1983
+ self,
1984
+ next_data: dict[str, Any],
1985
+ limit: int = 60,
1986
+ ) -> CouponSearchResult:
1987
+ """Parse coupon data from SSR __NEXT_DATA__.
1988
+
1989
+ Args:
1990
+ next_data: Parsed __NEXT_DATA__ JSON
1991
+ limit: Max coupons to return
1992
+
1993
+ Returns:
1994
+ CouponSearchResult with coupons and categories
1995
+ """
1996
+ page_props = next_data.get("props", {}).get("pageProps", {})
1997
+
1998
+ # Parse coupon data
1999
+ coupon_data = page_props.get("couponData", [])
2000
+ coupons: list[Coupon] = []
2001
+
2002
+ for item in coupon_data[:limit]:
2003
+ try:
2004
+ coupon = self._parse_coupon_item(item)
2005
+ if coupon:
2006
+ coupons.append(coupon)
2007
+ except Exception as e:
2008
+ logger.debug("Failed to parse coupon", error=str(e))
2009
+ continue
2010
+
2011
+ # Parse pagination
2012
+ pagination = page_props.get("pagination", {})
2013
+ total = pagination.get("totalCoupons", len(coupons))
2014
+
2015
+ # Parse categories from filters
2016
+ categories: list[CouponCategory] = []
2017
+ filters_info = page_props.get("filtersInfo", {})
2018
+ filter_counts = filters_info.get("filterCounts", {})
2019
+ product_categories = filter_counts.get("productCategories", [])
2020
+
2021
+ for cat in product_categories:
2022
+ try:
2023
+ categories.append(CouponCategory(
2024
+ id=cat.get("option", 0),
2025
+ name=cat.get("displayName", ""),
2026
+ count=cat.get("count", 0),
2027
+ ))
2028
+ except Exception:
2029
+ continue
2030
+
2031
+ return CouponSearchResult(
2032
+ coupons=coupons,
2033
+ count=len(coupons),
2034
+ total=total,
2035
+ categories=categories,
2036
+ )
2037
+
2038
+ def _parse_coupon_item(self, item: dict[str, Any]) -> Coupon | None:
2039
+ """Parse a single coupon from SSR data.
2040
+
2041
+ Args:
2042
+ item: Coupon dict from couponData array
2043
+
2044
+ Returns:
2045
+ Coupon object, or None if the item has no ID (parse errors propagate to the caller)
2046
+ """
2047
+ coupon_id = item.get("id")
2048
+ if not coupon_id:
2049
+ return None
2050
+
2051
+ # Parse expiration date
2052
+ exp_date = item.get("expirationDate")
2053
+ expires_display = None
2054
+ if exp_date:
2055
+ # Convert YYYY-MM-DD to MM/DD/YYYY for display
2056
+ try:
2057
+ from datetime import datetime
2058
+ dt = datetime.strptime(exp_date, "%Y-%m-%d")
2059
+ expires_display = dt.strftime("%m/%d/%Y")
2060
+ except Exception:
2061
+ expires_display = exp_date
2062
+
2063
+ # Determine if digital only
2064
+ print_statuses = item.get("printStatuses", [])
2065
+ digital_only = "PAPERLESS" in print_statuses and "PRINTED" not in print_statuses
2066
+
2067
+ # Parse usage limit
2068
+ redemption_limit = item.get("redemptionLimit")
2069
+ usage_limit = f"Limit {redemption_limit}" if redemption_limit else "Unlimited use"
2070
+
2071
+ return Coupon(
2072
+ coupon_id=coupon_id,
2073
+ headline=item.get("shortDescription", ""),
2074
+ description=item.get("description", ""),
2075
+ expires=exp_date,
2076
+ expires_display=expires_display,
2077
+ image_url=item.get("imageUrl"),
2078
+ coupon_type=item.get("type", "NORMAL"),
2079
+ clipped=item.get("clippedStatus") == "CLIPPED",
2080
+ redeemable=item.get("redemptionStatus") == "REDEEMABLE",
2081
+ usage_limit=usage_limit,
2082
+ digital_only=digital_only,
2083
+ )
2084
+
2085
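+ # Illustrative input (hypothetical values) and the fields derived above:
+ #   item = {"id": 1001, "shortDescription": "$1 off", "expirationDate": "2025-01-31",
+ #           "printStatuses": ["PAPERLESS"], "redemptionLimit": 1}
+ #   # -> expires_display "01/31/2025", digital_only True, usage_limit "Limit 1"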
+ async def clip_coupon(self, coupon_id: int) -> dict[str, Any]:
2086
+ """Clip a coupon to the user's account.
2087
+
2088
+ Args:
2089
+ coupon_id: The coupon ID to clip
2090
+
2091
+ Returns:
2092
+ Result dict with success/error status
2093
+ """
2094
+ auth_client = await self._get_authenticated_client()
2095
+ if not auth_client:
2096
+ return {
2097
+ "error": True,
2098
+ "code": "NOT_AUTHENTICATED",
2099
+ "message": "Login required to clip coupons",
2100
+ }
2101
+
2102
+ try:
2103
+ result = await self._execute_persisted_query_with_client(
2104
+ auth_client,
2105
+ "CouponClip",
2106
+ {
2107
+ "userIsLoggedIn": True,
2108
+ "id": coupon_id,
2109
+ },
2110
+ )
2111
+
2112
+ # Failures surface as GraphQLError above, so any response here means the clip went through (the payload may be empty)
2113
+ clip_result = result.get("clipCoupon", {})
2114
+ if clip_result:
2115
+ return {
2116
+ "success": True,
2117
+ "coupon_id": coupon_id,
2118
+ "message": "Coupon clipped successfully!",
2119
+ }
2120
+ else:
2121
+ return {
2122
+ "success": True,
2123
+ "coupon_id": coupon_id,
2124
+ "message": "Coupon clipped.",
2125
+ }
2126
+
2127
+ except GraphQLError as e:
2128
+ error_msg = str(e)
2129
+ if "already clipped" in error_msg.lower():
2130
+ return {
2131
+ "error": True,
2132
+ "code": "ALREADY_CLIPPED",
2133
+ "message": "This coupon is already clipped to your account.",
2134
+ "coupon_id": coupon_id,
2135
+ }
2136
+ logger.error("Failed to clip coupon", coupon_id=coupon_id, error=error_msg)
2137
+ return {
2138
+ "error": True,
2139
+ "code": "CLIP_FAILED",
2140
+ "message": f"Failed to clip coupon: {error_msg}",
2141
+ "coupon_id": coupon_id,
2142
+ }
2143
+ except Exception as e:
2144
+ logger.error("Failed to clip coupon", coupon_id=coupon_id, error=str(e))
2145
+ return {
2146
+ "error": True,
2147
+ "code": "CLIP_FAILED",
2148
+ "message": f"Failed to clip coupon: {e!s}",
2149
+ "coupon_id": coupon_id,
2150
+ }
2151
+
2152
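+ # Hedged usage sketch (coupon ID hypothetical):
+ #   result = await client.clip_coupon(1001)
+ #   if result.get("code") == "ALREADY_CLIPPED":
+ #       pass  # safe to ignore; the coupon is already on the account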
+ async def get_clipped_coupons(self, limit: int = 60) -> CouponSearchResult:
2153
+ """Get the user's clipped coupons.
2154
+
2155
+ Fetches clipped coupons via SSR from the clipped-coupons page.
2156
+
2157
+ Args:
2158
+ limit: Maximum coupons to return
2159
+
2160
+ Returns:
2161
+ CouponSearchResult with clipped coupons (empty when not authenticated)
2162
+ """
2163
+ auth_client = await self._get_authenticated_client()
2164
+ if not auth_client:
2165
+ logger.warning("Clipped coupons require authentication")
2166
+ return CouponSearchResult(
2167
+ coupons=[],
2168
+ count=0,
2169
+ total=0,
2170
+ categories=[],
2171
+ )
2172
+
2173
+ try:
2174
+ return await self._fetch_clipped_coupons_ssr(auth_client, limit)
2175
+ except Exception as e:
2176
+ logger.error("Failed to fetch clipped coupons", error=str(e))
2177
+ return CouponSearchResult(
2178
+ coupons=[],
2179
+ count=0,
2180
+ total=0,
2181
+ categories=[],
2182
+ )
2183
+
2184
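+ # Hedged usage sketch:
+ #   clipped = await client.get_clipped_coupons(limit=10)
+ #   clipped_ids = [c.coupon_id for c in clipped.coupons]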
+ async def select_store(
2185
+ self, store_id: str, ignore_conflicts: bool = False
2186
+ ) -> dict[str, Any]:
2187
+ """Change the active store via GraphQL mutation with verification.
2188
+
2189
+ This calls the SelectPickupFulfillment mutation which changes
2190
+ the user's active store on HEB's backend, then verifies the
2191
+ change actually took effect by checking the cart's store.
2192
+
2193
+ Args:
2194
+ store_id: The store ID to switch to
2195
+ ignore_conflicts: If True, force store change even if cart has
2196
+ conflicts (items unavailable, price changes). Default False.
2197
+
2198
+ Returns:
2199
+ Result dict with:
2200
+ - success: True only if store actually changed (verified)
2201
+ - error: True if store change failed or couldn't be verified
2202
+ - code: Error code for programmatic handling
2203
+ - verified: True if change was verified via get_cart()
2204
+ """
2205
+ auth_client = await self._get_authenticated_client()
2206
+ if not auth_client:
2207
+ return {
2208
+ "error": True,
2209
+ "code": "NOT_AUTHENTICATED",
2210
+ "message": "Login required to change stores",
2211
+ }
2212
+
2213
+ try:
2214
+ # The mutation expects storeId as both string and int in different fields
2215
+ result = await self._execute_persisted_query_with_client(
2216
+ auth_client,
2217
+ "SelectPickupFulfillment",
2218
+ {
2219
+ "fulfillmentType": "PICKUP",
2220
+ "pickupStoreId": store_id,
2221
+ "ignoreCartConflicts": ignore_conflicts,
2222
+ "storeId": int(store_id),
2223
+ "userIsLoggedIn": True,
2224
+ },
2225
+ )
2226
+
2227
+ fulfillment_data = result.get("selectPickupFulfillment", {})
2228
+ logger.debug(
2229
+ "SelectPickupFulfillment response",
2230
+ store_id=store_id,
2231
+ response=fulfillment_data,
2232
+ )
2233
+
2234
+ # VERIFY: confirm the store actually changed by fetching the cart;
2235
+ # the mutation response alone does not reliably reflect the switch
2236
+ cart = await self.get_cart()
2237
+ if cart.get("error"):
2238
+ logger.warning(
2239
+ "Could not verify store change - cart fetch failed",
2240
+ store_id=store_id,
2241
+ cart_error=cart,
2242
+ )
2243
+ return {
2244
+ "error": True,
2245
+ "code": "VERIFICATION_FAILED",
2246
+ "message": "Store change could not be verified - cart fetch failed",
2247
+ "store_id": store_id,
2248
+ "mutation_response": fulfillment_data,
2249
+ }
2250
+
2251
+ # Extract actual store from cart response
2252
+ # Cart structure: cartV2.fulfillment.store.id
2253
+ cart_v2 = cart.get("cartV2") or cart.get("cart") or {}
2254
+ fulfillment = cart_v2.get("fulfillment") or {}
2255
+ store_info = fulfillment.get("store") or {}
2256
+ actual_store_id = str(store_info.get("id", ""))
2257
+
2258
+ # Also check pickupStore as alternative location
2259
+ if not actual_store_id:
2260
+ pickup_store = fulfillment.get("pickupStore") or {}
2261
+ actual_store_id = str(pickup_store.get("id", ""))
2262
+
2263
+ # If still no store found, check top-level storeId
2264
+ if not actual_store_id:
2265
+ actual_store_id = str(cart_v2.get("storeId", ""))
2266
+
2267
+ logger.debug(
2268
+ "Store verification",
2269
+ requested=store_id,
2270
+ actual=actual_store_id,
2271
+ cart_fulfillment=fulfillment,
2272
+ )
2273
+
2274
+ # Compare requested vs actual
2275
+ if actual_store_id == store_id:
2276
+ logger.info(
2277
+ "Store change verified successful",
2278
+ store_id=store_id,
2279
+ verified=True,
2280
+ )
2281
+ return {
2282
+ "success": True,
2283
+ "store_id": store_id,
2284
+ "message": f"Store changed to {store_id}",
2285
+ "verified": True,
2286
+ }
2287
+ else:
2288
+ # Store didn't change - likely cart conflict
2289
+ logger.warning(
2290
+ "Store change verification failed",
2291
+ requested=store_id,
2292
+ actual=actual_store_id,
2293
+ ignore_conflicts=ignore_conflicts,
2294
+ )
2295
+
2296
+ # Determine likely cause
2297
+ if not ignore_conflicts:
2298
+ return {
2299
+ "error": True,
2300
+ "code": "CART_CONFLICT",
2301
+ "message": (
2302
+ f"Store change not applied - your cart may have items "
2303
+ "unavailable at the new store. Current store is still "
2304
+ f"{actual_store_id}."
2305
+ ),
2306
+ "expected_store": store_id,
2307
+ "actual_store": actual_store_id,
2308
+ "suggestion": "Try with ignore_conflicts=True to force the change, "
2309
+ "or clear your cart first.",
2310
+ }
2311
+ else:
2312
+ return {
2313
+ "error": True,
2314
+ "code": "VERIFICATION_FAILED",
2315
+ "message": (
2316
+ f"Store change not applied even with ignore_conflicts=True. "
2317
+ f"Current store is still {actual_store_id}."
2318
+ ),
2319
+ "expected_store": store_id,
2320
+ "actual_store": actual_store_id,
2321
+ }
2322
+
2323
+ except GraphQLError as e:
2324
+ error_msg = str(e)
2325
+ logger.error("Failed to change store", store_id=store_id, error=error_msg)
2326
+ return {
2327
+ "error": True,
2328
+ "code": "STORE_CHANGE_FAILED",
2329
+ "message": f"Failed to change store: {error_msg}",
2330
+ "store_id": store_id,
2331
+ }
2332
+ except ValueError as e:
2333
+ # Invalid store_id format (can't convert to int)
2334
+ logger.error("Invalid store ID format", store_id=store_id, error=str(e))
2335
+ return {
2336
+ "error": True,
2337
+ "code": "INVALID_STORE_ID",
2338
+ "message": f"Invalid store ID format: {store_id}",
2339
+ "store_id": store_id,
2340
+ }
2341
+ except Exception as e:
2342
+ logger.error("Failed to change store", store_id=store_id, error=str(e))
2343
+ return {
2344
+ "error": True,
2345
+ "code": "STORE_CHANGE_FAILED",
2346
+ "message": f"Failed to change store: {e!s}",
2347
+ "store_id": store_id,
2348
+ }
2349
+
2350
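+ # Hedged retry sketch for the verified store change above (store ID
+ # hypothetical):
+ #   result = await client.select_store("790")
+ #   if result.get("code") == "CART_CONFLICT":
+ #       # caller explicitly opts in to forcing the change despite conflicts
+ #       result = await client.select_store("790", ignore_conflicts=True)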
+ @with_retry(config=RetryConfig(max_attempts=2, base_delay=0.5))
2351
+ async def _fetch_clipped_coupons_ssr(
2352
+ self,
2353
+ client: httpx.AsyncClient,
2354
+ limit: int = 60,
2355
+ ) -> CouponSearchResult:
2356
+ """Fetch clipped coupons via SSR page.
2357
+
2358
+ Args:
2359
+ client: Authenticated httpx client
2360
+ limit: Max results
2361
+
2362
+ Returns:
2363
+ CouponSearchResult with clipped coupon data
2364
+ """
2365
+ self.circuit_breaker.check()
2366
+
2367
+ url = "https://www.heb.com/digital-coupon/clipped-coupons"
2368
+ logger.debug("Fetching clipped coupons SSR", url=url)
2369
+
2370
+ try:
2371
+ response = await client.get(url)
2372
+ response.raise_for_status()
2373
+
2374
+ # Extract __NEXT_DATA__ JSON from HTML
2375
+ match = re.search(
2376
+ r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
2377
+ response.text,
2378
+ re.DOTALL,
2379
+ )
2380
+
2381
+ if not match:
2382
+ logger.warning("No __NEXT_DATA__ found in clipped coupons response")
2383
+ return CouponSearchResult(coupons=[], count=0, total=0, categories=[])
2384
+
2385
+ next_data = json.loads(match.group(1))
2386
+ result = self._parse_coupon_ssr_data(next_data, limit)
2387
+
2388
+ self.circuit_breaker.record_success()
2389
+ logger.info(
2390
+ "Clipped coupons fetch successful",
2391
+ count=result.count,
2392
+ total=result.total,
2393
+ )
2394
+
2395
+ return result
2396
+
2397
+ except httpx.HTTPError as e:
2398
+ self.circuit_breaker.record_failure()
2399
+ logger.error("Clipped coupons SSR fetch failed", error=str(e))
2400
+ raise