vfbquery 0.3.4__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,659 @@
1
+ """
2
+ SOLR-based Result Caching for VFBquery
3
+
4
+ This module implements server-side caching by storing computed VFBquery results
5
+ directly in the SOLR server, eliminating cold start delays for frequently
6
+ requested terms.
7
+
8
+ The approach stores pre-computed results as dedicated cache documents (IDs
9
+ prefixed with 'vfb_query_') in the SOLR index, so they can be retrieved
10
+ instantly without expensive Neo4j queries and data processing.
11
+ """
12
+
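As an orientation aid (not part of the diff), a minimal usage sketch of the class defined below; the import path and the term ID are assumptions, since the diff does not show the file name:

# Illustrative sketch only - module path and term ID are assumptions.
from vfbquery.solr_result_cache import SolrResultCache  # hypothetical import path

cache = SolrResultCache()  # defaults: VFB SOLR URL, 3-month TTL, 10 MB size cap

# Check the cache before doing any expensive Neo4j work.
result = cache.get_cached_result("term_info", "FBbt_00003748")
if result is None:
    result = {"Id": "FBbt_00003748", "Name": "medulla"}  # stand-in for the real computation
    cache.cache_result("term_info", "FBbt_00003748", result)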
13
+ import json
14
+ import requests
15
+ import hashlib
16
+ import time
17
+ from datetime import datetime, timedelta, timezone
18
+ from typing import Dict, Any, Optional, List
19
+ import logging
20
+ from dataclasses import dataclass, asdict
21
+ from vfbquery.term_info_queries import NumpyEncoder
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ @dataclass
26
+ class CacheMetadata:
27
+ """Metadata for cached results"""
28
+ query_type: str # 'term_info', 'instances', etc.
29
+ term_id: str # The queried term ID
30
+ query_params: str # Hashed parameters for unique identification
31
+ created_at: str # ISO timestamp
32
+ expires_at: str # ISO timestamp
33
+ result_size: int # Size in bytes
34
+ version: str # VFBquery version
35
+ hit_count: int = 0 # How many times this cache entry was used
36
+
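A short sketch of how such a metadata record could be populated and flattened with asdict for indexing; the module below tracks the same fields in a plain dict rather than instantiating this dataclass, so this is illustrative only and all values are placeholders:

from dataclasses import asdict
from datetime import datetime, timedelta
from vfbquery.solr_result_cache import CacheMetadata  # hypothetical import path

meta = CacheMetadata(
    query_type="term_info",
    term_id="FBbt_00003748",
    query_params="a1b2c3d4",  # e.g. a hash of the call parameters
    created_at=datetime.now().astimezone().isoformat(),
    expires_at=(datetime.now().astimezone() + timedelta(hours=2160)).isoformat(),
    result_size=2048,
    version="0.4.1",
)
doc_fields = asdict(meta)  # plain dict with hit_count defaulting to 0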
37
+ class SolrResultCache:
38
+ """
39
+ SOLR-based result caching system for VFBquery
40
+
41
+ Stores computed query results as dedicated documents in the SOLR index to enable
42
+ instant retrieval without expensive computation on cold starts.
43
+ """
44
+
45
+ def __init__(self,
46
+ cache_url: str = "https://solr.virtualflybrain.org/solr/vfb_json",
47
+ ttl_hours: int = 2160, # 3 months like VFB_connect
48
+ max_result_size_mb: int = 10):
49
+ """
50
+ Initialize SOLR result cache
51
+
52
+ Args:
53
+ cache_url: SOLR collection URL for caching
54
+ ttl_hours: Time-to-live for cache entries in hours
55
+ max_result_size_mb: Maximum result size to cache in MB
56
+ """
57
+ self.cache_url = cache_url
58
+ self.ttl_hours = ttl_hours
59
+ self.max_result_size_mb = max_result_size_mb
60
+ self.max_result_size_bytes = max_result_size_mb * 1024 * 1024
61
+
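All three settings are constructor arguments, so a test or development deployment can shrink the TTL and size cap; a sketch using the class defined above (the local URL is an assumption):

# Point the cache at a local SOLR core with a 24-hour TTL (URL is hypothetical).
dev_cache = SolrResultCache(
    cache_url="http://localhost:8983/solr/vfb_json",
    ttl_hours=24,
    max_result_size_mb=5,
)
assert dev_cache.max_result_size_bytes == 5 * 1024 * 1024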
62
+ def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
63
+ """Create metadata for cached result with 3-month expiration"""
64
+ serialized_result = json.dumps(result, cls=NumpyEncoder)
65
+ result_size = len(serialized_result.encode('utf-8'))
66
+
67
+ # Don't cache if result is too large
68
+ if result_size > self.max_result_size_bytes:
69
+ logger.warning(f"Result too large to cache: {result_size/1024/1024:.2f}MB > {self.max_result_size_mb}MB")
70
+ return None
71
+
72
+ now = datetime.now().astimezone()
73
+ expires_at = now + timedelta(hours=self.ttl_hours) # 2160 hours = 90 days = 3 months
74
+
75
+ return {
76
+ "result": result, # Store original object, not serialized string
77
+ "cached_at": now.isoformat(),
78
+ "expires_at": expires_at.isoformat(),
79
+ "result_size": result_size,
80
+ "hit_count": 0,
81
+ "cache_version": "1.0", # For future compatibility
82
+ "ttl_hours": self.ttl_hours # Store TTL for debugging
83
+ }
84
+
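For reference, the envelope returned by the helper above looks roughly like this under the default settings (timestamps are illustrative; the class is the one defined in this module):

envelope = SolrResultCache()._create_cache_metadata({"Id": "FBbt_00003748", "Name": "medulla"})
# envelope is approximately:
# {
#     "result": {"Id": "FBbt_00003748", "Name": "medulla"},   # the original object
#     "cached_at": "2025-01-01T12:00:00+00:00",
#     "expires_at": "2025-04-01T12:00:00+00:00",              # cached_at + 2160 h = 90 days
#     "result_size": <bytes of the JSON-serialised result>,
#     "hit_count": 0,
#     "cache_version": "1.0",
#     "ttl_hours": 2160,
# }
# Results larger than max_result_size_mb yield None instead.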
85
+ def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional[Any]:
86
+ """
87
+ Retrieve cached result from separate cache document
88
+
89
+ Args:
90
+ query_type: Type of query ('term_info', 'instances', etc.)
91
+ term_id: Term identifier
92
+ **params: Additional query parameters (reserved; not currently part of the cache key)
93
+
94
+ Returns:
95
+ Cached result or None if not found/expired
96
+ """
97
+ try:
98
+ # Query for cache document with prefixed ID
99
+ cache_doc_id = f"vfb_query_{term_id}"
100
+
101
+ response = requests.get(f"{self.cache_url}/select", params={
102
+ "q": f"id:{cache_doc_id} AND query_type:{query_type}",
103
+ "fl": "cache_data",
104
+ "wt": "json"
105
+ }, timeout=5) # Short timeout for cache lookups
106
+
107
+ if response.status_code != 200:
108
+ logger.debug(f"Cache miss: HTTP {response.status_code}")
109
+ return None
110
+
111
+ data = response.json()
112
+ docs = data.get("response", {}).get("docs", [])
113
+
114
+ if not docs:
115
+ logger.debug(f"Cache miss: No cache document found for {query_type}:{term_id}")
116
+ return None
117
+
118
+ cached_field = docs[0].get("cache_data")
119
+ if not cached_field:
120
+ logger.debug(f"Cache miss: No cache_data field found for {term_id}")
121
+ return None
122
+
123
+ # Handle both list and string formats
124
+ if isinstance(cached_field, list):
125
+ cached_field = cached_field[0]
126
+
127
+ # Parse the cached metadata and result
128
+ cached_data = json.loads(cached_field)
129
+
130
+ # Check expiration (3-month max age)
131
+ try:
132
+ expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
133
+ cached_at = datetime.fromisoformat(cached_data["cached_at"].replace('Z', '+00:00'))
134
+ now = datetime.now().astimezone()
135
+
136
+ if now > expires_at:
137
+ age_days = (now - cached_at).days
138
+ logger.info(f"Cache expired for {query_type}({term_id}) - age: {age_days} days")
139
+ self._clear_expired_cache_document(cache_doc_id)
140
+ return None
141
+
142
+ # Log cache age for monitoring
143
+ age_hours = (now - cached_at).total_seconds() / 3600
144
+ logger.debug(f"Cache hit for {query_type}({term_id}) - age: {age_hours:.1f} hours")
145
+
146
+ except (KeyError, ValueError) as e:
147
+ logger.warning(f"Invalid cache metadata for {term_id}: {e}")
148
+ self._clear_expired_cache_document(cache_doc_id)
149
+ return None
150
+
151
+ # Increment hit count (best-effort; failures are ignored, commit is deferred)
152
+ self._increment_cache_hit_count(cache_doc_id, cached_data.get("hit_count", 0))
153
+
154
+ # Return cached result
155
+ result = cached_data["result"]
156
+ # If result is a string, parse it as JSON
157
+ if isinstance(result, str):
158
+ try:
159
+ result = json.loads(result)
160
+ except json.JSONDecodeError:
161
+ logger.warning(f"Failed to parse cached result for {term_id}")
162
+ return None
163
+
164
+ logger.info(f"Cache hit for {query_type}({term_id})")
165
+ return result
166
+
167
+ except Exception as e:
168
+ logger.debug(f"Error retrieving cached result: {e}")
169
+ return None
170
+
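Caller-side, a None return collapses miss, expiry, and cache errors into one "recompute" signal; a sketch using the class defined above (the ID is only an example and run_full_term_info_query is a hypothetical stand-in for the normal query path):

cache = SolrResultCache()
info = cache.get_cached_result("term_info", "VFB_00101567")
if info is None:
    # Miss, expired entry, malformed cache data or SOLR error:
    # fall back to the normal (expensive) query path.
    info = run_full_term_info_query("VFB_00101567")  # hypothetical fallback function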
171
+ def cache_result(self, query_type: str, term_id: str, result: Any, **params) -> bool:
172
+ """
173
+ Store result as separate cache document with prefixed ID
174
+
175
+ This approach is safer because it never touches the original VFB documents,
176
+ eliminating the risk of data loss.
177
+
178
+ Args:
179
+ query_type: Type of query being cached
180
+ term_id: Term identifier
181
+ result: Query result to cache
182
+ **params: Additional query parameters (reserved; not currently part of the cache key)
183
+
184
+ Returns:
185
+ True if successfully cached, False otherwise
186
+ """
187
+ if not result:
188
+ logger.debug("Empty result, not caching")
189
+ return False
190
+
191
+ try:
192
+ # Create cached metadata and result
193
+ cached_data = self._create_cache_metadata(result)
194
+ if not cached_data:
195
+ return False # Result too large or other issue
196
+
197
+ # Create cache document with prefixed ID
198
+ cache_doc_id = f"vfb_query_{term_id}"
199
+
200
+ cache_doc = {
201
+ "id": cache_doc_id,
202
+ "original_term_id": term_id,
203
+ "query_type": query_type,
204
+ "cache_data": json.dumps(cached_data, cls=NumpyEncoder),
205
+ "cached_at": cached_data["cached_at"],
206
+ "expires_at": cached_data["expires_at"]
207
+ }
208
+
209
+ # Store cache document
210
+ response = requests.post(
211
+ f"{self.cache_url}/update",
212
+ data=json.dumps([cache_doc]),
213
+ headers={"Content-Type": "application/json"},
214
+ params={"commit": "true"}, # Immediate commit for availability
215
+ timeout=10
216
+ )
217
+
218
+ if response.status_code == 200:
219
+ logger.info(f"Cached {query_type} for {term_id} as {cache_doc_id}, size: {cached_data['result_size']/1024:.1f}KB")
220
+ return True
221
+ else:
222
+ logger.error(f"Failed to cache result: HTTP {response.status_code} - {response.text}")
223
+ return False
224
+
225
+ except Exception as e:
226
+ logger.error(f"Error caching result: {e}")
227
+ return False
228
+
229
+
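A store-then-read round trip, assuming the SOLR schema accepts the cache fields shown above; the payload is a stand-in:

cache = SolrResultCache()
payload = {"Id": "FBbt_00003748", "Name": "medulla", "Tags": ["Adult", "Nervous_system"]}

if cache.cache_result("term_info", "FBbt_00003748", payload):
    # commit=true above means the document is visible to the very next lookup
    assert cache.get_cached_result("term_info", "FBbt_00003748") == payload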
230
+ def _clear_expired_cache_document(self, cache_doc_id: str):
231
+ """Delete expired cache document from SOLR"""
232
+ try:
233
+ requests.post(
234
+ f"{self.cache_url}/update",
235
+ data=f'<delete><id>{cache_doc_id}</id></delete>',
236
+ headers={"Content-Type": "application/xml"},
237
+ params={"commit": "false"}, # Don't commit immediately for performance
238
+ timeout=2
239
+ )
240
+ except Exception as e:
241
+ logger.debug(f"Failed to clear expired cache document: {e}")
242
+
243
+ def clear_cache_entry(self, query_type: str, term_id: str) -> bool:
244
+ """
245
+ Manually clear a specific cache entry to force refresh
246
+
247
+ Args:
248
+ query_type: Type of query ('term_info', 'instances', etc.)
249
+ term_id: Term identifier
250
+
251
+ Returns:
252
+ True if successfully cleared, False otherwise
253
+ """
254
+ try:
255
+ cache_doc_id = f"vfb_query_{term_id}"
256
+ response = requests.post(
257
+ f"{self.cache_url}/update",
258
+ data=f'<delete><id>{cache_doc_id}</id></delete>',
259
+ headers={"Content-Type": "application/xml"},
260
+ params={"commit": "true"}, # Commit immediately to ensure it's cleared
261
+ timeout=5
262
+ )
263
+ if response.status_code == 200:
264
+ logger.info(f"Cleared cache entry for {query_type}({term_id})")
265
+ return True
266
+ else:
267
+ logger.error(f"Failed to clear cache entry: HTTP {response.status_code}")
268
+ return False
269
+ except Exception as e:
270
+ logger.error(f"Error clearing cache entry: {e}")
271
+ return False
272
+
273
+ def _increment_cache_hit_count(self, cache_doc_id: str, current_count: int):
274
+ """Increment hit count for cache document (background operation)"""
275
+ try:
276
+ # Update hit count in cache document
277
+ new_count = current_count + 1
278
+ update_doc = {
279
+ "id": cache_doc_id,
280
+ "hit_count": {"set": new_count},
281
+ "last_accessed": {"set": datetime.now().isoformat() + "Z"}
282
+ }
283
+
284
+ requests.post(
285
+ f"{self.cache_url}/update",
286
+ data=json.dumps([update_doc]),
287
+ headers={"Content-Type": "application/json"},
288
+ params={"commit": "false"}, # Don't commit immediately for performance
289
+ timeout=2
290
+ )
291
+ except Exception as e:
292
+ logger.debug(f"Failed to update hit count: {e}")
293
+
294
+ def get_cache_age(self, query_type: str, term_id: str, **params) -> Optional[Dict[str, Any]]:
295
+ """
296
+ Get cache age information for a specific cached result
297
+
298
+ Returns:
299
+ Dictionary with cache age info or None if not cached
300
+ """
301
+ try:
302
+ cache_doc_id = f"vfb_query_{term_id}"
303
+
304
+ response = requests.get(f"{self.cache_url}/select", params={
305
+ "q": f"id:{cache_doc_id} AND query_type:{query_type}",
306
+ "fl": "cache_data,hit_count,last_accessed",
307
+ "wt": "json"
308
+ }, timeout=5)
309
+
310
+ if response.status_code == 200:
311
+ data = response.json()
312
+ docs = data.get("response", {}).get("docs", [])
313
+
314
+ if docs:
315
+ doc = docs[0]
316
+ cached_field = doc.get("cache_data")
317
+ if cached_field:
318
+ # Handle both list and string formats
319
+ if isinstance(cached_field, list):
320
+ cached_field = cached_field[0]
321
+
322
+ cached_data = json.loads(cached_field)
323
+
324
+ cached_at = datetime.fromisoformat(cached_data["cached_at"].replace('Z', '+00:00'))
325
+ expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
326
+ now = datetime.now().astimezone()
327
+
328
+ age = now - cached_at
329
+ time_to_expiry = expires_at - now
330
+
331
+ return {
332
+ "cached_at": cached_at.isoformat(),
333
+ "expires_at": expires_at.isoformat(),
334
+ "age_days": age.days,
335
+ "age_hours": age.total_seconds() / 3600,
336
+ "time_to_expiry_days": time_to_expiry.days,
337
+ "time_to_expiry_hours": time_to_expiry.total_seconds() / 3600,
338
+ "is_expired": now > expires_at,
339
+ "hit_count": doc.get("hit_count", cached_data.get("hit_count", 0)),
340
+ "size_kb": cached_data.get("result_size", 0) / 1024,
341
+ "last_accessed": doc.get("last_accessed", ["Never"])[0] if isinstance(doc.get("last_accessed"), list) else doc.get("last_accessed", "Never")
342
+ }
343
+ except Exception as e:
344
+ logger.debug(f"Error getting cache age: {e}")
345
+
346
+ return None
347
+
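One way a caller might use the age report, e.g. to force a refresh of entries close to their expiry (the thresholds and ID are arbitrary; the class is the one defined above):

cache = SolrResultCache()
age = cache.get_cache_age("term_info", "FBbt_00003748")
if age is None:
    print("not cached yet")
elif age["is_expired"] or age["time_to_expiry_days"] < 7:
    cache.clear_cache_entry("term_info", "FBbt_00003748")  # next query recomputes and re-caches
else:
    print(f"cached {age['age_days']}d ago, {age['hit_count']} hits, {age['size_kb']:.1f} KB")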
348
+ def cleanup_expired_entries(self) -> int:
349
+ """
350
+ Clean up expired VFBquery cache documents
351
+
352
+ This method scans for cache documents (IDs starting with vfb_query_) and removes expired ones.
353
+
354
+ Returns:
355
+ Number of expired cache documents cleaned up
356
+ """
357
+ try:
358
+ now = datetime.now().astimezone()
359
+ cleaned_count = 0
360
+
361
+ # Search for all cache documents
362
+ response = requests.get(f"{self.cache_url}/select", params={
363
+ "q": "id:vfb_query_*",
364
+ "fl": "id,cache_data,expires_at",
365
+ "rows": "1000", # Process in batches
366
+ "wt": "json"
367
+ }, timeout=30)
368
+
369
+ if response.status_code == 200:
370
+ data = response.json()
371
+ docs = data.get("response", {}).get("docs", [])
372
+ expired_ids = []
373
+
374
+ for doc in docs:
375
+ doc_id = doc["id"]
376
+
377
+ try:
378
+ # Check expiration using expires_at field if available, or cache_data
379
+ expires_at = None
380
+
381
+ if "expires_at" in doc:
382
+ expires_at_field = doc["expires_at"]
383
+ expires_at_str = expires_at_field[0] if isinstance(expires_at_field, list) else expires_at_field
384
+ expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
385
+ elif "cache_data" in doc:
386
+ # Fallback to parsing cache_data
387
+ cached_field = doc["cache_data"]
388
+ if isinstance(cached_field, list):
389
+ cached_field = cached_field[0]
390
+ cached_data = json.loads(cached_field)
391
+ expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
392
+
393
+ if expires_at and now > expires_at:
394
+ expired_ids.append(doc_id)
395
+ cleaned_count += 1
396
+ logger.debug(f"Marking cache document {doc_id} for removal (expired)")
397
+
398
+ except (json.JSONDecodeError, KeyError, ValueError) as e:
399
+ # Invalid cache data - remove it
400
+ expired_ids.append(doc_id)
401
+ cleaned_count += 1
402
+ logger.debug(f"Marking invalid cache document {doc_id} for removal: {e}")
403
+
404
+ # Delete expired cache documents in batch
405
+ if expired_ids:
406
+ delete_xml = "<delete>" + "".join(f"<id>{doc_id}</id>" for doc_id in expired_ids) + "</delete>"
407
+
408
+ delete_response = requests.post(
409
+ f"{self.cache_url}/update",
410
+ data=delete_xml,
411
+ headers={"Content-Type": "application/xml"},
412
+ params={"commit": "true"}, # Commit deletions immediately
413
+ timeout=10
414
+ )
415
+
416
+ if delete_response.status_code != 200:
417
+ logger.warning(f"Failed to delete expired cache documents: HTTP {delete_response.status_code}")
418
+ else:
419
+ logger.info(f"Cleaned up {cleaned_count} expired cache documents")
420
+
421
+ return cleaned_count
422
+
423
+ except Exception as e:
424
+ logger.error(f"Error during cache cleanup: {e}")
425
+ return 0
426
+
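Nothing in this module schedules the cleanup; a deployment would call it periodically, e.g. from a cron-style maintenance script (the scheduling is an assumption, not part of the package):

cache = SolrResultCache()
removed = cache.cleanup_expired_entries()  # examines up to 1000 vfb_query_* documents per call
print(f"removed {removed} expired or invalid cache documents")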
427
+ def get_cache_stats(self) -> Dict[str, Any]:
428
+ """
429
+ Get VFBquery cache statistics from cache documents
430
+
431
+ Returns:
432
+ Dictionary with cache statistics including document counts and age distribution
433
+ """
434
+ try:
435
+ # Get all cache documents
436
+ response = requests.get(f"{self.cache_url}/select", params={
437
+ "q": "id:vfb_query_*",
438
+ "fl": "id,query_type,cache_data,hit_count,last_accessed,cached_at,expires_at",
439
+ "rows": "1000", # Process in batches
440
+ "wt": "json"
441
+ }, timeout=30)
442
+
443
+ if response.status_code == 200:
444
+ data = response.json()
445
+ docs = data.get("response", {}).get("docs", [])
446
+ total_cache_docs = data.get("response", {}).get("numFound", 0)
447
+
448
+ type_stats = {}
449
+ total_size = 0
450
+ expired_count = 0
451
+ total_hits = 0
452
+ age_buckets = {"0-1d": 0, "1-7d": 0, "7-30d": 0, "30-90d": 0, ">90d": 0}
453
+
454
+ now = datetime.now().astimezone()
455
+
456
+ # Analyze each cache document
457
+ for doc in docs:
458
+ query_type_field = doc.get("query_type", "unknown")
459
+ # Handle both list and string formats
460
+ query_type = query_type_field[0] if isinstance(query_type_field, list) else query_type_field
461
+ type_stats[query_type] = type_stats.get(query_type, 0) + 1
462
+
463
+ try:
464
+ # Get cache data and metadata
465
+ cached_field = doc.get("cache_data")
466
+ if cached_field:
467
+ # Handle both list and string formats
468
+ if isinstance(cached_field, list):
469
+ cached_field = cached_field[0]
470
+
471
+ cached_data = json.loads(cached_field)
472
+ total_size += len(cached_field)
473
+
474
+ # Get timestamps from document fields or cache_data
475
+ cached_at = None
476
+ expires_at = None
477
+
478
+ # Try document fields first
479
+ if "cached_at" in doc:
480
+ cached_at_field = doc["cached_at"]
481
+ cached_at_str = cached_at_field[0] if isinstance(cached_at_field, list) else cached_at_field
482
+ cached_at = datetime.fromisoformat(cached_at_str.replace('Z', '+00:00'))
483
+
484
+ if "expires_at" in doc:
485
+ expires_at_field = doc["expires_at"]
486
+ expires_at_str = expires_at_field[0] if isinstance(expires_at_field, list) else expires_at_field
487
+ expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
488
+
489
+ # Fallback to cache_data
490
+ if not cached_at and "cached_at" in cached_data:
491
+ cached_at = datetime.fromisoformat(cached_data["cached_at"].replace('Z', '+00:00'))
492
+ if not expires_at and "expires_at" in cached_data:
493
+ expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
494
+
495
+ if cached_at and expires_at:
496
+ age_days = (now - cached_at).days
497
+
498
+ # Check if expired
499
+ if now > expires_at:
500
+ expired_count += 1
501
+
502
+ # Categorize by age
503
+ if age_days <= 1:
504
+ age_buckets["0-1d"] += 1
505
+ elif age_days <= 7:
506
+ age_buckets["1-7d"] += 1
507
+ elif age_days <= 30:
508
+ age_buckets["7-30d"] += 1
509
+ elif age_days <= 90:
510
+ age_buckets["30-90d"] += 1
511
+ else:
512
+ age_buckets[">90d"] += 1
513
+
514
+ # Get hit count
515
+ hit_count = doc.get("hit_count", cached_data.get("hit_count", 0))
516
+ if isinstance(hit_count, list):
517
+ hit_count = hit_count[0]
518
+ total_hits += int(hit_count) if hit_count else 0
519
+
520
+ except (json.JSONDecodeError, KeyError, ValueError):
521
+ # Invalid cache data
522
+ expired_count += 1
523
+
524
+ return {
525
+ "total_cache_documents": total_cache_docs,
526
+ "cache_by_type": type_stats,
527
+ "expired_documents": expired_count,
528
+ "age_distribution": age_buckets,
529
+ "total_hits": total_hits,
530
+ "estimated_size_bytes": total_size,
531
+ "estimated_size_mb": round(total_size / (1024 * 1024), 2),
532
+ "cache_efficiency": round((total_cache_docs - expired_count) / max(total_cache_docs, 1) * 100, 1)
533
+ }
534
+
535
+ except Exception as e:
536
+ logger.error(f"Error getting cache stats: {e}")
537
+
538
+ return {
539
+ "total_cache_documents": 0,
540
+ "cache_by_type": {},
541
+ "expired_documents": 0,
542
+ "age_distribution": {},
543
+ "total_hits": 0,
544
+ "estimated_size_bytes": 0,
545
+ "estimated_size_mb": 0.0,
546
+ "cache_efficiency": 0.0
547
+ }
548
+
549
+
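A sketch of reading the statistics dictionary, e.g. for monitoring (all printed fields come from the return value above; the example counts are illustrative):

stats = SolrResultCache().get_cache_stats()
print(f"{stats['total_cache_documents']} cached results "
      f"({stats['expired_documents']} expired, {stats['estimated_size_mb']} MB, "
      f"{stats['cache_efficiency']}% still valid)")
print("by query type:", stats["cache_by_type"])        # e.g. {'term_info': 120}
print("age distribution:", stats["age_distribution"])  # buckets 0-1d ... >90d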
550
+ # Global cache instance
551
+ _solr_cache = None
552
+
553
+ def get_solr_cache() -> SolrResultCache:
554
+ """Get global SOLR cache instance"""
555
+ global _solr_cache
556
+ if _solr_cache is None:
557
+ _solr_cache = SolrResultCache()
558
+ return _solr_cache
559
+
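The accessor is a lazy process-wide singleton, so every caller shares one instance with the default settings:

a = get_solr_cache()
b = get_solr_cache()
assert a is b  # the same SolrResultCache instance is reused everywhere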
560
+ def with_solr_cache(query_type: str):
561
+ """
562
+ Decorator to add SOLR caching to query functions
563
+
564
+ Usage:
565
+ @with_solr_cache('term_info')
566
+ def get_term_info(short_form, force_refresh=False, **kwargs):
567
+ # ... existing implementation
568
+
569
+ The decorated function can accept a 'force_refresh' parameter to bypass cache.
570
+ """
571
+ def decorator(func):
572
+ def wrapper(*args, **kwargs):
573
+ # Check if force_refresh is requested (pop it before passing to function)
574
+ force_refresh = kwargs.pop('force_refresh', False)
575
+
576
+ # Extract term_id from first argument or kwargs
577
+ term_id = args[0] if args else kwargs.get('short_form') or kwargs.get('term_id')
578
+
579
+ # For functions like get_templates that don't have a term_id, use query_type as cache key
580
+ if not term_id:
581
+ if query_type == 'templates':
582
+ # Use a fixed cache key for templates since it doesn't take a term_id
583
+ term_id = 'all_templates'
584
+ else:
585
+ logger.warning(f"No term_id found for caching {query_type}")
586
+ return func(*args, **kwargs)
587
+
588
+ cache = get_solr_cache()
589
+
590
+ # Clear cache if force_refresh is True
591
+ if force_refresh:
592
+ logger.info(f"Force refresh requested for {query_type}({term_id})")
593
+ cache.clear_cache_entry(query_type, term_id)
594
+
595
+ # Try cache first (will be empty if force_refresh was True)
596
+ if not force_refresh:
597
+ cached_result = cache.get_cached_result(query_type, term_id, **kwargs)
598
+ if cached_result is not None:
599
+ # Validate that cached result has essential fields for term_info
600
+ if query_type == 'term_info':
601
+ is_valid = (cached_result and isinstance(cached_result, dict) and
602
+ cached_result.get('Id') and cached_result.get('Name'))
603
+
604
+ # Additional validation for query results
605
+ if is_valid and 'Queries' in cached_result:
606
+ logger.debug(f"Validating {len(cached_result['Queries'])} queries for {term_id}")
607
+ for i, query in enumerate(cached_result['Queries']):
608
+ count = query.get('count', 0)
609
+ preview_results = query.get('preview_results')
610
+ headers = preview_results.get('headers', []) if isinstance(preview_results, dict) else []
611
+
612
+ logger.debug(f"Query {i}: count={count}, preview_results_type={type(preview_results)}, headers={headers}")
613
+
614
+ # A non-positive count (0 or -1) indicates the preview query failed to execute
615
+ if count <= 0:
616
+ is_valid = False
617
+ logger.debug(f"Cached result has invalid query count {count} for {term_id}")
618
+ break
619
+ # Check if preview_results is missing or has empty headers when it should have data
620
+ if not isinstance(preview_results, dict) or not headers:
621
+ is_valid = False
622
+ logger.debug(f"Cached result has invalid preview_results structure for {term_id}")
623
+ break
624
+
625
+ if is_valid:
626
+ logger.debug(f"Using valid cached result for {term_id}")
627
+ return cached_result
628
+ else:
629
+ logger.warning(f"Cached result incomplete for {term_id}, re-executing function")
630
+ # Don't return the incomplete cached result, continue to execute function
631
+ else:
632
+ return cached_result
633
+
634
+ # Execute function and cache result
635
+ result = func(*args, **kwargs)
636
+
637
+ # Cache the result (best-effort; caching failures are logged and the result is still returned)
638
+ if result:
639
+ # Validate result before caching for term_info
640
+ if query_type == 'term_info':
641
+ if (result and isinstance(result, dict) and
642
+ result.get('Id') and result.get('Name')):
643
+ try:
644
+ cache.cache_result(query_type, term_id, result, **kwargs)
645
+ logger.debug(f"Cached complete result for {term_id}")
646
+ except Exception as e:
647
+ logger.debug(f"Failed to cache result: {e}")
648
+ else:
649
+ logger.warning(f"Not caching incomplete result for {term_id}")
650
+ else:
651
+ try:
652
+ cache.cache_result(query_type, term_id, result, **kwargs)
653
+ except Exception as e:
654
+ logger.debug(f"Failed to cache result: {e}")
655
+
656
+ return result
657
+
658
+ return wrapper
659
+ return decorator
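
End-to-end, the decorator is transparent to callers apart from the extra force_refresh keyword; a sketch with a stand-in implementation (the real get_term_info lives elsewhere in the package):

@with_solr_cache("term_info")
def get_term_info(short_form, **kwargs):
    # Stand-in for the real implementation (Neo4j queries, formatting, ...).
    return {"Id": short_form, "Name": "example term"}

get_term_info("FBbt_00003748")                       # computed once, then stored in SOLR
get_term_info("FBbt_00003748")                       # served from the cache document
get_term_info("FBbt_00003748", force_refresh=True)   # clears the entry and recomputes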