vfbquery 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,465 @@
+ """
+ VFBquery Caching Enhancements
+
+ This module implements caching optimizations inspired by VFB_connect
+ to improve VFBquery performance for repeated queries.
+
+ Features:
+ 1. Term info result caching (similar to VFB_connect's VFBTerm cache)
+ 2. SOLR query result caching
+ 3. Query result caching for get_instances and other functions
+ 4. Configurable cache expiry and size limits
+ 5. Memory-based and disk-based caching options
+ """
+
+ import os
+ import json
+ import time
+ import pickle
+ import hashlib
+ from pathlib import Path
+ from typing import Dict, Any, Optional, Union
+ from functools import lru_cache, wraps
+ from dataclasses import dataclass, asdict
+ import threading
+
+ # Custom JSON encoder for caching
+ from .vfb_queries import NumpyEncoder
+
+ @dataclass
+ class CacheConfig:
+     """Configuration for VFBquery caching system."""
+     enabled: bool = True
+     memory_cache_size_mb: int = 2048  # Max memory cache size in MB (2GB default)
+     max_items: int = 10000  # Max items in memory cache (fallback limit)
+     disk_cache_enabled: bool = True
+     disk_cache_dir: Optional[str] = None
+     cache_ttl_hours: int = 2160  # Cache time-to-live in hours (3 months = 90 days * 24 hours)
+     solr_cache_enabled: bool = True
+     term_info_cache_enabled: bool = True
+     query_result_cache_enabled: bool = True
+
+ class VFBQueryCache:
+     """
+     Enhanced caching system for VFBquery inspired by VFB_connect optimizations.
+
+     Provides multiple layers of caching:
+     - Memory cache for frequently accessed items (size-limited)
+     - Disk cache for persistence across sessions
+     - Query result caching for expensive operations
+     """
+
+     def __init__(self, config: Optional[CacheConfig] = None):
+         self.config = config or CacheConfig()
+         self._memory_cache: Dict[str, Dict[str, Any]] = {}
+         self._cache_stats = {'hits': 0, 'misses': 0, 'memory_size_bytes': 0}
+         self._lock = threading.RLock()
+
+         # Set up disk cache directory
+         if self.config.disk_cache_enabled:
+             if self.config.disk_cache_dir:
+                 self.cache_dir = Path(self.config.disk_cache_dir)
+             else:
+                 # Use a similar location to VFB_connect
+                 self.cache_dir = Path.home() / '.vfbquery_cache'
+             self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+         # Allow caching to be disabled via environment variable (like VFB_connect)
+         env_enabled = os.getenv('VFBQUERY_CACHE_ENABLED', '').lower()
+         if env_enabled in ('false', '0', 'no'):
+             self.config.enabled = False
+
+     def _generate_cache_key(self, prefix: str, *args, **kwargs) -> str:
+         """Generate a cache key from function arguments."""
+         # Create deterministic hash from arguments
+         key_data = f"{prefix}:{args}:{sorted(kwargs.items())}"
+         return hashlib.md5(key_data.encode()).hexdigest()
+
+     def _is_cache_valid(self, cache_entry: Dict[str, Any]) -> bool:
+         """Check if cache entry is still valid based on TTL."""
+         if not cache_entry or 'timestamp' not in cache_entry:
+             return False
+
+         age_hours = (time.time() - cache_entry['timestamp']) / 3600
+         return age_hours < self.config.cache_ttl_hours
+
+     def _get_from_memory(self, cache_key: str) -> Optional[Any]:
+         """Get item from memory cache."""
+         with self._lock:
+             if cache_key in self._memory_cache:
+                 entry = self._memory_cache[cache_key]
+                 if self._is_cache_valid(entry):
+                     self._cache_stats['hits'] += 1
+                     return entry['data']
+                 else:
+                     # Remove expired entry and update memory size tracking
+                     expired_entry = self._memory_cache.pop(cache_key)
+                     self._cache_stats['memory_size_bytes'] -= expired_entry.get('size_bytes', 0)
+
+             self._cache_stats['misses'] += 1
+             return None
+
+     def _get_object_size(self, obj: Any) -> int:
+         """Estimate memory size of an object in bytes."""
+         try:
+             import sys
+             if isinstance(obj, (str, bytes)):
+                 return len(obj)
+             elif isinstance(obj, dict):
+                 return sum(self._get_object_size(k) + self._get_object_size(v) for k, v in obj.items())
+             elif isinstance(obj, (list, tuple)):
+                 return sum(self._get_object_size(item) for item in obj)
+             else:
+                 # Fallback: use sys.getsizeof for other objects
+                 return sys.getsizeof(obj)
+         except Exception:
+             # If size estimation fails, assume 1KB
+             return 1024
+
+     def _store_in_memory(self, cache_key: str, data: Any):
+         """Store item in memory cache with size-based, insertion-order (FIFO) eviction."""
+         with self._lock:
+             entry = {
+                 'data': data,
+                 'timestamp': time.time(),
+                 'size_bytes': self._get_object_size(data)
+             }
+
+             # Check if we need to evict items to stay under the memory limit
+             max_size_bytes = self.config.memory_cache_size_mb * 1024 * 1024
+
+             # If this single item is larger than the cache limit, don't cache it
+             if entry['size_bytes'] > max_size_bytes:
+                 return
+
+             # Evict items if adding this one would exceed the memory limit or max items
+             while (len(self._memory_cache) >= self.config.max_items or
+                    self._cache_stats['memory_size_bytes'] + entry['size_bytes'] > max_size_bytes):
+                 if not self._memory_cache:
+                     break
+                 # Remove the oldest item (first inserted into the dict)
+                 oldest_key = next(iter(self._memory_cache))
+                 old_entry = self._memory_cache.pop(oldest_key)
+                 self._cache_stats['memory_size_bytes'] -= old_entry.get('size_bytes', 0)
+
+             # Add new entry
+             self._memory_cache[cache_key] = entry
+             self._cache_stats['memory_size_bytes'] += entry['size_bytes']
+
+     def _get_from_disk(self, cache_key: str) -> Optional[Any]:
+         """Get item from the disk cache."""
+         if not self.config.disk_cache_enabled:
+             return None
+
+         cache_file = self.cache_dir / f"{cache_key}.pkl"
+         if cache_file.exists():
+             try:
+                 with open(cache_file, 'rb') as f:
+                     entry = pickle.load(f)
+                     if self._is_cache_valid(entry):
+                         return entry['data']
+                     else:
+                         # Remove expired file
+                         cache_file.unlink()
+             except Exception:
+                 # If the file is corrupted, remove it
+                 cache_file.unlink(missing_ok=True)
+
+         return None
+
+     def _store_on_disk(self, cache_key: str, data: Any):
+         """Store item in the disk cache."""
+         if not self.config.disk_cache_enabled:
+             return
+
+         cache_file = self.cache_dir / f"{cache_key}.pkl"
+         try:
+             entry = {
+                 'data': data,
+                 'timestamp': time.time()
+             }
+             with open(cache_file, 'wb') as f:
+                 pickle.dump(entry, f)
+         except Exception as e:
+             print(f"Warning: Could not save to disk cache: {e}")
+
+     def get(self, cache_key: str) -> Optional[Any]:
+         """Get item from cache (memory first, then disk)."""
+         if not self.config.enabled:
+             return None
+
+         # Try memory cache first
+         result = self._get_from_memory(cache_key)
+         if result is not None:
+             return result
+
+         # Try disk cache
+         result = self._get_from_disk(cache_key)
+         if result is not None:
+             # Store in memory for future access
+             self._store_in_memory(cache_key, result)
+             return result
+
+         return None
+
+     def set(self, cache_key: str, data: Any):
+         """Store item in cache (both memory and disk)."""
+         if not self.config.enabled:
+             return
+
+         self._store_in_memory(cache_key, data)
+         self._store_on_disk(cache_key, data)
+
+     def clear(self):
+         """Clear all caches."""
+         with self._lock:
+             self._memory_cache.clear()
+             self._cache_stats['memory_size_bytes'] = 0
+
+         if self.config.disk_cache_enabled and hasattr(self, 'cache_dir') and self.cache_dir.exists():
+             for cache_file in self.cache_dir.glob("*.pkl"):
+                 cache_file.unlink()
+
+     def get_stats(self) -> Dict[str, Any]:
+         """Get cache statistics."""
+         total_requests = self._cache_stats['hits'] + self._cache_stats['misses']
+         hit_rate = (self._cache_stats['hits'] / total_requests * 100) if total_requests > 0 else 0
+         memory_size_mb = self._cache_stats.get('memory_size_bytes', 0) / (1024 * 1024)
+
+         return {
+             'enabled': self.config.enabled,
+             'memory_cache_items': len(self._memory_cache),
+             'memory_cache_size_mb': round(memory_size_mb, 2),
+             'memory_cache_limit_mb': self.config.memory_cache_size_mb,
+             'max_items': self.config.max_items,
+             'hits': self._cache_stats['hits'],
+             'misses': self._cache_stats['misses'],
+             'hit_rate_percent': round(hit_rate, 2),
+             'disk_cache_enabled': self.config.disk_cache_enabled,
+             'cache_ttl_hours': self.config.cache_ttl_hours,
+             'cache_ttl_days': round(self.config.cache_ttl_hours / 24, 1)
+         }
+
+
+ # Global cache instance
+ _global_cache = VFBQueryCache()
+
+ def configure_cache(config: CacheConfig):
+     """Configure the global cache instance."""
+     global _global_cache
+     _global_cache = VFBQueryCache(config)
+
+ def get_cache() -> VFBQueryCache:
+     """Get the global cache instance."""
+     return _global_cache
+
+ def cache_result(cache_prefix: str, enabled_check: Optional[str] = None):
+     """
+     Decorator to cache function results.
+
+     Args:
+         cache_prefix: Prefix for cache keys
+         enabled_check: Config attribute to check if this cache type is enabled
+     """
+     def decorator(func):
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             cache = get_cache()
+
+             # Check if this specific cache type is enabled
+             if enabled_check and not getattr(cache.config, enabled_check, True):
+                 return func(*args, **kwargs)
+
+             # Generate cache key
+             cache_key = cache._generate_cache_key(cache_prefix, *args, **kwargs)
+
+             # Try to get from cache
+             cached_result = cache.get(cache_key)
+             if cached_result is not None:
+                 return cached_result
+
+             # Execute function and cache result
+             result = func(*args, **kwargs)
+             if result is not None:  # Only cache non-None results
+                 cache.set(cache_key, result)
+
+             return result
+
+         return wrapper
+     return decorator
+
+
+ def enable_vfbquery_caching(
+     cache_ttl_hours: int = 2160,  # 3 months default
+     memory_cache_size_mb: int = 2048,  # 2GB default
+     max_items: int = 10000,
+     disk_cache_enabled: bool = True,
+     disk_cache_dir: Optional[str] = None
+ ):
+     """
+     Enable VFBquery caching with specified configuration.
+
+     Args:
+         cache_ttl_hours: Cache time-to-live in hours (default: 2160 = 3 months)
+         memory_cache_size_mb: Maximum memory cache size in MB (default: 2048 = 2GB)
+         max_items: Maximum number of items in memory cache (default: 10000)
+         disk_cache_enabled: Enable persistent disk caching (default: True)
+         disk_cache_dir: Custom cache directory path (optional)
+
+     Usage:
+         from vfbquery.cache_enhancements import enable_vfbquery_caching
+         enable_vfbquery_caching()  # Use defaults: 3 months TTL, 2GB memory
+         enable_vfbquery_caching(cache_ttl_hours=720, memory_cache_size_mb=1024)  # 1 month, 1GB
+     """
+     config = CacheConfig(
+         enabled=True,
+         cache_ttl_hours=cache_ttl_hours,
+         memory_cache_size_mb=memory_cache_size_mb,
+         max_items=max_items,
+         disk_cache_enabled=disk_cache_enabled,
+         disk_cache_dir=disk_cache_dir
+     )
+     configure_cache(config)
+     print(f"VFBquery caching enabled: TTL={cache_ttl_hours}h ({cache_ttl_hours//24} days), Memory={memory_cache_size_mb}MB")
+
+ def disable_vfbquery_caching():
+     """Disable VFBquery caching."""
+     config = CacheConfig(enabled=False)
+     configure_cache(config)
+     print("VFBquery caching disabled")
+
+ def clear_vfbquery_cache():
+     """Clear all VFBquery caches."""
+     get_cache().clear()
+     print("VFBquery cache cleared")
+
+ def get_vfbquery_cache_stats() -> Dict[str, Any]:
+     """Get VFBquery cache statistics."""
+     return get_cache().get_stats()
+
+ def set_cache_ttl(hours: int):
+     """
+     Update the cache TTL (time-to-live) for new cache entries.
+
+     Args:
+         hours: New TTL in hours (e.g., 24 for 1 day, 720 for 1 month, 2160 for 3 months)
+
+     Examples:
+         set_cache_ttl(24)    # 1 day
+         set_cache_ttl(168)   # 1 week
+         set_cache_ttl(720)   # 1 month
+         set_cache_ttl(2160)  # 3 months (default)
+     """
+     cache = get_cache()
+     cache.config.cache_ttl_hours = hours
+     days = hours / 24
+     print(f"Cache TTL updated to {hours} hours ({days:.1f} days)")
+
+ def set_cache_memory_limit(size_mb: int):
+     """
+     Update the memory cache size limit.
+
+     Args:
+         size_mb: Maximum memory cache size in MB (e.g., 512, 1024, 2048)
+
+     Examples:
+         set_cache_memory_limit(512)   # 512MB
+         set_cache_memory_limit(1024)  # 1GB
+         set_cache_memory_limit(2048)  # 2GB (default)
+     """
+     cache = get_cache()
+     old_limit = cache.config.memory_cache_size_mb
+     cache.config.memory_cache_size_mb = size_mb
+
+     # If reducing the size, trigger eviction if needed
+     if size_mb < old_limit:
+         with cache._lock:
+             max_size_bytes = size_mb * 1024 * 1024
+             while cache._cache_stats.get('memory_size_bytes', 0) > max_size_bytes:
+                 if not cache._memory_cache:
+                     break
+                 # Remove oldest item
+                 oldest_key = next(iter(cache._memory_cache))
+                 old_entry = cache._memory_cache.pop(oldest_key)
+                 cache._cache_stats['memory_size_bytes'] -= old_entry.get('size_bytes', 0)
+
+     print(f"Memory cache limit updated from {old_limit}MB to {size_mb}MB")
+
+ def set_cache_max_items(max_items: int):
+     """
+     Update the maximum number of items in the memory cache.
+
+     Args:
+         max_items: Maximum number of cached items (e.g., 1000, 5000, 10000)
+
+     Examples:
+         set_cache_max_items(1000)   # 1K items
+         set_cache_max_items(5000)   # 5K items
+         set_cache_max_items(10000)  # 10K items (default)
+     """
+     cache = get_cache()
+     old_limit = cache.config.max_items
+     cache.config.max_items = max_items
+
+     # If reducing the count, trigger eviction if needed
+     if max_items < old_limit:
+         with cache._lock:
+             while len(cache._memory_cache) > max_items:
+                 if not cache._memory_cache:
+                     break
+                 # Remove oldest item
+                 oldest_key = next(iter(cache._memory_cache))
+                 old_entry = cache._memory_cache.pop(oldest_key)
+                 cache._cache_stats['memory_size_bytes'] -= old_entry.get('size_bytes', 0)
+
+     print(f"Max cache items updated from {old_limit} to {max_items}")
+
+ def enable_disk_cache(cache_dir: Optional[str] = None):
+     """
+     Enable persistent disk caching.
+
+     Args:
+         cache_dir: Optional custom cache directory path
+
+     Examples:
+         enable_disk_cache()  # Use default location
+         enable_disk_cache('/tmp/my_vfbquery_cache')  # Custom location
+     """
+     cache = get_cache()
+     cache.config.disk_cache_enabled = True
+
+     if cache_dir:
+         cache.config.disk_cache_dir = cache_dir
+         cache.cache_dir = Path(cache_dir)
+         cache.cache_dir.mkdir(parents=True, exist_ok=True)
+
+     print(f"Disk caching enabled: {getattr(cache, 'cache_dir', 'default location')}")
+
+ def disable_disk_cache():
+     """Disable persistent disk caching (memory cache only)."""
+     cache = get_cache()
+     cache.config.disk_cache_enabled = False
+     print("Disk caching disabled (memory cache only)")
+
+ def get_cache_config() -> Dict[str, Any]:
+     """
+     Get current cache configuration settings.
+
+     Returns:
+         Dictionary with the current cache configuration
+     """
+     cache = get_cache()
+     config = cache.config
+
+     return {
+         'enabled': config.enabled,
+         'cache_ttl_hours': config.cache_ttl_hours,
+         'cache_ttl_days': config.cache_ttl_hours / 24,
+         'memory_cache_size_mb': config.memory_cache_size_mb,
+         'max_items': config.max_items,
+         'disk_cache_enabled': config.disk_cache_enabled,
+         'disk_cache_dir': config.disk_cache_dir,
+         'solr_cache_enabled': config.solr_cache_enabled,
+         'term_info_cache_enabled': config.term_info_cache_enabled,
+         'query_result_cache_enabled': config.query_result_cache_enabled
+     }
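
For orientation, a minimal usage sketch of the helper API this file adds. The import path follows the module's own docstring; the 720-hour TTL and 1024 MB limit are arbitrary illustration values, not recommendations:

    from vfbquery.cache_enhancements import (
        enable_vfbquery_caching,
        set_cache_ttl,
        get_vfbquery_cache_stats,
        clear_vfbquery_cache,
    )

    # Turn caching on with a 1-month TTL and a 1 GB memory cap
    enable_vfbquery_caching(cache_ttl_hours=720, memory_cache_size_mb=1024)
    set_cache_ttl(168)                 # later: shorten the TTL for new entries to 1 week
    print(get_vfbquery_cache_stats())  # hits, misses, hit_rate_percent, size counters
    clear_vfbquery_cache()             # wipe both the memory and disk caches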
@@ -0,0 +1,227 @@
+ """
+ Cached VFBquery Functions
+
+ Enhanced versions of VFBquery functions with integrated caching,
+ inspired by VFB_connect optimizations.
+ """
+
+ from typing import Dict, Any, Optional
+ from .cache_enhancements import cache_result, get_cache
+ from .vfb_queries import (
+     get_term_info as _original_get_term_info,
+     get_instances as _original_get_instances,
+     vfb_solr,
+     term_info_parse_object as _original_term_info_parse_object,
+     fill_query_results as _original_fill_query_results
+ )
+
+
+ def is_valid_term_info_result(result):
+     """Check if a term_info result has the essential fields and a valid query structure."""
+     if not result or not isinstance(result, dict):
+         return False
+
+     # Check for essential fields
+     if not (result.get('Id') and result.get('Name')):
+         return False
+
+     # Additional validation for query results
+     if 'Queries' in result:
+         for query in result['Queries']:
+             # A count of -1 indicates failed execution
+             # Note: count=0 is valid if the preview_results structure is correct
+             count = query.get('count', 0)
+
+             # Check if preview_results has the correct structure
+             preview_results = query.get('preview_results')
+             if not isinstance(preview_results, dict):
+                 print(f"DEBUG: Invalid preview_results type {type(preview_results)} detected")
+                 return False
+
+             headers = preview_results.get('headers', [])
+             if not headers:
+                 print("DEBUG: Empty headers detected in preview_results")
+                 return False
+
+             # Only reject on count of -1 (failed execution); count=0 with a valid
+             # preview_results structure is acceptable
+             if count < 0:
+                 print(f"DEBUG: Invalid query count {count} detected")
+                 return False
+
+     return True
+
+ @cache_result("solr_search", "solr_cache_enabled")
+ def cached_solr_search(query: str):
+     """Cached version of SOLR search."""
+     return vfb_solr.search(query)
+
+ @cache_result("term_info_parse", "term_info_cache_enabled")
+ def cached_term_info_parse_object(results, short_form: str):
+     """Cached version of term_info_parse_object."""
+     return _original_term_info_parse_object(results, short_form)
+
+ @cache_result("query_results", "query_result_cache_enabled")
+ def cached_fill_query_results(term_info: Dict[str, Any]):
+     """Cached version of fill_query_results."""
+     return _original_fill_query_results(term_info)
+
+ @cache_result("get_instances", "query_result_cache_enabled")
+ def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1):
+     """Cached version of get_instances."""
+     return _original_get_instances(short_form, return_dataframe, limit)
+
+ def get_term_info_cached(short_form: str, preview: bool = False):
+     """
+     Enhanced get_term_info with multi-layer caching.
+
+     This version uses caching at multiple levels:
+     1. Final result caching (entire term_info response)
+     2. SOLR query result caching
+     3. Term info parsing caching
+     4. Query result caching
+
+     Args:
+         short_form: Term short form (e.g., 'FBbt_00003748')
+         preview: Whether to include preview results
+
+     Returns:
+         Term info dictionary or None if not found
+     """
+     cache = get_cache()
+
+     # Check for complete result in cache first
+     cache_key = cache._generate_cache_key("term_info_complete", short_form, preview)
+     cached_result = cache.get(cache_key)
+     print(f"DEBUG: Cache lookup for {short_form}: {'HIT' if cached_result is not None else 'MISS'}")
+     if cached_result is not None:
+         # Validate that cached result has essential fields
+         if not is_valid_term_info_result(cached_result):
+             print(f"DEBUG: Cached result incomplete for {short_form}, falling back to original function")
+             print(f"DEBUG: cached_result keys: {list(cached_result.keys()) if cached_result else 'None'}")
+             print(f"DEBUG: cached_result Id: {cached_result.get('Id', 'MISSING') if cached_result else 'None'}")
+             print(f"DEBUG: cached_result Name: {cached_result.get('Name', 'MISSING') if cached_result else 'None'}")
+
+             # Fall back to original function and cache the complete result
+             fallback_result = _original_get_term_info(short_form, preview)
+             if is_valid_term_info_result(fallback_result):
+                 print(f"DEBUG: Fallback successful, caching complete result for {short_form}")
+                 cache.set(cache_key, fallback_result)
+             return fallback_result
+         else:
+             print(f"DEBUG: Using valid cached result for {short_form}")
+             return cached_result
+
+     parsed_object = None
+     try:
+         # Use cached SOLR search
+         results = cached_solr_search('id:' + short_form)
+
+         # Use cached term info parsing
+         parsed_object = cached_term_info_parse_object(results, short_form)
+
+         if parsed_object:
+             # Use cached query result filling (skip if queries would fail)
+             if parsed_object.get('Queries') and len(parsed_object['Queries']) > 0:
+                 try:
+                     term_info = cached_fill_query_results(parsed_object)
+                     if term_info:
+                         # Validate result before caching
+                         if term_info.get('Id') and term_info.get('Name'):
+                             # Cache the complete result
+                             cache.set(cache_key, term_info)
+                             return term_info
+                         else:
+                             print(f"Query result for {short_form} is incomplete, falling back to original function...")
+                             return _original_get_term_info(short_form, preview)
+                     else:
+                         print("Failed to fill query preview results!")
+                         # Validate result before caching
+                         if parsed_object.get('Id') and parsed_object.get('Name'):
+                             # Cache the complete result
+                             cache.set(cache_key, parsed_object)
+                             return parsed_object
+                         else:
+                             print(f"Parsed object for {short_form} is incomplete, falling back to original function...")
+                             return _original_get_term_info(short_form, preview)
+                 except Exception as e:
+                     print(f"Error filling query results (continuing without query data): {e}")
+                     # Validate result before caching
+                     if is_valid_term_info_result(parsed_object):
+                         cache.set(cache_key, parsed_object)
+                         return parsed_object
+                     else:
+                         print(f"DEBUG: Exception case - parsed object incomplete for {short_form}, falling back to original function")
+                         fallback_result = _original_get_term_info(short_form, preview)
+                         if is_valid_term_info_result(fallback_result):
+                             cache.set(cache_key, fallback_result)
+                         return fallback_result
+             else:
+                 # No queries to fill, validate result before caching
+                 if parsed_object.get('Id') and parsed_object.get('Name'):
+                     # Cache and return parsed object directly
+                     cache.set(cache_key, parsed_object)
+                     return parsed_object
+                 else:
+                     print(f"DEBUG: No queries case - parsed object incomplete for {short_form}, falling back to original function...")
+                     fallback_result = _original_get_term_info(short_form, preview)
+                     if is_valid_term_info_result(fallback_result):
+                         cache.set(cache_key, fallback_result)
+                     return fallback_result
+         else:
+             print(f"No valid term info found for ID '{short_form}'")
+             return None
+
+     except Exception as e:
+         print(f"Error in cached get_term_info: {type(e).__name__}: {e}")
+         # Fall back to original function if caching fails
+         return _original_get_term_info(short_form, preview)
+
+ def get_instances_cached(short_form: str, return_dataframe=True, limit: int = -1):
+     """
+     Enhanced get_instances with caching.
+
+     This cached version can provide a dramatic speedup for repeated queries,
+     and is especially useful for:
+     - UI applications with repeated browsing
+     - Data analysis workflows
+     - Testing and development
+
+     Args:
+         short_form: Class short form
+         return_dataframe: Whether to return a DataFrame or a formatted dict
+         limit: Maximum number of results (-1 for all)
+
+     Returns:
+         Instances data (DataFrame or formatted dict based on return_dataframe)
+     """
+     return cached_get_instances(short_form, return_dataframe, limit)
+
+ # Convenience function to replace the original functions
+ def patch_vfbquery_with_caching():
+     """
+     Replace original VFBquery functions with cached versions.
+
+     This allows existing code to benefit from caching without changes.
+     """
+     import vfbquery.vfb_queries as vfb_queries
+
+     # Store original functions for fallback
+     setattr(vfb_queries, '_original_get_term_info', vfb_queries.get_term_info)
+     setattr(vfb_queries, '_original_get_instances', vfb_queries.get_instances)
+
+     # Replace with cached versions
+     vfb_queries.get_term_info = get_term_info_cached
+     vfb_queries.get_instances = get_instances_cached
+
+     print("VFBquery functions patched with caching support")
+
+ def unpatch_vfbquery_caching():
+     """Restore original VFBquery functions."""
+     import vfbquery.vfb_queries as vfb_queries
+
+     if hasattr(vfb_queries, '_original_get_term_info'):
+         vfb_queries.get_term_info = getattr(vfb_queries, '_original_get_term_info')
+     if hasattr(vfb_queries, '_original_get_instances'):
+         vfb_queries.get_instances = getattr(vfb_queries, '_original_get_instances')
+
+     print("VFBquery functions restored to original (non-cached) versions")