vfbquery-0.3.4-py3-none-any.whl → vfbquery-0.4.0-py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- test/term_info_queries_test.py +58 -0
- test/test_default_caching.py +173 -0
- test/test_examples_diff.py +6 -1
- vfbquery/__init__.py +62 -1
- vfbquery/cache_enhancements.py +465 -0
- vfbquery/cached_functions.py +227 -0
- vfbquery/solr_cache_integration.py +212 -0
- vfbquery/solr_fetcher.py +47 -3
- vfbquery/solr_result_cache.py +613 -0
- vfbquery/vfb_queries.py +268 -46
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.0.dist-info}/METADATA +6 -6
- vfbquery-0.4.0.dist-info/RECORD +19 -0
- vfbquery-0.3.4.dist-info/RECORD +0 -14
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.0.dist-info}/LICENSE +0 -0
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.0.dist-info}/WHEEL +0 -0
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.0.dist-info}/top_level.txt +0 -0
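The new caching layer can be switched off process-wide through an environment variable checked in VFBQueryCache.__init__ (see the first hunk below). Since the module constructs its _global_cache instance at import time, the variable has to be set before the module is first imported. A minimal sketch, illustrative rather than part of the diff:

import os

os.environ['VFBQUERY_CACHE_ENABLED'] = 'false'  # 'false', '0', or 'no' all disable caching
import vfbquery.cache_enhancements  # _global_cache is now constructed with enabled=False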
vfbquery/cache_enhancements.py (added)
@@ -0,0 +1,465 @@
"""
VFBquery Caching Enhancements

This module implements caching optimizations inspired by VFB_connect
to improve VFBquery performance for repeated queries.

Features:
1. Term info result caching (similar to VFB_connect's VFBTerm cache)
2. SOLR query result caching
3. Query result caching for get_instances and other functions
4. Configurable cache expiry and size limits
5. Memory-based and disk-based caching options
"""

import os
import json
import time
import pickle
import hashlib
from pathlib import Path
from typing import Dict, Any, Optional, Union
from functools import lru_cache, wraps
from dataclasses import dataclass, asdict
import threading

# Custom JSON encoder for caching
from .vfb_queries import NumpyEncoder

@dataclass
class CacheConfig:
    """Configuration for VFBquery caching system."""
    enabled: bool = True
    memory_cache_size_mb: int = 2048  # Max memory cache size in MB (2GB default)
    max_items: int = 10000  # Max items in memory cache (fallback limit)
    disk_cache_enabled: bool = True
    disk_cache_dir: Optional[str] = None
    cache_ttl_hours: int = 2160  # Cache time-to-live in hours (3 months = 90 days * 24 hours)
    solr_cache_enabled: bool = True
    term_info_cache_enabled: bool = True
    query_result_cache_enabled: bool = True

class VFBQueryCache:
    """
    Enhanced caching system for VFBquery inspired by VFB_connect optimizations.

    Provides multiple layers of caching:
    - Memory cache for frequently accessed items (size-limited)
    - Disk cache for persistence across sessions
    - Query result caching for expensive operations
    """

    def __init__(self, config: Optional[CacheConfig] = None):
        self.config = config or CacheConfig()
        self._memory_cache: Dict[str, Dict[str, Any]] = {}
        self._cache_stats = {'hits': 0, 'misses': 0, 'memory_size_bytes': 0}
        self._lock = threading.RLock()

        # Set up disk cache directory
        if self.config.disk_cache_enabled:
            if self.config.disk_cache_dir:
                self.cache_dir = Path(self.config.disk_cache_dir)
            else:
                # Use similar location to VFB_connect
                self.cache_dir = Path.home() / '.vfbquery_cache'
            self.cache_dir.mkdir(exist_ok=True)

        # Enable caching based on environment variable (like VFB_connect)
        env_enabled = os.getenv('VFBQUERY_CACHE_ENABLED', '').lower()
        if env_enabled in ('false', '0', 'no'):
            self.config.enabled = False

    def _generate_cache_key(self, prefix: str, *args, **kwargs) -> str:
        """Generate a cache key from function arguments."""
        # Create deterministic hash from arguments
        key_data = f"{prefix}:{args}:{sorted(kwargs.items())}"
        return hashlib.md5(key_data.encode()).hexdigest()

    def _is_cache_valid(self, cache_entry: Dict[str, Any]) -> bool:
        """Check if cache entry is still valid based on TTL."""
        if not cache_entry or 'timestamp' not in cache_entry:
            return False

        age_hours = (time.time() - cache_entry['timestamp']) / 3600
        return age_hours < self.config.cache_ttl_hours

    def _get_from_memory(self, cache_key: str) -> Optional[Any]:
        """Get item from memory cache."""
        with self._lock:
            if cache_key in self._memory_cache:
                entry = self._memory_cache[cache_key]
                if self._is_cache_valid(entry):
                    self._cache_stats['hits'] += 1
                    return entry['data']
                else:
                    # Remove expired entry and update memory size tracking
                    expired_entry = self._memory_cache.pop(cache_key)
                    self._cache_stats['memory_size_bytes'] -= expired_entry.get('size_bytes', 0)

            self._cache_stats['misses'] += 1
            return None

    def _get_object_size(self, obj: Any) -> int:
        """Estimate memory size of an object in bytes."""
        try:
            import sys
            if isinstance(obj, (str, bytes)):
                return len(obj)
            elif isinstance(obj, dict):
                return sum(self._get_object_size(k) + self._get_object_size(v) for k, v in obj.items())
            elif isinstance(obj, (list, tuple)):
                return sum(self._get_object_size(item) for item in obj)
            else:
                # Fallback: use sys.getsizeof for other objects
                return sys.getsizeof(obj)
        except Exception:
            # If size estimation fails, assume 1KB
            return 1024

    def _store_in_memory(self, cache_key: str, data: Any):
        """Store item in memory cache with size-based LRU eviction."""
        with self._lock:
            entry = {
                'data': data,
                'timestamp': time.time(),
                'size_bytes': self._get_object_size(data)
            }

            # Check if we need to evict items to stay under memory limit
            max_size_bytes = self.config.memory_cache_size_mb * 1024 * 1024

            # If this single item is larger than the cache limit, don't cache it
            if entry['size_bytes'] > max_size_bytes:
                return

            # Evict items if adding this one would exceed memory limit or max items
            while (len(self._memory_cache) >= self.config.max_items or
                   self._cache_stats['memory_size_bytes'] + entry['size_bytes'] > max_size_bytes):
                if not self._memory_cache:
                    break
                # Remove oldest item (first in dict)
                oldest_key = next(iter(self._memory_cache))
                old_entry = self._memory_cache.pop(oldest_key)
                self._cache_stats['memory_size_bytes'] -= old_entry.get('size_bytes', 0)

            # Add new entry
            self._memory_cache[cache_key] = entry
            self._cache_stats['memory_size_bytes'] += entry['size_bytes']

    def _get_from_disk(self, cache_key: str) -> Optional[Any]:
        """Get item from disk cache."""
        if not self.config.disk_cache_enabled:
            return None

        cache_file = self.cache_dir / f"{cache_key}.pkl"
        if cache_file.exists():
            try:
                with open(cache_file, 'rb') as f:
                    entry = pickle.load(f)
                if self._is_cache_valid(entry):
                    return entry['data']
                else:
                    # Remove expired file
                    cache_file.unlink()
            except Exception:
                # If file is corrupted, remove it
                cache_file.unlink(missing_ok=True)

        return None

    def _store_on_disk(self, cache_key: str, data: Any):
        """Store item on disk cache."""
        if not self.config.disk_cache_enabled:
            return

        cache_file = self.cache_dir / f"{cache_key}.pkl"
        try:
            entry = {
                'data': data,
                'timestamp': time.time()
            }
            with open(cache_file, 'wb') as f:
                pickle.dump(entry, f)
        except Exception as e:
            print(f"Warning: Could not save to disk cache: {e}")

    def get(self, cache_key: str) -> Optional[Any]:
        """Get item from cache (memory first, then disk)."""
        if not self.config.enabled:
            return None

        # Try memory cache first
        result = self._get_from_memory(cache_key)
        if result is not None:
            return result

        # Try disk cache
        result = self._get_from_disk(cache_key)
        if result is not None:
            # Store in memory for future access
            self._store_in_memory(cache_key, result)
            return result

        return None

    def set(self, cache_key: str, data: Any):
        """Store item in cache (both memory and disk)."""
        if not self.config.enabled:
            return

        self._store_in_memory(cache_key, data)
        self._store_on_disk(cache_key, data)

    def clear(self):
        """Clear all caches."""
        with self._lock:
            self._memory_cache.clear()
            self._cache_stats['memory_size_bytes'] = 0

        if self.config.disk_cache_enabled and hasattr(self, 'cache_dir') and self.cache_dir.exists():
            for cache_file in self.cache_dir.glob("*.pkl"):
                cache_file.unlink()

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        total_requests = self._cache_stats['hits'] + self._cache_stats['misses']
        hit_rate = (self._cache_stats['hits'] / total_requests * 100) if total_requests > 0 else 0
        memory_size_mb = self._cache_stats.get('memory_size_bytes', 0) / (1024 * 1024)

        return {
            'enabled': self.config.enabled,
            'memory_cache_items': len(self._memory_cache),
            'memory_cache_size_mb': round(memory_size_mb, 2),
            'memory_cache_limit_mb': self.config.memory_cache_size_mb,
            'max_items': self.config.max_items,
            'hits': self._cache_stats['hits'],
            'misses': self._cache_stats['misses'],
            'hit_rate_percent': round(hit_rate, 2),
            'disk_cache_enabled': self.config.disk_cache_enabled,
            'cache_ttl_hours': self.config.cache_ttl_hours,
            'cache_ttl_days': round(self.config.cache_ttl_hours / 24, 1)
        }


# Global cache instance
_global_cache = VFBQueryCache()

def configure_cache(config: CacheConfig):
    """Configure the global cache instance."""
    global _global_cache
    _global_cache = VFBQueryCache(config)

def get_cache() -> VFBQueryCache:
    """Get the global cache instance."""
    return _global_cache

def cache_result(cache_prefix: str, enabled_check: Optional[str] = None):
    """
    Decorator to cache function results.

    Args:
        cache_prefix: Prefix for cache keys
        enabled_check: Config attribute to check if this cache type is enabled
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            cache = get_cache()

            # Check if this specific cache type is enabled
            if enabled_check and not getattr(cache.config, enabled_check, True):
                return func(*args, **kwargs)

            # Generate cache key
            cache_key = cache._generate_cache_key(cache_prefix, *args, **kwargs)

            # Try to get from cache
            cached_result = cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = func(*args, **kwargs)
            if result is not None:  # Only cache non-None results
                cache.set(cache_key, result)

            return result

        return wrapper
    return decorator


def enable_vfbquery_caching(
    cache_ttl_hours: int = 2160,  # 3 months default
    memory_cache_size_mb: int = 2048,  # 2GB default
    max_items: int = 10000,
    disk_cache_enabled: bool = True,
    disk_cache_dir: Optional[str] = None
):
    """
    Enable VFBquery caching with specified configuration.

    Args:
        cache_ttl_hours: Cache time-to-live in hours (default: 2160 = 3 months)
        memory_cache_size_mb: Maximum memory cache size in MB (default: 2048 = 2GB)
        max_items: Maximum number of items in memory cache (default: 10000)
        disk_cache_enabled: Enable persistent disk caching (default: True)
        disk_cache_dir: Custom cache directory path (optional)

    Usage:
        from vfbquery.cache_enhancements import enable_vfbquery_caching
        enable_vfbquery_caching()  # Use defaults: 3 months TTL, 2GB memory
        enable_vfbquery_caching(cache_ttl_hours=720, memory_cache_size_mb=1024)  # 1 month, 1GB
    """
    config = CacheConfig(
        enabled=True,
        cache_ttl_hours=cache_ttl_hours,
        memory_cache_size_mb=memory_cache_size_mb,
        max_items=max_items,
        disk_cache_enabled=disk_cache_enabled,
        disk_cache_dir=disk_cache_dir
    )
    configure_cache(config)
    print(f"VFBquery caching enabled: TTL={cache_ttl_hours}h ({cache_ttl_hours//24} days), Memory={memory_cache_size_mb}MB")

def disable_vfbquery_caching():
    """Disable VFBquery caching."""
    config = CacheConfig(enabled=False)
    configure_cache(config)
    print("VFBquery caching disabled")

def clear_vfbquery_cache():
    """Clear all VFBquery caches."""
    get_cache().clear()
    print("VFBquery cache cleared")

def get_vfbquery_cache_stats() -> Dict[str, Any]:
    """Get VFBquery cache statistics."""
    return get_cache().get_stats()

def set_cache_ttl(hours: int):
    """
    Update the cache TTL (time-to-live) for new cache entries.

    Args:
        hours: New TTL in hours (e.g., 24 for 1 day, 720 for 1 month, 2160 for 3 months)

    Examples:
        set_cache_ttl(24)    # 1 day
        set_cache_ttl(168)   # 1 week
        set_cache_ttl(720)   # 1 month
        set_cache_ttl(2160)  # 3 months (default)
    """
    cache = get_cache()
    cache.config.cache_ttl_hours = hours
    days = hours / 24
    print(f"Cache TTL updated to {hours} hours ({days:.1f} days)")

def set_cache_memory_limit(size_mb: int):
    """
    Update the memory cache size limit.

    Args:
        size_mb: Maximum memory cache size in MB (e.g., 512, 1024, 2048)

    Examples:
        set_cache_memory_limit(512)   # 512MB
        set_cache_memory_limit(1024)  # 1GB
        set_cache_memory_limit(2048)  # 2GB (default)
    """
    cache = get_cache()
    old_limit = cache.config.memory_cache_size_mb
    cache.config.memory_cache_size_mb = size_mb

    # If reducing size, trigger eviction if needed
    if size_mb < old_limit:
        with cache._lock:
            max_size_bytes = size_mb * 1024 * 1024
            while cache._cache_stats.get('memory_size_bytes', 0) > max_size_bytes:
                if not cache._memory_cache:
                    break
                # Remove oldest item
                oldest_key = next(iter(cache._memory_cache))
                old_entry = cache._memory_cache.pop(oldest_key)
                cache._cache_stats['memory_size_bytes'] -= old_entry.get('size_bytes', 0)

    print(f"Memory cache limit updated from {old_limit}MB to {size_mb}MB")

def set_cache_max_items(max_items: int):
    """
    Update the maximum number of items in memory cache.

    Args:
        max_items: Maximum number of cached items (e.g., 1000, 5000, 10000)

    Examples:
        set_cache_max_items(1000)   # 1K items
        set_cache_max_items(5000)   # 5K items
        set_cache_max_items(10000)  # 10K items (default)
    """
    cache = get_cache()
    old_limit = cache.config.max_items
    cache.config.max_items = max_items

    # If reducing count, trigger eviction if needed
    if max_items < old_limit:
        with cache._lock:
            while len(cache._memory_cache) > max_items:
                if not cache._memory_cache:
                    break
                # Remove oldest item
                oldest_key = next(iter(cache._memory_cache))
                old_entry = cache._memory_cache.pop(oldest_key)
                cache._cache_stats['memory_size_bytes'] -= old_entry.get('size_bytes', 0)

    print(f"Max cache items updated from {old_limit} to {max_items}")

def enable_disk_cache(cache_dir: Optional[str] = None):
    """
    Enable persistent disk caching.

    Args:
        cache_dir: Optional custom cache directory path

    Examples:
        enable_disk_cache()  # Use default location
        enable_disk_cache('/tmp/my_vfbquery_cache')  # Custom location
    """
    cache = get_cache()
    cache.config.disk_cache_enabled = True

    if cache_dir:
        cache.config.disk_cache_dir = cache_dir
        cache.cache_dir = Path(cache_dir)
        cache.cache_dir.mkdir(exist_ok=True)

    print(f"Disk caching enabled: {getattr(cache, 'cache_dir', 'default location')}")

def disable_disk_cache():
    """Disable persistent disk caching (memory cache only)."""
    cache = get_cache()
    cache.config.disk_cache_enabled = False
    print("Disk caching disabled (memory cache only)")

def get_cache_config() -> Dict[str, Any]:
    """
    Get current cache configuration settings.

    Returns:
        Dictionary with current cache configuration
    """
    cache = get_cache()
    config = cache.config

    return {
        'enabled': config.enabled,
        'cache_ttl_hours': config.cache_ttl_hours,
        'cache_ttl_days': config.cache_ttl_hours / 24,
        'memory_cache_size_mb': config.memory_cache_size_mb,
        'max_items': config.max_items,
        'disk_cache_enabled': config.disk_cache_enabled,
        'disk_cache_dir': config.disk_cache_dir,
        'solr_cache_enabled': config.solr_cache_enabled,
        'term_info_cache_enabled': config.term_info_cache_enabled,
        'query_result_cache_enabled': config.query_result_cache_enabled
    }
vfbquery/cached_functions.py (added)
@@ -0,0 +1,227 @@
"""
Cached VFBquery Functions

Enhanced versions of VFBquery functions with integrated caching
inspired by VFB_connect optimizations.
"""

from typing import Dict, Any, Optional
from .cache_enhancements import cache_result, get_cache


def is_valid_term_info_result(result):
    """Check if a term_info result has the essential fields and valid query structure"""
    if not result or not isinstance(result, dict):
        return False

    # Check for essential fields
    if not (result.get('Id') and result.get('Name')):
        return False

    # Additional validation for query results
    if 'Queries' in result:
        for query in result['Queries']:
            # Check if query has invalid count (-1) which indicates failed execution
            # Note: count=0 is valid if preview_results structure is correct
            count = query.get('count', 0)

            # Check if preview_results has the correct structure
            preview_results = query.get('preview_results')
            if not isinstance(preview_results, dict):
                print(f"DEBUG: Invalid preview_results type {type(preview_results)} detected")
                return False

            headers = preview_results.get('headers', [])
            if not headers:
                print("DEBUG: Empty headers detected in preview_results")
                return False

            # Only reject if count is -1 (failed execution) or if count is 0 but preview_results is missing/empty
            if count < 0:
                print(f"DEBUG: Invalid query count {count} detected")
                return False

    return True

from .vfb_queries import (
    get_term_info as _original_get_term_info,
    get_instances as _original_get_instances,
    vfb_solr,
    term_info_parse_object as _original_term_info_parse_object,
    fill_query_results as _original_fill_query_results
)

@cache_result("solr_search", "solr_cache_enabled")
def cached_solr_search(query: str):
    """Cached version of SOLR search."""
    return vfb_solr.search(query)

@cache_result("term_info_parse", "term_info_cache_enabled")
def cached_term_info_parse_object(results, short_form: str):
    """Cached version of term_info_parse_object."""
    return _original_term_info_parse_object(results, short_form)

@cache_result("query_results", "query_result_cache_enabled")
def cached_fill_query_results(term_info: Dict[str, Any]):
    """Cached version of fill_query_results."""
    return _original_fill_query_results(term_info)

@cache_result("get_instances", "query_result_cache_enabled")
def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1):
    """Cached version of get_instances."""
    return _original_get_instances(short_form, return_dataframe, limit)

def get_term_info_cached(short_form: str, preview: bool = False):
    """
    Enhanced get_term_info with multi-layer caching.

    This version uses caching at multiple levels:
    1. Final result caching (entire term_info response)
    2. SOLR query result caching
    3. Term info parsing caching
    4. Query result caching

    Args:
        short_form: Term short form (e.g., 'FBbt_00003748')
        preview: Whether to include preview results

    Returns:
        Term info dictionary or None if not found
    """
    cache = get_cache()

    # Check for complete result in cache first
    cache_key = cache._generate_cache_key("term_info_complete", short_form, preview)
    cached_result = cache.get(cache_key)
    print(f"DEBUG: Cache lookup for {short_form}: {'HIT' if cached_result is not None else 'MISS'}")
    if cached_result is not None:
        # Validate that cached result has essential fields
        if not is_valid_term_info_result(cached_result):
            print(f"DEBUG: Cached result incomplete for {short_form}, falling back to original function")
            print(f"DEBUG: cached_result keys: {list(cached_result.keys()) if cached_result else 'None'}")
            print(f"DEBUG: cached_result Id: {cached_result.get('Id', 'MISSING') if cached_result else 'None'}")
            print(f"DEBUG: cached_result Name: {cached_result.get('Name', 'MISSING') if cached_result else 'None'}")

            # Fall back to original function and cache the complete result
            fallback_result = _original_get_term_info(short_form, preview)
            if is_valid_term_info_result(fallback_result):
                print(f"DEBUG: Fallback successful, caching complete result for {short_form}")
                cache.set(cache_key, fallback_result)
            return fallback_result
        else:
            print(f"DEBUG: Using valid cached result for {short_form}")
            return cached_result

    parsed_object = None
    try:
        # Use cached SOLR search
        results = cached_solr_search('id:' + short_form)

        # Use cached term info parsing
        parsed_object = cached_term_info_parse_object(results, short_form)

        if parsed_object:
            # Use cached query result filling (skip if queries would fail)
            if parsed_object.get('Queries') and len(parsed_object['Queries']) > 0:
                try:
                    term_info = cached_fill_query_results(parsed_object)
                    if term_info:
                        # Validate result before caching
                        if term_info.get('Id') and term_info.get('Name'):
                            # Cache the complete result
                            cache.set(cache_key, term_info)
                            return term_info
                        else:
                            print(f"Query result for {short_form} is incomplete, falling back to original function...")
                            return _original_get_term_info(short_form, preview)
                    else:
                        print("Failed to fill query preview results!")
                        # Validate result before caching
                        if parsed_object.get('Id') and parsed_object.get('Name'):
                            # Cache the complete result
                            cache.set(cache_key, parsed_object)
                            return parsed_object
                        else:
                            print(f"Parsed object for {short_form} is incomplete, falling back to original function...")
                            return _original_get_term_info(short_form, preview)
                except Exception as e:
                    print(f"Error filling query results (continuing without query data): {e}")
                    # Validate result before caching
                    if is_valid_term_info_result(parsed_object):
                        cache.set(cache_key, parsed_object)
                        return parsed_object
                    else:
                        print(f"DEBUG: Exception case - parsed object incomplete for {short_form}, falling back to original function")
                        fallback_result = _original_get_term_info(short_form, preview)
                        if is_valid_term_info_result(fallback_result):
                            cache.set(cache_key, fallback_result)
                        return fallback_result
            else:
                # No queries to fill, validate result before caching
                if parsed_object.get('Id') and parsed_object.get('Name'):
                    # Cache and return parsed object directly
                    cache.set(cache_key, parsed_object)
                    return parsed_object
                else:
                    print(f"DEBUG: No queries case - parsed object incomplete for {short_form}, falling back to original function...")
                    fallback_result = _original_get_term_info(short_form, preview)
                    if is_valid_term_info_result(fallback_result):
                        cache.set(cache_key, fallback_result)
                    return fallback_result
        else:
            print(f"No valid term info found for ID '{short_form}'")
            return None

    except Exception as e:
        print(f"Error in cached get_term_info: {type(e).__name__}: {e}")
        # Fall back to original function if caching fails
        return _original_get_term_info(short_form, preview)

def get_instances_cached(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Enhanced get_instances with caching.

    This cached version can provide dramatic speedup for repeated queries,
    especially useful for:
    - UI applications with repeated browsing
    - Data analysis workflows
    - Testing and development

    Args:
        short_form: Class short form
        return_dataframe: Whether to return DataFrame or formatted dict
        limit: Maximum number of results (-1 for all)

    Returns:
        Instances data (DataFrame or formatted dict based on return_dataframe)
    """
    return cached_get_instances(short_form, return_dataframe, limit)

# Convenience function to replace original functions
def patch_vfbquery_with_caching():
    """
    Replace original VFBquery functions with cached versions.

    This allows existing code to benefit from caching without changes.
    """
    import vfbquery.vfb_queries as vfb_queries

    # Store original functions for fallback
    setattr(vfb_queries, '_original_get_term_info', vfb_queries.get_term_info)
    setattr(vfb_queries, '_original_get_instances', vfb_queries.get_instances)

    # Replace with cached versions
    vfb_queries.get_term_info = get_term_info_cached
    vfb_queries.get_instances = get_instances_cached

    print("VFBquery functions patched with caching support")

def unpatch_vfbquery_caching():
    """Restore original VFBquery functions."""
    import vfbquery.vfb_queries as vfb_queries

    if hasattr(vfb_queries, '_original_get_term_info'):
        vfb_queries.get_term_info = getattr(vfb_queries, '_original_get_term_info')
    if hasattr(vfb_queries, '_original_get_instances'):
        vfb_queries.get_instances = getattr(vfb_queries, '_original_get_instances')

    print("VFBquery functions restored to original (non-cached) versions")