vfbquery 0.3.4__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +29 -1
- test/term_info_queries_test.py +58 -0
- test/test_default_caching.py +173 -0
- test/test_examples_diff.py +6 -1
- vfbquery/__init__.py +83 -1
- vfbquery/cache_enhancements.py +465 -0
- vfbquery/cached_functions.py +227 -0
- vfbquery/solr_cache_integration.py +212 -0
- vfbquery/solr_fetcher.py +47 -3
- vfbquery/solr_result_cache.py +659 -0
- vfbquery/vfb_queries.py +315 -73
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.1.dist-info}/METADATA +16 -16
- vfbquery-0.4.1.dist-info/RECORD +19 -0
- vfbquery-0.3.4.dist-info/RECORD +0 -14
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.1.dist-info}/LICENSE +0 -0
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.1.dist-info}/WHEEL +0 -0
- {vfbquery-0.3.4.dist-info → vfbquery-0.4.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration layer for SOLR-based result caching in VFBquery
|
|
3
|
+
|
|
4
|
+
This module patches existing VFBquery functions to use SOLR caching,
|
|
5
|
+
providing significant performance improvements for cold starts.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import functools
|
|
9
|
+
from typing import Any, Dict
|
|
10
|
+
from vfbquery.solr_result_cache import get_solr_cache, with_solr_cache
|
|
11
|
+
import vfbquery.vfb_queries as vfb_queries
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
class SolrCacheIntegration:
    """
    Integration layer for SOLR caching in VFBquery.

    Replaces ``vfb_queries.get_term_info`` and ``vfb_queries.get_instances``
    with wrappers that consult the SOLR result cache before running the
    original query, and can put the original functions back again.

    Cache problems are never fatal: a failed lookup or store is logged and
    the original function's result is returned regardless.
    """

    def __init__(self):
        # Original vfb_queries functions, keyed by attribute name. Only
        # populated while the caching wrappers are installed.
        self.original_functions = {}
        # Nothing has been patched yet, so the flag must start out False;
        # otherwise enable_solr_caching() would never install the wrappers.
        self.cache_enabled = False

    def enable_solr_caching(self):
        """Enable SOLR-based result caching for VFBquery functions.

        Does nothing when caching is already enabled.
        """
        if not self.cache_enabled:
            self._patch_functions()
            self.cache_enabled = True
            logger.info("SOLR result caching enabled")

    def disable_solr_caching(self):
        """Disable SOLR caching and restore the original functions.

        Does nothing when caching is already disabled.
        """
        if self.cache_enabled:
            self._unpatch_functions()
            self.cache_enabled = False
            logger.info("SOLR result caching disabled")

    def _patch_functions(self):
        """Patch VFBquery functions with SOLR caching."""
        if self.original_functions:
            # Already patched. Saving again would record the caching
            # wrappers as the "originals" and make unpatching impossible.
            return

        # Store original functions
        self.original_functions['get_term_info'] = vfb_queries.get_term_info
        self.original_functions['get_instances'] = vfb_queries.get_instances

        # Create cached versions
        vfb_queries.get_term_info = self._create_cached_get_term_info()
        vfb_queries.get_instances = self._create_cached_get_instances()

    def _unpatch_functions(self):
        """Restore the original functions saved by ``_patch_functions``."""
        for func_name, original_func in self.original_functions.items():
            setattr(vfb_queries, func_name, original_func)
        self.original_functions.clear()

    def _create_cached_get_term_info(self):
        """Create SOLR-cached version of get_term_info.

        Returns:
            A wrapper with the signature ``(short_form, preview=False)`` that
            returns the cached result when one exists and otherwise calls the
            original function and stores a truthy result in the cache.
        """
        original_func = self.original_functions['get_term_info']

        @functools.wraps(original_func)
        def cached_get_term_info(short_form: str, preview: bool = False):
            cache_params = {"preview": preview}
            cache = None

            try:
                # Obtaining the cache is part of the guarded section so an
                # unavailable cache falls back to the original function.
                cache = get_solr_cache()
                cached_result = cache.get_cached_result(
                    "term_info", short_form, **cache_params
                )
                if cached_result is not None:
                    logger.debug(f"SOLR cache hit for term_info({short_form})")
                    return cached_result

            except Exception as e:
                logger.warning(f"SOLR cache lookup failed, falling back: {e}")

            # Execute original function
            logger.debug(f"SOLR cache miss for term_info({short_form}), computing...")
            result = original_func(short_form, preview)

            # Store the fresh result (synchronous, best effort). Falsy
            # results are not cached.
            if result and cache is not None:
                try:
                    cache.cache_result("term_info", short_form, result, **cache_params)
                    logger.debug(f"Cached term_info result for {short_form}")
                except Exception as e:
                    logger.debug(f"Failed to cache term_info result: {e}")

            return result

        return cached_get_term_info

    def _create_cached_get_instances(self):
        """Create SOLR-cached version of get_instances.

        Returns:
            A wrapper with the signature
            ``(short_form, return_dataframe=True, limit=-1)`` that returns
            the cached result when one exists and otherwise calls the
            original function and stores any non-None result in the cache.
        """
        original_func = self.original_functions['get_instances']

        @functools.wraps(original_func)
        def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1):
            cache_params = {
                "return_dataframe": return_dataframe,
                "limit": limit
            }
            cache = None

            try:
                # Obtaining the cache is part of the guarded section so an
                # unavailable cache falls back to the original function.
                cache = get_solr_cache()
                cached_result = cache.get_cached_result(
                    "instances", short_form, **cache_params
                )
                if cached_result is not None:
                    logger.debug(f"SOLR cache hit for get_instances({short_form})")
                    return cached_result

            except Exception as e:
                logger.warning(f"SOLR cache lookup failed, falling back: {e}")

            # Execute original function
            logger.debug(f"SOLR cache miss for get_instances({short_form}), computing...")
            result = original_func(short_form, return_dataframe, limit)

            # Store the fresh result (synchronous, best effort). The check is
            # ``is not None`` rather than truthiness because the result may
            # be a DataFrame when return_dataframe is set.
            if result is not None and cache is not None:
                try:
                    cache.cache_result("instances", short_form, result, **cache_params)
                    logger.debug(f"Cached get_instances result for {short_form}")
                except Exception as e:
                    logger.debug(f"Failed to cache get_instances result: {e}")

            return result

        return cached_get_instances
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Global integration instance
|
|
137
|
+
_solr_integration = None
|
|
138
|
+
|
|
139
|
+
def get_solr_integration() -> SolrCacheIntegration:
    """Return the module-wide SolrCacheIntegration, creating it on first use."""
    global _solr_integration
    if _solr_integration is not None:
        return _solr_integration
    # First call: build the shared instance and remember it for later calls.
    _solr_integration = SolrCacheIntegration()
    return _solr_integration
|
|
145
|
+
|
|
146
|
+
def enable_solr_result_caching():
    """Turn on SOLR-based result caching via the global integration instance."""
    get_solr_integration().enable_solr_caching()
|
|
150
|
+
|
|
151
|
+
def disable_solr_result_caching():
    """Turn off SOLR-based result caching via the global integration instance."""
    get_solr_integration().disable_solr_caching()
|
|
155
|
+
|
|
156
|
+
def warmup_solr_cache(term_ids: list, query_types: list = None):
    """
    Warm up SOLR cache by pre-computing results for common terms

    This function can be run during deployment or maintenance windows
    to pre-populate the cache with frequently requested terms.

    If the integration reports caching as disabled, it is enabled for the
    duration of the warmup and disabled again afterwards. A failure for one
    term/query type is logged and does not stop the remaining warmups.

    Args:
        term_ids: List of term IDs to warm up
        query_types: Types of queries to warm up ('term_info', 'instances').
            Defaults to both when omitted or None. Unrecognised types are
            skipped with a warning.
    """
    # None stands in for the default so no mutable list is shared between calls.
    if query_types is None:
        query_types = ["term_info", "instances"]

    logger.info(f"Warming up SOLR cache for {len(term_ids)} terms")

    # Temporarily enable SOLR caching if not already enabled
    integration = get_solr_integration()
    was_enabled = integration.cache_enabled
    if not was_enabled:
        integration.enable_solr_caching()

    try:
        for term_id in term_ids:
            for query_type in query_types:
                try:
                    if query_type == "term_info":
                        vfb_queries.get_term_info(term_id)
                    elif query_type == "instances":
                        vfb_queries.get_instances(term_id, limit=100)  # Reasonable limit for warmup
                    else:
                        # Nothing was computed, so do not report it as warmed up.
                        logger.warning(f"Unknown query type for warmup, skipping: {query_type}")
                        continue

                    logger.debug(f"Warmed up {query_type} for {term_id}")

                except Exception as e:
                    logger.warning(f"Failed to warm up {query_type} for {term_id}: {e}")

        logger.info("SOLR cache warmup completed")

    finally:
        # Restore original state if we changed it
        if not was_enabled:
            integration.disable_solr_caching()
|
|
195
|
+
|
|
196
|
+
def get_solr_cache_stats() -> Dict[str, Any]:
    """Return statistics from the SOLR cache, or an empty dict on any failure."""
    stats: Dict[str, Any] = {}
    try:
        stats = get_solr_cache().get_cache_stats()
    except Exception as e:
        # Best effort: the failure is logged and the empty default is returned.
        logger.error(f"Failed to get SOLR cache stats: {e}")
    return stats
|
|
204
|
+
|
|
205
|
+
def cleanup_solr_cache() -> int:
    """Remove expired SOLR cache entries.

    Returns whatever the cache's ``cleanup_expired_entries`` returns, or 0
    when the cleanup fails.
    """
    removed = 0
    try:
        removed = get_solr_cache().cleanup_expired_entries()
    except Exception as e:
        # Best effort: the failure is logged and 0 is returned.
        logger.error(f"Failed to cleanup SOLR cache: {e}")
    return removed
|
vfbquery/solr_fetcher.py
CHANGED
|
@@ -2,7 +2,37 @@ import requests
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import pandas as pd
|
|
5
|
+
import sys
|
|
5
6
|
from typing import List, Dict, Any, Optional, Union
|
|
7
|
+
from unittest.mock import MagicMock
|
|
8
|
+
|
|
9
|
+
class GraphicsLibraryMocker:
    """Context manager to mock graphics libraries during vfb_connect import.

    On entry, every name in ``mocked_modules`` is replaced in ``sys.modules``
    by a ``MagicMock``. On exit, modules that were present before entry are
    put back and the remaining mocked names are removed from ``sys.modules``.
    """

    def __init__(self):
        # Module names that are stubbed out while the context is active.
        self.mocked_modules = [
            'vispy', 'vispy.scene', 'vispy.util', 'vispy.util.fonts',
            'vispy.util.fonts._triage', 'vispy.util.fonts._quartz',
            'vispy.ext', 'vispy.ext.cocoapy', 'navis.plotting',
            'navis.plotting.vispy', 'navis.plotting.vispy.viewer'
        ]
        # Real modules displaced by the mocks, keyed by module name.
        self.original_modules = {}

    def __enter__(self):
        # Start from a clean slate so that reusing one instance never
        # restores a module captured by an earlier ``with`` block.
        self.original_modules = {}

        # Store original modules and mock graphics libraries
        for module_name in self.mocked_modules:
            if module_name in sys.modules:
                self.original_modules[module_name] = sys.modules[module_name]
            sys.modules[module_name] = MagicMock()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore original modules; names that did not exist before entry
        # are dropped. Returns None, so exceptions raised inside the
        # ``with`` body propagate.
        for module_name in self.mocked_modules:
            if module_name in self.original_modules:
                sys.modules[module_name] = self.original_modules[module_name]
            else:
                sys.modules.pop(module_name, None)
|
|
6
36
|
|
|
7
37
|
class SolrTermInfoFetcher:
|
|
8
38
|
"""Fetches term information directly from the Solr server instead of using VfbConnect"""
|
|
@@ -12,19 +42,28 @@ class SolrTermInfoFetcher:
|
|
|
12
42
|
self.solr_url = solr_url
|
|
13
43
|
self.logger = logging.getLogger(__name__)
|
|
14
44
|
self._vfb = None # Lazy load vfb_connect
|
|
45
|
+
self._nc = None # Lazy load neo4j connection
|
|
15
46
|
|
|
16
47
|
@property
def vfb(self):
    """Lazy load vfb_connect with graphics libraries mocked.

    The import happens on first access; the imported object is stored on
    the instance and returned directly on later accesses.

    Raises:
        ImportError: If ``vfb_connect`` cannot be imported. The original
            import error is attached as the cause.
    """
    if self._vfb is None:
        try:
            # Graphics modules are stubbed only while the import runs.
            with GraphicsLibraryMocker():
                from vfb_connect import vfb
                self._vfb = vfb
        except ImportError as e:
            self.logger.error(f"Could not import vfb_connect: {e}")
            raise ImportError("vfb_connect is required but could not be imported") from e
    return self._vfb
|
|
27
59
|
|
|
60
|
+
@property
def nc(self):
    """Return the Neo4j connection taken from vfb_connect, fetching it once.

    The value of ``self.vfb.nc`` is stored on first access and reused on
    later accesses.
    """
    if self._nc is not None:
        return self._nc
    # First access: read the connection off the lazily loaded vfb object.
    self._nc = self.vfb.nc
    return self._nc
|
|
66
|
+
|
|
28
67
|
def get_TermInfo(self, short_forms: List[str],
|
|
29
68
|
return_dataframe: bool = False,
|
|
30
69
|
summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
|
|
@@ -95,6 +134,11 @@ class SolrTermInfoFetcher:
|
|
|
95
134
|
|
|
96
135
|
This allows us to use this class as a drop-in replacement for VfbConnect
|
|
97
136
|
while only implementing the methods we want to customize.
|
|
137
|
+
Special handling for 'nc' (Neo4j connection) to avoid graphics imports.
|
|
98
138
|
"""
|
|
139
|
+
# Handle Neo4j connection separately to use our mocked import
|
|
140
|
+
if name == 'nc':
|
|
141
|
+
return self.nc
|
|
142
|
+
|
|
99
143
|
self.logger.debug(f"Passing through method call: {name}")
|
|
100
144
|
return getattr(self.vfb, name)
|