vfbquery 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +29 -27
- test/term_info_queries_test.py +46 -34
- test/test_dataset_template_queries.py +138 -0
- test/test_default_caching.py +89 -84
- test/test_examples_code.py +7 -0
- test/test_examples_diff.py +95 -172
- test/test_expression_overlaps.py +183 -0
- test/test_expression_pattern_fragments.py +123 -0
- test/test_images_neurons.py +152 -0
- test/test_images_that_develop_from.py +112 -0
- test/test_lineage_clones_in.py +190 -0
- test/test_nblast_queries.py +124 -0
- test/test_neuron_classes_fasciculating.py +187 -0
- test/test_neuron_inputs.py +193 -0
- test/test_neuron_neuron_connectivity.py +89 -0
- test/test_neuron_region_connectivity.py +117 -0
- test/test_neurons_part_here.py +203 -0
- test/test_new_owlery_queries.py +282 -0
- test/test_publication_transgene_queries.py +101 -0
- test/test_query_performance.py +739 -0
- test/test_similar_morphology.py +177 -0
- test/test_tracts_nerves_innervating.py +188 -0
- test/test_transcriptomics.py +223 -0
- vfbquery/__init__.py +47 -35
- vfbquery/cached_functions.py +772 -131
- vfbquery/neo4j_client.py +120 -0
- vfbquery/owlery_client.py +463 -0
- vfbquery/solr_cache_integration.py +34 -30
- vfbquery/solr_fetcher.py +1 -1
- vfbquery/solr_result_cache.py +338 -36
- vfbquery/term_info_queries.py +1 -1
- vfbquery/vfb_queries.py +2969 -627
- vfbquery-0.5.1.dist-info/METADATA +2806 -0
- vfbquery-0.5.1.dist-info/RECORD +40 -0
- vfbquery-0.4.1.dist-info/METADATA +0 -1315
- vfbquery-0.4.1.dist-info/RECORD +0 -19
- {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/LICENSE +0 -0
- {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/WHEEL +0 -0
- {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/top_level.txt +0 -0
vfbquery/solr_result_cache.py
CHANGED
@@ -14,10 +14,12 @@ import json
 import requests
 import hashlib
 import time
+import threading
 from datetime import datetime, timedelta
 from typing import Dict, Any, Optional, List
 import logging
 from dataclasses import dataclass, asdict
+import pandas as pd
 from vfbquery.term_info_queries import NumpyEncoder
 
 logger = logging.getLogger(__name__)
@@ -59,7 +61,7 @@ class SolrResultCache:
         self.max_result_size_mb = max_result_size_mb
         self.max_result_size_bytes = max_result_size_mb * 1024 * 1024
 
-    def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
+    def _create_cache_metadata(self, result: Any, **params) -> Optional[Dict[str, Any]]:
         """Create metadata for cached result with 3-month expiration"""
         serialized_result = json.dumps(result, cls=NumpyEncoder)
         result_size = len(serialized_result.encode('utf-8'))
@@ -77,6 +79,7 @@ class SolrResultCache:
             "cached_at": now.isoformat(),
             "expires_at": expires_at.isoformat(),
             "result_size": result_size,
+            "params": params,  # Store the parameters used for this query
             "hit_count": 0,
             "cache_version": "1.0",  # For future compatibility
             "ttl_hours": self.ttl_hours  # Store TTL for debugging
@@ -95,11 +98,12 @@ class SolrResultCache:
             Cached result or None if not found/expired
         """
         try:
-            # Query for cache document with prefixed ID
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Query for cache document with prefixed ID including query type
+            # This ensures different query types for the same term have separate cache entries
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
 
             response = requests.get(f"{self.cache_url}/select", params={
-                "q": f"id:{cache_doc_id}
+                "q": f"id:{cache_doc_id}",
                 "fl": "cache_data",
                 "wt": "json"
             }, timeout=5)  # Short timeout for cache lookups
@@ -148,6 +152,33 @@ class SolrResultCache:
                 self._clear_expired_cache_document(cache_doc_id)
                 return None
 
+            # Check if cached result parameters are compatible with requested parameters
+            cached_params = cached_data.get("params", {})
+            requested_limit = params.get("limit", -1)
+            cached_limit = cached_params.get("limit", -1)
+
+            # Only cached full results (limit=-1) are stored
+            # If requesting limited results, we can slice from cached full results
+            if cached_limit != -1:
+                logger.debug(f"Cache miss: Unexpected cached result with limit={cached_limit}, expected -1")
+                return None
+
+            # If requesting unlimited results, return the full cached result
+            if requested_limit == -1:
+                result = cached_data["result"]
+            else:
+                # If requesting limited results, slice from the cached full result
+                result = cached_data["result"]
+                if isinstance(result, (list, pd.DataFrame)):
+                    if isinstance(result, list):
+                        result = result[:requested_limit]
+                    elif isinstance(result, pd.DataFrame):
+                        result = result.head(requested_limit)
+                    logger.debug(f"Cache hit: Returning {requested_limit} items from cached full result")
+                else:
+                    # For other result types, return as-is (can't slice)
+                    logger.debug(f"Cache hit: Returning full cached result (cannot slice type {type(result)})")
+
             # Increment hit count asynchronously
             self._increment_cache_hit_count(cache_doc_id, cached_data.get("hit_count", 0))
 
@@ -161,6 +192,14 @@ class SolrResultCache:
                 logger.warning(f"Failed to parse cached result for {term_id}")
                 return None
 
+            # IMPORTANT: Validate cached result - reject error results (count=-1)
+            # This ensures old cached errors get retried when the service is working again
+            if isinstance(result, dict) and 'count' in result:
+                if result.get('count', -1) < 0:
+                    logger.warning(f"Rejecting cached error result for {query_type}({term_id}): count={result.get('count')}")
+                    self._clear_expired_cache_document(cache_doc_id)
+                    return None
+
             logger.info(f"Cache hit for {query_type}({term_id})")
             return result
 
@@ -190,12 +229,13 @@ class SolrResultCache:
 
         try:
             # Create cached metadata and result
-            cached_data = self._create_cache_metadata(result)
+            cached_data = self._create_cache_metadata(result, **params)
             if not cached_data:
                 return False  # Result too large or other issue
 
-            # Create cache document with prefixed ID
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Create cache document with prefixed ID including query type
+            # This ensures different query types for the same term have separate cache entries
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
 
             cache_doc = {
                 "id": cache_doc_id,
@@ -252,7 +292,8 @@ class SolrResultCache:
             True if successfully cleared, False otherwise
         """
         try:
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Include query_type in cache document ID to match storage format
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
             response = requests.post(
                 f"{self.cache_url}/update",
                 data=f'<delete><id>{cache_doc_id}</id></delete>',
@@ -299,10 +340,11 @@ class SolrResultCache:
             Dictionary with cache age info or None if not cached
         """
         try:
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Include query_type in cache document ID to match storage format
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
 
             response = requests.get(f"{self.cache_url}/select", params={
-                "q": f"id:{cache_doc_id}
+                "q": f"id:{cache_doc_id}",
                 "fl": "cache_data,hit_count,last_accessed",
                 "wt": "json"
             }, timeout=5)
@@ -573,6 +615,25 @@ def with_solr_cache(query_type: str):
             # Check if force_refresh is requested (pop it before passing to function)
             force_refresh = kwargs.pop('force_refresh', False)
 
+            # Check if limit is applied - only cache full results (limit=-1)
+            limit = kwargs.get('limit', -1)
+            should_cache = (limit == -1)  # Only cache when getting all results (limit=-1)
+
+            # For expensive queries, we still only cache full results, but we handle limited requests
+            # by slicing from cached full results
+            expensive_query_types = ['similar_neurons', 'similar_morphology', 'similar_morphology_part_of',
+                                     'similar_morphology_part_of_exp', 'similar_morphology_nb',
+                                     'similar_morphology_nb_exp', 'similar_morphology_userdata',
+                                     'neurons_part_here', 'neurons_synaptic',
+                                     'neurons_presynaptic', 'neurons_postsynaptic']
+            # Note: expensive queries still only cache full results, but retrieval logic handles slicing
+
+            # For neuron_neuron_connectivity_query, only cache when all parameters are defaults
+            if query_type == 'neuron_neuron_connectivity_query':
+                min_weight = kwargs.get('min_weight', 0)
+                direction = kwargs.get('direction', 'both')
+                should_cache = should_cache and (min_weight == 0) and (direction == 'both')
+
             # Extract term_id from first argument or kwargs
             term_id = args[0] if args else kwargs.get('short_form') or kwargs.get('term_id')
 
@@ -585,36 +646,121 @@ def with_solr_cache(query_type: str):
                 logger.warning(f"No term_id found for caching {query_type}")
                 return func(*args, **kwargs)
 
+            # Include preview parameter in cache key for term_info queries
+            # This ensures preview=True and preview=False have separate cache entries
+            cache_term_id = term_id
+            if query_type == 'term_info':
+                preview = kwargs.get('preview', True)  # Default is True
+                cache_term_id = f"{term_id}_preview_{preview}"
+
+            # Include return_dataframe parameter in cache key for queries that support it
+            # This ensures DataFrame and dict results are cached separately
+            dataframe_query_types = ['neurons_part_here', 'neurons_synaptic', 'neurons_presynaptic',
+                                     'neurons_postsynaptic', 'similar_neurons', 'similar_morphology',
+                                     'similar_morphology_part_of', 'similar_morphology_part_of_exp',
+                                     'similar_morphology_nb', 'similar_morphology_nb_exp',
+                                     'similar_morphology_userdata', 'neurons_part_here', 'neurons_synaptic',
+                                     'neurons_presynaptic', 'neurons_postsynaptic']
+            if query_type in dataframe_query_types:
+                return_dataframe = kwargs.get('return_dataframe', True)  # Default is True
+                cache_term_id = f"{cache_term_id}_dataframe_{return_dataframe}"
+
             cache = get_solr_cache()
 
             # Clear cache if force_refresh is True
             if force_refresh:
                 logger.info(f"Force refresh requested for {query_type}({term_id})")
-                cache.clear_cache_entry(query_type, term_id)
+                cache.clear_cache_entry(query_type, cache_term_id)
 
             # Try cache first (will be empty if force_refresh was True)
+            # OPTIMIZATION: Always try to get full cached results first, then slice if needed
+            cached_result = None
             if not force_refresh:
-                cached_result = cache.get_cached_result(query_type, term_id, **kwargs)
+                # print(f"DEBUG: Checking cache for {query_type}, term_id={term_id}, cache_term_id={cache_term_id}, should_cache={should_cache}")
+                # Try to get cached full result (limit=-1)
+                full_params = kwargs.copy()
+                full_params['limit'] = -1
+                # print(f"DEBUG: Attempting cache lookup for {query_type}({cache_term_id}) with full results")
+                cached_result = cache.get_cached_result(query_type, cache_term_id, **full_params)
+                # print(f"DEBUG: Cache lookup result: {cached_result is not None}")
+
+                # If we got a cached full result but need limited results, slice it
+                if cached_result is not None and limit != -1:
+                    if isinstance(cached_result, (list, pd.DataFrame)):
+                        if isinstance(cached_result, list):
+                            cached_result = cached_result[:limit]
+                        elif isinstance(cached_result, pd.DataFrame):
+                            cached_result = cached_result.head(limit)
+                        # print(f"DEBUG: Sliced cached result to {limit} items")
+                    elif isinstance(cached_result, dict):
+                        # Handle dict results with 'rows' (e.g., get_instances)
+                        if 'rows' in cached_result:
+                            cached_result = {
+                                'headers': cached_result.get('headers', {}),
+                                'rows': cached_result['rows'][:limit],
+                                'count': cached_result.get('count', len(cached_result.get('rows', [])))
+                            }
+                            # print(f"DEBUG: Sliced cached dict result to {limit} rows")
+                        # Handle term_info dict with 'queries'
+                        elif 'queries' in cached_result:
+                            for query in cached_result.get('queries', []):
+                                if 'preview_results' in query and 'rows' in query['preview_results']:
+                                    query['preview_results']['rows'] = query['preview_results']['rows'][:limit]
+                                    # Keep original count - don't change it to limit
+                            # print(f"DEBUG: Sliced cached term_info result to {limit} rows per query")
+                        else:
+                            # print(f"DEBUG: Cannot slice cached dict result (no 'rows' or 'queries'), returning full result")
+                            pass
+                    else:
+                        # print(f"DEBUG: Cannot slice cached result of type {type(cached_result)}, returning full result")
+                        pass
+                else:
+                    # For limited queries, try to get full cached results instead
+                    full_kwargs = kwargs.copy()
+                    full_kwargs['limit'] = -1  # Get full results
+                    cached_result = cache.get_cached_result(query_type, cache_term_id, **full_kwargs)
+
+                    # If we got full cached results, extract the limited portion
+                    if cached_result is not None and limit > 0:
+                        logger.debug(f"Extracting first {limit} rows from cached full results for {term_id}")
+
+                        # Extract limited rows based on result type
+                        if isinstance(cached_result, dict) and 'rows' in cached_result:
+                            cached_result = {
+                                'headers': cached_result.get('headers', {}),
+                                'rows': cached_result['rows'][:limit],
+                                'count': cached_result.get('count', len(cached_result.get('rows', [])))
+                            }
+                        elif isinstance(cached_result, pd.DataFrame):
+                            # Keep the full count but limit the rows
+                            original_count = len(cached_result)
+                            cached_result = cached_result.head(limit)
+                            # Add count attribute if possible
+                            if hasattr(cached_result, '_metadata'):
+                                cached_result._metadata['count'] = original_count
+
             if cached_result is not None:
                 # Validate that cached result has essential fields for term_info
                 if query_type == 'term_info':
                     is_valid = (cached_result and isinstance(cached_result, dict) and
                                 cached_result.get('Id') and cached_result.get('Name'))
 
-                    # Additional validation for query results
-                    if is_valid and 'Queries' in cached_result:
+                    # Additional validation for query results - only when preview=True
+                    preview = kwargs.get('preview', True)  # Default is True
+                    if is_valid and preview and 'Queries' in cached_result:
                         logger.debug(f"Validating {len(cached_result['Queries'])} queries for {term_id}")
                         for i, query in enumerate(cached_result['Queries']):
-                            count = query.get('count',
+                            count = query.get('count', -1)  # Default to -1 if missing
                             preview_results = query.get('preview_results')
                             headers = preview_results.get('headers', []) if isinstance(preview_results, dict) else []
 
                             logger.debug(f"Query {i}: count={count}, preview_results_type={type(preview_results)}, headers={headers}")
 
-                            # Check if query has
-                            if count <= 0:
+                            # Check if query has error count (-1) which indicates failed execution
+                            # Note: count of 0 is valid - it means "no matches found"
+                            if count < 0:
                                 is_valid = False
-                                logger.debug(f"Cached result has
+                                logger.debug(f"Cached result has error query count {count} for {term_id}")
                                 break
                             # Check if preview_results is missing or has empty headers when it should have data
                             if not isinstance(preview_results, dict) or not headers:
@@ -631,27 +777,183 @@ def with_solr_cache(query_type: str):
                 else:
                     return cached_result
 
-            # Execute function
-            result = func(*args, **kwargs)
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Execute function - for expensive queries, get quick results first, then cache full results in background
+            result = None
+            if query_type in expensive_query_types:
+                # For expensive queries: execute with original parameters for quick return, cache full results in background
+                # print(f"DEBUG: Executing {query_type} with original parameters for quick return")
+                result = func(*args, **kwargs)
+
+                # Start background thread to get full results and cache them
+                def cache_full_results_background():
+                    try:
+                        # Check if function supports limit parameter
+                        import inspect
+                        if 'limit' in inspect.signature(func).parameters:
+                            full_kwargs = kwargs.copy()
+                            full_kwargs['limit'] = -1
+                            # print(f"DEBUG: Background: Executing {query_type} with full results for caching")
+                            full_result = func(*args, **full_kwargs)
+
+                            # Validate and cache the full result
+                            if full_result is not None:
+                                result_is_valid = False
+                                if hasattr(full_result, 'empty'):  # DataFrame
+                                    result_is_valid = not full_result.empty
+                                elif isinstance(full_result, dict):
+                                    if 'count' in full_result:
+                                        count_value = full_result.get('count', -1)
+                                        result_is_valid = count_value >= 0
+                                    else:
+                                        result_is_valid = bool(full_result)
+                                elif isinstance(full_result, (list, str)):
+                                    result_is_valid = len(full_result) > 0
+                                else:
+                                    result_is_valid = True
+
+                                if result_is_valid:
+                                    # Special validation for term_info
+                                    if query_type == 'term_info':
+                                        is_complete = (full_result and isinstance(full_result, dict) and
+                                                       full_result.get('Id') and full_result.get('Name'))
+                                        if is_complete:
+                                            try:
+                                                full_kwargs_for_cache = kwargs.copy()
+                                                full_kwargs_for_cache['limit'] = -1
+                                                cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                                logger.debug(f"Background cached complete full result for {term_id}")
+                                            except Exception as e:
+                                                logger.debug(f"Background caching failed: {e}")
+                                    else:
+                                        try:
+                                            full_kwargs_for_cache = kwargs.copy()
+                                            full_kwargs_for_cache['limit'] = -1
+                                            cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                            logger.debug(f"Background cached full result for {term_id}")
+                                        except Exception as e:
+                                            logger.debug(f"Background caching failed: {e}")
+                    except Exception as e:
+                        logger.debug(f"Background caching thread failed: {e}")
+
+                # Start background caching thread
+                background_thread = threading.Thread(target=cache_full_results_background, daemon=True)
+                background_thread.start()
+                # print(f"DEBUG: Started background caching thread for {query_type}({term_id})")
+            else:
+                # For non-expensive queries: use original caching logic
+                full_result = None
+                if should_cache:
+                    # Execute with limit=-1 to get full results for caching (only for functions that support limit)
+                    full_kwargs = kwargs.copy()
+                    import inspect
+                    if 'limit' in inspect.signature(func).parameters:
+                        full_kwargs['limit'] = -1
+                    # print(f"DEBUG: Executing {query_type} with full results for caching")
+                    full_result = func(*args, **full_kwargs)
+                    result = full_result
+
+                    # If the original request was limited, slice the result for return
+                    if limit != -1 and result is not None:
+                        if isinstance(result, (list, pd.DataFrame)):
+                            if isinstance(result, list):
+                                result = result[:limit]
+                            elif isinstance(result, pd.DataFrame):
+                                result = result.head(limit)
+                            # print(f"DEBUG: Sliced result to {limit} items for return")
                 else:
+                    # Execute with original parameters (no caching)
+                    result = func(*args, **kwargs)
+                    full_result = result
+
+            # Cache the result - skip for expensive queries as they use background caching
+            if query_type not in expensive_query_types:
+                # Handle DataFrame, dict, and other result types properly
+                result_is_valid = False
+                result_is_error = False  # Track if result is an error that should clear cache
+
+                if result is not None:
+                    if hasattr(result, 'empty'):  # DataFrame
+                        result_is_valid = not result.empty
+                    elif isinstance(result, dict):
+                        # For dict results, check if it's not an error result (count != -1)
+                        # Error results should not be cached
+                        if 'count' in result:
+                            count_value = result.get('count', -1)
+                            result_is_valid = count_value >= 0  # Don't cache errors (count=-1)
+                            result_is_error = count_value < 0  # Mark as error if count is negative
+                        else:
+                            result_is_valid = bool(result)  # For dicts without count field
+                    elif isinstance(result, (list, str)):
+                        result_is_valid = len(result) > 0
+                    else:
+                        result_is_valid = True
+
+                # If result is an error, actively clear any existing cache entry
+                # This ensures that transient failures don't get stuck in cache
+                if result_is_error:
+                    logger.warning(f"Query returned error result for {query_type}({term_id}), clearing cache entry")
                     try:
-                        cache.cache_result(query_type, term_id, result, **kwargs)
+                        cache.clear_cache_entry(query_type, cache_term_id)
                     except Exception as e:
-                        logger.debug(f"Failed to cache result: {e}")
+                        logger.debug(f"Failed to clear cache entry: {e}")
+
+                if result_is_valid:
+                    # Validate result before caching for term_info
+                    if query_type == 'term_info':
+                        # Basic validation: must have Id and Name
+                        is_complete = (result and isinstance(result, dict) and
+                                       result.get('Id') and result.get('Name'))
+
+                        # Additional validation when preview=True: check if queries have results
+                        # We allow caching even if some queries failed (count=-1) as long as the core term_info is valid
+                        # This is because some query functions may not be implemented yet or may legitimately fail
+                        if is_complete:
+                            preview = kwargs.get('preview', True)
+                            if preview and 'Queries' in result and result['Queries']:
+                                # Count how many queries have valid results vs errors
+                                valid_queries = 0
+                                failed_queries = 0
+
+                                for query in result['Queries']:
+                                    count = query.get('count', -1)
+                                    preview_results = query.get('preview_results')
+
+                                    # Count queries with valid results (count >= 0)
+                                    if count >= 0 and isinstance(preview_results, dict):
+                                        valid_queries += 1
+                                    else:
+                                        failed_queries += 1
+
+                                # Only reject if ALL queries failed - at least one must succeed
+                                if valid_queries == 0 and failed_queries > 0:
+                                    is_complete = False
+                                    logger.warning(f"Not caching result for {term_id}: all {failed_queries} queries failed")
+                                elif failed_queries > 0:
+                                    logger.debug(f"Caching result for {term_id} with {valid_queries} valid queries ({failed_queries} failed)")
+
+                        # Only cache if result is complete AND no limit was applied
+                        if is_complete and should_cache:
+                            try:
+                                # Cache the full result with full parameters (limit=-1)
+                                full_kwargs_for_cache = kwargs.copy()
+                                full_kwargs_for_cache['limit'] = -1
+                                cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                logger.debug(f"Cached complete full result for {term_id}")
+                            except Exception as e:
+                                logger.debug(f"Failed to cache result: {e}")
+                        elif not should_cache:
+                            logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
+                        else:
+                            logger.warning(f"Not caching incomplete result for {term_id}")
+                    else:
+                        # Only cache if no limit was applied
+                        if should_cache:
+                            try:
+                                cache.cache_result(query_type, cache_term_id, result, **kwargs)
+                            except Exception as e:
+                                logger.debug(f"Failed to cache result: {e}")
+                        else:
+                            logger.debug(f"Not caching limited result for {term_id} (limit={limit}))")
 
             return result
 
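The dominant pattern in this file is "cache full, slice on read": only unlimited (limit=-1) results are written to the cache, and limited requests are answered by slicing the cached full result. The sketch below is a minimal, hypothetical reduction of that pattern, not vfbquery's implementation: it swaps the SOLR backend for an in-memory dict, omits the background-thread path used for the expensive query types, and the names `SimpleCache` and `expensive_lookup` are illustrative.

```python
import threading
from typing import Any, Callable, Dict

class SimpleCache:
    """In-memory stand-in for the SOLR-backed cache (illustrative only)."""
    def __init__(self) -> None:
        self._store: Dict[str, Any] = {}
        self._lock = threading.Lock()

    def get(self, key: str) -> Any:
        with self._lock:
            return self._store.get(key)

    def put(self, key: str, value: Any) -> None:
        with self._lock:
            self._store[key] = value

_cache = SimpleCache()

def with_full_result_cache(query_type: str) -> Callable:
    """Cache only full results (limit=-1); serve limited requests by slicing."""
    def decorator(func: Callable) -> Callable:
        def wrapper(term_id: str, limit: int = -1, **kwargs):
            key = f"{query_type}_{term_id}"
            cached = _cache.get(key)
            if cached is not None:
                # A limited request is served by slicing the cached full result.
                return cached if limit == -1 else cached[:limit]
            # On a miss, always compute the full result so the cache stays sliceable.
            full = func(term_id, limit=-1, **kwargs)
            _cache.put(key, full)
            return full if limit == -1 else full[:limit]
        return wrapper
    return decorator

@with_full_result_cache("neurons_part_here")
def expensive_lookup(term_id: str, limit: int = -1):
    # Stand-in for a slow SOLR/Neo4j query.
    rows = [f"{term_id}_row_{i}" for i in range(100)]
    return rows if limit == -1 else rows[:limit]

print(len(expensive_lookup("FBbt_00003748", limit=5)))  # 5 rows; full result computed once
print(len(expensive_lookup("FBbt_00003748")))           # 100 rows, served from cache
```

The trade-off is that the first limited request pays for the full query; the 0.5.1 code above mitigates this for the listed expensive query types by returning the limited result immediately and caching the full result from a daemon thread.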
vfbquery/term_info_queries.py
CHANGED
@@ -745,7 +745,7 @@ def get_link(text: str, link: str) -> str:
 
 
 def get_secure_url(url: str, allow_redirects: bool = True, timeout=15) -> str:
-    secure_url = url.replace("http://", "
+    secure_url = url.replace("http://", "https://")
    if check_url_exist(secure_url, allow_redirects, timeout):
        return secure_url
    return url