vfbquery 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +29 -33
- test/term_info_queries_test.py +42 -30
- test/test_default_caching.py +86 -85
- test/test_examples_code.py +7 -0
- test/test_examples_diff.py +95 -172
- test/test_neurons_part_here.py +12 -13
- test/test_query_performance.py +3 -7
- vfbquery/__init__.py +47 -35
- vfbquery/cached_functions.py +772 -131
- vfbquery/owlery_client.py +1 -1
- vfbquery/solr_cache_integration.py +34 -30
- vfbquery/solr_result_cache.py +262 -99
- vfbquery/term_info_queries.py +1 -1
- vfbquery/vfb_queries.py +38 -7
- vfbquery-0.5.1.dist-info/METADATA +2806 -0
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/RECORD +19 -18
- vfbquery-0.5.0.dist-info/METADATA +0 -2273
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/LICENSE +0 -0
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/WHEEL +0 -0
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/top_level.txt +0 -0
vfbquery/owlery_client.py
CHANGED
```diff
@@ -372,7 +372,7 @@ class SimpleVFBConnect:
             # Try to initialize - this will fail if Neo4j server unreachable
             self._nc = Neo4jConnect()
             self._nc_available = True
-            print("✅ Neo4j connection established")
+            # print("✅ Neo4j connection established")
         except Exception as e:
             # Fall back to mock client
             self._nc = MockNeo4jClient()
```
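The only functional change in owlery_client.py is silencing the startup print shown above; the surrounding connect-or-fall-back logic is untouched. For orientation, a minimal sketch of that pattern (the `neo4j_factory` and `MockNeo4jClient` below are illustrative stand-ins, not the package's real classes), with the success message routed through `logging` rather than `print`:

```python
import logging

logger = logging.getLogger(__name__)


class MockNeo4jClient:
    """Stand-in used when the real Neo4j server cannot be reached."""

    def commit_list(self, statements):
        return []


class ConnectionSketch:
    def __init__(self, neo4j_factory):
        self._nc_available = False
        try:
            # Try to initialize - this fails if the Neo4j server is unreachable
            self._nc = neo4j_factory()
            self._nc_available = True
            logger.debug("Neo4j connection established")  # quiet replacement for the old print
        except Exception:
            # Fall back to the mock client so the rest of the package keeps working
            self._nc = MockNeo4jClient()


def failing_factory():
    raise ConnectionError("Neo4j server unreachable")


conn = ConnectionSketch(failing_factory)
print(type(conn._nc).__name__, conn._nc_available)  # MockNeo4jClient False
```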
vfbquery/solr_cache_integration.py
CHANGED

```diff
@@ -60,28 +60,30 @@ class SolrCacheIntegration:
         original_func = self.original_functions['get_term_info']

         @functools.wraps(original_func)
-        def cached_get_term_info(short_form: str, preview: bool = False):
+        def cached_get_term_info(short_form: str, preview: bool = False, **kwargs):
+            force_refresh = kwargs.get('force_refresh', False)
             cache = get_solr_cache()
             cache_params = {"preview": preview}

- … (old lines 67-77 not rendered in the registry diff view)
+            if not force_refresh:
+                try:
+                    # Try SOLR cache first
+                    cached_result = cache.get_cached_result(
+                        "term_info", short_form, **cache_params
+                    )
+                    if cached_result is not None:
+                        logger.debug(f"SOLR cache hit for term_info({short_form})")
+                        return cached_result
+
+                except Exception as e:
+                    logger.warning(f"SOLR cache lookup failed, falling back: {e}")

             # Execute original function
-            logger.debug(f"SOLR cache miss for term_info({short_form}), computing...")
+            logger.debug(f"SOLR cache miss or force_refresh for term_info({short_form}), computing...")
             result = original_func(short_form, preview)

-            # Cache result asynchronously
-            if result:
+            # Cache result asynchronously if not force_refresh
+            if result and not force_refresh:
                 try:
                     cache.cache_result("term_info", short_form, result, **cache_params)
                     logger.debug(f"Cached term_info result for {short_form}")
```
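These wrappers follow a read-through pattern: consult the SOLR cache unless `force_refresh` is set, fall back to the original function, then write the fresh result back. A minimal, self-contained sketch of that pattern (the in-memory `DictCache` and the `fetch_term_info` stand-in below are illustrative, not vfbquery's actual SOLR-backed cache):

```python
import functools
import logging

logger = logging.getLogger(__name__)


class DictCache:
    """Illustrative stand-in for the SOLR-backed result cache."""

    def __init__(self):
        self._store = {}

    def get_cached_result(self, query_type, key, **params):
        return self._store.get((query_type, key, tuple(sorted(params.items()))))

    def cache_result(self, query_type, key, result, **params):
        self._store[(query_type, key, tuple(sorted(params.items())))] = result


cache = DictCache()


def fetch_term_info(short_form, preview=False):
    """Pretend this is the expensive, uncached query."""
    return {"Id": short_form, "Name": f"term {short_form}", "preview": preview}


def cached(query_type):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(short_form, preview=False, **kwargs):
            force_refresh = kwargs.get("force_refresh", False)
            params = {"preview": preview}
            if not force_refresh:
                hit = cache.get_cached_result(query_type, short_form, **params)
                if hit is not None:
                    logger.debug("cache hit for %s(%s)", query_type, short_form)
                    return hit
            result = func(short_form, preview)
            if result and not force_refresh:
                cache.cache_result(query_type, short_form, result, **params)
            return result
        return wrapper
    return decorator


get_term_info = cached("term_info")(fetch_term_info)
print(get_term_info("FBbt_00000001"))                      # computed, then cached
print(get_term_info("FBbt_00000001"))                      # served from the cache
print(get_term_info("FBbt_00000001", force_refresh=True))  # bypasses read and write-back
```

Note that, as in the hunk above, a `force_refresh=True` call bypasses the cache on read and also skips the write-back.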
```diff
@@ -97,31 +99,33 @@ class SolrCacheIntegration:
         original_func = self.original_functions['get_instances']

         @functools.wraps(original_func)
-        def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1):
+        def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1, **kwargs):
+            force_refresh = kwargs.get('force_refresh', False)
             cache = get_solr_cache()
             cache_params = {
                 "return_dataframe": return_dataframe,
                 "limit": limit
             }

- … (old lines 107-117 not rendered in the registry diff view)
+            if not force_refresh:
+                try:
+                    # Try SOLR cache first
+                    cached_result = cache.get_cached_result(
+                        "instances", short_form, **cache_params
+                    )
+                    if cached_result is not None:
+                        logger.debug(f"SOLR cache hit for get_instances({short_form})")
+                        return cached_result
+
+                except Exception as e:
+                    logger.warning(f"SOLR cache lookup failed, falling back: {e}")

             # Execute original function
-            logger.debug(f"SOLR cache miss for get_instances({short_form}), computing...")
+            logger.debug(f"SOLR cache miss or force_refresh for get_instances({short_form}), computing...")
             result = original_func(short_form, return_dataframe, limit)

-            # Cache result asynchronously
-            if result is not None:
+            # Cache result asynchronously if not force_refresh
+            if result is not None and not force_refresh:
                 try:
                     cache.cache_result("instances", short_form, result, **cache_params)
                     logger.debug(f"Cached get_instances result for {short_form}")
```
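Once both wrappers are installed, callers can pass the new keyword straight through. A hypothetical usage sketch (it assumes the wrapped functions are exposed at package level, as the signatures above suggest; the IDs are placeholders):

```python
import vfbquery as vfb

# Served from the SOLR result cache when a fresh entry exists
info = vfb.get_term_info("FBbt_00000001")

# Bypass the cache and recompute; the wrapper also skips the write-back
fresh = vfb.get_term_info("FBbt_00000001", force_refresh=True)

# get_instances gains the same keyword
rows = vfb.get_instances("FBbt_00000001", return_dataframe=False, limit=10, force_refresh=True)
```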
vfbquery/solr_result_cache.py
CHANGED
```diff
@@ -14,6 +14,7 @@ import json
 import requests
 import hashlib
 import time
+import threading
 from datetime import datetime, timedelta
 from typing import Dict, Any, Optional, List
 import logging
```
```diff
@@ -60,7 +61,7 @@ class SolrResultCache:
         self.max_result_size_mb = max_result_size_mb
         self.max_result_size_bytes = max_result_size_mb * 1024 * 1024

-    def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
+    def _create_cache_metadata(self, result: Any, **params) -> Optional[Dict[str, Any]]:
         """Create metadata for cached result with 3-month expiration"""
         serialized_result = json.dumps(result, cls=NumpyEncoder)
         result_size = len(serialized_result.encode('utf-8'))
```
```diff
@@ -78,6 +79,7 @@ class SolrResultCache:
             "cached_at": now.isoformat(),
             "expires_at": expires_at.isoformat(),
             "result_size": result_size,
+            "params": params,  # Store the parameters used for this query
             "hit_count": 0,
             "cache_version": "1.0",  # For future compatibility
             "ttl_hours": self.ttl_hours  # Store TTL for debugging
```
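With `**params` threaded through, the metadata envelope now records which parameters produced the cached payload. A rough sketch of the document `_create_cache_metadata` assembles (field names are taken from the hunks above; the plain `json.dumps`, the size ceiling, and the 90-day TTL default are assumptions standing in for the real NumpyEncoder and configured values):

```python
import json
from datetime import datetime, timedelta


def create_cache_metadata(result, ttl_hours=24 * 90, max_bytes=10 * 1024 * 1024, **params):
    """Sketch of the metadata envelope stored next to a cached result."""
    serialized = json.dumps(result)
    size = len(serialized.encode("utf-8"))
    if size > max_bytes:
        return None  # result too large to cache
    now = datetime.now()
    return {
        "cached_at": now.isoformat(),
        "expires_at": (now + timedelta(hours=ttl_hours)).isoformat(),
        "result_size": size,
        "params": params,          # new in 0.5.1: parameters used for this query
        "hit_count": 0,
        "cache_version": "1.0",
        "ttl_hours": ttl_hours,
    }


doc = create_cache_metadata({"rows": []}, limit=-1, return_dataframe=False)
print(doc["params"])  # {'limit': -1, 'return_dataframe': False}
```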
```diff
@@ -150,6 +152,33 @@ class SolrResultCache:
             self._clear_expired_cache_document(cache_doc_id)
             return None

+        # Check if cached result parameters are compatible with requested parameters
+        cached_params = cached_data.get("params", {})
+        requested_limit = params.get("limit", -1)
+        cached_limit = cached_params.get("limit", -1)
+
+        # Only cached full results (limit=-1) are stored
+        # If requesting limited results, we can slice from cached full results
+        if cached_limit != -1:
+            logger.debug(f"Cache miss: Unexpected cached result with limit={cached_limit}, expected -1")
+            return None
+
+        # If requesting unlimited results, return the full cached result
+        if requested_limit == -1:
+            result = cached_data["result"]
+        else:
+            # If requesting limited results, slice from the cached full result
+            result = cached_data["result"]
+            if isinstance(result, (list, pd.DataFrame)):
+                if isinstance(result, list):
+                    result = result[:requested_limit]
+                elif isinstance(result, pd.DataFrame):
+                    result = result.head(requested_limit)
+                logger.debug(f"Cache hit: Returning {requested_limit} items from cached full result")
+            else:
+                # For other result types, return as-is (can't slice)
+                logger.debug(f"Cache hit: Returning full cached result (cannot slice type {type(result)})")
+
         # Increment hit count asynchronously
         self._increment_cache_hit_count(cache_doc_id, cached_data.get("hit_count", 0))

```
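Because only full (`limit=-1`) results are stored, limited requests are now served by trimming the cached payload at read time. A small, runnable illustration of that slicing rule for the two sliceable types handled above:

```python
import pandas as pd


def slice_cached(result, requested_limit):
    """Return a limited view of a cached full result, mirroring the rule above."""
    if requested_limit == -1:
        return result
    if isinstance(result, list):
        return result[:requested_limit]
    if isinstance(result, pd.DataFrame):
        return result.head(requested_limit)
    return result  # other types cannot be sliced, return as-is


full_list = [{"id": i} for i in range(100)]
full_df = pd.DataFrame(full_list)

print(len(slice_cached(full_list, 5)))   # 5
print(len(slice_cached(full_df, 5)))     # 5
print(len(slice_cached(full_list, -1)))  # 100
```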
```diff
@@ -200,7 +229,7 @@ class SolrResultCache:

         try:
             # Create cached metadata and result
-            cached_data = self._create_cache_metadata(result)
+            cached_data = self._create_cache_metadata(result, **params)
             if not cached_data:
                 return False  # Result too large or other issue

```
```diff
@@ -586,10 +615,19 @@ def with_solr_cache(query_type: str):
             # Check if force_refresh is requested (pop it before passing to function)
             force_refresh = kwargs.pop('force_refresh', False)

-            # Check if limit is applied - …
+            # Check if limit is applied - only cache full results (limit=-1)
             limit = kwargs.get('limit', -1)
             should_cache = (limit == -1)  # Only cache when getting all results (limit=-1)

+            # For expensive queries, we still only cache full results, but we handle limited requests
+            # by slicing from cached full results
+            expensive_query_types = ['similar_neurons', 'similar_morphology', 'similar_morphology_part_of',
+                                     'similar_morphology_part_of_exp', 'similar_morphology_nb',
+                                     'similar_morphology_nb_exp', 'similar_morphology_userdata',
+                                     'neurons_part_here', 'neurons_synaptic',
+                                     'neurons_presynaptic', 'neurons_postsynaptic']
+            # Note: expensive queries still only cache full results, but retrieval logic handles slicing
+
             # For neuron_neuron_connectivity_query, only cache when all parameters are defaults
             if query_type == 'neuron_neuron_connectivity_query':
                 min_weight = kwargs.get('min_weight', 0)
```
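Two knobs now steer the write path: `should_cache` (only full, `limit=-1` results are ever written) and membership in `expensive_query_types` (those are executed quickly with the caller's parameters and cached from a background thread, as a later hunk shows). A compact sketch of that policy decision (the helper name and return shape are made up for illustration):

```python
EXPENSIVE_QUERY_TYPES = {
    "similar_neurons", "similar_morphology", "similar_morphology_part_of",
    "similar_morphology_part_of_exp", "similar_morphology_nb",
    "similar_morphology_nb_exp", "similar_morphology_userdata",
    "neurons_part_here", "neurons_synaptic",
    "neurons_presynaptic", "neurons_postsynaptic",
}


def caching_policy(query_type, limit=-1):
    """Return (cache_inline, cache_in_background) for a request."""
    should_cache = (limit == -1)                    # only full results are ever written
    background = query_type in EXPENSIVE_QUERY_TYPES
    return should_cache and not background, background


print(caching_policy("term_info", limit=-1))        # (True, False)  -> cached inline
print(caching_policy("similar_neurons", limit=10))  # (False, True)  -> quick return, full result cached in background
print(caching_policy("term_info", limit=10))        # (False, False) -> not cached
```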
```diff
@@ -616,15 +654,16 @@ def with_solr_cache(query_type: str):
                 cache_term_id = f"{term_id}_preview_{preview}"

             # Include return_dataframe parameter in cache key for queries that support it
-            # This ensures DataFrame and dict …
- … (old lines 620-625 not rendered in the registry diff view)
+            # This ensures DataFrame and dict results are cached separately
+            dataframe_query_types = ['neurons_part_here', 'neurons_synaptic', 'neurons_presynaptic',
+                                     'neurons_postsynaptic', 'similar_neurons', 'similar_morphology',
+                                     'similar_morphology_part_of', 'similar_morphology_part_of_exp',
+                                     'similar_morphology_nb', 'similar_morphology_nb_exp',
+                                     'similar_morphology_userdata', 'neurons_part_here', 'neurons_synaptic',
+                                     'neurons_presynaptic', 'neurons_postsynaptic']
+            if query_type in dataframe_query_types:
                 return_dataframe = kwargs.get('return_dataframe', True)  # Default is True
-                cache_term_id = f"{cache_term_id} …
+                cache_term_id = f"{cache_term_id}_dataframe_{return_dataframe}"

             cache = get_solr_cache()

```
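The cache key is the term ID plus suffixes for any parameter that changes the shape of the result. A sketch of the key layout implied by these hunks (the helper below is hypothetical; the real decorator builds the key inline, and the IDs are placeholders):

```python
def build_cache_term_id(term_id, query_type, preview=None, return_dataframe=None,
                        dataframe_query_types=frozenset()):
    """Compose the cache key suffixes used when reading and writing cached results."""
    cache_term_id = term_id
    if preview is not None:                      # term_info distinguishes preview on/off
        cache_term_id = f"{cache_term_id}_preview_{preview}"
    if query_type in dataframe_query_types:      # DataFrame vs dict results cached separately
        cache_term_id = f"{cache_term_id}_dataframe_{return_dataframe}"
    return cache_term_id


print(build_cache_term_id("FBbt_00000001", "term_info", preview=True))
# FBbt_00000001_preview_True
print(build_cache_term_id("VFB_00000001", "similar_neurons", return_dataframe=True,
                          dataframe_query_types={"similar_neurons"}))
# VFB_00000001_dataframe_True
```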
```diff
@@ -634,12 +673,47 @@ def with_solr_cache(query_type: str):
                 cache.clear_cache_entry(query_type, cache_term_id)

             # Try cache first (will be empty if force_refresh was True)
-            # OPTIMIZATION: …
- … (old line 638 not rendered in the registry diff view)
+            # OPTIMIZATION: Always try to get full cached results first, then slice if needed
+            cached_result = None
             if not force_refresh:
-                # …
- … (old lines 641-642 not rendered in the registry diff view)
+                # print(f"DEBUG: Checking cache for {query_type}, term_id={term_id}, cache_term_id={cache_term_id}, should_cache={should_cache}")
+                # Try to get cached full result (limit=-1)
+                full_params = kwargs.copy()
+                full_params['limit'] = -1
+                # print(f"DEBUG: Attempting cache lookup for {query_type}({cache_term_id}) with full results")
+                cached_result = cache.get_cached_result(query_type, cache_term_id, **full_params)
+                # print(f"DEBUG: Cache lookup result: {cached_result is not None}")
+
+                # If we got a cached full result but need limited results, slice it
+                if cached_result is not None and limit != -1:
+                    if isinstance(cached_result, (list, pd.DataFrame)):
+                        if isinstance(cached_result, list):
+                            cached_result = cached_result[:limit]
+                        elif isinstance(cached_result, pd.DataFrame):
+                            cached_result = cached_result.head(limit)
+                        # print(f"DEBUG: Sliced cached result to {limit} items")
+                    elif isinstance(cached_result, dict):
+                        # Handle dict results with 'rows' (e.g., get_instances)
+                        if 'rows' in cached_result:
+                            cached_result = {
+                                'headers': cached_result.get('headers', {}),
+                                'rows': cached_result['rows'][:limit],
+                                'count': cached_result.get('count', len(cached_result.get('rows', [])))
+                            }
+                            # print(f"DEBUG: Sliced cached dict result to {limit} rows")
+                        # Handle term_info dict with 'queries'
+                        elif 'queries' in cached_result:
+                            for query in cached_result.get('queries', []):
+                                if 'preview_results' in query and 'rows' in query['preview_results']:
+                                    query['preview_results']['rows'] = query['preview_results']['rows'][:limit]
+                                    # Keep original count - don't change it to limit
+                            # print(f"DEBUG: Sliced cached term_info result to {limit} rows per query")
+                        else:
+                            # print(f"DEBUG: Cannot slice cached dict result (no 'rows' or 'queries'), returning full result")
+                            pass
+                    else:
+                        # print(f"DEBUG: Cannot slice cached result of type {type(cached_result)}, returning full result")
+                        pass
             else:
                 # For limited queries, try to get full cached results instead
                 full_kwargs = kwargs.copy()
```
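Besides lists and DataFrames, the read path above also trims the two dict shapes vfbquery returns: a `rows` table (as produced by get_instances) and a term_info payload whose `queries` carry `preview_results`. A runnable illustration of both cases on made-up data:

```python
def slice_dict_result(cached, limit):
    """Trim a cached dict result the way the decorator above does."""
    if "rows" in cached:
        return {
            "headers": cached.get("headers", {}),
            "rows": cached["rows"][:limit],
            "count": cached.get("count", len(cached.get("rows", []))),
        }
    if "queries" in cached:
        for query in cached.get("queries", []):
            pr = query.get("preview_results")
            if pr and "rows" in pr:
                pr["rows"] = pr["rows"][:limit]   # counts stay untouched
        return cached
    return cached  # nothing sliceable


table = {"headers": {"id": {}}, "rows": [{"id": i} for i in range(50)], "count": 50}
trimmed = slice_dict_result(table, 5)
print(len(trimmed["rows"]), trimmed["count"])  # 5 50  (count keeps the full total)

term = {"queries": [{"preview_results": {"rows": list(range(20))}, "count": 20}]}
slice_dict_result(term, 3)
print(len(term["queries"][0]["preview_results"]["rows"]))  # 3
```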
```diff
@@ -703,94 +777,183 @@ def with_solr_cache(query_type: str):
             else:
                 return cached_result

-            # Execute function
-            result = …
- … (old lines 708-722 not rendered in the registry diff view)
-                            result_is_error = count_value < 0  # Mark as error if count is negative
-                        else:
-                            result_is_valid = bool(result)  # For dicts without count field
-                    elif isinstance(result, (list, str)):
-                        result_is_valid = len(result) > 0
-                    else:
-                        result_is_valid = True
-
-            # If result is an error, actively clear any existing cache entry
-            # This ensures that transient failures don't get stuck in cache
-            if result_is_error:
-                logger.warning(f"Query returned error result for {query_type}({term_id}), clearing cache entry")
-                try:
-                    cache.clear_cache_entry(query_type, cache_term_id)
-                except Exception as e:
-                    logger.debug(f"Failed to clear cache entry: {e}")
-
-            if result_is_valid:
-                # Validate result before caching for term_info
-                if query_type == 'term_info':
-                    # Basic validation: must have Id and Name
-                    is_complete = (result and isinstance(result, dict) and
-                                   result.get('Id') and result.get('Name'))
-
-                    # Additional validation when preview=True: check if queries have results
-                    # We allow caching even if some queries failed (count=-1) as long as the core term_info is valid
-                    # This is because some query functions may not be implemented yet or may legitimately fail
-                    if is_complete:
-                        preview = kwargs.get('preview', True)
-                        if preview and 'Queries' in result and result['Queries']:
-                            # Count how many queries have valid results vs errors
-                            valid_queries = 0
-                            failed_queries = 0
+            # Execute function - for expensive queries, get quick results first, then cache full results in background
+            result = None
+            if query_type in expensive_query_types:
+                # For expensive queries: execute with original parameters for quick return, cache full results in background
+                # print(f"DEBUG: Executing {query_type} with original parameters for quick return")
+                result = func(*args, **kwargs)
+
+                # Start background thread to get full results and cache them
+                def cache_full_results_background():
+                    try:
+                        # Check if function supports limit parameter
+                        import inspect
+                        if 'limit' in inspect.signature(func).parameters:
+                            full_kwargs = kwargs.copy()
+                            full_kwargs['limit'] = -1
+                            # print(f"DEBUG: Background: Executing {query_type} with full results for caching")
+                            full_result = func(*args, **full_kwargs)

- … (old lines 757-763 not rendered in the registry diff view)
+                            # Validate and cache the full result
+                            if full_result is not None:
+                                result_is_valid = False
+                                if hasattr(full_result, 'empty'):  # DataFrame
+                                    result_is_valid = not full_result.empty
+                                elif isinstance(full_result, dict):
+                                    if 'count' in full_result:
+                                        count_value = full_result.get('count', -1)
+                                        result_is_valid = count_value >= 0
+                                    else:
+                                        result_is_valid = bool(full_result)
+                                elif isinstance(full_result, (list, str)):
+                                    result_is_valid = len(full_result) > 0
                                 else:
- … (old lines 765-772 not rendered in the registry diff view)
+                                    result_is_valid = True
+
+                                if result_is_valid:
+                                    # Special validation for term_info
+                                    if query_type == 'term_info':
+                                        is_complete = (full_result and isinstance(full_result, dict) and
+                                                       full_result.get('Id') and full_result.get('Name'))
+                                        if is_complete:
+                                            try:
+                                                full_kwargs_for_cache = kwargs.copy()
+                                                full_kwargs_for_cache['limit'] = -1
+                                                cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                                logger.debug(f"Background cached complete full result for {term_id}")
+                                            except Exception as e:
+                                                logger.debug(f"Background caching failed: {e}")
+                                    else:
+                                        try:
+                                            full_kwargs_for_cache = kwargs.copy()
+                                            full_kwargs_for_cache['limit'] = -1
+                                            cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                            logger.debug(f"Background cached full result for {term_id}")
+                                        except Exception as e:
+                                            logger.debug(f"Background caching failed: {e}")
+                    except Exception as e:
+                        logger.debug(f"Background caching thread failed: {e}")
+
+                # Start background caching thread
+                background_thread = threading.Thread(target=cache_full_results_background, daemon=True)
+                background_thread.start()
+                # print(f"DEBUG: Started background caching thread for {query_type}({term_id})")
+            else:
+                # For non-expensive queries: use original caching logic
+                full_result = None
+                if should_cache:
+                    # Execute with limit=-1 to get full results for caching (only for functions that support limit)
+                    full_kwargs = kwargs.copy()
+                    import inspect
+                    if 'limit' in inspect.signature(func).parameters:
+                        full_kwargs['limit'] = -1
+                    # print(f"DEBUG: Executing {query_type} with full results for caching")
+                    full_result = func(*args, **full_kwargs)
+                    result = full_result

-            # …
-            if …
- … (old lines 776-781 not rendered in the registry diff view)
-                        logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
-                    else:
-                        logger.warning(f"Not caching incomplete result for {term_id}")
+                    # If the original request was limited, slice the result for return
+                    if limit != -1 and result is not None:
+                        if isinstance(result, (list, pd.DataFrame)):
+                            if isinstance(result, list):
+                                result = result[:limit]
+                            elif isinstance(result, pd.DataFrame):
+                                result = result.head(limit)
+                        # print(f"DEBUG: Sliced result to {limit} items for return")
                 else:
-                    # …
- … (old lines 787-791 not rendered in the registry diff view)
+                    # Execute with original parameters (no caching)
+                    result = func(*args, **kwargs)
+                    full_result = result
+
+            # Cache the result - skip for expensive queries as they use background caching
+            if query_type not in expensive_query_types:
+                # Handle DataFrame, dict, and other result types properly
+                result_is_valid = False
+                result_is_error = False  # Track if result is an error that should clear cache
+
+                if result is not None:
+                    if hasattr(result, 'empty'):  # DataFrame
+                        result_is_valid = not result.empty
+                    elif isinstance(result, dict):
+                        # For dict results, check if it's not an error result (count != -1)
+                        # Error results should not be cached
+                        if 'count' in result:
+                            count_value = result.get('count', -1)
+                            result_is_valid = count_value >= 0  # Don't cache errors (count=-1)
+                            result_is_error = count_value < 0  # Mark as error if count is negative
+                        else:
+                            result_is_valid = bool(result)  # For dicts without count field
+                    elif isinstance(result, (list, str)):
+                        result_is_valid = len(result) > 0
                     else:
- … (old line 793 not rendered in the registry diff view)
+                        result_is_valid = True
+
+                # If result is an error, actively clear any existing cache entry
+                # This ensures that transient failures don't get stuck in cache
+                if result_is_error:
+                    logger.warning(f"Query returned error result for {query_type}({term_id}), clearing cache entry")
+                    try:
+                        cache.clear_cache_entry(query_type, cache_term_id)
+                    except Exception as e:
+                        logger.debug(f"Failed to clear cache entry: {e}")
+
+                if result_is_valid:
+                    # Validate result before caching for term_info
+                    if query_type == 'term_info':
+                        # Basic validation: must have Id and Name
+                        is_complete = (result and isinstance(result, dict) and
+                                       result.get('Id') and result.get('Name'))
+
+                        # Additional validation when preview=True: check if queries have results
+                        # We allow caching even if some queries failed (count=-1) as long as the core term_info is valid
+                        # This is because some query functions may not be implemented yet or may legitimately fail
+                        if is_complete:
+                            preview = kwargs.get('preview', True)
+                            if preview and 'Queries' in result and result['Queries']:
+                                # Count how many queries have valid results vs errors
+                                valid_queries = 0
+                                failed_queries = 0
+
+                                for query in result['Queries']:
+                                    count = query.get('count', -1)
+                                    preview_results = query.get('preview_results')
+
+                                    # Count queries with valid results (count >= 0)
+                                    if count >= 0 and isinstance(preview_results, dict):
+                                        valid_queries += 1
+                                    else:
+                                        failed_queries += 1
+
+                                # Only reject if ALL queries failed - at least one must succeed
+                                if valid_queries == 0 and failed_queries > 0:
+                                    is_complete = False
+                                    logger.warning(f"Not caching result for {term_id}: all {failed_queries} queries failed")
+                                elif failed_queries > 0:
+                                    logger.debug(f"Caching result for {term_id} with {valid_queries} valid queries ({failed_queries} failed)")
+
+                        # Only cache if result is complete AND no limit was applied
+                        if is_complete and should_cache:
+                            try:
+                                # Cache the full result with full parameters (limit=-1)
+                                full_kwargs_for_cache = kwargs.copy()
+                                full_kwargs_for_cache['limit'] = -1
+                                cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                logger.debug(f"Cached complete full result for {term_id}")
+                            except Exception as e:
+                                logger.debug(f"Failed to cache result: {e}")
+                        elif not should_cache:
+                            logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
+                        else:
+                            logger.warning(f"Not caching incomplete result for {term_id}")
+                    else:
+                        # Only cache if no limit was applied
+                        if should_cache:
+                            try:
+                                cache.cache_result(query_type, cache_term_id, result, **kwargs)
+                            except Exception as e:
+                                logger.debug(f"Failed to cache result: {e}")
+                        else:
+                            logger.debug(f"Not caching limited result for {term_id} (limit={limit}))")

             return result

```
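For the expensive query types, the decorator now answers with the quick, possibly limited result and hands full-result computation plus caching to a daemon thread. A stripped-down, runnable sketch of that handoff (`slow_query` and the `store` dict are stand-ins for the real query functions and the SOLR cache):

```python
import inspect
import threading
import time

store = {}  # illustrative stand-in for the SOLR-backed cache


def slow_query(term_id, limit=-1):
    """Pretend this is an expensive neuron query."""
    time.sleep(0.1)
    rows = list(range(100))
    return rows if limit == -1 else rows[:limit]


def run_expensive(term_id, **kwargs):
    # 1. Answer the caller quickly with the parameters they asked for
    result = slow_query(term_id, **kwargs)

    # 2. In the background, recompute with limit=-1 and cache the full result
    def cache_full_results_background():
        full_kwargs = dict(kwargs)
        if "limit" in inspect.signature(slow_query).parameters:
            full_kwargs["limit"] = -1
        full_result = slow_query(term_id, **full_kwargs)
        if full_result:
            store[term_id] = full_result

    threading.Thread(target=cache_full_results_background, daemon=True).start()
    return result


quick = run_expensive("VFB_00000001", limit=5)
print(len(quick))                   # 5, returned without waiting for the full query
time.sleep(0.5)                     # give the daemon thread time to finish in this demo
print(len(store["VFB_00000001"]))   # 100, cached full result
```

Because the thread is a daemon, a process that exits right after the quick return may never populate the cache; the trade-off is that the caller is never blocked on the full query.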
vfbquery/term_info_queries.py
CHANGED
```diff
@@ -745,7 +745,7 @@ def get_link(text: str, link: str) -> str:


 def get_secure_url(url: str, allow_redirects: bool = True, timeout=15) -> str:
-    secure_url = url.replace("http://", " …
+    secure_url = url.replace("http://", "https://")
     if check_url_exist(secure_url, allow_redirects, timeout):
         return secure_url
     return url
```
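`get_secure_url` now upgrades the scheme and keeps the https form only if it actually responds, otherwise it returns the original URL. A self-contained sketch of the same behaviour using requests directly (the real module delegates the reachability test to its own `check_url_exist` helper):

```python
import requests


def get_secure_url_sketch(url: str, allow_redirects: bool = True, timeout: int = 15) -> str:
    """Prefer the https form of a URL, but only if it responds."""
    secure_url = url.replace("http://", "https://")
    try:
        resp = requests.head(secure_url, allow_redirects=allow_redirects, timeout=timeout)
        if resp.status_code < 400:
            return secure_url
    except requests.RequestException:
        pass
    return url


# Makes a network request; returns the https form when that endpoint answers
print(get_secure_url_sketch("http://virtualflybrain.org"))
```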
vfbquery/vfb_queries.py
CHANGED
```diff
@@ -340,10 +340,25 @@ def encode_markdown_links(df, columns):
             return label

         try:
-            # …
-            # These should NOT be encoded
+            # Handle linked images (format: [![alt](image_url)](link))
             if label.startswith("[](link)
 … (the rest of this context line, old line 346, and new lines 345-346 were consumed by the registry's markdown rendering)
+                def secure_image_url(match):
+                    alt_text = match.group(1)
+                    image_url = match.group(2)
+                    title = match.group(3) if match.group(3) else ""
+                    link = match.group(4)
+                    secure_url = image_url.replace("http://", "https://")
+                    if title:
+                        return f"[![{alt_text}]({secure_url} \"{title}\")]({link})"
+                    else:
+                        return f"[![{alt_text}]({secure_url})]({link})"
+
+                # Regex to match the entire linked image
+                pattern = r'\[\!\[([^\]]+)\]\(([^\'"\s]+)(?:\s+[\'"]([^\'"]*)[\'"])?\)\]\(([^)]+)\)'
+                encoded_label = re.sub(pattern, secure_image_url, label)
+                return encoded_label

         # Process regular markdown links - handle multiple links separated by commas
         # Pattern matches [label](url) format
```
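The regex added here captures a linked image's alt text, image URL, optional quoted title, and outer link so that only the image URL is upgraded to https. A short, runnable demonstration on a made-up markdown cell; the replacement function mirrors the apparent intent of `secure_image_url`, whose return lines are only partially rendered in this view:

```python
import re

pattern = r'\[\!\[([^\]]+)\]\(([^\'"\s]+)(?:\s+[\'"]([^\'"]*)[\'"])?\)\]\(([^)]+)\)'

cell = '[![thumbnail](http://www.virtualflybrain.org/data/thumb.png "VFB thumbnail")](http://virtualflybrain.org/term/VFB_00000001)'


def secure_image_url(match):
    alt_text, image_url, title, link = match.groups()
    secure = image_url.replace("http://", "https://")
    title_part = f' "{title}"' if title else ""
    return f"[![{alt_text}]({secure}{title_part})]({link})"


print(re.sub(pattern, secure_image_url, cell))
# [![thumbnail](https://www.virtualflybrain.org/data/thumb.png "VFB thumbnail")](http://virtualflybrain.org/term/VFB_00000001)
```

Note that, as in the hunk, the outer link is left untouched; only the embedded image URL is switched to https.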
```diff
@@ -356,7 +371,9 @@ def encode_markdown_links(df, columns):
             url_part = match.group(2)  # The URL part (between ( and ))
             # Encode brackets in the label part only
             label_part_encoded = encode_brackets(label_part)
- … (old line 359 not rendered in the registry diff view)
+            # Ensure URLs use https
+            url_part_secure = url_part.replace("http://", "https://")
+            return f"[{label_part_encoded}]({url_part_secure})"

         # Replace all markdown links with their encoded versions
         encoded_label = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', encode_single_link, label)
```
```diff
@@ -1268,7 +1285,7 @@ def NeuronRegionConnectivityQuery_to_schema(name, take_default):
         "default": take_default,
     }
     preview = 5
-    preview_columns = ["id", " …
+    preview_columns = ["id", "region", "presynaptic_terminals", "postsynaptic_terminals", "tags"]
     return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)


```
```diff
@@ -2713,7 +2730,7 @@ def get_neuron_region_connectivity(short_form: str, return_dataframe=True, limit
         primary
     RETURN
         target.short_form AS id,
-        target.label AS …
+        target.label AS region,
         synapse_counts.`pre` AS presynaptic_terminals,
         synapse_counts.`post` AS postsynaptic_terminals,
         target.uniqueFacets AS tags
```
```diff
@@ -2732,7 +2749,7 @@ def get_neuron_region_connectivity(short_form: str, return_dataframe=True, limit

     headers = {
         'id': {'title': 'Region ID', 'type': 'selection_id', 'order': -1},
-        ' …
+        'region': {'title': 'Brain Region', 'type': 'markdown', 'order': 0},
         'presynaptic_terminals': {'title': 'Presynaptic Terminals', 'type': 'number', 'order': 1},
         'postsynaptic_terminals': {'title': 'Postsynaptic Terminals', 'type': 'number', 'order': 2},
         'tags': {'title': 'Region Types', 'type': 'list', 'order': 3},
```
```diff
@@ -3915,6 +3932,20 @@ def fill_query_results(term_info):
             result_count = 0

             # Store preview results (count is stored at query level, not in preview_results)
+            # Sort rows based on the sort field in headers, default to ID descending if none
+            sort_column = None
+            sort_direction = None
+            for col, info in filtered_headers.items():
+                if 'sort' in info and isinstance(info['sort'], dict):
+                    sort_column = col
+                    sort_direction = list(info['sort'].values())[0]  # e.g., 'Asc' or 'Desc'
+                    break
+            if sort_column:
+                reverse = sort_direction == 'Desc'
+                filtered_result.sort(key=lambda x: x.get(sort_column, ''), reverse=reverse)
+            else:
+                # Default to ID descending if no sort specified
+                filtered_result.sort(key=lambda x: x.get('id', ''), reverse=True)
             query['preview_results'] = {'headers': filtered_headers, 'rows': filtered_result}
             query['count'] = result_count
             # print(f"Filtered result: {filtered_result}")
```
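`fill_query_results` now orders preview rows by the first header column that declares a sort direction, falling back to ID descending. A runnable sketch of that selection logic on made-up headers and rows (the `{0: 'Asc'}` sort spec is an assumed shape; the code only requires a dict whose first value is 'Asc' or 'Desc'):

```python
def sort_preview_rows(filtered_headers, filtered_result):
    """Order rows by the first header that carries a sort spec, else by id descending."""
    sort_column = None
    sort_direction = None
    for col, info in filtered_headers.items():
        if 'sort' in info and isinstance(info['sort'], dict):
            sort_column = col
            sort_direction = list(info['sort'].values())[0]  # e.g. 'Asc' or 'Desc'
            break
    if sort_column:
        filtered_result.sort(key=lambda x: x.get(sort_column, ''), reverse=(sort_direction == 'Desc'))
    else:
        filtered_result.sort(key=lambda x: x.get('id', ''), reverse=True)
    return filtered_result


headers = {'id': {'title': 'ID'}, 'label': {'title': 'Name', 'sort': {0: 'Asc'}}}
rows = [{'id': 'VFB_3', 'label': 'zeta'}, {'id': 'VFB_1', 'label': 'alpha'}]
print(sort_preview_rows(headers, rows))
# [{'id': 'VFB_1', 'label': 'alpha'}, {'id': 'VFB_3', 'label': 'zeta'}]
```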