vfbquery 0.4.1-py3-none-any.whl → 0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. test/readme_parser.py +29 -27
  2. test/term_info_queries_test.py +46 -34
  3. test/test_dataset_template_queries.py +138 -0
  4. test/test_default_caching.py +89 -84
  5. test/test_examples_code.py +7 -0
  6. test/test_examples_diff.py +95 -172
  7. test/test_expression_overlaps.py +183 -0
  8. test/test_expression_pattern_fragments.py +123 -0
  9. test/test_images_neurons.py +152 -0
  10. test/test_images_that_develop_from.py +112 -0
  11. test/test_lineage_clones_in.py +190 -0
  12. test/test_nblast_queries.py +124 -0
  13. test/test_neuron_classes_fasciculating.py +187 -0
  14. test/test_neuron_inputs.py +193 -0
  15. test/test_neuron_neuron_connectivity.py +89 -0
  16. test/test_neuron_region_connectivity.py +117 -0
  17. test/test_neurons_part_here.py +203 -0
  18. test/test_new_owlery_queries.py +282 -0
  19. test/test_publication_transgene_queries.py +101 -0
  20. test/test_query_performance.py +739 -0
  21. test/test_similar_morphology.py +177 -0
  22. test/test_tracts_nerves_innervating.py +188 -0
  23. test/test_transcriptomics.py +223 -0
  24. vfbquery/__init__.py +47 -35
  25. vfbquery/cached_functions.py +772 -131
  26. vfbquery/neo4j_client.py +120 -0
  27. vfbquery/owlery_client.py +463 -0
  28. vfbquery/solr_cache_integration.py +34 -30
  29. vfbquery/solr_fetcher.py +1 -1
  30. vfbquery/solr_result_cache.py +338 -36
  31. vfbquery/term_info_queries.py +1 -1
  32. vfbquery/vfb_queries.py +2969 -627
  33. vfbquery-0.5.1.dist-info/METADATA +2806 -0
  34. vfbquery-0.5.1.dist-info/RECORD +40 -0
  35. vfbquery-0.4.1.dist-info/METADATA +0 -1315
  36. vfbquery-0.4.1.dist-info/RECORD +0 -19
  37. {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/LICENSE +0 -0
  38. {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/WHEEL +0 -0
  39. {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/top_level.txt +0 -0
vfbquery/solr_result_cache.py

@@ -14,10 +14,12 @@ import json
 import requests
 import hashlib
 import time
+import threading
 from datetime import datetime, timedelta
 from typing import Dict, Any, Optional, List
 import logging
 from dataclasses import dataclass, asdict
+import pandas as pd
 from vfbquery.term_info_queries import NumpyEncoder
 
 logger = logging.getLogger(__name__)
@@ -59,7 +61,7 @@ class SolrResultCache:
         self.max_result_size_mb = max_result_size_mb
         self.max_result_size_bytes = max_result_size_mb * 1024 * 1024
 
-    def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
+    def _create_cache_metadata(self, result: Any, **params) -> Optional[Dict[str, Any]]:
        """Create metadata for cached result with 3-month expiration"""
        serialized_result = json.dumps(result, cls=NumpyEncoder)
        result_size = len(serialized_result.encode('utf-8'))
@@ -77,6 +79,7 @@ class SolrResultCache:
             "cached_at": now.isoformat(),
             "expires_at": expires_at.isoformat(),
             "result_size": result_size,
+            "params": params,  # Store the parameters used for this query
             "hit_count": 0,
             "cache_version": "1.0",  # For future compatibility
             "ttl_hours": self.ttl_hours  # Store TTL for debugging
@@ -95,11 +98,12 @@ class SolrResultCache:
             Cached result or None if not found/expired
         """
         try:
-            # Query for cache document with prefixed ID
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Query for cache document with prefixed ID including query type
+            # This ensures different query types for the same term have separate cache entries
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
 
             response = requests.get(f"{self.cache_url}/select", params={
-                "q": f"id:{cache_doc_id} AND query_type:{query_type}",
+                "q": f"id:{cache_doc_id}",
                 "fl": "cache_data",
                 "wt": "json"
             }, timeout=5)  # Short timeout for cache lookups
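
The point of folding query_type into the document ID: under the 0.4.1 scheme every query type for a term mapped to the same Solr document, so caching one query type's result overwrote another's. A toy comparison of the two key schemes (term and query-type values are illustrative):

term_id = "FBbt_00003624"  # illustrative term

# 0.4.1: one document per term, shared by every query type -> collisions
old_key = f"vfb_query_{term_id}"

# 0.5.1: the query type is part of the ID -> one entry per query type
new_keys = [f"vfb_query_{qt}_{term_id}" for qt in ("term_info", "neurons_part_here")]

print(old_key)   # vfb_query_FBbt_00003624
print(new_keys)  # two distinct IDs, so neither result overwrites the other
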
@@ -148,6 +152,33 @@ class SolrResultCache:
                 self._clear_expired_cache_document(cache_doc_id)
                 return None
 
+            # Check if cached result parameters are compatible with requested parameters
+            cached_params = cached_data.get("params", {})
+            requested_limit = params.get("limit", -1)
+            cached_limit = cached_params.get("limit", -1)
+
+            # Only cached full results (limit=-1) are stored
+            # If requesting limited results, we can slice from cached full results
+            if cached_limit != -1:
+                logger.debug(f"Cache miss: Unexpected cached result with limit={cached_limit}, expected -1")
+                return None
+
+            # If requesting unlimited results, return the full cached result
+            if requested_limit == -1:
+                result = cached_data["result"]
+            else:
+                # If requesting limited results, slice from the cached full result
+                result = cached_data["result"]
+                if isinstance(result, (list, pd.DataFrame)):
+                    if isinstance(result, list):
+                        result = result[:requested_limit]
+                    elif isinstance(result, pd.DataFrame):
+                        result = result.head(requested_limit)
+                    logger.debug(f"Cache hit: Returning {requested_limit} items from cached full result")
+                else:
+                    # For other result types, return as-is (can't slice)
+                    logger.debug(f"Cache hit: Returning full cached result (cannot slice type {type(result)})")
+
             # Increment hit count asynchronously
             self._increment_cache_hit_count(cache_doc_id, cached_data.get("hit_count", 0))
 
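
The block above only ever stores full results (limit=-1) and serves limited requests by slicing. A standalone sketch of that slicing rule, assuming the same pandas dependency the module now imports (slice_cached is a hypothetical helper, not part of the package):

import pandas as pd

def slice_cached(result, requested_limit):
    # Mirrors the slicing rule above (sketch, not the package API)
    if requested_limit == -1:
        return result
    if isinstance(result, list):
        return result[:requested_limit]
    if isinstance(result, pd.DataFrame):
        return result.head(requested_limit)
    return result  # unsliceable types are returned whole

print(slice_cached([1, 2, 3, 4], 2))                        # [1, 2]
print(len(slice_cached(pd.DataFrame({"a": range(4)}), 2)))  # 2
print(slice_cached({"count": 4}, 2))                        # returned whole
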
@@ -161,6 +192,14 @@ class SolrResultCache:
                 logger.warning(f"Failed to parse cached result for {term_id}")
                 return None
 
+            # IMPORTANT: Validate cached result - reject error results (count=-1)
+            # This ensures old cached errors get retried when the service is working again
+            if isinstance(result, dict) and 'count' in result:
+                if result.get('count', -1) < 0:
+                    logger.warning(f"Rejecting cached error result for {query_type}({term_id}): count={result.get('count')}")
+                    self._clear_expired_cache_document(cache_doc_id)
+                    return None
+
             logger.info(f"Cache hit for {query_type}({term_id})")
             return result
 
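
Here count=-1 is the module's convention for a failed upstream query, while count=0 is a legitimate empty result. A quick restatement of the rejection rule (is_cached_error is a hypothetical helper):

def is_cached_error(result):
    # Reject dicts whose count is negative, per the validation above
    return isinstance(result, dict) and "count" in result and result.get("count", -1) < 0

print(is_cached_error({"count": -1}))  # True  -> evicted and re-queried
print(is_cached_error({"count": 0}))   # False -> valid "no matches" result
print(is_cached_error({"rows": []}))   # False -> no count field, left alone
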
@@ -190,12 +229,13 @@ class SolrResultCache:
 
         try:
             # Create cached metadata and result
-            cached_data = self._create_cache_metadata(result)
+            cached_data = self._create_cache_metadata(result, **params)
             if not cached_data:
                 return False  # Result too large or other issue
 
-            # Create cache document with prefixed ID
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Create cache document with prefixed ID including query type
+            # This ensures different query types for the same term have separate cache entries
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
 
             cache_doc = {
                 "id": cache_doc_id,
@@ -252,7 +292,8 @@ class SolrResultCache:
             True if successfully cleared, False otherwise
         """
         try:
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Include query_type in cache document ID to match storage format
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
             response = requests.post(
                 f"{self.cache_url}/update",
                 data=f'<delete><id>{cache_doc_id}</id></delete>',
@@ -299,10 +340,11 @@ class SolrResultCache:
             Dictionary with cache age info or None if not cached
         """
         try:
-            cache_doc_id = f"vfb_query_{term_id}"
+            # Include query_type in cache document ID to match storage format
+            cache_doc_id = f"vfb_query_{query_type}_{term_id}"
 
             response = requests.get(f"{self.cache_url}/select", params={
-                "q": f"id:{cache_doc_id} AND query_type:{query_type}",
+                "q": f"id:{cache_doc_id}",
                 "fl": "cache_data,hit_count,last_accessed",
                 "wt": "json"
             }, timeout=5)
@@ -573,6 +615,25 @@ def with_solr_cache(query_type: str):
         # Check if force_refresh is requested (pop it before passing to function)
         force_refresh = kwargs.pop('force_refresh', False)
 
+        # Check if limit is applied - only cache full results (limit=-1)
+        limit = kwargs.get('limit', -1)
+        should_cache = (limit == -1)  # Only cache when getting all results (limit=-1)
+
+        # For expensive queries, we still only cache full results, but we handle limited requests
+        # by slicing from cached full results
+        expensive_query_types = ['similar_neurons', 'similar_morphology', 'similar_morphology_part_of',
+                                 'similar_morphology_part_of_exp', 'similar_morphology_nb',
+                                 'similar_morphology_nb_exp', 'similar_morphology_userdata',
+                                 'neurons_part_here', 'neurons_synaptic',
+                                 'neurons_presynaptic', 'neurons_postsynaptic']
+        # Note: expensive queries still only cache full results, but retrieval logic handles slicing
+
+        # For neuron_neuron_connectivity_query, only cache when all parameters are defaults
+        if query_type == 'neuron_neuron_connectivity_query':
+            min_weight = kwargs.get('min_weight', 0)
+            direction = kwargs.get('direction', 'both')
+            should_cache = should_cache and (min_weight == 0) and (direction == 'both')
+
         # Extract term_id from first argument or kwargs
         term_id = args[0] if args else kwargs.get('short_form') or kwargs.get('term_id')
@@ -585,36 +646,121 @@ def with_solr_cache(query_type: str):
             logger.warning(f"No term_id found for caching {query_type}")
             return func(*args, **kwargs)
 
+        # Include preview parameter in cache key for term_info queries
+        # This ensures preview=True and preview=False have separate cache entries
+        cache_term_id = term_id
+        if query_type == 'term_info':
+            preview = kwargs.get('preview', True)  # Default is True
+            cache_term_id = f"{term_id}_preview_{preview}"
+
+        # Include return_dataframe parameter in cache key for queries that support it
+        # This ensures DataFrame and dict results are cached separately
+        dataframe_query_types = ['neurons_part_here', 'neurons_synaptic', 'neurons_presynaptic',
+                                 'neurons_postsynaptic', 'similar_neurons', 'similar_morphology',
+                                 'similar_morphology_part_of', 'similar_morphology_part_of_exp',
+                                 'similar_morphology_nb', 'similar_morphology_nb_exp',
+                                 'similar_morphology_userdata', 'neurons_part_here', 'neurons_synaptic',
+                                 'neurons_presynaptic', 'neurons_postsynaptic']
+        if query_type in dataframe_query_types:
+            return_dataframe = kwargs.get('return_dataframe', True)  # Default is True
+            cache_term_id = f"{cache_term_id}_dataframe_{return_dataframe}"
+
         cache = get_solr_cache()
 
         # Clear cache if force_refresh is True
         if force_refresh:
             logger.info(f"Force refresh requested for {query_type}({term_id})")
-            cache.clear_cache_entry(query_type, term_id)
+            cache.clear_cache_entry(query_type, cache_term_id)
 
         # Try cache first (will be empty if force_refresh was True)
+        # OPTIMIZATION: Always try to get full cached results first, then slice if needed
+        cached_result = None
         if not force_refresh:
-            cached_result = cache.get_cached_result(query_type, term_id, **kwargs)
+            # print(f"DEBUG: Checking cache for {query_type}, term_id={term_id}, cache_term_id={cache_term_id}, should_cache={should_cache}")
+            # Try to get cached full result (limit=-1)
+            full_params = kwargs.copy()
+            full_params['limit'] = -1
+            # print(f"DEBUG: Attempting cache lookup for {query_type}({cache_term_id}) with full results")
+            cached_result = cache.get_cached_result(query_type, cache_term_id, **full_params)
+            # print(f"DEBUG: Cache lookup result: {cached_result is not None}")
+
+            # If we got a cached full result but need limited results, slice it
+            if cached_result is not None and limit != -1:
+                if isinstance(cached_result, (list, pd.DataFrame)):
+                    if isinstance(cached_result, list):
+                        cached_result = cached_result[:limit]
+                    elif isinstance(cached_result, pd.DataFrame):
+                        cached_result = cached_result.head(limit)
+                    # print(f"DEBUG: Sliced cached result to {limit} items")
+                elif isinstance(cached_result, dict):
+                    # Handle dict results with 'rows' (e.g., get_instances)
+                    if 'rows' in cached_result:
+                        cached_result = {
+                            'headers': cached_result.get('headers', {}),
+                            'rows': cached_result['rows'][:limit],
+                            'count': cached_result.get('count', len(cached_result.get('rows', [])))
+                        }
+                        # print(f"DEBUG: Sliced cached dict result to {limit} rows")
+                    # Handle term_info dict with 'queries'
+                    elif 'queries' in cached_result:
+                        for query in cached_result.get('queries', []):
+                            if 'preview_results' in query and 'rows' in query['preview_results']:
+                                query['preview_results']['rows'] = query['preview_results']['rows'][:limit]
+                                # Keep original count - don't change it to limit
+                        # print(f"DEBUG: Sliced cached term_info result to {limit} rows per query")
+                    else:
+                        # print(f"DEBUG: Cannot slice cached dict result (no 'rows' or 'queries'), returning full result")
+                        pass
+                else:
+                    # print(f"DEBUG: Cannot slice cached result of type {type(cached_result)}, returning full result")
+                    pass
+            else:
+                # For limited queries, try to get full cached results instead
+                full_kwargs = kwargs.copy()
+                full_kwargs['limit'] = -1  # Get full results
+                cached_result = cache.get_cached_result(query_type, cache_term_id, **full_kwargs)
+
+                # If we got full cached results, extract the limited portion
+                if cached_result is not None and limit > 0:
+                    logger.debug(f"Extracting first {limit} rows from cached full results for {term_id}")
+
+                    # Extract limited rows based on result type
+                    if isinstance(cached_result, dict) and 'rows' in cached_result:
+                        cached_result = {
+                            'headers': cached_result.get('headers', {}),
+                            'rows': cached_result['rows'][:limit],
+                            'count': cached_result.get('count', len(cached_result.get('rows', [])))
+                        }
+                    elif isinstance(cached_result, pd.DataFrame):
+                        # Keep the full count but limit the rows
+                        original_count = len(cached_result)
+                        cached_result = cached_result.head(limit)
+                        # Add count attribute if possible
+                        if hasattr(cached_result, '_metadata'):
+                            cached_result._metadata['count'] = original_count
+
         if cached_result is not None:
             # Validate that cached result has essential fields for term_info
             if query_type == 'term_info':
                 is_valid = (cached_result and isinstance(cached_result, dict) and
                             cached_result.get('Id') and cached_result.get('Name'))
 
-                # Additional validation for query results
-                if is_valid and 'Queries' in cached_result:
+                # Additional validation for query results - only when preview=True
+                preview = kwargs.get('preview', True)  # Default is True
+                if is_valid and preview and 'Queries' in cached_result:
                     logger.debug(f"Validating {len(cached_result['Queries'])} queries for {term_id}")
                     for i, query in enumerate(cached_result['Queries']):
-                        count = query.get('count', 0)
+                        count = query.get('count', -1)  # Default to -1 if missing
                         preview_results = query.get('preview_results')
                         headers = preview_results.get('headers', []) if isinstance(preview_results, dict) else []
 
                         logger.debug(f"Query {i}: count={count}, preview_results_type={type(preview_results)}, headers={headers}")
 
-                        # Check if query has unrealistic count (0 or -1) which indicates failed execution
-                        if count <= 0:
+                        # Check if query has error count (-1) which indicates failed execution
+                        # Note: count of 0 is valid - it means "no matches found"
+                        if count < 0:
                             is_valid = False
-                            logger.debug(f"Cached result has invalid query count {count} for {term_id}")
+                            logger.debug(f"Cached result has error query count {count} for {term_id}")
                             break
                         # Check if preview_results is missing or has empty headers when it should have data
                         if not isinstance(preview_results, dict) or not headers:
@@ -631,27 +777,183 @@ def with_solr_cache(query_type: str):
             else:
                 return cached_result
 
-        # Execute function and cache result
-        result = func(*args, **kwargs)
-
-        # Cache the result asynchronously to avoid blocking
-        if result:
-            # Validate result before caching for term_info
-            if query_type == 'term_info':
-                if (result and isinstance(result, dict) and
-                        result.get('Id') and result.get('Name')):
-                    try:
-                        cache.cache_result(query_type, term_id, result, **kwargs)
-                        logger.debug(f"Cached complete result for {term_id}")
-                    except Exception as e:
-                        logger.debug(f"Failed to cache result: {e}")
-                else:
-                    logger.warning(f"Not caching incomplete result for {term_id}")
+        # Execute function - for expensive queries, get quick results first, then cache full results in background
+        result = None
+        if query_type in expensive_query_types:
+            # For expensive queries: execute with original parameters for quick return, cache full results in background
+            # print(f"DEBUG: Executing {query_type} with original parameters for quick return")
+            result = func(*args, **kwargs)
+
+            # Start background thread to get full results and cache them
+            def cache_full_results_background():
+                try:
+                    # Check if function supports limit parameter
+                    import inspect
+                    if 'limit' in inspect.signature(func).parameters:
+                        full_kwargs = kwargs.copy()
+                        full_kwargs['limit'] = -1
+                        # print(f"DEBUG: Background: Executing {query_type} with full results for caching")
+                        full_result = func(*args, **full_kwargs)
+
+                        # Validate and cache the full result
+                        if full_result is not None:
+                            result_is_valid = False
+                            if hasattr(full_result, 'empty'):  # DataFrame
+                                result_is_valid = not full_result.empty
+                            elif isinstance(full_result, dict):
+                                if 'count' in full_result:
+                                    count_value = full_result.get('count', -1)
+                                    result_is_valid = count_value >= 0
+                                else:
+                                    result_is_valid = bool(full_result)
+                            elif isinstance(full_result, (list, str)):
+                                result_is_valid = len(full_result) > 0
+                            else:
+                                result_is_valid = True
+
+                            if result_is_valid:
+                                # Special validation for term_info
+                                if query_type == 'term_info':
+                                    is_complete = (full_result and isinstance(full_result, dict) and
+                                                   full_result.get('Id') and full_result.get('Name'))
+                                    if is_complete:
+                                        try:
+                                            full_kwargs_for_cache = kwargs.copy()
+                                            full_kwargs_for_cache['limit'] = -1
+                                            cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                            logger.debug(f"Background cached complete full result for {term_id}")
+                                        except Exception as e:
+                                            logger.debug(f"Background caching failed: {e}")
+                                else:
+                                    try:
+                                        full_kwargs_for_cache = kwargs.copy()
+                                        full_kwargs_for_cache['limit'] = -1
+                                        cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                        logger.debug(f"Background cached full result for {term_id}")
+                                    except Exception as e:
+                                        logger.debug(f"Background caching failed: {e}")
+                except Exception as e:
+                    logger.debug(f"Background caching thread failed: {e}")
+
+            # Start background caching thread
+            background_thread = threading.Thread(target=cache_full_results_background, daemon=True)
+            background_thread.start()
+            # print(f"DEBUG: Started background caching thread for {query_type}({term_id})")
+        else:
+            # For non-expensive queries: use original caching logic
+            full_result = None
+            if should_cache:
+                # Execute with limit=-1 to get full results for caching (only for functions that support limit)
+                full_kwargs = kwargs.copy()
+                import inspect
+                if 'limit' in inspect.signature(func).parameters:
+                    full_kwargs['limit'] = -1
+                    # print(f"DEBUG: Executing {query_type} with full results for caching")
+                full_result = func(*args, **full_kwargs)
+                result = full_result
+
+                # If the original request was limited, slice the result for return
+                if limit != -1 and result is not None:
+                    if isinstance(result, (list, pd.DataFrame)):
+                        if isinstance(result, list):
+                            result = result[:limit]
+                        elif isinstance(result, pd.DataFrame):
+                            result = result.head(limit)
+                        # print(f"DEBUG: Sliced result to {limit} items for return")
             else:
+                # Execute with original parameters (no caching)
+                result = func(*args, **kwargs)
+                full_result = result
+
+        # Cache the result - skip for expensive queries as they use background caching
+        if query_type not in expensive_query_types:
+            # Handle DataFrame, dict, and other result types properly
+            result_is_valid = False
+            result_is_error = False  # Track if result is an error that should clear cache
+
+            if result is not None:
+                if hasattr(result, 'empty'):  # DataFrame
+                    result_is_valid = not result.empty
+                elif isinstance(result, dict):
+                    # For dict results, check if it's not an error result (count != -1)
+                    # Error results should not be cached
+                    if 'count' in result:
+                        count_value = result.get('count', -1)
+                        result_is_valid = count_value >= 0  # Don't cache errors (count=-1)
+                        result_is_error = count_value < 0  # Mark as error if count is negative
+                    else:
+                        result_is_valid = bool(result)  # For dicts without count field
+                elif isinstance(result, (list, str)):
+                    result_is_valid = len(result) > 0
+                else:
+                    result_is_valid = True
+
+            # If result is an error, actively clear any existing cache entry
+            # This ensures that transient failures don't get stuck in cache
+            if result_is_error:
+                logger.warning(f"Query returned error result for {query_type}({term_id}), clearing cache entry")
                 try:
-                    cache.cache_result(query_type, term_id, result, **kwargs)
+                    cache.clear_cache_entry(query_type, cache_term_id)
                 except Exception as e:
-                    logger.debug(f"Failed to cache result: {e}")
+                    logger.debug(f"Failed to clear cache entry: {e}")
+
+            if result_is_valid:
+                # Validate result before caching for term_info
+                if query_type == 'term_info':
+                    # Basic validation: must have Id and Name
+                    is_complete = (result and isinstance(result, dict) and
+                                   result.get('Id') and result.get('Name'))
+
+                    # Additional validation when preview=True: check if queries have results
+                    # We allow caching even if some queries failed (count=-1) as long as the core term_info is valid
+                    # This is because some query functions may not be implemented yet or may legitimately fail
+                    if is_complete:
+                        preview = kwargs.get('preview', True)
+                        if preview and 'Queries' in result and result['Queries']:
+                            # Count how many queries have valid results vs errors
+                            valid_queries = 0
+                            failed_queries = 0
+
+                            for query in result['Queries']:
+                                count = query.get('count', -1)
+                                preview_results = query.get('preview_results')
+
+                                # Count queries with valid results (count >= 0)
+                                if count >= 0 and isinstance(preview_results, dict):
+                                    valid_queries += 1
+                                else:
+                                    failed_queries += 1
+
+                            # Only reject if ALL queries failed - at least one must succeed
+                            if valid_queries == 0 and failed_queries > 0:
+                                is_complete = False
+                                logger.warning(f"Not caching result for {term_id}: all {failed_queries} queries failed")
+                            elif failed_queries > 0:
+                                logger.debug(f"Caching result for {term_id} with {valid_queries} valid queries ({failed_queries} failed)")
+
+                    # Only cache if result is complete AND no limit was applied
+                    if is_complete and should_cache:
+                        try:
+                            # Cache the full result with full parameters (limit=-1)
+                            full_kwargs_for_cache = kwargs.copy()
+                            full_kwargs_for_cache['limit'] = -1
+                            cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                            logger.debug(f"Cached complete full result for {term_id}")
+                        except Exception as e:
+                            logger.debug(f"Failed to cache result: {e}")
+                    elif not should_cache:
+                        logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
+                    else:
+                        logger.warning(f"Not caching incomplete result for {term_id}")
+                else:
+                    # Only cache if no limit was applied
+                    if should_cache:
+                        try:
+                            cache.cache_result(query_type, cache_term_id, result, **kwargs)
+                        except Exception as e:
+                            logger.debug(f"Failed to cache result: {e}")
+                    else:
+                        logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
 
         return result
 
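
The expensive-query branch above is a write-behind pattern: answer the caller from the quick, possibly limited call, then let a daemon thread fetch and cache the full result. A self-contained sketch of that shape, with a stand-in query function and an in-memory dict in place of the Solr-backed cache (all names here are hypothetical):

import threading

cache = {}  # stand-in for the Solr-backed result cache

def slow_query(term_id, limit=-1):
    data = list(range(100))  # pretend this took seconds to compute
    return data if limit == -1 else data[:limit]

def query_with_background_cache(term_id, limit=-1):
    result = slow_query(term_id, limit=limit)  # quick return for the caller

    def cache_full_results_background():
        # re-run unlimited and cache, as the decorator does for expensive queries
        cache[term_id] = slow_query(term_id, limit=-1)

    threading.Thread(target=cache_full_results_background, daemon=True).start()
    return result

print(query_with_background_cache("FBbt_00003624", limit=5))  # [0, 1, 2, 3, 4]
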
@@ -745,7 +745,7 @@ def get_link(text: str, link: str) -> str:
 
 
 def get_secure_url(url: str, allow_redirects: bool = True, timeout=15) -> str:
-    secure_url = url.replace("http://", "http://")
+    secure_url = url.replace("http://", "https://")
     if check_url_exist(secure_url, allow_redirects, timeout):
         return secure_url
     return url
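
The 0.4.1 line replaced "http://" with itself, so the HTTPS upgrade never happened and get_secure_url always fell through to the original URL. The fixed upgrade-then-fallback behaviour, sketched standalone (url_exists stands in for the package's check_url_exist):

def get_secure_url_sketch(url, url_exists):
    # Try the https form first; fall back to the original URL if unreachable
    secure_url = url.replace("http://", "https://")
    if url_exists(secure_url):
        return secure_url
    return url

print(get_secure_url_sketch("http://virtualflybrain.org", lambda u: True))
# https://virtualflybrain.org
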