vfbquery 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +29 -33
- test/term_info_queries_test.py +42 -30
- test/test_default_caching.py +86 -85
- test/test_examples_code.py +7 -0
- test/test_examples_diff.py +95 -172
- test/test_neurons_part_here.py +12 -13
- test/test_query_performance.py +3 -7
- vfbquery/__init__.py +47 -35
- vfbquery/cached_functions.py +772 -131
- vfbquery/owlery_client.py +1 -1
- vfbquery/solr_cache_integration.py +34 -30
- vfbquery/solr_result_cache.py +262 -99
- vfbquery/term_info_queries.py +1 -1
- vfbquery/vfb_queries.py +38 -7
- vfbquery-0.5.1.dist-info/METADATA +2806 -0
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/RECORD +19 -18
- vfbquery-0.5.0.dist-info/METADATA +0 -2273
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/LICENSE +0 -0
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/WHEEL +0 -0
- {vfbquery-0.5.0.dist-info → vfbquery-0.5.1.dist-info}/top_level.txt +0 -0
vfbquery/owlery_client.py
CHANGED
```diff
@@ -372,7 +372,7 @@ class SimpleVFBConnect:
             # Try to initialize - this will fail if Neo4j server unreachable
             self._nc = Neo4jConnect()
             self._nc_available = True
-            print("✅ Neo4j connection established")
+            # print("✅ Neo4j connection established")
         except Exception as e:
             # Fall back to mock client
             self._nc = MockNeo4jClient()
```
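The only functional change in owlery_client.py is silencing the startup print shown above; the surrounding connect-or-fall-back logic is untouched. For orientation, a minimal sketch of that pattern (the `neo4j_factory` and `MockNeo4jClient` below are illustrative stand-ins, not the package's real classes), with the success message routed through `logging` rather than `print`:

```python
import logging

logger = logging.getLogger(__name__)


class MockNeo4jClient:
    """Stand-in used when the real Neo4j server cannot be reached."""

    def commit_list(self, statements):
        return []


class ConnectionSketch:
    def __init__(self, neo4j_factory):
        self._nc_available = False
        try:
            # Try to initialize - this fails if the Neo4j server is unreachable
            self._nc = neo4j_factory()
            self._nc_available = True
            logger.debug("Neo4j connection established")  # quiet replacement for the old print
        except Exception:
            # Fall back to the mock client so the rest of the package keeps working
            self._nc = MockNeo4jClient()


def failing_factory():
    raise ConnectionError("Neo4j server unreachable")


conn = ConnectionSketch(failing_factory)
print(type(conn._nc).__name__, conn._nc_available)  # MockNeo4jClient False
```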
vfbquery/solr_cache_integration.py
CHANGED

```diff
@@ -60,28 +60,30 @@ class SolrCacheIntegration:
         original_func = self.original_functions['get_term_info']

         @functools.wraps(original_func)
-        def cached_get_term_info(short_form: str, preview: bool = False):
+        def cached_get_term_info(short_form: str, preview: bool = False, **kwargs):
+            force_refresh = kwargs.get('force_refresh', False)
             cache = get_solr_cache()
             cache_params = {"preview": preview}

- … (old lines 67-77 not rendered in the registry diff view)
+            if not force_refresh:
+                try:
+                    # Try SOLR cache first
+                    cached_result = cache.get_cached_result(
+                        "term_info", short_form, **cache_params
+                    )
+                    if cached_result is not None:
+                        logger.debug(f"SOLR cache hit for term_info({short_form})")
+                        return cached_result
+
+                except Exception as e:
+                    logger.warning(f"SOLR cache lookup failed, falling back: {e}")

             # Execute original function
-            logger.debug(f"SOLR cache miss for term_info({short_form}), computing...")
+            logger.debug(f"SOLR cache miss or force_refresh for term_info({short_form}), computing...")
             result = original_func(short_form, preview)

-            # Cache result asynchronously
-            if result:
+            # Cache result asynchronously if not force_refresh
+            if result and not force_refresh:
                 try:
                     cache.cache_result("term_info", short_form, result, **cache_params)
                     logger.debug(f"Cached term_info result for {short_form}")
```
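These wrappers follow a read-through pattern: consult the SOLR cache unless `force_refresh` is set, fall back to the original function, then write the fresh result back. A minimal, self-contained sketch of that pattern (the in-memory `DictCache` and the `fetch_term_info` stand-in below are illustrative, not vfbquery's actual SOLR-backed cache):

```python
import functools
import logging

logger = logging.getLogger(__name__)


class DictCache:
    """Illustrative stand-in for the SOLR-backed result cache."""

    def __init__(self):
        self._store = {}

    def get_cached_result(self, query_type, key, **params):
        return self._store.get((query_type, key, tuple(sorted(params.items()))))

    def cache_result(self, query_type, key, result, **params):
        self._store[(query_type, key, tuple(sorted(params.items())))] = result


cache = DictCache()


def fetch_term_info(short_form, preview=False):
    """Pretend this is the expensive, uncached query."""
    return {"Id": short_form, "Name": f"term {short_form}", "preview": preview}


def cached(query_type):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(short_form, preview=False, **kwargs):
            force_refresh = kwargs.get("force_refresh", False)
            params = {"preview": preview}
            if not force_refresh:
                hit = cache.get_cached_result(query_type, short_form, **params)
                if hit is not None:
                    logger.debug("cache hit for %s(%s)", query_type, short_form)
                    return hit
            result = func(short_form, preview)
            if result and not force_refresh:
                cache.cache_result(query_type, short_form, result, **params)
            return result
        return wrapper
    return decorator


get_term_info = cached("term_info")(fetch_term_info)
print(get_term_info("FBbt_00000001"))                      # computed, then cached
print(get_term_info("FBbt_00000001"))                      # served from the cache
print(get_term_info("FBbt_00000001", force_refresh=True))  # bypasses read and write-back
```

Note that, as in the hunk above, a `force_refresh=True` call bypasses the cache on read and also skips the write-back.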
```diff
@@ -97,31 +99,33 @@ class SolrCacheIntegration:
         original_func = self.original_functions['get_instances']

         @functools.wraps(original_func)
-        def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1):
+        def cached_get_instances(short_form: str, return_dataframe=True, limit: int = -1, **kwargs):
+            force_refresh = kwargs.get('force_refresh', False)
             cache = get_solr_cache()
             cache_params = {
                 "return_dataframe": return_dataframe,
                 "limit": limit
             }

- … (old lines 107-117 not rendered in the registry diff view)
+            if not force_refresh:
+                try:
+                    # Try SOLR cache first
+                    cached_result = cache.get_cached_result(
+                        "instances", short_form, **cache_params
+                    )
+                    if cached_result is not None:
+                        logger.debug(f"SOLR cache hit for get_instances({short_form})")
+                        return cached_result
+
+                except Exception as e:
+                    logger.warning(f"SOLR cache lookup failed, falling back: {e}")

             # Execute original function
-            logger.debug(f"SOLR cache miss for get_instances({short_form}), computing...")
+            logger.debug(f"SOLR cache miss or force_refresh for get_instances({short_form}), computing...")
             result = original_func(short_form, return_dataframe, limit)

-            # Cache result asynchronously
-            if result is not None:
+            # Cache result asynchronously if not force_refresh
+            if result is not None and not force_refresh:
                 try:
                     cache.cache_result("instances", short_form, result, **cache_params)
                     logger.debug(f"Cached get_instances result for {short_form}")
```
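Once both wrappers are installed, callers can pass the new keyword straight through. A hypothetical usage sketch (it assumes the wrapped functions are exposed at package level, as the signatures above suggest; the IDs are placeholders):

```python
import vfbquery as vfb

# Served from the SOLR result cache when a fresh entry exists
info = vfb.get_term_info("FBbt_00000001")

# Bypass the cache and recompute; the wrapper also skips the write-back
fresh = vfb.get_term_info("FBbt_00000001", force_refresh=True)

# get_instances gains the same keyword
rows = vfb.get_instances("FBbt_00000001", return_dataframe=False, limit=10, force_refresh=True)
```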
vfbquery/solr_result_cache.py
CHANGED
```diff
@@ -14,6 +14,7 @@ import json
 import requests
 import hashlib
 import time
+import threading
 from datetime import datetime, timedelta
 from typing import Dict, Any, Optional, List
 import logging
```
```diff
@@ -60,7 +61,7 @@ class SolrResultCache:
         self.max_result_size_mb = max_result_size_mb
         self.max_result_size_bytes = max_result_size_mb * 1024 * 1024

-    def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
+    def _create_cache_metadata(self, result: Any, **params) -> Optional[Dict[str, Any]]:
         """Create metadata for cached result with 3-month expiration"""
         serialized_result = json.dumps(result, cls=NumpyEncoder)
         result_size = len(serialized_result.encode('utf-8'))
```
```diff
@@ -78,6 +79,7 @@ class SolrResultCache:
             "cached_at": now.isoformat(),
             "expires_at": expires_at.isoformat(),
             "result_size": result_size,
+            "params": params,  # Store the parameters used for this query
             "hit_count": 0,
             "cache_version": "1.0",  # For future compatibility
             "ttl_hours": self.ttl_hours  # Store TTL for debugging
```
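With `**params` threaded through, the metadata envelope now records which parameters produced the cached payload. A rough sketch of the document `_create_cache_metadata` assembles (field names are taken from the hunks above; the plain `json.dumps`, the size ceiling, and the 90-day TTL default are assumptions standing in for the real NumpyEncoder and configured values):

```python
import json
from datetime import datetime, timedelta


def create_cache_metadata(result, ttl_hours=24 * 90, max_bytes=10 * 1024 * 1024, **params):
    """Sketch of the metadata envelope stored next to a cached result."""
    serialized = json.dumps(result)
    size = len(serialized.encode("utf-8"))
    if size > max_bytes:
        return None  # result too large to cache
    now = datetime.now()
    return {
        "cached_at": now.isoformat(),
        "expires_at": (now + timedelta(hours=ttl_hours)).isoformat(),
        "result_size": size,
        "params": params,          # new in 0.5.1: parameters used for this query
        "hit_count": 0,
        "cache_version": "1.0",
        "ttl_hours": ttl_hours,
    }


doc = create_cache_metadata({"rows": []}, limit=-1, return_dataframe=False)
print(doc["params"])  # {'limit': -1, 'return_dataframe': False}
```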
```diff
@@ -150,6 +152,33 @@ class SolrResultCache:
             self._clear_expired_cache_document(cache_doc_id)
             return None

+        # Check if cached result parameters are compatible with requested parameters
+        cached_params = cached_data.get("params", {})
+        requested_limit = params.get("limit", -1)
+        cached_limit = cached_params.get("limit", -1)
+
+        # Only cached full results (limit=-1) are stored
+        # If requesting limited results, we can slice from cached full results
+        if cached_limit != -1:
+            logger.debug(f"Cache miss: Unexpected cached result with limit={cached_limit}, expected -1")
+            return None
+
+        # If requesting unlimited results, return the full cached result
+        if requested_limit == -1:
+            result = cached_data["result"]
+        else:
+            # If requesting limited results, slice from the cached full result
+            result = cached_data["result"]
+            if isinstance(result, (list, pd.DataFrame)):
+                if isinstance(result, list):
+                    result = result[:requested_limit]
+                elif isinstance(result, pd.DataFrame):
+                    result = result.head(requested_limit)
+                logger.debug(f"Cache hit: Returning {requested_limit} items from cached full result")
+            else:
+                # For other result types, return as-is (can't slice)
+                logger.debug(f"Cache hit: Returning full cached result (cannot slice type {type(result)})")
+
         # Increment hit count asynchronously
         self._increment_cache_hit_count(cache_doc_id, cached_data.get("hit_count", 0))

```
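Because only full (`limit=-1`) results are stored, limited requests are now served by trimming the cached payload at read time. A small, runnable illustration of that slicing rule for the two sliceable types handled above:

```python
import pandas as pd


def slice_cached(result, requested_limit):
    """Return a limited view of a cached full result, mirroring the rule above."""
    if requested_limit == -1:
        return result
    if isinstance(result, list):
        return result[:requested_limit]
    if isinstance(result, pd.DataFrame):
        return result.head(requested_limit)
    return result  # other types cannot be sliced, return as-is


full_list = [{"id": i} for i in range(100)]
full_df = pd.DataFrame(full_list)

print(len(slice_cached(full_list, 5)))   # 5
print(len(slice_cached(full_df, 5)))     # 5
print(len(slice_cached(full_list, -1)))  # 100
```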
```diff
@@ -200,7 +229,7 @@ class SolrResultCache:

         try:
             # Create cached metadata and result
-            cached_data = self._create_cache_metadata(result)
+            cached_data = self._create_cache_metadata(result, **params)
             if not cached_data:
                 return False  # Result too large or other issue

```
```diff
@@ -586,10 +615,19 @@ def with_solr_cache(query_type: str):
             # Check if force_refresh is requested (pop it before passing to function)
             force_refresh = kwargs.pop('force_refresh', False)

-            # Check if limit is applied - …
+            # Check if limit is applied - only cache full results (limit=-1)
             limit = kwargs.get('limit', -1)
             should_cache = (limit == -1)  # Only cache when getting all results (limit=-1)

+            # For expensive queries, we still only cache full results, but we handle limited requests
+            # by slicing from cached full results
+            expensive_query_types = ['similar_neurons', 'similar_morphology', 'similar_morphology_part_of',
+                                     'similar_morphology_part_of_exp', 'similar_morphology_nb',
+                                     'similar_morphology_nb_exp', 'similar_morphology_userdata',
+                                     'neurons_part_here', 'neurons_synaptic',
+                                     'neurons_presynaptic', 'neurons_postsynaptic']
+            # Note: expensive queries still only cache full results, but retrieval logic handles slicing
+
             # For neuron_neuron_connectivity_query, only cache when all parameters are defaults
             if query_type == 'neuron_neuron_connectivity_query':
                 min_weight = kwargs.get('min_weight', 0)
```
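Two knobs now steer the write path: `should_cache` (only full, `limit=-1` results are ever written) and membership in `expensive_query_types` (those are executed quickly with the caller's parameters and cached from a background thread, as a later hunk shows). A compact sketch of that policy decision (the helper name and return shape are made up for illustration):

```python
EXPENSIVE_QUERY_TYPES = {
    "similar_neurons", "similar_morphology", "similar_morphology_part_of",
    "similar_morphology_part_of_exp", "similar_morphology_nb",
    "similar_morphology_nb_exp", "similar_morphology_userdata",
    "neurons_part_here", "neurons_synaptic",
    "neurons_presynaptic", "neurons_postsynaptic",
}


def caching_policy(query_type, limit=-1):
    """Return (cache_inline, cache_in_background) for a request."""
    should_cache = (limit == -1)                    # only full results are ever written
    background = query_type in EXPENSIVE_QUERY_TYPES
    return should_cache and not background, background


print(caching_policy("term_info", limit=-1))        # (True, False)  -> cached inline
print(caching_policy("similar_neurons", limit=10))  # (False, True)  -> quick return, full result cached in background
print(caching_policy("term_info", limit=10))        # (False, False) -> not cached
```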
```diff
@@ -616,15 +654,16 @@ def with_solr_cache(query_type: str):
                 cache_term_id = f"{term_id}_preview_{preview}"

             # Include return_dataframe parameter in cache key for queries that support it
-            # This ensures DataFrame and dict …
- … (old lines 620-625 not rendered in the registry diff view)
+            # This ensures DataFrame and dict results are cached separately
+            dataframe_query_types = ['neurons_part_here', 'neurons_synaptic', 'neurons_presynaptic',
+                                     'neurons_postsynaptic', 'similar_neurons', 'similar_morphology',
+                                     'similar_morphology_part_of', 'similar_morphology_part_of_exp',
+                                     'similar_morphology_nb', 'similar_morphology_nb_exp',
+                                     'similar_morphology_userdata', 'neurons_part_here', 'neurons_synaptic',
+                                     'neurons_presynaptic', 'neurons_postsynaptic']
+            if query_type in dataframe_query_types:
                 return_dataframe = kwargs.get('return_dataframe', True)  # Default is True
-                cache_term_id = f"{cache_term_id} …
+                cache_term_id = f"{cache_term_id}_dataframe_{return_dataframe}"

             cache = get_solr_cache()

```
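The cache key is the term ID plus suffixes for any parameter that changes the shape of the result. A sketch of the key layout implied by these hunks (the helper below is hypothetical; the real decorator builds the key inline, and the IDs are placeholders):

```python
def build_cache_term_id(term_id, query_type, preview=None, return_dataframe=None,
                        dataframe_query_types=frozenset()):
    """Compose the cache key suffixes used when reading and writing cached results."""
    cache_term_id = term_id
    if preview is not None:                      # term_info distinguishes preview on/off
        cache_term_id = f"{cache_term_id}_preview_{preview}"
    if query_type in dataframe_query_types:      # DataFrame vs dict results cached separately
        cache_term_id = f"{cache_term_id}_dataframe_{return_dataframe}"
    return cache_term_id


print(build_cache_term_id("FBbt_00000001", "term_info", preview=True))
# FBbt_00000001_preview_True
print(build_cache_term_id("VFB_00000001", "similar_neurons", return_dataframe=True,
                          dataframe_query_types={"similar_neurons"}))
# VFB_00000001_dataframe_True
```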
```diff
@@ -634,12 +673,47 @@ def with_solr_cache(query_type: str):
                 cache.clear_cache_entry(query_type, cache_term_id)

             # Try cache first (will be empty if force_refresh was True)
-            # OPTIMIZATION: …
- … (old line 638 not rendered in the registry diff view)
+            # OPTIMIZATION: Always try to get full cached results first, then slice if needed
+            cached_result = None
             if not force_refresh:
-                # …
- … (old lines 641-642 not rendered in the registry diff view)
+                # print(f"DEBUG: Checking cache for {query_type}, term_id={term_id}, cache_term_id={cache_term_id}, should_cache={should_cache}")
+                # Try to get cached full result (limit=-1)
+                full_params = kwargs.copy()
+                full_params['limit'] = -1
+                # print(f"DEBUG: Attempting cache lookup for {query_type}({cache_term_id}) with full results")
+                cached_result = cache.get_cached_result(query_type, cache_term_id, **full_params)
+                # print(f"DEBUG: Cache lookup result: {cached_result is not None}")
+
+                # If we got a cached full result but need limited results, slice it
+                if cached_result is not None and limit != -1:
+                    if isinstance(cached_result, (list, pd.DataFrame)):
+                        if isinstance(cached_result, list):
+                            cached_result = cached_result[:limit]
+                        elif isinstance(cached_result, pd.DataFrame):
+                            cached_result = cached_result.head(limit)
+                        # print(f"DEBUG: Sliced cached result to {limit} items")
+                    elif isinstance(cached_result, dict):
+                        # Handle dict results with 'rows' (e.g., get_instances)
+                        if 'rows' in cached_result:
+                            cached_result = {
+                                'headers': cached_result.get('headers', {}),
+                                'rows': cached_result['rows'][:limit],
+                                'count': cached_result.get('count', len(cached_result.get('rows', [])))
+                            }
+                            # print(f"DEBUG: Sliced cached dict result to {limit} rows")
+                        # Handle term_info dict with 'queries'
+                        elif 'queries' in cached_result:
+                            for query in cached_result.get('queries', []):
+                                if 'preview_results' in query and 'rows' in query['preview_results']:
+                                    query['preview_results']['rows'] = query['preview_results']['rows'][:limit]
+                                    # Keep original count - don't change it to limit
+                            # print(f"DEBUG: Sliced cached term_info result to {limit} rows per query")
+                        else:
+                            # print(f"DEBUG: Cannot slice cached dict result (no 'rows' or 'queries'), returning full result")
+                            pass
+                    else:
+                        # print(f"DEBUG: Cannot slice cached result of type {type(cached_result)}, returning full result")
+                        pass
             else:
                 # For limited queries, try to get full cached results instead
                 full_kwargs = kwargs.copy()
```
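Besides lists and DataFrames, the read path above also trims the two dict shapes vfbquery returns: a `rows` table (as produced by get_instances) and a term_info payload whose `queries` carry `preview_results`. A runnable illustration of both cases on made-up data:

```python
def slice_dict_result(cached, limit):
    """Trim a cached dict result the way the decorator above does."""
    if "rows" in cached:
        return {
            "headers": cached.get("headers", {}),
            "rows": cached["rows"][:limit],
            "count": cached.get("count", len(cached.get("rows", []))),
        }
    if "queries" in cached:
        for query in cached.get("queries", []):
            pr = query.get("preview_results")
            if pr and "rows" in pr:
                pr["rows"] = pr["rows"][:limit]   # counts stay untouched
        return cached
    return cached  # nothing sliceable


table = {"headers": {"id": {}}, "rows": [{"id": i} for i in range(50)], "count": 50}
trimmed = slice_dict_result(table, 5)
print(len(trimmed["rows"]), trimmed["count"])  # 5 50  (count keeps the full total)

term = {"queries": [{"preview_results": {"rows": list(range(20))}, "count": 20}]}
slice_dict_result(term, 3)
print(len(term["queries"][0]["preview_results"]["rows"]))  # 3
```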
```diff
@@ -703,94 +777,183 @@ def with_solr_cache(query_type: str):
             else:
                 return cached_result

-            # Execute function
-            result = …
- … (old lines 708-722 not rendered in the registry diff view)
-                            result_is_error = count_value < 0  # Mark as error if count is negative
-                        else:
-                            result_is_valid = bool(result)  # For dicts without count field
-                    elif isinstance(result, (list, str)):
-                        result_is_valid = len(result) > 0
-                    else:
-                        result_is_valid = True
-
-            # If result is an error, actively clear any existing cache entry
-            # This ensures that transient failures don't get stuck in cache
-            if result_is_error:
-                logger.warning(f"Query returned error result for {query_type}({term_id}), clearing cache entry")
-                try:
-                    cache.clear_cache_entry(query_type, cache_term_id)
-                except Exception as e:
-                    logger.debug(f"Failed to clear cache entry: {e}")
-
-            if result_is_valid:
-                # Validate result before caching for term_info
-                if query_type == 'term_info':
-                    # Basic validation: must have Id and Name
-                    is_complete = (result and isinstance(result, dict) and
-                                   result.get('Id') and result.get('Name'))
-
-                    # Additional validation when preview=True: check if queries have results
-                    # We allow caching even if some queries failed (count=-1) as long as the core term_info is valid
-                    # This is because some query functions may not be implemented yet or may legitimately fail
-                    if is_complete:
-                        preview = kwargs.get('preview', True)
-                        if preview and 'Queries' in result and result['Queries']:
-                            # Count how many queries have valid results vs errors
-                            valid_queries = 0
-                            failed_queries = 0
+            # Execute function - for expensive queries, get quick results first, then cache full results in background
+            result = None
+            if query_type in expensive_query_types:
+                # For expensive queries: execute with original parameters for quick return, cache full results in background
+                # print(f"DEBUG: Executing {query_type} with original parameters for quick return")
+                result = func(*args, **kwargs)
+
+                # Start background thread to get full results and cache them
+                def cache_full_results_background():
+                    try:
+                        # Check if function supports limit parameter
+                        import inspect
+                        if 'limit' in inspect.signature(func).parameters:
+                            full_kwargs = kwargs.copy()
+                            full_kwargs['limit'] = -1
+                            # print(f"DEBUG: Background: Executing {query_type} with full results for caching")
+                            full_result = func(*args, **full_kwargs)

- … (old lines 757-763 not rendered in the registry diff view)
+                            # Validate and cache the full result
+                            if full_result is not None:
+                                result_is_valid = False
+                                if hasattr(full_result, 'empty'):  # DataFrame
+                                    result_is_valid = not full_result.empty
+                                elif isinstance(full_result, dict):
+                                    if 'count' in full_result:
+                                        count_value = full_result.get('count', -1)
+                                        result_is_valid = count_value >= 0
+                                    else:
+                                        result_is_valid = bool(full_result)
+                                elif isinstance(full_result, (list, str)):
+                                    result_is_valid = len(full_result) > 0
                                 else:
- … (old lines 765-772 not rendered in the registry diff view)
+                                    result_is_valid = True
+
+                                if result_is_valid:
+                                    # Special validation for term_info
+                                    if query_type == 'term_info':
+                                        is_complete = (full_result and isinstance(full_result, dict) and
+                                                       full_result.get('Id') and full_result.get('Name'))
+                                        if is_complete:
+                                            try:
+                                                full_kwargs_for_cache = kwargs.copy()
+                                                full_kwargs_for_cache['limit'] = -1
+                                                cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                                logger.debug(f"Background cached complete full result for {term_id}")
+                                            except Exception as e:
+                                                logger.debug(f"Background caching failed: {e}")
+                                    else:
+                                        try:
+                                            full_kwargs_for_cache = kwargs.copy()
+                                            full_kwargs_for_cache['limit'] = -1
+                                            cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                            logger.debug(f"Background cached full result for {term_id}")
+                                        except Exception as e:
+                                            logger.debug(f"Background caching failed: {e}")
+                    except Exception as e:
+                        logger.debug(f"Background caching thread failed: {e}")
+
+                # Start background caching thread
+                background_thread = threading.Thread(target=cache_full_results_background, daemon=True)
+                background_thread.start()
+                # print(f"DEBUG: Started background caching thread for {query_type}({term_id})")
+            else:
+                # For non-expensive queries: use original caching logic
+                full_result = None
+                if should_cache:
+                    # Execute with limit=-1 to get full results for caching (only for functions that support limit)
+                    full_kwargs = kwargs.copy()
+                    import inspect
+                    if 'limit' in inspect.signature(func).parameters:
+                        full_kwargs['limit'] = -1
+                    # print(f"DEBUG: Executing {query_type} with full results for caching")
+                    full_result = func(*args, **full_kwargs)
+                    result = full_result

-            # …
-            if …
- … (old lines 776-781 not rendered in the registry diff view)
-                        logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
-                    else:
-                        logger.warning(f"Not caching incomplete result for {term_id}")
+                    # If the original request was limited, slice the result for return
+                    if limit != -1 and result is not None:
+                        if isinstance(result, (list, pd.DataFrame)):
+                            if isinstance(result, list):
+                                result = result[:limit]
+                            elif isinstance(result, pd.DataFrame):
+                                result = result.head(limit)
+                        # print(f"DEBUG: Sliced result to {limit} items for return")
                 else:
-                    # …
- … (old lines 787-791 not rendered in the registry diff view)
+                    # Execute with original parameters (no caching)
+                    result = func(*args, **kwargs)
+                    full_result = result
+
+            # Cache the result - skip for expensive queries as they use background caching
+            if query_type not in expensive_query_types:
+                # Handle DataFrame, dict, and other result types properly
+                result_is_valid = False
+                result_is_error = False  # Track if result is an error that should clear cache
+
+                if result is not None:
+                    if hasattr(result, 'empty'):  # DataFrame
+                        result_is_valid = not result.empty
+                    elif isinstance(result, dict):
+                        # For dict results, check if it's not an error result (count != -1)
+                        # Error results should not be cached
+                        if 'count' in result:
+                            count_value = result.get('count', -1)
+                            result_is_valid = count_value >= 0  # Don't cache errors (count=-1)
+                            result_is_error = count_value < 0  # Mark as error if count is negative
+                        else:
+                            result_is_valid = bool(result)  # For dicts without count field
+                    elif isinstance(result, (list, str)):
+                        result_is_valid = len(result) > 0
                     else:
- … (old line 793 not rendered in the registry diff view)
+                        result_is_valid = True
+
+                # If result is an error, actively clear any existing cache entry
+                # This ensures that transient failures don't get stuck in cache
+                if result_is_error:
+                    logger.warning(f"Query returned error result for {query_type}({term_id}), clearing cache entry")
+                    try:
+                        cache.clear_cache_entry(query_type, cache_term_id)
+                    except Exception as e:
+                        logger.debug(f"Failed to clear cache entry: {e}")
+
+                if result_is_valid:
+                    # Validate result before caching for term_info
+                    if query_type == 'term_info':
+                        # Basic validation: must have Id and Name
+                        is_complete = (result and isinstance(result, dict) and
+                                       result.get('Id') and result.get('Name'))
+
+                        # Additional validation when preview=True: check if queries have results
+                        # We allow caching even if some queries failed (count=-1) as long as the core term_info is valid
+                        # This is because some query functions may not be implemented yet or may legitimately fail
+                        if is_complete:
+                            preview = kwargs.get('preview', True)
+                            if preview and 'Queries' in result and result['Queries']:
+                                # Count how many queries have valid results vs errors
+                                valid_queries = 0
+                                failed_queries = 0
+
+                                for query in result['Queries']:
+                                    count = query.get('count', -1)
+                                    preview_results = query.get('preview_results')
+
+                                    # Count queries with valid results (count >= 0)
+                                    if count >= 0 and isinstance(preview_results, dict):
+                                        valid_queries += 1
+                                    else:
+                                        failed_queries += 1
+
+                                # Only reject if ALL queries failed - at least one must succeed
+                                if valid_queries == 0 and failed_queries > 0:
+                                    is_complete = False
+                                    logger.warning(f"Not caching result for {term_id}: all {failed_queries} queries failed")
+                                elif failed_queries > 0:
+                                    logger.debug(f"Caching result for {term_id} with {valid_queries} valid queries ({failed_queries} failed)")
+
+                        # Only cache if result is complete AND no limit was applied
+                        if is_complete and should_cache:
+                            try:
+                                # Cache the full result with full parameters (limit=-1)
+                                full_kwargs_for_cache = kwargs.copy()
+                                full_kwargs_for_cache['limit'] = -1
+                                cache.cache_result(query_type, cache_term_id, full_result, **full_kwargs_for_cache)
+                                logger.debug(f"Cached complete full result for {term_id}")
+                            except Exception as e:
+                                logger.debug(f"Failed to cache result: {e}")
+                        elif not should_cache:
+                            logger.debug(f"Not caching limited result for {term_id} (limit={limit})")
+                        else:
+                            logger.warning(f"Not caching incomplete result for {term_id}")
+                    else:
+                        # Only cache if no limit was applied
+                        if should_cache:
+                            try:
+                                cache.cache_result(query_type, cache_term_id, result, **kwargs)
+                            except Exception as e:
+                                logger.debug(f"Failed to cache result: {e}")
+                        else:
+                            logger.debug(f"Not caching limited result for {term_id} (limit={limit}))")

             return result

```
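For the expensive query types, the decorator now answers with the quick, possibly limited result and hands full-result computation plus caching to a daemon thread. A stripped-down, runnable sketch of that handoff (`slow_query` and the `store` dict are stand-ins for the real query functions and the SOLR cache):

```python
import inspect
import threading
import time

store = {}  # illustrative stand-in for the SOLR-backed cache


def slow_query(term_id, limit=-1):
    """Pretend this is an expensive neuron query."""
    time.sleep(0.1)
    rows = list(range(100))
    return rows if limit == -1 else rows[:limit]


def run_expensive(term_id, **kwargs):
    # 1. Answer the caller quickly with the parameters they asked for
    result = slow_query(term_id, **kwargs)

    # 2. In the background, recompute with limit=-1 and cache the full result
    def cache_full_results_background():
        full_kwargs = dict(kwargs)
        if "limit" in inspect.signature(slow_query).parameters:
            full_kwargs["limit"] = -1
        full_result = slow_query(term_id, **full_kwargs)
        if full_result:
            store[term_id] = full_result

    threading.Thread(target=cache_full_results_background, daemon=True).start()
    return result


quick = run_expensive("VFB_00000001", limit=5)
print(len(quick))                   # 5, returned without waiting for the full query
time.sleep(0.5)                     # give the daemon thread time to finish in this demo
print(len(store["VFB_00000001"]))   # 100, cached full result
```

Because the thread is a daemon, a process that exits right after the quick return may never populate the cache; the trade-off is that the caller is never blocked on the full query.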
vfbquery/term_info_queries.py
CHANGED
```diff
@@ -745,7 +745,7 @@ def get_link(text: str, link: str) -> str:


 def get_secure_url(url: str, allow_redirects: bool = True, timeout=15) -> str:
-    secure_url = url.replace("http://", " …
+    secure_url = url.replace("http://", "https://")
     if check_url_exist(secure_url, allow_redirects, timeout):
         return secure_url
     return url
```
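`get_secure_url` now upgrades the scheme and keeps the https form only if it actually responds, otherwise it returns the original URL. A self-contained sketch of the same behaviour using requests directly (the real module delegates the reachability test to its own `check_url_exist` helper):

```python
import requests


def get_secure_url_sketch(url: str, allow_redirects: bool = True, timeout: int = 15) -> str:
    """Prefer the https form of a URL, but only if it responds."""
    secure_url = url.replace("http://", "https://")
    try:
        resp = requests.head(secure_url, allow_redirects=allow_redirects, timeout=timeout)
        if resp.status_code < 400:
            return secure_url
    except requests.RequestException:
        pass
    return url


# Makes a network request; returns the https form when that endpoint answers
print(get_secure_url_sketch("http://virtualflybrain.org"))
```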
vfbquery/vfb_queries.py
CHANGED
```diff
@@ -340,10 +340,25 @@ def encode_markdown_links(df, columns):
             return label

         try:
-            # …
-            # These should NOT be encoded
+            # Handle linked images (format: [![alt](image_url)](link))
             if label.startswith("[](link)
 … (the rest of this context line, old line 346, and new lines 345-346 were consumed by the registry's markdown rendering)
+                def secure_image_url(match):
+                    alt_text = match.group(1)
+                    image_url = match.group(2)
+                    title = match.group(3) if match.group(3) else ""
+                    link = match.group(4)
+                    secure_url = image_url.replace("http://", "https://")
+                    if title:
+                        return f"[![{alt_text}]({secure_url} \"{title}\")]({link})"
+                    else:
+                        return f"[![{alt_text}]({secure_url})]({link})"
+
+                # Regex to match the entire linked image
+                pattern = r'\[\!\[([^\]]+)\]\(([^\'"\s]+)(?:\s+[\'"]([^\'"]*)[\'"])?\)\]\(([^)]+)\)'
+                encoded_label = re.sub(pattern, secure_image_url, label)
+                return encoded_label

         # Process regular markdown links - handle multiple links separated by commas
         # Pattern matches [label](url) format
```
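The regex added here captures a linked image's alt text, image URL, optional quoted title, and outer link so that only the image URL is upgraded to https. A short, runnable demonstration on a made-up markdown cell; the replacement function mirrors the apparent intent of `secure_image_url`, whose return lines are only partially rendered in this view:

```python
import re

pattern = r'\[\!\[([^\]]+)\]\(([^\'"\s]+)(?:\s+[\'"]([^\'"]*)[\'"])?\)\]\(([^)]+)\)'

cell = '[![thumbnail](http://www.virtualflybrain.org/data/thumb.png "VFB thumbnail")](http://virtualflybrain.org/term/VFB_00000001)'


def secure_image_url(match):
    alt_text, image_url, title, link = match.groups()
    secure = image_url.replace("http://", "https://")
    title_part = f' "{title}"' if title else ""
    return f"[![{alt_text}]({secure}{title_part})]({link})"


print(re.sub(pattern, secure_image_url, cell))
# [![thumbnail](https://www.virtualflybrain.org/data/thumb.png "VFB thumbnail")](http://virtualflybrain.org/term/VFB_00000001)
```

Note that, as in the hunk, the outer link is left untouched; only the embedded image URL is switched to https.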
```diff
@@ -356,7 +371,9 @@ def encode_markdown_links(df, columns):
             url_part = match.group(2)  # The URL part (between ( and ))
             # Encode brackets in the label part only
             label_part_encoded = encode_brackets(label_part)
- … (old line 359 not rendered in the registry diff view)
+            # Ensure URLs use https
+            url_part_secure = url_part.replace("http://", "https://")
+            return f"[{label_part_encoded}]({url_part_secure})"

         # Replace all markdown links with their encoded versions
         encoded_label = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', encode_single_link, label)
```
```diff
@@ -1268,7 +1285,7 @@ def NeuronRegionConnectivityQuery_to_schema(name, take_default):
         "default": take_default,
     }
     preview = 5
-    preview_columns = ["id", " …
+    preview_columns = ["id", "region", "presynaptic_terminals", "postsynaptic_terminals", "tags"]
     return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)


```
```diff
@@ -2713,7 +2730,7 @@ def get_neuron_region_connectivity(short_form: str, return_dataframe=True, limit
         primary
     RETURN
         target.short_form AS id,
-        target.label AS …
+        target.label AS region,
         synapse_counts.`pre` AS presynaptic_terminals,
         synapse_counts.`post` AS postsynaptic_terminals,
         target.uniqueFacets AS tags
```
```diff
@@ -2732,7 +2749,7 @@ def get_neuron_region_connectivity(short_form: str, return_dataframe=True, limit

     headers = {
         'id': {'title': 'Region ID', 'type': 'selection_id', 'order': -1},
-        ' …
+        'region': {'title': 'Brain Region', 'type': 'markdown', 'order': 0},
         'presynaptic_terminals': {'title': 'Presynaptic Terminals', 'type': 'number', 'order': 1},
         'postsynaptic_terminals': {'title': 'Postsynaptic Terminals', 'type': 'number', 'order': 2},
         'tags': {'title': 'Region Types', 'type': 'list', 'order': 3},
```
```diff
@@ -3915,6 +3932,20 @@ def fill_query_results(term_info):
             result_count = 0

             # Store preview results (count is stored at query level, not in preview_results)
+            # Sort rows based on the sort field in headers, default to ID descending if none
+            sort_column = None
+            sort_direction = None
+            for col, info in filtered_headers.items():
+                if 'sort' in info and isinstance(info['sort'], dict):
+                    sort_column = col
+                    sort_direction = list(info['sort'].values())[0]  # e.g., 'Asc' or 'Desc'
+                    break
+            if sort_column:
+                reverse = sort_direction == 'Desc'
+                filtered_result.sort(key=lambda x: x.get(sort_column, ''), reverse=reverse)
+            else:
+                # Default to ID descending if no sort specified
+                filtered_result.sort(key=lambda x: x.get('id', ''), reverse=True)
             query['preview_results'] = {'headers': filtered_headers, 'rows': filtered_result}
             query['count'] = result_count
             # print(f"Filtered result: {filtered_result}")
```
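`fill_query_results` now orders preview rows by the first header column that declares a sort direction, falling back to ID descending. A runnable sketch of that selection logic on made-up headers and rows (the `{0: 'Asc'}` sort spec is an assumed shape; the code only requires a dict whose first value is 'Asc' or 'Desc'):

```python
def sort_preview_rows(filtered_headers, filtered_result):
    """Order rows by the first header that carries a sort spec, else by id descending."""
    sort_column = None
    sort_direction = None
    for col, info in filtered_headers.items():
        if 'sort' in info and isinstance(info['sort'], dict):
            sort_column = col
            sort_direction = list(info['sort'].values())[0]  # e.g. 'Asc' or 'Desc'
            break
    if sort_column:
        filtered_result.sort(key=lambda x: x.get(sort_column, ''), reverse=(sort_direction == 'Desc'))
    else:
        filtered_result.sort(key=lambda x: x.get('id', ''), reverse=True)
    return filtered_result


headers = {'id': {'title': 'ID'}, 'label': {'title': 'Name', 'sort': {0: 'Asc'}}}
rows = [{'id': 'VFB_3', 'label': 'zeta'}, {'id': 'VFB_1', 'label': 'alpha'}]
print(sort_preview_rows(headers, rows))
# [{'id': 'VFB_1', 'label': 'alpha'}, {'id': 'VFB_3', 'label': 'zeta'}]
```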