vfbquery-0.4.0-py3-none-any.whl → vfbquery-0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +29 -1
- test/term_info_queries_test.py +9 -9
- vfbquery/__init__.py +22 -1
- vfbquery/solr_result_cache.py +85 -39
- vfbquery/vfb_queries.py +60 -40
- {vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/METADATA +20 -20
- {vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/RECORD +10 -10
- {vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/LICENSE +0 -0
- {vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/WHEEL +0 -0
- {vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/top_level.txt +0 -0
test/readme_parser.py
CHANGED
@@ -27,7 +27,35 @@ def extract_code_blocks(readme_path):
         # Look for vfb.* calls and extract them
         vfb_calls = re.findall(r'(vfb\.[^)]*\))', block)
         if vfb_calls:
-
+            # Add force_refresh=True to each call to ensure fresh data in tests
+            # Exceptions:
+            # - get_templates() doesn't support force_refresh (no SOLR cache)
+            # - Performance test terms (FBbt_00003748, VFB_00101567) should use cache
+            for call in vfb_calls:
+                # Check if this is get_templates() - if so, don't add force_refresh
+                if 'get_templates' in call:
+                    processed_python_blocks.append(call)
+                    continue
+
+                # Check if this call uses performance test terms - skip force_refresh for those
+                if 'FBbt_00003748' in call or 'VFB_00101567' in call:
+                    processed_python_blocks.append(call)
+                    continue
+
+                # Check if the call already has parameters
+                if '(' in call and ')' in call:
+                    # Insert force_refresh=True before the closing parenthesis
+                    # Handle both cases: with and without existing parameters
+                    if call.rstrip(')').endswith('('):
+                        # No parameters: vfb.function()
+                        modified_call = call[:-1] + 'force_refresh=True)'
+                    else:
+                        # Has parameters: vfb.function(param1, param2)
+                        modified_call = call[:-1] + ', force_refresh=True)'
+                    processed_python_blocks.append(modified_call)
+                else:
+                    # Shouldn't happen, but include original call if no parentheses
+                    processed_python_blocks.append(call)

     # Process JSON blocks
     processed_json_blocks = []
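For illustration, a minimal standalone sketch of the rewrite this parser change performs on README code blocks; the helper name `add_force_refresh` and the sample call are made up for this example and are not part of the package:

```python
import re

def add_force_refresh(call: str) -> str:
    """Hypothetical helper mirroring the README-parser rewrite above."""
    # get_templates() has no SOLR cache, and the performance-test terms should stay cached
    if 'get_templates' in call or 'FBbt_00003748' in call or 'VFB_00101567' in call:
        return call
    if call.rstrip(')').endswith('('):
        # No parameters: vfb.function() -> vfb.function(force_refresh=True)
        return call[:-1] + 'force_refresh=True)'
    # Has parameters: vfb.function(x) -> vfb.function(x, force_refresh=True)
    return call[:-1] + ', force_refresh=True)'

block = "vfb.get_term_info('FBbt_00000001')"  # hypothetical README snippet
calls = re.findall(r'(vfb\.[^)]*\))', block)
print([add_force_refresh(c) for c in calls])
# ["vfb.get_term_info('FBbt_00000001', force_refresh=True)"]
```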
test/term_info_queries_test.py
CHANGED
@@ -551,14 +551,14 @@ class TermInfoQueriesTest(unittest.TestCase):

         # Performance categories
         total_time = duration_1 + duration_2
-        if total_time < 1.
-            performance_level = "🟢 Excellent (< 1
-        elif total_time <
-            performance_level = "🟡 Good (1-
-        elif total_time <
-            performance_level = "🟠 Acceptable (
+        if total_time < 1.5:
+            performance_level = "🟢 Excellent (< 1.5 seconds)"
+        elif total_time < 3.0:
+            performance_level = "🟡 Good (1.5-3 seconds)"
+        elif total_time < 6.0:
+            performance_level = "🟠 Acceptable (3-6 seconds)"
         else:
-            performance_level = "🔴 Slow (>
+            performance_level = "🔴 Slow (> 6 seconds)"

         print(f"Performance Level: {performance_level}")
         print(f"="*50)

@@ -569,8 +569,8 @@ class TermInfoQueriesTest(unittest.TestCase):

         # Performance assertions - fail if queries take too long
         # These thresholds are based on observed performance characteristics
-        max_single_query_time =
-        max_total_time =
+        max_single_query_time = 3.0  # seconds (increased from 2.0 to account for SOLR cache overhead)
+        max_total_time = 6.0  # seconds (2 queries * 3 seconds each)

         self.assertLess(duration_1, max_single_query_time,
             f"FBbt_00003748 query took {duration_1:.4f}s, exceeding {max_single_query_time}s threshold")
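A minimal sketch of how these thresholds are applied when timing a query against the live service (assumes network access to VirtualFlyBrain; the variable names are illustrative):

```python
import time
import vfbquery as vfb

max_single_query_time = 3.0  # seconds, matching the updated threshold above

start = time.time()
vfb.get_term_info('FBbt_00003748')
duration_1 = time.time() - start

assert duration_1 < max_single_query_time, (
    f"FBbt_00003748 query took {duration_1:.4f}s, exceeding {max_single_query_time}s threshold"
)
```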
vfbquery/__init__.py
CHANGED
@@ -1,4 +1,5 @@
 from .vfb_queries import *
+from .solr_result_cache import get_solr_cache

 # Caching enhancements (optional import - don't break if dependencies missing)
 try:

@@ -48,6 +49,26 @@ except ImportError:
     __caching_available__ = False
     print("VFBquery: Caching not available (dependencies missing)")

+# Convenience function for clearing SOLR cache entries
+def clear_solr_cache(query_type: str, term_id: str) -> bool:
+    """
+    Clear a specific SOLR cache entry to force refresh
+
+    Args:
+        query_type: Type of query ('term_info', 'instances', etc.)
+        term_id: Term identifier (e.g., 'FBbt_00003748')
+
+    Returns:
+        True if successfully cleared, False otherwise
+
+    Example:
+        >>> import vfbquery as vfb
+        >>> vfb.clear_solr_cache('term_info', 'FBbt_00003748')
+        >>> result = vfb.get_term_info('FBbt_00003748')  # Will fetch fresh data
+    """
+    cache = get_solr_cache()
+    return cache.clear_cache_entry(query_type, term_id)
+
 # SOLR-based result caching (experimental - for cold start optimization)
 try:
     from .solr_cache_integration import (

@@ -62,4 +83,4 @@ except ImportError:
     __solr_caching_available__ = False

 # Version information
-__version__ = "0.4.0"
+__version__ = "0.4.1"
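A short usage sketch of the new convenience function together with the `force_refresh` keyword handled by the caching decorator (both introduced in this release); this assumes the SOLR cache endpoint is reachable:

```python
import vfbquery as vfb

# Explicitly drop the cached entry, then fetch fresh data
vfb.clear_solr_cache('term_info', 'FBbt_00003748')
info = vfb.get_term_info('FBbt_00003748')

# Equivalent one-step form: the decorator pops force_refresh,
# clears the cache entry and re-executes the query
info = vfb.get_term_info('FBbt_00003748', force_refresh=True)
```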
vfbquery/solr_result_cache.py
CHANGED
@@ -240,6 +240,36 @@ class SolrResultCache:
         except Exception as e:
             logger.debug(f"Failed to clear expired cache document: {e}")

+    def clear_cache_entry(self, query_type: str, term_id: str) -> bool:
+        """
+        Manually clear a specific cache entry to force refresh
+
+        Args:
+            query_type: Type of query ('term_info', 'instances', etc.)
+            term_id: Term identifier
+
+        Returns:
+            True if successfully cleared, False otherwise
+        """
+        try:
+            cache_doc_id = f"vfb_query_{term_id}"
+            response = requests.post(
+                f"{self.cache_url}/update",
+                data=f'<delete><id>{cache_doc_id}</id></delete>',
+                headers={"Content-Type": "application/xml"},
+                params={"commit": "true"},  # Commit immediately to ensure it's cleared
+                timeout=5
+            )
+            if response.status_code == 200:
+                logger.info(f"Cleared cache entry for {query_type}({term_id})")
+                return True
+            else:
+                logger.error(f"Failed to clear cache entry: HTTP {response.status_code}")
+                return False
+        except Exception as e:
+            logger.error(f"Error clearing cache entry: {e}")
+            return False
+
     def _increment_cache_hit_count(self, cache_doc_id: str, current_count: int):
         """Increment hit count for cache document (background operation)"""
         try:

@@ -533,57 +563,73 @@ def with_solr_cache(query_type: str):

     Usage:
         @with_solr_cache('term_info')
-        def get_term_info(short_form, **kwargs):
+        def get_term_info(short_form, force_refresh=False, **kwargs):
             # ... existing implementation
+
+    The decorated function can accept a 'force_refresh' parameter to bypass cache.
     """
     def decorator(func):
         def wrapper(*args, **kwargs):
+            # Check if force_refresh is requested (pop it before passing to function)
+            force_refresh = kwargs.pop('force_refresh', False)
+
             # Extract term_id from first argument or kwargs
             term_id = args[0] if args else kwargs.get('short_form') or kwargs.get('term_id')

+            # For functions like get_templates that don't have a term_id, use query_type as cache key
             if not term_id:
-
-
+                if query_type == 'templates':
+                    # Use a fixed cache key for templates since it doesn't take a term_id
+                    term_id = 'all_templates'
+                else:
+                    logger.warning(f"No term_id found for caching {query_type}")
+                    return func(*args, **kwargs)

             cache = get_solr_cache()

-            #
-            logger.debug(f"
+            # Clear cache if force_refresh is True
+            if force_refresh:
+                logger.info(f"Force refresh requested for {query_type}({term_id})")
+                cache.clear_cache_entry(query_type, term_id)
+
+            # Try cache first (will be empty if force_refresh was True)
+            if not force_refresh:
+                cached_result = cache.get_cached_result(query_type, term_id, **kwargs)
+                if cached_result is not None:
+                    # Validate that cached result has essential fields for term_info
+                    if query_type == 'term_info':
+                        is_valid = (cached_result and isinstance(cached_result, dict) and
+                                    cached_result.get('Id') and cached_result.get('Name'))
+
+                        # Additional validation for query results
+                        if is_valid and 'Queries' in cached_result:
+                            logger.debug(f"Validating {len(cached_result['Queries'])} queries for {term_id}")
+                            for i, query in enumerate(cached_result['Queries']):
+                                count = query.get('count', 0)
+                                preview_results = query.get('preview_results')
+                                headers = preview_results.get('headers', []) if isinstance(preview_results, dict) else []
+
+                                logger.debug(f"Query {i}: count={count}, preview_results_type={type(preview_results)}, headers={headers}")
+
+                                # Check if query has unrealistic count (0 or -1) which indicates failed execution
+                                if count <= 0:
+                                    is_valid = False
+                                    logger.debug(f"Cached result has invalid query count {count} for {term_id}")
+                                    break
+                                # Check if preview_results is missing or has empty headers when it should have data
+                                if not isinstance(preview_results, dict) or not headers:
+                                    is_valid = False
+                                    logger.debug(f"Cached result has invalid preview_results structure for {term_id}")
+                                    break
+
+                        if is_valid:
+                            logger.debug(f"Using valid cached result for {term_id}")
+                            return cached_result
+                        else:
+                            logger.warning(f"Cached result incomplete for {term_id}, re-executing function")
+                            # Don't return the incomplete cached result, continue to execute function
                     else:
-
-                        # Don't return the incomplete cached result, continue to execute function
-                    else:
-                        return cached_result
+                        return cached_result

             # Execute function and cache result
             result = func(*args, **kwargs)
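A minimal sketch of how the updated decorator is meant to be used; `lookup_term` below is a made-up stand-in rather than a package function, and the cached behaviour assumes the SOLR cache endpoint is reachable:

```python
from vfbquery.solr_result_cache import with_solr_cache

@with_solr_cache('term_info')
def lookup_term(short_form, **kwargs):
    # Illustrative stand-in for a real query implementation;
    # 'Id' and 'Name' are the fields the cache validation above checks for.
    return {"Id": short_form, "Name": "example term"}

# Served from the SOLR result cache when a valid entry exists
result = lookup_term('FBbt_00003748')

# force_refresh is popped by the wrapper, the cache entry is cleared,
# and the function is re-executed so a fresh result gets cached
fresh = lookup_term('FBbt_00003748', force_refresh=True)
```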
vfbquery/vfb_queries.py
CHANGED
@@ -9,6 +9,7 @@ import pandas as pd
 from marshmallow import ValidationError
 import json
 import numpy as np
+from urllib.parse import unquote
 from .solr_result_cache import with_solr_cache

 # Custom JSON encoder to handle NumPy and pandas types

@@ -59,6 +60,23 @@ vfb_solr = pysolr.Solr('http://solr.virtualflybrain.org/solr/vfb_json/', always_
 # Replace VfbConnect with SolrTermInfoFetcher
 vc = SolrTermInfoFetcher()

+def initialize_vfb_connect():
+    """
+    Initialize VFB_connect by triggering the lazy load of the vfb and nc properties.
+    This causes VFB_connect to cache all terms, which takes ~95 seconds on first call.
+    Subsequent calls to functions using vc.nc will be fast.
+
+    :return: True if initialization successful, False otherwise
+    """
+    try:
+        # Access the properties to trigger lazy loading
+        _ = vc.vfb
+        _ = vc.nc
+        return True
+    except Exception as e:
+        print(f"Failed to initialize VFB_connect: {e}")
+        return False
+
 class Query:
     def __init__(self, query, label, function, takes, preview=0, preview_columns=[], preview_results=[], output_format="table", count=-1):
         self.query = query

@@ -294,19 +312,19 @@ class TermInfoOutputSchema(Schema):

 def encode_brackets(text):
     """
-    Encodes brackets in the given text.
+    Encodes square brackets in the given text to prevent breaking markdown link syntax.
+    Parentheses are NOT encoded as they don't break markdown syntax.

     :param text: The text to encode.
-    :return: The text with brackets encoded.
+    :return: The text with square brackets encoded.
     """
-    return (text.replace('(', '%28')
-            .replace(')', '%29')
-            .replace('[', '%5B')
+    return (text.replace('[', '%5B')
             .replace(']', '%5D'))

 def encode_markdown_links(df, columns):
     """
-    Encodes brackets in the labels
+    Encodes brackets in the labels within markdown links, leaving the link syntax intact.
+    Does NOT encode alt text in linked images ([](link) format).
     :param df: DataFrame containing the query results.
     :param columns: List of column names to apply encoding to.
     """

@@ -315,28 +333,10 @@ def encode_markdown_links(df, columns):
         return label

     try:
-        #
+        # Skip linked images (format: [](link))
+        # These should NOT be encoded
         if label.startswith("[
-            if len(parts) < 2:
-                return label
-
-            image_part = parts[0]
-            link_part = parts[1]
-
-            # Process the image part
-            image_parts = image_part.split("](")
-            if len(image_parts) < 2:
-                return label
-
-            alt_text = image_parts[0][3:]  # Remove the "[]({link_part}"
-            return encoded_label
+            return label

         # Process regular markdown links
         elif label.startswith("[") and "](" in label:

@@ -905,6 +905,7 @@ def get_term_info(short_form: str, preview: bool = False):
         print(f"Unexpected error when retrieving term info: {type(e).__name__}: {e}")
         return parsed_object

+@with_solr_cache('instances')
 def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     """
     Retrieves available instances for the given class short form.

@@ -1041,19 +1042,34 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
         # Use the ordered tags to match expected format
         tags = '|'.join(ordered_tags)

-        # Extract thumbnail URL
+        # Extract thumbnail URL and convert to HTTPS
         thumbnail_url = image_info.get('image_thumbnail', '') if image_info else ''
+        if thumbnail_url:
+            # Replace http with https and thumbnailT.png with thumbnail.png
+            thumbnail_url = thumbnail_url.replace('http://', 'https://').replace('thumbnailT.png', 'thumbnail.png')

         # Format thumbnail with proper markdown link (matching Neo4j format)
         thumbnail = ''
         if thumbnail_url and template_anatomy:
+            # Prefer symbol over label for template (matching Neo4j behavior)
             template_label = template_anatomy.get('label', '')
+            if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
+                template_label = template_anatomy.get('symbol')
+            # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
+            template_label = unquote(template_label)
             template_short_form = template_anatomy.get('short_form', '')
+
+            # Prefer symbol over label for anatomy (matching Neo4j behavior)
             anatomy_label = anatomy.get('label', '')
+            if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
+                anatomy_label = anatomy.get('symbol')
+            # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
+            anatomy_label = unquote(anatomy_label)
             anatomy_short_form = anatomy.get('short_form', '')

             if template_label and anatomy_label:
                 # Create thumbnail markdown link matching the original format
+                # DO NOT encode brackets in alt text - that's done later by encode_markdown_links
                 alt_text = f"{anatomy_label} aligned to {template_label}"
                 link_target = f"{template_short_form},{anatomy_short_form}"
                 thumbnail = f"[]({link_target})"

@@ -1061,27 +1077,27 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
         # Format template information
         template_formatted = ''
         if template_anatomy:
+            # Prefer symbol over label (matching Neo4j behavior)
             template_label = template_anatomy.get('label', '')
+            if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
+                template_label = template_anatomy.get('symbol')
+            # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
+            template_label = unquote(template_label)
             template_short_form = template_anatomy.get('short_form', '')
             if template_label and template_short_form:
                 template_formatted = f"[{template_label}]({template_short_form})"

-        # Handle
+        # Handle label formatting (match Neo4j format - prefer symbol over label)
         anatomy_label = anatomy.get('label', 'Unknown')
+        if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
+            anatomy_label = anatomy.get('symbol')
+        # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
+        anatomy_label = unquote(anatomy_label)
         anatomy_short_form = anatomy.get('short_form', '')

-        # URL encode special characters in label for markdown links (matching Neo4j behavior)
-        # Only certain labels need encoding (like those with parentheses)
-        import urllib.parse
-        if '(' in anatomy_label or ')' in anatomy_label:
-            # URL encode but keep spaces and common characters
-            encoded_label = urllib.parse.quote(anatomy_label, safe=' -_.')
-        else:
-            encoded_label = anatomy_label
-
         row = {
             'id': anatomy_short_form,
-            'label': f"[{encoded_label}]({anatomy_short_form})",
+            'label': f"[{anatomy_label}]({anatomy_short_form})",
             'tags': tags,
             'parent': f"[{term_info.get('term', {}).get('core', {}).get('label', 'Unknown')}]({short_form})",
             'source': '',  # Not readily available in SOLR anatomy_channel_image

@@ -1099,7 +1115,11 @@ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int
     total_count = len(anatomy_images)

     if return_dataframe:
-
+        df = pd.DataFrame(rows)
+        # Apply encoding to markdown links (matches Neo4j implementation)
+        columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
+        df = encode_markdown_links(df, columns_to_encode)
+        return df

     return {
         "headers": _get_instances_headers(),
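The hunks above converge on the same label-handling pattern; here is a compact, self-contained sketch of it with a made-up SOLR record:

```python
from urllib.parse import unquote

# Hypothetical anatomy record as returned from SOLR
anatomy = {'label': 'medulla on JRC_FlyEM_Hemibrain', 'symbol': 'ME%28R%29', 'short_form': 'VFB_00101385'}

# Prefer the symbol over the label when a non-empty symbol exists (matching Neo4j behaviour)
anatomy_label = anatomy.get('label', 'Unknown')
if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
    anatomy_label = anatomy.get('symbol')

# Decode URL-encoded strings coming back from SOLR, e.g. ME%28R%29 -> ME(R)
anatomy_label = unquote(anatomy_label)

print(f"[{anatomy_label}]({anatomy['short_form']})")  # [ME(R)](VFB_00101385)
```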
{vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vfbquery
-Version: 0.4.0
+Version: 0.4.1
 Summary: Wrapper for querying VirtualFlyBrain knowledge graph.
 Home-page: https://github.com/VirtualFlyBrain/VFBquery
 Author: VirtualFlyBrain

@@ -117,25 +117,25 @@ vfb.get_term_info('FBbt_00003748')
         "id": "VFB_00102107",
         "label": "[ME on JRC2018Unisex adult brain](VFB_00102107)",
         "tags": "Nervous_system|Adult|Visual_system|Synaptic_neuropil_domain",
-        "thumbnail": "[](VFB_00101567,VFB_00102107)"
     },
     {
         "id": "VFB_00101385",
-        "label": "[ME
+        "label": "[ME(R) on JRC_FlyEM_Hemibrain](VFB_00101385)",
         "tags": "Nervous_system|Adult|Visual_system|Synaptic_neuropil_domain",
-        "thumbnail": "[ on JRC_FlyEM_Hemibrain aligned to JRCFIB2018Fum')](VFB_00101384,VFB_00101385)"
     },
     {
         "id": "VFB_00030810",
         "label": "[medulla on adult brain template Ito2014](VFB_00030810)",
-        "tags": "Nervous_system|Adult|
+        "tags": "Nervous_system|Visual_system|Adult|Synaptic_neuropil_domain",
         "thumbnail": "[](VFB_00030786,VFB_00030810)"
     },
     {
         "id": "VFB_00030624",
         "label": "[medulla on adult brain template JFRC2](VFB_00030624)",
-        "tags": "Nervous_system|Adult|
-        "thumbnail": "[](VFB_00017894,VFB_00030624)"
     }
 ]
 },

@@ -143,8 +143,8 @@ vfb.get_term_info('FBbt_00003748')
     "count": 4
     }
 ],
-"IsIndividual":
-"IsClass":
+"IsIndividual": False,
+"IsClass": True,
 "Examples": {
     "VFB_00101384": [
         {

@@ -191,7 +191,7 @@ vfb.get_term_info('FBbt_00003748')
         }
     ]
 },
-"IsTemplate":
+"IsTemplate": False,
 "Synonyms": [
     {
         "label": "ME",

@@ -1122,7 +1122,7 @@ vfb.get_instances('FBbt_00003748', return_dataframe=False)
 },
 {
     "id": "VFB_00101385",
-    "label": "[ME
+    "label": "[ME(R) on JRC_FlyEM_Hemibrain](VFB_00101385)",
     "tags": "Nervous_system|Adult|Visual_system|Synaptic_neuropil_domain",
     "parent": "[medulla](FBbt_00003748)",
     "source": "",

@@ -1130,7 +1130,7 @@ vfb.get_instances('FBbt_00003748', return_dataframe=False)
     "template": "[JRCFIB2018Fum](VFB_00101384)",
     "dataset": "[JRC_FlyEM_Hemibrain painted domains](Xu2020roi)",
     "license": "",
-    "thumbnail": "[ on JRC_FlyEM_Hemibrain aligned to JRCFIB2018Fum')](VFB_00101384,VFB_00101385)"
 },
 {
     "id": "VFB_00030810",

@@ -1152,7 +1152,7 @@ vfb.get_instances('FBbt_00003748', return_dataframe=False)
     "source": "",
     "source_id": "",
     "template": "[JFRC2](VFB_00017894)",
-    "dataset": "[BrainName neuropils on adult brain JFRC2
+    "dataset": "[BrainName neuropils on adult brain JFRC2 (Jenett, Shinomya)](JenettShinomya_BrainName)",
     "license": "",
     "thumbnail": "[](VFB_00017894,VFB_00030624)"
 }

@@ -1234,7 +1234,7 @@ vfb.get_templates(return_dataframe=False)
     "name": "[JFRC2](VFB_00017894)",
     "tags": "Nervous_system|Adult",
     "thumbnail": "[](VFB_00017894)",
-    "dataset": "[FlyLight - GMR GAL4 collection
+    "dataset": "[FlyLight - GMR GAL4 collection (Jenett2012)](Jenett2012)",
     "license": "[CC-BY-NC-SA](VFBlicense_CC_BY_NC_SA_4_0)"
 },
 {

@@ -1252,7 +1252,7 @@ vfb.get_templates(return_dataframe=False)
     "name": "[L1 larval CNS ssTEM - Cardona/Janelia](VFB_00050000)",
     "tags": "Nervous_system|Larva",
     "thumbnail": "[](VFB_00050000)",
-    "dataset": "[Neurons involved in larval fast escape response - EM
+    "dataset": "[Neurons involved in larval fast escape response - EM (Ohyama2016)](Ohyama2015)",
     "license": "[CC_BY_SA](VFBlicense_CC_BY_SA_4_0)"
 },
 {

@@ -1261,7 +1261,7 @@ vfb.get_templates(return_dataframe=False)
     "name": "[L1 larval CNS ssTEM - Cardona/Janelia](VFB_00050000)",
     "tags": "Nervous_system|Larva",
     "thumbnail": "[](VFB_00050000)",
-    "dataset": "[larval hugin neurons - EM
+    "dataset": "[larval hugin neurons - EM (Schlegel2016)](Schlegel2016)",
     "license": "[CC_BY](VFBlicense_CC_BY_4_0)"
 },
 {

@@ -1270,7 +1270,7 @@ vfb.get_templates(return_dataframe=False)
     "name": "[L3 CNS template - Wood2018](VFB_00049000)",
     "tags": "Nervous_system|Larva",
     "thumbnail": "[](VFB_00049000)",
-    "dataset": "[L3 Larval CNS Template
+    "dataset": "[L3 Larval CNS Template (Truman2016)](Truman2016)",
     "license": "[CC_BY_SA](VFBlicense_CC_BY_SA_4_0)"
 },
 {

@@ -1279,7 +1279,7 @@ vfb.get_templates(return_dataframe=False)
     "name": "[COURT2018VNS](VFB_00100000)",
     "tags": "Nervous_system|Adult|Ganglion",
     "thumbnail": "[](VFB_00100000)",
-    "dataset": "[Adult VNS neuropils
+    "dataset": "[Adult VNS neuropils (Court2017)](Court2017)",
     "license": "[CC_BY_SA](VFBlicense_CC_BY_SA_4_0)"
 },
 {

@@ -1294,7 +1294,7 @@ vfb.get_templates(return_dataframe=False)
 {
     "id": "VFB_00110000",
     "order": 9,
-    "name": "[Adult Head
+    "name": "[Adult Head (McKellar2020)](VFB_00110000)",
     "tags": "Adult|Anatomy",
     "thumbnail": "[')](VFB_00110000)",
     "dataset": "[GAL4 lines from McKellar et al., 2020](McKellar2020)",

@@ -1303,7 +1303,7 @@ vfb.get_templates(return_dataframe=False)
 {
     "id": "VFB_00120000",
     "order": 10,
-    "name": "[Adult T1 Leg
+    "name": "[Adult T1 Leg (Kuan2020)](VFB_00120000)",
     "tags": "Adult|Anatomy",
     "thumbnail": "[')](VFB_00120000)",
     "dataset": "[Millimeter-scale imaging of a Drosophila leg at single-neuron resolution](Kuan2020)",
{vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/RECORD
CHANGED
@@ -1,19 +1,19 @@
 test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/readme_parser.py,sha256=
-test/term_info_queries_test.py,sha256=
+test/readme_parser.py,sha256=I15lzWyYaYRgbFqZ1EZOBrUyzeJJK0VWzpxvH25lAZE,5772
+test/term_info_queries_test.py,sha256=bb3oTnPqpGzegWDkWyIBySVbYEeyMdKB3epgzm0yf40,36963
 test/test_default_caching.py,sha256=-KW2Mkz9x0tjlhXMreqJvjgo3pWOg49G0r22Woa9U5U,6616
 test/test_examples_diff.py,sha256=TPo7gHPcus-24T7kxBXMQiCl0EHcXaEXeVuOG7C3rUo,15853
-vfbquery/__init__.py,sha256=
+vfbquery/__init__.py,sha256=IQ2W2LkrVKThB-00cuaIsW5fwQRN8PgPlKSXGdGY2Q8,3010
 vfbquery/cache_enhancements.py,sha256=-PCM0YZHPjwUJwJODZLgmz91sDyFGuYz_QRph_kTbB8,17341
 vfbquery/cached_functions.py,sha256=5-aIiRP9mfEhVT3mXkLvIPDmdFq6iIExiLZAyir12IQ,10255
 vfbquery/solr_cache_integration.py,sha256=Q87z_pXPdS9zn0r9kp2YBLGpCKOXVvcmzNmkRN7D8MU,7984
 vfbquery/solr_fetcher.py,sha256=1FAyqaLrvZLaAmCW96en9y8lKTcs-ZFjt_UlnohP0jo,5683
-vfbquery/solr_result_cache.py,sha256=
+vfbquery/solr_result_cache.py,sha256=qkR13mCiqRHLCv5PWd1pT-rPIboZcGuusn065HgvV-0,30111
 vfbquery/term_info_queries.py,sha256=oE-Ogm7jCPPlKtD3W3EtttYZcHnInwDOpOj-phAEOaI,42009
 vfbquery/test_utils.py,sha256=7wUA3xgaGu3eLnjC98msNYt1wL538nOimVJjkC0ZLjU,5791
-vfbquery/vfb_queries.py,sha256=
-vfbquery-0.4.
-vfbquery-0.4.
-vfbquery-0.4.
-vfbquery-0.4.
-vfbquery-0.4.
+vfbquery/vfb_queries.py,sha256=tLLphDbsjv0MsEnA5JWvKzT1a8OutsCUH6gmrVahZKg,78143
+vfbquery-0.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vfbquery-0.4.1.dist-info/METADATA,sha256=d5onNHbLwdQXpfNLNgvvtVNbOV2L920AO8aFf-A3rbY,63049
+vfbquery-0.4.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+vfbquery-0.4.1.dist-info/top_level.txt,sha256=UgaRTTOy4JBdKbkr_gkeknT4eaibm3ztF520G4NTQZs,14
+vfbquery-0.4.1.dist-info/RECORD,,

{vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/LICENSE
File without changes

{vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/WHEEL
File without changes

{vfbquery-0.4.0.dist-info → vfbquery-0.4.1.dist-info}/top_level.txt
File without changes