vfbquery 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/term_info_queries_test.py +64 -9
- test/test_default_caching.py +173 -0
- test/test_examples_diff.py +6 -1
- vfbquery/__init__.py +62 -1
- vfbquery/cache_enhancements.py +465 -0
- vfbquery/cached_functions.py +227 -0
- vfbquery/solr_cache_integration.py +212 -0
- vfbquery/solr_fetcher.py +47 -3
- vfbquery/solr_result_cache.py +613 -0
- vfbquery/term_info_queries.py +18 -2
- vfbquery/test_utils.py +110 -3
- vfbquery/vfb_queries.py +325 -54
- {vfbquery-0.3.3.dist-info → vfbquery-0.4.0.dist-info}/METADATA +39 -40
- vfbquery-0.4.0.dist-info/RECORD +19 -0
- vfbquery-0.3.3.dist-info/RECORD +0 -14
- {vfbquery-0.3.3.dist-info → vfbquery-0.4.0.dist-info}/LICENSE +0 -0
- {vfbquery-0.3.3.dist-info → vfbquery-0.4.0.dist-info}/WHEEL +0 -0
- {vfbquery-0.3.3.dist-info → vfbquery-0.4.0.dist-info}/top_level.txt +0 -0
vfbquery/test_utils.py
CHANGED
|
@@ -1,6 +1,41 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
+
import json
|
|
3
|
+
import numpy as np
|
|
2
4
|
from typing import Any, Dict, Union
|
|
3
5
|
|
|
6
|
+
# Custom JSON encoder to handle NumPy and pandas types
|
|
7
|
+
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy and pandas values to native Python types.

    Pass it as ``cls=NumpyEncoder`` to ``json.dumps``. The standard encoder
    calls :meth:`default` only for values it cannot serialize itself.
    """

    def default(self, obj):
        """
        Return a JSON-serializable stand-in for ``obj``.

        :param obj: value the standard encoder could not serialize
        :return: native Python equivalent (int, float, bool, list, ...)
        :raises TypeError: raised by the base class for unsupported types
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, (np.ndarray, pd.Series, pd.Index)):
            # Series and Index also expose ``.item()``, but it raises ValueError
            # unless they hold exactly one element, so they must be handled as
            # sequences before the generic ``item`` branch below.
            return obj.tolist()
        if hasattr(obj, 'item'):  # Remaining NumPy/pandas scalar types
            return obj.item()
        return super().default(obj)
|
|
20
|
+
|
|
21
|
+
def safe_to_dict(df, sort_by_id=True):
    """
    Convert a DataFrame to a list of row dicts holding native Python values.

    Integer and float columns are re-boxed as object columns before export.
    This covers NumPy signed/unsigned dtypes as well as pandas nullable dtypes
    such as ``Int64``; missing values in nullable columns (``pd.NA``) become
    ``None`` so the records stay JSON-serializable. ``NaN`` in plain float
    columns is left untouched.

    :param df: DataFrame to convert; any other object is returned unchanged
    :param sort_by_id: when True and an ``id`` column exists, rows are sorted
        by ``id`` in descending order
    :return: list of ``{column: value}`` dicts, or ``df`` itself if it is not
        a DataFrame
    """
    if not isinstance(df, pd.DataFrame):
        return df

    # Work on a copy so the caller's frame keeps its dtypes and row order.
    df_copy = df.copy()
    for col in df_copy.columns:
        dtype = df_copy[col].dtype
        if not (pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype)):
            continue
        boxed = [None if value is pd.NA else value for value in df_copy[col].astype('object')]
        # An explicit object dtype stops pandas from inferring a numeric dtype again.
        df_copy[col] = pd.Series(boxed, index=df_copy.index, dtype='object')

    # Sort by id column in descending order if it exists and sort_by_id is True
    if sort_by_id and 'id' in df_copy.columns:
        df_copy = df_copy.sort_values('id', ascending=False)

    return df_copy.to_dict("records")
|
|
38
|
+
|
|
4
39
|
def safe_extract_row(result: Any, index: int = 0) -> Dict:
|
|
5
40
|
"""
|
|
6
41
|
Safely extract a row from a pandas DataFrame or return the object itself if not a DataFrame.
|
|
@@ -11,11 +46,83 @@ def safe_extract_row(result: Any, index: int = 0) -> Dict:
|
|
|
11
46
|
"""
|
|
12
47
|
if isinstance(result, pd.DataFrame):
|
|
13
48
|
if not result.empty and len(result.index) > index:
|
|
14
|
-
|
|
49
|
+
# Convert to dict using safe method to handle numpy types
|
|
50
|
+
row_series = result.iloc[index]
|
|
51
|
+
return {col: (val.item() if hasattr(val, 'item') else val) for col, val in row_series.items()}
|
|
15
52
|
else:
|
|
16
53
|
return {}
|
|
17
54
|
return result
|
|
18
55
|
|
|
56
|
+
def sanitize_for_json(obj: Any) -> Any:
    """
    Recursively sanitize any data structure to make it JSON serializable.
    Converts numpy types, pandas types, and other non-serializable types to native Python types.

    Dict keys that are NumPy scalars are converted as well, because
    ``json.dumps`` rejects keys such as ``numpy.int64``. Lists and tuples both
    come back as lists. Objects exposing ``__dict__`` are replaced by their
    sanitized attribute dict.

    :param obj: Object to sanitize
    :return: JSON-serializable version of the object
    """
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): sanitize_for_json(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [sanitize_for_json(item) for item in obj]
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, (np.ndarray, pd.Series, pd.Index)):
        # Series and Index expose ``.item()`` too, but it raises ValueError
        # unless they hold exactly one element, so convert them as sequences.
        # Recursing also cleans values nested inside object-dtype data.
        return sanitize_for_json(obj.tolist())
    if isinstance(obj, pd.DataFrame):
        return sanitize_for_json(safe_to_dict(obj))
    if hasattr(obj, 'item'):  # Handle remaining NumPy/pandas scalar types
        return obj.item()
    if hasattr(obj, '__dict__'):  # Handle custom objects
        return sanitize_for_json(obj.__dict__)
    return obj
|
|
84
|
+
|
|
85
|
+
def safe_json_dumps(obj: Any, **kwargs) -> str:
    """
    Serialize an object to a JSON string, coping with numpy and pandas values.

    The first attempt encodes ``obj`` directly with ``NumpyEncoder``. If that
    raises ``TypeError`` or ``ValueError``, the object is passed through
    ``sanitize_for_json`` and encoded again with the same options.

    :param obj: Object to serialize
    :param kwargs: Extra ``json.dumps`` arguments; they override the defaults
        (``indent=2``, ``ensure_ascii=False``, ``cls=NumpyEncoder``)
    :return: JSON string
    """
    # Caller-supplied options win over the defaults.
    options = {**{'indent': 2, 'ensure_ascii': False, 'cls': NumpyEncoder}, **kwargs}

    try:
        encoded = json.dumps(obj, **options)
    except (TypeError, ValueError):
        # Direct encoding failed: retry on a sanitized copy.
        encoded = json.dumps(sanitize_for_json(obj), **options)
    return encoded
|
|
104
|
+
|
|
105
|
+
def pretty_print_vfb_result(result: Any, max_length: int = 1000) -> None:
    """
    Print a VFB query result as JSON, truncated to a readable length.

    If serializing or printing fails, a short diagnostic is printed instead:
    the error, the result type, and either the result's attribute names or a
    truncated ``str()`` of it.

    :param result: Result from any VFB query function
    :param max_length: Maximum number of characters printed before truncating
    """
    try:
        rendered = safe_json_dumps(result)
        total = len(rendered)
        if total <= max_length:
            print(rendered)
        else:
            print(rendered[:max_length] + f'\n... (truncated, full length: {total} characters)')
    except Exception as e:
        print(f'Error printing result: {e}')
        print(f'Result type: {type(result)}')
        if not hasattr(result, '__dict__'):
            print(f'Result: {str(result)[:max_length]}...')
        else:
            print(f'Result attributes: {list(result.__dict__.keys())}')
|
|
125
|
+
|
|
19
126
|
def patch_vfb_connect_query_wrapper():
|
|
20
127
|
"""
|
|
21
128
|
Apply monkey patches to VfbConnect.neo_query_wrapper to make it handle DataFrame results safely.
|
|
@@ -28,8 +135,8 @@ def patch_vfb_connect_query_wrapper():
|
|
|
28
135
|
def patched_get_term_info(self, terms, *args, **kwargs):
|
|
29
136
|
result = original_get_term_info(self, terms, *args, **kwargs)
|
|
30
137
|
if isinstance(result, pd.DataFrame):
|
|
31
|
-
# Return list of row dictionaries instead of DataFrame
|
|
32
|
-
return
|
|
138
|
+
# Return list of row dictionaries instead of DataFrame using safe conversion
|
|
139
|
+
return safe_to_dict(result)
|
|
33
140
|
return result
|
|
34
141
|
|
|
35
142
|
NeoQueryWrapper._get_TermInfo = patched_get_term_info
|
vfbquery/vfb_queries.py
CHANGED
|
@@ -8,6 +8,41 @@ from typing import List, Tuple, Dict, Any, Union
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from marshmallow import ValidationError
|
|
10
10
|
import json
|
|
11
|
+
import numpy as np
|
|
12
|
+
from .solr_result_cache import with_solr_cache
|
|
13
|
+
|
|
14
|
+
# Custom JSON encoder to handle NumPy and pandas types
|
|
15
|
+
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy and pandas values to native Python types.

    Pass it as ``cls=NumpyEncoder`` to ``json.dumps``. The standard encoder
    calls :meth:`default` only for values it cannot serialize itself.
    """

    def default(self, obj):
        """
        Return a JSON-serializable stand-in for ``obj``.

        :param obj: value the standard encoder could not serialize
        :return: native Python equivalent (int, float, bool, list, ...)
        :raises TypeError: raised by the base class for unsupported types
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, (np.ndarray, pd.Series, pd.Index)):
            # Series and Index also expose ``.item()``, but it raises ValueError
            # unless they hold exactly one element, so they must be handled as
            # sequences before the generic ``item`` branch below.
            return obj.tolist()
        if hasattr(obj, 'item'):  # Remaining NumPy/pandas scalar types
            return obj.item()
        return super().default(obj)
|
|
28
|
+
|
|
29
|
+
def safe_to_dict(df, sort_by_id=True):
    """
    Convert a DataFrame to a list of row dicts holding native Python values.

    Integer and float columns are re-boxed as object columns before export.
    This covers NumPy signed/unsigned dtypes as well as pandas nullable dtypes
    such as ``Int64``; missing values in nullable columns (``pd.NA``) become
    ``None`` so the records stay JSON-serializable. ``NaN`` in plain float
    columns is left untouched.

    :param df: DataFrame to convert; any other object is returned unchanged
    :param sort_by_id: when True and an ``id`` column exists, rows are sorted
        by ``id`` in descending order
    :return: list of ``{column: value}`` dicts, or ``df`` itself if it is not
        a DataFrame
    """
    if not isinstance(df, pd.DataFrame):
        return df

    # Work on a copy so the caller's frame keeps its dtypes and row order.
    df_copy = df.copy()
    for col in df_copy.columns:
        dtype = df_copy[col].dtype
        if not (pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_float_dtype(dtype)):
            continue
        boxed = [None if value is pd.NA else value for value in df_copy[col].astype('object')]
        # An explicit object dtype stops pandas from inferring a numeric dtype again.
        df_copy[col] = pd.Series(boxed, index=df_copy.index, dtype='object')

    # Sort by id column in descending order if it exists and sort_by_id is True
    if sort_by_id and 'id' in df_copy.columns:
        df_copy = df_copy.sort_values('id', ascending=False)

    return df_copy.to_dict("records")
|
|
11
46
|
|
|
12
47
|
# Lazy import for dict_cursor to avoid GUI library issues
|
|
13
48
|
def get_dict_cursor():
|
|
@@ -489,6 +524,11 @@ def term_info_parse_object(results, short_form):
|
|
|
489
524
|
if "image_" in key and not ("thumbnail" in key or "folder" in key) and len(vars(image.channel_image.image)[key]) > 1:
|
|
490
525
|
record[key.replace("image_","")] = vars(image.channel_image.image)[key].replace("http://","https://")
|
|
491
526
|
images[image.channel_image.image.template_anatomy.short_form].append(record)
|
|
527
|
+
|
|
528
|
+
# Sort each template's images by id in descending order (newest first)
|
|
529
|
+
for template_key in images:
|
|
530
|
+
images[template_key] = sorted(images[template_key], key=lambda x: x["id"], reverse=True)
|
|
531
|
+
|
|
492
532
|
termInfo["Examples"] = images
|
|
493
533
|
# add a query to `queries` list for listing all available images
|
|
494
534
|
q = ListAllAvailableImages_to_schema(termInfo["Name"], {"short_form":vfbTerm.term.core.short_form})
|
|
@@ -512,6 +552,11 @@ def term_info_parse_object(results, short_form):
|
|
|
512
552
|
if "image_" in key and not ("thumbnail" in key or "folder" in key) and len(vars(image.image)[key]) > 1:
|
|
513
553
|
record[key.replace("image_","")] = vars(image.image)[key].replace("http://","https://")
|
|
514
554
|
images[image.image.template_anatomy.short_form].append(record)
|
|
555
|
+
|
|
556
|
+
# Sort each template's images by id in descending order (newest first)
|
|
557
|
+
for template_key in images:
|
|
558
|
+
images[template_key] = sorted(images[template_key], key=lambda x: x["id"], reverse=True)
|
|
559
|
+
|
|
515
560
|
# Add the thumbnails to the term info
|
|
516
561
|
termInfo["Images"] = images
|
|
517
562
|
|
|
@@ -780,17 +825,24 @@ def ListAllAvailableImages_to_schema(name, take_default):
|
|
|
780
825
|
return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
|
|
781
826
|
|
|
782
827
|
def serialize_solr_output(results):
    """
    Serialize the first document of a Solr result to a JSON string.

    The ``_version_`` field is dropped before encoding. The encoded text is
    then post-processed: every backslash is removed, the quotes wrapping
    embedded objects (``"{`` and ``}"``) are stripped, and apostrophes are
    replaced by hyphens.

    :param results: object whose ``docs`` attribute holds the documents; only
        ``docs[0]`` is read
    :return: the post-processed JSON string
    """
    document = dict(results.docs[0])
    # _version_ is a large integer that can cause serialization issues.
    if '_version_' in document:
        del document['_version_']

    serialized = json.dumps(document, ensure_ascii=False, cls=NumpyEncoder)

    # Order matters: backslashes must go first so the quote patterns match.
    for old, new in (('\\', ''), ('"{', '{'), ('}"', '}'), ("'", '-')):
        serialized = serialized.replace(old, new)
    return serialized
|
|
790
840
|
|
|
841
|
+
@with_solr_cache('term_info')
|
|
791
842
|
def get_term_info(short_form: str, preview: bool = False):
|
|
792
843
|
"""
|
|
793
844
|
Retrieves the term info for the given term short form.
|
|
845
|
+
Results are cached in SOLR for 3 months to improve performance.
|
|
794
846
|
|
|
795
847
|
:param short_form: short form of the term
|
|
796
848
|
:return: term info
|
|
@@ -802,11 +854,33 @@ def get_term_info(short_form: str, preview: bool = False):
|
|
|
802
854
|
# Check if any results were returned
|
|
803
855
|
parsed_object = term_info_parse_object(results, short_form)
|
|
804
856
|
if parsed_object:
|
|
805
|
-
|
|
806
|
-
if
|
|
807
|
-
|
|
857
|
+
# Only try to fill query results if there are queries to fill
|
|
858
|
+
if parsed_object.get('Queries') and len(parsed_object['Queries']) > 0:
|
|
859
|
+
try:
|
|
860
|
+
term_info = fill_query_results(parsed_object)
|
|
861
|
+
if term_info:
|
|
862
|
+
return term_info
|
|
863
|
+
else:
|
|
864
|
+
print("Failed to fill query preview results!")
|
|
865
|
+
# Set default values for queries when fill_query_results fails
|
|
866
|
+
for query in parsed_object.get('Queries', []):
|
|
867
|
+
# Set default preview_results structure
|
|
868
|
+
query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
|
|
869
|
+
# Set count to 0 when we can't get the real count
|
|
870
|
+
query['count'] = 0
|
|
871
|
+
return parsed_object
|
|
872
|
+
except Exception as e:
|
|
873
|
+
print(f"Error filling query results (setting default values): {e}")
|
|
874
|
+
# Set default values for queries when fill_query_results fails
|
|
875
|
+
for query in parsed_object.get('Queries', []):
|
|
876
|
+
# Set default preview_results structure
|
|
877
|
+
query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
|
|
878
|
+
# Set count to 0 when we can't get the real count
|
|
879
|
+
query['count'] = 0
|
|
880
|
+
return parsed_object
|
|
881
|
+
else:
|
|
882
|
+
# No queries to fill, return parsed object directly
|
|
808
883
|
return parsed_object
|
|
809
|
-
return parsed_object
|
|
810
884
|
else:
|
|
811
885
|
print(f"No valid term info found for ID '{short_form}'")
|
|
812
886
|
return None
|
|
@@ -834,46 +908,230 @@ def get_term_info(short_form: str, preview: bool = False):
|
|
|
834
908
|
def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves available instances for the given class short form.
    Uses SOLR term_info data when Neo4j is unavailable (fallback mode).

    :param short_form: short form of the class
    :param return_dataframe: when True return the result DataFrame, otherwise
        a dict with ``headers``, ``rows`` and ``count``
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: results rows
    """
    row_keys = [
        "id",
        "label",
        "tags",
        "parent",
        "source",
        "source_id",
        "template",
        "dataset",
        "license",
        "thumbnail"
    ]

    try:
        # Escape backslashes and single quotes so the value cannot terminate
        # the quoted Cypher literal it is interpolated into.
        class_id = str(short_form).replace("\\", "\\\\").replace("'", "\\'")

        # Try to use original Neo4j implementation first
        # Get the total count of rows
        count_query = f"""
        MATCH (i:Individual:has_image)-[:INSTANCEOF]->(p:Class {{ short_form: '{class_id}' }}),
              (i)<-[:depicts]-(:Individual)-[r:in_register_with]->(:Template)
        RETURN COUNT(r) AS total_count
        """
        count_results = vc.nc.commit_list([count_query])
        count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
        # Cast to a native int: the DataFrame yields numpy.int64, which the
        # standard JSON encoder rejects.
        total_count = int(count_df['total_count'][0]) if not count_df.empty else 0

        # Define the main Cypher query.
        # The thumbnail template takes four arguments (alt text, image URL,
        # title, link target), so it needs four placeholders.
        query = f"""
        MATCH (i:Individual:has_image)-[:INSTANCEOF]->(p:Class {{ short_form: '{class_id}' }}),
              (i)<-[:depicts]-(:Individual)-[r:in_register_with]->(:Template)-[:depicts]->(templ:Template),
              (i)-[:has_source]->(ds:DataSet)
        OPTIONAL MATCH (i)-[rx:database_cross_reference]->(site:Site)
        OPTIONAL MATCH (ds)-[:license|licence]->(lic:License)
        RETURN i.short_form as id,
               apoc.text.format("[%s](%s)",[COALESCE(i.symbol[0],i.label),i.short_form]) AS label,
               apoc.text.join(i.uniqueFacets, '|') AS tags,
               apoc.text.format("[%s](%s)",[COALESCE(p.symbol[0],p.label),p.short_form]) AS parent,
               REPLACE(apoc.text.format("[%s](%s)",[COALESCE(site.symbol[0],site.label),site.short_form]), '[null](null)', '') AS source,
               REPLACE(apoc.text.format("[%s](%s)",[rx.accession[0],site.link_base[0] + rx.accession[0]]), '[null](null)', '') AS source_id,
               apoc.text.format("[%s](%s)",[COALESCE(templ.symbol[0],templ.label),templ.short_form]) AS template,
               apoc.text.format("[%s](%s)",[COALESCE(ds.symbol[0],ds.label),ds.short_form]) AS dataset,
               REPLACE(apoc.text.format("[%s](%s)",[COALESCE(lic.symbol[0],lic.label),lic.short_form]), '[null](null)', '') AS license,
               REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(i.symbol[0],i.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(r.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(i.symbol[0],i.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + i.short_form]), "[![null]( 'null')](null)", "") as thumbnail
        ORDER BY id Desc
        """

        if limit != -1:
            # int() keeps anything but a number out of the query text.
            query += f" LIMIT {int(limit)}"

        # Run the query using VFB_connect
        results = vc.nc.commit_list([query])

        # Convert the results to a DataFrame
        df = pd.DataFrame.from_records(get_dict_cursor()(results))

        columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
        df = encode_markdown_links(df, columns_to_encode)

        if return_dataframe:
            return df

        # Format the results
        formatted_results = {
            "headers": _get_instances_headers(),
            "rows": [
                {key: row[key] for key in row_keys}
                for row in safe_to_dict(df)
            ],
            "count": total_count
        }

        return formatted_results

    except Exception as e:
        # Fallback to SOLR-based implementation when Neo4j is unavailable.
        # Deliberately broad: any failure of the Neo4j path takes the fallback.
        print(f"Neo4j unavailable ({e}), using SOLR fallback for get_instances")
        return _get_instances_from_solr(short_form, return_dataframe, limit)
|
|
874
994
|
|
|
875
|
-
|
|
876
|
-
|
|
995
|
+
def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int = -1):
    """
    SOLR-based fallback implementation for get_instances.
    Extracts instance data from term_info anatomy_channel_image array.

    Rows are ordered by id descending before ``limit`` is applied, mirroring
    the ``ORDER BY id Desc ... LIMIT`` of the Neo4j query. ``count`` reports
    the number of available images, not the number of rows returned.

    :param short_form: short form of the class
    :param return_dataframe: when True return a DataFrame of the rows,
        otherwise a dict with ``headers``, ``rows`` and ``count``
    :param limit: maximum number of rows to return; values <= 0 return all rows
    :return: DataFrame or result dict; an empty one if the lookup fails
    """
    # Imported once per call instead of once per row.
    import urllib.parse

    def _empty_result():
        # Empty results with the same structure as a successful call.
        if return_dataframe:
            return pd.DataFrame()
        return {
            "headers": _get_instances_headers(),
            "rows": [],
            "count": 0
        }

    try:
        # Get term_info data from SOLR
        term_info_results = vc.get_TermInfo([short_form], return_dataframe=False)

        if term_info_results is None or len(term_info_results) == 0:
            return _empty_result()

        term_info = term_info_results[0]
        anatomy_images = term_info.get('anatomy_channel_image') or []

        # Count before truncating so "count" is the total, as in the Neo4j path.
        total_count = len(anatomy_images)

        # Sort first, then limit. The sort is stable, so ties keep SOLR order.
        anatomy_images = sorted(
            anatomy_images,
            key=lambda img: (img.get('anatomy') or {}).get('short_form') or '',
            reverse=True,
        )
        if limit > 0:
            anatomy_images = anatomy_images[:limit]

        parent_label = term_info.get('term', {}).get('core', {}).get('label', 'Unknown')

        # Convert anatomy_channel_image to instance rows with rich data
        rows = []
        for img in anatomy_images:
            # "or {}" guards against keys that are present but None.
            anatomy = img.get('anatomy') or {}
            channel_image = img.get('channel_image') or {}
            image_info = channel_image.get('image') or {}
            template_anatomy = image_info.get('template_anatomy') or {}

            # Extract tags from unique_facets (matching original Neo4j format and ordering)
            unique_facets = anatomy.get('unique_facets') or []
            anatomy_types = anatomy.get('types') or []

            # Only these tags are emitted, in this fixed order.
            # 'Synaptic_neuropil' is deliberately not listed.
            ordered_tags = [
                tag_type
                for tag_type in ['Nervous_system', 'Adult', 'Visual_system', 'Synaptic_neuropil_domain']
                if tag_type in anatomy_types or tag_type in unique_facets
            ]
            tags = '|'.join(ordered_tags)

            template_label = template_anatomy.get('label', '')
            template_short_form = template_anatomy.get('short_form', '')
            anatomy_short_form = anatomy.get('short_form', '')

            # Thumbnail as a linked markdown image (matching Neo4j format):
            # [![alt](url 'title')](template,instance)
            thumbnail = ''
            thumbnail_url = image_info.get('image_thumbnail', '')
            raw_label = anatomy.get('label', '')
            if thumbnail_url and template_label and raw_label:
                alt_text = f"{raw_label} aligned to {template_label}"
                link_target = f"{template_short_form},{anatomy_short_form}"
                thumbnail = f"[![{alt_text}]({thumbnail_url} '{alt_text}')]({link_target})"

            # Format template information
            template_formatted = ''
            if template_label and template_short_form:
                template_formatted = f"[{template_label}]({template_short_form})"

            # URL encode special characters in label for markdown links (matching Neo4j behavior)
            # Only labels containing parentheses are encoded; spaces and -_. are kept.
            anatomy_label = anatomy.get('label', 'Unknown')
            if '(' in anatomy_label or ')' in anatomy_label:
                encoded_label = urllib.parse.quote(anatomy_label, safe=' -_.')
            else:
                encoded_label = anatomy_label

            rows.append({
                'id': anatomy_short_form,
                'label': f"[{encoded_label}]({anatomy_short_form})",
                'tags': tags,
                'parent': f"[{parent_label}]({short_form})",
                'source': '',  # Not readily available in SOLR anatomy_channel_image
                'source_id': '',
                'template': template_formatted,
                'dataset': '',  # Not readily available in SOLR anatomy_channel_image
                'license': '',
                'thumbnail': thumbnail
            })

        if return_dataframe:
            return pd.DataFrame(rows)

        return {
            "headers": _get_instances_headers(),
            "rows": rows,
            "count": total_count
        }

    except Exception as e:
        print(f"Error in SOLR fallback for get_instances: {e}")
        return _empty_result()
|
|
1120
|
+
|
|
1121
|
+
def _get_instances_headers():
|
|
1122
|
+
"""Return standard headers for get_instances results"""
|
|
1123
|
+
return {
|
|
1124
|
+
"id": {"title": "Add", "type": "selection_id", "order": -1},
|
|
1125
|
+
"label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
|
|
1126
|
+
"parent": {"title": "Parent Type", "type": "markdown", "order": 1},
|
|
1127
|
+
"template": {"title": "Template", "type": "markdown", "order": 4},
|
|
1128
|
+
"tags": {"title": "Gross Types", "type": "tags", "order": 3},
|
|
1129
|
+
"source": {"title": "Data Source", "type": "markdown", "order": 5},
|
|
1130
|
+
"source_id": {"title": "Data Source", "type": "markdown", "order": 6},
|
|
1131
|
+
"dataset": {"title": "Dataset", "type": "markdown", "order": 7},
|
|
1132
|
+
"license": {"title": "License", "type": "markdown", "order": 8},
|
|
1133
|
+
"thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
|
|
1134
|
+
}
|
|
877
1135
|
|
|
878
1136
|
# Convert the results to a DataFrame
|
|
879
1137
|
df = pd.DataFrame.from_records(get_dict_cursor()(results))
|
|
@@ -914,7 +1172,7 @@ def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
|
|
|
914
1172
|
"thumbnail"
|
|
915
1173
|
]
|
|
916
1174
|
}
|
|
917
|
-
for row in df
|
|
1175
|
+
for row in safe_to_dict(df)
|
|
918
1176
|
],
|
|
919
1177
|
"count": total_count
|
|
920
1178
|
}
|
|
@@ -1002,7 +1260,7 @@ def get_templates(limit: int = -1, return_dataframe: bool = False):
|
|
|
1002
1260
|
"license"
|
|
1003
1261
|
]
|
|
1004
1262
|
}
|
|
1005
|
-
for row in df
|
|
1263
|
+
for row in safe_to_dict(df)
|
|
1006
1264
|
],
|
|
1007
1265
|
"count": total_count
|
|
1008
1266
|
}
|
|
@@ -1118,7 +1376,7 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram
|
|
|
1118
1376
|
"thumbnail"
|
|
1119
1377
|
]
|
|
1120
1378
|
}
|
|
1121
|
-
for row in df
|
|
1379
|
+
for row in safe_to_dict(df)
|
|
1122
1380
|
],
|
|
1123
1381
|
"count": total_count
|
|
1124
1382
|
}
|
|
@@ -1228,7 +1486,7 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
|
|
|
1228
1486
|
"Images"
|
|
1229
1487
|
]
|
|
1230
1488
|
}
|
|
1231
|
-
for row in df
|
|
1489
|
+
for row in safe_to_dict(df)
|
|
1232
1490
|
],
|
|
1233
1491
|
"count": total_count
|
|
1234
1492
|
}
|
|
@@ -1248,7 +1506,7 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
|
|
|
1248
1506
|
"Weight",
|
|
1249
1507
|
]
|
|
1250
1508
|
}
|
|
1251
|
-
for row in df
|
|
1509
|
+
for row in safe_to_dict(df)
|
|
1252
1510
|
],
|
|
1253
1511
|
"count": total_count
|
|
1254
1512
|
}
|
|
@@ -1277,15 +1535,22 @@ def fill_query_results(term_info):
|
|
|
1277
1535
|
if function:
|
|
1278
1536
|
# print(f"Function {query['function']} found")
|
|
1279
1537
|
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1538
|
+
try:
|
|
1539
|
+
# Unpack the default dictionary and pass its contents as arguments
|
|
1540
|
+
function_args = query['takes'].get("default", {})
|
|
1541
|
+
# print(f"Function args: {function_args}")
|
|
1542
|
+
|
|
1543
|
+
# Modify this line to use the correct arguments and pass the default arguments
|
|
1544
|
+
if summary_mode:
|
|
1545
|
+
result = function(return_dataframe=False, limit=query['preview'], summary_mode=summary_mode, **function_args)
|
|
1546
|
+
else:
|
|
1547
|
+
result = function(return_dataframe=False, limit=query['preview'], **function_args)
|
|
1548
|
+
except Exception as e:
|
|
1549
|
+
print(f"Error executing query function {query['function']}: {e}")
|
|
1550
|
+
# Set default values for failed query
|
|
1551
|
+
query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
|
|
1552
|
+
query['count'] = 0
|
|
1553
|
+
continue
|
|
1289
1554
|
# print(f"Function result: {result}")
|
|
1290
1555
|
|
|
1291
1556
|
# Filter columns based on preview_columns
|
|
@@ -1313,12 +1578,18 @@ def fill_query_results(term_info):
|
|
|
1313
1578
|
filtered_item = item
|
|
1314
1579
|
filtered_result.append(filtered_item)
|
|
1315
1580
|
elif isinstance(result, pd.DataFrame):
|
|
1316
|
-
filtered_result = result[query['preview_columns']]
|
|
1581
|
+
filtered_result = safe_to_dict(result[query['preview_columns']])
|
|
1317
1582
|
else:
|
|
1318
1583
|
print(f"Unsupported result format for filtering columns in {query['function']}")
|
|
1319
1584
|
|
|
1320
1585
|
query['preview_results'] = {'headers': filtered_headers, 'rows': filtered_result}
|
|
1321
|
-
|
|
1586
|
+
# Handle count extraction based on result type
|
|
1587
|
+
if isinstance(result, dict) and 'count' in result:
|
|
1588
|
+
query['count'] = result['count']
|
|
1589
|
+
elif isinstance(result, pd.DataFrame):
|
|
1590
|
+
query['count'] = len(result)
|
|
1591
|
+
else:
|
|
1592
|
+
query['count'] = 0
|
|
1322
1593
|
# print(f"Filtered result: {filtered_result}")
|
|
1323
1594
|
else:
|
|
1324
1595
|
print(f"Function {query['function']} not found")
|