vfbquery 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +35 -1
- test/term_info_queries_test.py +11 -11
- test/test_dataset_template_queries.py +138 -0
- test/test_default_caching.py +15 -11
- test/test_expression_overlaps.py +183 -0
- test/test_expression_pattern_fragments.py +123 -0
- test/test_images_neurons.py +152 -0
- test/test_images_that_develop_from.py +112 -0
- test/test_lineage_clones_in.py +190 -0
- test/test_nblast_queries.py +124 -0
- test/test_neuron_classes_fasciculating.py +187 -0
- test/test_neuron_inputs.py +193 -0
- test/test_neuron_neuron_connectivity.py +89 -0
- test/test_neuron_region_connectivity.py +117 -0
- test/test_neurons_part_here.py +204 -0
- test/test_new_owlery_queries.py +282 -0
- test/test_publication_transgene_queries.py +101 -0
- test/test_query_performance.py +743 -0
- test/test_similar_morphology.py +177 -0
- test/test_tracts_nerves_innervating.py +188 -0
- test/test_transcriptomics.py +223 -0
- vfbquery/__init__.py +22 -1
- vfbquery/neo4j_client.py +120 -0
- vfbquery/owlery_client.py +463 -0
- vfbquery/solr_fetcher.py +1 -1
- vfbquery/solr_result_cache.py +238 -53
- vfbquery/vfb_queries.py +2969 -638
- {vfbquery-0.4.0.dist-info → vfbquery-0.5.0.dist-info}/METADATA +1023 -65
- vfbquery-0.5.0.dist-info/RECORD +39 -0
- vfbquery-0.4.0.dist-info/RECORD +0 -19
- {vfbquery-0.4.0.dist-info → vfbquery-0.5.0.dist-info}/LICENSE +0 -0
- {vfbquery-0.4.0.dist-info → vfbquery-0.5.0.dist-info}/WHEEL +0 -0
- {vfbquery-0.4.0.dist-info → vfbquery-0.5.0.dist-info}/top_level.txt +0 -0
vfbquery/neo4j_client.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lightweight Neo4j REST client.
|
|
3
|
+
|
|
4
|
+
This module provides a minimal Neo4j client extracted from vfb_connect
|
|
5
|
+
to avoid loading heavy GUI dependencies (navis, vispy, matplotlib, etc.)
|
|
6
|
+
that come with the full vfb_connect package.
|
|
7
|
+
|
|
8
|
+
Based on vfb_connect.neo.neo4j_tools.Neo4jConnect
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
import json
|
|
13
|
+
import time
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def dict_cursor(results):
    """
    Turn JSON results from a Neo4j REST query into a flat list of dicts.

    Each statement result is expected to carry a ``'columns'`` list and a
    ``'data'`` list whose items have a ``'row'`` list; every row is zipped
    with the column names of its statement.

    :param results: neo4j query results (list of per-statement results).
        A falsy value (``False``, ``None`` or an empty list) is accepted and
        yields an empty list, so the ``False`` that
        ``Neo4jConnect.commit_list`` returns on failure can be passed
        straight through.
    :return: list of dicts, one per returned row, in result order
    """
    if not results:
        # Nothing to convert: either no statements or a failed commit.
        return []

    rows = []
    for result in results:
        # Skip individual statement results that are empty/failed.
        if not result:
            continue
        rows.extend(
            dict(zip(result['columns'], datum['row']))
            for datum in result['data']
        )
    return rows
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Neo4jConnect:
    """
    Thin layer over Neo4j REST API to handle connections and queries.

    On construction the endpoint is probed with a trivial query against the
    ``/db/neo4j/tx/commit`` path; if that fails, the
    ``/db/data/transaction/commit`` path (the "v3" fallback) is tried.

    :param endpoint: Neo4j REST endpoint (default: VFB production server)
    :param usr: username for authentication
    :param pwd: password for authentication
    :raises Exception: if neither commit path answers the probe query
    """

    # How many times a request is re-sent after a transport-level failure,
    # and how long to pause (seconds) before each re-send. Bounded so that
    # an unreachable server makes the call fail instead of retrying forever.
    MAX_RETRIES = 3
    RETRY_DELAY = 10

    def __init__(self,
                 endpoint: str = "http://pdb.virtualflybrain.org",
                 usr: str = "neo4j",
                 pwd: str = "vfb"):
        self.base_uri = endpoint
        self.usr = usr
        self.pwd = pwd
        self.commit = "/db/neo4j/tx/commit"
        self.headers = {'Content-type': 'application/json'}

        # Test connection and fall back to v3 API if needed
        if not self.test_connection():
            print("Falling back to Neo4j v3 connection")
            self.commit = "/db/data/transaction/commit"
            self.headers = {}
            if not self.test_connection():
                raise Exception("Failed to connect to Neo4j.")

    @staticmethod
    def _build_payload(statements, return_graphs=False):
        """Build the JSON-serialisable request body for a list of statements."""
        if return_graphs:
            wrapped = [
                {'statement': s, "resultDataContents": ["row", "graph"]}
                for s in statements
            ]
        else:
            wrapped = [{'statement': s} for s in statements]
        return {'statements': wrapped}

    def commit_list(self, statements, return_graphs=False):
        """
        Commit a list of Cypher statements to Neo4j via REST API.

        Transport-level failures (``requests`` exceptions) are retried up to
        ``MAX_RETRIES`` times with ``RETRY_DELAY`` seconds between attempts;
        the same payload, including the ``return_graphs`` setting, is sent
        on every attempt.

        :param statements: A list of Cypher statements
        :param return_graphs: If True, returns graphs under 'graph' key
        :return: List of results or False if errors encountered
        """
        url = f"{self.base_uri}{self.commit}"
        body = json.dumps(self._build_payload(statements, return_graphs))

        response = None
        for attempt in range(self.MAX_RETRIES + 1):
            try:
                # NOTE(review): no timeout is passed, so a stalled server
                # blocks indefinitely - consider adding one.
                response = requests.post(
                    url=url,
                    auth=(self.usr, self.pwd),
                    data=body,
                    headers=self.headers
                )
                break
            except requests.exceptions.RequestException as e:
                print(f"\033[31mConnection Error:\033[0m {e}")
                if attempt >= self.MAX_RETRIES:
                    # Out of attempts: report failure the documented way.
                    return False
                print(f"Retrying in {self.RETRY_DELAY} seconds...")
                time.sleep(self.RETRY_DELAY)

        if self.rest_return_check(response):
            return response.json()['results']
        return False

    def rest_return_check(self, response):
        """
        Check status response and report errors.

        :param response: requests.Response object
        :return: True if OK and no errors, False otherwise
        """
        if response.status_code != 200:
            print(f"\033[31mConnection Error:\033[0m {response.status_code} ({response.reason})")
            return False

        # A body without an 'errors' key is treated as error-free.
        errors = response.json().get('errors')
        if errors:
            for e in errors:
                print(f"\033[31mQuery Error:\033[0m {e}")
            return False
        return True

    def test_connection(self):
        """Test neo4j endpoint connection"""
        statements = ["MATCH (n) RETURN n LIMIT 1"]
        return bool(self.commit_list(statements))
|
|
vfbquery/owlery_client.py
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Simple Owlery REST API client to replace VFBConnect dependency.
|
|
3
|
+
|
|
4
|
+
This module provides direct HTTP access to the Owlery OWL reasoning service,
|
|
5
|
+
eliminating the need for vfb_connect which has problematic GUI dependencies.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
import json
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import re
|
|
12
|
+
from urllib.parse import quote
|
|
13
|
+
from typing import List, Optional, Dict, Any, Union
|
|
14
|
+
import concurrent.futures
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def short_form_to_iri(short_form: str) -> str:
    """
    Expand a short form (e.g. 'FBbt_00003748', 'VFBexp_FBtp0022557') to a full IRI.

    Prefix handling:
    - IDs starting with 'VFB' live under http://virtualflybrain.org/reports/
    - everything else (FlyBase 'FB*' IDs included) lives under
      http://purl.obolibrary.org/obo/

    :param short_form: Short form like 'FBbt_00003748' or 'VFBexp_FBtp0022557'
    :return: Full IRI
    """
    if not short_form.startswith('VFB'):
        # FB* IDs and any other prefix share the OBO namespace.
        return f"http://purl.obolibrary.org/obo/{short_form}"
    return f"http://virtualflybrain.org/reports/{short_form}"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def gen_short_form(iri: str) -> str:
    """
    Derive the short_form of an IRI (VFBConnect compatible).

    The IRI is cut at every '/' and '#'; the final piece is the short form.

    :param iri: An IRI string
    :return: short_form
    """
    # re.split always yields at least one piece, so the unpacking is safe.
    *_, last_piece = re.split('/|#', iri)
    return last_piece
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class OwleryClient:
    """
    Simple client for Owlery OWL reasoning service.

    Provides minimal interface matching VFBConnect's OWLeryConnect functionality
    for subclass and instance queries needed by VFBquery.
    """

    # Timeout in seconds passed to every request (40 minutes), because OWL
    # reasoning over complex class expressions can be slow.
    REQUEST_TIMEOUT = 2400

    # Anything wrapped in angle brackets; group 1 is the candidate short form.
    _BRACKETED = re.compile(r'<([^>]+)>')

    def __init__(self, owlery_endpoint: str = "http://owl.virtualflybrain.org/kbs/vfb"):
        """
        Initialize Owlery client.

        :param owlery_endpoint: Base URL for Owlery service (default: VFB public instance)
        """
        # Trailing slashes are removed so endpoint paths can be appended with '/'.
        self.owlery_endpoint = owlery_endpoint.rstrip('/')

    @classmethod
    def _expand_short_forms(cls, query: str) -> str:
        """Replace every ``<SHORT_FORM>`` in *query* with ``<FULL_IRI>``."""
        def expand(match):
            token = match.group(1)
            # Only convert if it looks like a short form (contains underscore,
            # no slashes); full IRIs and other syntax are left untouched.
            if '_' in token and '/' not in token:
                return f"<{short_form_to_iri(token)}>"
            return match.group(0)

        return cls._BRACKETED.sub(expand, query)

    @staticmethod
    def _new_session():
        """Create a requests session that retries GETs on 500/502/503/504."""
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry

        retry_strategy = Retry(
            total=3,  # Total number of retries
            backoff_factor=2,  # Exponential backoff between retries
            status_forcelist=[500, 502, 503, 504],  # Retry on server errors
            allowed_methods=["GET"]  # Only retry GET requests
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        return session

    @staticmethod
    def _extract_iris(data: Any, result_key: str, query: str,
                      verbose: bool = False) -> Optional[List[Any]]:
        """
        Pull the list of IRIs out of a decoded Owlery response.

        :param data: decoded JSON body; either a dict holding *result_key*
                     or a bare list of IRIs
        :param result_key: key under which Owlery returns the IRIs
        :param query: original query, only used in verbose diagnostics
        :param verbose: If True, print debug information
        :return: the list of IRIs, or None if the response is unusable
        """
        if verbose:
            print(f"Response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}")

        if isinstance(data, dict) and result_key in data:
            iris = data[result_key]
        elif isinstance(data, list):
            # Fallback: simple list response
            iris = data
        else:
            if verbose:
                print(f"Unexpected Owlery response format: {type(data)}")
                print(f"Response: {data}")
            return None

        if not isinstance(iris, list):
            if verbose:
                print(f"Warning: No results! This is likely due to a query error")
                print(f"Query: {query}")
            return None
        return iris

    def get_subclasses(self, query: str, query_by_label: bool = True,
                       verbose: bool = False, direct: bool = False) -> List[str]:
        """
        Query Owlery for subclasses matching an OWL class expression.

        This replicates the VFBConnect OWLeryConnect.get_subclasses() method.
        Based on: https://github.com/VirtualFlyBrain/VFB_connect/blob/master/src/vfb_connect/owl/owlery_query_tools.py

        Deprecated terms are excluded and equivalent classes are included.

        :param query: OWL class expression query string (with short forms like '<FBbt_00003748>')
        :param query_by_label: Accepted for VFBConnect signature compatibility;
                               not used by this implementation.
        :param verbose: If True, print debug information
        :param direct: Return direct subclasses only. Default False.
        :return: List of class IDs (short forms like 'FBbt_00003748')
        :raises requests.RequestException: if the HTTP request fails
        """
        try:
            iri_query = self._expand_short_forms(query)

            if verbose:
                print(f"Original query: {query}")
                print(f"IRI query: {iri_query}")

            params = {
                'object': iri_query,
                'direct': 'true' if direct else 'false',
                'includeDeprecated': 'false',  # Exclude deprecated terms
                'includeEquivalent': 'true'  # Include equivalent classes
            }

            # The body is fully read before the session closes (no streaming),
            # so the response stays usable after the with-block.
            with self._new_session() as session:
                response = session.get(
                    f"{self.owlery_endpoint}/subclasses",
                    params=params,
                    timeout=self.REQUEST_TIMEOUT
                )

            if verbose:
                print(f"Owlery query: {response.url}")

            response.raise_for_status()

            # Owlery returns: {"superClassOf": ["IRI1", "IRI2", ...]}
            iris = self._extract_iris(response.json(), 'superClassOf', query, verbose)
            if iris is None:
                return []

            short_forms = [gen_short_form(iri) for iri in iris]

            if verbose:
                print(f"Found {len(short_forms)} subclasses")

            return short_forms

        except requests.RequestException as e:
            print(f"ERROR: Owlery request failed: {e}")
            raise
        except Exception as e:
            print(f"ERROR: Unexpected error in Owlery query: {e}")
            raise

    def get_instances(self, query: str, query_by_label: bool = True,
                      verbose: bool = False, direct: bool = False) -> List[str]:
        """
        Query Owlery for instances matching an OWL class expression.

        Similar to get_subclasses but returns individuals/instances instead of classes.
        Used for queries like ImagesNeurons that need individual images rather than classes.

        :param query: OWL class expression query string (with short forms like '<FBbt_00003748>')
        :param query_by_label: Accepted for VFBConnect signature compatibility;
                               not used by this implementation.
        :param verbose: If True, print debug information
        :param direct: Return direct instances only. Default False.
        :return: List of instance IDs (short forms like 'VFB_00101567')
        :raises requests.RequestException: if the HTTP request fails
        """
        # Filled in once the request URL is known, so the error handler can
        # report it without recomputing anything.
        attempted_url = None
        try:
            iri_query = self._expand_short_forms(query)

            if verbose:
                print(f"Original query: {query}")
                print(f"IRI query: {iri_query}")

            params = {
                'object': iri_query,
                'direct': 'true' if direct else 'false',
                'includeDeprecated': 'false'
            }

            # Build full URL for debugging
            endpoint_url = f"{self.owlery_endpoint}/instances"
            attempted_url = requests.Request('GET', endpoint_url, params=params).prepare().url

            if verbose:
                print(f"Owlery instances URL: {attempted_url}")

            with self._new_session() as session:
                response = session.get(
                    endpoint_url,
                    params=params,
                    timeout=self.REQUEST_TIMEOUT
                )

            response.raise_for_status()

            # KEY DIFFERENCE: Owlery returns {"hasInstance": ["IRI1", "IRI2", ...]} for instances
            # whereas subclasses returns {"superClassOf": [...]}
            iris = self._extract_iris(response.json(), 'hasInstance', query, verbose)
            if iris is None:
                return []

            short_forms = [gen_short_form(iri) for iri in iris]

            if verbose:
                print(f"Found {len(short_forms)} instances")
                if short_forms:
                    print(f"Sample instances: {short_forms[:5]}")

            return short_forms

        except requests.RequestException as e:
            print(f"ERROR: Owlery instances request failed: {e}")
            if attempted_url is not None:
                # Show the full URL that was attempted
                print(f" Test URL: {attempted_url}")
            raise
        except Exception as e:
            print(f"ERROR: Unexpected error in Owlery instances query: {e}")
            raise
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class MockNeo4jClient:
    """
    Mock Neo4j client that raises NotImplementedError for all queries.
    Used when Neo4j is not available or connection fails.
    """

    def commit_list(self, statements, return_graphs=False):
        """
        Stand-in for a real client's ``commit_list``; always fails.

        Accepts the same arguments as ``Neo4jConnect.commit_list`` so that
        callers passing ``return_graphs`` get the intended error rather than
        a ``TypeError``.

        :param statements: ignored
        :param return_graphs: ignored
        :raises NotImplementedError: always
        """
        raise NotImplementedError(
            "Neo4j queries are not available. "
            "Either Neo4j server is unavailable or connection failed."
        )
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class SimpleVFBConnect:
    """
    Minimal replacement for VFBConnect that works in headless environments.

    Provides:
    - Owlery client (vc.vfb.oc) for OWL reasoning queries
    - Neo4j client (vc.nc) - tries real Neo4j first, falls back to mock
    - SOLR term info fetcher (vc.get_TermInfo) for term metadata

    This eliminates the need for vfb_connect which requires GUI libraries
    (vispy, Quartz.framework on macOS) that aren't available in all dev environments.
    """

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json"):
        """
        Initialize simple VFB connection with Owlery and SOLR access.

        No network access happens here; the Owlery and Neo4j clients are
        created lazily by the ``vfb`` and ``nc`` properties.

        :param solr_url: Base URL for SOLR server (default: VFB public instance)
        """
        self._vfb = None
        self._nc = None
        self._nc_available = None  # Cache whether Neo4j is available
        self.solr_url = solr_url

    @property
    def vfb(self):
        """Get VFB object with Owlery client."""
        if self._vfb is None:
            # Create simple object with oc (Owlery client) property
            class VFBObject:
                def __init__(self):
                    self.oc = OwleryClient()
            self._vfb = VFBObject()
        return self._vfb

    @property
    def nc(self):
        """
        Get Neo4j client - tries real Neo4j first, falls back to mock.

        Attempts to connect to Neo4j using our lightweight client.
        If unavailable (server down, network issues), returns mock client.
        The outcome of the first attempt is remembered in ``_nc_available``.
        """
        if self._nc is not None:
            return self._nc

        if self._nc_available is None:
            # First use: probe the real server and remember the outcome.
            try:
                from .neo4j_client import Neo4jConnect
                # Try to initialize - this will fail if Neo4j server unreachable
                self._nc = Neo4jConnect()
                self._nc_available = True
                print("✅ Neo4j connection established")
            except Exception as e:
                # Fall back to mock client
                self._nc = MockNeo4jClient()
                self._nc_available = False
                print(f"ℹ️ Neo4j unavailable ({type(e).__name__}), using Owlery-only mode")
        elif self._nc_available:
            # Client was cleared after a successful probe: reconnect.
            from .neo4j_client import Neo4jConnect
            self._nc = Neo4jConnect()
        else:
            # Earlier probe failed: do not try the network again.
            self._nc = MockNeo4jClient()
        return self._nc

    @staticmethod
    def _parse_term_info(data: Dict[str, Any]) -> Optional[Any]:
        """
        Extract the decoded term_info object from a SOLR select response.

        :param data: decoded JSON body of the SOLR response
        :return: the JSON-decoded first ``term_info`` entry of the first
                 document, or None if there is no document or no term_info
        :raises json.JSONDecodeError: if the term_info entry is not valid JSON
        """
        docs = data.get("response", {}).get("docs", [])
        if not docs:
            # no result for this id
            return None

        term_info = docs[0].get("term_info")
        if not term_info:
            return None

        # term_info is a list whose first element is a JSON string.
        return json.loads(term_info[0])

    def get_TermInfo(self, short_forms: List[str],
                     return_dataframe: bool = False,
                     summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
        """
        Fetch term info from SOLR directly.

        This replicates VFBConnect's get_TermInfo method using direct SOLR queries.
        IDs are fetched concurrently (at most 10 at a time). A failed or
        empty lookup is reported on stdout and omitted from the result.

        :param short_forms: List of term IDs to fetch (e.g., ['FBbt_00003748'])
        :param return_dataframe: If True, return as pandas DataFrame. When
                                 nothing was found, or the conversion fails,
                                 the plain list is returned instead.
        :param summary: If True, return summarized version (currently ignored)
        :return: List of term info dictionaries (in input order) or DataFrame
        """
        def fetch(short_form: str):
            """Fetch one term; returns None (after logging) on any failure."""
            try:
                response = requests.get(
                    f"{self.solr_url}/select",
                    params={
                        "indent": "true",
                        "fl": "term_info",
                        "q.op": "OR",
                        "q": f"id:{short_form}"
                    },
                    timeout=30
                )
                response.raise_for_status()
                return self._parse_term_info(response.json())
            except requests.RequestException as e:
                print(f"ERROR: Error fetching data from SOLR for {short_form}: {e}")
            except json.JSONDecodeError as e:
                print(f"ERROR: Error decoding JSON for {short_form}: {e}")
            except Exception as e:
                print(f"ERROR: Unexpected error for {short_form}: {e}")
            return None

        max_workers = min(10, max(1, len(short_forms)))
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
            # Executor.map yields results in input order, so no reassembly
            # is needed; fetch() never raises, so neither does the iteration.
            fetched = list(pool.map(fetch, short_forms))

        # Drop IDs for which nothing could be retrieved.
        results = [info for info in fetched if info is not None]

        # Convert to DataFrame if requested
        if return_dataframe and results:
            try:
                return pd.json_normalize(results)
            except Exception as e:
                print(f"ERROR: Error converting to DataFrame: {e}")
                return results

        return results
|
vfbquery/solr_fetcher.py
CHANGED