vfbquery 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ """
2
+ Lightweight Neo4j REST client.
3
+
4
+ This module provides a minimal Neo4j client extracted from vfb_connect
5
+ to avoid loading heavy GUI dependencies (navis, vispy, matplotlib, etc.)
6
+ that come with the full vfb_connect package.
7
+
8
+ Based on vfb_connect.neo.neo4j_tools.Neo4jConnect
9
+ """
10
+
11
+ import requests
12
+ import json
13
+ import time
14
+
15
+
16
def dict_cursor(results):
    """
    Flatten Neo4j REST query results into a list of row dictionaries.

    Each truthy entry of ``results`` contributes one dict per item of its
    ``'data'`` list, built by pairing the entry's ``'columns'`` with the
    item's ``'row'`` values. Falsy entries (e.g. failed statements) are
    skipped.

    :param results: neo4j query results
    :return: list of dicts
    """
    return [
        dict(zip(result['columns'], record['row']))
        for result in results
        if result  # skip any failures
        for record in result['data']
    ]
30
+
31
+
32
class Neo4jConnect:
    """
    Thin layer over Neo4j REST API to handle connections and queries.

    On construction the transactional endpoint ``/db/neo4j/tx/commit`` is
    probed; if the probe fails, the legacy v3 endpoint
    ``/db/data/transaction/commit`` is tried before giving up.

    :param endpoint: Neo4j REST endpoint (default: VFB production server)
    :param usr: username for authentication
    :param pwd: password for authentication
    :raises Exception: if neither endpoint answers the probe query
    """

    def __init__(self,
                 endpoint: str = "http://pdb.virtualflybrain.org",
                 usr: str = "neo4j",
                 pwd: str = "vfb"):
        self.base_uri = endpoint
        self.usr = usr
        self.pwd = pwd
        self.commit = "/db/neo4j/tx/commit"
        self.headers = {'Content-type': 'application/json'}

        # Test connection and fall back to v3 API if needed
        if not self.test_connection():
            print("Falling back to Neo4j v3 connection")
            self.commit = "/db/data/transaction/commit"
            self.headers = {}
            if not self.test_connection():
                raise Exception("Failed to connect to Neo4j.")

    def commit_list(self, statements, return_graphs=False,
                    max_retries=3, retry_delay=10):
        """
        Commit a list of Cypher statements to Neo4j via REST API.

        Connection-level failures are retried up to ``max_retries`` times
        with the same payload (including ``return_graphs``), sleeping
        ``retry_delay`` seconds between attempts. Once the retries are
        exhausted the method returns False instead of retrying forever, so
        callers such as ``__init__`` can fall back or fail.

        :param statements: A list of Cypher statements
        :param return_graphs: If True, returns graphs under 'graph' key
        :param max_retries: Number of retries after a connection error
        :param retry_delay: Seconds to wait between retries
        :return: List of results or False if errors encountered
        """
        cstatements = []
        for statement in statements:
            entry = {'statement': statement}
            if return_graphs:
                entry["resultDataContents"] = ["row", "graph"]
            cstatements.append(entry)

        body = json.dumps({'statements': cstatements})
        retries_left = max(0, max_retries)

        while True:
            try:
                response = requests.post(
                    url=f"{self.base_uri}{self.commit}",
                    auth=(self.usr, self.pwd),
                    data=body,
                    headers=self.headers
                )
                break
            except requests.exceptions.RequestException as e:
                print(f"\033[31mConnection Error:\033[0m {e}")
                if retries_left <= 0:
                    # Give up: report failure through the documented
                    # False return rather than looping indefinitely.
                    return False
                retries_left -= 1
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)

        if self.rest_return_check(response):
            return response.json()['results']
        return False

    def rest_return_check(self, response):
        """
        Check status response and report errors.

        Prints a message for a non-200 status and for every entry of the
        response's ``'errors'`` list.

        :param response: requests.Response object
        :return: True if OK and no errors, False otherwise
        """
        if response.status_code != 200:
            print(f"\033[31mConnection Error:\033[0m {response.status_code} ({response.reason})")
            return False

        errors = response.json()['errors']
        if errors:
            for e in errors:
                print(f"\033[31mQuery Error:\033[0m {e}")
            return False
        return True

    def test_connection(self):
        """Test neo4j endpoint connection"""
        statements = ["MATCH (n) RETURN n LIMIT 1"]
        return bool(self.commit_list(statements))
@@ -0,0 +1,463 @@
1
+ """
2
+ Simple Owlery REST API client to replace VFBConnect dependency.
3
+
4
+ This module provides direct HTTP access to the Owlery OWL reasoning service,
5
+ eliminating the need for vfb_connect which has problematic GUI dependencies.
6
+ """
7
+
8
+ import requests
9
+ import json
10
+ import pandas as pd
11
+ import re
12
+ from urllib.parse import quote
13
+ from typing import List, Optional, Dict, Any, Union
14
+ import concurrent.futures
15
+
16
+
17
def short_form_to_iri(short_form: str) -> str:
    """
    Expand a short form (e.g., 'FBbt_00003748', 'VFBexp_FBtp0022557') to a full IRI.

    Prefix handling:
    - VFB* -> http://virtualflybrain.org/reports/
    - anything else (including FB* FlyBase IDs) -> http://purl.obolibrary.org/obo/

    :param short_form: Short form like 'FBbt_00003748' or 'VFBexp_FBtp0022557'
    :return: Full IRI
    """
    vfb_base = "http://virtualflybrain.org/reports/"
    obo_base = "http://purl.obolibrary.org/obo/"

    # Only VFB identifiers live under virtualflybrain.org; every other
    # identifier resolves under the OBO PURL namespace.
    base = vfb_base if short_form.startswith('VFB') else obo_base
    return f"{base}{short_form}"
39
+
40
+
41
def gen_short_form(iri: str) -> str:
    """
    Derive the short_form of an IRI string (VFBConnect compatible).

    The IRI is split on every '/' and '#'; the final segment is the
    short_form. A string containing neither separator is returned as-is.

    :param iri: An IRI string
    :return: short_form
    """
    *_, last_segment = re.split('/|#', iri)
    return last_segment
50
+
51
+
52
class OwleryClient:
    """
    Simple client for Owlery OWL reasoning service.

    Provides minimal interface matching VFBConnect's OWLeryConnect functionality
    for subclass and instance queries needed by VFBquery.
    """

    # Owlery reasoning over complex class expressions can be slow, so each
    # request is allowed 40 minutes before timing out.
    _TIMEOUT_SECONDS = 2400

    def __init__(self, owlery_endpoint: str = "http://owl.virtualflybrain.org/kbs/vfb"):
        """
        Initialize Owlery client.

        :param owlery_endpoint: Base URL for Owlery service (default: VFB public instance).
                                Trailing slashes are stripped.
        """
        self.owlery_endpoint = owlery_endpoint.rstrip('/')

    @staticmethod
    def _expand_short_forms(query: str) -> str:
        """
        Rewrite every ``<SHORT_FORM>`` token of *query* as ``<FULL_IRI>``.

        A token is treated as a short form when it contains an underscore and
        no slash (e.g. ``<FBbt_00003748>``); anything else between angle
        brackets is assumed to already be an IRI and is left untouched.
        """
        def _to_iri(match):
            token = match.group(1)  # content between < >
            if '_' in token and '/' not in token:
                return f"<{short_form_to_iri(token)}>"
            return match.group(0)

        return re.sub(r'<([^>]+)>', _to_iri, query)

    def _get(self, url: str, params: Dict[str, str]):
        """
        Issue a GET request through a retrying session and return the response.

        Server errors (500/502/503/504) and connection failures are retried
        up to three times with exponential backoff. The session is closed
        before returning; the response body has already been read by then
        because the request is not streamed.
        """
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry

        retry_strategy = Retry(
            total=3,
            backoff_factor=2,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET"],  # only retry GET requests
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        with requests.Session() as session:
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            return session.get(url, params=params, timeout=self._TIMEOUT_SECONDS)

    @staticmethod
    def _extract_iris(data, result_key: str, query: str, verbose: bool):
        """
        Pull the list of IRIs out of a decoded Owlery response.

        Accepts either ``{result_key: [...]}`` or a bare list. Returns None
        (after optional verbose diagnostics) when the response has any other
        shape or the value under *result_key* is not a list.
        """
        if isinstance(data, dict) and result_key in data:
            iris = data[result_key]
        elif isinstance(data, list):
            # Fallback: simple list response
            iris = data
        else:
            if verbose:
                print(f"Unexpected Owlery response format: {type(data)}")
                print(f"Response: {data}")
            return None

        if not isinstance(iris, list):
            if verbose:
                print(f"Warning: No results! This is likely due to a query error")
                print(f"Query: {query}")
            return None
        return iris

    def get_subclasses(self, query: str, query_by_label: bool = True,
                       verbose: bool = False, direct: bool = False) -> List[str]:
        """
        Query Owlery for subclasses matching an OWL class expression.

        This replicates the VFBConnect OWLeryConnect.get_subclasses() method.
        Based on: https://github.com/VirtualFlyBrain/VFB_connect/blob/master/src/vfb_connect/owl/owlery_query_tools.py

        :param query: OWL class expression query string (with short forms like '<FBbt_00003748>')
        :param query_by_label: Accepted for VFBConnect compatibility; not used by
                               this implementation.
        :param verbose: If True, print debug information
        :param direct: Return direct subclasses only. Default False.
        :return: List of class IDs (short forms like 'FBbt_00003748')
        :raises requests.RequestException: if the HTTP request fails
        """
        try:
            iri_query = self._expand_short_forms(query)

            if verbose:
                print(f"Original query: {query}")
                print(f"IRI query: {iri_query}")

            params = {
                'object': iri_query,
                'direct': 'true' if direct else 'false',
                'includeDeprecated': 'false',  # Exclude deprecated terms
                'includeEquivalent': 'true'    # Include equivalent classes
            }

            response = self._get(f"{self.owlery_endpoint}/subclasses", params)

            if verbose:
                print(f"Owlery query: {response.url}")

            response.raise_for_status()

            # Owlery returns: {"superClassOf": ["IRI1", "IRI2", ...]}
            data = response.json()

            if verbose:
                print(f"Response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}")

            iris = self._extract_iris(data, 'superClassOf', query, verbose)
            if iris is None:
                return []

            short_forms = [gen_short_form(iri) for iri in iris]

            if verbose:
                print(f"Found {len(short_forms)} subclasses")

            return short_forms

        except requests.RequestException as e:
            print(f"ERROR: Owlery request failed: {e}")
            raise
        except Exception as e:
            print(f"ERROR: Unexpected error in Owlery query: {e}")
            raise

    def get_instances(self, query: str, query_by_label: bool = True,
                      verbose: bool = False, direct: bool = False) -> List[str]:
        """
        Query Owlery for instances matching an OWL class expression.

        Similar to get_subclasses but returns individuals/instances instead of classes.
        Used for queries like ImagesNeurons that need individual images rather than classes.

        :param query: OWL class expression query string (with short forms like '<FBbt_00003748>')
        :param query_by_label: Accepted for VFBConnect compatibility; not used by
                               this implementation.
        :param verbose: If True, print debug information
        :param direct: Return direct instances only. Default False.
        :return: List of instance IDs (short forms like 'VFB_00101567')
        :raises requests.RequestException: if the HTTP request fails
        """
        # Remembered so the error handler can show the attempted URL.
        request_url = None
        try:
            iri_query = self._expand_short_forms(query)

            if verbose:
                print(f"Original query: {query}")
                print(f"IRI query: {iri_query}")

            params = {
                'object': iri_query,
                'direct': 'true' if direct else 'false',
                'includeDeprecated': 'false'
            }

            endpoint = f"{self.owlery_endpoint}/instances"
            request_url = requests.Request('GET', endpoint, params=params).prepare().url

            if verbose:
                print(f"Owlery instances URL: {request_url}")

            response = self._get(endpoint, params)
            response.raise_for_status()

            # Owlery returns {"hasInstance": ["IRI1", "IRI2", ...]} for instances,
            # whereas subclasses returns {"superClassOf": [...]}
            data = response.json()

            if verbose:
                print(f"Response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}")

            iris = self._extract_iris(data, 'hasInstance', query, verbose)
            if iris is None:
                return []

            short_forms = [gen_short_form(iri) for iri in iris]

            if verbose:
                print(f"Found {len(short_forms)} instances")
                if short_forms:
                    print(f"Sample instances: {short_forms[:5]}")

            return short_forms

        except requests.RequestException as e:
            print(f"ERROR: Owlery instances request failed: {e}")
            if request_url is not None:
                print(f"  Test URL: {request_url}")
            raise
        except Exception as e:
            print(f"ERROR: Unexpected error in Owlery instances query: {e}")
            raise
309
+
310
+
311
class MockNeo4jClient:
    """
    Mock Neo4j client that raises NotImplementedError for all queries.
    Used when Neo4j is not available or connection fails.
    """

    def commit_list(self, statements, return_graphs=False):
        """
        Stand-in for the real client's ``commit_list``; always fails.

        Accepts the same ``return_graphs`` argument as the real client so
        that callers passing it get the explanatory NotImplementedError
        rather than a TypeError about an unexpected argument.

        :param statements: A list of Cypher statements (ignored)
        :param return_graphs: Ignored
        :raises NotImplementedError: always
        """
        raise NotImplementedError(
            "Neo4j queries are not available. "
            "Either Neo4j server is unavailable or connection failed."
        )
321
+
322
+
323
class SimpleVFBConnect:
    """
    Headless-friendly, minimal stand-in for VFBConnect.

    Exposes:
    - ``vfb.oc``: Owlery client for OWL reasoning queries
    - ``nc``: Neo4j client (real client when reachable, otherwise a mock)
    - ``get_TermInfo``: term metadata fetched straight from SOLR

    Using this class avoids importing vfb_connect, which requires GUI
    libraries (vispy, Quartz.framework on macOS) that aren't available in
    all dev environments.
    """

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json"):
        """
        Set up lazy holders for the Owlery and Neo4j clients.

        No network access happens here; the clients are created on first
        use of the ``vfb`` and ``nc`` properties.

        :param solr_url: Base URL for SOLR server (default: VFB public instance)
        """
        self.solr_url = solr_url
        self._vfb = None
        self._nc = None
        self._nc_available = None  # None = not probed yet; True/False afterwards

    @property
    def vfb(self):
        """Lazily built holder object whose ``oc`` attribute is an OwleryClient."""
        if self._vfb is not None:
            return self._vfb

        class VFBObject:
            def __init__(self):
                self.oc = OwleryClient()

        self._vfb = VFBObject()
        return self._vfb

    @property
    def nc(self):
        """
        Lazily built Neo4j client.

        On first access a real connection is attempted with the lightweight
        client; if that raises for any reason, a mock client is cached
        instead and the outcome is remembered in ``_nc_available``.
        """
        if self._nc is not None:
            return self._nc

        if self._nc_available is None:
            # First access: probe the real server.
            try:
                from .neo4j_client import Neo4jConnect
                self._nc = Neo4jConnect()
                self._nc_available = True
                print("✅ Neo4j connection established")
            except Exception as e:
                self._nc = MockNeo4jClient()
                self._nc_available = False
                print(f"ℹ️ Neo4j unavailable ({type(e).__name__}), using Owlery-only mode")
        elif self._nc_available:
            from .neo4j_client import Neo4jConnect
            self._nc = Neo4jConnect()
        else:
            self._nc = MockNeo4jClient()

        return self._nc

    def get_TermInfo(self, short_forms: List[str],
                     return_dataframe: bool = False,
                     summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
        """
        Fetch term info for each ID directly from SOLR.

        Lookups run concurrently (at most 10 worker threads). The returned
        entries follow the order of ``short_forms``; IDs with no usable
        result are omitted.

        :param short_forms: List of term IDs to fetch (e.g., ['FBbt_00003748'])
        :param return_dataframe: If True and there are results, return a pandas
                                 DataFrame (falls back to the list on conversion error)
        :param summary: If True, return summarized version (currently ignored)
        :return: List of term info dictionaries or DataFrame
        """
        def _lookup(short_form: str):
            """Return the decoded term_info for one ID, or None on miss/error."""
            try:
                query_params = {
                    "indent": "true",
                    "fl": "term_info",
                    "q.op": "OR",
                    "q": f"id:{short_form}"
                }
                response = requests.get(f"{self.solr_url}/select",
                                        params=query_params, timeout=30)
                response.raise_for_status()

                docs = response.json().get("response", {}).get("docs", [])
                if not docs:
                    return None

                first_doc = docs[0]
                if "term_info" not in first_doc or not first_doc["term_info"]:
                    return None

                # term_info holds a JSON-encoded string as its first element
                return json.loads(first_doc["term_info"][0])

            except requests.RequestException as e:
                print(f"ERROR: Error fetching data from SOLR for {short_form}: {e}")
            except json.JSONDecodeError as e:
                print(f"ERROR: Error decoding JSON for {short_form}: {e}")
            except Exception as e:
                print(f"ERROR: Unexpected error for {short_form}: {e}")
            return None

        fetched = {}
        worker_count = min(10, max(1, len(short_forms)))
        with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as pool:
            pending = {pool.submit(_lookup, sf): sf for sf in short_forms}
            # Futures finish in arbitrary order; results are keyed by ID so
            # the input order can be restored afterwards.
            for done in concurrent.futures.as_completed(pending):
                sf = pending[done]
                try:
                    fetched[sf] = done.result()
                except Exception as e:
                    print(f"ERROR: Exception while fetching {sf}: {e}")

        # Reassemble in input order, dropping IDs that produced nothing.
        candidates = (fetched.get(sf) for sf in short_forms)
        results = [info for info in candidates if info is not None]

        if not (return_dataframe and results):
            return results

        try:
            return pd.json_normalize(results)
        except Exception as e:
            print(f"ERROR: Error converting to DataFrame: {e}")
            return results
vfbquery/solr_fetcher.py CHANGED
@@ -91,7 +91,7 @@ class SolrTermInfoFetcher:
91
91
  }
92
92
 
93
93
  self.logger.debug(f"Querying Solr for {short_form}")
94
- response = requests.get(url, params=params)
94
+ response = requests.get(url, params=params, timeout=120)
95
95
  response.raise_for_status()
96
96
 
97
97
  data = response.json()