vfbquery 0.2.12__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +103 -0
- test/term_info_queries_test.py +111 -177
- test/test_examples_diff.py +317 -0
- vfbquery/__init__.py +3 -0
- vfbquery/solr_fetcher.py +100 -0
- vfbquery/term_info_queries.py +63 -3
- vfbquery/test_utils.py +39 -0
- vfbquery/vfb_queries.py +350 -76
- vfbquery-0.3.3.dist-info/METADATA +1316 -0
- vfbquery-0.3.3.dist-info/RECORD +14 -0
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.3.dist-info}/WHEEL +1 -1
- vfbquery-0.2.12.dist-info/METADATA +0 -1169
- vfbquery-0.2.12.dist-info/RECORD +0 -10
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.3.dist-info}/LICENSE +0 -0
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.3.dist-info}/top_level.txt +0 -0
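The bulk of the change is in `vfbquery/vfb_queries.py`, shown below. Term info is now fetched directly from the VFB SOLR index through the new `SolrTermInfoFetcher` (added in `vfbquery/solr_fetcher.py`) rather than through a `VfbConnect` client; `dict_cursor` is imported lazily so that importing the package no longer pulls in GUI-linked dependencies; brackets in labels are %-encoded before being embedded in markdown links; and the term-info payload gains `Publications` and `Synonyms` fields. A minimal usage sketch (assuming `get_term_info` is re-exported at package level, as the `vfbquery/__init__.py` change suggests; the exact return shape is defined by the schema changes below):

```python
import vfbquery as vfb  # no vfb_connect import happens at this point

# Term lookups are served straight from SOLR by SolrTermInfoFetcher.
info = vfb.get_term_info("FBbt_00003748")  # an anatomy class term

info["Meta"]["Name"]          # markdown link; brackets in the label are %-encoded
info.get("Publications", [])  # new in the 0.3.x term-info payload
info.get("Synonyms", [])      # new in the 0.3.x term-info payload
```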
vfbquery/vfb_queries.py  CHANGED
```diff
@@ -1,17 +1,28 @@
 import pysolr
 from .term_info_queries import deserialize_term_info
-
+# Replace VfbConnect import with our new SolrTermInfoFetcher
+from .solr_fetcher import SolrTermInfoFetcher
+# Keep dict_cursor if it's used elsewhere - lazy import to avoid GUI issues
 from marshmallow import Schema, fields, post_load
-from typing import List, Tuple
+from typing import List, Tuple, Dict, Any, Union
 import pandas as pd
 from marshmallow import ValidationError
 import json
 
+# Lazy import for dict_cursor to avoid GUI library issues
+def get_dict_cursor():
+    """Lazy import dict_cursor to avoid import issues during testing"""
+    try:
+        from vfb_connect.cross_server_tools import dict_cursor
+        return dict_cursor
+    except ImportError as e:
+        raise ImportError(f"vfb_connect is required but could not be imported: {e}")
+
 # Connect to the VFB SOLR server
 vfb_solr = pysolr.Solr('http://solr.virtualflybrain.org/solr/vfb_json/', always_commit=False, timeout=990)
 
-#
-vc =
+# Replace VfbConnect with SolrTermInfoFetcher
+vc = SolrTermInfoFetcher()
 
 class Query:
     def __init__(self, query, label, function, takes, preview=0, preview_columns=[], preview_results=[], output_format="table", count=-1):
@@ -231,6 +242,8 @@ class TermInfoOutputSchema(Schema):
     IsTemplate = fields.Bool(missing=False, required=False)
     Domains = fields.Dict(keys=fields.Integer(), values=fields.Nested(ImageSchema()), required=False, allow_none=True)
     Licenses = fields.Dict(keys=fields.Integer(), values=fields.Nested(LicenseSchema()), required=False, allow_none=True)
+    Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False)
+    Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False, allow_none=True)
 
     @post_load
     def make_term_info(self, data, **kwargs):
@@ -244,9 +257,93 @@ class TermInfoOutputSchema(Schema):
         term_info_data["Queries"] = [query.to_dict() for query in term_info_data["Queries"]]
         return str(self.dump(term_info_data))
 
+def encode_brackets(text):
+    """
+    Encodes brackets in the given text.
+
+    :param text: The text to encode.
+    :return: The text with brackets encoded.
+    """
+    return (text.replace('(', '%28')
+            .replace(')', '%29')
+            .replace('[', '%5B')
+            .replace(']', '%5D'))
+
+def encode_markdown_links(df, columns):
+    """
+    Encodes brackets in the labels and titles within markdown links and images, leaving the link syntax intact.
+    :param df: DataFrame containing the query results.
+    :param columns: List of column names to apply encoding to.
+    """
+    def encode_label(label):
+        if not isinstance(label, str):
+            return label
+
+        try:
+            # Process linked images (format: [](link))
+            if label.startswith("[
+                if len(parts) < 2:
+                    return label
+
+                image_part = parts[0]
+                link_part = parts[1]
+
+                # Process the image part
+                image_parts = image_part.split("](")
+                if len(image_parts) < 2:
+                    return label
+
+                alt_text = image_parts[0][3:]  # Remove the "[]({link_part}"
+                return encoded_label
+
+            # Process regular markdown links
+            elif label.startswith("[") and "](" in label:
+                parts = label.split("](")
+                if len(parts) < 2:
+                    return label
+
+                label_part = parts[0][1:]  # Remove the leading '['
+                # Encode brackets in the label part
+                label_part_encoded = encode_brackets(label_part)
+                # Reconstruct the markdown link with the encoded label
+                encoded_label = f"[{label_part_encoded}]({parts[1]}"
+                return encoded_label
+
+        except Exception as e:
+            # In case of any other unexpected error, log or print the error and return the original label
+            print(f"Error processing label: {label}, error: {e}")
+            return label
+
+        # If none of the conditions above match, return the original label
+        return label
+
+    for column in columns:
+        df[column] = df[column].apply(lambda x: encode_label(x) if pd.notnull(x) else x)
+
+    return df
 
 def term_info_parse_object(results, short_form):
     termInfo = {}
+    termInfo["SuperTypes"] = []
+    termInfo["Tags"] = []
+    termInfo["Queries"] = []
+    termInfo["IsClass"] = False
+    termInfo["IsIndividual"] = False
+    termInfo["IsTemplate"] = False
+    termInfo["Images"] = {}
+    termInfo["Examples"] = {}
+    termInfo["Domains"] = {}
+    termInfo["Licenses"] = {}
+    termInfo["Publications"] = []
+    termInfo["Synonyms"] = []
+
    if results.hits > 0 and results.docs and len(results.docs) > 0:
         termInfo["Meta"] = {}
         try:
```
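The two helpers above exist because query results embed labels inside markdown links, and labels such as neuron names with parenthesised qualifiers would otherwise break the link syntax. A quick sketch of the plain-link path, using the helpers exactly as defined in the diff (the ID below is made up):

```python
import pandas as pd
from vfbquery.vfb_queries import encode_brackets, encode_markdown_links

print(encode_brackets("DL1 adPN (FAFB)"))
# DL1 adPN %28FAFB%29

# Brackets inside the label are %-encoded; the surrounding link syntax survives.
df = pd.DataFrame({"label": ["[DL1 adPN (FAFB)](VFB_00000001)"]})
df = encode_markdown_links(df, ["label"])
print(df["label"][0])
# [DL1 adPN %28FAFB%29](VFB_00000001)
```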
```diff
@@ -255,15 +352,21 @@ def term_info_parse_object(results, short_form):
         except KeyError:
             print(f"SOLR doc missing 'term_info': {results.docs[0]}")
             return None
+        except Exception as e:
+            print(f"Error deserializing term info: {e}")
+            return None
+
         queries = []
+        # Initialize synonyms variable to avoid UnboundLocalError
+        synonyms = []
         termInfo["Id"] = vfbTerm.term.core.short_form
-        termInfo["Meta"]["Name"] = "[%s](%s)"%(vfbTerm.term.core.label, vfbTerm.term.core.short_form)
+        termInfo["Meta"]["Name"] = "[%s](%s)"%(encode_brackets(vfbTerm.term.core.label), vfbTerm.term.core.short_form)
         mainlabel = vfbTerm.term.core.label
-        if vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
-            termInfo["Meta"]["Symbol"] = "[%s](%s)"%(vfbTerm.term.core.symbol, vfbTerm.term.core.short_form)
+        if hasattr(vfbTerm.term.core, 'symbol') and vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
+            termInfo["Meta"]["Symbol"] = "[%s](%s)"%(encode_brackets(vfbTerm.term.core.symbol), vfbTerm.term.core.short_form)
             mainlabel = vfbTerm.term.core.symbol
         termInfo["Name"] = mainlabel
-        termInfo["SuperTypes"] = vfbTerm.term.core.types
+        termInfo["SuperTypes"] = vfbTerm.term.core.types if hasattr(vfbTerm.term.core, 'types') else []
         if "Class" in termInfo["SuperTypes"]:
             termInfo["IsClass"] = True
         elif "Individual" in termInfo["SuperTypes"]:
@@ -271,44 +374,74 @@ def term_info_parse_object(results, short_form):
         try:
             # Retrieve tags from the term's unique_facets attribute
             termInfo["Tags"] = vfbTerm.term.core.unique_facets
-        except NameError:
+        except (NameError, AttributeError):
             # If unique_facets attribute doesn't exist, use the term's types
-            termInfo["Tags"] = vfbTerm.term.core.types
+            termInfo["Tags"] = vfbTerm.term.core.types if hasattr(vfbTerm.term.core, 'types') else []
         try:
             # Retrieve description from the term's description attribute
             termInfo["Meta"]["Description"] = "%s"%("".join(vfbTerm.term.description))
-        except NameError:
+        except (NameError, AttributeError):
             pass
         try:
             # Retrieve comment from the term's comment attribute
             termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment))
-        except NameError:
+        except (NameError, AttributeError):
             pass
-
-
-        if vfbTerm.parents and len(vfbTerm.parents) > 0:
+
+        if hasattr(vfbTerm, 'parents') and vfbTerm.parents and len(vfbTerm.parents) > 0:
             parents = []
 
             # Sort the parents alphabetically
             sorted_parents = sorted(vfbTerm.parents, key=lambda parent: parent.label)
 
             for parent in sorted_parents:
-                parents.append("[%s](%s)"%(parent.label, parent.short_form))
+                parents.append("[%s](%s)"%(encode_brackets(parent.label), parent.short_form))
             termInfo["Meta"]["Types"] = "; ".join(parents)
 
-        if vfbTerm.relationships and len(vfbTerm.relationships) > 0:
+        if hasattr(vfbTerm, 'relationships') and vfbTerm.relationships and len(vfbTerm.relationships) > 0:
             relationships = []
+            pubs_from_relationships = []  # New: Collect publication references from relationships
 
             # Group relationships by relation type and remove duplicates
             grouped_relationships = {}
             for relationship in vfbTerm.relationships:
-                if relationship.relation.short_form:
+                if hasattr(relationship.relation, 'short_form') and relationship.relation.short_form:
                     relation_key = (relationship.relation.label, relationship.relation.short_form)
-                elif relationship.relation.iri:
+                elif hasattr(relationship.relation, 'iri') and relationship.relation.iri:
                     relation_key = (relationship.relation.label, relationship.relation.iri.split('/')[-1])
-                elif relationship.relation.label:
+                elif hasattr(relationship.relation, 'label') and relationship.relation.label:
                     relation_key = (relationship.relation.label, relationship.relation.label)
-
+                else:
+                    # Skip relationships with no identifiable relation
+                    continue
+
+                if not hasattr(relationship, 'object') or not hasattr(relationship.object, 'label'):
+                    # Skip relationships with missing object information
+                    continue
+
+                object_key = (relationship.object.label, getattr(relationship.object, 'short_form', ''))
+
+                # New: Extract publications from this relationship if they exist
+                if hasattr(relationship, 'pubs') and relationship.pubs:
+                    for pub in relationship.pubs:
+                        if hasattr(pub, 'get_miniref') and pub.get_miniref():
+                            publication = {}
+                            publication["title"] = pub.core.label if hasattr(pub, 'core') and hasattr(pub.core, 'label') else ""
+                            publication["short_form"] = pub.core.short_form if hasattr(pub, 'core') and hasattr(pub.core, 'short_form') else ""
+                            publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
+
+                            # Add external references
+                            refs = []
+                            if hasattr(pub, 'PubMed') and pub.PubMed:
+                                refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={pub.PubMed}")
+                            if hasattr(pub, 'FlyBase') and pub.FlyBase:
+                                refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
+                            if hasattr(pub, 'DOI') and pub.DOI:
+                                refs.append(f"https://doi.org/{pub.DOI}")
+
+                            publication["refs"] = refs
+                            pubs_from_relationships.append(publication)
+
                 if relation_key not in grouped_relationships:
                     grouped_relationships[relation_key] = set()
                 grouped_relationships[relation_key].add(object_key)
@@ -322,38 +455,21 @@ def term_info_parse_object(results, short_form):
                 sorted_object_set = sorted(list(object_set))
                 relation_objects = []
                 for object_key in sorted_object_set:
-                    relation_objects.append("[%s](%s)" % (object_key[0], object_key[1]))
-                relationships.append("[%s](%s): %s" % (relation_key[0], relation_key[1], ', '.join(relation_objects)))
+                    relation_objects.append("[%s](%s)" % (encode_brackets(object_key[0]), object_key[1]))
+                relationships.append("[%s](%s): %s" % (encode_brackets(relation_key[0]), relation_key[1], ', '.join(relation_objects)))
             termInfo["Meta"]["Relationships"] = "; ".join(relationships)
 
-
-
-
-
-            # Group xrefs by site
-            grouped_xrefs = {}
-            for xref in vfbTerm.xrefs:
-                site_key = (xref.site.label, xref.homepage, xref.icon)
-                link_key = (xref.accession, xref.link())
-                if site_key not in grouped_xrefs:
-                    grouped_xrefs[site_key] = set()
-                grouped_xrefs[site_key].add(link_key)
-
-            # Sort the grouped_xrefs by site_key
-            sorted_grouped_xrefs = dict(sorted(grouped_xrefs.items()))
-
-            # Append the grouped xrefs to termInfo
-            for site_key, link_set in sorted_grouped_xrefs.items():
-                # Sort the link_set by link_key
-                sorted_link_set = sorted(list(link_set))
-                links = []
-                for link_key in sorted_link_set:
-                    links.append("[%s](%s)" % (link_key[0], link_key[1]))
-                if site_key[2]:
-                    xrefs.append(" [%s](%s): %s" % (site_key[0], site_key[2], site_key[0], site_key[1], ', '.join(links)))
+            # New: Add relationship publications to main publications list
+            if pubs_from_relationships:
+                if "Publications" not in termInfo:
+                    termInfo["Publications"] = pubs_from_relationships
                 else:
-
-
+                    # Merge with existing publications, avoiding duplicates by short_form
+                    existing_pub_short_forms = {pub.get("short_form", "") for pub in termInfo["Publications"]}
+                    for pub in pubs_from_relationships:
+                        if pub.get("short_form", "") not in existing_pub_short_forms:
+                            termInfo["Publications"].append(pub)
+                            existing_pub_short_forms.add(pub.get("short_form", ""))
 
         # If the term has anatomy channel images, retrieve the images and associated information
         if vfbTerm.anatomy_channel_image and len(vfbTerm.anatomy_channel_image) > 0:
@@ -362,7 +478,7 @@ def term_info_parse_object(results, short_form):
                 record = {}
                 record["id"] = image.anatomy.short_form
                 label = image.anatomy.label
-                if image.anatomy.symbol
+                if image.anatomy.symbol and len(image.anatomy.symbol) > 0:
                     label = image.anatomy.symbol
                 record["label"] = label
                 if not image.channel_image.image.template_anatomy.short_form in images.keys():
@@ -385,7 +501,7 @@ def term_info_parse_object(results, short_form):
                 record = {}
                 record["id"] = vfbTerm.term.core.short_form
                 label = vfbTerm.term.core.label
-                if vfbTerm.term.core.symbol
+                if vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
                     label = vfbTerm.term.core.symbol
                 record["label"] = label
                 if not image.image.template_anatomy.short_form in images.keys():
@@ -417,13 +533,29 @@ def term_info_parse_object(results, short_form):
             images = {}
             image = vfbTerm.template_channel
             record = {}
-
-
+
+            # Validate that the channel ID matches the template ID (numeric part should be the same)
+            template_id = vfbTerm.term.core.short_form
+            channel_id = vfbTerm.template_channel.channel.short_form
+
+            # Extract numeric parts for validation
+            if template_id and channel_id:
+                template_numeric = template_id.replace("VFB_", "") if template_id.startswith("VFB_") else ""
+                channel_numeric = channel_id.replace("VFBc_", "") if channel_id.startswith("VFBc_") else ""
+
+                if template_numeric != channel_numeric:
+                    print(f"Warning: Template ID {template_id} does not match channel ID {channel_id}")
+                label = vfbTerm.template_channel.channel.label
+                record["id"] = channel_id
+            else:
+                label = vfbTerm.term.core.label
+                record["id"] = template_id
+
             if vfbTerm.template_channel.channel.symbol != "" and len(vfbTerm.template_channel.channel.symbol) > 0:
                 label = vfbTerm.template_channel.channel.symbol
             record["label"] = label
-            if not
-            images[
+            if not template_id in images.keys():
+                images[template_id]=[]
             record["thumbnail"] = image.image_thumbnail.replace("http://","https://").replace("thumbnailT.png","thumbnail.png")
             record["thumbnail_transparent"] = image.image_thumbnail.replace("http://","https://").replace("thumbnail.png","thumbnailT.png")
             for key in vars(image).keys():
```
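The new validation block leans on VFB's ID convention: a template individual `VFB_<n>` should have a channel `VFBc_<n>` with the same numeric tail. A standalone sketch of the comparison (the helper name is mine; the first ID appears in the `template_order` list further down, the channel IDs are illustrative):

```python
def ids_match(template_id: str, channel_id: str) -> bool:
    # Mirrors the check in term_info_parse_object: strip the prefixes
    # and compare the numeric tails.
    template_numeric = template_id.replace("VFB_", "") if template_id.startswith("VFB_") else ""
    channel_numeric = channel_id.replace("VFBc_", "") if channel_id.startswith("VFBc_") else ""
    return template_numeric == channel_numeric

print(ids_match("VFB_00101567", "VFBc_00101567"))  # True
print(ids_match("VFB_00101567", "VFBc_00200000"))  # False: would print the warning above
```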
```diff
@@ -441,7 +573,7 @@ def term_info_parse_object(results, short_form):
                 record['voxel'] = image.get_voxel()
             if 'orientation' in image_vars.keys():
                 record['orientation'] = image.orientation
-            images[
+            images[template_id].append(record)
 
             # Add the thumbnails to the term info
             termInfo["Images"] = images
@@ -480,6 +612,119 @@ def term_info_parse_object(results, short_form):
         if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron", "has_neuron_connectivity"]):
             q = NeuronInputsTo_to_schema(termInfo["Name"], {"neuron_short_form": vfbTerm.term.core.short_form})
             queries.append(q)
+
+        # Add Publications to the termInfo object
+        if vfbTerm.pubs and len(vfbTerm.pubs) > 0:
+            publications = []
+            for pub in vfbTerm.pubs:
+                if pub.get_miniref():
+                    publication = {}
+                    publication["title"] = pub.core.label if pub.core.label else ""
+                    publication["short_form"] = pub.core.short_form if pub.core.short_form else ""
+                    publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
+
+                    # Add external references
+                    refs = []
+                    if hasattr(pub, 'PubMed') and pub.PubMed:
+                        refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={pub.PubMed}")
+                    if hasattr(pub, 'FlyBase') and pub.FlyBase:
+                        refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
+                    if hasattr(pub, 'DOI') and pub.DOI:
+                        refs.append(f"https://doi.org/{pub.DOI}")
+
+                    publication["refs"] = refs
+                    publications.append(publication)
+
+            termInfo["Publications"] = publications
+
+        # Add Synonyms for Class entities
+        if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
+            synonyms = []
+            for syn in vfbTerm.pub_syn:
+                if hasattr(syn, 'synonym') and syn.synonym:
+                    synonym = {}
+                    synonym["label"] = syn.synonym.label if hasattr(syn.synonym, 'label') else ""
+                    synonym["scope"] = syn.synonym.scope if hasattr(syn.synonym, 'scope') else "exact"
+                    synonym["type"] = syn.synonym.type if hasattr(syn.synonym, 'type') else "synonym"
+
+                    # Enhanced publication handling - handle multiple publications
+                    if hasattr(syn, 'pubs') and syn.pubs:
+                        pub_refs = []
+                        for pub in syn.pubs:
+                            if hasattr(pub, 'get_microref') and pub.get_microref():
+                                pub_refs.append(pub.get_microref())
+
+                        if pub_refs:
+                            # Join multiple publication references with commas
+                            synonym["publication"] = ", ".join(pub_refs)
+                    # Fallback to single pub if pubs collection not available
+                    elif hasattr(syn, 'pub') and syn.pub and hasattr(syn.pub, 'get_microref'):
+                        synonym["publication"] = syn.pub.get_microref()
+
+                    synonyms.append(synonym)
+
+            # Only add the synonyms if we found any
+            if synonyms:
+                termInfo["Synonyms"] = synonyms
+
+        # Alternative approach for extracting synonyms from relationships
+        if "Class" in termInfo["SuperTypes"] and vfbTerm.relationships and len(vfbTerm.relationships) > 0:
+            synonyms = []
+            for relationship in vfbTerm.relationships:
+                if (relationship.relation.label == "has_exact_synonym" or
+                    relationship.relation.label == "has_broad_synonym" or
+                    relationship.relation.label == "has_narrow_synonym"):
+
+                    synonym = {}
+                    synonym["label"] = relationship.object.label
+
+                    # Determine scope based on relation type
+                    if relationship.relation.label == "has_exact_synonym":
+                        synonym["scope"] = "exact"
+                    elif relationship.relation.label == "has_broad_synonym":
+                        synonym["scope"] = "broad"
+                    elif relationship.relation.label == "has_narrow_synonym":
+                        synonym["scope"] = "narrow"
+
+                    synonym["type"] = "synonym"
+                    synonyms.append(synonym)
+
+            # Only add the synonyms if we found any
+            if synonyms and "Synonyms" not in termInfo:
+                termInfo["Synonyms"] = synonyms
+
+        # Special handling for Publication entities
+        if termInfo["SuperTypes"] and "Publication" in termInfo["SuperTypes"] and vfbTerm.pub_specific_content:
+            publication = {}
+            publication["title"] = vfbTerm.pub_specific_content.title if hasattr(vfbTerm.pub_specific_content, 'title') else ""
+            publication["short_form"] = vfbTerm.term.core.short_form
+            publication["microref"] = termInfo["Name"]
+
+            # Add external references
+            refs = []
+            if hasattr(vfbTerm.pub_specific_content, 'PubMed') and vfbTerm.pub_specific_content.PubMed:
+                refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={vfbTerm.pub_specific_content.PubMed}")
+            if hasattr(vfbTerm.pub_specific_content, 'FlyBase') and vfbTerm.pub_specific_content.FlyBase:
+                refs.append(f"http://flybase.org/reports/{vfbTerm.pub_specific_content.FlyBase}")
+            if hasattr(vfbTerm.pub_specific_content, 'DOI') and vfbTerm.pub_specific_content.DOI:
+                refs.append(f"https://doi.org/{vfbTerm.pub_specific_content.DOI}")
+
+            publication["refs"] = refs
+            termInfo["Publications"] = [publication]
+
+        # Append new synonyms to any existing ones
+        if synonyms:
+            if "Synonyms" not in termInfo:
+                termInfo["Synonyms"] = synonyms
+            else:
+                # Create a set of existing synonym labels to avoid duplicates
+                existing_labels = {syn["label"] for syn in termInfo["Synonyms"]}
+                # Only append synonyms that don't already exist
+                for synonym in synonyms:
+                    if synonym["label"] not in existing_labels:
+                        termInfo["Synonyms"].append(synonym)
+                        existing_labels.add(synonym["label"])
+
         # Add the queries to the term info
         termInfo["Queries"] = queries
 
```
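For consumers of the payload, the shape of the two new fields follows from the code above; an illustrative (entirely made-up) fragment:

```python
# Illustrative values only; the keys match what term_info_parse_object builds above.
term_info_fragment = {
    "Publications": [{
        "title": "An example paper",                  # pub.core.label
        "short_form": "FBrf0000000",                  # hypothetical publication ID
        "microref": "Author et al., 2020",            # pub.get_microref()
        "refs": ["https://doi.org/10.0000/example"],  # PubMed/FlyBase/DOI links
    }],
    "Synonyms": [{
        "label": "an alternative name",
        "scope": "exact",  # exact, broad or narrow
        "type": "synonym",
        "publication": "Author et al., 2020",  # present only when a pub ref exists
    }],
}
```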
```diff
@@ -487,7 +732,12 @@ def term_info_parse_object(results, short_form):
     if "Queries" in termInfo:
         termInfo["Queries"] = [query.to_dict() for query in termInfo["Queries"]]
     # print("termInfo object before schema validation:", termInfo)
-
+    try:
+        return TermInfoOutputSchema().load(termInfo)
+    except ValidationError as e:
+        print(f"Validation error when parsing term info: {e}")
+        # Return the raw termInfo as a fallback
+        return termInfo
 
 def NeuronInputsTo_to_schema(name, take_default):
     query = "NeuronInputsTo"
@@ -549,24 +799,37 @@ def get_term_info(short_form: str, preview: bool = False):
     try:
         # Search for the term in the SOLR server
         results = vfb_solr.search('id:' + short_form)
-        sanitized_results = serialize_solr_output(results)
-        print(sanitized_results)
         # Check if any results were returned
         parsed_object = term_info_parse_object(results, short_form)
-
-
-
-
-
+        if parsed_object:
+            term_info = fill_query_results(parsed_object)
+            if not term_info:
+                print("Failed to fill query preview results!")
+                return parsed_object
+            return parsed_object
+        else:
+            print(f"No valid term info found for ID '{short_form}'")
+            return None
     except ValidationError as e:
         # handle the validation error
         print("Schema validation error when parsing response")
         print("Error details:", e)
         print("Original data:", results)
         print("Parsed object:", parsed_object)
-
+        return parsed_object
+    except IndexError as e:
         print(f"No results found for ID '{short_form}'")
-        print("Error
+        print("Error details:", e)
+        if parsed_object:
+            print("Parsed object:", parsed_object)
+        if 'term_info' in locals():
+            print("Term info:", term_info)
+        else:
+            print("Error accessing SOLR server!")
+        return None
+    except Exception as e:
+        print(f"Unexpected error when retrieving term info: {type(e).__name__}: {e}")
+        return parsed_object
 
 def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     """
@@ -583,7 +846,7 @@ def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     RETURN COUNT(r) AS total_count
     """
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     # Define the main Cypher query
@@ -613,8 +876,11 @@ def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     results = vc.nc.commit_list([query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
 
+    columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     if return_dataframe:
         return df
 
@@ -668,7 +934,7 @@ def get_templates(limit: int = -1, return_dataframe: bool = False):
     RETURN COUNT(DISTINCT t) AS total_count"""
 
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     # Define the main Cypher query
@@ -693,7 +959,10 @@ def get_templates(limit: int = -1, return_dataframe: bool = False):
     results = vc.nc.commit_list([query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
+
+    columns_to_encode = ['name', 'dataset', 'license', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
 
     template_order = ["VFB_00101567","VFB_00200000","VFB_00017894","VFB_00101384","VFB_00050000","VFB_00049000","VFB_00100000","VFB_00030786","VFB_00110000","VFB_00120000"]
 
@@ -792,7 +1061,7 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram
     RETURN COUNT(DISTINCT n2) AS total_count"""
 
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class)
@@ -818,8 +1087,11 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram
     results = vc.nc.commit_list([main_query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
 
+    columns_to_encode = ['name', 'source', 'source_id', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     if return_dataframe:
         return df
     else:
@@ -879,7 +1151,7 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
     RETURN COUNT(DISTINCT c) AS total_count"""
 
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     # Define the part of the query for normal mode
@@ -918,8 +1190,11 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
     results = vc.nc.commit_list([query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
 
+    columns_to_encode = ['Neurotransmitter', 'Type', 'Name', 'Template_Space', 'Imaging_Technique', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     # If return_dataframe is True, return the results as a DataFrame
     if return_dataframe:
         return df
@@ -1050,4 +1325,3 @@ def fill_query_results(term_info):
     else:
         print("Preview key not found or preview is 0")
     return term_info
-
```