vfbquery 0.2.11__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +103 -0
- test/term_info_queries_test.py +88 -171
- test/test_examples_diff.py +317 -0
- vfbquery/solr_fetcher.py +89 -0
- vfbquery/term_info_queries.py +63 -3
- vfbquery/test_utils.py +39 -0
- vfbquery/vfb_queries.py +314 -64
- vfbquery-0.3.2.dist-info/METADATA +1323 -0
- vfbquery-0.3.2.dist-info/RECORD +14 -0
- {vfbquery-0.2.11.dist-info → vfbquery-0.3.2.dist-info}/WHEEL +1 -1
- vfbquery-0.2.11.dist-info/METADATA +0 -1169
- vfbquery-0.2.11.dist-info/RECORD +0 -10
- {vfbquery-0.2.11.dist-info → vfbquery-0.3.2.dist-info}/LICENSE +0 -0
- {vfbquery-0.2.11.dist-info → vfbquery-0.3.2.dist-info}/top_level.txt +0 -0
vfbquery/vfb_queries.py
CHANGED
@@ -1,8 +1,11 @@
 import pysolr
 from .term_info_queries import deserialize_term_info
-from vfb_connect.cross_server_tools import VfbConnect, dict_cursor
+# Replace VfbConnect import with our new SolrTermInfoFetcher
+from .solr_fetcher import SolrTermInfoFetcher
+# Keep dict_cursor if it's used elsewhere
+from vfb_connect.cross_server_tools import dict_cursor
 from marshmallow import Schema, fields, post_load
-from typing import List, Tuple
+from typing import List, Tuple, Dict, Any, Union
 import pandas as pd
 from marshmallow import ValidationError
 import json
@@ -10,8 +13,8 @@ import json
 # Connect to the VFB SOLR server
 vfb_solr = pysolr.Solr('http://solr.virtualflybrain.org/solr/vfb_json/', always_commit=False, timeout=990)
 
-#
-vc =
+# Replace VfbConnect with SolrTermInfoFetcher
+vc = SolrTermInfoFetcher()
 
 class Query:
     def __init__(self, query, label, function, takes, preview=0, preview_columns=[], preview_results=[], output_format="table", count=-1):
@@ -231,6 +234,8 @@ class TermInfoOutputSchema(Schema):
     IsTemplate = fields.Bool(missing=False, required=False)
     Domains = fields.Dict(keys=fields.Integer(), values=fields.Nested(ImageSchema()), required=False, allow_none=True)
     Licenses = fields.Dict(keys=fields.Integer(), values=fields.Nested(LicenseSchema()), required=False, allow_none=True)
+    Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False)
+    Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False, allow_none=True)
 
     @post_load
     def make_term_info(self, data, **kwargs):
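The two added fields accept lists of loosely-typed dicts. A minimal, self-contained sketch of the behaviour (field values here are hypothetical, not from the package):

```python
from marshmallow import Schema, fields

class PubSynSketch(Schema):
    # Same field declarations as the two lines added above
    Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False)
    Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False, allow_none=True)

data = {
    "Publications": [{"title": "An example paper", "short_form": "FBrf_example", "microref": "Author, 2020", "refs": []}],
    "Synonyms": [{"label": "an (exact) synonym", "scope": "exact", "type": "synonym"}],
}
print(PubSynSketch().load(data))  # both lists pass through as plain dicts
```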
@@ -244,9 +249,93 @@ class TermInfoOutputSchema(Schema):
         term_info_data["Queries"] = [query.to_dict() for query in term_info_data["Queries"]]
         return str(self.dump(term_info_data))
 
+def encode_brackets(text):
+    """
+    Encodes brackets in the given text.
+
+    :param text: The text to encode.
+    :return: The text with brackets encoded.
+    """
+    return (text.replace('(', '%28')
+            .replace(')', '%29')
+            .replace('[', '%5B')
+            .replace(']', '%5D'))
+
+def encode_markdown_links(df, columns):
+    """
+    Encodes brackets in the labels and titles within markdown links and images, leaving the link syntax intact.
+    :param df: DataFrame containing the query results.
+    :param columns: List of column names to apply encoding to.
+    """
+    def encode_label(label):
+        if not isinstance(label, str):
+            return label
+
+        try:
+            # Process linked images (format: [![alt](image)](link))
+            if label.startswith("[![") and ")](" in label:
+                parts = label.split(")](")
+                if len(parts) < 2:
+                    return label
+
+                image_part = parts[0]
+                link_part = parts[1]
+
+                # Process the image part
+                image_parts = image_part.split("](")
+                if len(image_parts) < 2:
+                    return label
+
+                alt_text = image_parts[0][3:]  # Remove the leading "[!["
+                # Encode brackets in the alt text and rebuild the linked image
+                alt_text_encoded = encode_brackets(alt_text)
+                encoded_label = f"[![{alt_text_encoded}]({image_parts[1]})]({link_part}"
+                return encoded_label
+
+            # Process regular markdown links
+            elif label.startswith("[") and "](" in label:
+                parts = label.split("](")
+                if len(parts) < 2:
+                    return label
+
+                label_part = parts[0][1:]  # Remove the leading '['
+                # Encode brackets in the label part
+                label_part_encoded = encode_brackets(label_part)
+                # Reconstruct the markdown link with the encoded label
+                encoded_label = f"[{label_part_encoded}]({parts[1]}"
+                return encoded_label
+
+        except Exception as e:
+            # In case of any other unexpected error, log or print the error and return the original label
+            print(f"Error processing label: {label}, error: {e}")
+            return label
+
+        # If none of the conditions above match, return the original label
+        return label
+
+    for column in columns:
+        df[column] = df[column].apply(lambda x: encode_label(x) if pd.notnull(x) else x)
+
+    return df
 
 def term_info_parse_object(results, short_form):
     termInfo = {}
+    termInfo["SuperTypes"] = []
+    termInfo["Tags"] = []
+    termInfo["Queries"] = []
+    termInfo["IsClass"] = False
+    termInfo["IsIndividual"] = False
+    termInfo["IsTemplate"] = False
+    termInfo["Images"] = {}
+    termInfo["Examples"] = {}
+    termInfo["Domains"] = {}
+    termInfo["Licenses"] = {}
+    termInfo["Publications"] = []
+    termInfo["Synonyms"] = []
+
     if results.hits > 0 and results.docs and len(results.docs) > 0:
         termInfo["Meta"] = {}
         try:
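A quick sketch of what these two helpers do (the label text is made up; VFB_00101567 is a template ID that appears later in this file):

```python
import pandas as pd
from vfbquery.vfb_queries import encode_brackets, encode_markdown_links

print(encode_brackets("medulla (ME)"))  # -> medulla %28ME%29

# Brackets inside the link text are encoded; the link target is untouched.
df = pd.DataFrame({"label": ["[adult brain (whole)](VFB_00101567)"]})
print(encode_markdown_links(df, ["label"])["label"][0])
# -> [adult brain %28whole%29](VFB_00101567)
```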
@@ -255,15 +344,21 @@ def term_info_parse_object(results, short_form):
         except KeyError:
             print(f"SOLR doc missing 'term_info': {results.docs[0]}")
             return None
+        except Exception as e:
+            print(f"Error deserializing term info: {e}")
+            return None
+
         queries = []
+        # Initialize synonyms variable to avoid UnboundLocalError
+        synonyms = []
         termInfo["Id"] = vfbTerm.term.core.short_form
-        termInfo["Meta"]["Name"] = "[%s](%s)"%(vfbTerm.term.core.label, vfbTerm.term.core.short_form)
+        termInfo["Meta"]["Name"] = "[%s](%s)"%(encode_brackets(vfbTerm.term.core.label), vfbTerm.term.core.short_form)
         mainlabel = vfbTerm.term.core.label
-        if vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
-            termInfo["Meta"]["Symbol"] = "[%s](%s)"%(vfbTerm.term.core.symbol, vfbTerm.term.core.short_form)
+        if hasattr(vfbTerm.term.core, 'symbol') and vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
+            termInfo["Meta"]["Symbol"] = "[%s](%s)"%(encode_brackets(vfbTerm.term.core.symbol), vfbTerm.term.core.short_form)
             mainlabel = vfbTerm.term.core.symbol
         termInfo["Name"] = mainlabel
-        termInfo["SuperTypes"] = vfbTerm.term.core.types
+        termInfo["SuperTypes"] = vfbTerm.term.core.types if hasattr(vfbTerm.term.core, 'types') else []
         if "Class" in termInfo["SuperTypes"]:
             termInfo["IsClass"] = True
         elif "Individual" in termInfo["SuperTypes"]:
@@ -271,44 +366,74 @@ def term_info_parse_object(results, short_form):
         try:
             # Retrieve tags from the term's unique_facets attribute
             termInfo["Tags"] = vfbTerm.term.core.unique_facets
-        except NameError:
+        except (NameError, AttributeError):
             # If unique_facets attribute doesn't exist, use the term's types
-            termInfo["Tags"] = vfbTerm.term.core.types
+            termInfo["Tags"] = vfbTerm.term.core.types if hasattr(vfbTerm.term.core, 'types') else []
         try:
             # Retrieve description from the term's description attribute
             termInfo["Meta"]["Description"] = "%s"%("".join(vfbTerm.term.description))
-        except NameError:
+        except (NameError, AttributeError):
             pass
         try:
             # Retrieve comment from the term's comment attribute
             termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment))
-        except NameError:
+        except (NameError, AttributeError):
             pass
-
-
-        if vfbTerm.parents and len(vfbTerm.parents) > 0:
+
+        if hasattr(vfbTerm, 'parents') and vfbTerm.parents and len(vfbTerm.parents) > 0:
             parents = []
 
             # Sort the parents alphabetically
             sorted_parents = sorted(vfbTerm.parents, key=lambda parent: parent.label)
 
             for parent in sorted_parents:
-                parents.append("[%s](%s)"%(parent.label, parent.short_form))
+                parents.append("[%s](%s)"%(encode_brackets(parent.label), parent.short_form))
             termInfo["Meta"]["Types"] = "; ".join(parents)
 
-        if vfbTerm.relationships and len(vfbTerm.relationships) > 0:
+        if hasattr(vfbTerm, 'relationships') and vfbTerm.relationships and len(vfbTerm.relationships) > 0:
             relationships = []
+            pubs_from_relationships = []  # New: Collect publication references from relationships
 
             # Group relationships by relation type and remove duplicates
             grouped_relationships = {}
             for relationship in vfbTerm.relationships:
-                if relationship.relation.short_form:
+                if hasattr(relationship.relation, 'short_form') and relationship.relation.short_form:
                     relation_key = (relationship.relation.label, relationship.relation.short_form)
-                elif relationship.relation.iri:
+                elif hasattr(relationship.relation, 'iri') and relationship.relation.iri:
                     relation_key = (relationship.relation.label, relationship.relation.iri.split('/')[-1])
-                elif relationship.relation.label:
+                elif hasattr(relationship.relation, 'label') and relationship.relation.label:
                     relation_key = (relationship.relation.label, relationship.relation.label)
-                object_key = (relationship.object.label, relationship.object.short_form)
+                else:
+                    # Skip relationships with no identifiable relation
+                    continue
+
+                if not hasattr(relationship, 'object') or not hasattr(relationship.object, 'label'):
+                    # Skip relationships with missing object information
+                    continue
+
+                object_key = (relationship.object.label, getattr(relationship.object, 'short_form', ''))
+
+                # New: Extract publications from this relationship if they exist
+                if hasattr(relationship, 'pubs') and relationship.pubs:
+                    for pub in relationship.pubs:
+                        if hasattr(pub, 'get_miniref') and pub.get_miniref():
+                            publication = {}
+                            publication["title"] = pub.core.label if hasattr(pub, 'core') and hasattr(pub.core, 'label') else ""
+                            publication["short_form"] = pub.core.short_form if hasattr(pub, 'core') and hasattr(pub.core, 'short_form') else ""
+                            publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
+
+                            # Add external references
+                            refs = []
+                            if hasattr(pub, 'PubMed') and pub.PubMed:
+                                refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={pub.PubMed}")
+                            if hasattr(pub, 'FlyBase') and pub.FlyBase:
+                                refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
+                            if hasattr(pub, 'DOI') and pub.DOI:
+                                refs.append(f"https://doi.org/{pub.DOI}")
+
+                            publication["refs"] = refs
+                            pubs_from_relationships.append(publication)
 
                 if relation_key not in grouped_relationships:
                     grouped_relationships[relation_key] = set()
                 grouped_relationships[relation_key].add(object_key)
@@ -322,38 +447,21 @@ def term_info_parse_object(results, short_form):
                 sorted_object_set = sorted(list(object_set))
                 relation_objects = []
                 for object_key in sorted_object_set:
-                    relation_objects.append("[%s](%s)" % (object_key[0], object_key[1]))
-                relationships.append("[%s](%s): %s" % (relation_key[0], relation_key[1], ', '.join(relation_objects)))
+                    relation_objects.append("[%s](%s)" % (encode_brackets(object_key[0]), object_key[1]))
+                relationships.append("[%s](%s): %s" % (encode_brackets(relation_key[0]), relation_key[1], ', '.join(relation_objects)))
             termInfo["Meta"]["Relationships"] = "; ".join(relationships)
 
-
-
-
-
-        # Group xrefs by site
-        grouped_xrefs = {}
-        for xref in vfbTerm.xrefs:
-            site_key = (xref.site.label, xref.homepage, xref.icon)
-            link_key = (xref.accession, xref.link())
-            if site_key not in grouped_xrefs:
-                grouped_xrefs[site_key] = set()
-            grouped_xrefs[site_key].add(link_key)
-
-        # Sort the grouped_xrefs by site_key
-        sorted_grouped_xrefs = dict(sorted(grouped_xrefs.items()))
-
-        # Append the grouped xrefs to termInfo
-        for site_key, link_set in sorted_grouped_xrefs.items():
-            # Sort the link_set by link_key
-            sorted_link_set = sorted(list(link_set))
-            links = []
-            for link_key in sorted_link_set:
-                links.append("[%s](%s)" % (link_key[0], link_key[1]))
-            if site_key[2]:
-                xrefs.append("![%s](%s) [%s](%s): %s" % (site_key[0], site_key[2], site_key[0], site_key[1], ', '.join(links)))
+        # New: Add relationship publications to main publications list
+        if pubs_from_relationships:
+            if "Publications" not in termInfo:
+                termInfo["Publications"] = pubs_from_relationships
             else:
-                xrefs.append("[%s](%s): %s" % (site_key[0], site_key[1], ', '.join(links)))
-
+                # Merge with existing publications, avoiding duplicates by short_form
+                existing_pub_short_forms = {pub.get("short_form", "") for pub in termInfo["Publications"]}
+                for pub in pubs_from_relationships:
+                    if pub.get("short_form", "") not in existing_pub_short_forms:
+                        termInfo["Publications"].append(pub)
+                        existing_pub_short_forms.add(pub.get("short_form", ""))
 
         # If the term has anatomy channel images, retrieve the images and associated information
         if vfbTerm.anatomy_channel_image and len(vfbTerm.anatomy_channel_image) > 0:
@@ -362,7 +470,7 @@ def term_info_parse_object(results, short_form):
                 record = {}
                 record["id"] = image.anatomy.short_form
                 label = image.anatomy.label
-                if image.anatomy.symbol
+                if image.anatomy.symbol and len(image.anatomy.symbol) > 0:
                     label = image.anatomy.symbol
                 record["label"] = label
                 if not image.channel_image.image.template_anatomy.short_form in images.keys():
@@ -385,7 +493,7 @@ def term_info_parse_object(results, short_form):
             record = {}
             record["id"] = vfbTerm.term.core.short_form
             label = vfbTerm.term.core.label
-            if vfbTerm.term.core.symbol
+            if vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
                 label = vfbTerm.term.core.symbol
             record["label"] = label
             if not image.image.template_anatomy.short_form in images.keys():
@@ -480,6 +588,119 @@ def term_info_parse_object(results, short_form):
         if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron", "has_neuron_connectivity"]):
             q = NeuronInputsTo_to_schema(termInfo["Name"], {"neuron_short_form": vfbTerm.term.core.short_form})
             queries.append(q)
+
+        # Add Publications to the termInfo object
+        if vfbTerm.pubs and len(vfbTerm.pubs) > 0:
+            publications = []
+            for pub in vfbTerm.pubs:
+                if pub.get_miniref():
+                    publication = {}
+                    publication["title"] = pub.core.label if pub.core.label else ""
+                    publication["short_form"] = pub.core.short_form if pub.core.short_form else ""
+                    publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
+
+                    # Add external references
+                    refs = []
+                    if hasattr(pub, 'PubMed') and pub.PubMed:
+                        refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={pub.PubMed}")
+                    if hasattr(pub, 'FlyBase') and pub.FlyBase:
+                        refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
+                    if hasattr(pub, 'DOI') and pub.DOI:
+                        refs.append(f"https://doi.org/{pub.DOI}")
+
+                    publication["refs"] = refs
+                    publications.append(publication)
+
+            termInfo["Publications"] = publications
+
+        # Add Synonyms for Class entities
+        if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
+            synonyms = []
+            for syn in vfbTerm.pub_syn:
+                if hasattr(syn, 'synonym') and syn.synonym:
+                    synonym = {}
+                    synonym["label"] = syn.synonym.label if hasattr(syn.synonym, 'label') else ""
+                    synonym["scope"] = syn.synonym.scope if hasattr(syn.synonym, 'scope') else "exact"
+                    synonym["type"] = syn.synonym.type if hasattr(syn.synonym, 'type') else "synonym"
+
+                    # Enhanced publication handling - handle multiple publications
+                    if hasattr(syn, 'pubs') and syn.pubs:
+                        pub_refs = []
+                        for pub in syn.pubs:
+                            if hasattr(pub, 'get_microref') and pub.get_microref():
+                                pub_refs.append(pub.get_microref())
+
+                        if pub_refs:
+                            # Join multiple publication references with commas
+                            synonym["publication"] = ", ".join(pub_refs)
+                    # Fallback to single pub if pubs collection not available
+                    elif hasattr(syn, 'pub') and syn.pub and hasattr(syn.pub, 'get_microref'):
+                        synonym["publication"] = syn.pub.get_microref()
+
+                    synonyms.append(synonym)
+
+            # Only add the synonyms if we found any
+            if synonyms:
+                termInfo["Synonyms"] = synonyms
+
+        # Alternative approach for extracting synonyms from relationships
+        if "Class" in termInfo["SuperTypes"] and vfbTerm.relationships and len(vfbTerm.relationships) > 0:
+            synonyms = []
+            for relationship in vfbTerm.relationships:
+                if (relationship.relation.label == "has_exact_synonym" or
+                        relationship.relation.label == "has_broad_synonym" or
+                        relationship.relation.label == "has_narrow_synonym"):
+
+                    synonym = {}
+                    synonym["label"] = relationship.object.label
+
+                    # Determine scope based on relation type
+                    if relationship.relation.label == "has_exact_synonym":
+                        synonym["scope"] = "exact"
+                    elif relationship.relation.label == "has_broad_synonym":
+                        synonym["scope"] = "broad"
+                    elif relationship.relation.label == "has_narrow_synonym":
+                        synonym["scope"] = "narrow"
+
+                    synonym["type"] = "synonym"
+                    synonyms.append(synonym)
+
+            # Only add the synonyms if we found any
+            if synonyms and "Synonyms" not in termInfo:
+                termInfo["Synonyms"] = synonyms
+
+        # Special handling for Publication entities
+        if termInfo["SuperTypes"] and "Publication" in termInfo["SuperTypes"] and vfbTerm.pub_specific_content:
+            publication = {}
+            publication["title"] = vfbTerm.pub_specific_content.title if hasattr(vfbTerm.pub_specific_content, 'title') else ""
+            publication["short_form"] = vfbTerm.term.core.short_form
+            publication["microref"] = termInfo["Name"]
+
+            # Add external references
+            refs = []
+            if hasattr(vfbTerm.pub_specific_content, 'PubMed') and vfbTerm.pub_specific_content.PubMed:
+                refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={vfbTerm.pub_specific_content.PubMed}")
+            if hasattr(vfbTerm.pub_specific_content, 'FlyBase') and vfbTerm.pub_specific_content.FlyBase:
+                refs.append(f"http://flybase.org/reports/{vfbTerm.pub_specific_content.FlyBase}")
+            if hasattr(vfbTerm.pub_specific_content, 'DOI') and vfbTerm.pub_specific_content.DOI:
+                refs.append(f"https://doi.org/{vfbTerm.pub_specific_content.DOI}")
+
+            publication["refs"] = refs
+            termInfo["Publications"] = [publication]
+
+        # Append new synonyms to any existing ones
+        if synonyms:
+            if "Synonyms" not in termInfo:
+                termInfo["Synonyms"] = synonyms
+            else:
+                # Create a set of existing synonym labels to avoid duplicates
+                existing_labels = {syn["label"] for syn in termInfo["Synonyms"]}
+                # Only append synonyms that don't already exist
+                for synonym in synonyms:
+                    if synonym["label"] not in existing_labels:
+                        termInfo["Synonyms"].append(synonym)
+                        existing_labels.add(synonym["label"])
 
         # Add the queries to the term info
         termInfo["Queries"] = queries
 
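For orientation, the publication and synonym entries assembled above are plain dicts shaped roughly like this (all values hypothetical):

```python
# Hypothetical values, for illustration only.
publication = {
    "title": "An example paper title",
    "short_form": "FBrf_example",  # hypothetical publication ID
    "microref": "Author et al., 2020",
    "refs": [
        "http://www.ncbi.nlm.nih.gov/pubmed/?term=12345678",
        "http://flybase.org/reports/FBrf_example",
        "https://doi.org/10.0000/example",
    ],
}

synonym = {
    "label": "an example synonym",
    "scope": "exact",  # or "broad" / "narrow"
    "type": "synonym",
    "publication": "Author et al., 2020",  # optional, only when microrefs exist
}
```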
@@ -487,7 +708,12 @@ def term_info_parse_object(results, short_form):
     if "Queries" in termInfo:
         termInfo["Queries"] = [query.to_dict() for query in termInfo["Queries"]]
     # print("termInfo object before schema validation:", termInfo)
-    return TermInfoOutputSchema().load(termInfo)
+    try:
+        return TermInfoOutputSchema().load(termInfo)
+    except ValidationError as e:
+        print(f"Validation error when parsing term info: {e}")
+        # Return the raw termInfo as a fallback
+        return termInfo
 
 def NeuronInputsTo_to_schema(name, take_default):
     query = "NeuronInputsTo"
@@ -524,7 +750,7 @@ def ListAllAvailableImages_to_schema(name, take_default):
         "short_form": {"$and": ["Class", "Anatomy"]},
         "default": take_default,
     }
-    preview =
+    preview = 5
     preview_columns = ["id","label","tags","thumbnail"]
 
     return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
@@ -549,24 +775,37 @@ def get_term_info(short_form: str, preview: bool = False):
     try:
         # Search for the term in the SOLR server
         results = vfb_solr.search('id:' + short_form)
-        sanitized_results = serialize_solr_output(results)
-        print(sanitized_results)
         # Check if any results were returned
         parsed_object = term_info_parse_object(results, short_form)
-
-
-
-
-
+        if parsed_object:
+            term_info = fill_query_results(parsed_object)
+            if not term_info:
+                print("Failed to fill query preview results!")
+                return parsed_object
+            return parsed_object
+        else:
+            print(f"No valid term info found for ID '{short_form}'")
+            return None
     except ValidationError as e:
         # handle the validation error
         print("Schema validation error when parsing response")
         print("Error details:", e)
         print("Original data:", results)
         print("Parsed object:", parsed_object)
-
+        return parsed_object
     except IndexError as e:
         print(f"No results found for ID '{short_form}'")
-        print("Error
+        print("Error details:", e)
+        if parsed_object:
+            print("Parsed object:", parsed_object)
+            if 'term_info' in locals():
+                print("Term info:", term_info)
+        else:
+            print("Error accessing SOLR server!")
+        return None
+    except Exception as e:
+        print(f"Unexpected error when retrieving term info: {type(e).__name__}: {e}")
+        return parsed_object
 
 def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     """
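A minimal usage sketch for the reworked entry point (not part of the diff; VFB_00101567 is the adult-brain template ID that appears in template_order in get_templates):

```python
from vfbquery.vfb_queries import get_term_info

# Returns the parsed term info on success, or None when the ID is unknown.
info = get_term_info("VFB_00101567")
print(info)
```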
@@ -615,6 +854,9 @@ def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     # Convert the results to a DataFrame
     df = pd.DataFrame.from_records(dict_cursor(results))
 
+    columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     if return_dataframe:
         return df
 
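And a usage sketch for get_instances (FBbt_00003748, the medulla class, is an illustrative ID; any anatomy class short_form works):

```python
from vfbquery.vfb_queries import get_instances

# Markdown link labels in the returned columns now arrive bracket-encoded.
df = get_instances("FBbt_00003748", return_dataframe=True)
print(df["label"].head())
```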
@@ -695,6 +937,9 @@ def get_templates(limit: int = -1, return_dataframe: bool = False):
     # Convert the results to a DataFrame
     df = pd.DataFrame.from_records(dict_cursor(results))
 
+    columns_to_encode = ['name', 'dataset', 'license', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     template_order = ["VFB_00101567","VFB_00200000","VFB_00017894","VFB_00101384","VFB_00050000","VFB_00049000","VFB_00100000","VFB_00030786","VFB_00110000","VFB_00120000"]
 
     order = 1
@@ -820,6 +1065,9 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_dataframe=True):
     # Convert the results to a DataFrame
     df = pd.DataFrame.from_records(dict_cursor(results))
 
+    columns_to_encode = ['name', 'source', 'source_id', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     if return_dataframe:
         return df
     else:
@@ -920,6 +1168,9 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
     # Convert the results to a DataFrame
     df = pd.DataFrame.from_records(dict_cursor(results))
 
+    columns_to_encode = ['Neurotransmitter', 'Type', 'Name', 'Template_Space', 'Imaging_Technique', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     # If return_dataframe is True, return the results as a DataFrame
     if return_dataframe:
         return df
@@ -1050,4 +1301,3 @@ def fill_query_results(term_info):
     else:
         print("Preview key not found or preview is 0")
     return term_info
-