vfbquery 0.2.12__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
vfbquery/vfb_queries.py CHANGED
@@ -1,17 +1,28 @@
 import pysolr
 from .term_info_queries import deserialize_term_info
-from vfb_connect.cross_server_tools import VfbConnect, dict_cursor
+# Replace VfbConnect import with our new SolrTermInfoFetcher
+from .solr_fetcher import SolrTermInfoFetcher
+# Keep dict_cursor if it's used elsewhere - lazy import to avoid GUI issues
 from marshmallow import Schema, fields, post_load
-from typing import List, Tuple
+from typing import List, Tuple, Dict, Any, Union
 import pandas as pd
 from marshmallow import ValidationError
 import json
 
+# Lazy import for dict_cursor to avoid GUI library issues
+def get_dict_cursor():
+    """Lazy import dict_cursor to avoid import issues during testing"""
+    try:
+        from vfb_connect.cross_server_tools import dict_cursor
+        return dict_cursor
+    except ImportError as e:
+        raise ImportError(f"vfb_connect is required but could not be imported: {e}")
+
 # Connect to the VFB SOLR server
 vfb_solr = pysolr.Solr('http://solr.virtualflybrain.org/solr/vfb_json/', always_commit=False, timeout=990)
 
-# Create a VFB connection object for retrieving instances
-vc = VfbConnect()
+# Replace VfbConnect with SolrTermInfoFetcher
+vc = SolrTermInfoFetcher()
 
 class Query:
     def __init__(self, query, label, function, takes, preview=0, preview_columns=[], preview_results=[], output_format="table", count=-1):
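
The get_dict_cursor helper above is a standard lazy-import shim: vfb_connect is only imported when a cursor is actually needed, so importing vfb_queries stays cheap and side-effect free during testing. A minimal, self-contained sketch of the same pattern, using json as a stand-in for the heavy dependency:

def get_json_loads():
    """Defer the import until the helper is first called."""
    try:
        import json  # stands in for vfb_connect.cross_server_tools
        return json.loads
    except ImportError as e:
        raise ImportError(f"json is required but could not be imported: {e}")

loads = get_json_loads()      # the import happens here, not at module load time
print(loads('{"ok": true}'))  # -> {'ok': True}
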
@@ -231,6 +242,8 @@ class TermInfoOutputSchema(Schema):
     IsTemplate = fields.Bool(missing=False, required=False)
     Domains = fields.Dict(keys=fields.Integer(), values=fields.Nested(ImageSchema()), required=False, allow_none=True)
     Licenses = fields.Dict(keys=fields.Integer(), values=fields.Nested(LicenseSchema()), required=False, allow_none=True)
+    Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False)
+    Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Field()), required=False, allow_none=True)
 
     @post_load
     def make_term_info(self, data, **kwargs):
@@ -244,9 +257,93 @@ class TermInfoOutputSchema(Schema):
         term_info_data["Queries"] = [query.to_dict() for query in term_info_data["Queries"]]
         return str(self.dump(term_info_data))
 
+def encode_brackets(text):
+    """
+    Encodes brackets in the given text.
+
+    :param text: The text to encode.
+    :return: The text with brackets encoded.
+    """
+    return (text.replace('(', '%28')
+            .replace(')', '%29')
+            .replace('[', '%5B')
+            .replace(']', '%5D'))
+
+def encode_markdown_links(df, columns):
+    """
+    Encodes brackets in the labels and titles within markdown links and images, leaving the link syntax intact.
+    :param df: DataFrame containing the query results.
+    :param columns: List of column names to apply encoding to.
+    """
+    def encode_label(label):
+        if not isinstance(label, str):
+            return label
+
+        try:
+            # Process linked images (format: [![alt text](image_url "title")](link))
+            if label.startswith("[!["):
+                # Split into image part and link part
+                parts = label.split(")](")
+                if len(parts) < 2:
+                    return label
+
+                image_part = parts[0]
+                link_part = parts[1]
+
+                # Process the image part
+                image_parts = image_part.split("](")
+                if len(image_parts) < 2:
+                    return label
+
+                alt_text = image_parts[0][3:]  # Remove the "[![" prefix
+                # Encode brackets in alt text
+                alt_text_encoded = encode_brackets(alt_text)
+
+                # Reconstruct the linked image with encoded alt text
+                encoded_label = f"[![{alt_text_encoded}]({image_parts[1]})]({link_part}"
+                return encoded_label
+
+            # Process regular markdown links
+            elif label.startswith("[") and "](" in label:
+                parts = label.split("](")
+                if len(parts) < 2:
+                    return label
+
+                label_part = parts[0][1:]  # Remove the leading '['
+                # Encode brackets in the label part
+                label_part_encoded = encode_brackets(label_part)
+                # Reconstruct the markdown link with the encoded label
+                encoded_label = f"[{label_part_encoded}]({parts[1]}"
+                return encoded_label
+
+        except Exception as e:
+            # In case of any other unexpected error, log or print the error and return the original label
+            print(f"Error processing label: {label}, error: {e}")
+            return label
+
+        # If none of the conditions above match, return the original label
+        return label
+
+    for column in columns:
+        df[column] = df[column].apply(lambda x: encode_label(x) if pd.notnull(x) else x)
+
+    return df
 
 def term_info_parse_object(results, short_form):
     termInfo = {}
+    termInfo["SuperTypes"] = []
+    termInfo["Tags"] = []
+    termInfo["Queries"] = []
+    termInfo["IsClass"] = False
+    termInfo["IsIndividual"] = False
+    termInfo["IsTemplate"] = False
+    termInfo["Images"] = {}
+    termInfo["Examples"] = {}
+    termInfo["Domains"] = {}
+    termInfo["Licenses"] = {}
+    termInfo["Publications"] = []
+    termInfo["Synonyms"] = []
+
     if results.hits > 0 and results.docs and len(results.docs) > 0:
         termInfo["Meta"] = {}
         try:
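
To make the markdown-encoding helpers concrete: encode_brackets percent-encodes the four bracket characters, and encode_markdown_links applies it only to the label part of a markdown link so the link syntax itself stays parseable. A standalone illustration (the label and term ID are made up):

def encode_brackets(text):
    # Same replacements as the helper above
    return (text.replace('(', '%28').replace(')', '%29')
                .replace('[', '%5B').replace(']', '%5D'))

label = "[medulla (ME)](FBbt_12345678)"
name, target = label[1:].split("](", 1)       # 'medulla (ME)', 'FBbt_12345678)'
print(f"[{encode_brackets(name)}]({target}")  # -> [medulla %28ME%29](FBbt_12345678)
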
@@ -255,15 +352,21 @@ def term_info_parse_object(results, short_form):
         except KeyError:
             print(f"SOLR doc missing 'term_info': {results.docs[0]}")
             return None
+        except Exception as e:
+            print(f"Error deserializing term info: {e}")
+            return None
+
         queries = []
+        # Initialize synonyms variable to avoid UnboundLocalError
+        synonyms = []
         termInfo["Id"] = vfbTerm.term.core.short_form
-        termInfo["Meta"]["Name"] = "[%s](%s)"%(vfbTerm.term.core.label, vfbTerm.term.core.short_form)
+        termInfo["Meta"]["Name"] = "[%s](%s)"%(encode_brackets(vfbTerm.term.core.label), vfbTerm.term.core.short_form)
         mainlabel = vfbTerm.term.core.label
-        if vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
-            termInfo["Meta"]["Symbol"] = "[%s](%s)"%(vfbTerm.term.core.symbol, vfbTerm.term.core.short_form)
+        if hasattr(vfbTerm.term.core, 'symbol') and vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
+            termInfo["Meta"]["Symbol"] = "[%s](%s)"%(encode_brackets(vfbTerm.term.core.symbol), vfbTerm.term.core.short_form)
             mainlabel = vfbTerm.term.core.symbol
         termInfo["Name"] = mainlabel
-        termInfo["SuperTypes"] = vfbTerm.term.core.types
+        termInfo["SuperTypes"] = vfbTerm.term.core.types if hasattr(vfbTerm.term.core, 'types') else []
         if "Class" in termInfo["SuperTypes"]:
             termInfo["IsClass"] = True
         elif "Individual" in termInfo["SuperTypes"]:
@@ -271,44 +374,74 @@ def term_info_parse_object(results, short_form):
         try:
             # Retrieve tags from the term's unique_facets attribute
             termInfo["Tags"] = vfbTerm.term.core.unique_facets
-        except NameError:
+        except (NameError, AttributeError):
             # If unique_facets attribute doesn't exist, use the term's types
-            termInfo["Tags"] = vfbTerm.term.core.types
+            termInfo["Tags"] = vfbTerm.term.core.types if hasattr(vfbTerm.term.core, 'types') else []
         try:
             # Retrieve description from the term's description attribute
             termInfo["Meta"]["Description"] = "%s"%("".join(vfbTerm.term.description))
-        except NameError:
+        except (NameError, AttributeError):
             pass
         try:
             # Retrieve comment from the term's comment attribute
             termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment))
-        except NameError:
+        except (NameError, AttributeError):
             pass
-        except AttributeError:
-            print(f"vfbTerm.term.comment: {vfbTerm.term}")
-        if vfbTerm.parents and len(vfbTerm.parents) > 0:
+
+        if hasattr(vfbTerm, 'parents') and vfbTerm.parents and len(vfbTerm.parents) > 0:
             parents = []
 
             # Sort the parents alphabetically
             sorted_parents = sorted(vfbTerm.parents, key=lambda parent: parent.label)
 
             for parent in sorted_parents:
-                parents.append("[%s](%s)"%(parent.label, parent.short_form))
+                parents.append("[%s](%s)"%(encode_brackets(parent.label), parent.short_form))
             termInfo["Meta"]["Types"] = "; ".join(parents)
 
-        if vfbTerm.relationships and len(vfbTerm.relationships) > 0:
+        if hasattr(vfbTerm, 'relationships') and vfbTerm.relationships and len(vfbTerm.relationships) > 0:
             relationships = []
+            pubs_from_relationships = []  # New: Collect publication references from relationships
 
             # Group relationships by relation type and remove duplicates
             grouped_relationships = {}
             for relationship in vfbTerm.relationships:
-                if relationship.relation.short_form:
+                if hasattr(relationship.relation, 'short_form') and relationship.relation.short_form:
                     relation_key = (relationship.relation.label, relationship.relation.short_form)
-                elif relationship.relation.iri:
+                elif hasattr(relationship.relation, 'iri') and relationship.relation.iri:
                     relation_key = (relationship.relation.label, relationship.relation.iri.split('/')[-1])
-                elif relationship.relation.label:
+                elif hasattr(relationship.relation, 'label') and relationship.relation.label:
                     relation_key = (relationship.relation.label, relationship.relation.label)
-                object_key = (relationship.object.label, relationship.object.short_form)
+                else:
+                    # Skip relationships with no identifiable relation
+                    continue
+
+                if not hasattr(relationship, 'object') or not hasattr(relationship.object, 'label'):
+                    # Skip relationships with missing object information
+                    continue
+
+                object_key = (relationship.object.label, getattr(relationship.object, 'short_form', ''))
+
+                # New: Extract publications from this relationship if they exist
+                if hasattr(relationship, 'pubs') and relationship.pubs:
+                    for pub in relationship.pubs:
+                        if hasattr(pub, 'get_miniref') and pub.get_miniref():
+                            publication = {}
+                            publication["title"] = pub.core.label if hasattr(pub, 'core') and hasattr(pub.core, 'label') else ""
+                            publication["short_form"] = pub.core.short_form if hasattr(pub, 'core') and hasattr(pub.core, 'short_form') else ""
+                            publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
+
+                            # Add external references
+                            refs = []
+                            if hasattr(pub, 'PubMed') and pub.PubMed:
+                                refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={pub.PubMed}")
+                            if hasattr(pub, 'FlyBase') and pub.FlyBase:
+                                refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
+                            if hasattr(pub, 'DOI') and pub.DOI:
+                                refs.append(f"https://doi.org/{pub.DOI}")
+
+                            publication["refs"] = refs
+                            pubs_from_relationships.append(publication)
+
                 if relation_key not in grouped_relationships:
                     grouped_relationships[relation_key] = set()
                 grouped_relationships[relation_key].add(object_key)
@@ -322,38 +455,21 @@ def term_info_parse_object(results, short_form):
                 sorted_object_set = sorted(list(object_set))
                 relation_objects = []
                 for object_key in sorted_object_set:
-                    relation_objects.append("[%s](%s)" % (object_key[0], object_key[1]))
-                relationships.append("[%s](%s): %s" % (relation_key[0], relation_key[1], ', '.join(relation_objects)))
+                    relation_objects.append("[%s](%s)" % (encode_brackets(object_key[0]), object_key[1]))
+                relationships.append("[%s](%s): %s" % (encode_brackets(relation_key[0]), relation_key[1], ', '.join(relation_objects)))
             termInfo["Meta"]["Relationships"] = "; ".join(relationships)
 
-
-        if vfbTerm.xrefs and len(vfbTerm.xrefs) > 0:
-            xrefs = []
-
-            # Group xrefs by site
-            grouped_xrefs = {}
-            for xref in vfbTerm.xrefs:
-                site_key = (xref.site.label, xref.homepage, xref.icon)
-                link_key = (xref.accession, xref.link())
-                if site_key not in grouped_xrefs:
-                    grouped_xrefs[site_key] = set()
-                grouped_xrefs[site_key].add(link_key)
-
-            # Sort the grouped_xrefs by site_key
-            sorted_grouped_xrefs = dict(sorted(grouped_xrefs.items()))
-
-            # Append the grouped xrefs to termInfo
-            for site_key, link_set in sorted_grouped_xrefs.items():
-                # Sort the link_set by link_key
-                sorted_link_set = sorted(list(link_set))
-                links = []
-                for link_key in sorted_link_set:
-                    links.append("[%s](%s)" % (link_key[0], link_key[1]))
-                if site_key[2]:
-                    xrefs.append("![%s](%s) [%s](%s): %s" % (site_key[0], site_key[2], site_key[0], site_key[1], ', '.join(links)))
+        # New: Add relationship publications to main publications list
+        if pubs_from_relationships:
+            if "Publications" not in termInfo:
+                termInfo["Publications"] = pubs_from_relationships
             else:
-                    xrefs.append("[%s](%s): %s" % (site_key[0], site_key[1], ', '.join(links)))
-            termInfo["Meta"]["Cross References"] = "; ".join(xrefs)
+                # Merge with existing publications, avoiding duplicates by short_form
+                existing_pub_short_forms = {pub.get("short_form", "") for pub in termInfo["Publications"]}
+                for pub in pubs_from_relationships:
+                    if pub.get("short_form", "") not in existing_pub_short_forms:
+                        termInfo["Publications"].append(pub)
+                        existing_pub_short_forms.add(pub.get("short_form", ""))
 
         # If the term has anatomy channel images, retrieve the images and associated information
         if vfbTerm.anatomy_channel_image and len(vfbTerm.anatomy_channel_image) > 0:
@@ -362,7 +478,7 @@ def term_info_parse_object(results, short_form):
                 record = {}
                 record["id"] = image.anatomy.short_form
                 label = image.anatomy.label
-                if image.anatomy.symbol != "" and len(image.anatomy.symbol) > 0:
+                if image.anatomy.symbol and len(image.anatomy.symbol) > 0:
                     label = image.anatomy.symbol
                 record["label"] = label
                 if not image.channel_image.image.template_anatomy.short_form in images.keys():
@@ -385,7 +501,7 @@
                 record = {}
                 record["id"] = vfbTerm.term.core.short_form
                 label = vfbTerm.term.core.label
-                if vfbTerm.term.core.symbol != "" and len(vfbTerm.term.core.symbol) > 0:
+                if vfbTerm.term.core.symbol and len(vfbTerm.term.core.symbol) > 0:
                     label = vfbTerm.term.core.symbol
                 record["label"] = label
                 if not image.image.template_anatomy.short_form in images.keys():
@@ -417,13 +533,29 @@
             images = {}
             image = vfbTerm.template_channel
             record = {}
-            record["id"] = vfbTerm.template_channel.channel.short_form
-            label = vfbTerm.template_channel.channel.label
+
+            # Validate that the channel ID matches the template ID (numeric part should be the same)
+            template_id = vfbTerm.term.core.short_form
+            channel_id = vfbTerm.template_channel.channel.short_form
+
+            # Extract numeric parts for validation
+            if template_id and channel_id:
+                template_numeric = template_id.replace("VFB_", "") if template_id.startswith("VFB_") else ""
+                channel_numeric = channel_id.replace("VFBc_", "") if channel_id.startswith("VFBc_") else ""
+
+                if template_numeric != channel_numeric:
+                    print(f"Warning: Template ID {template_id} does not match channel ID {channel_id}")
+                    label = vfbTerm.template_channel.channel.label
+                    record["id"] = channel_id
+                else:
+                    label = vfbTerm.term.core.label
+                    record["id"] = template_id
+
             if vfbTerm.template_channel.channel.symbol != "" and len(vfbTerm.template_channel.channel.symbol) > 0:
                 label = vfbTerm.template_channel.channel.symbol
             record["label"] = label
-            if not vfbTerm.template_channel.channel.short_form in images.keys():
-                images[vfbTerm.template_channel.channel.short_form]=[]
+            if not template_id in images.keys():
+                images[template_id]=[]
             record["thumbnail"] = image.image_thumbnail.replace("http://","https://").replace("thumbnailT.png","thumbnail.png")
             record["thumbnail_transparent"] = image.image_thumbnail.replace("http://","https://").replace("thumbnail.png","thumbnailT.png")
             for key in vars(image).keys():
@@ -441,7 +573,7 @@
                 record['voxel'] = image.get_voxel()
             if 'orientation' in image_vars.keys():
                 record['orientation'] = image.orientation
-            images[vfbTerm.template_channel.channel.short_form].append(record)
+            images[template_id].append(record)
 
             # Add the thumbnails to the term info
             termInfo["Images"] = images
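
The template-channel block above now keys image records on the template's VFB_ ID rather than the channel's VFBc_ ID, warning when the numeric parts of the two disagree. A small sketch of that validation rule (ids_match is a hypothetical helper, not part of the package):

def ids_match(template_id: str, channel_id: str) -> bool:
    """True when a VFB_ template ID and a VFBc_ channel ID share a numeric part."""
    if not (template_id.startswith("VFB_") and channel_id.startswith("VFBc_")):
        return False
    return template_id[len("VFB_"):] == channel_id[len("VFBc_"):]

print(ids_match("VFB_00101567", "VFBc_00101567"))  # True  - consistent pair
print(ids_match("VFB_00101567", "VFBc_00200000"))  # False - would trigger the warning
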
@@ -480,6 +612,119 @@
         if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron", "has_neuron_connectivity"]):
             q = NeuronInputsTo_to_schema(termInfo["Name"], {"neuron_short_form": vfbTerm.term.core.short_form})
             queries.append(q)
+
+        # Add Publications to the termInfo object
+        if vfbTerm.pubs and len(vfbTerm.pubs) > 0:
+            publications = []
+            for pub in vfbTerm.pubs:
+                if pub.get_miniref():
+                    publication = {}
+                    publication["title"] = pub.core.label if pub.core.label else ""
+                    publication["short_form"] = pub.core.short_form if pub.core.short_form else ""
+                    publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
+
+                    # Add external references
+                    refs = []
+                    if hasattr(pub, 'PubMed') and pub.PubMed:
+                        refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={pub.PubMed}")
+                    if hasattr(pub, 'FlyBase') and pub.FlyBase:
+                        refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
+                    if hasattr(pub, 'DOI') and pub.DOI:
+                        refs.append(f"https://doi.org/{pub.DOI}")
+
+                    publication["refs"] = refs
+                    publications.append(publication)
+
+            termInfo["Publications"] = publications
+
+        # Add Synonyms for Class entities
+        if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
+            synonyms = []
+            for syn in vfbTerm.pub_syn:
+                if hasattr(syn, 'synonym') and syn.synonym:
+                    synonym = {}
+                    synonym["label"] = syn.synonym.label if hasattr(syn.synonym, 'label') else ""
+                    synonym["scope"] = syn.synonym.scope if hasattr(syn.synonym, 'scope') else "exact"
+                    synonym["type"] = syn.synonym.type if hasattr(syn.synonym, 'type') else "synonym"
+
+                    # Enhanced publication handling - handle multiple publications
+                    if hasattr(syn, 'pubs') and syn.pubs:
+                        pub_refs = []
+                        for pub in syn.pubs:
+                            if hasattr(pub, 'get_microref') and pub.get_microref():
+                                pub_refs.append(pub.get_microref())
+
+                        if pub_refs:
+                            # Join multiple publication references with commas
+                            synonym["publication"] = ", ".join(pub_refs)
+                    # Fallback to single pub if pubs collection not available
+                    elif hasattr(syn, 'pub') and syn.pub and hasattr(syn.pub, 'get_microref'):
+                        synonym["publication"] = syn.pub.get_microref()
+
+                    synonyms.append(synonym)
+
+            # Only add the synonyms if we found any
+            if synonyms:
+                termInfo["Synonyms"] = synonyms
+
+        # Alternative approach for extracting synonyms from relationships
+        if "Class" in termInfo["SuperTypes"] and vfbTerm.relationships and len(vfbTerm.relationships) > 0:
+            synonyms = []
+            for relationship in vfbTerm.relationships:
+                if (relationship.relation.label == "has_exact_synonym" or
+                    relationship.relation.label == "has_broad_synonym" or
+                    relationship.relation.label == "has_narrow_synonym"):
+
+                    synonym = {}
+                    synonym["label"] = relationship.object.label
+
+                    # Determine scope based on relation type
+                    if relationship.relation.label == "has_exact_synonym":
+                        synonym["scope"] = "exact"
+                    elif relationship.relation.label == "has_broad_synonym":
+                        synonym["scope"] = "broad"
+                    elif relationship.relation.label == "has_narrow_synonym":
+                        synonym["scope"] = "narrow"
+
+                    synonym["type"] = "synonym"
+                    synonyms.append(synonym)
+
+            # Only add the synonyms if we found any
+            if synonyms and "Synonyms" not in termInfo:
+                termInfo["Synonyms"] = synonyms
+
+        # Special handling for Publication entities
+        if termInfo["SuperTypes"] and "Publication" in termInfo["SuperTypes"] and vfbTerm.pub_specific_content:
+            publication = {}
+            publication["title"] = vfbTerm.pub_specific_content.title if hasattr(vfbTerm.pub_specific_content, 'title') else ""
+            publication["short_form"] = vfbTerm.term.core.short_form
+            publication["microref"] = termInfo["Name"]
+
+            # Add external references
+            refs = []
+            if hasattr(vfbTerm.pub_specific_content, 'PubMed') and vfbTerm.pub_specific_content.PubMed:
+                refs.append(f"http://www.ncbi.nlm.nih.gov/pubmed/?term={vfbTerm.pub_specific_content.PubMed}")
+            if hasattr(vfbTerm.pub_specific_content, 'FlyBase') and vfbTerm.pub_specific_content.FlyBase:
+                refs.append(f"http://flybase.org/reports/{vfbTerm.pub_specific_content.FlyBase}")
+            if hasattr(vfbTerm.pub_specific_content, 'DOI') and vfbTerm.pub_specific_content.DOI:
+                refs.append(f"https://doi.org/{vfbTerm.pub_specific_content.DOI}")
+
+            publication["refs"] = refs
+            termInfo["Publications"] = [publication]
+
+        # Append new synonyms to any existing ones
+        if synonyms:
+            if "Synonyms" not in termInfo:
+                termInfo["Synonyms"] = synonyms
+            else:
+                # Create a set of existing synonym labels to avoid duplicates
+                existing_labels = {syn["label"] for syn in termInfo["Synonyms"]}
+                # Only append synonyms that don't already exist
+                for synonym in synonyms:
+                    if synonym["label"] not in existing_labels:
+                        termInfo["Synonyms"].append(synonym)
+                        existing_labels.add(synonym["label"])
+
         # Add the queries to the term info
         termInfo["Queries"] = queries
 
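
The Publications and Synonyms entries built above are plain lists of dicts, matching the new schema fields (fields.List of fields.Dict). Their approximate shapes, with made-up values:

publication = {
    "title": "An example paper title",                       # pub.core.label
    "short_form": "FBrf_0000000",                            # made-up reference ID
    "microref": "Author et al., 2020",                       # pub.get_microref()
    "refs": [                                                # external links, when present
        "http://www.ncbi.nlm.nih.gov/pubmed/?term=12345678",
        "https://doi.org/10.0000/example",
    ],
}

synonym = {
    "label": "alternative name",
    "scope": "exact",                      # or "broad" / "narrow"
    "type": "synonym",
    "publication": "Author et al., 2020",  # only set when a microref is available
}
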
@@ -487,7 +732,12 @@
     if "Queries" in termInfo:
         termInfo["Queries"] = [query.to_dict() for query in termInfo["Queries"]]
     # print("termInfo object before schema validation:", termInfo)
-    return TermInfoOutputSchema().load(termInfo)
+    try:
+        return TermInfoOutputSchema().load(termInfo)
+    except ValidationError as e:
+        print(f"Validation error when parsing term info: {e}")
+        # Return the raw termInfo as a fallback
+        return termInfo
 
 def NeuronInputsTo_to_schema(name, take_default):
     query = "NeuronInputsTo"
@@ -549,24 +799,37 @@ def get_term_info(short_form: str, preview: bool = False):
     try:
         # Search for the term in the SOLR server
         results = vfb_solr.search('id:' + short_form)
-        sanitized_results = serialize_solr_output(results)
-        print(sanitized_results)
         # Check if any results were returned
         parsed_object = term_info_parse_object(results, short_form)
-        term_info = fill_query_results(parsed_object)
-        if not term_info:
-            print("Failed to fill query preview results!")
-            return term_info
-        return parsed_object
+        if parsed_object:
+            term_info = fill_query_results(parsed_object)
+            if not term_info:
+                print("Failed to fill query preview results!")
+                return parsed_object
+            return parsed_object
+        else:
+            print(f"No valid term info found for ID '{short_form}'")
+            return None
     except ValidationError as e:
         # handle the validation error
         print("Schema validation error when parsing response")
         print("Error details:", e)
         print("Original data:", results)
         print("Parsed object:", parsed_object)
-    except IndexError:
+        return parsed_object
+    except IndexError as e:
         print(f"No results found for ID '{short_form}'")
-        print("Error accessing SOLR server!")
+        print("Error details:", e)
+        if parsed_object:
+            print("Parsed object:", parsed_object)
+            if 'term_info' in locals():
+                print("Term info:", term_info)
+        else:
+            print("Error accessing SOLR server!")
+        return None
+    except Exception as e:
+        print(f"Unexpected error when retrieving term info: {type(e).__name__}: {e}")
+        return parsed_object
 
 def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     """
@@ -583,7 +846,7 @@ def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
     RETURN COUNT(r) AS total_count
     """
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(dict_cursor(count_results))
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     # Define the main Cypher query
@@ -613,8 +876,11 @@
     results = vc.nc.commit_list([query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(dict_cursor(results))
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
 
+    columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     if return_dataframe:
         return df
 
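
Each query function now routes its label-like DataFrame columns through encode_markdown_links before returning, so brackets inside names can no longer break the markdown links. A quick sketch of the effect (the row is made up; assumes encode_markdown_links is importable from vfbquery.vfb_queries):

import pandas as pd
from vfbquery.vfb_queries import encode_markdown_links

df = pd.DataFrame({
    "label": ["[DM1 (vPN)](VFB_00000001)"],  # made-up label and ID
    "parent": [None],                        # nulls are passed through untouched
})
df = encode_markdown_links(df, ["label", "parent"])
print(df["label"][0])  # -> [DM1 %28vPN%29](VFB_00000001)
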
@@ -668,7 +934,7 @@ def get_templates(limit: int = -1, return_dataframe: bool = False):
     RETURN COUNT(DISTINCT t) AS total_count"""
 
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(dict_cursor(count_results))
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     # Define the main Cypher query
@@ -693,7 +959,10 @@
     results = vc.nc.commit_list([query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(dict_cursor(results))
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
+
+    columns_to_encode = ['name', 'dataset', 'license', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
 
     template_order = ["VFB_00101567","VFB_00200000","VFB_00017894","VFB_00101384","VFB_00050000","VFB_00049000","VFB_00100000","VFB_00030786","VFB_00110000","VFB_00120000"]
 
@@ -792,7 +1061,7 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram
     RETURN COUNT(DISTINCT n2) AS total_count"""
 
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(dict_cursor(count_results))
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class)
@@ -818,8 +1087,11 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram
     results = vc.nc.commit_list([main_query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(dict_cursor(results))
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
 
+    columns_to_encode = ['name', 'source', 'source_id', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     if return_dataframe:
         return df
     else:
@@ -879,7 +1151,7 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
     RETURN COUNT(DISTINCT c) AS total_count"""
 
     count_results = vc.nc.commit_list([count_query])
-    count_df = pd.DataFrame.from_records(dict_cursor(count_results))
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
     total_count = count_df['total_count'][0] if not count_df.empty else 0
 
     # Define the part of the query for normal mode
@@ -918,8 +1190,11 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
     results = vc.nc.commit_list([query])
 
     # Convert the results to a DataFrame
-    df = pd.DataFrame.from_records(dict_cursor(results))
+    df = pd.DataFrame.from_records(get_dict_cursor()(results))
 
+    columns_to_encode = ['Neurotransmitter', 'Type', 'Name', 'Template_Space', 'Imaging_Technique', 'thumbnail']
+    df = encode_markdown_links(df, columns_to_encode)
+
     # If return_dataframe is True, return the results as a DataFrame
     if return_dataframe:
         return df
@@ -1050,4 +1325,3 @@ def fill_query_results(term_info):
     else:
         print("Preview key not found or preview is 0")
     return term_info
-