vfbquery-0.4.1-py3-none-any.whl → vfbquery-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. test/readme_parser.py +29 -27
  2. test/term_info_queries_test.py +46 -34
  3. test/test_dataset_template_queries.py +138 -0
  4. test/test_default_caching.py +89 -84
  5. test/test_examples_code.py +7 -0
  6. test/test_examples_diff.py +95 -172
  7. test/test_expression_overlaps.py +183 -0
  8. test/test_expression_pattern_fragments.py +123 -0
  9. test/test_images_neurons.py +152 -0
  10. test/test_images_that_develop_from.py +112 -0
  11. test/test_lineage_clones_in.py +190 -0
  12. test/test_nblast_queries.py +124 -0
  13. test/test_neuron_classes_fasciculating.py +187 -0
  14. test/test_neuron_inputs.py +193 -0
  15. test/test_neuron_neuron_connectivity.py +89 -0
  16. test/test_neuron_region_connectivity.py +117 -0
  17. test/test_neurons_part_here.py +203 -0
  18. test/test_new_owlery_queries.py +282 -0
  19. test/test_publication_transgene_queries.py +101 -0
  20. test/test_query_performance.py +739 -0
  21. test/test_similar_morphology.py +177 -0
  22. test/test_tracts_nerves_innervating.py +188 -0
  23. test/test_transcriptomics.py +223 -0
  24. vfbquery/__init__.py +47 -35
  25. vfbquery/cached_functions.py +772 -131
  26. vfbquery/neo4j_client.py +120 -0
  27. vfbquery/owlery_client.py +463 -0
  28. vfbquery/solr_cache_integration.py +34 -30
  29. vfbquery/solr_fetcher.py +1 -1
  30. vfbquery/solr_result_cache.py +338 -36
  31. vfbquery/term_info_queries.py +1 -1
  32. vfbquery/vfb_queries.py +2969 -627
  33. vfbquery-0.5.1.dist-info/METADATA +2806 -0
  34. vfbquery-0.5.1.dist-info/RECORD +40 -0
  35. vfbquery-0.4.1.dist-info/METADATA +0 -1315
  36. vfbquery-0.4.1.dist-info/RECORD +0 -19
  37. {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/LICENSE +0 -0
  38. {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/WHEEL +0 -0
  39. {vfbquery-0.4.1.dist-info → vfbquery-0.5.1.dist-info}/top_level.txt +0 -0
vfbquery/vfb_queries.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import pysolr
2
2
  from .term_info_queries import deserialize_term_info
3
- # Replace VfbConnect import with our new SolrTermInfoFetcher
4
- from .solr_fetcher import SolrTermInfoFetcher
3
+ # Replace VfbConnect import with our new SimpleVFBConnect
4
+ from .owlery_client import SimpleVFBConnect
5
5
  # Keep dict_cursor if it's used elsewhere - lazy import to avoid GUI issues
6
6
  from marshmallow import Schema, fields, post_load
7
7
  from typing import List, Tuple, Dict, Any, Union
@@ -11,6 +11,10 @@ import json
11
11
  import numpy as np
12
12
  from urllib.parse import unquote
13
13
  from .solr_result_cache import with_solr_cache
14
+ import time
15
+ import requests
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ import inspect
14
18
 
15
19
  # Custom JSON encoder to handle NumPy and pandas types
16
20
  class NumpyEncoder(json.JSONEncoder):
@@ -49,16 +53,16 @@ def safe_to_dict(df, sort_by_id=True):
49
53
  def get_dict_cursor():
50
54
  """Lazy import dict_cursor to avoid import issues during testing"""
51
55
  try:
52
- from vfb_connect.cross_server_tools import dict_cursor
56
+ from .neo4j_client import dict_cursor
53
57
  return dict_cursor
54
58
  except ImportError as e:
55
- raise ImportError(f"vfb_connect is required but could not be imported: {e}")
59
+ raise ImportError(f"Could not import dict_cursor: {e}")
56
60
 
57
61
  # Connect to the VFB SOLR server
58
62
  vfb_solr = pysolr.Solr('http://solr.virtualflybrain.org/solr/vfb_json/', always_commit=False, timeout=990)
59
63
 
60
- # Replace VfbConnect with SolrTermInfoFetcher
61
- vc = SolrTermInfoFetcher()
64
+ # Replace VfbConnect with SimpleVFBConnect
65
+ vc = SimpleVFBConnect()
62
66
 
63
67
  def initialize_vfb_connect():
64
68
  """
@@ -325,30 +329,54 @@ def encode_markdown_links(df, columns):
325
329
  """
326
330
  Encodes brackets in the labels within markdown links, leaving the link syntax intact.
327
331
  Does NOT encode alt text in linked images ([![...](...)(...)] format).
332
+ Handles multiple comma-separated markdown links in a single string.
328
333
  :param df: DataFrame containing the query results.
329
334
  :param columns: List of column names to apply encoding to.
330
335
  """
336
+ import re
337
+
331
338
  def encode_label(label):
332
339
  if not isinstance(label, str):
333
340
  return label
334
341
 
335
342
  try:
336
- # Skip linked images (format: [![alt text](image_url "title")](link))
337
- # These should NOT be encoded
343
+ # Handle linked images (format: [![alt text](image_url "title")](link))
338
344
  if label.startswith("[!["):
339
- return label
345
+ # Replace http with https in the image URL
346
+ # Pattern: [![anything](http://... "title")](link)
347
+ def secure_image_url(match):
348
+ alt_text = match.group(1)
349
+ image_url = match.group(2)
350
+ title = match.group(3) if match.group(3) else ""
351
+ link = match.group(4)
352
+ secure_url = image_url.replace("http://", "https://")
353
+ if title:
354
+ return f"[![{alt_text}]({secure_url} \"{title}\")]({link})"
355
+ else:
356
+ return f"[![{alt_text}]({secure_url})]({link})"
357
+
358
+ # Regex to match the entire linked image
359
+ pattern = r'\[\!\[([^\]]+)\]\(([^\'"\s]+)(?:\s+[\'"]([^\'"]*)[\'"])?\)\]\(([^)]+)\)'
360
+ encoded_label = re.sub(pattern, secure_image_url, label)
361
+ return encoded_label
340
362
 
341
- # Process regular markdown links
342
- elif label.startswith("[") and "](" in label:
343
- parts = label.split("](")
344
- if len(parts) < 2:
345
- return label
363
+ # Process regular markdown links - handle multiple links separated by commas
364
+ # Pattern matches [label](url) format
365
+ elif "[" in label and "](" in label:
366
+ # Use regex to find all markdown links and encode each one separately
367
+ # Pattern: \[([^\]]+)\]\(([^\)]+)\)
368
+ # Matches: [anything except ]](anything except ))
369
+ def encode_single_link(match):
370
+ label_part = match.group(1) # The label part (between [ and ])
371
+ url_part = match.group(2) # The URL part (between ( and ))
372
+ # Encode brackets in the label part only
373
+ label_part_encoded = encode_brackets(label_part)
374
+ # Ensure URLs use https
375
+ url_part_secure = url_part.replace("http://", "https://")
376
+ return f"[{label_part_encoded}]({url_part_secure})"
346
377
 
347
- label_part = parts[0][1:] # Remove the leading '['
348
- # Encode brackets in the label part
349
- label_part_encoded = encode_brackets(label_part)
350
- # Reconstruct the markdown link with the encoded label
351
- encoded_label = f"[{label_part_encoded}]({parts[1]}"
378
+ # Replace all markdown links with their encoded versions
379
+ encoded_label = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', encode_single_link, label)
352
380
  return encoded_label
353
381
 
354
382
  except Exception as e:
@@ -360,7 +388,9 @@ def encode_markdown_links(df, columns):
360
388
  return label
361
389
 
362
390
  for column in columns:
363
- df[column] = df[column].apply(lambda x: encode_label(x) if pd.notnull(x) else x)
391
+ # Only encode if the column exists in the DataFrame
392
+ if column in df.columns:
393
+ df[column] = df[column].apply(lambda x: encode_label(x) if pd.notnull(x) else x)
364
394
 
365
395
  return df
366
396
 
@@ -657,6 +687,205 @@ def term_info_parse_object(results, short_form):
657
687
  if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron", "has_neuron_connectivity"]):
658
688
  q = NeuronInputsTo_to_schema(termInfo["Name"], {"neuron_short_form": vfbTerm.term.core.short_form})
659
689
  queries.append(q)
690
+ # NeuronNeuronConnectivity query - neurons connected to this neuron
691
+ q = NeuronNeuronConnectivityQuery_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
692
+ queries.append(q)
693
+
694
+ # NeuronsPartHere query - for Class+Anatomy terms (synaptic neuropils, etc.)
695
+ # Matches XMI criteria: Class + Synaptic_neuropil, or other anatomical regions
696
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
697
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
698
+ "Anatomy" in termInfo["SuperTypes"]
699
+ ):
700
+ q = NeuronsPartHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
701
+ queries.append(q)
702
+
703
+ # NeuronsSynaptic query - for synaptic neuropils and visual systems
704
+ # Matches XMI criteria: Class + (Synaptic_neuropil OR Visual_system OR Synaptic_neuropil_domain)
705
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
706
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
707
+ "Visual_system" in termInfo["SuperTypes"] or
708
+ "Synaptic_neuropil_domain" in termInfo["SuperTypes"]
709
+ ):
710
+ q = NeuronsSynaptic_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
711
+ queries.append(q)
712
+
713
+ # NeuronsPresynapticHere query - for synaptic neuropils and visual systems
714
+ # Matches XMI criteria: Class + (Synaptic_neuropil OR Visual_system OR Synaptic_neuropil_domain)
715
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
716
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
717
+ "Visual_system" in termInfo["SuperTypes"] or
718
+ "Synaptic_neuropil_domain" in termInfo["SuperTypes"]
719
+ ):
720
+ q = NeuronsPresynapticHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
721
+ queries.append(q)
722
+
723
+ # NeuronsPostsynapticHere query - for synaptic neuropils and visual systems
724
+ # Matches XMI criteria: Class + (Synaptic_neuropil OR Visual_system OR Synaptic_neuropil_domain)
725
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
726
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
727
+ "Visual_system" in termInfo["SuperTypes"] or
728
+ "Synaptic_neuropil_domain" in termInfo["SuperTypes"]
729
+ ):
730
+ q = NeuronsPostsynapticHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
731
+ queries.append(q)
732
+
733
+ # ComponentsOf query - for clones
734
+ # Matches XMI criteria: Class + Clone
735
+ if contains_all_tags(termInfo["SuperTypes"], ["Class", "Clone"]):
736
+ q = ComponentsOf_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
737
+ queries.append(q)
738
+
739
+ # PartsOf query - for any Class
740
+ # Matches XMI criteria: Class (any)
741
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]):
742
+ q = PartsOf_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
743
+ queries.append(q)
744
+
745
+ # SubclassesOf query - for any Class
746
+ # Matches XMI criteria: Class (any)
747
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]):
748
+ q = SubclassesOf_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
749
+ queries.append(q)
750
+
751
+ # NeuronClassesFasciculatingHere query - for tracts/nerves
752
+ # Matches XMI criteria: Class + Tract_or_nerve (VFB uses Neuron_projection_bundle type)
753
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and "Neuron_projection_bundle" in termInfo["SuperTypes"]:
754
+ q = NeuronClassesFasciculatingHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
755
+ queries.append(q)
756
+
757
+ # TractsNervesInnervatingHere query - for synaptic neuropils
758
+ # Matches XMI criteria: Class + (Synaptic_neuropil OR Synaptic_neuropil_domain)
759
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
760
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
761
+ "Synaptic_neuropil_domain" in termInfo["SuperTypes"]
762
+ ):
763
+ q = TractsNervesInnervatingHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
764
+ queries.append(q)
765
+
766
+ # LineageClonesIn query - for synaptic neuropils
767
+ # Matches XMI criteria: Class + (Synaptic_neuropil OR Synaptic_neuropil_domain)
768
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
769
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
770
+ "Synaptic_neuropil_domain" in termInfo["SuperTypes"]
771
+ ):
772
+ q = LineageClonesIn_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
773
+ queries.append(q)
774
+
775
+ # ImagesNeurons query - for synaptic neuropils
776
+ # Matches XMI criteria: Class + (Synaptic_neuropil OR Synaptic_neuropil_domain)
777
+ # Returns individual neuron images (instances) rather than neuron classes
778
+ if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
779
+ "Synaptic_neuropil" in termInfo["SuperTypes"] or
780
+ "Synaptic_neuropil_domain" in termInfo["SuperTypes"]
781
+ ):
782
+ q = ImagesNeurons_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
783
+ queries.append(q)
784
+
785
+ # ImagesThatDevelopFrom query - for neuroblasts
786
+ # Matches XMI criteria: Class + Neuroblast
787
+ # Returns individual neuron images that develop from the neuroblast
788
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Neuroblast"]):
789
+ q = ImagesThatDevelopFrom_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
790
+ queries.append(q)
791
+
792
+ # epFrag query - for expression patterns
793
+ # Matches XMI criteria: Class + Expression_pattern
794
+ # Returns individual expression pattern fragment images
795
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Expression_pattern"]):
796
+ q = epFrag_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
797
+ queries.append(q)
798
+
799
+ # ExpressionOverlapsHere query - for anatomical regions
800
+ # Matches XMI criteria: Class + Anatomy
801
+ # Returns expression patterns that overlap with the anatomical region
802
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Anatomy"]):
803
+ q = ExpressionOverlapsHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
804
+ queries.append(q)
805
+
806
+ # anatScRNAseqQuery query - for anatomical regions with scRNAseq data
807
+ # Matches XMI criteria: Class + Anatomy + hasScRNAseq
808
+ # Returns scRNAseq clusters and datasets for the anatomical region
809
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Anatomy", "hasScRNAseq"]):
810
+ q = anatScRNAseqQuery_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
811
+ queries.append(q)
812
+
813
+ # clusterExpression query - for clusters
814
+ # Matches XMI criteria: Individual + Cluster
815
+ # Returns genes expressed in the cluster
816
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "Cluster"]):
817
+ q = clusterExpression_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
818
+ queries.append(q)
819
+
820
+ # expressionCluster query - for genes with scRNAseq data
821
+ # Matches XMI criteria: Class + Gene + hasScRNAseq
822
+ # Returns clusters expressing the gene
823
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Gene", "hasScRNAseq"]):
824
+ q = expressionCluster_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
825
+ queries.append(q)
826
+
827
+ # scRNAdatasetData query - for scRNAseq datasets
828
+ # Matches XMI criteria: DataSet + hasScRNAseq
829
+ # Returns all clusters in the dataset
830
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["DataSet", "hasScRNAseq"]):
831
+ q = scRNAdatasetData_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
832
+ queries.append(q)
833
+
834
+ # NBLAST similarity queries
835
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron", "NBLASTexp"]):
836
+ q = SimilarMorphologyToPartOf_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
837
+ queries.append(q)
838
+
839
+ # SimilarMorphologyToPartOfexp query - reverse NBLASTexp
840
+ # Matches XMI criteria: (Individual + Expression_pattern + NBLASTexp) OR (Individual + Expression_pattern_fragment + NBLASTexp)
841
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "NBLASTexp"]) and (
842
+ "Expression_pattern" in termInfo["SuperTypes"] or
843
+ "Expression_pattern_fragment" in termInfo["SuperTypes"]
844
+ ):
845
+ q = SimilarMorphologyToPartOfexp_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
846
+ queries.append(q)
847
+
848
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "neuronbridge"]):
849
+ q = SimilarMorphologyToNB_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
850
+ queries.append(q)
851
+
852
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "Expression_pattern", "neuronbridge"]):
853
+ q = SimilarMorphologyToNBexp_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
854
+ queries.append(q)
855
+
856
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "UNBLAST"]):
857
+ q = SimilarMorphologyToUserData_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
858
+ queries.append(q)
859
+
860
+ # Dataset/Template queries
861
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Template", "Individual"]):
862
+ q = PaintedDomains_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
863
+ queries.append(q)
864
+ q2 = AllAlignedImages_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
865
+ queries.append(q2)
866
+ q3 = AlignedDatasets_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
867
+ queries.append(q3)
868
+
869
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["DataSet", "has_image"]):
870
+ q = DatasetImages_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
871
+ queries.append(q)
872
+
873
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Template"]):
874
+ q = AllDatasets_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
875
+ queries.append(q)
876
+
877
+ # Publication query
878
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Individual", "pub"]):
879
+ q = TermsForPub_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
880
+ queries.append(q)
881
+
882
+ # Transgene expression query
883
+ # Matches XMI criteria: (Class + Nervous_system + Anatomy) OR (Class + Nervous_system + Neuron)
884
+ if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Nervous_system"]) and (
885
+ "Anatomy" in termInfo["SuperTypes"] or "Neuron" in termInfo["SuperTypes"]
886
+ ):
887
+ q = TransgeneExpressionHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
888
+ queries.append(q)
660
889
 
661
890
  # Add Publications to the termInfo object
662
891
  if vfbTerm.pubs and len(vfbTerm.pubs) > 0:
@@ -692,7 +921,6 @@ def term_info_parse_object(results, short_form):
692
921
  synonym["scope"] = syn.synonym.scope if hasattr(syn.synonym, 'scope') else "exact"
693
922
  synonym["type"] = syn.synonym.type if hasattr(syn.synonym, 'type') else "synonym"
694
923
 
695
- # Enhanced publication handling - handle multiple publications
696
924
  if hasattr(syn, 'pubs') and syn.pubs:
697
925
  pub_refs = []
698
926
  for pub in syn.pubs:
@@ -824,728 +1052,2805 @@ def ListAllAvailableImages_to_schema(name, take_default):
824
1052
 
825
1053
  return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
826
1054
 
827
- def serialize_solr_output(results):
828
- # Create a copy of the document and remove Solr-specific fields
829
- doc = dict(results.docs[0])
830
- # Remove the _version_ field which can cause serialization issues with large integers
831
- doc.pop('_version_', None)
1055
+ def NeuronsPartHere_to_schema(name, take_default):
1056
+ """
1057
+ Schema for NeuronsPartHere query.
1058
+ Finds neuron classes that have some part overlapping with the specified anatomical region.
832
1059
 
833
- # Serialize the sanitized dictionary to JSON using NumpyEncoder
834
- json_string = json.dumps(doc, ensure_ascii=False, cls=NumpyEncoder)
835
- json_string = json_string.replace('\\', '')
836
- json_string = json_string.replace('"{', '{')
837
- json_string = json_string.replace('}"', '}')
838
- json_string = json_string.replace("\'", '-')
839
- return json_string
840
-
841
- @with_solr_cache('term_info')
842
- def get_term_info(short_form: str, preview: bool = False):
1060
+ Matching criteria from XMI:
1061
+ - Class + Synaptic_neuropil (types.1 + types.5)
1062
+ - Additional type matches for comprehensive coverage
1063
+
1064
+ Query chain: Owlery subclass query → process → SOLR
1065
+ OWL query: "Neuron and overlaps some $ID"
843
1066
  """
844
- Retrieves the term info for the given term short form.
845
- Results are cached in SOLR for 3 months to improve performance.
1067
+ query = "NeuronsPartHere"
1068
+ label = f"Neurons with some part in {name}"
1069
+ function = "get_neurons_with_part_in"
1070
+ takes = {
1071
+ "short_form": {"$and": ["Class", "Anatomy"]},
1072
+ "default": take_default,
1073
+ }
1074
+ preview = 5 # Show 5 preview results with example images
1075
+ preview_columns = ["id", "label", "tags", "thumbnail"]
846
1076
 
847
- :param short_form: short form of the term
848
- :return: term info
849
- """
850
- parsed_object = None
851
- try:
852
- # Search for the term in the SOLR server
853
- results = vfb_solr.search('id:' + short_form)
854
- # Check if any results were returned
855
- parsed_object = term_info_parse_object(results, short_form)
856
- if parsed_object:
857
- # Only try to fill query results if there are queries to fill
858
- if parsed_object.get('Queries') and len(parsed_object['Queries']) > 0:
859
- try:
860
- term_info = fill_query_results(parsed_object)
861
- if term_info:
862
- return term_info
863
- else:
864
- print("Failed to fill query preview results!")
865
- # Set default values for queries when fill_query_results fails
866
- for query in parsed_object.get('Queries', []):
867
- # Set default preview_results structure
868
- query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
869
- # Set count to 0 when we can't get the real count
870
- query['count'] = 0
871
- return parsed_object
872
- except Exception as e:
873
- print(f"Error filling query results (setting default values): {e}")
874
- # Set default values for queries when fill_query_results fails
875
- for query in parsed_object.get('Queries', []):
876
- # Set default preview_results structure
877
- query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
878
- # Set count to 0 when we can't get the real count
879
- query['count'] = 0
880
- return parsed_object
881
- else:
882
- # No queries to fill, return parsed object directly
883
- return parsed_object
884
- else:
885
- print(f"No valid term info found for ID '{short_form}'")
886
- return None
887
- except ValidationError as e:
888
- # handle the validation error
889
- print("Schema validation error when parsing response")
890
- print("Error details:", e)
891
- print("Original data:", results)
892
- print("Parsed object:", parsed_object)
893
- return parsed_object
894
- except IndexError as e:
895
- print(f"No results found for ID '{short_form}'")
896
- print("Error details:", e)
897
- if parsed_object:
898
- print("Parsed object:", parsed_object)
899
- if 'term_info' in locals():
900
- print("Term info:", term_info)
901
- else:
902
- print("Error accessing SOLR server!")
903
- return None
904
- except Exception as e:
905
- print(f"Unexpected error when retrieving term info: {type(e).__name__}: {e}")
906
- return parsed_object
1077
+ return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
907
1078
 
908
- @with_solr_cache('instances')
909
- def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
910
- """
911
- Retrieves available instances for the given class short form.
912
- Uses SOLR term_info data when Neo4j is unavailable (fallback mode).
913
- :param short_form: short form of the class
914
- :param limit: maximum number of results to return (default -1, returns all results)
915
- :return: results rows
1079
+
1080
+ def NeuronsSynaptic_to_schema(name, take_default):
916
1081
  """
1082
+ Schema for NeuronsSynaptic query.
1083
+ Finds neuron classes that have synaptic terminals in the specified anatomical region.
917
1084
 
918
- try:
919
- # Try to use original Neo4j implementation first
920
- # Get the total count of rows
921
- count_query = f"""
922
- MATCH (i:Individual:has_image)-[:INSTANCEOF]->(p:Class {{ short_form: '{short_form}' }}),
923
- (i)<-[:depicts]-(:Individual)-[r:in_register_with]->(:Template)
924
- RETURN COUNT(r) AS total_count
925
- """
926
- count_results = vc.nc.commit_list([count_query])
927
- count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
928
- total_count = count_df['total_count'][0] if not count_df.empty else 0
1085
+ Matching criteria from XMI:
1086
+ - Class + Synaptic_neuropil
1087
+ - Class + Visual_system
1088
+ - Class + Synaptic_neuropil_domain
1089
+
1090
+ Query chain: Owlery subclass query → process → SOLR
1091
+ OWL query: "Neuron and has_synaptic_terminals_in some $ID"
1092
+ """
1093
+ query = "NeuronsSynaptic"
1094
+ label = f"Neurons with synaptic terminals in {name}"
1095
+ function = "get_neurons_with_synapses_in"
1096
+ takes = {
1097
+ "short_form": {"$and": ["Class", "Anatomy"]},
1098
+ "default": take_default,
1099
+ }
1100
+ preview = 5
1101
+ preview_columns = ["id", "label", "tags", "thumbnail"]
929
1102
 
930
- # Define the main Cypher query
931
- query = f"""
932
- MATCH (i:Individual:has_image)-[:INSTANCEOF]->(p:Class {{ short_form: '{short_form}' }}),
933
- (i)<-[:depicts]-(:Individual)-[r:in_register_with]->(:Template)-[:depicts]->(templ:Template),
934
- (i)-[:has_source]->(ds:DataSet)
935
- OPTIONAL MATCH (i)-[rx:database_cross_reference]->(site:Site)
936
- OPTIONAL MATCH (ds)-[:license|licence]->(lic:License)
937
- RETURN i.short_form as id,
938
- apoc.text.format("[%s](%s)",[COALESCE(i.symbol[0],i.label),i.short_form]) AS label,
939
- apoc.text.join(i.uniqueFacets, '|') AS tags,
940
- apoc.text.format("[%s](%s)",[COALESCE(p.symbol[0],p.label),p.short_form]) AS parent,
941
- REPLACE(apoc.text.format("[%s](%s)",[COALESCE(site.symbol[0],site.label),site.short_form]), '[null](null)', '') AS source,
942
- REPLACE(apoc.text.format("[%s](%s)",[rx.accession[0],site.link_base[0] + rx.accession[0]]), '[null](null)', '') AS source_id,
943
- apoc.text.format("[%s](%s)",[COALESCE(templ.symbol[0],templ.label),templ.short_form]) AS template,
944
- apoc.text.format("[%s](%s)",[COALESCE(ds.symbol[0],ds.label),ds.short_form]) AS dataset,
945
- REPLACE(apoc.text.format("[%s](%s)",[COALESCE(lic.symbol[0],lic.label),lic.short_form]), '[null](null)', '') AS license,
946
- REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(i.symbol[0],i.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(r.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(i.symbol[0],i.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + i.short_form]), "[![null]( 'null')](null)", "") as thumbnail
947
- ORDER BY id Desc
948
- """
1103
+ return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
949
1104
 
950
- if limit != -1:
951
- query += f" LIMIT {limit}"
952
1105
 
953
- # Run the query using VFB_connect
954
- results = vc.nc.commit_list([query])
955
-
956
- # Convert the results to a DataFrame
957
- df = pd.DataFrame.from_records(get_dict_cursor()(results))
1106
+ def NeuronsPresynapticHere_to_schema(name, take_default):
1107
+ """
1108
+ Schema for NeuronsPresynapticHere query.
1109
+ Finds neuron classes that have presynaptic terminals in the specified anatomical region.
1110
+
1111
+ Matching criteria from XMI:
1112
+ - Class + Synaptic_neuropil
1113
+ - Class + Visual_system
1114
+ - Class + Synaptic_neuropil_domain
1115
+
1116
+ Query chain: Owlery subclass query → process → SOLR
1117
+ OWL query: "Neuron and has_presynaptic_terminal_in some $ID"
1118
+ """
1119
+ query = "NeuronsPresynapticHere"
1120
+ label = f"Neurons with presynaptic terminals in {name}"
1121
+ function = "get_neurons_with_presynaptic_terminals_in"
1122
+ takes = {
1123
+ "short_form": {"$and": ["Class", "Anatomy"]},
1124
+ "default": take_default,
1125
+ }
1126
+ preview = 5
1127
+ preview_columns = ["id", "label", "tags", "thumbnail"]
958
1128
 
959
- columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
960
- df = encode_markdown_links(df, columns_to_encode)
961
-
962
- if return_dataframe:
963
- return df
1129
+ return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
964
1130
 
965
- # Format the results
966
- formatted_results = {
967
- "headers": _get_instances_headers(),
968
- "rows": [
969
- {
970
- key: row[key]
971
- for key in [
972
- "id",
973
- "label",
974
- "tags",
975
- "parent",
976
- "source",
977
- "source_id",
978
- "template",
979
- "dataset",
980
- "license",
981
- "thumbnail"
982
- ]
983
- }
984
- for row in safe_to_dict(df)
985
- ],
986
- "count": total_count
987
- }
988
1131
 
989
- return formatted_results
990
-
991
- except Exception as e:
992
- # Fallback to SOLR-based implementation when Neo4j is unavailable
993
- print(f"Neo4j unavailable ({e}), using SOLR fallback for get_instances")
994
- return _get_instances_from_solr(short_form, return_dataframe, limit)
1132
+ def NeuronsPostsynapticHere_to_schema(name, take_default):
1133
+ """
1134
+ Schema for NeuronsPostsynapticHere query.
1135
+ Finds neuron classes that have postsynaptic terminals in the specified anatomical region.
1136
+
1137
+ Matching criteria from XMI:
1138
+ - Class + Synaptic_neuropil
1139
+ - Class + Visual_system
1140
+ - Class + Synaptic_neuropil_domain
1141
+
1142
+ Query chain: Owlery subclass query → process → SOLR
1143
+ OWL query: "Neuron and has_postsynaptic_terminal_in some $ID"
1144
+ """
1145
+ query = "NeuronsPostsynapticHere"
1146
+ label = f"Neurons with postsynaptic terminals in {name}"
1147
+ function = "get_neurons_with_postsynaptic_terminals_in"
1148
+ takes = {
1149
+ "short_form": {"$and": ["Class", "Anatomy"]},
1150
+ "default": take_default,
1151
+ }
1152
+ preview = 5
1153
+ preview_columns = ["id", "label", "tags", "thumbnail"]
995
1154
 
996
- def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int = -1):
1155
+ return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
1156
+
1157
+
1158
def ComponentsOf_to_schema(name, take_default):
    """Build the ComponentsOf query schema.

    Lists components (parts) of the given anatomical class.

    XMI matching criteria: Class + Clone.
    Query chain: Owlery part_of query -> process -> SOLR.
    OWL query: "part_of some $ID".
    """
    return Query(
        query="ComponentsOf",
        label=f"Components of {name}",
        function="get_components_of",
        takes={
            "short_form": {"$and": ["Class", "Anatomy"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1180
+
1181
+
1182
def PartsOf_to_schema(name, take_default):
    """Build the PartsOf query schema.

    Lists parts of the given anatomical class.

    XMI matching criteria: Class (any).
    Query chain: Owlery part_of query -> process -> SOLR.
    OWL query: "part_of some $ID".
    """
    return Query(
        query="PartsOf",
        label=f"Parts of {name}",
        function="get_parts_of",
        takes={
            "short_form": {"$and": ["Class"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1204
+
1205
+
1206
def SubclassesOf_to_schema(name, take_default):
    """Build the SubclassesOf query schema.

    Lists subclasses of the given class.

    XMI matching criteria: Class (any).
    Query chain: Owlery subclasses query -> process -> SOLR.
    OWL query: direct subclasses of $ID.
    """
    return Query(
        query="SubclassesOf",
        label=f"Subclasses of {name}",
        function="get_subclasses_of",
        takes={
            "short_form": {"$and": ["Class"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1228
+
1229
+
1230
def NeuronClassesFasciculatingHere_to_schema(name, take_default):
    """Build the NeuronClassesFasciculatingHere query schema.

    Lists neuron classes that fasciculate with (run along) a tract or nerve.

    XMI matching criteria: Class + Tract_or_nerve (VFB uses the
    Neuron_projection_bundle type).
    Query chain: Owlery subclass query -> process -> SOLR.
    OWL query: 'Neuron' that 'fasciculates with' some '{short_form}'.
    """
    return Query(
        query="NeuronClassesFasciculatingHere",
        label=f"Neurons fasciculating in {name}",
        function="get_neuron_classes_fasciculating_here",
        takes={
            "short_form": {"$and": ["Class", "Neuron_projection_bundle"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1252
+
1253
+
1254
def NeuronNeuronConnectivityQuery_to_schema(name, take_default):
    """Build the neuron_neuron_connectivity_query schema.

    Lists neurons connected to the given neuron.

    XMI matching criteria: Connected_neuron.
    Query chain: Neo4j compound query -> process.
    """
    return Query(
        query="NeuronNeuronConnectivityQuery",
        label=f"Neurons connected to {name}",
        function="get_neuron_neuron_connectivity",
        takes={
            "short_form": {"$and": ["Individual", "Connected_neuron"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "outputs", "inputs", "tags"],
    )
1271
+
1272
+
1273
def NeuronRegionConnectivityQuery_to_schema(name, take_default):
    """Build the neuron_region_connectivity_query schema.

    Shows per-region connectivity for the given neuron.

    XMI matching criteria: Region_connectivity.
    Query chain: Neo4j compound query -> process.
    """
    return Query(
        query="NeuronRegionConnectivityQuery",
        label=f"Connectivity per region for {name}",
        function="get_neuron_region_connectivity",
        takes={
            "short_form": {"$and": ["Individual", "Region_connectivity"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "region", "presynaptic_terminals", "postsynaptic_terminals", "tags"],
    )
1290
+
1291
+
1292
def TractsNervesInnervatingHere_to_schema(name, take_default):
    """Build the TractsNervesInnervatingHere query schema.

    Lists tracts and nerves that innervate a synaptic neuropil.

    XMI matching criteria: Class + Synaptic_neuropil, or
    Class + Synaptic_neuropil_domain.
    Query chain: Owlery subclass query -> process -> SOLR.
    OWL query: 'Tract_or_nerve' that 'innervates' some '{short_form}'.
    """
    return Query(
        query="TractsNervesInnervatingHere",
        label=f"Tracts/nerves innervating {name}",
        function="get_tracts_nerves_innervating_here",
        takes={
            "short_form": {"$or": [{"$and": ["Class", "Synaptic_neuropil"]}, {"$and": ["Class", "Synaptic_neuropil_domain"]}]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1315
+
1316
+
1317
def LineageClonesIn_to_schema(name, take_default):
    """
    Schema for LineageClonesIn query.
    Finds lineage clones that overlap with a synaptic neuropil or domain.

    Matching criteria from XMI:
    - Class + Synaptic_neuropil
    - Class + Synaptic_neuropil_domain

    Query chain: Owlery subclass query → process → SOLR
    OWL query: 'Clone' that 'overlaps' some '{short_form}'

    Fix: ``takes`` previously matched only Class + Synaptic_neuropil,
    contradicting the XMI criteria above and the sibling schemas
    (TractsNervesInnervatingHere, ImagesNeurons) which accept both neuropils
    and neuropil domains. It now matches either.
    """
    query = "LineageClonesIn"
    label = f"Lineage clones found in {name}"
    function = "get_lineage_clones_in"
    takes = {
        # $or so both Synaptic_neuropil and Synaptic_neuropil_domain terms
        # qualify, per the documented XMI matching criteria.
        "short_form": {"$or": [{"$and": ["Class", "Synaptic_neuropil"]}, {"$and": ["Class", "Synaptic_neuropil_domain"]}]},
        "default": take_default,
    }
    preview = 5
    preview_columns = ["id", "label", "tags", "thumbnail"]

    return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
1340
+
1341
+
1342
def ImagesNeurons_to_schema(name, take_default):
    """Build the ImagesNeurons query schema.

    Lists individual neuron images with parts in a synaptic neuropil or domain.

    XMI matching criteria: Class + Synaptic_neuropil, or
    Class + Synaptic_neuropil_domain.
    Query chain: Owlery instances query -> process -> SOLR.
    OWL query: 'Neuron' that 'overlaps' some '{short_form}' (returns
    instances, not classes).
    """
    return Query(
        query="ImagesNeurons",
        label=f"Images of neurons with some part in {name}",
        function="get_images_neurons",
        takes={
            "short_form": {"$or": [{"$and": ["Class", "Synaptic_neuropil"]}, {"$and": ["Class", "Synaptic_neuropil_domain"]}]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1365
+
1366
+
1367
def ImagesThatDevelopFrom_to_schema(name, take_default):
    """Build the ImagesThatDevelopFrom query schema.

    Lists individual neuron images that develop from a neuroblast.

    XMI matching criteria: Class + Neuroblast.
    Query chain: Owlery instances query -> process -> SOLR.
    OWL query: 'Neuron' that 'develops_from' some '{short_form}' (returns
    instances, not classes).
    """
    return Query(
        query="ImagesThatDevelopFrom",
        label=f"Images of neurons that develop from {name}",
        function="get_images_that_develop_from",
        takes={
            "short_form": {"$and": ["Class", "Neuroblast"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1389
+
1390
+
1391
def epFrag_to_schema(name, take_default):
    """Build the epFrag query schema.

    Lists individual expression-pattern-fragment images that are part of an
    expression pattern.

    XMI source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
    XMI matching criteria: Class + Expression_pattern.
    Query chain: Owlery instances query -> process -> SOLR.
    OWL query: instances that are 'part_of' some '{short_form}' (returns
    instances, not classes).
    """
    return Query(
        query="epFrag",
        label=f"Images of fragments of {name}",
        function="get_expression_pattern_fragments",
        takes={
            "short_form": {"$and": ["Class", "Expression_pattern"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
1415
+
1416
+
1417
def ExpressionOverlapsHere_to_schema(name, take_default):
    """Build the ExpressionOverlapsHere query schema.

    Lists expression patterns that overlap the given anatomical region.

    XMI source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
    XMI matching criteria: Class + Anatomy.
    Query chain: Neo4j anat_2_ep_query -> process.
    Cypher: MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class)
            WHERE anat.short_form = $id
    """
    return Query(
        query="ExpressionOverlapsHere",
        label=f"Expression patterns overlapping {name}",
        function="get_expression_overlaps_here",
        takes={
            "short_form": {"$and": ["Class", "Anatomy"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "name", "tags", "pubs"],
    )
1442
+
1443
+
1444
def anatScRNAseqQuery_to_schema(name, take_default):
    """Build the anatScRNAseqQuery query schema.

    Returns single-cell transcriptomics data (clusters and datasets) for an
    anatomical region.

    XMI source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
    XMI matching criteria: Class + Anatomy + hasScRNAseq (has Single Cell RNA
    Seq Results).
    Query chain: Owlery Subclasses -> Owlery Pass -> Neo4j anat_scRNAseq_query.
    Cypher: MATCH (primary:Class:Anatomy)<-[:composed_primarily_of]-(c:Cluster)-[:has_source]->(ds:scRNAseq_DataSet)
            WHERE primary.short_form = $id
    """
    return Query(
        query="anatScRNAseqQuery",
        label=f"scRNAseq data for {name}",
        function="get_anatomy_scrnaseq",
        takes={
            "short_form": {"$and": ["Class", "Anatomy", "hasScRNAseq"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "name", "tags", "dataset", "pubs"],
    )
1469
+
1470
+
1471
def clusterExpression_to_schema(name, take_default):
    """Build the clusterExpression query schema.

    Returns genes expressed in the given cluster together with their
    expression levels.

    XMI source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
    XMI matching criteria: Individual + Cluster.
    Query chain: Neo4j cluster_expression_query -> process.
    Cypher: MATCH (primary:Individual:Cluster)-[e:expresses]->(g:Gene:Class)
            WHERE primary.short_form = $id
    """
    return Query(
        query="clusterExpression",
        label=f"Genes expressed in {name}",
        function="get_cluster_expression",
        takes={
            "short_form": {"$and": ["Individual", "Cluster"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "name", "tags", "expression_level", "expression_extent"],
    )
1496
+
1497
+
1498
def expressionCluster_to_schema(name, take_default):
    """Build the expressionCluster query schema.

    Returns scRNAseq clusters expressing the given gene.

    XMI source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
    XMI matching criteria: Class + Gene + hasScRNAseq (has Single Cell RNA Seq
    Results).
    Query chain: Neo4j expression_cluster_query -> process.
    Cypher: MATCH (primary:Individual:Cluster)-[e:expresses]->(g:Gene:Class)
            WHERE g.short_form = $id
    """
    return Query(
        query="expressionCluster",
        label=f"Clusters expressing {name}",
        function="get_expression_cluster",
        takes={
            "short_form": {"$and": ["Class", "Gene", "hasScRNAseq"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "name", "tags", "expression_level", "expression_extent"],
    )
1523
+
1524
+
1525
def scRNAdatasetData_to_schema(name, take_default):
    """Build the scRNAdatasetData query schema.

    Returns all clusters in an scRNAseq dataset.

    XMI source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
    XMI matching criteria: DataSet + hasScRNAseq (scRNAseq dataset type).
    Query chain: Neo4j dataset_scRNAseq_query -> process.
    Cypher: MATCH (c:Individual:Cluster)-[:has_source]->(ds:scRNAseq_DataSet)
            WHERE ds.short_form = $id
    """
    return Query(
        query="scRNAdatasetData",
        label=f"Clusters in dataset {name}",
        function="get_scrnaseq_dataset_data",
        takes={
            "short_form": {"$and": ["DataSet", "hasScRNAseq"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "name", "tags", "anatomy", "pubs"],
    )
1550
+
1551
+
1552
def SimilarMorphologyToPartOf_to_schema(name, take_default):
    """Schema for SimilarMorphologyToPartOf (NBLASTexp) query."""
    takes = {
        "short_form": {"$and": ["Individual", "Neuron", "NBLASTexp"]},
        "default": take_default,
    }
    return Query(
        query="SimilarMorphologyToPartOf",
        label=f"Similar morphology to part of {name}",
        function="get_similar_morphology_part_of",
        takes=takes,
        preview=5,
        preview_columns=["id", "name", "score", "tags"],
    )
1555
+
1556
+
1557
def SimilarMorphologyToPartOfexp_to_schema(name, take_default):
    """Schema for SimilarMorphologyToPartOfexp (reverse NBLASTexp) query."""
    takes = {
        "short_form": {"$or": [{"$and": ["Individual", "Expression_pattern", "NBLASTexp"]}, {"$and": ["Individual", "Expression_pattern_fragment", "NBLASTexp"]}]},
        "default": take_default,
    }
    return Query(
        query="SimilarMorphologyToPartOfexp",
        label=f"Similar morphology to part of {name}",
        function="get_similar_morphology_part_of_exp",
        takes=takes,
        preview=5,
        preview_columns=["id", "name", "score", "tags"],
    )
1560
+
1561
+
1562
def SimilarMorphologyToNB_to_schema(name, take_default):
    """Schema for SimilarMorphologyToNB (NeuronBridge) query."""
    takes = {
        "short_form": {"$and": ["Individual", "neuronbridge"]},
        "default": take_default,
    }
    return Query(
        query="SimilarMorphologyToNB",
        label=f"NeuronBridge matches for {name}",
        function="get_similar_morphology_nb",
        takes=takes,
        preview=5,
        preview_columns=["id", "name", "score", "tags"],
    )
1565
+
1566
+
1567
def SimilarMorphologyToNBexp_to_schema(name, take_default):
    """Schema for SimilarMorphologyToNBexp (NeuronBridge expression) query."""
    takes = {
        "short_form": {"$and": ["Individual", "Expression_pattern", "neuronbridge"]},
        "default": take_default,
    }
    return Query(
        query="SimilarMorphologyToNBexp",
        label=f"NeuronBridge matches for {name}",
        function="get_similar_morphology_nb_exp",
        takes=takes,
        preview=5,
        preview_columns=["id", "name", "score", "tags"],
    )
1570
+
1571
+
1572
def SimilarMorphologyToUserData_to_schema(name, take_default):
    """Schema for SimilarMorphologyToUserData (user upload NBLAST) query."""
    takes = {
        "short_form": {"$and": ["Individual", "UNBLAST"]},
        "default": take_default,
    }
    return Query(
        query="SimilarMorphologyToUserData",
        label=f"NBLAST results for {name}",
        function="get_similar_morphology_userdata",
        takes=takes,
        preview=5,
        preview_columns=["id", "name", "score"],
    )
1575
+
1576
+
1577
def PaintedDomains_to_schema(name, take_default):
    """Schema for PaintedDomains query."""
    takes = {
        "short_form": {"$and": ["Template", "Individual"]},
        "default": take_default,
    }
    return Query(
        query="PaintedDomains",
        label=f"Painted domains for {name}",
        function="get_painted_domains",
        takes=takes,
        preview=10,
        preview_columns=["id", "name", "type", "thumbnail"],
    )
1580
+
1581
+
1582
def DatasetImages_to_schema(name, take_default):
    """Schema for DatasetImages query."""
    takes = {
        "short_form": {"$and": ["DataSet", "has_image"]},
        "default": take_default,
    }
    return Query(
        query="DatasetImages",
        label=f"Images in dataset {name}",
        function="get_dataset_images",
        takes=takes,
        preview=10,
        preview_columns=["id", "name", "tags", "type"],
    )
1585
+
1586
+
1587
def AllAlignedImages_to_schema(name, take_default):
    """Schema for AllAlignedImages query."""
    takes = {
        "short_form": {"$and": ["Template", "Individual"]},
        "default": take_default,
    }
    return Query(
        query="AllAlignedImages",
        label=f"All images aligned to {name}",
        function="get_all_aligned_images",
        takes=takes,
        preview=10,
        preview_columns=["id", "name", "tags", "type"],
    )
1590
+
1591
+
1592
def AlignedDatasets_to_schema(name, take_default):
    """Schema for AlignedDatasets query."""
    takes = {
        "short_form": {"$and": ["Template", "Individual"]},
        "default": take_default,
    }
    return Query(
        query="AlignedDatasets",
        label=f"Datasets aligned to {name}",
        function="get_aligned_datasets",
        takes=takes,
        preview=10,
        preview_columns=["id", "name", "tags"],
    )
1595
+
1596
+
1597
def AllDatasets_to_schema(name, take_default):
    """Schema for AllDatasets query."""
    # Label is fixed text; this query is not scoped to a specific term name.
    takes = {
        "short_form": {"$and": ["Template"]},
        "default": take_default,
    }
    return Query(
        query="AllDatasets",
        label="All available datasets",
        function="get_all_datasets",
        takes=takes,
        preview=10,
        preview_columns=["id", "name", "tags"],
    )
1600
+
1601
+
1602
def TermsForPub_to_schema(name, take_default):
    """Schema for TermsForPub query."""
    takes = {
        "short_form": {"$and": ["Individual", "pub"]},
        "default": take_default,
    }
    return Query(
        query="TermsForPub",
        label=f"Terms referencing {name}",
        function="get_terms_for_pub",
        takes=takes,
        preview=10,
        preview_columns=["id", "name", "tags", "type"],
    )
1605
+
1606
+
1607
def TransgeneExpressionHere_to_schema(name, take_default):
    """Schema for TransgeneExpressionHere query.

    XMI matching criteria: Class + Nervous_system + Anatomy, or
    Class + Nervous_system + Neuron.
    Query chain: multi-step Owlery and Neo4j queries.
    """
    takes = {
        "short_form": {"$and": ["Class", "Nervous_system", "Anatomy"]},
        "default": take_default,
    }
    return Query(
        query="TransgeneExpressionHere",
        label=f"Transgene expression in {name}",
        function="get_transgene_expression_here",
        takes=takes,
        preview=5,
        preview_columns=["id", "name", "tags"],
    )
1617
+
1618
+
1619
def serialize_solr_output(results):
    """Serialize the first SOLR document in ``results`` to a JSON string.

    Drops the SOLR-internal ``_version_`` field (its large integers can break
    serialization), then applies the same text cleanups as before: unwraps
    JSON objects embedded as quoted strings and strips backslashes and
    apostrophes (apostrophes become hyphens).
    """
    # Copy the document minus SOLR-internal bookkeeping.
    document = {key: value for key, value in results.docs[0].items() if key != '_version_'}

    serialized = json.dumps(document, ensure_ascii=False, cls=NumpyEncoder)
    # Apply cleanups in the original order — it matters for the quote unwrapping.
    for target, replacement in (('\\', ''), ('"{', '{'), ('}"', '}'), ("'", '-')):
        serialized = serialized.replace(target, replacement)
    return serialized
1632
+
1633
@with_solr_cache('term_info')
def get_term_info(short_form: str, preview: bool = True):
    """
    Retrieves the term info for the given term short form.
    Results are cached in SOLR for 3 months to improve performance.

    :param short_form: short form of the term
    :param preview: if True, executes query previews to populate preview_results (default: True)
    :return: term info
    """
    def _apply_default_previews(obj):
        # Shared fallback (previously duplicated inline): when preview results
        # cannot be filled, give every query an empty preview and a zero count.
        for query in obj.get('Queries', []):
            query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
            query['count'] = 0

    parsed_object = None
    # Initialized so the ValidationError handler below cannot NameError when
    # vfb_solr.search itself is what raised.
    results = None
    try:
        # Search for the term in the SOLR server
        results = vfb_solr.search('id:' + short_form)
        parsed_object = term_info_parse_object(results, short_form)
        if not parsed_object:
            print(f"No valid term info found for ID '{short_form}'")
            return None

        # Only try to fill query results if preview is enabled and there are
        # queries to fill; otherwise return the parsed object directly.
        if preview and parsed_object.get('Queries'):
            try:
                term_info = fill_query_results(parsed_object)
                if term_info:
                    return term_info
                print("Failed to fill query preview results!")
            except Exception as e:
                print(f"Error filling query results (setting default values): {e}")
            # fill_query_results failed or returned nothing — degrade gracefully.
            _apply_default_previews(parsed_object)
            return parsed_object

        return parsed_object
    except ValidationError as e:
        # handle the validation error
        print("Schema validation error when parsing response")
        print("Error details:", e)
        print("Original data:", results)
        print("Parsed object:", parsed_object)
        return parsed_object
    except IndexError as e:
        print(f"No results found for ID '{short_form}'")
        print("Error details:", e)
        if parsed_object:
            print("Parsed object:", parsed_object)
        # NOTE: preserved from the original — the else belongs to this check,
        # so "Error accessing SOLR server!" prints when term_info is unset.
        if 'term_info' in locals():
            print("Term info:", term_info)
        else:
            print("Error accessing SOLR server!")
        return None
    except Exception as e:
        print(f"Unexpected error when retrieving term info: {type(e).__name__}: {e}")
        return parsed_object
1700
+
1701
@with_solr_cache('instances')
def get_instances(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves available instances for the given class short form.
    Uses SOLR term_info data when Neo4j is unavailable (fallback mode).

    Primary path: two Cypher queries against Neo4j (a COUNT query, then the
    row query). Any exception on that path triggers the SOLR fallback via
    _get_instances_from_solr.

    :param short_form: short form of the class
    :param return_dataframe: if True, return a pandas DataFrame; otherwise a
        dict with "headers", "rows" and "count" keys
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: results rows
    """

    try:
        # Try to use original Neo4j implementation first
        # Get the total count of rows
        # NOTE(review): short_form is interpolated into the Cypher text with an
        # f-string rather than passed as a parameter — assumes short_form is a
        # trusted VFB identifier; confirm callers never pass user-controlled text.
        count_query = f"""
        MATCH (i:Individual:has_image)-[:INSTANCEOF]->(p:Class {{ short_form: '{short_form}' }}),
        (i)<-[:depicts]-(:Individual)-[r:in_register_with]->(:Template)
        RETURN COUNT(r) AS total_count
        """
        count_results = vc.nc.commit_list([count_query])
        count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
        total_count = count_df['total_count'][0] if not count_df.empty else 0

        # Define the main Cypher query
        # Pattern: Individual ← depicts ← TemplateChannel → in_register_with → TemplateChannelTemplate → depicts → ActualTemplate
        # Each markdown-style column is built server-side with apoc.text.format;
        # REPLACE(..., '[null](null)', '') blanks out links whose OPTIONAL MATCH
        # found nothing.
        query = f"""
        MATCH (i:Individual:has_image)-[:INSTANCEOF]->(p:Class {{ short_form: '{short_form}' }}),
        (i)<-[:depicts]-(tc:Individual)-[r:in_register_with]->(tct:Template)-[:depicts]->(templ:Template),
        (i)-[:has_source]->(ds:DataSet)
        OPTIONAL MATCH (i)-[rx:database_cross_reference]->(site:Site)
        OPTIONAL MATCH (ds)-[:license|licence]->(lic:License)
        RETURN i.short_form as id,
        apoc.text.format("[%s](%s)",[COALESCE(i.symbol[0],i.label),i.short_form]) AS label,
        apoc.text.join(i.uniqueFacets, '|') AS tags,
        apoc.text.format("[%s](%s)",[COALESCE(p.symbol[0],p.label),p.short_form]) AS parent,
        REPLACE(apoc.text.format("[%s](%s)",[COALESCE(site.symbol[0],site.label),site.short_form]), '[null](null)', '') AS source,
        REPLACE(apoc.text.format("[%s](%s)",[rx.accession[0],site.link_base[0] + rx.accession[0]]), '[null](null)', '') AS source_id,
        apoc.text.format("[%s](%s)",[COALESCE(templ.symbol[0],templ.label),templ.short_form]) AS template,
        apoc.text.format("[%s](%s)",[COALESCE(ds.symbol[0],ds.label),ds.short_form]) AS dataset,
        REPLACE(apoc.text.format("[%s](%s)",[COALESCE(lic.symbol[0],lic.label),lic.short_form]), '[null](null)', '') AS license,
        REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(i.symbol[0],i.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(r.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(i.symbol[0],i.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + i.short_form]), "[![null]( 'null')](null)", "") as thumbnail
        ORDER BY id Desc
        """

        # limit applies only to the row query; total_count above still reflects
        # the full result set.
        if limit != -1:
            query += f" LIMIT {limit}"

        # Run the query using VFB_connect
        results = vc.nc.commit_list([query])

        # Convert the results to a DataFrame
        df = pd.DataFrame.from_records(get_dict_cursor()(results))

        # Percent-encode markdown link text/targets in the link-bearing columns.
        columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
        df = encode_markdown_links(df, columns_to_encode)

        if return_dataframe:
            return df

        # Format the results as the dict shape expected by non-DataFrame callers.
        formatted_results = {
            "headers": _get_instances_headers(),
            "rows": [
                {
                    key: row[key]
                    for key in [
                        "id",
                        "label",
                        "tags",
                        "parent",
                        "source",
                        "source_id",
                        "template",
                        "dataset",
                        "license",
                        "thumbnail"
                    ]
                }
                for row in safe_to_dict(df)
            ],
            "count": total_count
        }

        return formatted_results

    except Exception as e:
        # Fallback to SOLR-based implementation when Neo4j is unavailable.
        # NOTE(review): this broad except also masks genuine bugs in the Neo4j
        # path (e.g. a malformed query) by silently switching to SOLR.
        print(f"Neo4j unavailable ({e}), using SOLR fallback for get_instances")
        return _get_instances_from_solr(short_form, return_dataframe, limit)
1789
+
1790
+ def _get_instances_from_solr(short_form: str, return_dataframe=True, limit: int = -1):
1791
+ """
1792
+ SOLR-based fallback implementation for get_instances.
1793
+ Extracts instance data from term_info anatomy_channel_image array.
1794
+ """
1795
+ try:
1002
1796
  # Get term_info data from SOLR
1003
1797
  term_info_results = vc.get_TermInfo([short_form], return_dataframe=False)
1004
1798
 
1005
- if len(term_info_results) == 0:
1006
- # Return empty results with proper structure
1799
+ if len(term_info_results) == 0:
1800
+ # Return empty results with proper structure
1801
+ if return_dataframe:
1802
+ return pd.DataFrame()
1803
+ return {
1804
+ "headers": _get_instances_headers(),
1805
+ "rows": [],
1806
+ "count": 0
1807
+ }
1808
+
1809
+ term_info = term_info_results[0]
1810
+ anatomy_images = term_info.get('anatomy_channel_image', [])
1811
+
1812
+ # Apply limit if specified
1813
+ if limit != -1 and limit > 0:
1814
+ anatomy_images = anatomy_images[:limit]
1815
+
1816
+ # Convert anatomy_channel_image to instance rows with rich data
1817
+ rows = []
1818
+ for img in anatomy_images:
1819
+ anatomy = img.get('anatomy', {})
1820
+ channel_image = img.get('channel_image', {})
1821
+ image_info = channel_image.get('image', {}) if channel_image else {}
1822
+ template_anatomy = image_info.get('template_anatomy', {}) if image_info else {}
1823
+
1824
+ # Extract tags from unique_facets (matching original Neo4j format and ordering)
1825
+ unique_facets = anatomy.get('unique_facets', [])
1826
+ anatomy_types = anatomy.get('types', [])
1827
+
1828
+ # Create ordered list matching the expected Neo4j format
1829
+ # Based on test diff, expected order and tags: Nervous_system, Adult, Visual_system, Synaptic_neuropil_domain
1830
+ # Note: We exclude 'Synaptic_neuropil' as it doesn't appear in expected output
1831
+ ordered_tags = []
1832
+ for tag_type in ['Nervous_system', 'Adult', 'Visual_system', 'Synaptic_neuropil_domain']:
1833
+ if tag_type in anatomy_types or tag_type in unique_facets:
1834
+ ordered_tags.append(tag_type)
1835
+
1836
+ # Use the ordered tags to match expected format
1837
+ tags = '|'.join(ordered_tags)
1838
+
1839
+ # Extract thumbnail URL and convert to HTTPS
1840
+ thumbnail_url = image_info.get('image_thumbnail', '') if image_info else ''
1841
+ if thumbnail_url:
1842
+ # Replace http with https and thumbnailT.png with thumbnail.png
1843
+ thumbnail_url = thumbnail_url.replace('http://', 'https://').replace('thumbnailT.png', 'thumbnail.png')
1844
+
1845
+ # Format thumbnail with proper markdown link (matching Neo4j behavior)
1846
+ thumbnail = ''
1847
+ if thumbnail_url and template_anatomy:
1848
+ # Prefer symbol over label for template (matching Neo4j behavior)
1849
+ template_label = template_anatomy.get('label', '')
1850
+ if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
1851
+ template_label = template_anatomy.get('symbol')
1852
+ # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1853
+ template_label = unquote(template_label)
1854
+ template_short_form = template_anatomy.get('short_form', '')
1855
+
1856
+ # Prefer symbol over label for anatomy (matching Neo4j behavior)
1857
+ anatomy_label = anatomy.get('label', '')
1858
+ if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
1859
+ anatomy_label = anatomy.get('symbol')
1860
+ # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1861
+ anatomy_label = unquote(anatomy_label)
1862
+ anatomy_short_form = anatomy.get('short_form', '')
1863
+
1864
+ if template_label and anatomy_label:
1865
+ # Create thumbnail markdown link matching the original format
1866
+ # DO NOT encode brackets in alt text - that's done later by encode_markdown_links
1867
+ alt_text = f"{anatomy_label} aligned to {template_label}"
1868
+ link_target = f"{template_short_form},{anatomy_short_form}"
1869
+ thumbnail = f"[![{alt_text}]({thumbnail_url} '{alt_text}')]({link_target})"
1870
+
1871
+ # Format template information
1872
+ template_formatted = ''
1873
+ if template_anatomy:
1874
+ # Prefer symbol over label (matching Neo4j behavior)
1875
+ template_label = template_anatomy.get('label', '')
1876
+ if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
1877
+ template_label = template_anatomy.get('symbol')
1878
+ # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1879
+ template_label = unquote(template_label)
1880
+ template_short_form = template_anatomy.get('short_form', '')
1881
+ if template_label and template_short_form:
1882
+ template_formatted = f"[{template_label}]({template_short_form})"
1883
+
1884
+ # Handle label formatting (match Neo4j format - prefer symbol over label)
1885
+ anatomy_label = anatomy.get('label', 'Unknown')
1886
+ if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
1887
+ anatomy_label = anatomy.get('symbol')
1888
+ # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1889
+ anatomy_label = unquote(anatomy_label)
1890
+ anatomy_short_form = anatomy.get('short_form', '')
1891
+
1892
+ row = {
1893
+ 'id': anatomy_short_form,
1894
+ 'label': f"[{anatomy_label}]({anatomy_short_form})",
1895
+ 'tags': tags,
1896
+ 'parent': f"[{term_info.get('term', {}).get('core', {}).get('label', 'Unknown')}]({short_form})",
1897
+ 'source': '', # Not readily available in SOLR anatomy_channel_image
1898
+ 'source_id': '',
1899
+ 'template': template_formatted,
1900
+ 'dataset': '', # Not readily available in SOLR anatomy_channel_image
1901
+ 'license': '',
1902
+ 'thumbnail': thumbnail
1903
+ }
1904
+ rows.append(row)
1905
+
1906
+ # Sort by ID to match expected ordering (Neo4j uses "ORDER BY id Desc")
1907
+ rows.sort(key=lambda x: x['id'], reverse=True)
1908
+
1909
+ total_count = len(anatomy_images)
1910
+
1911
+ if return_dataframe:
1912
+ df = pd.DataFrame(rows)
1913
+ # Apply encoding to markdown links (matches Neo4j implementation)
1914
+ columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
1915
+ df = encode_markdown_links(df, columns_to_encode)
1916
+ return df
1917
+
1918
+ return {
1919
+ "headers": _get_instances_headers(),
1920
+ "rows": rows,
1921
+ "count": total_count
1922
+ }
1923
+
1924
+ except Exception as e:
1925
+ print(f"Error in SOLR fallback for get_instances: {e}")
1926
+ # Return empty results with proper structure
1927
+ if return_dataframe:
1928
+ return pd.DataFrame()
1929
+ return {
1930
+ "headers": _get_instances_headers(),
1931
+ "rows": [],
1932
+ "count": 0
1933
+ }
1934
+
1935
+ def _get_instances_headers():
1936
+ """Return standard headers for get_instances results"""
1937
+ return {
1938
+ "id": {"title": "Add", "type": "selection_id", "order": -1},
1939
+ "label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
1940
+ "parent": {"title": "Parent Type", "type": "markdown", "order": 1},
1941
+ "template": {"title": "Template", "type": "markdown", "order": 4},
1942
+ "tags": {"title": "Gross Types", "type": "tags", "order": 3},
1943
+ "source": {"title": "Data Source", "type": "markdown", "order": 5},
1944
+ "source_id": {"title": "Data Source", "type": "markdown", "order": 6},
1945
+ "dataset": {"title": "Dataset", "type": "markdown", "order": 7},
1946
+ "license": {"title": "License", "type": "markdown", "order": 8},
1947
+ "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
1948
+ }
1949
+
1950
+ def _get_templates_minimal(limit: int = -1, return_dataframe: bool = False):
1951
+ """
1952
+ Minimal fallback implementation for get_templates when Neo4j is unavailable.
1953
+ Returns hardcoded list of core templates with basic information.
1954
+ """
1955
+ # Core templates with their basic information
1956
+ # Include all columns to match full get_templates() structure
1957
+ templates_data = [
1958
+ {"id": "VFB_00101567", "name": "JRC2018Unisex", "tags": "VFB|VFB_vol|has_image", "order": 1, "thumbnail": "", "dataset": "", "license": ""},
1959
+ {"id": "VFB_00200000", "name": "JRC_FlyEM_Hemibrain", "tags": "VFB|VFB_vol|has_image", "order": 2, "thumbnail": "", "dataset": "", "license": ""},
1960
+ {"id": "VFB_00017894", "name": "Adult Brain", "tags": "VFB|VFB_painted|has_image", "order": 3, "thumbnail": "", "dataset": "", "license": ""},
1961
+ {"id": "VFB_00101384", "name": "JFRC2", "tags": "VFB|VFB_vol|has_image", "order": 4, "thumbnail": "", "dataset": "", "license": ""},
1962
+ {"id": "VFB_00050000", "name": "JFRC2010", "tags": "VFB|VFB_vol|has_image", "order": 5, "thumbnail": "", "dataset": "", "license": ""},
1963
+ {"id": "VFB_00049000", "name": "Ito2014", "tags": "VFB|VFB_painted|has_image", "order": 6, "thumbnail": "", "dataset": "", "license": ""},
1964
+ {"id": "VFB_00100000", "name": "FCWB", "tags": "VFB|VFB_vol|has_image", "order": 7, "thumbnail": "", "dataset": "", "license": ""},
1965
+ {"id": "VFB_00030786", "name": "Adult VNS", "tags": "VFB|VFB_painted|has_image", "order": 8, "thumbnail": "", "dataset": "", "license": ""},
1966
+ {"id": "VFB_00110000", "name": "L3 CNS", "tags": "VFB|VFB_vol|has_image", "order": 9, "thumbnail": "", "dataset": "", "license": ""},
1967
+ {"id": "VFB_00120000", "name": "L1 CNS", "tags": "VFB|VFB_vol|has_image", "order": 10, "thumbnail": "", "dataset": "", "license": ""},
1968
+ ]
1969
+
1970
+ # Apply limit if specified
1971
+ if limit > 0:
1972
+ templates_data = templates_data[:limit]
1973
+
1974
+ count = len(templates_data)
1975
+
1976
+ if return_dataframe:
1977
+ df = pd.DataFrame(templates_data)
1978
+ return df
1979
+
1980
+ # Format as dict with headers and rows (match full get_templates structure)
1981
+ formatted_results = {
1982
+ "headers": {
1983
+ "id": {"title": "Add", "type": "selection_id", "order": -1},
1984
+ "order": {"title": "Order", "type": "numeric", "order": 1, "sort": {0: "Asc"}},
1985
+ "name": {"title": "Name", "type": "markdown", "order": 1, "sort": {1: "Asc"}},
1986
+ "tags": {"title": "Tags", "type": "tags", "order": 2},
1987
+ "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9},
1988
+ "dataset": {"title": "Dataset", "type": "metadata", "order": 3},
1989
+ "license": {"title": "License", "type": "metadata", "order": 4}
1990
+ },
1991
+ "rows": templates_data,
1992
+ "count": count
1993
+ }
1994
+
1995
+ return formatted_results
1996
+
1997
@with_solr_cache('templates')
def get_templates(limit: int = -1, return_dataframe: bool = False):
    """Get list of templates

    :param limit: maximum number of results to return (default -1, returns all results)
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns list of dicts.
    :return: list of templates (id, label, tags, source (db) id, accession_in_source) + similarity score.
    :rtype: pandas.DataFrame or list of dicts

    """
    # Count distinct templates first. If Neo4j is unreachable this raises and we
    # fall back to the hardcoded minimal template list below.
    try:
        count_query = """MATCH (t:Template)<-[:depicts]-(tc:Template)-[r:in_register_with]->(tc:Template)
        RETURN COUNT(DISTINCT t) AS total_count"""

        count_results = vc.nc.commit_list([count_query])
        count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
        total_count = count_df['total_count'][0] if not count_df.empty else 0
    except Exception as e:
        # Fallback to minimal template list when Neo4j is unavailable
        print(f"Neo4j unavailable ({e}), using minimal template list fallback")
        return _get_templates_minimal(limit, return_dataframe)

    # Define the main Cypher query
    # Match full pattern to exclude template channel nodes
    # Use COLLECT to aggregate multiple datasets/licenses into single row per template
    query = f"""
    MATCH (p:Class)<-[:INSTANCEOF]-(t:Template)<-[:depicts]-(tc:Template)-[r:in_register_with]->(tc)
    OPTIONAL MATCH (t)-[:has_source]->(ds:DataSet)
    OPTIONAL MATCH (ds)-[:has_license|license]->(lic:License)
    WITH t, r, COLLECT(DISTINCT ds) as datasets, COLLECT(DISTINCT lic) as licenses
    RETURN DISTINCT t.short_form as id,
    apoc.text.format("[%s](%s)",[COALESCE(t.symbol[0],t.label),t.short_form]) AS name,
    apoc.text.join(t.uniqueFacets, '|') AS tags,
    apoc.text.join([ds IN datasets | apoc.text.format("[%s](%s)",[COALESCE(ds.symbol[0],ds.label),ds.short_form])], ', ') AS dataset,
    apoc.text.join([lic IN licenses | REPLACE(apoc.text.format("[%s](%s)",[COALESCE(lic.symbol[0],lic.label),lic.short_form]), '[null](null)', '')], ', ') AS license,
    COALESCE(REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(t.symbol[0],t.label), REPLACE(COALESCE(r.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(t.symbol[0],t.label), t.short_form]), "[![null]( 'null')](null)", ""), "") as thumbnail,
    99 as order
    ORDER BY id DESC
    """

    if limit != -1:
        query += f" LIMIT {limit}"

    # Run the query using VFB_connect
    results = vc.nc.commit_list([query])

    # Convert the results to a DataFrame
    df = pd.DataFrame.from_records(get_dict_cursor()(results))

    # URL-encode brackets etc. inside the markdown-link columns only.
    columns_to_encode = ['name', 'dataset', 'license', 'thumbnail']
    df = encode_markdown_links(df, columns_to_encode)

    # Canonical display order for the core templates; everything else keeps the
    # placeholder order 99 assigned by the Cypher query and sorts after these.
    template_order = ["VFB_00101567","VFB_00200000","VFB_00017894","VFB_00101384","VFB_00050000","VFB_00049000","VFB_00100000","VFB_00030786","VFB_00110000","VFB_00120000"]

    order = 1

    for template in template_order:
        df.loc[df['id'] == template, 'order'] = order
        order += 1

    # Sort the DataFrame by 'order'
    df = df.sort_values('order')

    if return_dataframe:
        return df

    # Format the results
    # NOTE: "count" is the pre-limit total from the count query, so it can
    # exceed len(rows) when a limit was applied.
    formatted_results = {
        "headers": {
            "id": {"title": "Add", "type": "selection_id", "order": -1},
            "order": {"title": "Order", "type": "numeric", "order": 1, "sort": {0: "Asc"}},
            "name": {"title": "Name", "type": "markdown", "order": 1, "sort": {1: "Asc"}},
            "tags": {"title": "Tags", "type": "tags", "order": 2},
            "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9},
            "dataset": {"title": "Dataset", "type": "metadata", "order": 3},
            "license": {"title": "License", "type": "metadata", "order": 4}
        },
        "rows": [
            {
                key: row[key]
                for key in [
                    "id",
                    "order",
                    "name",
                    "tags",
                    "thumbnail",
                    "dataset",
                    "license"
                ]
            }
            for row in safe_to_dict(df)
        ],
        "count": total_count
    }

    return formatted_results
2093
+
2094
def get_related_anatomy(template_short_form: str, limit: int = -1, return_dataframe: bool = False):
    """
    Retrieve related anatomical structures for a given template.

    :param template_short_form: The short form of the template to query.
    :param limit: Maximum number of results to return. Default is -1, which returns all results.
    :param return_dataframe: If True, returns results as a pandas DataFrame. Otherwise, returns a list of dicts.
    :return: Related anatomical structures and paths.
    """

    # Define the Cypher query
    # NOTE(review): template_short_form is interpolated directly into the Cypher
    # string; callers should pass trusted short forms only.
    query = f"""
    MATCH (root:Class)<-[:INSTANCEOF]-(t:Template {{short_form:'{template_short_form}'}})<-[:depicts]-(tc:Template)<-[ie:in_register_with]-(c:Individual)-[:depicts]->(image:Individual)-[r:INSTANCEOF]->(anat:Class:Anatomy)
    WHERE exists(ie.index)
    WITH root, anat,r,image
    MATCH p=allshortestpaths((root)<-[:SUBCLASSOF|part_of*..50]-(anat))
    UNWIND nodes(p) as n
    UNWIND nodes(p) as m
    WITH * WHERE id(n) < id(m)
    MATCH path = allShortestPaths( (n)-[:SUBCLASSOF|part_of*..1]-(m) )
    RETURN collect(distinct {{ node_id: id(anat), short_form: anat.short_form, image: image.short_form }}) AS image_nodes, id(root) AS root, collect(path)
    """

    if limit != -1:
        query += f" LIMIT {limit}"

    # Execute the query using your database connection (e.g., VFB_connect)
    results = vc.nc.commit_list([query])

    # Convert the results to a DataFrame (if needed)
    # NOTE(review): unlike the sibling query functions, this feeds the raw
    # commit_list() payload to from_records without get_dict_cursor() — confirm
    # the resulting DataFrame shape is what callers expect.
    if return_dataframe:
        df = pd.DataFrame.from_records(results)
        return df

    # Otherwise, return the raw results
    return results
2130
+
2131
def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_dataframe=True, limit: int = -1):
    """Get JSON report of individual neurons similar to input neuron

    :param neuron: short form of the query neuron (Individual)
    :param similarity_score: Optionally specify similarity score to chose
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns list of dicts.
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: list of similar neurons (id, label, tags, source (db) id, accession_in_source) + similarity score.
    :rtype: pandas.DataFrame or list of dicts

    """
    # Count neurons linked by has_similar_morphology_to edges that carry the
    # requested score property. NOTE(review): both `neuron` and
    # `similarity_score` are interpolated into the Cypher string unescaped.
    count_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class)
    WHERE n1.short_form = '{neuron}' and exists(r.{similarity_score})
    RETURN COUNT(DISTINCT n2) AS total_count"""

    count_results = vc.nc.commit_list([count_query])
    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
    total_count = count_df['total_count'][0] if not count_df.empty else 0

    # Main query: one row per similar neuron with markdown name/source links and
    # an aligned-thumbnail markdown image, highest score first.
    main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class)
    WHERE n1.short_form = '{neuron}' and exists(r.{similarity_score})
    WITH c1, n1, r, n2, c2
    OPTIONAL MATCH (n2)-[rx:database_cross_reference]->(site:Site)
    WHERE site.is_data_source
    WITH n2, r, c2, rx, site
    OPTIONAL MATCH (n2)<-[:depicts]-(:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template)
    RETURN DISTINCT n2.short_form as id,
    apoc.text.format("[%s](%s)", [n2.label, n2.short_form]) AS name,
    r.{similarity_score}[0] AS score,
    apoc.text.join(n2.uniqueFacets, '|') AS tags,
    REPLACE(apoc.text.format("[%s](%s)",[COALESCE(site.symbol[0],site.label),site.short_form]), '[null](null)', '') AS source,
    REPLACE(apoc.text.format("[%s](%s)",[rx.accession[0], (site.link_base[0] + rx.accession[0])]), '[null](null)', '') AS source_id,
    REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(n2.symbol[0],n2.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(ri.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(n2.symbol[0],n2.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + n2.short_form]), "[![null]( 'null')](null)", "") as thumbnail
    ORDER BY score DESC"""

    if limit != -1:
        main_query += f" LIMIT {limit}"

    # Run the query using VFB_connect
    results = vc.nc.commit_list([main_query])

    # Convert the results to a DataFrame
    df = pd.DataFrame.from_records(get_dict_cursor()(results))

    # URL-encode brackets inside the markdown-link columns.
    columns_to_encode = ['name', 'source', 'source_id', 'thumbnail']
    df = encode_markdown_links(df, columns_to_encode)

    if return_dataframe:
        return df
    else:
        # sort_by_id=False preserves the score-descending order from Cypher.
        # "count" is the pre-limit total, so it may exceed len(rows).
        formatted_results = {
            "headers": {
                "id": {"title": "Add", "type": "selection_id", "order": -1},
                "score": {"title": "Score", "type": "numeric", "order": 1, "sort": {0: "Desc"}},
                "name": {"title": "Name", "type": "markdown", "order": 1, "sort": {1: "Asc"}},
                "tags": {"title": "Tags", "type": "tags", "order": 2},
                "source": {"title": "Source", "type": "metadata", "order": 3},
                "source_id": {"title": "Source ID", "type": "metadata", "order": 4},
                "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
            },
            "rows": [
                {
                    key: row[key]
                    for key in [
                        "id",
                        "name",
                        "score",
                        "tags",
                        "source",
                        "source_id",
                        "thumbnail"
                    ]
                }
                for row in safe_to_dict(df, sort_by_id=False)
            ],
            "count": total_count
        }
        return formatted_results
2209
+
2210
def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True, limit: int = -1, summary_mode: bool = False):
    """
    Retrieve neurons that have synapses into the specified neuron, along with the neurotransmitter
    types, and additional information about the neurons.

    :param neuron_short_form: The short form identifier of the neuron to query.
    :param return_dataframe: If True, returns results as a pandas DataFrame. Otherwise, returns a dictionary.
    :param limit: Maximum number of results to return. Default is -1, which returns all results.
    :param summary_mode: If True, returns a preview of the results with summed weights for each neurotransmitter type.
    :return: Neurons, neurotransmitter types, and additional neuron information.
    """

    # Common prefix: find input neurons (b) synapsing onto the query neuron (a),
    # keep only their "...ergic" neurotransmitter labels, and resolve each label
    # to its FBbt neuron class (c) by name.
    query_common = f"""
    MATCH (a:has_neuron_connectivity {{short_form:'{neuron_short_form}'}})<-[r:synapsed_to]-(b:has_neuron_connectivity)
    UNWIND(labels(b)) as l
    WITH * WHERE l contains "ergic"
    OPTIONAL MATCH (c:Class:Neuron) WHERE c.short_form starts with "FBbt_" AND toLower(c.label)=toLower(l+" neuron")
    """
    # Normal mode counts distinct input neurons; summary mode counts distinct
    # neurotransmitter classes (one summed row per class).
    if not summary_mode:
        count_query = f"""{query_common}
        RETURN COUNT(DISTINCT b) AS total_count"""
    else:
        count_query = f"""{query_common}
        RETURN COUNT(DISTINCT c) AS total_count"""

    count_results = vc.nc.commit_list([count_query])
    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
    total_count = count_df['total_count'][0] if not count_df.empty else 0

    # Define the part of the query for normal mode
    query_normal = f"""
    OPTIONAL MATCH (b)-[:INSTANCEOF]->(neuronType:Class),
    (b)<-[:depicts]-(imageChannel:Individual)-[image:in_register_with]->(templateChannel:Template)-[:depicts]->(templ:Template),
    (imageChannel)-[:is_specified_output_of]->(imagingTechnique:Class)
    RETURN
    b.short_form as id,
    apoc.text.format("[%s](%s)", [l, c.short_form]) as Neurotransmitter,
    sum(r.weight[0]) as Weight,
    apoc.text.format("[%s](%s)", [b.label, b.short_form]) as Name,
    apoc.text.format("[%s](%s)", [neuronType.label, neuronType.short_form]) as Type,
    apoc.text.join(b.uniqueFacets, '|') as Gross_Type,
    apoc.text.join(collect(apoc.text.format("[%s](%s)", [templ.label, templ.short_form])), ', ') as Template_Space,
    apoc.text.format("[%s](%s)", [imagingTechnique.label, imagingTechnique.short_form]) as Imaging_Technique,
    apoc.text.join(collect(REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(b.symbol[0],b.label), REPLACE(COALESCE(image.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(b.symbol[0],b.label), b.short_form]), "[![null]( 'null')](null)", "")), ' | ') as Images
    ORDER BY Weight Desc
    """

    # Define the part of the query for preview mode
    query_preview = f"""
    RETURN DISTINCT c.short_form as id,
    apoc.text.format("[%s](%s)", [l, c.short_form]) as Neurotransmitter,
    sum(r.weight[0]) as Weight
    ORDER BY Weight Desc
    """

    # Choose the appropriate part of the query based on the summary_mode parameter
    query = query_common + (query_preview if summary_mode else query_normal)

    # Summary mode is already one row per neurotransmitter class, so the limit
    # only applies in normal mode.
    if limit != -1 and not summary_mode:
        query += f" LIMIT {limit}"

    # Execute the query using your database connection (e.g., vc.nc)
    results = vc.nc.commit_list([query])

    # Convert the results to a DataFrame
    df = pd.DataFrame.from_records(get_dict_cursor()(results))

    # Encode markdown-link columns. The thumbnail column is aliased 'Images' in
    # the Cypher RETURN (there is no 'thumbnail' column here), so encode that.
    columns_to_encode = ['Neurotransmitter', 'Type', 'Name', 'Template_Space', 'Imaging_Technique', 'Images']
    df = encode_markdown_links(df, columns_to_encode)

    # If return_dataframe is True, return the results as a DataFrame
    if return_dataframe:
        return df

    # Format the results; sort_by_id=False preserves the weight-descending order.
    if not summary_mode:
        results = {
            "headers": {
                "id": {"title": "ID", "type": "text", "order": -1},
                "Neurotransmitter": {"title": "Neurotransmitter", "type": "markdown", "order": 0},
                "Weight": {"title": "Weight", "type": "numeric", "order": 1},
                "Name": {"title": "Name", "type": "markdown", "order": 2},
                "Type": {"title": "Type", "type": "markdown", "order": 3},
                "Gross_Type": {"title": "Gross Type", "type": "text", "order": 4},
                "Template_Space": {"title": "Template Space", "type": "markdown", "order": 5},
                "Imaging_Technique": {"title": "Imaging Technique", "type": "markdown", "order": 6},
                "Images": {"title": "Images", "type": "markdown", "order": 7}
            },
            "rows": [
                {
                    key: row[key]
                    for key in [
                        "id",
                        "Neurotransmitter",
                        "Weight",
                        "Name",
                        "Type",
                        "Gross_Type",
                        "Template_Space",
                        "Imaging_Technique",
                        "Images"
                    ]
                }
                for row in safe_to_dict(df, sort_by_id=False)
            ],
            "count": total_count
        }
    else:
        results = {
            "headers": {
                "id": {"title": "ID", "type": "text", "order": -1},
                "Neurotransmitter": {"title": "Neurotransmitter", "type": "markdown", "order": 0},
                "Weight": {"title": "Weight", "type": "numeric", "order": 1},
            },
            "rows": [
                {
                    key: row[key]
                    for key in [
                        "id",
                        "Neurotransmitter",
                        "Weight",
                    ]
                }
                for row in safe_to_dict(df, sort_by_id=False)
            ],
            "count": total_count
        }

    return results
2340
+
2341
+
2342
def get_expression_overlaps_here(anatomy_short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieve expression patterns that overlap with the specified anatomical region.

    This implements the ExpressionOverlapsHere query from the VFB XMI specification.
    Finds expression patterns where individual instances overlap with or are part of the anatomy.

    :param anatomy_short_form: Short form identifier of the anatomical region (e.g., 'FBbt_00003982')
    :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
    :param limit: Maximum number of results to return (default: -1 for all results)
    :return: Expression patterns with overlap relationships, publications, and images
    :rtype: pandas.DataFrame or dict
    """

    # Count query: count distinct expression patterns
    # NOTE(review): anatomy_short_form is interpolated unescaped into Cypher.
    count_query = f"""
    MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class)
    WHERE anat.short_form = '{anatomy_short_form}'
    RETURN COUNT(DISTINCT ep) AS total_count
    """

    count_results = vc.nc.commit_list([count_query])
    count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
    total_count = count_df['total_count'][0] if not count_df.empty else 0

    # Main query: get expression patterns with details.
    # Publication short forms are collected from the overlap/part_of edges
    # (ar.pub), then resolved to pub nodes and packed into per-row dicts.
    main_query = f"""
    MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class)
    WHERE anat.short_form = '{anatomy_short_form}'
    WITH DISTINCT collect(DISTINCT ar.pub[0]) as pubs, anat, ep
    UNWIND pubs as p
    OPTIONAL MATCH (pub:pub {{ short_form: p}})
    WITH anat, ep, collect({{
    core: {{ short_form: pub.short_form, label: coalesce(pub.label,''), iri: pub.iri, types: labels(pub), symbol: coalesce(pub.symbol[0], '') }},
    PubMed: coalesce(pub.PMID[0], ''),
    FlyBase: coalesce(([]+pub.FlyBase)[0], ''),
    DOI: coalesce(pub.DOI[0], '')
    }}) as pubs
    RETURN
    ep.short_form AS id,
    apoc.text.format("[%s](%s)", [ep.label, ep.short_form]) AS name,
    apoc.text.join(ep.uniqueFacets, '|') AS tags,
    pubs
    ORDER BY ep.label
    """

    if limit != -1:
        main_query += f" LIMIT {limit}"

    # Execute the query
    results = vc.nc.commit_list([main_query])

    # Convert to DataFrame
    df = pd.DataFrame.from_records(get_dict_cursor()(results))

    # Encode markdown links ('name' is the only markdown-link column here)
    if not df.empty:
        columns_to_encode = ['name']
        df = encode_markdown_links(df, columns_to_encode)

    if return_dataframe:
        return df
    else:
        # sort_by_id=False keeps the label-alphabetical order from Cypher.
        # "count" is the pre-limit total, so it may exceed len(rows).
        formatted_results = {
            "headers": {
                "id": {"title": "ID", "type": "selection_id", "order": -1},
                "name": {"title": "Expression Pattern", "type": "markdown", "order": 0},
                "tags": {"title": "Tags", "type": "tags", "order": 1},
                "pubs": {"title": "Publications", "type": "metadata", "order": 2}
            },
            "rows": [
                {
                    key: row[key]
                    for key in ["id", "name", "tags", "pubs"]
                }
                for row in safe_to_dict(df, sort_by_id=False)
            ],
            "count": total_count
        }
        return formatted_results
2422
+
2423
+
2424
def contains_all_tags(lst: List[str], tags: List[str]) -> bool:
    """
    Checks if the given list contains all the tags passed.

    :param lst: list of strings to check
    :param tags: list of strings to check for in lst
    :return: True if lst contains all tags, False otherwise
    """
    # Subset test: every required tag must appear somewhere in lst.
    return set(tags).issubset(lst)
2433
+
2434
@with_solr_cache('neurons_part_here')
def get_neurons_with_part_in(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves neuron classes that have some part overlapping with the specified anatomical region.

    This implements the NeuronsPartHere query from the VFB XMI specification.
    Query chain (from XMI): Owlery (Index 1) → Process → SOLR (Index 3)
    OWL query (from XMI): <FBbt_00005106> and <RO_0002131> some <$ID>
    Where: FBbt_00005106 = neuron, RO_0002131 = overlaps

    :param short_form: short form of the anatomical region (Class)
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: Neuron classes with parts in the specified region
    """
    # Build the class expression: neuron and (overlaps some <target>).
    target_iri = _short_form_to_iri(short_form)
    owl_query = (
        "<http://purl.obolibrary.org/obo/FBbt_00005106> and "
        f"<http://purl.obolibrary.org/obo/RO_0002131> some <{target_iri}>"
    )
    return _owlery_query_to_results(
        owl_query, short_form, return_dataframe, limit,
        solr_field='anat_query', include_source=True, query_by_label=False)
2452
+
2453
+
2454
@with_solr_cache('neurons_synaptic')
def get_neurons_with_synapses_in(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves neuron classes that have synaptic terminals in the specified anatomical region.

    This implements the NeuronsSynaptic query from the VFB XMI specification.
    Query chain (from XMI): Owlery → Process → SOLR
    OWL query (from XMI): object=<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002130> some <http://purl.obolibrary.org/obo/$ID>
    Where: FBbt_00005106 = neuron, RO_0002130 = has synaptic terminals in
    Matching criteria: Class + Synaptic_neuropil, Class + Visual_system, Class + Synaptic_neuropil_domain

    :param short_form: short form of the anatomical region (Class)
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: Neuron classes with synaptic terminals in the specified region
    """
    # Class expression: neuron and (has synaptic terminals in some <target>).
    target_iri = _short_form_to_iri(short_form)
    owl_query = (
        "<http://purl.obolibrary.org/obo/FBbt_00005106> and "
        f"<http://purl.obolibrary.org/obo/RO_0002130> some <{target_iri}>"
    )
    return _owlery_query_to_results(
        owl_query, short_form, return_dataframe, limit,
        solr_field='anat_query', query_by_label=False)
2472
+
2473
+
2474
@with_solr_cache('neurons_presynaptic')
def get_neurons_with_presynaptic_terminals_in(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves neuron classes that have presynaptic terminals in the specified anatomical region.

    This implements the NeuronsPresynapticHere query from the VFB XMI specification.
    Query chain (from XMI): Owlery → Process → SOLR
    OWL query (from XMI): object=<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002113> some <http://purl.obolibrary.org/obo/$ID>
    Where: FBbt_00005106 = neuron, RO_0002113 = has presynaptic terminal in
    Matching criteria: Class + Synaptic_neuropil, Class + Visual_system, Class + Synaptic_neuropil_domain

    :param short_form: short form of the anatomical region (Class)
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: Neuron classes with presynaptic terminals in the specified region
    """
    # Class expression: neuron and (has presynaptic terminal in some <target>).
    target_iri = _short_form_to_iri(short_form)
    owl_query = (
        "<http://purl.obolibrary.org/obo/FBbt_00005106> and "
        f"<http://purl.obolibrary.org/obo/RO_0002113> some <{target_iri}>"
    )
    return _owlery_query_to_results(
        owl_query, short_form, return_dataframe, limit,
        solr_field='anat_query', query_by_label=False)
2492
+
2493
+
2494
@with_solr_cache('neurons_postsynaptic')
def get_neurons_with_postsynaptic_terminals_in(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves neuron classes that have postsynaptic terminals in the specified anatomical region.

    This implements the NeuronsPostsynapticHere query from the VFB XMI specification.
    Query chain (from XMI): Owlery → Process → SOLR
    OWL query (from XMI): object=<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002110> some <http://purl.obolibrary.org/obo/$ID>
    Where: FBbt_00005106 = neuron, RO_0002110 = has postsynaptic terminal in
    Matching criteria: Class + Synaptic_neuropil, Class + Visual_system, Class + Synaptic_neuropil_domain

    :param short_form: short form of the anatomical region (Class)
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: Neuron classes with postsynaptic terminals in the specified region
    """
    # Class expression: neuron and (has postsynaptic terminal in some <target>).
    target_iri = _short_form_to_iri(short_form)
    owl_query = (
        "<http://purl.obolibrary.org/obo/FBbt_00005106> and "
        f"<http://purl.obolibrary.org/obo/RO_0002110> some <{target_iri}>"
    )
    return _owlery_query_to_results(
        owl_query, short_form, return_dataframe, limit,
        solr_field='anat_query', query_by_label=False)
2512
+
2513
+
2514
@with_solr_cache('components_of')
def get_components_of(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves components (parts) of the specified anatomical class.

    This implements the ComponentsOf query from the VFB XMI specification.
    Query chain (from XMI): Owlery Part of → Process → SOLR
    OWL query (from XMI): object=<http://purl.obolibrary.org/obo/BFO_0000050> some <http://purl.obolibrary.org/obo/$ID>
    Where: BFO_0000050 = part of
    Matching criteria: Class + Clone

    :param short_form: short form of the anatomical class
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: Components of the specified class
    """
    # Class expression: part of some <target>.
    target_iri = _short_form_to_iri(short_form)
    owl_query = f"<http://purl.obolibrary.org/obo/BFO_0000050> some <{target_iri}>"
    return _owlery_query_to_results(
        owl_query, short_form, return_dataframe, limit,
        solr_field='anat_query', query_by_label=False)
2532
+
2533
+
2534
+ @with_solr_cache('parts_of')
2535
+ def get_parts_of(short_form: str, return_dataframe=True, limit: int = -1):
2536
+ """
2537
+ Retrieves parts of the specified anatomical class.
2538
+
2539
+ This implements the PartsOf query from the VFB XMI specification.
2540
+ Query chain (from XMI): Owlery Part of → Process → SOLR
2541
+ OWL query (from XMI): object=<http://purl.obolibrary.org/obo/BFO_0000050> some <http://purl.obolibrary.org/obo/$ID>
2542
+ Where: BFO_0000050 = part of
2543
+ Matching criteria: Class (any)
2544
+
2545
+ :param short_form: short form of the anatomical class
2546
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2547
+ :param limit: maximum number of results to return (default -1, returns all results)
2548
+ :return: Parts of the specified class
2549
+ """
2550
+ owl_query = f"<http://purl.obolibrary.org/obo/BFO_0000050> some <{_short_form_to_iri(short_form)}>"
2551
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False)
2552
+
2553
+
2554
+ @with_solr_cache('subclasses_of')
2555
+ def get_subclasses_of(short_form: str, return_dataframe=True, limit: int = -1):
2556
+ """
2557
+ Retrieves subclasses of the specified class.
2558
+
2559
+ This implements the SubclassesOf query from the VFB XMI specification.
2560
+ Query chain (from XMI): Owlery → Process → SOLR
2561
+ OWL query: Direct subclasses of '<class>'
2562
+ Matching criteria: Class (any)
2563
+
2564
+ :param short_form: short form of the class
2565
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2566
+ :param limit: maximum number of results to return (default -1, returns all results)
2567
+ :return: Subclasses of the specified class
2568
+ """
2569
+ # For subclasses, we query the class itself (Owlery subclasses endpoint handles this)
2570
+ # Use angle brackets for IRI conversion, not quotes
2571
+ owl_query = f"<{short_form}>"
2572
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False)
2573
+
2574
+
2575
+ @with_solr_cache('neuron_classes_fasciculating_here')
2576
+ def get_neuron_classes_fasciculating_here(short_form: str, return_dataframe=True, limit: int = -1):
2577
+ """
2578
+ Retrieves neuron classes that fasciculate with (run along) the specified tract or nerve.
2579
+
2580
+ This implements the NeuronClassesFasciculatingHere query from the VFB XMI specification.
2581
+ Query chain (from XMI): Owlery → Process → SOLR
2582
+ OWL query (from XMI): object=<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002101> some <http://purl.obolibrary.org/obo/$ID>
2583
+ Where: FBbt_00005106 = neuron, RO_0002101 = fasciculates with
2584
+ Matching criteria: Class + Tract_or_nerve
2585
+
2586
+ :param short_form: short form of the tract or nerve (Class)
2587
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2588
+ :param limit: maximum number of results to return (default -1, returns all results)
2589
+ :return: Neuron classes that fasciculate with the specified tract or nerve
2590
+ """
2591
+ owl_query = f"<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002101> some <{_short_form_to_iri(short_form)}>"
2592
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False)
2593
+
2594
+
2595
+ @with_solr_cache('tracts_nerves_innervating_here')
2596
+ def get_tracts_nerves_innervating_here(short_form: str, return_dataframe=True, limit: int = -1):
2597
+ """
2598
+ Retrieves tracts and nerves that innervate the specified synaptic neuropil.
2599
+
2600
+ This implements the TractsNervesInnervatingHere query from the VFB XMI specification.
2601
+ Query chain (from XMI): Owlery → Process → SOLR
2602
+ OWL query (from XMI): object=<http://purl.obolibrary.org/obo/FBbt_00005099> and <http://purl.obolibrary.org/obo/RO_0002134> some <http://purl.obolibrary.org/obo/$ID>
2603
+ Where: FBbt_00005099 = tract or nerve, RO_0002134 = innervates
2604
+ Matching criteria: Class + Synaptic_neuropil, Class + Synaptic_neuropil_domain
2605
+
2606
+ :param short_form: short form of the synaptic neuropil (Class)
2607
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2608
+ :param limit: maximum number of results to return (default -1, returns all results)
2609
+ :return: Tracts and nerves that innervate the specified neuropil
2610
+ """
2611
+ owl_query = f"<http://purl.obolibrary.org/obo/FBbt_00005099> and <http://purl.obolibrary.org/obo/RO_0002134> some <{_short_form_to_iri(short_form)}>"
2612
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False)
2613
+
2614
+
2615
+ @with_solr_cache('lineage_clones_in')
2616
+ def get_lineage_clones_in(short_form: str, return_dataframe=True, limit: int = -1):
2617
+ """
2618
+ Retrieves lineage clones that overlap with the specified synaptic neuropil.
2619
+
2620
+ This implements the LineageClonesIn query from the VFB XMI specification.
2621
+ Query chain (from XMI): Owlery → Process → SOLR
2622
+ OWL query (from XMI): object=<http://purl.obolibrary.org/obo/FBbt_00007683> and <http://purl.obolibrary.org/obo/RO_0002131> some <http://purl.obolibrary.org/obo/$ID>
2623
+ Where: FBbt_00007683 = clone, RO_0002131 = overlaps
2624
+ Matching criteria: Class + Synaptic_neuropil, Class + Synaptic_neuropil_domain
2625
+
2626
+ :param short_form: short form of the synaptic neuropil (Class)
2627
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2628
+ :param limit: maximum number of results to return (default -1, returns all results)
2629
+ :return: Lineage clones that overlap with the specified neuropil
2630
+ """
2631
+ owl_query = f"<http://purl.obolibrary.org/obo/FBbt_00007683> and <http://purl.obolibrary.org/obo/RO_0002131> some <{_short_form_to_iri(short_form)}>"
2632
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False)
2633
+
2634
+
2635
+ @with_solr_cache('neuron_neuron_connectivity_query')
2636
+ def get_neuron_neuron_connectivity(short_form: str, return_dataframe=True, limit: int = -1, min_weight: float = 0, direction: str = 'both'):
2637
+ """
2638
+ Retrieves neurons connected to the specified neuron.
2639
+
2640
+ This implements the neuron_neuron_connectivity_query from the VFB XMI specification.
2641
+ Query chain (from XMI): Neo4j compound query → process
2642
+ Matching criteria: Individual + Connected_neuron
2643
+
2644
+ Uses synapsed_to relationships to find partner neurons.
2645
+ Returns inputs (upstream) and outputs (downstream) connection information.
2646
+
2647
+ :param short_form: short form of the neuron (Individual)
2648
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2649
+ :param limit: maximum number of results to return (default -1, returns all results)
2650
+ :param min_weight: minimum connection weight threshold (default 0, XMI spec uses 1)
2651
+ :param direction: filter by connection direction - 'both' (default), 'upstream', or 'downstream'
2652
+ :return: Partner neurons with their input/output connection weights
2653
+
2654
+ Note: Caching only applies when all parameters are at default values (complete results).
2655
+ """
2656
+ # Build Cypher query to get connected neurons using synapsed_to relationships
2657
+ # XMI spec uses min_weight > 1, but we default to 0 to return all valid connections
2658
+ cypher = f"""
2659
+ MATCH (primary:Individual {{short_form: '{short_form}'}})
2660
+ MATCH (oi:Individual)-[r:synapsed_to]-(primary)
2661
+ WHERE exists(r.weight) AND r.weight[0] > {min_weight}
2662
+ WITH primary, oi
2663
+ OPTIONAL MATCH (oi)<-[down:synapsed_to]-(primary)
2664
+ WITH down, oi, primary
2665
+ OPTIONAL MATCH (primary)<-[up:synapsed_to]-(oi)
2666
+ RETURN
2667
+ oi.short_form AS id,
2668
+ oi.label AS label,
2669
+ coalesce(down.weight[0], 0) AS outputs,
2670
+ coalesce(up.weight[0], 0) AS inputs,
2671
+ oi.uniqueFacets AS tags
2672
+ """
2673
+ if limit != -1:
2674
+ cypher += f" LIMIT {limit}"
2675
+
2676
+ # Run query using Neo4j client
2677
+ results = vc.nc.commit_list([cypher])
2678
+ rows = get_dict_cursor()(results)
2679
+
2680
+ # Filter by direction if specified
2681
+ if direction != 'both':
2682
+ if direction == 'upstream':
2683
+ rows = [row for row in rows if row.get('inputs', 0) > 0]
2684
+ elif direction == 'downstream':
2685
+ rows = [row for row in rows if row.get('outputs', 0) > 0]
2686
+
2687
+ # Format output
2688
+ if return_dataframe:
2689
+ df = pd.DataFrame(rows)
2690
+ return df
2691
+
2692
+ headers = {
2693
+ 'id': {'title': 'Neuron ID', 'type': 'selection_id', 'order': -1},
2694
+ 'label': {'title': 'Partner Neuron', 'type': 'markdown', 'order': 0},
2695
+ 'outputs': {'title': 'Outputs', 'type': 'number', 'order': 1},
2696
+ 'inputs': {'title': 'Inputs', 'type': 'number', 'order': 2},
2697
+ 'tags': {'title': 'Neuron Types', 'type': 'list', 'order': 3},
2698
+ }
2699
+ return {
2700
+ 'headers': headers,
2701
+ 'data': rows,
2702
+ 'count': len(rows)
2703
+ }
2704
+
2705
+
2706
+ @with_solr_cache('neuron_region_connectivity_query')
2707
+ def get_neuron_region_connectivity(short_form: str, return_dataframe=True, limit: int = -1):
2708
+ """
2709
+ Retrieves brain regions where the specified neuron has synaptic terminals.
2710
+
2711
+ This implements the neuron_region_connectivity_query from the VFB XMI specification.
2712
+ Query chain (from XMI): Neo4j compound query → process
2713
+ Matching criteria: Individual + has_region_connectivity
2714
+
2715
+ Uses has_presynaptic_terminals_in and has_postsynaptic_terminal_in relationships
2716
+ to find brain regions where the neuron makes connections.
2717
+
2718
+ :param short_form: short form of the neuron (Individual)
2719
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2720
+ :param limit: maximum number of results to return (default -1, returns all results)
2721
+ :return: Brain regions with presynaptic and postsynaptic terminal counts
2722
+ """
2723
+ # Build Cypher query based on XMI spec pattern
2724
+ cypher = f"""
2725
+ MATCH (primary:Individual {{short_form: '{short_form}'}})
2726
+ MATCH (target:Individual)<-[r:has_presynaptic_terminals_in|has_postsynaptic_terminal_in]-(primary)
2727
+ WITH DISTINCT collect(properties(r)) + {{}} as props, target, primary
2728
+ WITH apoc.map.removeKeys(apoc.map.merge(props[0], props[1]), ['iri', 'short_form', 'Related', 'label', 'type']) as synapse_counts,
2729
+ target,
2730
+ primary
2731
+ RETURN
2732
+ target.short_form AS id,
2733
+ target.label AS region,
2734
+ synapse_counts.`pre` AS presynaptic_terminals,
2735
+ synapse_counts.`post` AS postsynaptic_terminals,
2736
+ target.uniqueFacets AS tags
2737
+ """
2738
+ if limit != -1:
2739
+ cypher += f" LIMIT {limit}"
2740
+
2741
+ # Run query using Neo4j client
2742
+ results = vc.nc.commit_list([cypher])
2743
+ rows = get_dict_cursor()(results)
2744
+
2745
+ # Format output
2746
+ if return_dataframe:
2747
+ df = pd.DataFrame(rows)
2748
+ return df
2749
+
2750
+ headers = {
2751
+ 'id': {'title': 'Region ID', 'type': 'selection_id', 'order': -1},
2752
+ 'region': {'title': 'Brain Region', 'type': 'markdown', 'order': 0},
2753
+ 'presynaptic_terminals': {'title': 'Presynaptic Terminals', 'type': 'number', 'order': 1},
2754
+ 'postsynaptic_terminals': {'title': 'Postsynaptic Terminals', 'type': 'number', 'order': 2},
2755
+ 'tags': {'title': 'Region Types', 'type': 'list', 'order': 3},
2756
+ }
2757
+ return {
2758
+ 'headers': headers,
2759
+ 'data': rows,
2760
+ 'count': len(rows)
2761
+ }
2762
+
2763
+
2764
+ @with_solr_cache('images_neurons')
2765
+ def get_images_neurons(short_form: str, return_dataframe=True, limit: int = -1):
2766
+ """
2767
+ Retrieves individual neuron images with parts in the specified synaptic neuropil.
2768
+
2769
+ This implements the ImagesNeurons query from the VFB XMI specification.
2770
+ Query chain (from XMI): Owlery instances → Process → SOLR
2771
+ OWL query (from XMI): object=<FBbt_00005106> and <RO_0002131> some <$ID> (instances)
2772
+ Where: FBbt_00005106 = neuron, RO_0002131 = overlaps
2773
+ Matching criteria: Class + Synaptic_neuropil, Class + Synaptic_neuropil_domain
2774
+
2775
+ Note: This query returns INSTANCES (individual neuron images) not classes.
2776
+
2777
+ :param short_form: short form of the synaptic neuropil (Class)
2778
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2779
+ :param limit: maximum number of results to return (default -1, returns all results)
2780
+ :return: Individual neuron images with parts in the specified neuropil
2781
+ """
2782
+ owl_query = f"<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002131> some <{_short_form_to_iri(short_form)}>"
2783
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit,
2784
+ solr_field='anat_image_query', query_by_label=False, query_instances=True)
2785
+
2786
+
2787
+ @with_solr_cache('images_that_develop_from')
2788
+ def get_images_that_develop_from(short_form: str, return_dataframe=True, limit: int = -1):
2789
+ """
2790
+ Retrieves individual neuron images that develop from the specified neuroblast.
2791
+
2792
+ This implements the ImagesThatDevelopFrom query from the VFB XMI specification.
2793
+ Query chain (from XMI): Owlery instances → Owlery Pass → SOLR
2794
+ OWL query (from XMI): object=<FBbt_00005106> and <RO_0002202> some <$ID> (instances)
2795
+ Where: FBbt_00005106 = neuron, RO_0002202 = develops_from
2796
+ Matching criteria: Class + Neuroblast
2797
+
2798
+ Note: This query returns INSTANCES (individual neuron images) not classes.
2799
+
2800
+ :param short_form: short form of the neuroblast (Class)
2801
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2802
+ :param limit: maximum number of results to return (default -1, returns all results)
2803
+ :return: Individual neuron images that develop from the specified neuroblast
2804
+ """
2805
+ owl_query = f"<http://purl.obolibrary.org/obo/FBbt_00005106> and <http://purl.obolibrary.org/obo/RO_0002202> some <{_short_form_to_iri(short_form)}>"
2806
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit,
2807
+ solr_field='anat_image_query', query_by_label=False, query_instances=True)
2808
+
2809
+
2810
+ def _short_form_to_iri(short_form: str) -> str:
2811
+ """
2812
+ Convert a short form ID to its full IRI.
2813
+
2814
+ First tries simple prefix mappings for common cases (VFB*, FB*).
2815
+ For other cases, queries SOLR to get the canonical IRI.
2816
+
2817
+ :param short_form: Short form ID (e.g., 'VFBexp_FBtp0022557', 'FBbt_00003748')
2818
+ :return: Full IRI
2819
+ """
2820
+ # VFB IDs use virtualflybrain.org/reports
2821
+ if short_form.startswith('VFB'):
2822
+ return f"http://virtualflybrain.org/reports/{short_form}"
2823
+
2824
+ # FB* IDs (FlyBase) use purl.obolibrary.org/obo
2825
+ # This includes FBbt_, FBtp_, FBdv_, etc.
2826
+ if short_form.startswith('FB'):
2827
+ return f"http://purl.obolibrary.org/obo/{short_form}"
2828
+
2829
+ # For other cases, query SOLR to get the IRI from term_info
2830
+ try:
2831
+ results = vfb_solr.search(
2832
+ q=f'id:{short_form}',
2833
+ fl='term_info',
2834
+ rows=1
2835
+ )
2836
+
2837
+ if results.docs and 'term_info' in results.docs[0]:
2838
+ term_info_str = results.docs[0]['term_info'][0]
2839
+ term_info = json.loads(term_info_str)
2840
+ iri = term_info.get('term', {}).get('core', {}).get('iri')
2841
+ if iri:
2842
+ return iri
2843
+ except Exception as e:
2844
+ # If SOLR query fails, fall back to OBO default
2845
+ print(f"Warning: Could not fetch IRI for {short_form} from SOLR: {e}")
2846
+
2847
+ # Default to OBO for other IDs (FBbi_, etc.)
2848
+ return f"http://purl.obolibrary.org/obo/{short_form}"
2849
+
2850
+
2851
+ @with_solr_cache('expression_pattern_fragments')
2852
+ def get_expression_pattern_fragments(short_form: str, return_dataframe=True, limit: int = -1):
2853
+ """
2854
+ Retrieves individual expression pattern fragment images that are part of an expression pattern.
2855
+
2856
+ This implements the epFrag query from the VFB XMI specification.
2857
+ XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
2858
+
2859
+ Query chain (from XMI): Owlery individual parts → Process → SOLR
2860
+ OWL query (from XMI): object=<BFO_0000050> some <$ID> (instances)
2861
+ Where: BFO_0000050 = part_of
2862
+ Matching criteria: Class + Expression_pattern
2863
+
2864
+ Note: This query returns INSTANCES (individual expression pattern fragments) not classes.
2865
+
2866
+ :param short_form: short form of the expression pattern (Class)
2867
+ :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
2868
+ :param limit: maximum number of results to return (default -1, returns all results)
2869
+ :return: Individual expression pattern fragment images
2870
+ """
2871
+ iri = _short_form_to_iri(short_form)
2872
+ owl_query = f"<http://purl.obolibrary.org/obo/BFO_0000050> some <{iri}>"
2873
+ return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit,
2874
+ solr_field='anat_image_query', query_by_label=False, query_instances=True)
2875
+
2876
+
2877
+ def _get_neurons_part_here_headers():
2878
+ """Return standard headers for get_neurons_with_part_in results"""
2879
+ return {
2880
+ "id": {"title": "Add", "type": "selection_id", "order": -1},
2881
+ "label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
2882
+ "tags": {"title": "Tags", "type": "tags", "order": 2},
2883
+ "source": {"title": "Data Source", "type": "metadata", "order": 3},
2884
+ "source_id": {"title": "Data Source ID", "type": "metadata", "order": 4},
2885
+ "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
2886
+ }
2887
+
2888
+
2889
+ def _get_standard_query_headers():
2890
+ """Return standard headers for most query results (no source/source_id)"""
2891
+ return {
2892
+ "id": {"title": "Add", "type": "selection_id", "order": -1},
2893
+ "label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
2894
+ "tags": {"title": "Tags", "type": "tags", "order": 2},
2895
+ "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
2896
+ }
2897
+
2898
+
2899
+ def _owlery_query_to_results(owl_query_string: str, short_form: str, return_dataframe: bool = True,
2900
+ limit: int = -1, solr_field: str = 'anat_query',
2901
+ include_source: bool = False, query_by_label: bool = True,
2902
+ query_instances: bool = False):
2903
+ """
2904
+ Unified helper function for Owlery-based queries.
2905
+
2906
+ This implements the common pattern:
2907
+ 1. Query Owlery for class/instance IDs matching an OWL pattern
2908
+ 2. Fetch details from SOLR for each result
2909
+ 3. Format results as DataFrame or dict
2910
+
2911
+ :param owl_query_string: OWL query string (format depends on query_by_label parameter)
2912
+ :param short_form: The anatomical region or entity short form
2913
+ :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict
2914
+ :param limit: Maximum number of results to return (default -1 for all)
2915
+ :param solr_field: SOLR field to query (default 'anat_query' for Class, 'anat_image_query' for Individuals)
2916
+ :param include_source: Whether to include source and source_id columns
2917
+ :param query_by_label: If True, use label syntax with quotes. If False, use IRI syntax with angle brackets.
2918
+ :param query_instances: If True, query for instances instead of subclasses
2919
+ :return: Query results
2920
+ """
2921
+ try:
2922
+ # Step 1: Query Owlery for classes or instances matching the OWL pattern
2923
+ if query_instances:
2924
+ result_ids = vc.vfb.oc.get_instances(
2925
+ query=owl_query_string,
2926
+ query_by_label=query_by_label,
2927
+ verbose=False
2928
+ )
2929
+ else:
2930
+ result_ids = vc.vfb.oc.get_subclasses(
2931
+ query=owl_query_string,
2932
+ query_by_label=query_by_label,
2933
+ verbose=False
2934
+ )
2935
+
2936
+ class_ids = result_ids # Keep variable name for compatibility
2937
+
2938
+ if not class_ids:
2939
+ # No results found - return empty
1007
2940
  if return_dataframe:
1008
2941
  return pd.DataFrame()
1009
2942
  return {
1010
- "headers": _get_instances_headers(),
2943
+ "headers": _get_standard_query_headers() if not include_source else _get_neurons_part_here_headers(),
1011
2944
  "rows": [],
1012
2945
  "count": 0
1013
2946
  }
1014
2947
 
1015
- term_info = term_info_results[0]
1016
- anatomy_images = term_info.get('anatomy_channel_image', [])
2948
+ total_count = len(class_ids)
1017
2949
 
1018
- # Apply limit if specified
2950
+ # Apply limit if specified (before SOLR query to save processing)
1019
2951
  if limit != -1 and limit > 0:
1020
- anatomy_images = anatomy_images[:limit]
2952
+ class_ids = class_ids[:limit]
1021
2953
 
1022
- # Convert anatomy_channel_image to instance rows with rich data
2954
+ # Step 2: Query SOLR for ALL classes in a single batch query
2955
+ # Use the {!terms f=id} syntax from XMI to fetch all results efficiently
1023
2956
  rows = []
1024
- for img in anatomy_images:
1025
- anatomy = img.get('anatomy', {})
1026
- channel_image = img.get('channel_image', {})
1027
- image_info = channel_image.get('image', {}) if channel_image else {}
1028
- template_anatomy = image_info.get('template_anatomy', {}) if image_info else {}
1029
-
1030
- # Extract tags from unique_facets (matching original Neo4j format and ordering)
1031
- unique_facets = anatomy.get('unique_facets', [])
1032
- anatomy_types = anatomy.get('types', [])
1033
-
1034
- # Create ordered list matching the expected Neo4j format
1035
- # Based on test diff, expected order and tags: Nervous_system, Adult, Visual_system, Synaptic_neuropil_domain
1036
- # Note: We exclude 'Synaptic_neuropil' as it doesn't appear in expected output
1037
- ordered_tags = []
1038
- for tag_type in ['Nervous_system', 'Adult', 'Visual_system', 'Synaptic_neuropil_domain']:
1039
- if tag_type in anatomy_types or tag_type in unique_facets:
1040
- ordered_tags.append(tag_type)
1041
-
1042
- # Use the ordered tags to match expected format
1043
- tags = '|'.join(ordered_tags)
1044
-
1045
- # Extract thumbnail URL and convert to HTTPS
1046
- thumbnail_url = image_info.get('image_thumbnail', '') if image_info else ''
1047
- if thumbnail_url:
1048
- # Replace http with https and thumbnailT.png with thumbnail.png
1049
- thumbnail_url = thumbnail_url.replace('http://', 'https://').replace('thumbnailT.png', 'thumbnail.png')
2957
+ try:
2958
+ # Build filter query with all class IDs
2959
+ id_list = ','.join(class_ids)
2960
+ results = vfb_solr.search(
2961
+ q='id:*',
2962
+ fq=f'{{!terms f=id}}{id_list}',
2963
+ fl=solr_field,
2964
+ rows=len(class_ids)
2965
+ )
1050
2966
 
1051
- # Format thumbnail with proper markdown link (matching Neo4j format)
1052
- thumbnail = ''
1053
- if thumbnail_url and template_anatomy:
1054
- # Prefer symbol over label for template (matching Neo4j behavior)
1055
- template_label = template_anatomy.get('label', '')
1056
- if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
1057
- template_label = template_anatomy.get('symbol')
1058
- # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1059
- template_label = unquote(template_label)
1060
- template_short_form = template_anatomy.get('short_form', '')
2967
+ # Process all results
2968
+ for doc in results.docs:
2969
+ if solr_field not in doc:
2970
+ continue
2971
+
2972
+ # Parse the SOLR field JSON string
2973
+ field_data_str = doc[solr_field][0]
2974
+ field_data = json.loads(field_data_str)
1061
2975
 
1062
- # Prefer symbol over label for anatomy (matching Neo4j behavior)
1063
- anatomy_label = anatomy.get('label', '')
1064
- if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
1065
- anatomy_label = anatomy.get('symbol')
1066
- # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1067
- anatomy_label = unquote(anatomy_label)
1068
- anatomy_short_form = anatomy.get('short_form', '')
2976
+ # Extract core term information
2977
+ term_core = field_data.get('term', {}).get('core', {})
2978
+ class_short_form = term_core.get('short_form', '')
1069
2979
 
1070
- if template_label and anatomy_label:
1071
- # Create thumbnail markdown link matching the original format
1072
- # DO NOT encode brackets in alt text - that's done later by encode_markdown_links
1073
- alt_text = f"{anatomy_label} aligned to {template_label}"
1074
- link_target = f"{template_short_form},{anatomy_short_form}"
1075
- thumbnail = f"[![{alt_text}]({thumbnail_url} '{alt_text}')]({link_target})"
1076
-
1077
- # Format template information
1078
- template_formatted = ''
1079
- if template_anatomy:
1080
- # Prefer symbol over label (matching Neo4j behavior)
1081
- template_label = template_anatomy.get('label', '')
1082
- if template_anatomy.get('symbol') and len(template_anatomy.get('symbol', '')) > 0:
1083
- template_label = template_anatomy.get('symbol')
1084
- # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1085
- template_label = unquote(template_label)
1086
- template_short_form = template_anatomy.get('short_form', '')
1087
- if template_label and template_short_form:
1088
- template_formatted = f"[{template_label}]({template_short_form})"
1089
-
1090
- # Handle label formatting (match Neo4j format - prefer symbol over label)
1091
- anatomy_label = anatomy.get('label', 'Unknown')
1092
- if anatomy.get('symbol') and len(anatomy.get('symbol', '')) > 0:
1093
- anatomy_label = anatomy.get('symbol')
1094
- # Decode URL-encoded strings from SOLR (e.g., ME%28R%29 -> ME(R))
1095
- anatomy_label = unquote(anatomy_label)
1096
- anatomy_short_form = anatomy.get('short_form', '')
1097
-
1098
- row = {
1099
- 'id': anatomy_short_form,
1100
- 'label': f"[{anatomy_label}]({anatomy_short_form})",
1101
- 'tags': tags,
1102
- 'parent': f"[{term_info.get('term', {}).get('core', {}).get('label', 'Unknown')}]({short_form})",
1103
- 'source': '', # Not readily available in SOLR anatomy_channel_image
1104
- 'source_id': '',
1105
- 'template': template_formatted,
1106
- 'dataset': '', # Not readily available in SOLR anatomy_channel_image
1107
- 'license': '',
1108
- 'thumbnail': thumbnail
1109
- }
1110
- rows.append(row)
1111
-
1112
- # Sort by ID to match expected ordering (Neo4j uses "ORDER BY id Desc")
1113
- rows.sort(key=lambda x: x['id'], reverse=True)
1114
-
1115
- total_count = len(anatomy_images)
2980
+ # Extract label (prefer symbol over label)
2981
+ label_text = term_core.get('label', 'Unknown')
2982
+ if term_core.get('symbol') and len(term_core.get('symbol', '')) > 0:
2983
+ label_text = term_core.get('symbol')
2984
+ label_text = unquote(label_text)
2985
+
2986
+ # Extract tags from unique_facets
2987
+ tags = '|'.join(term_core.get('unique_facets', []))
2988
+
2989
+ # Extract thumbnail from anatomy_channel_image if available
2990
+ thumbnail = ''
2991
+ anatomy_images = field_data.get('anatomy_channel_image', [])
2992
+ if anatomy_images and len(anatomy_images) > 0:
2993
+ first_img = anatomy_images[0]
2994
+ channel_image = first_img.get('channel_image', {})
2995
+ image_info = channel_image.get('image', {})
2996
+ thumbnail_url = image_info.get('image_thumbnail', '')
2997
+
2998
+ if thumbnail_url:
2999
+ # Convert to HTTPS and use non-transparent version
3000
+ thumbnail_url = thumbnail_url.replace('http://', 'https://').replace('thumbnailT.png', 'thumbnail.png')
3001
+
3002
+ # Format thumbnail with proper markdown link (matching Neo4j behavior)
3003
+ template_anatomy = image_info.get('template_anatomy', {})
3004
+ if template_anatomy:
3005
+ template_label = template_anatomy.get('symbol') or template_anatomy.get('label', '')
3006
+ template_label = unquote(template_label)
3007
+ anatomy_label = first_img.get('anatomy', {}).get('label', label_text)
3008
+ anatomy_label = unquote(anatomy_label)
3009
+ alt_text = f"{anatomy_label} aligned to {template_label}"
3010
+ thumbnail = f"[![{alt_text}]({thumbnail_url} '{alt_text}')]({class_short_form})"
3011
+
3012
+ # Build row
3013
+ row = {
3014
+ 'id': class_short_form,
3015
+ 'label': f"[{label_text}]({class_short_form})",
3016
+ 'tags': tags,
3017
+ 'thumbnail': thumbnail
3018
+ }
3019
+
3020
+ # Optionally add source information
3021
+ if include_source:
3022
+ source = ''
3023
+ source_id = ''
3024
+ xrefs = field_data.get('xrefs', [])
3025
+ if xrefs and len(xrefs) > 0:
3026
+ for xref in xrefs:
3027
+ if xref.get('is_data_source', False):
3028
+ site_info = xref.get('site', {})
3029
+ site_label = site_info.get('symbol') or site_info.get('label', '')
3030
+ site_short_form = site_info.get('short_form', '')
3031
+ if site_label and site_short_form:
3032
+ source = f"[{site_label}]({site_short_form})"
3033
+
3034
+ accession = xref.get('accession', '')
3035
+ link_base = xref.get('link_base', '')
3036
+ if accession and link_base:
3037
+ source_id = f"[{accession}]({link_base}{accession})"
3038
+ break
3039
+ row['source'] = source
3040
+ row['source_id'] = source_id
3041
+
3042
+ rows.append(row)
3043
+
3044
+ except Exception as e:
3045
+ print(f"Error fetching SOLR data: {e}")
3046
+ import traceback
3047
+ traceback.print_exc()
1116
3048
 
3049
+ # Convert to DataFrame if requested
1117
3050
  if return_dataframe:
1118
3051
  df = pd.DataFrame(rows)
1119
- # Apply encoding to markdown links (matches Neo4j implementation)
1120
- columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
3052
+ # Apply markdown encoding
3053
+ columns_to_encode = ['label', 'thumbnail']
1121
3054
  df = encode_markdown_links(df, columns_to_encode)
1122
3055
  return df
1123
3056
 
3057
+ # Return formatted dict
1124
3058
  return {
1125
- "headers": _get_instances_headers(),
3059
+ "headers": _get_standard_query_headers(),
1126
3060
  "rows": rows,
1127
3061
  "count": total_count
1128
3062
  }
1129
3063
 
1130
- except Exception as e:
1131
- print(f"Error in SOLR fallback for get_instances: {e}")
1132
- # Return empty results with proper structure
3064
+ except Exception as e:
3065
+ # Construct the Owlery URL for debugging failed queries
3066
+ owlery_base = "http://owl.virtualflybrain.org/kbs/vfb"
3067
+ try:
3068
+ if hasattr(vc.vfb, 'oc') and hasattr(vc.vfb.oc, 'owlery_endpoint'):
3069
+ owlery_base = vc.vfb.oc.owlery_endpoint.rstrip('/')
3070
+ except Exception:
3071
+ pass
3072
+
3073
+ from urllib.parse import urlencode
3074
+
3075
+ # Build the full URL with all parameters exactly as the request would be made
3076
+ params = {
3077
+ 'object': owl_query_string,
3078
+ 'direct': 'true' if query_instances else 'false', # instances use direct=true, subclasses use direct=false
3079
+ 'includeDeprecated': 'false'
3080
+ }
3081
+
3082
+ # For subclasses queries, add includeEquivalent parameter
3083
+ if not query_instances:
3084
+ params['includeEquivalent'] = 'true'
3085
+
3086
+ endpoint = "/instances" if query_instances else "/subclasses"
3087
+ owlery_url = f"{owlery_base}{endpoint}?{urlencode(params)}"
3088
+
3089
+ import sys
3090
+ print(f"ERROR: Owlery {'instances' if query_instances else 'subclasses'} query failed: {e}", file=sys.stderr)
3091
+ print(f" Full URL: {owlery_url}", file=sys.stderr)
3092
+ print(f" Query string: {owl_query_string}", file=sys.stderr)
3093
+ import traceback
3094
+ traceback.print_exc()
3095
+ # Return error indication with count=-1
1133
3096
  if return_dataframe:
1134
3097
  return pd.DataFrame()
1135
3098
  return {
1136
- "headers": _get_instances_headers(),
3099
+ "headers": _get_standard_query_headers(),
1137
3100
  "rows": [],
1138
- "count": 0
3101
+ "count": -1
1139
3102
  }
1140
3103
 
1141
- def _get_instances_headers():
1142
- """Return standard headers for get_instances results"""
1143
- return {
1144
- "id": {"title": "Add", "type": "selection_id", "order": -1},
1145
- "label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
1146
- "parent": {"title": "Parent Type", "type": "markdown", "order": 1},
1147
- "template": {"title": "Template", "type": "markdown", "order": 4},
1148
- "tags": {"title": "Gross Types", "type": "tags", "order": 3},
1149
- "source": {"title": "Data Source", "type": "markdown", "order": 5},
1150
- "source_id": {"title": "Data Source", "type": "markdown", "order": 6},
1151
- "dataset": {"title": "Dataset", "type": "markdown", "order": 7},
1152
- "license": {"title": "License", "type": "markdown", "order": 8},
1153
- "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
1154
- }
1155
3104
 
1156
- # Convert the results to a DataFrame
3105
+ def get_anatomy_scrnaseq(anatomy_short_form: str, return_dataframe=True, limit: int = -1):
3106
+ """
3107
+ Retrieve single cell RNA-seq data (clusters and datasets) for the specified anatomical region.
3108
+
3109
+ This implements the anatScRNAseqQuery from the VFB XMI specification.
3110
+ Returns clusters that are composed primarily of the anatomy, along with their parent datasets and publications.
3111
+
3112
+ XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
3113
+ Query: anat_scRNAseq_query
3114
+
3115
+ :param anatomy_short_form: Short form identifier of the anatomical region (e.g., 'FBbt_00003982')
3116
+ :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
3117
+ :param limit: Maximum number of results to return (default: -1 for all results)
3118
+ :return: scRNAseq clusters and datasets for this anatomy
3119
+ :rtype: pandas.DataFrame or dict
3120
+ """
3121
+
3122
+ # Count query
3123
+ count_query = f"""
3124
+ MATCH (primary:Class:Anatomy)
3125
+ WHERE primary.short_form = '{anatomy_short_form}'
3126
+ WITH primary
3127
+ MATCH (primary)<-[:composed_primarily_of]-(c:Cluster)-[:has_source]->(ds:scRNAseq_DataSet)
3128
+ RETURN COUNT(c) AS total_count
3129
+ """
3130
+
3131
+ count_results = vc.nc.commit_list([count_query])
3132
+ count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
3133
+ total_count = count_df['total_count'][0] if not count_df.empty else 0
3134
+
3135
+ # Main query: get clusters with dataset and publication info
3136
+ main_query = f"""
3137
+ MATCH (primary:Class:Anatomy)
3138
+ WHERE primary.short_form = '{anatomy_short_form}'
3139
+ WITH primary
3140
+ MATCH (primary)<-[:composed_primarily_of]-(c:Cluster)-[:has_source]->(ds:scRNAseq_DataSet)
3141
+ OPTIONAL MATCH (ds)-[:has_reference]->(p:pub)
3142
+ WITH {{
3143
+ short_form: c.short_form,
3144
+ label: coalesce(c.label,''),
3145
+ iri: c.iri,
3146
+ types: labels(c),
3147
+ unique_facets: apoc.coll.sort(coalesce(c.uniqueFacets, [])),
3148
+ symbol: coalesce(([]+c.symbol)[0], '')
3149
+ }} AS cluster,
3150
+ {{
3151
+ short_form: ds.short_form,
3152
+ label: coalesce(ds.label,''),
3153
+ iri: ds.iri,
3154
+ types: labels(ds),
3155
+ unique_facets: apoc.coll.sort(coalesce(ds.uniqueFacets, [])),
3156
+ symbol: coalesce(([]+ds.symbol)[0], '')
3157
+ }} AS dataset,
3158
+ COLLECT({{
3159
+ core: {{
3160
+ short_form: p.short_form,
3161
+ label: coalesce(p.label,''),
3162
+ iri: p.iri,
3163
+ types: labels(p),
3164
+ unique_facets: apoc.coll.sort(coalesce(p.uniqueFacets, [])),
3165
+ symbol: coalesce(([]+p.symbol)[0], '')
3166
+ }},
3167
+ PubMed: coalesce(([]+p.PMID)[0], ''),
3168
+ FlyBase: coalesce(([]+p.FlyBase)[0], ''),
3169
+ DOI: coalesce(([]+p.DOI)[0], '')
3170
+ }}) AS pubs,
3171
+ primary
3172
+ RETURN
3173
+ cluster.short_form AS id,
3174
+ apoc.text.format("[%s](%s)", [cluster.label, cluster.short_form]) AS name,
3175
+ apoc.text.join(cluster.unique_facets, '|') AS tags,
3176
+ dataset,
3177
+ pubs
3178
+ ORDER BY cluster.label
3179
+ """
3180
+
3181
+ if limit != -1:
3182
+ main_query += f" LIMIT {limit}"
3183
+
3184
+ # Execute the query
3185
+ results = vc.nc.commit_list([main_query])
1157
3186
  df = pd.DataFrame.from_records(get_dict_cursor()(results))
1158
-
1159
- columns_to_encode = ['label', 'parent', 'source', 'source_id', 'template', 'dataset', 'license', 'thumbnail']
1160
- df = encode_markdown_links(df, columns_to_encode)
3187
+
3188
+ # Encode markdown links
3189
+ if not df.empty:
3190
+ columns_to_encode = ['name']
3191
+ df = encode_markdown_links(df, columns_to_encode)
1161
3192
 
1162
3193
  if return_dataframe:
1163
3194
  return df
3195
+ else:
3196
+ formatted_results = {
3197
+ "headers": {
3198
+ "id": {"title": "ID", "type": "selection_id", "order": -1},
3199
+ "name": {"title": "Cluster", "type": "markdown", "order": 0},
3200
+ "tags": {"title": "Tags", "type": "tags", "order": 1},
3201
+ "dataset": {"title": "Dataset", "type": "metadata", "order": 2},
3202
+ "pubs": {"title": "Publications", "type": "metadata", "order": 3}
3203
+ },
3204
+ "rows": [
3205
+ {key: row[key] for key in ["id", "name", "tags", "dataset", "pubs"]}
3206
+ for row in safe_to_dict(df, sort_by_id=False)
3207
+ ],
3208
+ "count": total_count
3209
+ }
3210
+ return formatted_results
1164
3211
 
1165
- # Format the results
1166
- formatted_results = {
1167
- "headers": {
1168
- "id": {"title": "Add", "type": "selection_id", "order": -1},
1169
- "label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
1170
- "parent": {"title": "Parent Type", "type": "markdown", "order": 1},
1171
- "template": {"title": "Template", "type": "markdown", "order": 4},
1172
- "tags": {"title": "Gross Types", "type": "tags", "order": 3},
1173
- "source": {"title": "Data Source", "type": "markdown", "order": 5},
1174
- "source_id": {"title": "Data Source", "type": "markdown", "order": 6},
1175
- "dataset": {"title": "Dataset", "type": "markdown", "order": 7},
1176
- "license": {"title": "License", "type": "markdown", "order": 8},
1177
- "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
1178
- },
1179
- "rows": [
1180
- {
1181
- key: row[key]
1182
- for key in [
1183
- "id",
1184
- "label",
1185
- "tags",
1186
- "parent",
1187
- "source",
1188
- "source_id",
1189
- "template",
1190
- "dataset",
1191
- "license",
1192
- "thumbnail"
1193
- ]
1194
- }
1195
- for row in safe_to_dict(df)
1196
- ],
1197
- "count": total_count
1198
- }
1199
-
1200
- return formatted_results
1201
-
1202
- def get_templates(limit: int = -1, return_dataframe: bool = False):
1203
- """Get list of templates
1204
-
1205
- :param limit: maximum number of results to return (default -1, returns all results)
1206
- :param return_dataframe: Returns pandas dataframe if true, otherwise returns list of dicts.
1207
- :return: list of templates (id, label, tags, source (db) id, accession_in_source) + similarity score.
1208
- :rtype: pandas.DataFrame or list of dicts
1209
3212
 
3213
+ def get_cluster_expression(cluster_short_form: str, return_dataframe=True, limit: int = -1):
1210
3214
  """
1211
- count_query = """MATCH (t:Template)<-[:depicts]-(tc:Template)-[r:in_register_with]->(tc:Template)
1212
- RETURN COUNT(DISTINCT t) AS total_count"""
1213
-
3215
+ Retrieve genes expressed in the specified cluster.
3216
+
3217
+ This implements the clusterExpression query from the VFB XMI specification.
3218
+ Returns genes with expression levels and extents for a given cluster.
3219
+
3220
+ XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
3221
+ Query: cluster_expression_query
3222
+
3223
+ :param cluster_short_form: Short form identifier of the cluster (e.g., 'VFB_00101234')
3224
+ :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
3225
+ :param limit: Maximum number of results to return (default: -1 for all results)
3226
+ :return: Genes expressed in this cluster with expression data
3227
+ :rtype: pandas.DataFrame or dict
3228
+ """
3229
+
3230
+ # Count query
3231
+ count_query = f"""
3232
+ MATCH (primary:Individual:Cluster)
3233
+ WHERE primary.short_form = '{cluster_short_form}'
3234
+ WITH primary
3235
+ MATCH (primary)-[e:expresses]->(g:Gene:Class)
3236
+ RETURN COUNT(g) AS total_count
3237
+ """
3238
+
1214
3239
  count_results = vc.nc.commit_list([count_query])
1215
3240
  count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
1216
3241
  total_count = count_df['total_count'][0] if not count_df.empty else 0
1217
-
1218
- # Define the main Cypher query
1219
- query = f"""
1220
- MATCH (t:Template)-[:INSTANCEOF]->(p:Class),
1221
- (t)<-[:depicts]-(tc:Template)-[r:in_register_with]->(tc:Template),
1222
- (t)-[:has_source]->(ds:DataSet)-[:has_license]->(lic:License)
1223
- RETURN t.short_form as id,
1224
- apoc.text.format("[%s](%s)",[COALESCE(t.symbol[0],t.label),t.short_form]) AS name,
1225
- apoc.text.join(t.uniqueFacets, '|') AS tags,
1226
- apoc.text.format("[%s](%s)",[COALESCE(ds.symbol[0],ds.label),ds.short_form]) AS dataset,
1227
- REPLACE(apoc.text.format("[%s](%s)",[COALESCE(lic.symbol[0],lic.label),lic.short_form]), '[null](null)', '') AS license,
1228
- REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(t.symbol[0],t.label), REPLACE(COALESCE(r.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(t.symbol[0],t.label), t.short_form]), "[![null]( 'null')](null)", "") as thumbnail,
1229
- 99 as order
1230
- ORDER BY id Desc
3242
+
3243
+ # Main query: get genes with expression levels
3244
+ main_query = f"""
3245
+ MATCH (primary:Individual:Cluster)
3246
+ WHERE primary.short_form = '{cluster_short_form}'
3247
+ WITH primary
3248
+ MATCH (primary)-[e:expresses]->(g:Gene:Class)
3249
+ WITH coalesce(e.expression_level_padded[0], e.expression_level[0]) as expression_level,
3250
+ e.expression_extent[0] as expression_extent,
3251
+ {{
3252
+ short_form: g.short_form,
3253
+ label: coalesce(g.label,''),
3254
+ iri: g.iri,
3255
+ types: labels(g),
3256
+ unique_facets: apoc.coll.sort(coalesce(g.uniqueFacets, [])),
3257
+ symbol: coalesce(([]+g.symbol)[0], '')
3258
+ }} AS gene,
3259
+ primary
3260
+ MATCH (a:Anatomy)<-[:composed_primarily_of]-(primary)
3261
+ WITH {{
3262
+ short_form: a.short_form,
3263
+ label: coalesce(a.label,''),
3264
+ iri: a.iri,
3265
+ types: labels(a),
3266
+ unique_facets: apoc.coll.sort(coalesce(a.uniqueFacets, [])),
3267
+ symbol: coalesce(([]+a.symbol)[0], '')
3268
+ }} AS anatomy, primary, expression_level, expression_extent, gene
3269
+ RETURN
3270
+ gene.short_form AS id,
3271
+ apoc.text.format("[%s](%s)", [gene.symbol, gene.short_form]) AS name,
3272
+ apoc.text.join(gene.unique_facets, '|') AS tags,
3273
+ expression_level,
3274
+ expression_extent,
3275
+ anatomy
3276
+ ORDER BY expression_level DESC, gene.symbol
1231
3277
  """
1232
-
3278
+
1233
3279
  if limit != -1:
1234
- query += f" LIMIT {limit}"
1235
-
1236
- # Run the query using VFB_connect
1237
- results = vc.nc.commit_list([query])
1238
-
1239
- # Convert the results to a DataFrame
3280
+ main_query += f" LIMIT {limit}"
3281
+
3282
+ # Execute the query
3283
+ results = vc.nc.commit_list([main_query])
1240
3284
  df = pd.DataFrame.from_records(get_dict_cursor()(results))
1241
-
1242
- columns_to_encode = ['name', 'dataset', 'license', 'thumbnail']
1243
- df = encode_markdown_links(df, columns_to_encode)
1244
-
1245
- template_order = ["VFB_00101567","VFB_00200000","VFB_00017894","VFB_00101384","VFB_00050000","VFB_00049000","VFB_00100000","VFB_00030786","VFB_00110000","VFB_00120000"]
1246
-
1247
- order = 1
1248
-
1249
- for template in template_order:
1250
- df.loc[df['id'] == template, 'order'] = order
1251
- order += 1
1252
-
1253
- # Sort the DataFrame by 'order'
1254
- df = df.sort_values('order')
1255
-
3285
+
3286
+ # Encode markdown links
3287
+ if not df.empty:
3288
+ columns_to_encode = ['name']
3289
+ df = encode_markdown_links(df, columns_to_encode)
3290
+
1256
3291
  if return_dataframe:
1257
3292
  return df
1258
-
1259
- # Format the results
1260
- formatted_results = {
1261
- "headers": {
1262
- "id": {"title": "Add", "type": "selection_id", "order": -1},
1263
- "order": {"title": "Order", "type": "numeric", "order": 1, "sort": {0: "Asc"}},
1264
- "name": {"title": "Name", "type": "markdown", "order": 1, "sort": {1: "Asc"}},
1265
- "tags": {"title": "Tags", "type": "tags", "order": 2},
1266
- "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9},
1267
- "dataset": {"title": "Dataset", "type": "metadata", "order": 3},
1268
- "license": {"title": "License", "type": "metadata", "order": 4}
3293
+ else:
3294
+ formatted_results = {
3295
+ "headers": {
3296
+ "id": {"title": "ID", "type": "selection_id", "order": -1},
3297
+ "name": {"title": "Gene", "type": "markdown", "order": 0},
3298
+ "tags": {"title": "Tags", "type": "tags", "order": 1},
3299
+ "expression_level": {"title": "Expression Level", "type": "numeric", "order": 2},
3300
+ "expression_extent": {"title": "Expression Extent", "type": "numeric", "order": 3},
3301
+ "anatomy": {"title": "Anatomy", "type": "metadata", "order": 4}
1269
3302
  },
1270
3303
  "rows": [
1271
- {
1272
- key: row[key]
1273
- for key in [
1274
- "id",
1275
- "order",
1276
- "name",
1277
- "tags",
1278
- "thumbnail",
1279
- "dataset",
1280
- "license"
1281
- ]
1282
- }
1283
- for row in safe_to_dict(df)
3304
+ {key: row[key] for key in ["id", "name", "tags", "expression_level", "expression_extent", "anatomy"]}
3305
+ for row in safe_to_dict(df, sort_by_id=False)
1284
3306
  ],
1285
3307
  "count": total_count
1286
3308
  }
1287
- return formatted_results
3309
+ return formatted_results
1288
3310
 
1289
- def get_related_anatomy(template_short_form: str, limit: int = -1, return_dataframe: bool = False):
1290
- """
1291
- Retrieve related anatomical structures for a given template.
1292
3311
 
1293
- :param template_short_form: The short form of the template to query.
1294
- :param limit: Maximum number of results to return. Default is -1, which returns all results.
1295
- :param return_dataframe: If True, returns results as a pandas DataFrame. Otherwise, returns a list of dicts.
1296
- :return: Related anatomical structures and paths.
3312
+ def get_expression_cluster(gene_short_form: str, return_dataframe=True, limit: int = -1):
1297
3313
  """
1298
-
1299
- # Define the Cypher query
1300
- query = f"""
1301
- MATCH (root:Class)<-[:INSTANCEOF]-(t:Template {{short_form:'{template_short_form}'}})<-[:depicts]-(tc:Template)<-[ie:in_register_with]-(c:Individual)-[:depicts]->(image:Individual)-[r:INSTANCEOF]->(anat:Class:Anatomy)
1302
- WHERE exists(ie.index)
1303
- WITH root, anat,r,image
1304
- MATCH p=allshortestpaths((root)<-[:SUBCLASSOF|part_of*..50]-(anat))
1305
- UNWIND nodes(p) as n
1306
- UNWIND nodes(p) as m
1307
- WITH * WHERE id(n) < id(m)
1308
- MATCH path = allShortestPaths( (n)-[:SUBCLASSOF|part_of*..1]-(m) )
1309
- RETURN collect(distinct {{ node_id: id(anat), short_form: anat.short_form, image: image.short_form }}) AS image_nodes, id(root) AS root, collect(path)
3314
+ Retrieve scRNAseq clusters expressing the specified gene.
3315
+
3316
+ This implements the expressionCluster query from the VFB XMI specification.
3317
+ Returns clusters that express a given gene with expression levels and anatomy info.
3318
+
3319
+ XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
3320
+ Query: expression_cluster_query
3321
+
3322
+ :param gene_short_form: Short form identifier of the gene (e.g., 'FBgn_00001234')
3323
+ :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
3324
+ :param limit: Maximum number of results to return (default: -1 for all results)
3325
+ :return: Clusters expressing this gene with expression data
3326
+ :rtype: pandas.DataFrame or dict
1310
3327
  """
1311
-
3328
+
3329
+ # Count query
3330
+ count_query = f"""
3331
+ MATCH (primary:Individual:Cluster)-[e:expresses]->(g:Gene:Class)
3332
+ WHERE g.short_form = '{gene_short_form}'
3333
+ RETURN COUNT(primary) AS total_count
3334
+ """
3335
+
3336
+ count_results = vc.nc.commit_list([count_query])
3337
+ count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
3338
+ total_count = count_df['total_count'][0] if not count_df.empty else 0
3339
+
3340
+ # Main query: get clusters with expression levels
3341
+ main_query = f"""
3342
+ MATCH (primary:Individual:Cluster)-[e:expresses]->(g:Gene:Class)
3343
+ WHERE g.short_form = '{gene_short_form}'
3344
+ WITH e.expression_level[0] as expression_level,
3345
+ e.expression_extent[0] as expression_extent,
3346
+ {{
3347
+ short_form: g.short_form,
3348
+ label: coalesce(g.label,''),
3349
+ iri: g.iri,
3350
+ types: labels(g),
3351
+ unique_facets: apoc.coll.sort(coalesce(g.uniqueFacets, [])),
3352
+ symbol: coalesce(([]+g.symbol)[0], '')
3353
+ }} AS gene,
3354
+ primary
3355
+ MATCH (a:Anatomy)<-[:composed_primarily_of]-(primary)
3356
+ WITH {{
3357
+ short_form: a.short_form,
3358
+ label: coalesce(a.label,''),
3359
+ iri: a.iri,
3360
+ types: labels(a),
3361
+ unique_facets: apoc.coll.sort(coalesce(a.uniqueFacets, [])),
3362
+ symbol: coalesce(([]+a.symbol)[0], '')
3363
+ }} AS anatomy, primary, expression_level, expression_extent, gene
3364
+ RETURN
3365
+ primary.short_form AS id,
3366
+ apoc.text.format("[%s](%s)", [primary.label, primary.short_form]) AS name,
3367
+ apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags,
3368
+ expression_level,
3369
+ expression_extent,
3370
+ anatomy
3371
+ ORDER BY expression_level DESC, primary.label
3372
+ """
3373
+
1312
3374
  if limit != -1:
1313
- query += f" LIMIT {limit}"
1314
-
1315
- # Execute the query using your database connection (e.g., VFB_connect)
1316
- results = vc.nc.commit_list([query])
1317
-
1318
- # Convert the results to a DataFrame (if needed)
3375
+ main_query += f" LIMIT {limit}"
3376
+
3377
+ # Execute the query
3378
+ results = vc.nc.commit_list([main_query])
3379
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3380
+
3381
+ # Encode markdown links
3382
+ if not df.empty:
3383
+ columns_to_encode = ['name']
3384
+ df = encode_markdown_links(df, columns_to_encode)
3385
+
1319
3386
  if return_dataframe:
1320
- df = pd.DataFrame.from_records(results)
1321
3387
  return df
3388
+ else:
3389
+ formatted_results = {
3390
+ "headers": {
3391
+ "id": {"title": "ID", "type": "selection_id", "order": -1},
3392
+ "name": {"title": "Cluster", "type": "markdown", "order": 0},
3393
+ "tags": {"title": "Tags", "type": "tags", "order": 1},
3394
+ "expression_level": {"title": "Expression Level", "type": "numeric", "order": 2},
3395
+ "expression_extent": {"title": "Expression Extent", "type": "numeric", "order": 3},
3396
+ "anatomy": {"title": "Anatomy", "type": "metadata", "order": 4}
3397
+ },
3398
+ "rows": [
3399
+ {key: row[key] for key in ["id", "name", "tags", "expression_level", "expression_extent", "anatomy"]}
3400
+ for row in safe_to_dict(df, sort_by_id=False)
3401
+ ],
3402
+ "count": total_count
3403
+ }
3404
+ return formatted_results
1322
3405
 
1323
- # Otherwise, return the raw results
1324
- return results
1325
-
1326
- def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_dataframe=True, limit: int = -1):
1327
- """Get JSON report of individual neurons similar to input neuron
1328
-
1329
- :param neuron:
1330
- :param similarity_score: Optionally specify similarity score to chose
1331
- :param return_dataframe: Returns pandas dataframe if true, otherwise returns list of dicts.
1332
- :param limit: maximum number of results to return (default -1, returns all results)
1333
- :return: list of similar neurons (id, label, tags, source (db) id, accession_in_source) + similarity score.
1334
- :rtype: pandas.DataFrame or list of dicts
1335
3406
 
3407
+ def get_scrnaseq_dataset_data(dataset_short_form: str, return_dataframe=True, limit: int = -1):
1336
3408
  """
1337
- count_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class)
1338
- WHERE n1.short_form = '{neuron}' and exists(r.{similarity_score})
1339
- RETURN COUNT(DISTINCT n2) AS total_count"""
1340
-
3409
+ Retrieve all clusters for a scRNAseq dataset.
3410
+
3411
+ This implements the scRNAdatasetData query from the VFB XMI specification.
3412
+ Returns all clusters in a dataset with anatomy info and publications.
3413
+
3414
+ XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
3415
+ Query: dataset_scRNAseq_query
3416
+
3417
+ :param dataset_short_form: Short form identifier of the dataset (e.g., 'VFB_00101234')
3418
+ :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
3419
+ :param limit: Maximum number of results to return (default: -1 for all results)
3420
+ :return: Clusters in this dataset with anatomy and publication data
3421
+ :rtype: pandas.DataFrame or dict
3422
+ """
3423
+
3424
+ # Count query
3425
+ count_query = f"""
3426
+ MATCH (c:Individual)-[:has_source]->(ds:scRNAseq_DataSet)
3427
+ WHERE ds.short_form = '{dataset_short_form}'
3428
+ RETURN COUNT(c) AS total_count
3429
+ """
3430
+
1341
3431
  count_results = vc.nc.commit_list([count_query])
1342
3432
  count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
1343
3433
  total_count = count_df['total_count'][0] if not count_df.empty else 0
3434
+
3435
+ # Main query: get clusters with anatomy and publications
3436
+ main_query = f"""
3437
+ MATCH (c:Individual:Cluster)-[:has_source]->(ds:scRNAseq_DataSet)
3438
+ WHERE ds.short_form = '{dataset_short_form}'
3439
+ MATCH (a:Class:Anatomy)<-[:composed_primarily_of]-(c)
3440
+ WITH *, {{
3441
+ short_form: a.short_form,
3442
+ label: coalesce(a.label,''),
3443
+ iri: a.iri,
3444
+ types: labels(a),
3445
+ unique_facets: apoc.coll.sort(coalesce(a.uniqueFacets, [])),
3446
+ symbol: coalesce(([]+a.symbol)[0], '')
3447
+ }} AS anatomy
3448
+ OPTIONAL MATCH (ds)-[:has_reference]->(p:pub)
3449
+ WITH COLLECT({{
3450
+ core: {{
3451
+ short_form: p.short_form,
3452
+ label: coalesce(p.label,''),
3453
+ iri: p.iri,
3454
+ types: labels(p),
3455
+ unique_facets: apoc.coll.sort(coalesce(p.uniqueFacets, [])),
3456
+ symbol: coalesce(([]+p.symbol)[0], '')
3457
+ }},
3458
+ PubMed: coalesce(([]+p.PMID)[0], ''),
3459
+ FlyBase: coalesce(([]+p.FlyBase)[0], ''),
3460
+ DOI: coalesce(([]+p.DOI)[0], '')
3461
+ }}) AS pubs, c, anatomy
3462
+ RETURN
3463
+ c.short_form AS id,
3464
+ apoc.text.format("[%s](%s)", [c.label, c.short_form]) AS name,
3465
+ apoc.text.join(coalesce(c.uniqueFacets, []), '|') AS tags,
3466
+ anatomy,
3467
+ pubs
3468
+ ORDER BY c.label
3469
+ """
3470
+
3471
+ if limit != -1:
3472
+ main_query += f" LIMIT {limit}"
3473
+
3474
+ # Execute the query
3475
+ results = vc.nc.commit_list([main_query])
3476
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3477
+
3478
+ # Encode markdown links
3479
+ if not df.empty:
3480
+ columns_to_encode = ['name']
3481
+ df = encode_markdown_links(df, columns_to_encode)
3482
+
3483
+ if return_dataframe:
3484
+ return df
3485
+ else:
3486
+ formatted_results = {
3487
+ "headers": {
3488
+ "id": {"title": "ID", "type": "selection_id", "order": -1},
3489
+ "name": {"title": "Cluster", "type": "markdown", "order": 0},
3490
+ "tags": {"title": "Tags", "type": "tags", "order": 1},
3491
+ "anatomy": {"title": "Anatomy", "type": "metadata", "order": 2},
3492
+ "pubs": {"title": "Publications", "type": "metadata", "order": 3}
3493
+ },
3494
+ "rows": [
3495
+ {key: row[key] for key in ["id", "name", "tags", "anatomy", "pubs"]}
3496
+ for row in safe_to_dict(df, sort_by_id=False)
3497
+ ],
3498
+ "count": total_count
3499
+ }
3500
+ return formatted_results
3501
+
1344
3502
 
1345
- main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class)
1346
- WHERE n1.short_form = '{neuron}' and exists(r.{similarity_score})
1347
- WITH c1, n1, r, n2, c2
1348
- OPTIONAL MATCH (n2)-[rx:database_cross_reference]->(site:Site)
1349
- WHERE site.is_data_source
1350
- WITH n2, r, c2, rx, site
1351
- OPTIONAL MATCH (n2)<-[:depicts]-(:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template)
1352
- RETURN DISTINCT n2.short_form as id,
1353
- apoc.text.format("[%s](%s)", [n2.label, n2.short_form]) AS name,
1354
- r.{similarity_score}[0] AS score,
1355
- apoc.text.join(n2.uniqueFacets, '|') AS tags,
1356
- REPLACE(apoc.text.format("[%s](%s)",[COALESCE(site.symbol[0],site.label),site.short_form]), '[null](null)', '') AS source,
1357
- REPLACE(apoc.text.format("[%s](%s)",[rx.accession[0], (site.link_base[0] + rx.accession[0])]), '[null](null)', '') AS source_id,
1358
- REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(n2.symbol[0],n2.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(ri.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(n2.symbol[0],n2.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + n2.short_form]), "[![null]( 'null')](null)", "") as thumbnail
1359
- ORDER BY score DESC"""
1360
-
3503
+ # ===== NBLAST Similarity Queries =====
3504
+
3505
+ def get_similar_morphology(neuron_short_form: str, return_dataframe=True, limit: int = -1):
3506
+ """
3507
+ Retrieve neurons with similar morphology to the specified neuron using NBLAST.
3508
+
3509
+ This implements the SimilarMorphologyTo query from the VFB XMI specification.
3510
+ Returns neurons with NBLAST similarity scores.
3511
+
3512
+ XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
3513
+ Query: has_similar_morphology_to (NBLAST_anat_image_query)
3514
+
3515
+ :param neuron_short_form: Short form identifier of the neuron (e.g., 'VFB_00101234')
3516
+ :param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
3517
+ :param limit: Maximum number of results to return (default: -1 for all results)
3518
+ :return: Neurons with similar morphology and NBLAST scores
3519
+ :rtype: pandas.DataFrame or dict
3520
+ """
3521
+
3522
+ # Count query
3523
+ count_query = f"""
3524
+ MATCH (n:Individual)-[nblast:has_similar_morphology_to]-(primary:Individual)
3525
+ WHERE n.short_form = '{neuron_short_form}' AND EXISTS(nblast.NBLAST_score)
3526
+ RETURN count(primary) AS count
3527
+ """
3528
+
3529
+ # Get total count
3530
+ count_results = vc.nc.commit_list([count_query])
3531
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3532
+
3533
+ # Main query
3534
+ main_query = f"""
3535
+ MATCH (n:Individual)-[nblast:has_similar_morphology_to]-(primary:Individual)
3536
+ WHERE n.short_form = '{neuron_short_form}' AND EXISTS(nblast.NBLAST_score)
3537
+ WITH primary, nblast
3538
+ OPTIONAL MATCH (primary)<-[:depicts]-(channel:Individual)-[irw:in_register_with]->(template:Individual)-[:depicts]->(template_anat:Individual)
3539
+ WITH template, channel, template_anat, irw, primary, nblast
3540
+ OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class)
3541
+ WITH CASE WHEN channel IS NULL THEN [] ELSE collect({{
3542
+ channel: {{
3543
+ short_form: channel.short_form,
3544
+ label: coalesce(channel.label, ''),
3545
+ iri: channel.iri,
3546
+ types: labels(channel),
3547
+ unique_facets: apoc.coll.sort(coalesce(channel.uniqueFacets, [])),
3548
+ symbol: coalesce(channel.symbol[0], '')
3549
+ }},
3550
+ imaging_technique: {{
3551
+ short_form: technique.short_form,
3552
+ label: coalesce(technique.label, ''),
3553
+ iri: technique.iri,
3554
+ types: labels(technique),
3555
+ unique_facets: apoc.coll.sort(coalesce(technique.uniqueFacets, [])),
3556
+ symbol: coalesce(technique.symbol[0], '')
3557
+ }},
3558
+ image: {{
3559
+ template_channel: {{
3560
+ short_form: template.short_form,
3561
+ label: coalesce(template.label, ''),
3562
+ iri: template.iri,
3563
+ types: labels(template),
3564
+ unique_facets: apoc.coll.sort(coalesce(template.uniqueFacets, [])),
3565
+ symbol: coalesce(template.symbol[0], '')
3566
+ }},
3567
+ template_anatomy: {{
3568
+ short_form: template_anat.short_form,
3569
+ label: coalesce(template_anat.label, ''),
3570
+ iri: template_anat.iri,
3571
+ types: labels(template_anat),
3572
+ symbol: coalesce(template_anat.symbol[0], '')
3573
+ }},
3574
+ image_folder: COALESCE(irw.folder[0], ''),
3575
+ index: coalesce(apoc.convert.toInteger(irw.index[0]), []) + []
3576
+ }}
3577
+ }}) END AS channel_image, primary, nblast
3578
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class)
3579
+ WITH CASE WHEN typ IS NULL THEN [] ELSE collect({{
3580
+ short_form: typ.short_form,
3581
+ label: coalesce(typ.label, ''),
3582
+ iri: typ.iri,
3583
+ types: labels(typ),
3584
+ symbol: coalesce(typ.symbol[0], '')
3585
+ }}) END AS types, primary, channel_image, nblast
3586
+ RETURN
3587
+ primary.short_form AS id,
3588
+ '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name,
3589
+ apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags,
3590
+ nblast.NBLAST_score[0] AS score,
3591
+ types,
3592
+ channel_image
3593
+ ORDER BY score DESC
3594
+ """
3595
+
1361
3596
  if limit != -1:
1362
3597
  main_query += f" LIMIT {limit}"
1363
-
1364
- # Run the query using VFB_connect
3598
+
3599
+ # Execute the query
1365
3600
  results = vc.nc.commit_list([main_query])
1366
-
1367
- # Convert the results to a DataFrame
1368
3601
  df = pd.DataFrame.from_records(get_dict_cursor()(results))
1369
-
1370
- columns_to_encode = ['name', 'source', 'source_id', 'thumbnail']
1371
- df = encode_markdown_links(df, columns_to_encode)
3602
+
3603
+ # Encode markdown links
3604
+ if not df.empty:
3605
+ columns_to_encode = ['name']
3606
+ df = encode_markdown_links(df, columns_to_encode)
1372
3607
 
1373
3608
  if return_dataframe:
1374
3609
  return df
1375
3610
  else:
1376
3611
  formatted_results = {
1377
3612
  "headers": {
1378
- "id": {"title": "Add", "type": "selection_id", "order": -1},
1379
- "score": {"title": "Score", "type": "numeric", "order": 1, "sort": {0: "Desc"}},
1380
- "name": {"title": "Name", "type": "markdown", "order": 1, "sort": {1: "Asc"}},
3613
+ "id": {"title": "ID", "type": "selection_id", "order": -1},
3614
+ "name": {"title": "Neuron", "type": "markdown", "order": 0},
3615
+ "score": {"title": "NBLAST Score", "type": "text", "order": 1},
1381
3616
  "tags": {"title": "Tags", "type": "tags", "order": 2},
1382
- "source": {"title": "Source", "type": "metadata", "order": 3},
1383
- "source_id": {"title": "Source ID", "type": "metadata", "order": 4},
1384
- "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
3617
+ "types": {"title": "Types", "type": "metadata", "order": 3},
3618
+ "channel_image": {"title": "Images", "type": "metadata", "order": 4}
1385
3619
  },
1386
3620
  "rows": [
1387
- {
1388
- key: row[key]
1389
- for key in [
1390
- "id",
1391
- "name",
1392
- "score",
1393
- "tags",
1394
- "source",
1395
- "source_id",
1396
- "thumbnail"
1397
- ]
1398
- }
1399
- for row in safe_to_dict(df)
3621
+ {key: row[key] for key in ["id", "name", "score", "tags", "types", "channel_image"]}
3622
+ for row in safe_to_dict(df, sort_by_id=False)
1400
3623
  ],
1401
3624
  "count": total_count
1402
3625
  }
1403
3626
  return formatted_results
1404
3627
 
1405
- def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True, limit: int = -1, summary_mode: bool = False):
1406
- """
1407
- Retrieve neurons that have synapses into the specified neuron, along with the neurotransmitter
1408
- types, and additional information about the neurons.
1409
3628
 
1410
- :param neuron_short_form: The short form identifier of the neuron to query.
1411
- :param return_dataframe: If True, returns results as a pandas DataFrame. Otherwise, returns a dictionary.
1412
- :param limit: Maximum number of results to return. Default is -1, which returns all results.
1413
- :param summary_mode: If True, returns a preview of the results with summed weights for each neurotransmitter type.
1414
- :return: Neurons, neurotransmitter types, and additional neuron information.
3629
+ def get_similar_morphology_part_of(neuron_short_form: str, return_dataframe=True, limit: int = -1):
1415
3630
  """
1416
-
1417
- # Define the common part of the Cypher query
1418
- query_common = f"""
1419
- MATCH (a:has_neuron_connectivity {{short_form:'{neuron_short_form}'}})<-[r:synapsed_to]-(b:has_neuron_connectivity)
1420
- UNWIND(labels(b)) as l
1421
- WITH * WHERE l contains "ergic"
1422
- OPTIONAL MATCH (c:Class:Neuron) WHERE c.short_form starts with "FBbt_" AND toLower(c.label)=toLower(l+" neuron")
3631
+ Retrieve expression patterns with similar morphology to part of the specified neuron (NBLASTexp).
3632
+
3633
+ XMI: has_similar_morphology_to_part_of
1423
3634
  """
1424
- if not summary_mode:
1425
- count_query = f"""{query_common}
1426
- RETURN COUNT(DISTINCT b) AS total_count"""
1427
- else:
1428
- count_query = f"""{query_common}
1429
- RETURN COUNT(DISTINCT c) AS total_count"""
3635
+ count_query = f"MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{neuron_short_form}' AND EXISTS(nblast.NBLAST_score) RETURN count(primary) AS count"
3636
+ count_results = vc.nc.commit_list([count_query])
3637
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3638
+
3639
+ main_query = f"""MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{neuron_short_form}' AND EXISTS(nblast.NBLAST_score) WITH primary, nblast
3640
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class) WITH CASE WHEN typ IS NULL THEN [] ELSE collect({{short_form: typ.short_form, label: coalesce(typ.label, ''), iri: typ.iri, types: labels(typ), symbol: coalesce(typ.symbol[0], '')}}) END AS types, primary, nblast
3641
+ RETURN primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, nblast.NBLAST_score[0] AS score, types ORDER BY score DESC"""
3642
+ if limit != -1: main_query += f" LIMIT {limit}"
3643
+
3644
+ results = vc.nc.commit_list([main_query])
3645
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3646
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3647
+
3648
+ if return_dataframe: return df
3649
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Expression Pattern", "type": "markdown", "order": 0}, "score": {"title": "NBLAST Score", "type": "text", "order": 1}, "tags": {"title": "Tags", "type": "tags", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "score", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
3650
+
1430
3651
 
3652
+ def get_similar_morphology_part_of_exp(expression_short_form: str, return_dataframe=True, limit: int = -1):
3653
+ """Neurons with similar morphology to part of expression pattern (reverse NBLASTexp)."""
3654
+ count_query = f"MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{expression_short_form}' AND EXISTS(nblast.NBLAST_score) RETURN count(primary) AS count"
1431
3655
  count_results = vc.nc.commit_list([count_query])
1432
- count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
1433
- total_count = count_df['total_count'][0] if not count_df.empty else 0
3656
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3657
+
3658
+ main_query = f"""MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{expression_short_form}' AND EXISTS(nblast.NBLAST_score) WITH primary, nblast
3659
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class) WITH CASE WHEN typ IS NULL THEN [] ELSE collect({{short_form: typ.short_form, label: coalesce(typ.label, ''), iri: typ.iri, types: labels(typ), symbol: coalesce(typ.symbol[0], '')}}) END AS types, primary, nblast
3660
+ RETURN primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, nblast.NBLAST_score[0] AS score, types ORDER BY score DESC"""
3661
+ if limit != -1: main_query += f" LIMIT {limit}"
3662
+
3663
+ results = vc.nc.commit_list([main_query])
3664
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3665
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3666
+
3667
+ if return_dataframe: return df
3668
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Neuron", "type": "markdown", "order": 0}, "score": {"title": "NBLAST Score", "type": "text", "order": 1}, "tags": {"title": "Tags", "type": "tags", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "score", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
1434
3669
 
1435
- # Define the part of the query for normal mode
1436
- query_normal = f"""
1437
- OPTIONAL MATCH (b)-[:INSTANCEOF]->(neuronType:Class),
1438
- (b)<-[:depicts]-(imageChannel:Individual)-[image:in_register_with]->(templateChannel:Template)-[:depicts]->(templ:Template),
1439
- (imageChannel)-[:is_specified_output_of]->(imagingTechnique:Class)
1440
- RETURN
1441
- b.short_form as id,
1442
- apoc.text.format("[%s](%s)", [l, c.short_form]) as Neurotransmitter,
1443
- sum(r.weight[0]) as Weight,
1444
- apoc.text.format("[%s](%s)", [b.label, b.short_form]) as Name,
1445
- apoc.text.format("[%s](%s)", [neuronType.label, neuronType.short_form]) as Type,
1446
- apoc.text.join(b.uniqueFacets, '|') as Gross_Type,
1447
- apoc.text.join(collect(apoc.text.format("[%s](%s)", [templ.label, templ.short_form])), ', ') as Template_Space,
1448
- apoc.text.format("[%s](%s)", [imagingTechnique.label, imagingTechnique.short_form]) as Imaging_Technique,
1449
- apoc.text.join(collect(REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(b.symbol[0],b.label), REPLACE(COALESCE(image.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(b.symbol[0],b.label), b.short_form]), "[![null]( 'null')](null)", "")), ' | ') as Images
1450
- ORDER BY Weight Desc
1451
- """
1452
3670
 
1453
- # Define the part of the query for preview mode
1454
- query_preview = f"""
1455
- RETURN DISTINCT c.short_form as id,
1456
- apoc.text.format("[%s](%s)", [l, c.short_form]) as Neurotransmitter,
1457
- sum(r.weight[0]) as Weight
1458
- ORDER BY Weight Desc
1459
- """
3671
+ def get_similar_morphology_nb(neuron_short_form: str, return_dataframe=True, limit: int = -1):
3672
+ """NeuronBridge similarity matches for neurons."""
3673
+ count_query = f"MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{neuron_short_form}' AND EXISTS(nblast.neuronbridge_score) RETURN count(primary) AS count"
3674
+ count_results = vc.nc.commit_list([count_query])
3675
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3676
+
3677
+ main_query = f"""MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{neuron_short_form}' AND EXISTS(nblast.neuronbridge_score) WITH primary, nblast
3678
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class) WITH CASE WHEN typ IS NULL THEN [] ELSE collect({{short_form: typ.short_form, label: coalesce(typ.label, ''), iri: typ.iri, types: labels(typ), symbol: coalesce(typ.symbol[0], '')}}) END AS types, primary, nblast
3679
+ RETURN primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, nblast.neuronbridge_score[0] AS score, types ORDER BY score DESC"""
3680
+ if limit != -1: main_query += f" LIMIT {limit}"
3681
+
3682
+ results = vc.nc.commit_list([main_query])
3683
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3684
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3685
+
3686
+ if return_dataframe: return df
3687
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Match", "type": "markdown", "order": 0}, "score": {"title": "NB Score", "type": "text", "order": 1}, "tags": {"title": "Tags", "type": "tags", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "score", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
1460
3688
 
1461
- # Choose the appropriate part of the query based on the summary_mode parameter
1462
- query = query_common + (query_preview if summary_mode else query_normal)
1463
3689
 
1464
- if limit != -1 and not summary_mode:
1465
- query += f" LIMIT {limit}"
3690
+ def get_similar_morphology_nb_exp(expression_short_form: str, return_dataframe=True, limit: int = -1):
3691
+ """NeuronBridge similarity matches for expression patterns."""
3692
+ count_query = f"MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{expression_short_form}' AND EXISTS(nblast.neuronbridge_score) RETURN count(primary) AS count"
3693
+ count_results = vc.nc.commit_list([count_query])
3694
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3695
+
3696
+ main_query = f"""MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{expression_short_form}' AND EXISTS(nblast.neuronbridge_score) WITH primary, nblast
3697
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class) WITH CASE WHEN typ IS NULL THEN [] ELSE collect({{short_form: typ.short_form, label: coalesce(typ.label, ''), iri: typ.iri, types: labels(typ), symbol: coalesce(typ.symbol[0], '')}}) END AS types, primary, nblast
3698
+ RETURN primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, nblast.neuronbridge_score[0] AS score, types ORDER BY score DESC"""
3699
+ if limit != -1: main_query += f" LIMIT {limit}"
3700
+
3701
+ results = vc.nc.commit_list([main_query])
3702
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3703
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3704
+
3705
+ if return_dataframe: return df
3706
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Match", "type": "markdown", "order": 0}, "score": {"title": "NB Score", "type": "text", "order": 1}, "tags": {"title": "Tags", "type": "tags", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "score", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
1466
3707
 
1467
- # Execute the query using your database connection (e.g., vc.nc)
1468
- results = vc.nc.commit_list([query])
1469
3708
 
1470
- # Convert the results to a DataFrame
3709
+ def get_similar_morphology_userdata(upload_id: str, return_dataframe=True, limit: int = -1):
3710
+ """NBLAST results for user-uploaded data (cached in SOLR)."""
3711
+ try:
3712
+ solr_query = f'{{"params":{{"defType":"edismax","fl":"upload_nblast_query","indent":"true","q.op":"OR","q":"id:{upload_id}","qf":"id","rows":"99"}}}}'
3713
+ response = requests.post("https://solr.virtualflybrain.org/solr/vfb_json/select", data=solr_query, headers={"Content-Type": "application/json"})
3714
+ if response.status_code == 200:
3715
+ data = response.json()
3716
+ if data.get('response', {}).get('numFound', 0) > 0:
3717
+ results = data['response']['docs'][0].get('upload_nblast_query', [])
3718
+ if isinstance(results, str): results = json.loads(results)
3719
+ df = pd.DataFrame(results if isinstance(results, list) else [])
3720
+ if not df.empty and 'name' in df.columns: df = encode_markdown_links(df, ['name'])
3721
+ if return_dataframe: return df
3722
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Match", "type": "markdown", "order": 0}, "score": {"title": "Score", "type": "text", "order": 1}}, "rows": safe_to_dict(df, sort_by_id=False), "count": len(df)}
3723
+ except Exception as e:
3724
+ print(f"Error fetching user NBLAST data: {e}")
3725
+ return pd.DataFrame() if return_dataframe else {"headers": {}, "rows": [], "count": 0}
3726
+
3727
+
3728
+ # ===== Dataset/Template Queries =====
3729
+
3730
+ def get_painted_domains(template_short_form: str, return_dataframe=True, limit: int = -1):
3731
+ """List all painted anatomy domains for a template."""
3732
+ count_query = f"MATCH (n:Template {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template)<-[r:in_register_with]-(dc:Individual)-[:depicts]->(di:Individual)-[:INSTANCEOF]->(d:Class) WHERE EXISTS(r.index) RETURN count(di) AS count"
3733
+ count_results = vc.nc.commit_list([count_query])
3734
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3735
+
3736
+ main_query = f"""MATCH (n:Template {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template)<-[r:in_register_with]-(dc:Individual)-[:depicts]->(di:Individual)-[:INSTANCEOF]->(d:Class) WHERE EXISTS(r.index)
3737
+ RETURN DISTINCT di.short_form AS id, '[' + di.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + di.short_form + ')' AS name, coalesce(di.description[0], d.description[0]) AS description, COLLECT(DISTINCT d.label) AS type, replace(r.folder[0],'http:','https:') + '/thumbnailT.png' AS thumbnail"""
3738
+ if limit != -1: main_query += f" LIMIT {limit}"
3739
+
3740
+ results = vc.nc.commit_list([main_query])
1471
3741
  df = pd.DataFrame.from_records(get_dict_cursor()(results))
3742
+ if not df.empty: df = encode_markdown_links(df, ['name', 'thumbnail'])
3743
+
3744
+ if return_dataframe: return df
3745
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Domain", "type": "markdown", "order": 0}, "type": {"title": "Type", "type": "text", "order": 1}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "type", "thumbnail"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
1472
3746
 
1473
- columns_to_encode = ['Neurotransmitter', 'Type', 'Name', 'Template_Space', 'Imaging_Technique', 'thumbnail']
1474
- df = encode_markdown_links(df, columns_to_encode)
3747
+
3748
+ def get_dataset_images(dataset_short_form: str, return_dataframe=True, limit: int = -1):
3749
+ """List all images in a dataset."""
3750
+ count_query = f"MATCH (c:DataSet {{short_form:'{dataset_short_form}'}})<-[:has_source]-(primary:Individual)<-[:depicts]-(channel:Individual)-[irw:in_register_with]->(template:Individual)-[:depicts]->(template_anat:Individual) RETURN count(primary) AS count"
3751
+ count_results = vc.nc.commit_list([count_query])
3752
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
1475
3753
 
1476
- # If return_dataframe is True, return the results as a DataFrame
1477
- if return_dataframe:
1478
- return df
3754
+ main_query = f"""MATCH (c:DataSet {{short_form:'{dataset_short_form}'}})<-[:has_source]-(primary:Individual)<-[:depicts]-(channel:Individual)-[irw:in_register_with]->(template:Individual)-[:depicts]->(template_anat:Individual)
3755
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class)
3756
+ RETURN primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, typ.label AS type"""
3757
+ if limit != -1: main_query += f" LIMIT {limit}"
3758
+
3759
+ results = vc.nc.commit_list([main_query])
3760
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3761
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3762
+
3763
+ if return_dataframe: return df
3764
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Image", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}, "type": {"title": "Type", "type": "text", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "tags", "type"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
1479
3765
 
1480
- # Format the results for the preview
1481
- if not summary_mode:
1482
- results = {
1483
- "headers": {
1484
- "id": {"title": "ID", "type": "text", "order": -1},
1485
- "Neurotransmitter": {"title": "Neurotransmitter", "type": "markdown", "order": 0},
1486
- "Weight": {"title": "Weight", "type": "numeric", "order": 1},
1487
- "Name": {"title": "Name", "type": "markdown", "order": 2},
1488
- "Type": {"title": "Type", "type": "markdown", "order": 3},
1489
- "Gross_Type": {"title": "Gross Type", "type": "text", "order": 4},
1490
- "Template_Space": {"title": "Template Space", "type": "markdown", "order": 5},
1491
- "Imaging_Technique": {"title": "Imaging Technique", "type": "markdown", "order": 6},
1492
- "Images": {"title": "Images", "type": "markdown", "order": 7}
1493
- },
1494
- "rows": [
1495
- {
1496
- key: row[key]
1497
- for key in [
1498
- "id",
1499
- "Neurotransmitter",
1500
- "Weight",
1501
- "Name",
1502
- "Type",
1503
- "Gross_Type",
1504
- "Template_Space",
1505
- "Imaging_Technique",
1506
- "Images"
1507
- ]
1508
- }
1509
- for row in safe_to_dict(df)
1510
- ],
1511
- "count": total_count
1512
- }
1513
- else:
1514
- results = {
1515
- "headers": {
1516
- "id": {"title": "ID", "type": "text", "order": -1},
1517
- "Neurotransmitter": {"title": "Neurotransmitter", "type": "markdown", "order": 0},
1518
- "Weight": {"title": "Weight", "type": "numeric", "order": 1},
1519
- },
1520
- "rows": [
1521
- {
1522
- key: row[key]
1523
- for key in [
1524
- "id",
1525
- "Neurotransmitter",
1526
- "Weight",
1527
- ]
1528
- }
1529
- for row in safe_to_dict(df)
1530
- ],
1531
- "count": total_count
1532
- }
3766
+
3767
+ def get_all_aligned_images(template_short_form: str, return_dataframe=True, limit: int = -1):
3768
+ """List all images aligned to a template."""
3769
+ count_query = f"MATCH (:Template {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template)<-[:in_register_with]-(:Individual)-[:depicts]->(di:Individual) RETURN count(di) AS count"
3770
+ count_results = vc.nc.commit_list([count_query])
3771
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
1533
3772
 
1534
- return results
3773
+ main_query = f"""MATCH (:Template {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template)<-[:in_register_with]-(:Individual)-[:depicts]->(di:Individual)
3774
+ OPTIONAL MATCH (di)-[:INSTANCEOF]->(typ:Class)
3775
+ RETURN DISTINCT di.short_form AS id, '[' + di.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + di.short_form + ')' AS name, apoc.text.join(coalesce(di.uniqueFacets, []), '|') AS tags, typ.label AS type"""
3776
+ if limit != -1: main_query += f" LIMIT {limit}"
3777
+
3778
+ results = vc.nc.commit_list([main_query])
3779
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3780
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3781
+
3782
+ if return_dataframe: return df
3783
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Image", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}, "type": {"title": "Type", "type": "text", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "tags", "type"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
1535
3784
 
1536
3785
 
1537
- def contains_all_tags(lst: List[str], tags: List[str]) -> bool:
1538
- """
1539
- Checks if the given list contains all the tags passed.
3786
+ def get_aligned_datasets(template_short_form: str, return_dataframe=True, limit: int = -1):
3787
+ """List all datasets aligned to a template."""
3788
+ count_query = f"MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds) RETURN count(ds) AS count"
3789
+ count_results = vc.nc.commit_list([count_query])
3790
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3791
+
3792
+ main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
3793
+ RETURN DISTINCT ds.short_form AS id, '[' + ds.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + ds.short_form + ')' AS name, apoc.text.join(coalesce(ds.uniqueFacets, []), '|') AS tags"""
3794
+ if limit != -1: main_query += f" LIMIT {limit}"
3795
+
3796
+ results = vc.nc.commit_list([main_query])
3797
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3798
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3799
+
3800
+ if return_dataframe: return df
3801
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Dataset", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}}, "rows": [{key: row[key] for key in ["id", "name", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
3802
+
3803
+
3804
+ def get_all_datasets(return_dataframe=True, limit: int = -1):
3805
+ """List all available datasets."""
3806
+ count_query = "MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds) WITH DISTINCT ds RETURN count(ds) AS count"
3807
+ count_results = vc.nc.commit_list([count_query])
3808
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3809
+
3810
+ main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
3811
+ RETURN DISTINCT ds.short_form AS id, '[' + ds.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + ds.short_form + ')' AS name, apoc.text.join(coalesce(ds.uniqueFacets, []), '|') AS tags"""
3812
+ if limit != -1: main_query += f" LIMIT {limit}"
3813
+
3814
+ results = vc.nc.commit_list([main_query])
3815
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3816
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3817
+
3818
+ if return_dataframe: return df
3819
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Dataset", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}}, "rows": [{key: row[key] for key in ["id", "name", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
3820
+
3821
+
3822
+ # ===== Publication Query =====
3823
+
3824
+ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -1):
3825
+ """List all terms that reference a publication."""
3826
+ count_query = f"MATCH (:pub:Individual {{short_form:'{pub_short_form}'}})<-[:has_reference]-(primary:Individual) RETURN count(DISTINCT primary) AS count"
3827
+ count_results = vc.nc.commit_list([count_query])
3828
+ total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
3829
+
3830
+ main_query = f"""MATCH (:pub:Individual {{short_form:'{pub_short_form}'}})<-[:has_reference]-(primary:Individual)
3831
+ OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class)
3832
+ RETURN DISTINCT primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, typ.label AS type"""
3833
+ if limit != -1: main_query += f" LIMIT {limit}"
3834
+
3835
+ results = vc.nc.commit_list([main_query])
3836
+ df = pd.DataFrame.from_records(get_dict_cursor()(results))
3837
+ if not df.empty: df = encode_markdown_links(df, ['name'])
3838
+
3839
+ if return_dataframe: return df
3840
+ return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Term", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}, "type": {"title": "Type", "type": "text", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "tags", "type"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count}
3841
+
3842
+
3843
+ # ===== Complex Transgene Expression Query =====
3844
+
3845
+ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True, limit: int = -1):
3846
+ """Multi-step query: Owlery subclasses + expression overlaps."""
3847
+ # This uses a combination of Owlery and Neo4j similar to get_expression_overlaps_here
3848
+ # but specifically for transgenes. For now, we'll use the existing expression pattern logic
3849
+ return get_expression_overlaps_here(anatomy_short_form, return_dataframe, limit)
1540
3850
 
1541
- :param lst: list of strings to check
1542
- :param tags: list of strings to check for in lst
1543
- :return: True if lst contains all tags, False otherwise
1544
- """
1545
- return all(tag in lst for tag in tags)
1546
3851
 
1547
3852
  def fill_query_results(term_info):
1548
- for query in term_info['Queries']:
3853
+ def process_query(query):
1549
3854
  # print(f"Query Keys:{query.keys()}")
1550
3855
 
1551
3856
  if "preview" in query.keys() and (query['preview'] > 0 or query['count'] < 0) and query['count'] != 0:
@@ -1560,17 +3865,33 @@ def fill_query_results(term_info):
1560
3865
  function_args = query['takes'].get("default", {})
1561
3866
  # print(f"Function args: {function_args}")
1562
3867
 
3868
+ # Check function signature to see if it takes a positional argument for short_form
3869
+ sig = inspect.signature(function)
3870
+ params = list(sig.parameters.keys())
3871
+ # Skip 'self' if it's a method, and check if first param is not return_dataframe/limit/summary_mode
3872
+ first_param = params[1] if params and params[0] == 'self' else (params[0] if params else None)
3873
+ takes_short_form = first_param and first_param not in ['return_dataframe', 'limit', 'summary_mode']
3874
+
1563
3875
  # Modify this line to use the correct arguments and pass the default arguments
1564
3876
  if summary_mode:
1565
- result = function(return_dataframe=False, limit=query['preview'], summary_mode=summary_mode, **function_args)
3877
+ if function_args and takes_short_form:
3878
+ # Pass the short_form as positional argument
3879
+ short_form_value = list(function_args.values())[0]
3880
+ result = function(short_form_value, return_dataframe=False, limit=query['preview'], summary_mode=summary_mode)
3881
+ else:
3882
+ result = function(return_dataframe=False, limit=query['preview'], summary_mode=summary_mode)
1566
3883
  else:
1567
- result = function(return_dataframe=False, limit=query['preview'], **function_args)
3884
+ if function_args and takes_short_form:
3885
+ short_form_value = list(function_args.values())[0]
3886
+ result = function(short_form_value, return_dataframe=False, limit=query['preview'])
3887
+ else:
3888
+ result = function(return_dataframe=False, limit=query['preview'])
1568
3889
  except Exception as e:
1569
3890
  print(f"Error executing query function {query['function']}: {e}")
1570
3891
  # Set default values for failed query
1571
3892
  query['preview_results'] = {'headers': query.get('preview_columns', ['id', 'label', 'tags', 'thumbnail']), 'rows': []}
1572
3893
  query['count'] = 0
1573
- continue
3894
+ return
1574
3895
  # print(f"Function result: {result}")
1575
3896
 
1576
3897
  # Filter columns based on preview_columns
@@ -1602,17 +3923,38 @@ def fill_query_results(term_info):
1602
3923
  else:
1603
3924
  print(f"Unsupported result format for filtering columns in {query['function']}")
1604
3925
 
1605
- query['preview_results'] = {'headers': filtered_headers, 'rows': filtered_result}
1606
3926
  # Handle count extraction based on result type
1607
3927
  if isinstance(result, dict) and 'count' in result:
1608
- query['count'] = result['count']
3928
+ result_count = result['count']
1609
3929
  elif isinstance(result, pd.DataFrame):
1610
- query['count'] = len(result)
3930
+ result_count = len(result)
1611
3931
  else:
1612
- query['count'] = 0
3932
+ result_count = 0
3933
+
3934
+ # Store preview results (count is stored at query level, not in preview_results)
3935
+ # Sort rows based on the sort field in headers, default to ID descending if none
3936
+ sort_column = None
3937
+ sort_direction = None
3938
+ for col, info in filtered_headers.items():
3939
+ if 'sort' in info and isinstance(info['sort'], dict):
3940
+ sort_column = col
3941
+ sort_direction = list(info['sort'].values())[0] # e.g., 'Asc' or 'Desc'
3942
+ break
3943
+ if sort_column:
3944
+ reverse = sort_direction == 'Desc'
3945
+ filtered_result.sort(key=lambda x: x.get(sort_column, ''), reverse=reverse)
3946
+ else:
3947
+ # Default to ID descending if no sort specified
3948
+ filtered_result.sort(key=lambda x: x.get('id', ''), reverse=True)
3949
+ query['preview_results'] = {'headers': filtered_headers, 'rows': filtered_result}
3950
+ query['count'] = result_count
1613
3951
  # print(f"Filtered result: {filtered_result}")
1614
3952
  else:
1615
3953
  print(f"Function {query['function']} not found")
1616
3954
  else:
1617
3955
  print("Preview key not found or preview is 0")
3956
+
3957
+ with ThreadPoolExecutor() as executor:
3958
+ executor.map(process_query, term_info['Queries'])
3959
+
1618
3960
  return term_info