vfbquery 0.2.12__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +103 -0
- test/term_info_queries_test.py +87 -170
- test/test_examples_diff.py +317 -0
- vfbquery/solr_fetcher.py +89 -0
- vfbquery/term_info_queries.py +63 -3
- vfbquery/test_utils.py +39 -0
- vfbquery/vfb_queries.py +313 -63
- vfbquery-0.3.2.dist-info/METADATA +1323 -0
- vfbquery-0.3.2.dist-info/RECORD +14 -0
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.2.dist-info}/WHEEL +1 -1
- vfbquery-0.2.12.dist-info/METADATA +0 -1169
- vfbquery-0.2.12.dist-info/RECORD +0 -10
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.2.dist-info}/LICENSE +0 -0
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import json
|
|
3
|
+
import vfbquery as vfb
|
|
4
|
+
from deepdiff import DeepDiff
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from colorama import Fore, Back, Style, init
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
# JSON encoder aware of NumPy scalar and array types.
class NumpyEncoder(json.JSONEncoder):
    """json.JSONEncoder subclass that maps NumPy types onto builtins.

    Handles np.integer -> int, np.floating -> float, np.ndarray -> list,
    and np.bool_ -> bool; everything else falls back to the base encoder.
    """

    def default(self, obj):
        # Arrays first, then the scalar families; the checks are disjoint
        # so ordering does not affect which conversion is applied.
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)
|
|
21
|
+
|
|
22
|
+
def get_brief_dict_representation(d, max_items=3, max_len=50):
    """Create a brief single-line representation of a value.

    Non-dict values are stringified and truncated to *max_len* characters.
    Dicts show at most *max_items* entries (with ', ...' when truncated)
    and the whole rendering is capped at *max_len* characters.

    BUG FIX: the recursive calls previously used the default limits,
    silently ignoring any caller-supplied max_items/max_len for nested
    values; the limits are now propagated.

    :param d: Value to summarize (dict or anything str()-able).
    :param max_items: Maximum number of dict entries to show per level.
    :param max_len: Maximum length of the produced string per level.
    :return: Brief string representation.
    """
    if not isinstance(d, dict):
        text = str(d)
        return text[:max_len] + '...' if len(text) > max_len else text

    shown = list(d.items())[:max_items]
    brief = '{' + ', '.join(
        f"'{k}': {get_brief_dict_representation(v, max_items, max_len)}"
        for k, v in shown
    )
    if len(d) > max_items:
        brief += ', ...'
    brief += '}'
    return brief[:max_len] + '...' if len(brief) > max_len else brief
|
|
33
|
+
|
|
34
|
+
def compare_objects(obj1, obj2, path=''):
    '''Compare two complex objects and return a human-readable diff.

    Dicts are compared key by key (recursing into shared keys), lists are
    reported only as a length/content mismatch, and scalars are shown as
    an old/new pair. Returns a list of colourised lines; empty when equal.
    '''
    if isinstance(obj1, dict) and isinstance(obj2, dict):
        lines = []
        # Union of keys so additions and removals are both reported.
        for key in set(obj1.keys()) | set(obj2.keys()):
            child_path = f'{path}.{key}' if path else key
            if key not in obj1:
                lines.append(f'  {Fore.GREEN}+ {child_path}: {get_brief_dict_representation(obj2[key])}{Style.RESET_ALL}')
            elif key not in obj2:
                lines.append(f'  {Fore.RED}- {child_path}: {get_brief_dict_representation(obj1[key])}{Style.RESET_ALL}')
            elif obj1[key] != obj2[key]:
                # extend() of an empty sub-diff is a no-op, so no guard needed.
                lines.extend(compare_objects(obj1[key], obj2[key], child_path))
        return lines

    if isinstance(obj1, list) and isinstance(obj2, list):
        if obj1 == obj2 and len(obj1) == len(obj2):
            return []
        return [f'  {Fore.YELLOW}~ {path}: Lists differ in length or content{Style.RESET_ALL}',
                f'    {Fore.RED}- List 1: {len(obj1)} items{Style.RESET_ALL}',
                f'    {Fore.GREEN}+ List 2: {len(obj2)} items{Style.RESET_ALL}']

    if obj1 == obj2:
        return []
    return [f'  {Fore.YELLOW}~ {path}:{Style.RESET_ALL}',
            f'    {Fore.RED}- {obj1}{Style.RESET_ALL}',
            f'    {Fore.GREEN}+ {obj2}{Style.RESET_ALL}']
|
|
64
|
+
|
|
65
|
+
def stringify_numeric_keys(obj):
    """Convert numeric dictionary keys to strings in nested objects.

    Returns a new structure; dicts and lists are rebuilt recursively while
    any other value is returned unchanged. int/float keys (including bool,
    an int subclass) become their str() form.
    """
    if isinstance(obj, dict):
        return {
            (str(key) if isinstance(key, (int, float)) else key): stringify_numeric_keys(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [stringify_numeric_keys(element) for element in obj]
    return obj
|
|
82
|
+
|
|
83
|
+
def format_for_readme(data):
    """Format data as a fenced ```json code block for README.md.

    Numeric dict keys are stringified, null/empty entries stripped, and the
    result pretty-printed via NumpyEncoder so NumPy values serialize.
    Returns an error string instead of raising on failure.
    """
    try:
        cleaned = remove_nulls(stringify_numeric_keys(data))
        pretty = json.dumps(cleaned, indent=3, cls=NumpyEncoder)
        # Render booleans Python-style for copy/paste into README examples.
        # NOTE(review): this plain substring replace also rewrites
        # 'true'/'false' occurring *inside* string values — confirm that is
        # acceptable for the data being rendered.
        pretty = pretty.replace('true', 'True').replace('false', 'False')
        return "```json\n" + pretty + "\n```"
    except Exception as e:
        return f"Error formatting JSON: {str(e)}"
|
|
104
|
+
|
|
105
|
+
def remove_nulls(data):
    """Recursively drop None values and empty containers from *data*.

    Dict entries and list items whose cleaned value is None, {} or [] are
    removed; other values (including 0, False and '') are kept. Non-container
    values are returned unchanged.

    BUG FIX: the list branch previously called remove_nulls(item) twice per
    element (once for the filter, once for the result), doubling the work at
    every nesting level; each item is now cleaned exactly once.

    :param data: Arbitrary nested structure of dicts/lists/scalars.
    :return: Cleaned copy of *data* (or *data* itself for scalars).
    """
    def _is_empty(value):
        # Only None and genuinely empty containers count as removable.
        return value is None or value == {} or value == []

    if isinstance(data, dict):
        pruned = {}
        for key, value in data.items():
            cleaned = remove_nulls(value)
            if not _is_empty(cleaned):
                pruned[key] = cleaned
        return pruned
    if isinstance(data, list):
        result = []
        for item in data:
            cleaned = remove_nulls(item)
            if not _is_empty(cleaned):
                result.append(cleaned)
        return result
    return data
|
|
118
|
+
|
|
119
|
+
def _resolve_diff_path(container, key):
    """Follow a DeepDiff path string like "['a']['b']" into *container*.

    Returns the value at that path, or the marker string
    '[Unable to access path]' when the path cannot be resolved.
    """
    current = container
    for part in key.strip('[]').split(']['):
        # Strip the quoting DeepDiff puts around string keys.
        if part.startswith("'") and part.endswith("'"):
            part = part.strip("'")
        elif part.startswith('"') and part.endswith('"'):
            part = part.strip('"')
        try:
            # DeepDiff renders numeric keys as 'number:<value>'.
            if part.startswith('number:'):
                part = float(part.split(':')[1])
            current = current[part]
        except (KeyError, TypeError):
            return '[Unable to access path]'
    return current


def _print_key_changes(diff_items, source, sign, color, heading):
    """Print added/removed dictionary keys with a brief view of each value."""
    print(f'\n{color}{heading}{Style.RESET_ALL}')
    for item in diff_items:
        key = item.replace('root', '')
        value = _resolve_diff_path(source, key)
        print(f'  {color}{sign}{key}: {get_brief_dict_representation(value)}{Style.RESET_ALL}')


def _print_list_changes(diff_items, sign, color, heading):
    """Print added/removed list items, expanding dict/list items briefly."""
    print(f'\n{color}{heading}{Style.RESET_ALL}')
    for key, value in diff_items.items():
        path = key.replace('root', '')
        if isinstance(value, (dict, list)):
            print(f'  {color}{sign}{path}:{Style.RESET_ALL}')
            if isinstance(value, dict):
                for k, v in value.items():
                    brief_v = get_brief_dict_representation(v)
                    print(f'    {color}{sign}{k}: {brief_v}{Style.RESET_ALL}')
            else:
                items = value[:3]
                items_str = ", ".join([get_brief_dict_representation(item) for item in items])
                ellipsis = "..." if len(value) > 3 else ""
                print(f'    {color}[{items_str}{ellipsis}]{Style.RESET_ALL}')
        else:
            print(f'  {color}{sign}{path}: {value}{Style.RESET_ALL}')


def main():
    """Compare README Python example outputs against expected JSON blocks.

    Imports the generated ``test_examples``/``test_results`` modules, diffs
    each pair with DeepDiff, prints a colourised report plus a suggested
    README replacement for every mismatch, and exits non-zero on failure.

    BUG FIX: the "Row differences" inner loop previously reused ``i``,
    clobbering the outer example counter used by the README suggestion
    heading; it now uses ``row_idx``.
    """
    init(autoreset=True)

    # Import the results from generated files
    try:
        from test_results import results as json_blocks
        from test_examples import results as python_blocks
    except ImportError as e:
        print(f"{Fore.RED}Error importing test files: {e}{Style.RESET_ALL}")
        sys.exit(1)

    print(f'Found {len(python_blocks)} Python code blocks')
    print(f'Found {len(json_blocks)} JSON blocks')

    if len(python_blocks) != len(json_blocks):
        print(f"{Fore.RED}Error: Number of Python blocks ({len(python_blocks)}) doesn't match JSON blocks ({len(json_blocks)}){Style.RESET_ALL}")
        sys.exit(1)

    failed = False

    for i, (python_code, expected_json) in enumerate(zip(python_blocks, json_blocks)):
        python_code = stringify_numeric_keys(python_code)
        expected_json = stringify_numeric_keys(expected_json)

        # Apply remove_nulls to both dictionaries before diffing
        python_code_filtered = remove_nulls(python_code)
        expected_json_filtered = remove_nulls(expected_json)
        diff = DeepDiff(expected_json_filtered, python_code_filtered,
                        ignore_order=True,
                        ignore_numeric_type_changes=True,
                        report_repetition=True,
                        verbose_level=2)

        if not diff:
            print(f'\n{Fore.GREEN}Example #{i+1}: ✓ PASS{Style.RESET_ALL}')
            continue

        failed = True
        print(f'\n{Fore.RED}Error in example #{i+1}:{Style.RESET_ALL}')

        if 'dictionary_item_added' in diff:
            _print_key_changes(diff['dictionary_item_added'], python_code,
                               '+', Fore.GREEN, 'Added keys:')

        if 'dictionary_item_removed' in diff:
            _print_key_changes(diff['dictionary_item_removed'], expected_json,
                               '-', Fore.RED, 'Removed keys:')

        if 'values_changed' in diff:
            print(f'\n{Fore.YELLOW}Changed values:{Style.RESET_ALL}')
            for key, value in diff['values_changed'].items():
                path = key.replace('root', '')
                old_val = value.get('old_value', 'N/A')
                new_val = value.get('new_value', 'N/A')
                print(f'  {Fore.YELLOW}{path}:{Style.RESET_ALL}')
                print(f'    {Fore.RED}- {old_val}{Style.RESET_ALL}')
                print(f'    {Fore.GREEN}+ {new_val}{Style.RESET_ALL}')

        if 'iterable_item_added' in diff:
            _print_list_changes(diff['iterable_item_added'], '+', Fore.GREEN,
                                'Added list items:')

        if 'iterable_item_removed' in diff:
            _print_list_changes(diff['iterable_item_removed'], '-', Fore.RED,
                                'Removed list items:')

        # For comparing complex row objects that have significant differences
        if 'iterable_item_added' in diff and 'iterable_item_removed' in diff:
            added_rows = [(k, v) for k, v in diff['iterable_item_added'].items() if 'rows' in k]
            removed_rows = [(k, v) for k, v in diff['iterable_item_removed'].items() if 'rows' in k]

            if added_rows and removed_rows:
                print(f'\n{Fore.YELLOW}Row differences (sample):{Style.RESET_ALL}')
                # Compare up to 2 rows to show examples of the differences.
                for row_idx in range(min(2, len(added_rows), len(removed_rows))):
                    added_key, added_val = added_rows[row_idx]
                    removed_key, removed_val = removed_rows[row_idx]

                    if isinstance(added_val, dict) and isinstance(removed_val, dict):
                        row_diff = compare_objects(removed_val, added_val, f'Row {row_idx}')
                        if row_diff:
                            print(f'  {Fore.YELLOW}Row {row_idx} differences:{Style.RESET_ALL}')
                            for line in row_diff:
                                print(f'  {line}')

        if 'type_changes' in diff:
            print(f'\n{Fore.YELLOW}Type changes:{Style.RESET_ALL}')
            for key, value in diff['type_changes'].items():
                path = key.replace('root', '')
                old_type = type(value.get('old_value', 'N/A')).__name__
                new_type = type(value.get('new_value', 'N/A')).__name__
                old_val = value.get('old_value', 'N/A')
                new_val = value.get('new_value', 'N/A')
                print(f'  {Fore.YELLOW}{path}:{Style.RESET_ALL}')
                print(f'    {Fore.RED}- {old_type}: {str(old_val)[:100] + "..." if len(str(old_val)) > 100 else old_val}{Style.RESET_ALL}')
                print(f'    {Fore.GREEN}+ {new_type}: {str(new_val)[:100] + "..." if len(str(new_val)) > 100 else new_val}{Style.RESET_ALL}')

        # Print a summary of the differences
        print(f'\n{Fore.YELLOW}Summary of differences:{Style.RESET_ALL}')
        add_keys = len(diff.get('dictionary_item_added', []))
        add_items = len(diff.get('iterable_item_added', {}))
        rem_keys = len(diff.get('dictionary_item_removed', []))
        rem_items = len(diff.get('iterable_item_removed', {}))
        changed_vals = len(diff.get('values_changed', {}))
        type_changes = len(diff.get('type_changes', {}))

        print(f'  {Fore.GREEN}Added:{Style.RESET_ALL} {add_keys} keys, {add_items} list items')
        print(f'  {Fore.RED}Removed:{Style.RESET_ALL} {rem_keys} keys, {rem_items} list items')
        print(f'  {Fore.YELLOW}Changed:{Style.RESET_ALL} {changed_vals} values, {type_changes} type changes')

        # After printing the summary, add the formatted output for README
        print(f'\n{Fore.CYAN}Suggested README update for example #{i+1}:{Style.RESET_ALL}')

        # Mark a clear copy-paste section
        print(f'\n{Fore.CYAN}--- COPY FROM HERE ---{Style.RESET_ALL}')
        print(format_for_readme(python_code).replace('\033[36m', '').replace('\033[0m', ''))
        print(f'{Fore.CYAN}--- END COPY ---{Style.RESET_ALL}')

    if failed:
        print(f'\n{Fore.RED}Some examples failed. Please check the differences above.{Style.RESET_ALL}')
        sys.exit(1)
    else:
        print(f'\n{Fore.GREEN}All examples passed!{Style.RESET_ALL}')
|
|
315
|
+
|
|
316
|
+
# Script entry point: run the README example vs. expected-JSON comparison
# when this file is executed directly.
if __name__ == "__main__":
    main()
|
vfbquery/solr_fetcher.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from typing import List, Dict, Any, Optional, Union
|
|
6
|
+
from vfb_connect import vfb
|
|
7
|
+
|
|
8
|
+
class SolrTermInfoFetcher:
    """Fetches term information directly from the Solr server instead of using VfbConnect"""

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json",
                 timeout: float = 30.0):
        """Initialize with the Solr server URL.

        :param solr_url: Base URL of the Solr core to query.
        :param timeout: Per-request timeout in seconds for Solr HTTP calls
            (new, backward-compatible parameter).
        """
        self.solr_url = solr_url
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)
        self.vfb = vfb

    def get_TermInfo(self, short_forms: List[str],
                     return_dataframe: bool = False,
                     summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
        """
        Fetch term info from Solr directly, mimicking VFBconnect's interface

        Args:
            short_forms: List of term IDs to fetch
            return_dataframe: If True, return as pandas DataFrame
            summary: If True, return summarized version (accepted for
                interface compatibility; not used by this implementation)

        Returns:
            List of term info dictionaries or DataFrame
        """
        results = []

        for short_form in short_forms:
            try:
                url = f"{self.solr_url}/select"
                params = {
                    "indent": "true",
                    "fl": "term_info",
                    "q.op": "OR",
                    "q": f"id:{short_form}"
                }

                self.logger.debug(f"Querying Solr for {short_form}")
                # BUG FIX: requests.get without a timeout can block forever
                # if the Solr server stalls; bound every request.
                response = requests.get(url, params=params, timeout=self.timeout)
                response.raise_for_status()

                data = response.json()
                docs = data.get("response", {}).get("docs", [])

                if not docs:
                    self.logger.warning(f"No results found for {short_form}")
                    continue

                if "term_info" not in docs[0] or not docs[0]["term_info"]:
                    self.logger.warning(f"No term_info found for {short_form}")
                    continue

                # Extract and parse the term_info string which is itself JSON
                term_info_str = docs[0]["term_info"][0]
                term_info_obj = json.loads(term_info_str)
                results.append(term_info_obj)

            except requests.RequestException as e:
                self.logger.error(f"Error fetching data from Solr: {e}")
            except json.JSONDecodeError as e:
                self.logger.error(f"Error decoding JSON for {short_form}: {e}")
            except Exception as e:
                # Best-effort per-term fetch: log and move to the next id.
                self.logger.error(f"Unexpected error for {short_form}: {e}")

        # Convert to DataFrame if requested
        if return_dataframe and results:
            try:
                return pd.json_normalize(results)
            except Exception as e:
                self.logger.error(f"Error converting to DataFrame: {e}")
                return results

        return results

    # Pass through any non-implemented methods to VFBconnect
    def __getattr__(self, name):
        """
        Automatically pass through any non-implemented methods to VFBconnect

        This allows us to use this class as a drop-in replacement for VfbConnect
        while only implementing the methods we want to customize.
        """
        # __getattr__ is only invoked for attributes not found normally, so
        # the instance attributes set in __init__ never route through here.
        self.logger.debug(f"Passing through method call: {name}")
        return getattr(self.vfb, name)
|
vfbquery/term_info_queries.py
CHANGED
|
@@ -167,7 +167,17 @@ class PubSyn:
|
|
|
167
167
|
return hash(self.__str__())
|
|
168
168
|
|
|
169
169
|
def get_microrefs(self):
    """
    Get a list of microreferences for all publications.

    Publications lacking a ``get_microref`` method, or whose microref is
    falsy, are skipped.

    :return: A list of publication microreferences.
    """
    pubs = getattr(self, 'pubs', None)
    if not pubs:
        return []
    return [
        pub.get_microref()
        for pub in pubs
        if hasattr(pub, 'get_microref') and pub.get_microref()
    ]
|
|
171
181
|
|
|
172
182
|
|
|
173
183
|
@dataclass_json
|
|
@@ -752,11 +762,61 @@ def deserialize_term_info(terminfo: str) -> VfbTerminfo:
|
|
|
752
762
|
def deserialize_term_info_from_dict(terminfo: dict) -> VfbTerminfo:
    """
    Deserializes the given terminfo vfb_json dictionary to VfbTerminfo object.
    Handles both direct VfbTerminfo dictionaries and API response dictionaries.

    :param terminfo: vfb_json dictionary
    :return: VfbTerminfo object
    """
    # Already in VfbTerminfo shape — deserialize as-is.
    if 'term' in terminfo:
        return from_dict(data_class=VfbTerminfo, data=terminfo)

    # API-response shape: rebuild the VfbTerminfo structure piece by piece.
    term_id = terminfo.get('id', '')
    tags = terminfo.get('tags', [])

    core = {
        'short_form': term_id,
        'iri': terminfo.get('iri', f"http://purl.obolibrary.org/obo/{term_id}"),
        'label': terminfo.get('label', ''),
        'types': tags,
        'unique_facets': tags,
        'symbol': terminfo.get('symbol', ''),
    }

    term = {
        'core': core,
        'description': [terminfo.get('description', '')],
        'comment': [],
        'iri': '',
        'link': terminfo.get('link', ''),
        'icon': terminfo.get('logo', ''),
    }

    structured_data = {
        'term': term,
        'query': 'Get JSON for Term',
        'version': terminfo.get('version', ''),
        'xrefs': [],
    }

    # Pair up parent ids with their labels when both lists are present;
    # zip truncates to the shorter list, matching the original bounds check.
    if 'parents_id' in terminfo and 'parents_label' in terminfo:
        structured_data['parents'] = [
            {
                'short_form': parent_id,
                'iri': f"http://purl.obolibrary.org/obo/{parent_id}",
                'label': parent_label,
                'types': [],
                'unique_facets': [],
                'symbol': '',
            }
            for parent_id, parent_label in zip(terminfo['parents_id'],
                                               terminfo['parents_label'])
        ]

    return from_dict(data_class=VfbTerminfo, data=structured_data)
|
|
760
820
|
|
|
761
821
|
|
|
762
822
|
def serialize_term_info_to_dict(vfb_term: VfbTerminfo, variable, loaded_template: Optional[str] = None, show_types=False) -> dict:
|
|
@@ -958,7 +1018,7 @@ def serialize_term_info_to_dict(vfb_term: VfbTerminfo, variable, loaded_template
|
|
|
958
1018
|
if vfb_term.get_references():
|
|
959
1019
|
data["references"] = vfb_term.get_references()
|
|
960
1020
|
|
|
961
|
-
|
|
1021
|
+
# queries
|
|
962
1022
|
# TODO requires geppettoModelAccess.getQueries() ??
|
|
963
1023
|
|
|
964
1024
|
# Targeting Splits
|
vfbquery/test_utils.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from typing import Any, Dict, Union
|
|
3
|
+
|
|
4
|
+
def safe_extract_row(result: Any, index: int = 0) -> Dict:
    """
    Safely extract a row from a pandas DataFrame or return the object itself if not a DataFrame.

    :param result: Result to extract from (DataFrame or other object)
    :param index: Index of the row to extract (default: 0)
    :return: Extracted row as dict, {} when the row is absent, or the original object
    """
    # Non-DataFrame inputs pass straight through untouched.
    if not isinstance(result, pd.DataFrame):
        return result
    if result.empty or len(result.index) <= index:
        return {}
    return result.iloc[index].to_dict()
|
|
18
|
+
|
|
19
|
+
def patch_vfb_connect_query_wrapper():
    """
    Apply monkey patches to VfbConnect.neo_query_wrapper to make it handle DataFrame results safely.
    Call this function in test setup if tests are expecting dictionary results from neo_query_wrapper methods.
    """
    try:
        from vfb_connect.neo.query_wrapper import NeoQueryWrapper
    except ImportError:
        print("Could not patch VfbConnect - module not found")
        return

    unpatched = NeoQueryWrapper._get_TermInfo

    def _df_safe_get_term_info(self, terms, *args, **kwargs):
        # Normalise DataFrame results to a list of row dicts for the tests.
        outcome = unpatched(self, terms, *args, **kwargs)
        if isinstance(outcome, pd.DataFrame):
            return [row.to_dict() for _, row in outcome.iterrows()]
        return outcome

    NeoQueryWrapper._get_TermInfo = _df_safe_get_term_info

    print("VfbConnect query wrapper patched for testing")
|