vfbquery 0.2.12__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/readme_parser.py +103 -0
- test/term_info_queries_test.py +87 -170
- test/test_examples_diff.py +317 -0
- vfbquery/solr_fetcher.py +89 -0
- vfbquery/term_info_queries.py +63 -3
- vfbquery/test_utils.py +39 -0
- vfbquery/vfb_queries.py +313 -63
- vfbquery-0.3.2.dist-info/METADATA +1323 -0
- vfbquery-0.3.2.dist-info/RECORD +14 -0
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.2.dist-info}/WHEEL +1 -1
- vfbquery-0.2.12.dist-info/METADATA +0 -1169
- vfbquery-0.2.12.dist-info/RECORD +0 -10
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.2.dist-info}/LICENSE +0 -0
- {vfbquery-0.2.12.dist-info → vfbquery-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import json
|
|
3
|
+
import vfbquery as vfb
|
|
4
|
+
from deepdiff import DeepDiff
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from colorama import Fore, Back, Style, init
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
# JSON encoder aware of NumPy scalar and array types.
class NumpyEncoder(json.JSONEncoder):
    """json.JSONEncoder subclass that maps NumPy types onto builtins.

    Handles np.integer -> int, np.floating -> float, np.ndarray -> list,
    and np.bool_ -> bool; everything else falls back to the base encoder.
    """

    def default(self, obj):
        # Arrays first, then the scalar families; the checks are disjoint
        # so ordering does not affect which conversion is applied.
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)
|
|
21
|
+
|
|
22
|
+
def get_brief_dict_representation(d, max_items=3, max_len=50):
    """Create a brief single-line representation of a value.

    Non-dict values are stringified and truncated to *max_len* characters.
    Dicts show at most *max_items* entries (with ', ...' when truncated)
    and the whole rendering is capped at *max_len* characters.

    BUG FIX: the recursive calls previously used the default limits,
    silently ignoring any caller-supplied max_items/max_len for nested
    values; the limits are now propagated.

    :param d: Value to summarize (dict or anything str()-able).
    :param max_items: Maximum number of dict entries to show per level.
    :param max_len: Maximum length of the produced string per level.
    :return: Brief string representation.
    """
    if not isinstance(d, dict):
        text = str(d)
        return text[:max_len] + '...' if len(text) > max_len else text

    shown = list(d.items())[:max_items]
    brief = '{' + ', '.join(
        f"'{k}': {get_brief_dict_representation(v, max_items, max_len)}"
        for k, v in shown
    )
    if len(d) > max_items:
        brief += ', ...'
    brief += '}'
    return brief[:max_len] + '...' if len(brief) > max_len else brief
|
|
33
|
+
|
|
34
|
+
def compare_objects(obj1, obj2, path=''):
    '''Compare two complex objects and return a human-readable diff.

    Dicts are compared key by key (recursing into shared keys), lists are
    reported only as a length/content mismatch, and scalars are shown as
    an old/new pair. Returns a list of colourised lines; empty when equal.
    '''
    if isinstance(obj1, dict) and isinstance(obj2, dict):
        lines = []
        # Union of keys so additions and removals are both reported.
        for key in set(obj1.keys()) | set(obj2.keys()):
            child_path = f'{path}.{key}' if path else key
            if key not in obj1:
                lines.append(f'  {Fore.GREEN}+ {child_path}: {get_brief_dict_representation(obj2[key])}{Style.RESET_ALL}')
            elif key not in obj2:
                lines.append(f'  {Fore.RED}- {child_path}: {get_brief_dict_representation(obj1[key])}{Style.RESET_ALL}')
            elif obj1[key] != obj2[key]:
                # extend() of an empty sub-diff is a no-op, so no guard needed.
                lines.extend(compare_objects(obj1[key], obj2[key], child_path))
        return lines

    if isinstance(obj1, list) and isinstance(obj2, list):
        if obj1 == obj2 and len(obj1) == len(obj2):
            return []
        return [f'  {Fore.YELLOW}~ {path}: Lists differ in length or content{Style.RESET_ALL}',
                f'    {Fore.RED}- List 1: {len(obj1)} items{Style.RESET_ALL}',
                f'    {Fore.GREEN}+ List 2: {len(obj2)} items{Style.RESET_ALL}']

    if obj1 == obj2:
        return []
    return [f'  {Fore.YELLOW}~ {path}:{Style.RESET_ALL}',
            f'    {Fore.RED}- {obj1}{Style.RESET_ALL}',
            f'    {Fore.GREEN}+ {obj2}{Style.RESET_ALL}']
|
|
64
|
+
|
|
65
|
+
def stringify_numeric_keys(obj):
    """Convert numeric dictionary keys to strings in nested objects.

    Returns a new structure; dicts and lists are rebuilt recursively while
    any other value is returned unchanged. int/float keys (including bool,
    an int subclass) become their str() form.
    """
    if isinstance(obj, dict):
        return {
            (str(key) if isinstance(key, (int, float)) else key): stringify_numeric_keys(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [stringify_numeric_keys(element) for element in obj]
    return obj
|
|
82
|
+
|
|
83
|
+
def format_for_readme(data):
    """Format data as a fenced ```json code block for README.md.

    Numeric dict keys are stringified, null/empty entries stripped, and the
    result pretty-printed via NumpyEncoder so NumPy values serialize.
    Returns an error string instead of raising on failure.
    """
    try:
        cleaned = remove_nulls(stringify_numeric_keys(data))
        pretty = json.dumps(cleaned, indent=3, cls=NumpyEncoder)
        # Render booleans Python-style for copy/paste into README examples.
        # NOTE(review): this plain substring replace also rewrites
        # 'true'/'false' occurring *inside* string values — confirm that is
        # acceptable for the data being rendered.
        pretty = pretty.replace('true', 'True').replace('false', 'False')
        return "```json\n" + pretty + "\n```"
    except Exception as e:
        return f"Error formatting JSON: {str(e)}"
|
|
104
|
+
|
|
105
|
+
def remove_nulls(data):
    """Recursively drop None values and empty containers from *data*.

    Dict entries and list items whose cleaned value is None, {} or [] are
    removed; other values (including 0, False and '') are kept. Non-container
    values are returned unchanged.

    BUG FIX: the list branch previously called remove_nulls(item) twice per
    element (once for the filter, once for the result), doubling the work at
    every nesting level; each item is now cleaned exactly once.

    :param data: Arbitrary nested structure of dicts/lists/scalars.
    :return: Cleaned copy of *data* (or *data* itself for scalars).
    """
    def _is_empty(value):
        # Only None and genuinely empty containers count as removable.
        return value is None or value == {} or value == []

    if isinstance(data, dict):
        pruned = {}
        for key, value in data.items():
            cleaned = remove_nulls(value)
            if not _is_empty(cleaned):
                pruned[key] = cleaned
        return pruned
    if isinstance(data, list):
        result = []
        for item in data:
            cleaned = remove_nulls(item)
            if not _is_empty(cleaned):
                result.append(cleaned)
        return result
    return data
|
|
118
|
+
|
|
119
|
+
def _resolve_diff_path(container, key):
    """Follow a DeepDiff path string like "['a']['b']" into *container*.

    Returns the value at that path, or the marker string
    '[Unable to access path]' when the path cannot be resolved.
    """
    current = container
    for part in key.strip('[]').split(']['):
        # Strip the quoting DeepDiff puts around string keys.
        if part.startswith("'") and part.endswith("'"):
            part = part.strip("'")
        elif part.startswith('"') and part.endswith('"'):
            part = part.strip('"')
        try:
            # DeepDiff renders numeric keys as 'number:<value>'.
            if part.startswith('number:'):
                part = float(part.split(':')[1])
            current = current[part]
        except (KeyError, TypeError):
            return '[Unable to access path]'
    return current


def _print_key_changes(diff_items, source, sign, color, heading):
    """Print added/removed dictionary keys with a brief view of each value."""
    print(f'\n{color}{heading}{Style.RESET_ALL}')
    for item in diff_items:
        key = item.replace('root', '')
        value = _resolve_diff_path(source, key)
        print(f'  {color}{sign}{key}: {get_brief_dict_representation(value)}{Style.RESET_ALL}')


def _print_list_changes(diff_items, sign, color, heading):
    """Print added/removed list items, expanding dict/list items briefly."""
    print(f'\n{color}{heading}{Style.RESET_ALL}')
    for key, value in diff_items.items():
        path = key.replace('root', '')
        if isinstance(value, (dict, list)):
            print(f'  {color}{sign}{path}:{Style.RESET_ALL}')
            if isinstance(value, dict):
                for k, v in value.items():
                    brief_v = get_brief_dict_representation(v)
                    print(f'    {color}{sign}{k}: {brief_v}{Style.RESET_ALL}')
            else:
                items = value[:3]
                items_str = ", ".join([get_brief_dict_representation(item) for item in items])
                ellipsis = "..." if len(value) > 3 else ""
                print(f'    {color}[{items_str}{ellipsis}]{Style.RESET_ALL}')
        else:
            print(f'  {color}{sign}{path}: {value}{Style.RESET_ALL}')


def main():
    """Compare README Python example outputs against expected JSON blocks.

    Imports the generated ``test_examples``/``test_results`` modules, diffs
    each pair with DeepDiff, prints a colourised report plus a suggested
    README replacement for every mismatch, and exits non-zero on failure.

    BUG FIX: the "Row differences" inner loop previously reused ``i``,
    clobbering the outer example counter used by the README suggestion
    heading; it now uses ``row_idx``.
    """
    init(autoreset=True)

    # Import the results from generated files
    try:
        from test_results import results as json_blocks
        from test_examples import results as python_blocks
    except ImportError as e:
        print(f"{Fore.RED}Error importing test files: {e}{Style.RESET_ALL}")
        sys.exit(1)

    print(f'Found {len(python_blocks)} Python code blocks')
    print(f'Found {len(json_blocks)} JSON blocks')

    if len(python_blocks) != len(json_blocks):
        print(f"{Fore.RED}Error: Number of Python blocks ({len(python_blocks)}) doesn't match JSON blocks ({len(json_blocks)}){Style.RESET_ALL}")
        sys.exit(1)

    failed = False

    for i, (python_code, expected_json) in enumerate(zip(python_blocks, json_blocks)):
        python_code = stringify_numeric_keys(python_code)
        expected_json = stringify_numeric_keys(expected_json)

        # Apply remove_nulls to both dictionaries before diffing
        python_code_filtered = remove_nulls(python_code)
        expected_json_filtered = remove_nulls(expected_json)
        diff = DeepDiff(expected_json_filtered, python_code_filtered,
                        ignore_order=True,
                        ignore_numeric_type_changes=True,
                        report_repetition=True,
                        verbose_level=2)

        if not diff:
            print(f'\n{Fore.GREEN}Example #{i+1}: ✓ PASS{Style.RESET_ALL}')
            continue

        failed = True
        print(f'\n{Fore.RED}Error in example #{i+1}:{Style.RESET_ALL}')

        if 'dictionary_item_added' in diff:
            _print_key_changes(diff['dictionary_item_added'], python_code,
                               '+', Fore.GREEN, 'Added keys:')

        if 'dictionary_item_removed' in diff:
            _print_key_changes(diff['dictionary_item_removed'], expected_json,
                               '-', Fore.RED, 'Removed keys:')

        if 'values_changed' in diff:
            print(f'\n{Fore.YELLOW}Changed values:{Style.RESET_ALL}')
            for key, value in diff['values_changed'].items():
                path = key.replace('root', '')
                old_val = value.get('old_value', 'N/A')
                new_val = value.get('new_value', 'N/A')
                print(f'  {Fore.YELLOW}{path}:{Style.RESET_ALL}')
                print(f'    {Fore.RED}- {old_val}{Style.RESET_ALL}')
                print(f'    {Fore.GREEN}+ {new_val}{Style.RESET_ALL}')

        if 'iterable_item_added' in diff:
            _print_list_changes(diff['iterable_item_added'], '+', Fore.GREEN,
                                'Added list items:')

        if 'iterable_item_removed' in diff:
            _print_list_changes(diff['iterable_item_removed'], '-', Fore.RED,
                                'Removed list items:')

        # For comparing complex row objects that have significant differences
        if 'iterable_item_added' in diff and 'iterable_item_removed' in diff:
            added_rows = [(k, v) for k, v in diff['iterable_item_added'].items() if 'rows' in k]
            removed_rows = [(k, v) for k, v in diff['iterable_item_removed'].items() if 'rows' in k]

            if added_rows and removed_rows:
                print(f'\n{Fore.YELLOW}Row differences (sample):{Style.RESET_ALL}')
                # Compare up to 2 rows to show examples of the differences.
                for row_idx in range(min(2, len(added_rows), len(removed_rows))):
                    added_key, added_val = added_rows[row_idx]
                    removed_key, removed_val = removed_rows[row_idx]

                    if isinstance(added_val, dict) and isinstance(removed_val, dict):
                        row_diff = compare_objects(removed_val, added_val, f'Row {row_idx}')
                        if row_diff:
                            print(f'  {Fore.YELLOW}Row {row_idx} differences:{Style.RESET_ALL}')
                            for line in row_diff:
                                print(f'  {line}')

        if 'type_changes' in diff:
            print(f'\n{Fore.YELLOW}Type changes:{Style.RESET_ALL}')
            for key, value in diff['type_changes'].items():
                path = key.replace('root', '')
                old_type = type(value.get('old_value', 'N/A')).__name__
                new_type = type(value.get('new_value', 'N/A')).__name__
                old_val = value.get('old_value', 'N/A')
                new_val = value.get('new_value', 'N/A')
                print(f'  {Fore.YELLOW}{path}:{Style.RESET_ALL}')
                print(f'    {Fore.RED}- {old_type}: {str(old_val)[:100] + "..." if len(str(old_val)) > 100 else old_val}{Style.RESET_ALL}')
                print(f'    {Fore.GREEN}+ {new_type}: {str(new_val)[:100] + "..." if len(str(new_val)) > 100 else new_val}{Style.RESET_ALL}')

        # Print a summary of the differences
        print(f'\n{Fore.YELLOW}Summary of differences:{Style.RESET_ALL}')
        add_keys = len(diff.get('dictionary_item_added', []))
        add_items = len(diff.get('iterable_item_added', {}))
        rem_keys = len(diff.get('dictionary_item_removed', []))
        rem_items = len(diff.get('iterable_item_removed', {}))
        changed_vals = len(diff.get('values_changed', {}))
        type_changes = len(diff.get('type_changes', {}))

        print(f'  {Fore.GREEN}Added:{Style.RESET_ALL} {add_keys} keys, {add_items} list items')
        print(f'  {Fore.RED}Removed:{Style.RESET_ALL} {rem_keys} keys, {rem_items} list items')
        print(f'  {Fore.YELLOW}Changed:{Style.RESET_ALL} {changed_vals} values, {type_changes} type changes')

        # After printing the summary, add the formatted output for README
        print(f'\n{Fore.CYAN}Suggested README update for example #{i+1}:{Style.RESET_ALL}')

        # Mark a clear copy-paste section
        print(f'\n{Fore.CYAN}--- COPY FROM HERE ---{Style.RESET_ALL}')
        print(format_for_readme(python_code).replace('\033[36m', '').replace('\033[0m', ''))
        print(f'{Fore.CYAN}--- END COPY ---{Style.RESET_ALL}')

    if failed:
        print(f'\n{Fore.RED}Some examples failed. Please check the differences above.{Style.RESET_ALL}')
        sys.exit(1)
    else:
        print(f'\n{Fore.GREEN}All examples passed!{Style.RESET_ALL}')
|
|
315
|
+
|
|
316
|
+
# Script entry point: run the README example vs. expected-JSON comparison
# when this file is executed directly.
if __name__ == "__main__":
    main()
|
vfbquery/solr_fetcher.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from typing import List, Dict, Any, Optional, Union
|
|
6
|
+
from vfb_connect import vfb
|
|
7
|
+
|
|
8
|
+
class SolrTermInfoFetcher:
    """Fetches term information directly from the Solr server instead of using VfbConnect"""

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json",
                 timeout: float = 30.0):
        """Initialize with the Solr server URL.

        :param solr_url: Base URL of the Solr core to query.
        :param timeout: Per-request timeout in seconds for Solr HTTP calls
            (new, backward-compatible parameter).
        """
        self.solr_url = solr_url
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)
        self.vfb = vfb

    def get_TermInfo(self, short_forms: List[str],
                     return_dataframe: bool = False,
                     summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
        """
        Fetch term info from Solr directly, mimicking VFBconnect's interface

        Args:
            short_forms: List of term IDs to fetch
            return_dataframe: If True, return as pandas DataFrame
            summary: If True, return summarized version (accepted for
                interface compatibility; not used by this implementation)

        Returns:
            List of term info dictionaries or DataFrame
        """
        results = []

        for short_form in short_forms:
            try:
                url = f"{self.solr_url}/select"
                params = {
                    "indent": "true",
                    "fl": "term_info",
                    "q.op": "OR",
                    "q": f"id:{short_form}"
                }

                self.logger.debug(f"Querying Solr for {short_form}")
                # BUG FIX: requests.get without a timeout can block forever
                # if the Solr server stalls; bound every request.
                response = requests.get(url, params=params, timeout=self.timeout)
                response.raise_for_status()

                data = response.json()
                docs = data.get("response", {}).get("docs", [])

                if not docs:
                    self.logger.warning(f"No results found for {short_form}")
                    continue

                if "term_info" not in docs[0] or not docs[0]["term_info"]:
                    self.logger.warning(f"No term_info found for {short_form}")
                    continue

                # Extract and parse the term_info string which is itself JSON
                term_info_str = docs[0]["term_info"][0]
                term_info_obj = json.loads(term_info_str)
                results.append(term_info_obj)

            except requests.RequestException as e:
                self.logger.error(f"Error fetching data from Solr: {e}")
            except json.JSONDecodeError as e:
                self.logger.error(f"Error decoding JSON for {short_form}: {e}")
            except Exception as e:
                # Best-effort per-term fetch: log and move to the next id.
                self.logger.error(f"Unexpected error for {short_form}: {e}")

        # Convert to DataFrame if requested
        if return_dataframe and results:
            try:
                return pd.json_normalize(results)
            except Exception as e:
                self.logger.error(f"Error converting to DataFrame: {e}")
                return results

        return results

    # Pass through any non-implemented methods to VFBconnect
    def __getattr__(self, name):
        """
        Automatically pass through any non-implemented methods to VFBconnect

        This allows us to use this class as a drop-in replacement for VfbConnect
        while only implementing the methods we want to customize.
        """
        # __getattr__ is only invoked for attributes not found normally, so
        # the instance attributes set in __init__ never route through here.
        self.logger.debug(f"Passing through method call: {name}")
        return getattr(self.vfb, name)
|
vfbquery/term_info_queries.py
CHANGED
|
@@ -167,7 +167,17 @@ class PubSyn:
|
|
|
167
167
|
return hash(self.__str__())
|
|
168
168
|
|
|
169
169
|
def get_microrefs(self):
    """
    Get a list of microreferences for all publications.

    Publications lacking a ``get_microref`` method, or whose microref is
    falsy, are skipped.

    :return: A list of publication microreferences.
    """
    pubs = getattr(self, 'pubs', None)
    if not pubs:
        return []
    return [
        pub.get_microref()
        for pub in pubs
        if hasattr(pub, 'get_microref') and pub.get_microref()
    ]
|
|
171
181
|
|
|
172
182
|
|
|
173
183
|
@dataclass_json
|
|
@@ -752,11 +762,61 @@ def deserialize_term_info(terminfo: str) -> VfbTerminfo:
|
|
|
752
762
|
def deserialize_term_info_from_dict(terminfo: dict) -> VfbTerminfo:
    """
    Deserializes the given terminfo vfb_json dictionary to VfbTerminfo object.
    Handles both direct VfbTerminfo dictionaries and API response dictionaries.

    :param terminfo: vfb_json dictionary
    :return: VfbTerminfo object
    """
    # Already in VfbTerminfo shape — deserialize as-is.
    if 'term' in terminfo:
        return from_dict(data_class=VfbTerminfo, data=terminfo)

    # API-response shape: rebuild the VfbTerminfo structure piece by piece.
    term_id = terminfo.get('id', '')
    tags = terminfo.get('tags', [])

    core = {
        'short_form': term_id,
        'iri': terminfo.get('iri', f"http://purl.obolibrary.org/obo/{term_id}"),
        'label': terminfo.get('label', ''),
        'types': tags,
        'unique_facets': tags,
        'symbol': terminfo.get('symbol', ''),
    }

    term = {
        'core': core,
        'description': [terminfo.get('description', '')],
        'comment': [],
        'iri': '',
        'link': terminfo.get('link', ''),
        'icon': terminfo.get('logo', ''),
    }

    structured_data = {
        'term': term,
        'query': 'Get JSON for Term',
        'version': terminfo.get('version', ''),
        'xrefs': [],
    }

    # Pair up parent ids with their labels when both lists are present;
    # zip truncates to the shorter list, matching the original bounds check.
    if 'parents_id' in terminfo and 'parents_label' in terminfo:
        structured_data['parents'] = [
            {
                'short_form': parent_id,
                'iri': f"http://purl.obolibrary.org/obo/{parent_id}",
                'label': parent_label,
                'types': [],
                'unique_facets': [],
                'symbol': '',
            }
            for parent_id, parent_label in zip(terminfo['parents_id'],
                                               terminfo['parents_label'])
        ]

    return from_dict(data_class=VfbTerminfo, data=structured_data)
|
|
760
820
|
|
|
761
821
|
|
|
762
822
|
def serialize_term_info_to_dict(vfb_term: VfbTerminfo, variable, loaded_template: Optional[str] = None, show_types=False) -> dict:
|
|
@@ -958,7 +1018,7 @@ def serialize_term_info_to_dict(vfb_term: VfbTerminfo, variable, loaded_template
|
|
|
958
1018
|
if vfb_term.get_references():
|
|
959
1019
|
data["references"] = vfb_term.get_references()
|
|
960
1020
|
|
|
961
|
-
|
|
1021
|
+
# queries
|
|
962
1022
|
# TODO requires geppettoModelAccess.getQueries() ??
|
|
963
1023
|
|
|
964
1024
|
# Targeting Splits
|
vfbquery/test_utils.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from typing import Any, Dict, Union
|
|
3
|
+
|
|
4
|
+
def safe_extract_row(result: Any, index: int = 0) -> Dict:
    """
    Safely extract a row from a pandas DataFrame or return the object itself if not a DataFrame.

    :param result: Result to extract from (DataFrame or other object)
    :param index: Index of the row to extract (default: 0)
    :return: Extracted row as dict, {} when the row is absent, or the original object
    """
    # Non-DataFrame inputs pass straight through untouched.
    if not isinstance(result, pd.DataFrame):
        return result
    if result.empty or len(result.index) <= index:
        return {}
    return result.iloc[index].to_dict()
|
|
18
|
+
|
|
19
|
+
def patch_vfb_connect_query_wrapper():
    """
    Apply monkey patches to VfbConnect.neo_query_wrapper to make it handle DataFrame results safely.
    Call this function in test setup if tests are expecting dictionary results from neo_query_wrapper methods.
    """
    try:
        from vfb_connect.neo.query_wrapper import NeoQueryWrapper
    except ImportError:
        print("Could not patch VfbConnect - module not found")
        return

    unpatched = NeoQueryWrapper._get_TermInfo

    def _df_safe_get_term_info(self, terms, *args, **kwargs):
        # Normalise DataFrame results to a list of row dicts for the tests.
        outcome = unpatched(self, terms, *args, **kwargs)
        if isinstance(outcome, pd.DataFrame):
            return [row.to_dict() for _, row in outcome.iterrows()]
        return outcome

    NeoQueryWrapper._get_TermInfo = _df_safe_get_term_info

    print("VfbConnect query wrapper patched for testing")
|