vfbquery 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,613 @@
+ """
+ SOLR-based Result Caching for VFBquery
+
+ This module implements server-side caching by storing computed VFBquery results
+ directly in the SOLR server, eliminating cold start delays for frequently
+ requested terms.
+
+ The approach uses a dedicated SOLR collection 'vfbquery_cache' to store
+ pre-computed results that can be retrieved instantly without expensive
+ Neo4j queries and data processing.
+ """
+
+ import json
+ import requests
+ import hashlib
+ import time
+ from datetime import datetime, timedelta
+ from typing import Dict, Any, Optional, List
+ import logging
+ from dataclasses import dataclass, asdict
+ from vfbquery.term_info_queries import NumpyEncoder
+
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class CacheMetadata:
+     """Metadata for cached results"""
+     query_type: str  # 'term_info', 'instances', etc.
+     term_id: str  # The queried term ID
+     query_params: str  # Hashed parameters for unique identification
+     created_at: str  # ISO timestamp
+     expires_at: str  # ISO timestamp
+     result_size: int  # Size in bytes
+     version: str  # VFBquery version
+     hit_count: int = 0  # How many times this cache entry was used
+
+ class SolrResultCache:
+     """
+     SOLR-based result caching system for VFBquery
+
+     Stores computed query results in a dedicated SOLR collection to enable
+     instant retrieval without expensive computation on cold starts.
+     """
+
+     def __init__(self,
+                  cache_url: str = "https://solr.virtualflybrain.org/solr/vfb_json",
+                  ttl_hours: int = 2160,  # 3 months like VFB_connect
+                  max_result_size_mb: int = 10):
+         """
+         Initialize SOLR result cache
+
+         Args:
+             cache_url: SOLR collection URL for caching
+             ttl_hours: Time-to-live for cache entries in hours
+             max_result_size_mb: Maximum result size to cache in MB
+         """
+         self.cache_url = cache_url
+         self.ttl_hours = ttl_hours
+         self.max_result_size_mb = max_result_size_mb
+         self.max_result_size_bytes = max_result_size_mb * 1024 * 1024
+
+     def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
+         """Create metadata for cached result with 3-month expiration"""
+         serialized_result = json.dumps(result, cls=NumpyEncoder)
+         result_size = len(serialized_result.encode('utf-8'))
+
+         # Don't cache if result is too large
+         if result_size > self.max_result_size_bytes:
+             logger.warning(f"Result too large to cache: {result_size/1024/1024:.2f}MB > {self.max_result_size_mb}MB")
+             return None
+
+         now = datetime.now().astimezone()
+         expires_at = now + timedelta(hours=self.ttl_hours)  # 2160 hours = 90 days = 3 months
+
+         return {
+             "result": result,  # Store original object, not serialized string
+             "cached_at": now.isoformat(),
+             "expires_at": expires_at.isoformat(),
+             "result_size": result_size,
+             "hit_count": 0,
+             "cache_version": "1.0",  # For future compatibility
+             "ttl_hours": self.ttl_hours  # Store TTL for debugging
+         }
+
+     def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional[Any]:
+         """
+         Retrieve cached result from separate cache document
+
+         Args:
+             query_type: Type of query ('term_info', 'instances', etc.)
+             term_id: Term identifier
+             **params: Query parameters for field name generation
+
+         Returns:
+             Cached result or None if not found/expired
+         """
+         try:
+             # Query for cache document with prefixed ID
+             cache_doc_id = f"vfb_query_{term_id}"
+
+             response = requests.get(f"{self.cache_url}/select", params={
+                 "q": f"id:{cache_doc_id} AND query_type:{query_type}",
+                 "fl": "cache_data",
+                 "wt": "json"
+             }, timeout=5)  # Short timeout for cache lookups
+
+             if response.status_code != 200:
+                 logger.debug(f"Cache miss: HTTP {response.status_code}")
+                 return None
+
+             data = response.json()
+             docs = data.get("response", {}).get("docs", [])
+
+             if not docs:
+                 logger.debug(f"Cache miss: No cache document found for {query_type}:{term_id}")
+                 return None
+
+             cached_field = docs[0].get("cache_data")
+             if not cached_field:
+                 logger.debug(f"Cache miss: No cache_data field found for {term_id}")
+                 return None
+
+             # Handle both list and string formats
+             if isinstance(cached_field, list):
+                 cached_field = cached_field[0]
+
+             # Parse the cached metadata and result
+             cached_data = json.loads(cached_field)
+
+             # Check expiration (3-month max age)
+             try:
+                 expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
+                 cached_at = datetime.fromisoformat(cached_data["cached_at"].replace('Z', '+00:00'))
+                 now = datetime.now().astimezone()
+
+                 if now > expires_at:
+                     age_days = (now - cached_at).days
+                     logger.info(f"Cache expired for {query_type}({term_id}) - age: {age_days} days")
+                     self._clear_expired_cache_document(cache_doc_id)
+                     return None
+
+                 # Log cache age for monitoring
+                 age_hours = (now - cached_at).total_seconds() / 3600
+                 logger.debug(f"Cache hit for {query_type}({term_id}) - age: {age_hours:.1f} hours")
+
+             except (KeyError, ValueError) as e:
+                 logger.warning(f"Invalid cache metadata for {term_id}: {e}")
+                 self._clear_expired_cache_document(cache_doc_id)
+                 return None
+
+             # Increment hit count asynchronously
+             self._increment_cache_hit_count(cache_doc_id, cached_data.get("hit_count", 0))
+
+             # Return cached result
+             result = cached_data["result"]
+             # If result is a string, parse it as JSON
+             if isinstance(result, str):
+                 try:
+                     result = json.loads(result)
+                 except json.JSONDecodeError:
+                     logger.warning(f"Failed to parse cached result for {term_id}")
+                     return None
+
+             logger.info(f"Cache hit for {query_type}({term_id})")
+             return result
+
+         except Exception as e:
+             logger.debug(f"Error retrieving cached result: {e}")
+             return None
+
+     def cache_result(self, query_type: str, term_id: str, result: Any, **params) -> bool:
+         """
+         Store result as separate cache document with prefixed ID
+
+         This approach is safer as it never touches original VFB documents,
+         eliminating risk of data loss.
+
+         Args:
+             query_type: Type of query being cached
+             term_id: Term identifier
+             result: Query result to cache
+             **params: Query parameters for field name generation
+
+         Returns:
+             True if successfully cached, False otherwise
+         """
+         if not result:
+             logger.debug("Empty result, not caching")
+             return False
+
+         try:
+             # Create cached metadata and result
+             cached_data = self._create_cache_metadata(result)
+             if not cached_data:
+                 return False  # Result too large or other issue
+
+             # Create cache document with prefixed ID
+             cache_doc_id = f"vfb_query_{term_id}"
+
+             cache_doc = {
+                 "id": cache_doc_id,
+                 "original_term_id": term_id,
+                 "query_type": query_type,
+                 "cache_data": json.dumps(cached_data, cls=NumpyEncoder),
+                 "cached_at": cached_data["cached_at"],
+                 "expires_at": cached_data["expires_at"]
+             }
+
+             # Store cache document
+             response = requests.post(
+                 f"{self.cache_url}/update",
+                 data=json.dumps([cache_doc]),
+                 headers={"Content-Type": "application/json"},
+                 params={"commit": "true"},  # Immediate commit for availability
+                 timeout=10
+             )
+
+             if response.status_code == 200:
+                 logger.info(f"Cached {query_type} for {term_id} as {cache_doc_id}, size: {cached_data['result_size']/1024:.1f}KB")
+                 return True
+             else:
+                 logger.error(f"Failed to cache result: HTTP {response.status_code} - {response.text}")
+                 return False
+
+         except Exception as e:
+             logger.error(f"Error caching result: {e}")
+             return False
+
+
+     def _clear_expired_cache_document(self, cache_doc_id: str):
+         """Delete expired cache document from SOLR"""
+         try:
+             requests.post(
+                 f"{self.cache_url}/update",
+                 data=f'<delete><id>{cache_doc_id}</id></delete>',
+                 headers={"Content-Type": "application/xml"},
+                 params={"commit": "false"},  # Don't commit immediately for performance
+                 timeout=2
+             )
+         except Exception as e:
+             logger.debug(f"Failed to clear expired cache document: {e}")
+
+     def _increment_cache_hit_count(self, cache_doc_id: str, current_count: int):
+         """Increment hit count for cache document (background operation)"""
+         try:
+             # Update hit count in cache document
+             new_count = current_count + 1
+             update_doc = {
+                 "id": cache_doc_id,
+                 "hit_count": {"set": new_count},
+                 "last_accessed": {"set": datetime.now().isoformat() + "Z"}
+             }
+
+             requests.post(
+                 f"{self.cache_url}/update",
+                 data=json.dumps([update_doc]),
+                 headers={"Content-Type": "application/json"},
+                 params={"commit": "false"},  # Don't commit immediately for performance
+                 timeout=2
+             )
+         except Exception as e:
+             logger.debug(f"Failed to update hit count: {e}")
+
+     def get_cache_age(self, query_type: str, term_id: str, **params) -> Optional[Dict[str, Any]]:
+         """
+         Get cache age information for a specific cached result
+
+         Returns:
+             Dictionary with cache age info or None if not cached
+         """
+         try:
+             cache_doc_id = f"vfb_query_{term_id}"
+
+             response = requests.get(f"{self.cache_url}/select", params={
+                 "q": f"id:{cache_doc_id} AND query_type:{query_type}",
+                 "fl": "cache_data,hit_count,last_accessed",
+                 "wt": "json"
+             }, timeout=5)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 docs = data.get("response", {}).get("docs", [])
+
+                 if docs:
+                     doc = docs[0]
+                     cached_field = doc.get("cache_data")
+                     if cached_field:
+                         # Handle both list and string formats
+                         if isinstance(cached_field, list):
+                             cached_field = cached_field[0]
+
+                         cached_data = json.loads(cached_field)
+
+                         cached_at = datetime.fromisoformat(cached_data["cached_at"].replace('Z', '+00:00'))
+                         expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
+                         now = datetime.now().astimezone()
+
+                         age = now - cached_at
+                         time_to_expiry = expires_at - now
+
+                         return {
+                             "cached_at": cached_at.isoformat(),
+                             "expires_at": expires_at.isoformat(),
+                             "age_days": age.days,
+                             "age_hours": age.total_seconds() / 3600,
+                             "time_to_expiry_days": time_to_expiry.days,
+                             "time_to_expiry_hours": time_to_expiry.total_seconds() / 3600,
+                             "is_expired": now > expires_at,
+                             "hit_count": doc.get("hit_count", cached_data.get("hit_count", 0)),
+                             "size_kb": cached_data.get("result_size", 0) / 1024,
+                             "last_accessed": doc.get("last_accessed", ["Never"])[0] if isinstance(doc.get("last_accessed"), list) else doc.get("last_accessed", "Never")
+                         }
+         except Exception as e:
+             logger.debug(f"Error getting cache age: {e}")
+
+         return None
+
+     def cleanup_expired_entries(self) -> int:
+         """
+         Clean up expired VFBquery cache documents
+
+         This method scans for cache documents (IDs starting with vfb_query_) and removes expired ones.
+
+         Returns:
+             Number of expired cache documents cleaned up
+         """
+         try:
+             now = datetime.now().astimezone()
+             cleaned_count = 0
+
+             # Search for all cache documents
+             response = requests.get(f"{self.cache_url}/select", params={
+                 "q": "id:vfb_query_*",
+                 "fl": "id,cache_data,expires_at",
+                 "rows": "1000",  # Process in batches
+                 "wt": "json"
+             }, timeout=30)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 docs = data.get("response", {}).get("docs", [])
+                 expired_ids = []
+
+                 for doc in docs:
+                     doc_id = doc["id"]
+
+                     try:
+                         # Check expiration using expires_at field if available, or cache_data
+                         expires_at = None
+
+                         if "expires_at" in doc:
+                             expires_at_field = doc["expires_at"]
+                             expires_at_str = expires_at_field[0] if isinstance(expires_at_field, list) else expires_at_field
+                             expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
+                         elif "cache_data" in doc:
+                             # Fallback to parsing cache_data
+                             cached_field = doc["cache_data"]
+                             if isinstance(cached_field, list):
+                                 cached_field = cached_field[0]
+                             cached_data = json.loads(cached_field)
+                             expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
+
+                         if expires_at and now > expires_at:
+                             expired_ids.append(doc_id)
+                             cleaned_count += 1
+                             logger.debug(f"Marking cache document {doc_id} for removal (expired)")
+
+                     except (json.JSONDecodeError, KeyError, ValueError) as e:
+                         # Invalid cache data - remove it
+                         expired_ids.append(doc_id)
+                         cleaned_count += 1
+                         logger.debug(f"Marking invalid cache document {doc_id} for removal: {e}")
+
+                 # Delete expired cache documents in batch
+                 if expired_ids:
+                     delete_xml = "<delete>" + "".join(f"<id>{doc_id}</id>" for doc_id in expired_ids) + "</delete>"
+
+                     delete_response = requests.post(
+                         f"{self.cache_url}/update",
+                         data=delete_xml,
+                         headers={"Content-Type": "application/xml"},
+                         params={"commit": "true"},  # Commit deletions immediately
+                         timeout=10
+                     )
+
+                     if delete_response.status_code != 200:
+                         logger.warning(f"Failed to delete expired cache documents: HTTP {delete_response.status_code}")
+                     else:
+                         logger.info(f"Cleaned up {cleaned_count} expired cache documents")
+
+             return cleaned_count
+
+         except Exception as e:
+             logger.error(f"Error during cache cleanup: {e}")
+             return 0
+
+     def get_cache_stats(self) -> Dict[str, Any]:
+         """
+         Get VFBquery cache statistics from cache documents
+
+         Returns:
+             Dictionary with cache statistics including document counts and age distribution
+         """
+         try:
+             # Get all cache documents
+             response = requests.get(f"{self.cache_url}/select", params={
+                 "q": "id:vfb_query_*",
+                 "fl": "id,query_type,cache_data,hit_count,last_accessed,cached_at,expires_at",
+                 "rows": "1000",  # Process in batches
+                 "wt": "json"
+             }, timeout=30)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 docs = data.get("response", {}).get("docs", [])
+                 total_cache_docs = data.get("response", {}).get("numFound", 0)
+
+                 type_stats = {}
+                 total_size = 0
+                 expired_count = 0
+                 total_hits = 0
+                 age_buckets = {"0-1d": 0, "1-7d": 0, "7-30d": 0, "30-90d": 0, ">90d": 0}
+
+                 now = datetime.now().astimezone()
+
+                 # Analyze each cache document
+                 for doc in docs:
+                     query_type_field = doc.get("query_type", "unknown")
+                     # Handle both list and string formats
+                     query_type = query_type_field[0] if isinstance(query_type_field, list) else query_type_field
+                     type_stats[query_type] = type_stats.get(query_type, 0) + 1
+
+                     try:
+                         # Get cache data and metadata
+                         cached_field = doc.get("cache_data")
+                         if cached_field:
+                             # Handle both list and string formats
+                             if isinstance(cached_field, list):
+                                 cached_field = cached_field[0]
+
+                             cached_data = json.loads(cached_field)
+                             total_size += len(cached_field)
+
+                             # Get timestamps from document fields or cache_data
+                             cached_at = None
+                             expires_at = None
+
+                             # Try document fields first
+                             if "cached_at" in doc:
+                                 cached_at_field = doc["cached_at"]
+                                 cached_at_str = cached_at_field[0] if isinstance(cached_at_field, list) else cached_at_field
+                                 cached_at = datetime.fromisoformat(cached_at_str.replace('Z', '+00:00'))
+
+                             if "expires_at" in doc:
+                                 expires_at_field = doc["expires_at"]
+                                 expires_at_str = expires_at_field[0] if isinstance(expires_at_field, list) else expires_at_field
+                                 expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
+
+                             # Fallback to cache_data
+                             if not cached_at and "cached_at" in cached_data:
+                                 cached_at = datetime.fromisoformat(cached_data["cached_at"].replace('Z', '+00:00'))
+                             if not expires_at and "expires_at" in cached_data:
+                                 expires_at = datetime.fromisoformat(cached_data["expires_at"].replace('Z', '+00:00'))
+
+                             if cached_at and expires_at:
+                                 age_days = (now - cached_at).days
+
+                                 # Check if expired
+                                 if now > expires_at:
+                                     expired_count += 1
+
+                                 # Categorize by age
+                                 if age_days <= 1:
+                                     age_buckets["0-1d"] += 1
+                                 elif age_days <= 7:
+                                     age_buckets["1-7d"] += 1
+                                 elif age_days <= 30:
+                                     age_buckets["7-30d"] += 1
+                                 elif age_days <= 90:
+                                     age_buckets["30-90d"] += 1
+                                 else:
+                                     age_buckets[">90d"] += 1
+
+                             # Get hit count
+                             hit_count = doc.get("hit_count", cached_data.get("hit_count", 0))
+                             if isinstance(hit_count, list):
+                                 hit_count = hit_count[0]
+                             total_hits += int(hit_count) if hit_count else 0
+
+                     except (json.JSONDecodeError, KeyError, ValueError):
+                         # Invalid cache data
+                         expired_count += 1
+
+                 return {
+                     "total_cache_documents": total_cache_docs,
+                     "cache_by_type": type_stats,
+                     "expired_documents": expired_count,
+                     "age_distribution": age_buckets,
+                     "total_hits": total_hits,
+                     "estimated_size_bytes": total_size,
+                     "estimated_size_mb": round(total_size / (1024 * 1024), 2),
+                     "cache_efficiency": round((total_cache_docs - expired_count) / max(total_cache_docs, 1) * 100, 1)
+                 }
+
+         except Exception as e:
+             logger.error(f"Error getting cache stats: {e}")
+
+         return {
+             "total_cache_documents": 0,
+             "cache_by_type": {},
+             "expired_documents": 0,
+             "age_distribution": {},
+             "total_hits": 0,
+             "estimated_size_bytes": 0,
+             "estimated_size_mb": 0.0,
+             "cache_efficiency": 0.0
+         }
+
+
+ # Global cache instance
+ _solr_cache = None
+
+ def get_solr_cache() -> SolrResultCache:
+     """Get global SOLR cache instance"""
+     global _solr_cache
+     if _solr_cache is None:
+         _solr_cache = SolrResultCache()
+     return _solr_cache
+
+ def with_solr_cache(query_type: str):
+     """
+     Decorator to add SOLR caching to query functions
+
+     Usage:
+         @with_solr_cache('term_info')
+         def get_term_info(short_form, **kwargs):
+             # ... existing implementation
+     """
+     def decorator(func):
+         def wrapper(*args, **kwargs):
+             # Extract term_id from first argument or kwargs
+             term_id = args[0] if args else kwargs.get('short_form') or kwargs.get('term_id')
+
+             if not term_id:
+                 logger.warning("No term_id found for caching")
+                 return func(*args, **kwargs)
+
+             cache = get_solr_cache()
+
+             # Try cache first
+             cached_result = cache.get_cached_result(query_type, term_id, **kwargs)
+             if cached_result is not None:
+                 # Validate that cached result has essential fields for term_info
+                 if query_type == 'term_info':
+                     is_valid = (cached_result and isinstance(cached_result, dict) and
+                                 cached_result.get('Id') and cached_result.get('Name'))
+
+                     # Additional validation for query results
+                     if is_valid and 'Queries' in cached_result:
+                         logger.debug(f"Validating {len(cached_result['Queries'])} queries for {term_id}")
+                         for i, query in enumerate(cached_result['Queries']):
+                             count = query.get('count', 0)
+                             preview_results = query.get('preview_results')
+                             headers = preview_results.get('headers', []) if isinstance(preview_results, dict) else []
+
+                             logger.debug(f"Query {i}: count={count}, preview_results_type={type(preview_results)}, headers={headers}")
+
+                             # Check if query has unrealistic count (0 or -1) which indicates failed execution
+                             if count <= 0:
+                                 is_valid = False
+                                 logger.debug(f"Cached result has invalid query count {count} for {term_id}")
+                                 break
+                             # Check if preview_results is missing or has empty headers when it should have data
+                             if not isinstance(preview_results, dict) or not headers:
+                                 is_valid = False
+                                 logger.debug(f"Cached result has invalid preview_results structure for {term_id}")
+                                 break
+
+                     if is_valid:
+                         logger.debug(f"Using valid cached result for {term_id}")
+                         return cached_result
+                     else:
+                         logger.warning(f"Cached result incomplete for {term_id}, re-executing function")
+                         # Don't return the incomplete cached result, continue to execute function
+                 else:
+                     return cached_result
+
+             # Execute function and cache result
+             result = func(*args, **kwargs)
+
+             # Cache the result asynchronously to avoid blocking
+             if result:
+                 # Validate result before caching for term_info
+                 if query_type == 'term_info':
+                     if (result and isinstance(result, dict) and
+                             result.get('Id') and result.get('Name')):
+                         try:
+                             cache.cache_result(query_type, term_id, result, **kwargs)
+                             logger.debug(f"Cached complete result for {term_id}")
+                         except Exception as e:
+                             logger.debug(f"Failed to cache result: {e}")
+                     else:
+                         logger.warning(f"Not caching incomplete result for {term_id}")
+                 else:
+                     try:
+                         cache.cache_result(query_type, term_id, result, **kwargs)
+                     except Exception as e:
+                         logger.debug(f"Failed to cache result: {e}")
+
+             return result
+
+         return wrapper
+     return decorator
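
For orientation, here is a minimal usage sketch of the decorator and helpers added above. It is illustrative only and not part of the released wheel: the import path vfbquery.solr_result_cache is an assumption (the diff does not show the new file's name), get_term_info here is a stand-in function, and the term ID is an arbitrary example.

    # Illustrative sketch only -- not part of the package diff.
    # The import path below is assumed; adjust to wherever the new module lives in the wheel.
    from vfbquery.solr_result_cache import with_solr_cache, get_solr_cache

    @with_solr_cache('term_info')
    def get_term_info(short_form, **kwargs):
        # ...the expensive Neo4j/SOLR computation would normally happen here...
        return {"Id": short_form, "Name": "example term"}

    get_term_info("FBbt_00003748")   # first call computes the result and writes it to the SOLR cache
    get_term_info("FBbt_00003748")   # later calls can be served straight from the cache document

    cache = get_solr_cache()
    print(cache.get_cache_age('term_info', 'FBbt_00003748'))   # age/expiry info for the cached entry
    print(cache.get_cache_stats())                              # aggregate cache statistics

Because the decorated function returns a dict with both 'Id' and 'Name', it passes the term_info validation in the wrapper and is eligible for caching.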
@@ -1,5 +1,21 @@
  import re
  import json
+ import numpy as np
+
+ # Custom JSON encoder to handle NumPy and pandas types
+ class NumpyEncoder(json.JSONEncoder):
+     def default(self, obj):
+         if isinstance(obj, np.integer):
+             return int(obj)
+         elif isinstance(obj, np.floating):
+             return float(obj)
+         elif isinstance(obj, np.ndarray):
+             return obj.tolist()
+         elif isinstance(obj, np.bool_):
+             return bool(obj)
+         elif hasattr(obj, 'item'):  # Handle pandas scalar types
+             return obj.item()
+         return super(NumpyEncoder, self).default(obj)
  import requests
  from dataclasses import dataclass
  from dataclasses_json import dataclass_json
@@ -15,7 +31,7 @@ class Coordinates:
      Z: float

      def __str__(self):
-         return json.dumps([str(self.X), str(self.Y), str(self.Z)])
+         return json.dumps([str(self.X), str(self.Y), str(self.Z)], cls=NumpyEncoder)


  class CoordinatesFactory:
@@ -1062,7 +1078,7 @@ def serialize_term_info_to_json(vfb_term: VfbTerminfo, show_types=False) -> str:
      :return: json string representation of the term info object
      """
      term_info_dict = serialize_term_info_to_dict(vfb_term, show_types)
-     return json.dumps(term_info_dict, indent=4)
+     return json.dumps(term_info_dict, indent=4, cls=NumpyEncoder)


  def process(term_info_response: dict, variable, loaded_template: Optional[str] = None, show_types=False) -> dict:
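
The cls=NumpyEncoder changes above exist because the standard json module cannot serialize NumPy scalar and array types. A small standalone illustration (not part of the package; it only assumes NumpyEncoder is importable from vfbquery.term_info_queries, as the new cache module itself does):

    import json
    import numpy as np
    from vfbquery.term_info_queries import NumpyEncoder

    record = {"count": np.int64(42), "score": np.float64(0.5), "ids": np.array([1, 2, 3])}

    # json.dumps(record) would raise TypeError ("Object of type int64 is not JSON serializable");
    # with the custom encoder, NumPy values are converted to plain Python types first.
    print(json.dumps(record, cls=NumpyEncoder))
    # expected output: {"count": 42, "score": 0.5, "ids": [1, 2, 3]}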