tooluniverse 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tooluniverse might be problematic.

Files changed (57)
  1. tooluniverse/__init__.py +56 -5
  2. tooluniverse/agentic_tool.py +90 -14
  3. tooluniverse/arxiv_tool.py +113 -0
  4. tooluniverse/biorxiv_tool.py +97 -0
  5. tooluniverse/core_tool.py +153 -0
  6. tooluniverse/crossref_tool.py +73 -0
  7. tooluniverse/data/agentic_tools.json +2 -2
  8. tooluniverse/data/arxiv_tools.json +87 -0
  9. tooluniverse/data/biorxiv_tools.json +70 -0
  10. tooluniverse/data/core_tools.json +105 -0
  11. tooluniverse/data/crossref_tools.json +70 -0
  12. tooluniverse/data/dblp_tools.json +73 -0
  13. tooluniverse/data/doaj_tools.json +94 -0
  14. tooluniverse/data/fatcat_tools.json +72 -0
  15. tooluniverse/data/hal_tools.json +70 -0
  16. tooluniverse/data/medrxiv_tools.json +70 -0
  17. tooluniverse/data/odphp_tools.json +354 -0
  18. tooluniverse/data/openaire_tools.json +85 -0
  19. tooluniverse/data/osf_preprints_tools.json +77 -0
  20. tooluniverse/data/pmc_tools.json +109 -0
  21. tooluniverse/data/pubmed_tools.json +65 -0
  22. tooluniverse/data/unpaywall_tools.json +86 -0
  23. tooluniverse/data/wikidata_sparql_tools.json +42 -0
  24. tooluniverse/data/zenodo_tools.json +82 -0
  25. tooluniverse/dblp_tool.py +62 -0
  26. tooluniverse/default_config.py +18 -0
  27. tooluniverse/doaj_tool.py +124 -0
  28. tooluniverse/execute_function.py +70 -9
  29. tooluniverse/fatcat_tool.py +66 -0
  30. tooluniverse/hal_tool.py +77 -0
  31. tooluniverse/llm_clients.py +487 -0
  32. tooluniverse/mcp_tool_registry.py +3 -3
  33. tooluniverse/medrxiv_tool.py +97 -0
  34. tooluniverse/odphp_tool.py +226 -0
  35. tooluniverse/openaire_tool.py +145 -0
  36. tooluniverse/osf_preprints_tool.py +67 -0
  37. tooluniverse/pmc_tool.py +181 -0
  38. tooluniverse/pubmed_tool.py +110 -0
  39. tooluniverse/remote/boltz/boltz_mcp_server.py +2 -2
  40. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +2 -2
  41. tooluniverse/smcp.py +313 -191
  42. tooluniverse/smcp_server.py +4 -7
  43. tooluniverse/test/test_claude_sdk.py +93 -0
  44. tooluniverse/test/test_odphp_tool.py +166 -0
  45. tooluniverse/test/test_openrouter_client.py +288 -0
  46. tooluniverse/test/test_stdio_hooks.py +1 -1
  47. tooluniverse/test/test_tool_finder.py +1 -1
  48. tooluniverse/unpaywall_tool.py +63 -0
  49. tooluniverse/wikidata_sparql_tool.py +61 -0
  50. tooluniverse/zenodo_tool.py +74 -0
  51. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +101 -74
  52. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +56 -19
  53. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
  54. tooluniverse-1.0.6.dist-info/licenses/LICENSE +201 -0
  55. tooluniverse-1.0.4.dist-info/licenses/LICENSE +0 -21
  56. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
  57. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/llm_clients.py

@@ -21,6 +21,29 @@ class BaseLLMClient:
     ) -> Optional[str]:
         raise NotImplementedError

+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        """Default streaming implementation falls back to regular inference."""
+        result = self.infer(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            return_json=return_json,
+            custom_format=custom_format,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+        )
+        if result is not None:
+            yield result
+

 class AzureOpenAIClient(BaseLLMClient):
     # Built-in defaults for model families (can be overridden by env)
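The new BaseLLMClient.infer_stream default means any client that only implements infer() still supports the streaming call path, yielding the full response as a single chunk. A minimal, self-contained sketch of that fallback pattern; the NonStreamingClient class below is hypothetical and not part of the package:

from typing import Dict, List, Optional


class NonStreamingClient:
    """Hypothetical stand-in for a client that only implements infer()."""

    def infer(self, messages: List[Dict[str, str]], **kwargs) -> Optional[str]:
        return "full response in one piece"

    def infer_stream(self, messages: List[Dict[str, str]], **kwargs):
        # Same shape as BaseLLMClient.infer_stream: call infer() once and
        # yield the whole result as a single chunk.
        result = self.infer(messages, **kwargs)
        if result is not None:
            yield result


for chunk in NonStreamingClient().infer_stream([{"role": "user", "content": "hi"}]):
    print(chunk)  # prints the single fallback chunk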
@@ -305,6 +328,179 @@ class AzureOpenAIClient(BaseLLMClient):
         self.logger.error("Max retries exceeded. Unable to complete the request.")
         return None

+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json or custom_format is not None:
+            yield from super().infer_stream(
+                messages,
+                temperature,
+                max_tokens,
+                return_json,
+                custom_format,
+                max_retries,
+                retry_delay,
+            )
+            return
+
+        retries = 0
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                    "stream": True,
+                }
+                if temperature is not None:
+                    kwargs["temperature"] = temperature
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                stream = self.client.chat.completions.create(**kwargs)
+                for chunk in stream:
+                    text = AzureOpenAIClient._extract_text_from_chunk(chunk)  # type: ignore[attr-defined]
+                    if text:
+                        yield text
+                return
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"OpenRouter streaming rate limit hit. Retrying in {retry_delay} seconds..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"OpenRouter streaming error: {e}")
+                break
+
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
+    @staticmethod
+    def _extract_text_from_chunk(chunk) -> Optional[str]:
+        try:
+            choices = getattr(chunk, "choices", None)
+        except Exception:
+            choices = None
+        if not choices:
+            return None
+
+        first_choice = choices[0]
+        delta = getattr(first_choice, "delta", None)
+        if delta is None and isinstance(first_choice, dict):
+            delta = first_choice.get("delta")
+        if delta is None:
+            return None
+
+        content = getattr(delta, "content", None)
+        if content is None and isinstance(delta, dict):
+            content = delta.get("content")
+        if not content:
+            return None
+
+        if isinstance(content, str):
+            return content
+
+        if isinstance(content, list):
+            fragments: List[str] = []
+            for item in content:
+                text = getattr(item, "text", None)
+                if text is None and isinstance(item, dict):
+                    text = item.get("text")
+                if text:
+                    fragments.append(text)
+            return "".join(fragments) if fragments else None
+
+        return None
+
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json or custom_format is not None:
+            yield from super().infer_stream(
+                messages,
+                temperature,
+                max_tokens,
+                return_json,
+                custom_format,
+                max_retries,
+                retry_delay,
+            )
+            return
+
+        retries = 0
+        eff_temp = self._normalize_temperature(self.model_name, temperature)
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                    "stream": True,
+                }
+                if eff_temp is not None:
+                    kwargs["temperature"] = eff_temp
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                stream = self.client.chat.completions.create(**kwargs)
+                for chunk in stream:
+                    text = self._extract_text_from_chunk(chunk)
+                    if text:
+                        yield text
+                return
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"Rate limit exceeded. Retrying in {retry_delay} seconds (streaming)..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"Streaming error: {e}")
+                break
+
+        # Fallback to non-streaming if streaming fails
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+

 class GeminiClient(BaseLLMClient):
     def __init__(self, model_name: str, logger):
@@ -367,3 +563,294 @@ class GeminiClient:
                 retries += 1
                 time.sleep(retry_delay * retries)
         return None
+
+    @staticmethod
+    def _extract_text_from_stream_chunk(chunk) -> Optional[str]:
+        if chunk is None:
+            return None
+        text = getattr(chunk, "text", None)
+        if text:
+            return text
+
+        candidates = getattr(chunk, "candidates", None)
+        if not candidates and isinstance(chunk, dict):
+            candidates = chunk.get("candidates")
+        if not candidates:
+            return None
+
+        candidate = candidates[0]
+        content = getattr(candidate, "content", None)
+        if content is None and isinstance(candidate, dict):
+            content = candidate.get("content")
+        if not content:
+            return None
+
+        parts = getattr(content, "parts", None)
+        if parts is None and isinstance(content, dict):
+            parts = content.get("parts")
+        if parts and isinstance(parts, list):
+            fragments: List[str] = []
+            for part in parts:
+                piece = getattr(part, "text", None)
+                if piece is None and isinstance(part, dict):
+                    piece = part.get("text")
+                if piece:
+                    fragments.append(piece)
+            return "".join(fragments) if fragments else None
+
+        final_text = getattr(content, "text", None)
+        if final_text is None and isinstance(content, dict):
+            final_text = content.get("text")
+        return final_text
+
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json:
+            raise ValueError("Gemini JSON mode not supported here")
+
+        contents = ""
+        for m in messages:
+            if m["role"] in ("user", "system"):
+                contents += f"{m['content']}\n"
+
+        retries = 0
+        while retries < max_retries:
+            try:
+                gen_cfg: Dict[str, Any] = {
+                    "temperature": (temperature if temperature is not None else 0)
+                }
+                if max_tokens is not None:
+                    gen_cfg["max_output_tokens"] = max_tokens
+
+                model = self._build_model()
+                stream = model.generate_content(
+                    contents, generation_config=gen_cfg, stream=True
+                )
+                for chunk in stream:
+                    text = self._extract_text_from_stream_chunk(chunk)
+                    if text:
+                        yield text
+                return
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"Gemini streaming error: {e}")
+                retries += 1
+                time.sleep(retry_delay * retries)
+
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
+
+class OpenRouterClient(BaseLLMClient):
+    """
+    OpenRouter client using OpenAI SDK with custom base URL.
+    Supports models from OpenAI, Anthropic, Google, Qwen, and many other providers.
+    """
+
+    # Default model limits based on latest OpenRouter offerings
+    DEFAULT_MODEL_LIMITS: Dict[str, Dict[str, int]] = {
+        "openai/gpt-5": {"max_output": 128_000, "context_window": 400_000},
+        "openai/gpt-5-codex": {"max_output": 128_000, "context_window": 400_000},
+        "google/gemini-2.5-flash": {"max_output": 65_536, "context_window": 1_000_000},
+        "google/gemini-2.5-pro": {"max_output": 65_536, "context_window": 1_000_000},
+        "anthropic/claude-sonnet-4.5": {"max_output": 16_384, "context_window": 1_000_000},
+    }
+
+    def __init__(self, model_id: str, logger):
+        try:
+            from openai import OpenAI as _OpenAI  # type: ignore
+            import openai as _openai  # type: ignore
+        except Exception as e:  # pragma: no cover
+            raise RuntimeError("openai client is not available") from e
+
+        self._OpenAI = _OpenAI
+        self._openai = _openai
+        self.model_name = model_id
+        self.logger = logger
+
+        api_key = os.getenv("OPENROUTER_API_KEY")
+        if not api_key:
+            raise ValueError("OPENROUTER_API_KEY not set")
+
+        # Optional headers for OpenRouter
+        default_headers = {}
+        if site_url := os.getenv("OPENROUTER_SITE_URL"):
+            default_headers["HTTP-Referer"] = site_url
+        if site_name := os.getenv("OPENROUTER_SITE_NAME"):
+            default_headers["X-Title"] = site_name
+
+        self.client = self._OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=api_key,
+            default_headers=default_headers if default_headers else None,
+        )
+
+        # Load env overrides for model limits
+        env_limits_raw = os.getenv("OPENROUTER_DEFAULT_MODEL_LIMITS")
+        self._default_limits: Dict[str, Dict[str, int]] = (
+            self.DEFAULT_MODEL_LIMITS.copy()
+        )
+        if env_limits_raw:
+            try:
+                env_limits = _json.loads(env_limits_raw)
+                for k, v in env_limits.items():
+                    if isinstance(v, dict):
+                        base = self._default_limits.get(k, {}).copy()
+                        base.update(
+                            {
+                                kk: int(vv)
+                                for kk, vv in v.items()
+                                if isinstance(vv, (int, float, str))
+                            }
+                        )
+                        self._default_limits[k] = base
+            except Exception:
+                pass
+
+    def _resolve_default_max_tokens(self, model_id: str) -> Optional[int]:
+        """Resolve default max tokens for a model."""
+        # Highest priority: explicit env per-model tokens mapping
+        mapping_raw = os.getenv("OPENROUTER_MAX_TOKENS_BY_MODEL")
+        mapping: Dict[str, Any] = {}
+        if mapping_raw:
+            try:
+                mapping = _json.loads(mapping_raw)
+            except Exception:
+                mapping = {}
+
+        if model_id in mapping:
+            try:
+                return int(mapping[model_id])
+            except Exception:
+                pass
+
+        # Check for prefix match
+        for k, v in mapping.items():
+            try:
+                if model_id.startswith(k):
+                    return int(v)
+            except Exception:
+                continue
+
+        # Next: built-in/default-limits map
+        if model_id in self._default_limits:
+            return int(self._default_limits[model_id].get("max_output", 0)) or None
+
+        # Check for prefix match in default limits
+        for k, v in self._default_limits.items():
+            try:
+                if model_id.startswith(k):
+                    return int(v.get("max_output", 0)) or None
+            except Exception:
+                continue
+
+        return None
+
+    def test_api(self) -> None:
+        """Test API connectivity with minimal token usage."""
+        test_messages = [{"role": "user", "content": "ping"}]
+        token_attempts = [1, 4, 16, 32]
+        last_error: Optional[Exception] = None
+
+        for tok in token_attempts:
+            try:
+                self.client.chat.completions.create(
+                    model=self.model_name,
+                    messages=test_messages,
+                    max_tokens=tok,
+                    temperature=0,
+                )
+                return
+            except Exception as e:  # noqa: BLE001
+                last_error = e
+                msg = str(e).lower()
+                if (
+                    "max_tokens" in msg
+                    or "model output limit" in msg
+                    or "finish the message" in msg
+                ) and tok != token_attempts[-1]:
+                    continue
+                break
+
+        if last_error:
+            raise ValueError(f"OpenRouter API test failed: {last_error}")
+        raise ValueError("OpenRouter API test failed: unknown error")
+
+    def infer(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ) -> Optional[str]:
+        """Execute inference using OpenRouter."""
+        retries = 0
+        call_fn = (
+            self.client.chat.completions.parse
+            if custom_format is not None
+            else self.client.chat.completions.create
+        )
+
+        response_format = (
+            custom_format
+            if custom_format is not None
+            else ({"type": "json_object"} if return_json else None)
+        )
+
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                }
+
+                if response_format is not None:
+                    kwargs["response_format"] = response_format
+                if temperature is not None:
+                    kwargs["temperature"] = temperature
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                resp = call_fn(**kwargs)
+
+                if custom_format is not None:
+                    return resp.choices[0].message.parsed.model_dump()
+                return resp.choices[0].message.content
+
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"Rate limit exceeded. Retrying in {retry_delay} seconds..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"OpenRouter error: {e}")
+                import traceback
+                traceback.print_exc()
+                break
+
+        self.logger.error("Max retries exceeded. Unable to complete the request.")
+        return None
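Taken together, these hunks give every client a uniform streaming interface and introduce OpenRouter as a provider. A minimal usage sketch, assuming the import path matches the llm_clients.py module from the file list, that OPENROUTER_API_KEY is exported in the environment, and that a standard logging.Logger is acceptable for the logger argument:

import logging
import os

# Assumed import path based on the file list above; the key value is a placeholder.
from tooluniverse.llm_clients import OpenRouterClient

assert os.getenv("OPENROUTER_API_KEY"), "export OPENROUTER_API_KEY first"

client = OpenRouterClient("google/gemini-2.5-flash", logging.getLogger("demo"))
for chunk in client.infer_stream(
    messages=[{"role": "user", "content": "Summarize the CRISPR-Cas9 mechanism."}],
    temperature=0.2,
    max_tokens=512,
    return_json=False,
):
    print(chunk, end="", flush=True)

If the streaming path keeps failing, the clients fall back to the base class's single-chunk infer_stream behavior shown in the first hunk.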
@@ -327,13 +327,12 @@ def _start_server_for_port(port: int, **kwargs):

     print(f"🚀 Starting MCP server on port {port} with {len(tools)} tools...")

-    # Create SMCP server with stateless mode for compatibility
+    # Create SMCP server for compatibility
     server = _get_smcp()(
         name=config["server_name"],
         auto_expose_tools=False,  # We'll add tools manually
         search_enabled=True,
         max_workers=config.get("max_workers", 5),
-        stateless_http=True,  # Enable stateless mode for MCPAutoLoaderTool compatibility
         **kwargs,
     )

@@ -347,8 +346,9 @@ def _start_server_for_port(port: int, **kwargs):
     # Start server in background thread
     def run_server():
         try:
+            # Enable stateless mode for MCPAutoLoaderTool compatibility
            server.run_simple(
-                transport=config["transport"], host=config["host"], port=port
+                transport=config["transport"], host=config["host"], port=port, stateless_http=True
            )
        except Exception as e:
            print(f"❌ Error running MCP server on port {port}: {e}")
tooluniverse/medrxiv_tool.py

@@ -0,0 +1,97 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("MedRxivTool")
+class MedRxivTool(BaseTool):
+    """
+    Search medRxiv preprints using medRxiv's API (same interface as bioRxiv).
+
+    Arguments:
+        query (str): Search term
+        max_results (int): Max results to return (default 10, max 200)
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="https://api.medrxiv.org/details",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        if not query:
+            return {"error": "`query` parameter is required."}
+        return self._search(query, max_results)
+
+    def _search(self, query, max_results):
+        # Use date range search for recent preprints
+        # Format: /medrxiv/{start_date}/{end_date}/{cursor}/json
+        from datetime import datetime, timedelta
+
+        # Search last 30 days
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=30)
+
+        url = (f"{self.base_url}/medrxiv/"
+               f"{start_date.strftime('%Y-%m-%d')}/"
+               f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+        try:
+            resp = requests.get(url, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling medRxiv",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode medRxiv response as JSON"}
+
+        results = []
+        # The API returns a dictionary with a 'collection' key
+        collection = data.get("collection", [])
+        if not isinstance(collection, list):
+            return {"error": "Unexpected API response format"}
+
+        for item in collection:
+            title = item.get("title")
+            authors = item.get("authors", "")
+            if isinstance(authors, str):
+                authors = [a.strip() for a in authors.split(";") if a.strip()]
+            elif isinstance(authors, list):
+                authors = [str(a).strip() for a in authors if str(a).strip()]
+            else:
+                authors = []
+
+            year = None
+            date = item.get("date")
+            if date and len(date) >= 4 and date[:4].isdigit():
+                year = int(date[:4])
+
+            doi = item.get("doi")
+            url = f"https://www.medrxiv.org/content/{doi}" if doi else None
+
+            # Filter by query if provided
+            if query and query.lower() not in (title or "").lower():
+                continue
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "abstract": item.get("abstract", ""),
+                    "source": "medRxiv",
+                }
+            )
+
+        return results[:max_results]
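A minimal usage sketch for the new tool, assuming a plain dict is an acceptable tool_config for BaseTool; the config fields below are illustrative only, and the real schema lives in tooluniverse/data/medrxiv_tools.json (not shown here):

from tooluniverse.medrxiv_tool import MedRxivTool

# Illustrative config; the real schema is defined in data/medrxiv_tools.json.
tool = MedRxivTool(tool_config={"name": "medrxiv_search", "type": "MedRxivTool"})

result = tool.run({"query": "influenza", "max_results": 5})
if isinstance(result, dict) and "error" in result:
    print("medRxiv lookup failed:", result)
else:
    for paper in result:
        print(paper["year"], paper["title"], paper["url"])

Note that the tool fetches only the last 30 days of preprints and then filters titles client-side against the query, so broad queries can return far fewer than max_results.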