tooluniverse 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse has been flagged as possibly problematic by the registry.
- tooluniverse/__init__.py +56 -10
- tooluniverse/admetai_tool.py +8 -4
- tooluniverse/agentic_tool.py +40 -4
- tooluniverse/arxiv_tool.py +2 -6
- tooluniverse/base_tool.py +210 -25
- tooluniverse/biogrid_tool.py +118 -0
- tooluniverse/biorxiv_tool.py +35 -16
- tooluniverse/build_optimizer.py +87 -0
- tooluniverse/cache/__init__.py +3 -0
- tooluniverse/cache/memory_cache.py +99 -0
- tooluniverse/cache/result_cache_manager.py +235 -0
- tooluniverse/cache/sqlite_backend.py +257 -0
- tooluniverse/cellosaurus_tool.py +1332 -0
- tooluniverse/clinvar_tool.py +90 -0
- tooluniverse/compose_scripts/enhanced_multi_agent_literature_search.py +310 -0
- tooluniverse/compose_scripts/multi_agent_literature_search.py +794 -0
- tooluniverse/compose_scripts/tool_graph_generation.py +68 -35
- tooluniverse/compose_scripts/tool_metadata_generator.py +205 -105
- tooluniverse/compose_tool.py +93 -8
- tooluniverse/core_tool.py +46 -44
- tooluniverse/crossref_tool.py +89 -4
- tooluniverse/custom_tool.py +28 -0
- tooluniverse/data/agentic_tools.json +1271 -1179
- tooluniverse/data/alphafold_tools.json +356 -105
- tooluniverse/data/arxiv_tools.json +85 -81
- tooluniverse/data/biorxiv_tools.json +69 -64
- tooluniverse/data/cellosaurus_tools.json +260 -0
- tooluniverse/data/chembl_tools.json +27 -12
- tooluniverse/data/clinicaltrials_gov_tools.json +377 -302
- tooluniverse/data/compose_tools.json +123 -16
- tooluniverse/data/core_tools.json +104 -99
- tooluniverse/data/crossref_tools.json +131 -63
- tooluniverse/data/dailymed_tools.json +17 -3
- tooluniverse/data/dataset_tools.json +1031 -588
- tooluniverse/data/dblp_tools.json +135 -64
- tooluniverse/data/disease_target_score_tools.json +20 -10
- tooluniverse/data/doaj_tools.json +131 -87
- tooluniverse/data/drug_discovery_agents.json +292 -0
- tooluniverse/data/embedding_tools.json +362 -299
- tooluniverse/data/enrichr_tools.json +34 -27
- tooluniverse/data/europe_pmc_tools.json +107 -16
- tooluniverse/data/fatcat_tools.json +71 -66
- tooluniverse/data/fda_drug_adverse_event_tools.json +1061 -445
- tooluniverse/data/fda_drug_labeling_tools.json +6858 -6901
- tooluniverse/data/finder_tools.json +32 -37
- tooluniverse/data/gene_ontology_tools.json +19 -7
- tooluniverse/data/genomics_tools.json +174 -0
- tooluniverse/data/geo_tools.json +86 -0
- tooluniverse/data/gwas_tools.json +1720 -959
- tooluniverse/data/hal_tools.json +69 -64
- tooluniverse/data/hpa_tools.json +53 -14
- tooluniverse/data/humanbase_tools.json +51 -43
- tooluniverse/data/idmap_tools.json +76 -70
- tooluniverse/data/literature_search_tools.json +306 -0
- tooluniverse/data/markitdown_tools.json +51 -0
- tooluniverse/data/mcp_client_tools_example.json +122 -107
- tooluniverse/data/medlineplus_tools.json +50 -10
- tooluniverse/data/medrxiv_tools.json +69 -64
- tooluniverse/data/molecule_2d_tools.json +134 -0
- tooluniverse/data/molecule_3d_tools.json +164 -0
- tooluniverse/data/monarch_tools.json +112 -110
- tooluniverse/data/odphp_tools.json +389 -119
- tooluniverse/data/openaire_tools.json +89 -79
- tooluniverse/data/openalex_tools.json +96 -31
- tooluniverse/data/opentarget_tools.json +1457 -1372
- tooluniverse/data/osf_preprints_tools.json +77 -73
- tooluniverse/data/packages/bioinformatics_core_tools.json +40 -10
- tooluniverse/data/packages/cheminformatics_tools.json +20 -5
- tooluniverse/data/packages/genomics_tools.json +36 -9
- tooluniverse/data/packages/machine_learning_tools.json +36 -9
- tooluniverse/data/packages/scientific_computing_tools.json +20 -5
- tooluniverse/data/packages/single_cell_tools.json +20 -5
- tooluniverse/data/packages/structural_biology_tools.json +16 -4
- tooluniverse/data/packages/visualization_tools.json +20 -5
- tooluniverse/data/pmc_tools.json +108 -103
- tooluniverse/data/ppi_tools.json +139 -0
- tooluniverse/data/protein_structure_3d_tools.json +138 -0
- tooluniverse/data/pubchem_tools.json +37 -12
- tooluniverse/data/pubmed_tools.json +124 -58
- tooluniverse/data/pubtator_tools.json +68 -60
- tooluniverse/data/rcsb_pdb_tools.json +1532 -1221
- tooluniverse/data/semantic_scholar_tools.json +54 -22
- tooluniverse/data/special_tools.json +8 -6
- tooluniverse/data/tool_composition_tools.json +112 -82
- tooluniverse/data/unified_guideline_tools.json +909 -0
- tooluniverse/data/url_fetch_tools.json +102 -82
- tooluniverse/data/uspto_tools.json +49 -30
- tooluniverse/data/wikidata_sparql_tools.json +42 -39
- tooluniverse/data/xml_tools.json +3274 -3113
- tooluniverse/data/zenodo_tools.json +83 -76
- tooluniverse/dblp_tool.py +76 -6
- tooluniverse/dbsnp_tool.py +71 -0
- tooluniverse/default_config.py +19 -0
- tooluniverse/doaj_tool.py +76 -17
- tooluniverse/doctor.py +48 -0
- tooluniverse/ensembl_tool.py +61 -0
- tooluniverse/europe_pmc_tool.py +132 -17
- tooluniverse/exceptions.py +170 -0
- tooluniverse/execute_function.py +930 -387
- tooluniverse/fatcat_tool.py +0 -1
- tooluniverse/generate_tools.py +481 -0
- tooluniverse/genomics_gene_search_tool.py +56 -0
- tooluniverse/geo_tool.py +116 -0
- tooluniverse/gnomad_tool.py +63 -0
- tooluniverse/hal_tool.py +1 -1
- tooluniverse/llm_clients.py +101 -124
- tooluniverse/markitdown_tool.py +159 -0
- tooluniverse/mcp_client_tool.py +10 -5
- tooluniverse/mcp_tool_registry.py +4 -1
- tooluniverse/medrxiv_tool.py +32 -13
- tooluniverse/memory_manager.py +166 -0
- tooluniverse/molecule_2d_tool.py +274 -0
- tooluniverse/molecule_3d_tool.py +441 -0
- tooluniverse/odphp_tool.py +49 -14
- tooluniverse/openaire_tool.py +5 -20
- tooluniverse/openalex_tool.py +34 -0
- tooluniverse/osf_preprints_tool.py +1 -1
- tooluniverse/pmc_tool.py +54 -56
- tooluniverse/protein_structure_3d_tool.py +295 -0
- tooluniverse/pubmed_tool.py +69 -6
- tooluniverse/remote/boltz/boltz_mcp_server.py +3 -1
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -1
- tooluniverse/semantic_scholar_tool.py +40 -10
- tooluniverse/smcp.py +149 -213
- tooluniverse/smcp_server.py +97 -55
- tooluniverse/string_tool.py +112 -0
- tooluniverse/tool_registry.py +35 -3
- tooluniverse/tools/ADMETAI_predict_BBB_penetrance.py +46 -0
- tooluniverse/tools/ADMETAI_predict_CYP_interactions.py +46 -0
- tooluniverse/tools/ADMETAI_predict_bioavailability.py +46 -0
- tooluniverse/tools/ADMETAI_predict_clearance_distribution.py +49 -0
- tooluniverse/tools/ADMETAI_predict_nuclear_receptor_activity.py +49 -0
- tooluniverse/tools/ADMETAI_predict_physicochemical_properties.py +49 -0
- tooluniverse/tools/ADMETAI_predict_solubility_lipophilicity_hydration.py +49 -0
- tooluniverse/tools/ADMETAI_predict_stress_response.py +46 -0
- tooluniverse/tools/ADMETAI_predict_toxicity.py +46 -0
- tooluniverse/tools/ADMETAnalyzerAgent.py +59 -0
- tooluniverse/tools/AdvancedCodeQualityAnalyzer.py +63 -0
- tooluniverse/tools/AdverseEventICDMapper.py +46 -0
- tooluniverse/tools/AdverseEventPredictionQuestionGenerator.py +52 -0
- tooluniverse/tools/AdverseEventPredictionQuestionGeneratorWithContext.py +59 -0
- tooluniverse/tools/ArXiv_search_papers.py +63 -0
- tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
- tooluniverse/tools/BioRxiv_search_preprints.py +52 -0
- tooluniverse/tools/BiomarkerDiscoveryWorkflow.py +55 -0
- tooluniverse/tools/CMA_Guidelines_Search.py +52 -0
- tooluniverse/tools/CORE_search_papers.py +67 -0
- tooluniverse/tools/CallAgent.py +46 -0
- tooluniverse/tools/ChEMBL_search_similar_molecules.py +59 -0
- tooluniverse/tools/ClinVar_search_variants.py +52 -0
- tooluniverse/tools/ClinicalTrialDesignAgent.py +63 -0
- tooluniverse/tools/CodeOptimizer.py +55 -0
- tooluniverse/tools/CodeQualityAnalyzer.py +71 -0
- tooluniverse/tools/CompoundDiscoveryAgent.py +59 -0
- tooluniverse/tools/ComprehensiveDrugDiscoveryPipeline.py +49 -0
- tooluniverse/tools/Crossref_search_works.py +55 -0
- tooluniverse/tools/DBLP_search_publications.py +52 -0
- tooluniverse/tools/DOAJ_search_articles.py +55 -0
- tooluniverse/tools/DailyMed_get_spl_by_setid.py +52 -0
- tooluniverse/tools/DailyMed_search_spls.py +79 -0
- tooluniverse/tools/DataAnalysisValidityReviewer.py +49 -0
- tooluniverse/tools/DescriptionAnalyzer.py +55 -0
- tooluniverse/tools/DescriptionQualityEvaluator.py +59 -0
- tooluniverse/tools/DiseaseAnalyzerAgent.py +52 -0
- tooluniverse/tools/DomainExpertValidator.py +63 -0
- tooluniverse/tools/DrugInteractionAnalyzerAgent.py +52 -0
- tooluniverse/tools/DrugOptimizationAgent.py +63 -0
- tooluniverse/tools/DrugSafetyAnalyzer.py +59 -0
- tooluniverse/tools/Ensembl_lookup_gene_by_symbol.py +52 -0
- tooluniverse/tools/EthicalComplianceReviewer.py +49 -0
- tooluniverse/tools/EuropePMC_Guidelines_Search.py +52 -0
- tooluniverse/tools/EuropePMC_search_articles.py +52 -0
- tooluniverse/tools/ExperimentalDesignScorer.py +55 -0
- tooluniverse/tools/FAERS_count_additive_administration_routes.py +52 -0
- tooluniverse/tools/FAERS_count_additive_adverse_reactions.py +71 -0
- tooluniverse/tools/FAERS_count_additive_event_reports_by_country.py +63 -0
- tooluniverse/tools/FAERS_count_additive_reaction_outcomes.py +63 -0
- tooluniverse/tools/FAERS_count_additive_reports_by_reporter_country.py +63 -0
- tooluniverse/tools/FAERS_count_additive_seriousness_classification.py +63 -0
- tooluniverse/tools/FAERS_count_country_by_drug_event.py +63 -0
- tooluniverse/tools/FAERS_count_death_related_by_drug.py +49 -0
- tooluniverse/tools/FAERS_count_drug_routes_by_event.py +52 -0
- tooluniverse/tools/FAERS_count_drugs_by_drug_event.py +63 -0
- tooluniverse/tools/FAERS_count_outcomes_by_drug_event.py +63 -0
- tooluniverse/tools/FAERS_count_patient_age_distribution.py +49 -0
- tooluniverse/tools/FAERS_count_reactions_by_drug_event.py +71 -0
- tooluniverse/tools/FAERS_count_reportercountry_by_drug_event.py +63 -0
- tooluniverse/tools/FAERS_count_seriousness_by_drug_event.py +63 -0
- tooluniverse/tools/FDA_get_abuse_dependence_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_abuse_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_accessories_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_active_ingredient_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_adverse_reactions_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_alarms_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_animal_pharmacology_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_assembly_installation_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_boxed_warning_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_brand_name_generic_name.py +52 -0
- tooluniverse/tools/FDA_get_calibration_instructions_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_carcinogenic_mutagenic_fertility_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_child_safety_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_clinical_pharmacology_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_clinical_studies_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_contact_for_questions_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_contraindications_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_controlled_substance_DEA_schedule_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_dear_health_care_provider_letter_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_dependence_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_disposal_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_do_not_use_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_document_id_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_dosage_and_storage_information_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_dosage_forms_and_strengths_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_drug_generic_name.py +46 -0
- tooluniverse/tools/FDA_get_drug_interactions_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_SPL_ID.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_adverse_reaction.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_calibration_instructions.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_dependence_info.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_document_id.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_dosage_info.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_environmental_warning.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_inactive_ingredient.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_info_on_conditions_for_doctor_consultation.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_labor_and_delivery_info.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_microbiology.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_other_safety_info.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_pharmacodynamics.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_pharmacogenomics.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_precautions.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_pregnancy_or_breastfeeding_info.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_principal_display_panel.py +59 -0
- tooluniverse/tools/FDA_get_drug_name_by_reference.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_set_id.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_stop_use_info.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_storage_and_handling_info.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_by_warnings.py +55 -0
- tooluniverse/tools/FDA_get_drug_name_from_patient_package_insert.py +59 -0
- tooluniverse/tools/FDA_get_drug_names_by_abuse_dependence_info.py +55 -0
- tooluniverse/tools/FDA_get_drug_names_by_abuse_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_accessories.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_active_ingredient.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_alarm.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_animal_pharmacology_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_application_number_NDC_number.py +59 -0
- tooluniverse/tools/FDA_get_drug_names_by_assembly_installation_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_boxed_warning.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_child_safety_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_clinical_pharmacology.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_clinical_studies.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_consulting_doctor_pharmacist_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_contraindications.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_controlled_substance_DEA_schedule.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_dear_health_care_provider_letter_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_disposal_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_dosage_forms_and_strengths_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_drug_interactions.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_effective_time.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_food_safety_warnings.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_general_precautions.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_geriatric_use.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_health_claim.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_indication.py +55 -0
- tooluniverse/tools/FDA_get_drug_names_by_info_for_nursing_mothers.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_information_for_owners_or_caregivers.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_ingredient.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_instructions_for_use.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_lab_test_interference.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_lab_tests.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_mechanism_of_action.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_medication_guide.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_nonclinical_toxicology_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_nonteratogenic_effects.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_overdosage_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_pediatric_use.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_pharmacokinetics.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_population_use.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_pregnancy_effects_info.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_residue_warning.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_risk.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_route.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_safe_handling_warning.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_safety_summary.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_spl_indexing_data_elements.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_teratogenic_effects.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_user_safety_warning.py +63 -0
- tooluniverse/tools/FDA_get_drug_names_by_warnings_and_cautions.py +63 -0
- tooluniverse/tools/FDA_get_drugs_by_carcinogenic_mutagenic_fertility.py +63 -0
- tooluniverse/tools/FDA_get_effective_time_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_environmental_warning_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_general_precautions_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_geriatric_use_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_health_claims_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_inactive_ingredient_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_indications_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_info_for_nursing_mothers_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_info_for_patients_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_info_on_conditions_for_doctor_consultation_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_info_on_consulting_doctor_pharmacist_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_information_for_owners_or_caregivers_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_ingredients_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_instructions_for_use_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_lab_test_interference_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_lab_tests_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_labor_and_delivery_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_manufacturer_name_NDC_number_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_mechanism_of_action_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_medication_guide_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_microbiology_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_nonclinical_toxicology_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_nonteratogenic_effects_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_other_safety_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_overdosage_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_patient_package_insert_from_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_pediatric_use_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_pharmacodynamics_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_pharmacogenomics_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_pharmacokinetics_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_population_use_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_precautions_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_pregnancy_effects_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_pregnancy_or_breastfeeding_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_principal_display_panel_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_purpose_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_recent_changes_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_reference_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_residue_warning_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_risk_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_route_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_safe_handling_warnings_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_safety_summary_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_spl_indexing_data_elements_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_spl_unclassified_section_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_stop_use_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_storage_and_handling_info_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_teratogenic_effects_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_user_safety_warning_by_drug_names.py +55 -0
- tooluniverse/tools/FDA_get_warnings_and_cautions_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_warnings_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_get_when_using_info.py +55 -0
- tooluniverse/tools/FDA_retrieve_device_use_by_drug_name.py +55 -0
- tooluniverse/tools/FDA_retrieve_drug_name_by_device_use.py +59 -0
- tooluniverse/tools/FDA_retrieve_drug_names_by_patient_medication_info.py +55 -0
- tooluniverse/tools/FDA_retrieve_patient_medication_info_by_drug_name.py +55 -0
- tooluniverse/tools/Fatcat_search_scholar.py +52 -0
- tooluniverse/tools/Finish.py +44 -0
- tooluniverse/tools/GIN_Guidelines_Search.py +52 -0
- tooluniverse/tools/GO_get_annotations_for_gene.py +46 -0
- tooluniverse/tools/GO_get_genes_for_term.py +55 -0
- tooluniverse/tools/GO_get_term_by_id.py +46 -0
- tooluniverse/tools/GO_get_term_details.py +46 -0
- tooluniverse/tools/GO_search_terms.py +46 -0
- tooluniverse/tools/GWAS_search_associations_by_gene.py +52 -0
- tooluniverse/tools/HAL_search_archive.py +52 -0
- tooluniverse/tools/HPA_get_biological_processes_by_gene.py +52 -0
- tooluniverse/tools/HPA_get_cancer_prognostics_by_gene.py +49 -0
- tooluniverse/tools/HPA_get_comparative_expression_by_gene_and_cellline.py +52 -0
- tooluniverse/tools/HPA_get_comprehensive_gene_details_by_ensembl_id.py +63 -0
- tooluniverse/tools/HPA_get_contextual_biological_process_analysis.py +52 -0
- tooluniverse/tools/HPA_get_disease_expression_by_gene_tissue_disease.py +59 -0
- tooluniverse/tools/HPA_get_gene_basic_info_by_ensembl_id.py +49 -0
- tooluniverse/tools/HPA_get_gene_tsv_data_by_ensembl_id.py +49 -0
- tooluniverse/tools/HPA_get_protein_interactions_by_gene.py +49 -0
- tooluniverse/tools/HPA_get_rna_expression_by_source.py +59 -0
- tooluniverse/tools/HPA_get_rna_expression_in_specific_tissues.py +52 -0
- tooluniverse/tools/HPA_get_subcellular_location.py +46 -0
- tooluniverse/tools/HPA_search_genes_by_query.py +49 -0
- tooluniverse/tools/HypothesisGenerator.py +63 -0
- tooluniverse/tools/LabelGenerator.py +67 -0
- tooluniverse/tools/LiteratureContextReviewer.py +55 -0
- tooluniverse/tools/LiteratureSearchTool.py +49 -0
- tooluniverse/tools/LiteratureSynthesisAgent.py +59 -0
- tooluniverse/tools/MedRxiv_search_preprints.py +52 -0
- tooluniverse/tools/MedicalLiteratureReviewer.py +71 -0
- tooluniverse/tools/MedicalTermNormalizer.py +46 -0
- tooluniverse/tools/MedlinePlus_connect_lookup_by_code.py +67 -0
- tooluniverse/tools/MedlinePlus_get_genetics_condition_by_name.py +52 -0
- tooluniverse/tools/MedlinePlus_get_genetics_gene_by_name.py +52 -0
- tooluniverse/tools/MedlinePlus_get_genetics_index.py +44 -0
- tooluniverse/tools/MedlinePlus_search_topics_by_keyword.py +55 -0
- tooluniverse/tools/MethodologyRigorReviewer.py +49 -0
- tooluniverse/tools/MultiAgentLiteratureSearch.py +59 -0
- tooluniverse/tools/NICE_Clinical_Guidelines_Search.py +52 -0
- tooluniverse/tools/NICE_Guideline_Full_Text.py +46 -0
- tooluniverse/tools/NoveltySignificanceReviewer.py +59 -0
- tooluniverse/tools/OSF_search_preprints.py +59 -0
- tooluniverse/tools/OSL_get_efo_id_by_disease_name.py +46 -0
- tooluniverse/tools/OpenAIRE_search_publications.py +55 -0
- tooluniverse/tools/OpenAlex_Guidelines_Search.py +63 -0
- tooluniverse/tools/OpenTargets_drug_pharmacogenomics_data.py +52 -0
- tooluniverse/tools/OpenTargets_get_approved_indications_by_drug_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_associated_diseases_by_drug_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_associated_drugs_by_disease_efoId.py +52 -0
- tooluniverse/tools/OpenTargets_get_associated_drugs_by_target_ensemblID.py +55 -0
- tooluniverse/tools/OpenTargets_get_associated_phenotypes_by_disease_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_associated_targets_by_disease_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_associated_targets_by_drug_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_biological_mouse_models_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_chemical_probes_by_target_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_ancestors_parents_by_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_descendants_children_by_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_description_by_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_id_description_by_name.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_ids_by_efoId.py +46 -0
- tooluniverse/tools/OpenTargets_get_disease_ids_by_name.py +46 -0
- tooluniverse/tools/OpenTargets_get_disease_locations_by_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_synonyms_by_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_disease_therapeutic_areas_by_efoId.py +49 -0
- tooluniverse/tools/OpenTargets_get_diseases_phenotypes_by_target_ensembl.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_adverse_events_by_chemblId.py +52 -0
- tooluniverse/tools/OpenTargets_get_drug_approval_status_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_chembId_by_generic_name.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_description_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_id_description_by_name.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_indications_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_mechanisms_of_action_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_synonyms_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_trade_names_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_warnings_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_drug_withdrawn_blackbox_status_by_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_gene_ontology_terms_by_goID.py +49 -0
- tooluniverse/tools/OpenTargets_get_known_drugs_by_drug_chemblId.py +49 -0
- tooluniverse/tools/OpenTargets_get_parent_child_molecules_by_drug_chembl_ID.py +49 -0
- tooluniverse/tools/OpenTargets_get_publications_by_disease_efoId.py +71 -0
- tooluniverse/tools/OpenTargets_get_publications_by_drug_chemblId.py +71 -0
- tooluniverse/tools/OpenTargets_get_publications_by_target_ensemblID.py +71 -0
- tooluniverse/tools/OpenTargets_get_similar_entities_by_disease_efoId.py +55 -0
- tooluniverse/tools/OpenTargets_get_similar_entities_by_drug_chemblId.py +55 -0
- tooluniverse/tools/OpenTargets_get_similar_entities_by_target_ensemblID.py +55 -0
- tooluniverse/tools/OpenTargets_get_target_classes_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_constraint_info_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_enabling_packages_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_gene_ontology_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_genomic_location_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_homologues_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_id_description_by_name.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_interactions_by_ensemblID.py +52 -0
- tooluniverse/tools/OpenTargets_get_target_safety_profile_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_subcellular_locations_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_synonyms_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_get_target_tractability_by_ensemblID.py +49 -0
- tooluniverse/tools/OpenTargets_map_any_disease_id_to_all_other_ids.py +49 -0
- tooluniverse/tools/OpenTargets_multi_entity_search_by_query_string.py +59 -0
- tooluniverse/tools/OpenTargets_search_category_counts_by_query_string.py +49 -0
- tooluniverse/tools/OpenTargets_target_disease_evidence.py +52 -0
- tooluniverse/tools/OutputSummarizationComposer.py +71 -0
- tooluniverse/tools/PMC_search_papers.py +67 -0
- tooluniverse/tools/ProtocolOptimizer.py +49 -0
- tooluniverse/tools/PubChem_get_CID_by_SMILES.py +46 -0
- tooluniverse/tools/PubChem_get_CID_by_compound_name.py +46 -0
- tooluniverse/tools/PubChem_get_associated_patents_by_CID.py +46 -0
- tooluniverse/tools/PubChem_get_compound_2D_image_by_CID.py +52 -0
- tooluniverse/tools/PubChem_get_compound_properties_by_CID.py +46 -0
- tooluniverse/tools/PubChem_get_compound_synonyms_by_CID.py +46 -0
- tooluniverse/tools/PubChem_get_compound_xrefs_by_CID.py +52 -0
- tooluniverse/tools/PubChem_search_compounds_by_similarity.py +52 -0
- tooluniverse/tools/PubChem_search_compounds_by_substructure.py +49 -0
- tooluniverse/tools/PubMed_Guidelines_Search.py +55 -0
- tooluniverse/tools/PubMed_search_articles.py +55 -0
- tooluniverse/tools/PubTator3_EntityAutocomplete.py +59 -0
- tooluniverse/tools/PubTator3_LiteratureSearch.py +55 -0
- tooluniverse/tools/QuestionRephraser.py +52 -0
- tooluniverse/tools/Reactome_get_pathway_reactions.py +46 -0
- tooluniverse/tools/ReproducibilityTransparencyReviewer.py +49 -0
- tooluniverse/tools/ResultsInterpretationReviewer.py +55 -0
- tooluniverse/tools/ScientificTextSummarizer.py +59 -0
- tooluniverse/tools/SemanticScholar_search_papers.py +55 -0
- tooluniverse/tools/TRIP_Database_Guidelines_Search.py +55 -0
- tooluniverse/tools/TestCaseGenerator.py +46 -0
- tooluniverse/tools/ToolCompatibilityAnalyzer.py +59 -0
- tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
- tooluniverse/tools/ToolDiscover.py +63 -0
- tooluniverse/tools/ToolGraphComposer.py +71 -0
- tooluniverse/tools/ToolGraphGenerationPipeline.py +63 -0
- tooluniverse/tools/ToolImplementationGenerator.py +67 -0
- tooluniverse/tools/ToolMetadataGenerationPipeline.py +63 -0
- tooluniverse/tools/ToolMetadataGenerator.py +55 -0
- tooluniverse/tools/ToolMetadataStandardizer.py +52 -0
- tooluniverse/tools/ToolOptimizer.py +59 -0
- tooluniverse/tools/ToolOutputSummarizer.py +67 -0
- tooluniverse/tools/ToolQualityEvaluator.py +59 -0
- tooluniverse/tools/ToolRelationshipDetector.py +52 -0
- tooluniverse/tools/ToolSpecificationGenerator.py +67 -0
- tooluniverse/tools/ToolSpecificationOptimizer.py +63 -0
- tooluniverse/tools/Tool_Finder.py +67 -0
- tooluniverse/tools/Tool_Finder_Keyword.py +67 -0
- tooluniverse/tools/Tool_Finder_LLM.py +67 -0
- tooluniverse/tools/Tool_RAG.py +49 -0
- tooluniverse/tools/UCSC_get_genes_by_region.py +67 -0
- tooluniverse/tools/UniProt_get_alternative_names_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_disease_variants_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_entry_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_function_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_isoform_ids_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_organism_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_ptm_processing_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_recommended_name_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_sequence_by_accession.py +49 -0
- tooluniverse/tools/UniProt_get_subcellular_location_by_accession.py +49 -0
- tooluniverse/tools/Unpaywall_check_oa_status.py +52 -0
- tooluniverse/tools/WHO_Guideline_Full_Text.py +46 -0
- tooluniverse/tools/WHO_Guidelines_Search.py +52 -0
- tooluniverse/tools/Wikidata_SPARQL_query.py +52 -0
- tooluniverse/tools/WritingPresentationReviewer.py +49 -0
- tooluniverse/tools/Zenodo_search_records.py +59 -0
- tooluniverse/tools/__init__.py +1770 -0
- tooluniverse/tools/_shared_client.py +138 -0
- tooluniverse/tools/alphafold_get_annotations.py +52 -0
- tooluniverse/tools/alphafold_get_prediction.py +55 -0
- tooluniverse/tools/alphafold_get_summary.py +46 -0
- tooluniverse/tools/call_agentic_human.py +46 -0
- tooluniverse/tools/cancer_biomarkers_disease_target_score.py +52 -0
- tooluniverse/tools/cancer_gene_census_disease_target_score.py +52 -0
- tooluniverse/tools/cellosaurus_get_cell_line_info.py +55 -0
- tooluniverse/tools/cellosaurus_query_converter.py +52 -0
- tooluniverse/tools/cellosaurus_search_cell_lines.py +55 -0
- tooluniverse/tools/chembl_disease_target_score.py +52 -0
- tooluniverse/tools/convert_to_markdown.py +59 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +46 -0
- tooluniverse/tools/dict_search.py +67 -0
- tooluniverse/tools/dili_search.py +67 -0
- tooluniverse/tools/diqt_search.py +67 -0
- tooluniverse/tools/disease_target_score.py +59 -0
- tooluniverse/tools/drugbank_filter_drugs_by_name.py +55 -0
- tooluniverse/tools/drugbank_full_search.py +67 -0
- tooluniverse/tools/drugbank_get_drug_basic_info_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_drug_chemistry_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_drug_interactions_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_drug_name_and_description_by_indication.py +63 -0
- tooluniverse/tools/drugbank_get_drug_name_and_description_by_pathway_name.py +63 -0
- tooluniverse/tools/drugbank_get_drug_name_and_description_by_target_name.py +63 -0
- tooluniverse/tools/drugbank_get_drug_name_description_pharmacology_by_mechanism_of_action.py +63 -0
- tooluniverse/tools/drugbank_get_drug_pathways_and_reactions_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_drug_products_by_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_drug_references_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_indications_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_pharmacology_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_safety_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_get_targets_by_drug_name_or_drugbank_id.py +63 -0
- tooluniverse/tools/drugbank_links_search.py +67 -0
- tooluniverse/tools/drugbank_vocab_filter.py +63 -0
- tooluniverse/tools/drugbank_vocab_search.py +67 -0
- tooluniverse/tools/embedding_database_add.py +63 -0
- tooluniverse/tools/embedding_database_create.py +71 -0
- tooluniverse/tools/embedding_database_load.py +63 -0
- tooluniverse/tools/embedding_database_search.py +67 -0
- tooluniverse/tools/embedding_sync_download.py +63 -0
- tooluniverse/tools/embedding_sync_upload.py +71 -0
- tooluniverse/tools/enrichr_gene_enrichment_analysis.py +52 -0
- tooluniverse/tools/europepmc_disease_target_score.py +52 -0
- tooluniverse/tools/eva_disease_target_score.py +52 -0
- tooluniverse/tools/eva_somatic_disease_target_score.py +52 -0
- tooluniverse/tools/expression_atlas_disease_target_score.py +52 -0
- tooluniverse/tools/extract_clinical_trial_adverse_events.py +59 -0
- tooluniverse/tools/extract_clinical_trial_outcomes.py +52 -0
- tooluniverse/tools/genomics_england_disease_target_score.py +52 -0
- tooluniverse/tools/get_HPO_ID_by_phenotype.py +55 -0
- tooluniverse/tools/get_albumentations_info.py +44 -0
- tooluniverse/tools/get_altair_info.py +44 -0
- tooluniverse/tools/get_anndata_info.py +49 -0
- tooluniverse/tools/get_arboreto_info.py +46 -0
- tooluniverse/tools/get_arxiv_info.py +46 -0
- tooluniverse/tools/get_ase_info.py +46 -0
- tooluniverse/tools/get_assembly_info_by_pdb_id.py +46 -0
- tooluniverse/tools/get_assembly_summary.py +46 -0
- tooluniverse/tools/get_astropy_info.py +44 -0
- tooluniverse/tools/get_binding_affinity_by_pdb_id.py +46 -0
- tooluniverse/tools/get_biopandas_info.py +49 -0
- tooluniverse/tools/get_biopython_info.py +49 -0
- tooluniverse/tools/get_bioservices_info.py +44 -0
- tooluniverse/tools/get_biotite_info.py +49 -0
- tooluniverse/tools/get_bokeh_info.py +44 -0
- tooluniverse/tools/get_brian2_info.py +44 -0
- tooluniverse/tools/get_cartopy_info.py +44 -0
- tooluniverse/tools/get_catboost_info.py +44 -0
- tooluniverse/tools/get_cellpose_info.py +49 -0
- tooluniverse/tools/get_cellrank_info.py +44 -0
- tooluniverse/tools/get_cellxgene_census_info.py +46 -0
- tooluniverse/tools/get_cftime_info.py +44 -0
- tooluniverse/tools/get_chem_comp_audit_info.py +46 -0
- tooluniverse/tools/get_chem_comp_charge_and_ambiguity.py +46 -0
- tooluniverse/tools/get_chembl_webresource_client_info.py +44 -0
- tooluniverse/tools/get_citation_info_by_pdb_id.py +46 -0
- tooluniverse/tools/get_clair3_info.py +46 -0
- tooluniverse/tools/get_clinical_trial_conditions_and_interventions.py +55 -0
- tooluniverse/tools/get_clinical_trial_descriptions.py +52 -0
- tooluniverse/tools/get_clinical_trial_eligibility_criteria.py +55 -0
- tooluniverse/tools/get_clinical_trial_locations.py +52 -0
- tooluniverse/tools/get_clinical_trial_outcome_measures.py +52 -0
- tooluniverse/tools/get_clinical_trial_references.py +52 -0
- tooluniverse/tools/get_clinical_trial_status_and_dates.py +52 -0
- tooluniverse/tools/get_cobra_info.py +46 -0
- tooluniverse/tools/get_cobrapy_info.py +46 -0
- tooluniverse/tools/get_cooler_info.py +49 -0
- tooluniverse/tools/get_core_refinement_statistics.py +46 -0
- tooluniverse/tools/get_cryosparc_tools_info.py +46 -0
- tooluniverse/tools/get_crystal_growth_conditions_by_pdb_id.py +49 -0
- tooluniverse/tools/get_crystallization_ph_by_pdb_id.py +46 -0
- tooluniverse/tools/get_crystallographic_properties_by_pdb_id.py +49 -0
- tooluniverse/tools/get_cupy_info.py +44 -0
- tooluniverse/tools/get_cyvcf2_info.py +49 -0
- tooluniverse/tools/get_dask_info.py +44 -0
- tooluniverse/tools/get_datamol_info.py +44 -0
- tooluniverse/tools/get_datashader_info.py +44 -0
- tooluniverse/tools/get_deepchem_info.py +49 -0
- tooluniverse/tools/get_deeppurpose_info.py +46 -0
- tooluniverse/tools/get_deeptools_info.py +46 -0
- tooluniverse/tools/get_deepxde_info.py +49 -0
- tooluniverse/tools/get_dendropy_info.py +44 -0
- tooluniverse/tools/get_descriptastorus_info.py +46 -0
- tooluniverse/tools/get_diffdock_info.py +46 -0
- tooluniverse/tools/get_dscribe_info.py +49 -0
- tooluniverse/tools/get_ec_number_by_entity_id.py +46 -0
- tooluniverse/tools/get_elephant_info.py +44 -0
- tooluniverse/tools/get_em_3d_fitting_and_reconstruction_details.py +49 -0
- tooluniverse/tools/get_emdb_ids_by_pdb_id.py +46 -0
- tooluniverse/tools/get_episcanpy_info.py +44 -0
- tooluniverse/tools/get_ete3_info.py +44 -0
- tooluniverse/tools/get_faiss_info.py +46 -0
- tooluniverse/tools/get_fanc_info.py +46 -0
- tooluniverse/tools/get_flask_info.py +46 -0
- tooluniverse/tools/get_flowio_info.py +46 -0
- tooluniverse/tools/get_flowkit_info.py +46 -0
- tooluniverse/tools/get_flowutils_info.py +46 -0
- tooluniverse/tools/get_freesasa_info.py +44 -0
- tooluniverse/tools/get_galpy_info.py +44 -0
- tooluniverse/tools/get_gene_name_by_entity_id.py +46 -0
- tooluniverse/tools/get_geopandas_info.py +44 -0
- tooluniverse/tools/get_gget_info.py +46 -0
- tooluniverse/tools/get_googlesearch_python_info.py +46 -0
- tooluniverse/tools/get_gseapy_info.py +49 -0
- tooluniverse/tools/get_h5py_info.py +46 -0
- tooluniverse/tools/get_harmony_pytorch_info.py +46 -0
- tooluniverse/tools/get_hmmlearn_info.py +46 -0
- tooluniverse/tools/get_holoviews_info.py +44 -0
- tooluniverse/tools/get_host_organism_by_pdb_id.py +46 -0
- tooluniverse/tools/get_htmd_info.py +44 -0
- tooluniverse/tools/get_hyperopt_info.py +49 -0
- tooluniverse/tools/get_igraph_info.py +49 -0
- tooluniverse/tools/get_imageio_info.py +44 -0
- tooluniverse/tools/get_imbalanced_learn_info.py +44 -0
- tooluniverse/tools/get_jcvi_info.py +46 -0
- tooluniverse/tools/get_joblib_info.py +44 -0
- tooluniverse/tools/get_joint_associated_diseases_by_HPO_ID_list.py +55 -0
- tooluniverse/tools/get_khmer_info.py +46 -0
- tooluniverse/tools/get_kipoiseq_info.py +44 -0
- tooluniverse/tools/get_lifelines_info.py +49 -0
- tooluniverse/tools/get_ligand_bond_count_by_pdb_id.py +46 -0
- tooluniverse/tools/get_ligand_smiles_by_chem_comp_id.py +49 -0
- tooluniverse/tools/get_lightgbm_info.py +44 -0
- tooluniverse/tools/get_loompy_info.py +46 -0
- tooluniverse/tools/get_mageck_info.py +46 -0
- tooluniverse/tools/get_matplotlib_info.py +49 -0
- tooluniverse/tools/get_mdanalysis_info.py +46 -0
- tooluniverse/tools/get_mdtraj_info.py +44 -0
- tooluniverse/tools/get_mne_info.py +44 -0
- tooluniverse/tools/get_molfeat_info.py +44 -0
- tooluniverse/tools/get_molvs_info.py +44 -0
- tooluniverse/tools/get_mordred_info.py +44 -0
- tooluniverse/tools/get_msprime_info.py +49 -0
- tooluniverse/tools/get_mudata_info.py +49 -0
- tooluniverse/tools/get_mutation_annotations_by_pdb_id.py +46 -0
- tooluniverse/tools/get_neo_info.py +44 -0
- tooluniverse/tools/get_netcdf4_info.py +44 -0
- tooluniverse/tools/get_networkx_info.py +46 -0
- tooluniverse/tools/get_nglview_info.py +44 -0
- tooluniverse/tools/get_nilearn_info.py +44 -0
- tooluniverse/tools/get_numba_info.py +46 -0
- tooluniverse/tools/get_numpy_info.py +46 -0
- tooluniverse/tools/get_oligosaccharide_descriptors_by_entity_id.py +49 -0
- tooluniverse/tools/get_openbabel_info.py +49 -0
- tooluniverse/tools/get_openchem_info.py +46 -0
- tooluniverse/tools/get_opencv_info.py +49 -0
- tooluniverse/tools/get_openmm_info.py +49 -0
- tooluniverse/tools/get_optlang_info.py +46 -0
- tooluniverse/tools/get_optuna_info.py +44 -0
- tooluniverse/tools/get_palantir_info.py +44 -0
- tooluniverse/tools/get_pandas_info.py +49 -0
- tooluniverse/tools/get_patsy_info.py +44 -0
- tooluniverse/tools/get_pdbfixer_info.py +46 -0
- tooluniverse/tools/get_phenotype_by_HPO_ID.py +46 -0
- tooluniverse/tools/get_pillow_info.py +44 -0
- tooluniverse/tools/get_plantcv_info.py +46 -0
- tooluniverse/tools/get_plip_info.py +46 -0
- tooluniverse/tools/get_plotly_info.py +44 -0
- tooluniverse/tools/get_poliastro_info.py +46 -0
- tooluniverse/tools/get_polymer_entity_annotations.py +49 -0
- tooluniverse/tools/get_polymer_entity_count_by_pdb_id.py +46 -0
- tooluniverse/tools/get_polymer_entity_ids_by_pdb_id.py +46 -0
- tooluniverse/tools/get_polymer_entity_type_by_entity_id.py +49 -0
- tooluniverse/tools/get_polymer_molecular_weight_by_entity_id.py +49 -0
- tooluniverse/tools/get_poretools_info.py +44 -0
- tooluniverse/tools/get_prody_info.py +46 -0
- tooluniverse/tools/get_protein_classification_by_pdb_id.py +49 -0
- tooluniverse/tools/get_protein_metadata_by_pdb_id.py +46 -0
- tooluniverse/tools/get_pubchempy_info.py +44 -0
- tooluniverse/tools/get_pybedtools_info.py +49 -0
- tooluniverse/tools/get_pybigwig_info.py +46 -0
- tooluniverse/tools/get_pydeseq2_info.py +46 -0
- tooluniverse/tools/get_pyensembl_info.py +44 -0
- tooluniverse/tools/get_pyephem_info.py +44 -0
- tooluniverse/tools/get_pyfaidx_info.py +49 -0
- tooluniverse/tools/get_pyfasta_info.py +44 -0
- tooluniverse/tools/get_pykalman_info.py +46 -0
- tooluniverse/tools/get_pyliftover_info.py +49 -0
- tooluniverse/tools/get_pymassspec_info.py +46 -0
- tooluniverse/tools/get_pymed_info.py +46 -0
- tooluniverse/tools/get_pymzml_info.py +46 -0
- tooluniverse/tools/get_pypdf2_info.py +46 -0
- tooluniverse/tools/get_pyranges_info.py +49 -0
- tooluniverse/tools/get_pyrosetta_info.py +44 -0
- tooluniverse/tools/get_pysam_info.py +46 -0
- tooluniverse/tools/get_pyscenic_info.py +46 -0
- tooluniverse/tools/get_pyscf_info.py +46 -0
- tooluniverse/tools/get_pyscreener_info.py +46 -0
- tooluniverse/tools/get_pytdc_info.py +46 -0
- tooluniverse/tools/get_python_libsbml_info.py +46 -0
- tooluniverse/tools/get_pytorch_info.py +49 -0
- tooluniverse/tools/get_pyvcf_info.py +44 -0
- tooluniverse/tools/get_pyvis_info.py +44 -0
- tooluniverse/tools/get_qutip_info.py +44 -0
- tooluniverse/tools/get_rasterio_info.py +44 -0
- tooluniverse/tools/get_rdkit_info.py +46 -0
- tooluniverse/tools/get_refinement_resolution_by_pdb_id.py +49 -0
- tooluniverse/tools/get_release_deposit_dates_by_pdb_id.py +49 -0
- tooluniverse/tools/get_reportlab_info.py +49 -0
- tooluniverse/tools/get_requests_info.py +49 -0
- tooluniverse/tools/get_ruptures_info.py +46 -0
- tooluniverse/tools/get_scanorama_info.py +44 -0
- tooluniverse/tools/get_scanpy_info.py +49 -0
- tooluniverse/tools/get_schnetpack_info.py +49 -0
- tooluniverse/tools/get_scholarly_info.py +46 -0
- tooluniverse/tools/get_scikit_bio_info.py +49 -0
- tooluniverse/tools/get_scikit_image_info.py +46 -0
- tooluniverse/tools/get_scikit_learn_info.py +49 -0
- tooluniverse/tools/get_scipy_info.py +46 -0
- tooluniverse/tools/get_scrublet_info.py +49 -0
- tooluniverse/tools/get_scvelo_info.py +49 -0
- tooluniverse/tools/get_scvi_tools_info.py +44 -0
- tooluniverse/tools/get_seaborn_info.py +49 -0
- tooluniverse/tools/get_sequence_by_pdb_id.py +46 -0
- tooluniverse/tools/get_sequence_lengths_by_pdb_id.py +46 -0
- tooluniverse/tools/get_sequence_positional_features_by_instance_id.py +49 -0
- tooluniverse/tools/get_skopt_info.py +44 -0
- tooluniverse/tools/get_souporcell_info.py +46 -0
- tooluniverse/tools/get_source_organism_by_pdb_id.py +46 -0
- tooluniverse/tools/get_space_group_by_pdb_id.py +46 -0
- tooluniverse/tools/get_statsmodels_info.py +49 -0
- tooluniverse/tools/get_structure_determination_software_by_pdb_id.py +49 -0
- tooluniverse/tools/get_structure_title_by_pdb_id.py +46 -0
- tooluniverse/tools/get_structure_validation_metrics_by_pdb_id.py +49 -0
- tooluniverse/tools/get_sunpy_info.py +44 -0
- tooluniverse/tools/get_sympy_info.py +46 -0
- tooluniverse/tools/get_target_cofactor_info.py +46 -0
- tooluniverse/tools/get_taxonomy_by_pdb_id.py +46 -0
- tooluniverse/tools/get_tiledb_info.py +46 -0
- tooluniverse/tools/get_tiledbsoma_info.py +46 -0
- tooluniverse/tools/get_torch_geometric_info.py +49 -0
- tooluniverse/tools/get_tqdm_info.py +46 -0
- tooluniverse/tools/get_trackpy_info.py +46 -0
- tooluniverse/tools/get_tskit_info.py +46 -0
- tooluniverse/tools/get_umap_learn_info.py +49 -0
- tooluniverse/tools/get_uniprot_accession_by_entity_id.py +49 -0
- tooluniverse/tools/get_velocyto_info.py +44 -0
- tooluniverse/tools/get_viennarna_info.py +49 -0
- tooluniverse/tools/get_webpage_text_from_url.py +52 -0
- tooluniverse/tools/get_webpage_title.py +49 -0
- tooluniverse/tools/get_xarray_info.py +44 -0
- tooluniverse/tools/get_xesmf_info.py +44 -0
- tooluniverse/tools/get_xgboost_info.py +44 -0
- tooluniverse/tools/get_zarr_info.py +44 -0
- tooluniverse/tools/gnomAD_query_variant.py +52 -0
- tooluniverse/tools/gwas_get_association_by_id.py +49 -0
- tooluniverse/tools/gwas_get_associations_for_snp.py +67 -0
- tooluniverse/tools/gwas_get_associations_for_study.py +55 -0
- tooluniverse/tools/gwas_get_associations_for_trait.py +55 -0
- tooluniverse/tools/gwas_get_snp_by_id.py +46 -0
- tooluniverse/tools/gwas_get_snps_for_gene.py +55 -0
- tooluniverse/tools/gwas_get_studies_for_trait.py +75 -0
- tooluniverse/tools/gwas_get_study_by_id.py +46 -0
- tooluniverse/tools/gwas_get_variants_for_trait.py +55 -0
- tooluniverse/tools/gwas_search_associations.py +75 -0
- tooluniverse/tools/gwas_search_snps.py +63 -0
- tooluniverse/tools/gwas_search_studies.py +75 -0
- tooluniverse/tools/humanbase_ppi_analysis.py +67 -0
- tooluniverse/tools/mesh_get_subjects_by_pharmacological_action.py +63 -0
- tooluniverse/tools/mesh_get_subjects_by_subject_id.py +63 -0
- tooluniverse/tools/mesh_get_subjects_by_subject_name.py +63 -0
- tooluniverse/tools/mesh_get_subjects_by_subject_scope_or_definition.py +63 -0
- tooluniverse/tools/odphp_itemlist.py +49 -0
- tooluniverse/tools/odphp_myhealthfinder.py +67 -0
- tooluniverse/tools/odphp_outlink_fetch.py +59 -0
- tooluniverse/tools/odphp_topicsearch.py +67 -0
- tooluniverse/tools/openalex_literature_search.py +67 -0
- tooluniverse/tools/reactome_disease_target_score.py +52 -0
- tooluniverse/tools/search_clinical_trials.py +67 -0
- tooluniverse/tools/visualize_molecule_2d.py +83 -0
- tooluniverse/tools/visualize_molecule_3d.py +91 -0
- tooluniverse/tools/visualize_protein_structure_3d.py +79 -0
- tooluniverse/ucsc_tool.py +60 -0
- tooluniverse/unified_guideline_tools.py +2328 -0
- tooluniverse/unpaywall_tool.py +0 -1
- tooluniverse/utils.py +122 -6
- tooluniverse/visualization_tool.py +897 -0
- tooluniverse/wikidata_sparql_tool.py +1 -2
- tooluniverse/zenodo_tool.py +3 -4
- {tooluniverse-1.0.6.dist-info → tooluniverse-1.0.8.dist-info}/METADATA +19 -4
- tooluniverse-1.0.8.dist-info/RECORD +891 -0
- {tooluniverse-1.0.6.dist-info → tooluniverse-1.0.8.dist-info}/entry_points.txt +3 -0
- tooluniverse/test/list_azure_openai_models.py +0 -210
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +0 -370
- tooluniverse/test/test_agentic_tool.py +0 -129
- tooluniverse/test/test_agentic_tool_azure_models.py +0 -91
- tooluniverse/test/test_alphafold_tool.py +0 -108
- tooluniverse/test/test_api_key_validation_min.py +0 -64
- tooluniverse/test/test_chem_tool.py +0 -37
- tooluniverse/test/test_claude_sdk.py +0 -93
- tooluniverse/test/test_compose_lieraturereview.py +0 -63
- tooluniverse/test/test_compose_tool.py +0 -448
- tooluniverse/test/test_dailymed.py +0 -69
- tooluniverse/test/test_dataset_tool.py +0 -200
- tooluniverse/test/test_disease_target_score.py +0 -56
- tooluniverse/test/test_drugbank_filter_examples.py +0 -179
- tooluniverse/test/test_efo.py +0 -31
- tooluniverse/test/test_enrichr_tool.py +0 -21
- tooluniverse/test/test_europe_pmc_tool.py +0 -20
- tooluniverse/test/test_fda_adv.py +0 -95
- tooluniverse/test/test_fda_drug_labeling.py +0 -91
- tooluniverse/test/test_gene_ontology_tools.py +0 -66
- tooluniverse/test/test_global_fallback.py +0 -288
- tooluniverse/test/test_gwas_tool.py +0 -139
- tooluniverse/test/test_hooks_direct.py +0 -219
- tooluniverse/test/test_hpa.py +0 -625
- tooluniverse/test/test_humanbase_tool.py +0 -20
- tooluniverse/test/test_idmap_tools.py +0 -61
- tooluniverse/test/test_list_built_in_tools.py +0 -33
- tooluniverse/test/test_mcp_server.py +0 -211
- tooluniverse/test/test_mcp_tool.py +0 -247
- tooluniverse/test/test_medlineplus.py +0 -220
- tooluniverse/test/test_odphp_tool.py +0 -166
- tooluniverse/test/test_openalex_tool.py +0 -32
- tooluniverse/test/test_openrouter_client.py +0 -288
- tooluniverse/test/test_opentargets.py +0 -28
- tooluniverse/test/test_pubchem_tool.py +0 -116
- tooluniverse/test/test_pubtator_tool.py +0 -37
- tooluniverse/test/test_rcsb_pdb_tool.py +0 -86
- tooluniverse/test/test_reactome.py +0 -54
- tooluniverse/test/test_semantic_scholar_tool.py +0 -24
- tooluniverse/test/test_software_tools.py +0 -147
- tooluniverse/test/test_stdio_hooks.py +0 -285
- tooluniverse/test/test_tool_description_optimizer.py +0 -49
- tooluniverse/test/test_tool_finder.py +0 -26
- tooluniverse/test/test_tool_finder_llm.py +0 -252
- tooluniverse/test/test_tools_find.py +0 -195
- tooluniverse/test/test_uniprot_tools.py +0 -74
- tooluniverse/test/test_uspto_tool.py +0 -72
- tooluniverse/test/test_xml_tool.py +0 -113
- tooluniverse-1.0.6.dist-info/RECORD +0 -230
- {tooluniverse-1.0.6.dist-info → tooluniverse-1.0.8.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.6.dist-info → tooluniverse-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.6.dist-info → tooluniverse-1.0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2328 @@
#!/usr/bin/env python3
"""
Unified Guideline Tools
Consolidated clinical guidelines search tools from multiple sources.
"""

import requests
import time
import re
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
from markitdown import MarkItDown
from .base_tool import BaseTool
from .tool_registry import register_tool


def _extract_meaningful_terms(query):
    """Return significant query terms for relevance filtering."""
    if not isinstance(query, str):
        return []

    # Keep alphabetic tokens with length >= 3
    tokens = re.findall(r"[a-zA-Z]{3,}", query.lower())
    stop_terms = {
        "management",
        "care",
        "guideline",
        "guidelines",
        "clinical",
        "practice",
        "and",
        "with",
        "for",
        "the",
        "that",
        "from",
        "into",
        "using",
        "update",
        "introduction",
        "review",
        "overview",
        "recommendation",
        "recommendations",
    }
    meaningful = [token for token in tokens if token not in stop_terms]
    return meaningful if meaningful else tokens

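# Behavior sketch for the helper above: generic guideline vocabulary is
# dropped so relevance checks key on topic-specific tokens, e.g.
#   _extract_meaningful_terms("Clinical practice guidelines for asthma management")
#   -> ["asthma"]
# If every token is a stop term, the unfiltered token list is returned instead.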

@register_tool()
class NICEWebScrapingTool(BaseTool):
    """
    Real NICE guidelines search using web scraping.
    Makes actual HTTP requests to the NICE website and parses HTML responses.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://www.nice.org.uk"
        self.search_url = f"{self.base_url}/search"
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
        )

    def run(self, arguments):
        query = arguments.get("query", "")
        limit = arguments.get("limit", 10)

        if not query:
            return {"error": "Query parameter is required"}

        return self._search_nice_guidelines_real(query, limit)
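
    # Usage sketch (hypothetical tool_config; the fields BaseTool expects are
    # not shown in this excerpt):
    #   tool = NICEWebScrapingTool({"name": "NICE_Clinical_Guidelines_Search"})
    #   hits = tool.run({"query": "type 2 diabetes", "limit": 5})
    #   -> list of {"title", "url", "summary", "date", ...} dicts on success,
    #      or a {"error": ..., "suggestion": ...} dict on failure.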

    def _fetch_guideline_summary(self, url):
        """Fetch summary from a guideline detail page."""
        try:
            time.sleep(0.5)  # Be respectful
            response = self.session.get(url, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")

            # Try to find overview section
            overview = soup.find("div", {"class": "chapter-overview"})
            if overview:
                paragraphs = overview.find_all("p")
                if paragraphs:
                    return " ".join([p.get_text().strip() for p in paragraphs[:2]])

            # Try meta description
            meta_desc = soup.find("meta", {"name": "description"})
            if meta_desc and meta_desc.get("content"):
                return meta_desc.get("content")

            # Try first paragraph in main content
            main_content = soup.find("div", {"class": "content"}) or soup.find("main")
            if main_content:
                first_p = main_content.find("p")
                if first_p:
                    return first_p.get_text().strip()

            return ""
        except Exception:
            return ""
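
    # Note on the method above: summary extraction falls back in order
    # (chapter-overview paragraphs, then <meta name="description">, then the
    # first <p> in main content) and returns "" if every probe fails or the
    # page fetch raises.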

    def _search_nice_guidelines_real(self, query, limit):
        """Search NICE guidelines using real web scraping."""
        try:
            # Add delay to be respectful
            time.sleep(1)

            params = {"q": query, "type": "guidance"}

            response = self.session.get(self.search_url, params=params, timeout=30)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")

            # Find the JSON data in the script tag
            script_tag = soup.find("script", {"id": "__NEXT_DATA__"})
            if not script_tag:
                return {
                    "error": "No search results found",
                    "suggestion": "Try different search terms or check if the NICE website is accessible",
                }

            # Parse the JSON data
            import json

            try:
                data = json.loads(script_tag.string)
                documents = (
                    data.get("props", {})
                    .get("pageProps", {})
                    .get("results", {})
                    .get("documents", [])
                )
            except (json.JSONDecodeError, KeyError) as e:
                return {
                    "error": f"Failed to parse search results: {str(e)}",
                    "source": "NICE",
                }
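
            # Assumed __NEXT_DATA__ payload shape (inferred from the parsing
            # above; NICE does not document this embedded JSON):
            #   {"props": {"pageProps": {"results": {"documents": [
            #       {"title": ..., "url": ..., "abstract": ...,
            #        "publicationDate": ..., "niceResultType": ...}, ...]}}}}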
146
|
+
if not documents:
|
|
147
|
+
return {
|
|
148
|
+
"error": "No NICE guidelines found",
|
|
149
|
+
"suggestion": "Try different search terms or check if the NICE website is accessible",
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
# Process the documents
|
|
153
|
+
results = []
|
|
154
|
+
for doc in documents[:limit]:
|
|
155
|
+
try:
|
|
156
|
+
title = doc.get("title", "").replace("<b>", "").replace("</b>", "")
|
|
157
|
+
url = doc.get("url", "")
|
|
158
|
+
|
|
159
|
+
# Make URL absolute
|
|
160
|
+
if url.startswith("/"):
|
|
161
|
+
url = self.base_url + url
|
|
162
|
+
|
|
163
|
+
# Extract summary - try multiple fields
|
|
164
|
+
summary = (
|
|
165
|
+
doc.get("abstract", "")
|
|
166
|
+
or doc.get("staticAbstract", "")
|
|
167
|
+
or doc.get("metaDescription", "")
|
|
168
|
+
or doc.get("teaser", "")
|
|
169
|
+
or ""
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# If still no summary, try to fetch from the detail page
|
|
173
|
+
if not summary and url:
|
|
174
|
+
summary = self._fetch_guideline_summary(url)
|
|
175
|
+
|
|
176
|
+
# Extract date
|
|
177
|
+
publication_date = doc.get("publicationDate", "")
|
|
178
|
+
last_updated = doc.get("lastUpdated", "")
|
|
179
|
+
date = last_updated or publication_date
|
|
180
|
+
|
|
181
|
+
# Extract type/category
|
|
182
|
+
nice_result_type = doc.get("niceResultType", "")
|
|
183
|
+
nice_guidance_type = doc.get("niceGuidanceType", [])
|
|
184
|
+
guideline_type = nice_result_type or (
|
|
185
|
+
nice_guidance_type[0]
|
|
186
|
+
if nice_guidance_type
|
|
187
|
+
else "NICE Guideline"
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
# Determine if it's a guideline
|
|
191
|
+
is_guideline = any(
|
|
192
|
+
keyword in guideline_type.lower()
|
|
193
|
+
for keyword in [
|
|
194
|
+
"guideline",
|
|
195
|
+
"quality standard",
|
|
196
|
+
"technology appraisal",
|
|
197
|
+
]
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
# Extract category
|
|
201
|
+
category = "Clinical Guidelines"
|
|
202
|
+
if "quality standard" in guideline_type.lower():
|
|
203
|
+
category = "Quality Standards"
|
|
204
|
+
elif "technology appraisal" in guideline_type.lower():
|
|
205
|
+
category = "Technology Appraisal"
|
|
206
|
+
|
|
207
|
+
result = {
|
|
208
|
+
"title": title,
|
|
209
|
+
"url": url,
|
|
210
|
+
"summary": summary,
|
|
211
|
+
"content": summary, # Copy summary to content field
|
|
212
|
+
"date": date,
|
|
213
|
+
"type": guideline_type,
|
|
214
|
+
"source": "NICE",
|
|
215
|
+
"is_guideline": is_guideline,
|
|
216
|
+
"category": category,
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
results.append(result)
|
|
220
|
+
|
|
221
|
+
except Exception:
|
|
222
|
+
# Skip items that can't be parsed
|
|
223
|
+
continue
|
|
224
|
+
|
|
225
|
+
if not results:
|
|
226
|
+
return {
|
|
227
|
+
"error": "No NICE guidelines found",
|
|
228
|
+
"suggestion": "Try different search terms or check if the NICE website is accessible",
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return results
|
|
232
|
+
|
|
233
|
+
except requests.exceptions.RequestException as e:
|
|
234
|
+
return {
|
|
235
|
+
"error": f"Failed to search NICE guidelines: {str(e)}",
|
|
236
|
+
"source": "NICE",
|
|
237
|
+
}
|
|
238
|
+
except Exception as e:
|
|
239
|
+
return {"error": f"Error parsing NICE response: {str(e)}", "source": "NICE"}


@register_tool()
class PubMedGuidelinesTool(BaseTool):
    """
    Search PubMed for clinical practice guidelines.
    Uses NCBI E-utilities with a guideline publication type filter.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
        self.session = requests.Session()

    def run(self, arguments):
        query = arguments.get("query", "")
        limit = arguments.get("limit", 10)
        api_key = arguments.get("api_key", "")

        if not query:
            return {"error": "Query parameter is required"}

        return self._search_pubmed_guidelines(query, limit, api_key)

    def _search_pubmed_guidelines(self, query, limit, api_key):
        """Search PubMed for guideline publications."""
        try:
            # Add guideline publication type filter
            guideline_query = f"{query} AND (guideline[Publication Type] OR practice guideline[Publication Type])"

            # Search for PMIDs
            search_params = {
                "db": "pubmed",
                "term": guideline_query,
                "retmode": "json",
                "retmax": limit,
            }
            if api_key:
                search_params["api_key"] = api_key

            search_response = self.session.get(
                f"{self.base_url}/esearch.fcgi", params=search_params, timeout=30
            )
            search_response.raise_for_status()
            search_data = search_response.json()

            pmids = search_data.get("esearchresult", {}).get("idlist", [])

            if not pmids:
                return []

            # Get details for PMIDs
            time.sleep(0.5)  # Be respectful with API calls

            detail_params = {"db": "pubmed", "id": ",".join(pmids), "retmode": "json"}
            if api_key:
                detail_params["api_key"] = api_key

            detail_response = self.session.get(
                f"{self.base_url}/esummary.fcgi", params=detail_params, timeout=30
            )
            detail_response.raise_for_status()
            detail_data = detail_response.json()

            # Fetch abstracts using efetch
            time.sleep(0.5)
            abstract_params = {
                "db": "pubmed",
                "id": ",".join(pmids),
                "retmode": "xml",
                "rettype": "abstract",
            }
            if api_key:
                abstract_params["api_key"] = api_key

            abstract_response = self.session.get(
                f"{self.base_url}/efetch.fcgi", params=abstract_params, timeout=30
            )
            abstract_response.raise_for_status()

            # Parse abstracts from XML
            import re

            abstracts = {}
            xml_text = abstract_response.text
            # Extract abstracts for each PMID
            for pmid in pmids:
                # Find abstract text for this PMID
                pmid_pattern = rf"<PMID[^>]*>{pmid}</PMID>.*?<AbstractText[^>]*>(.*?)</AbstractText>"
                abstract_match = re.search(pmid_pattern, xml_text, re.DOTALL)
                if abstract_match:
                    # Clean HTML tags from abstract
                    abstract = re.sub(r"<[^>]+>", "", abstract_match.group(1))
                    abstracts[pmid] = abstract.strip()
                else:
                    abstracts[pmid] = ""

            # Process results
            results = []
            query_terms = _extract_meaningful_terms(query)

            for pmid in pmids:
                if pmid in detail_data.get("result", {}):
                    article = detail_data["result"][pmid]

                    # Extract author information
                    authors = []
                    for author in article.get("authors", [])[:3]:
                        authors.append(author.get("name", ""))
                    author_str = ", ".join(authors)
                    if len(article.get("authors", [])) > 3:
                        author_str += ", et al."

                    # Check publication types
                    pub_types = article.get("pubtype", [])
                    is_guideline = any("guideline" in pt.lower() for pt in pub_types)

                    abstract_text = abstracts.get(pmid, "")
                    searchable_text = " ".join(
                        [
                            article.get("title", ""),
                            abstract_text or "",
                            " ".join(pub_types),
                        ]
                    ).lower()

                    if query_terms and not any(
                        term in searchable_text for term in query_terms
                    ):
                        continue

                    result = {
                        "pmid": pmid,
                        "title": article.get("title", ""),
                        "abstract": abstract_text,
                        "content": abstract_text,  # Copy abstract to content field
                        "authors": author_str,
                        "journal": article.get("source", ""),
                        "publication_date": article.get("pubdate", ""),
                        "publication_types": pub_types,
                        "is_guideline": is_guideline,
                        "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
                        "doi": (
                            article.get("elocationid", "").replace("doi: ", "")
                            if "doi:" in article.get("elocationid", "")
                            else ""
                        ),
                        "source": "PubMed",
                    }

                    results.append(result)

            return results

        except requests.exceptions.RequestException as e:
            return {"error": f"Failed to search PubMed: {str(e)}", "source": "PubMed"}
        except Exception as e:
            return {
                "error": f"Error processing PubMed response: {str(e)}",
                "source": "PubMed",
            }
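

# The tool above is a three-step NCBI E-utilities pipeline; the effective
# requests look roughly like this (parameter names exactly as used above;
# the PMIDs are invented for illustration):
#
#     GET {base_url}/esearch.fcgi?db=pubmed&retmode=json&retmax=10
#         &term=hypertension AND (guideline[Publication Type] OR practice guideline[Publication Type])
#     GET {base_url}/esummary.fcgi?db=pubmed&id=38011234,37991111&retmode=json
#     GET {base_url}/efetch.fcgi?db=pubmed&id=38011234,37991111&retmode=xml&rettype=abstract
#
# An optional NCBI api_key raises the permitted request rate (roughly 3
# requests/second without a key, 10 with one), which is why the code sleeps
# between calls.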


@register_tool()
class EuropePMCGuidelinesTool(BaseTool):
    """
    Search Europe PMC for clinical guidelines.
    Europe PMC provides access to life science literature including guidelines.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
        self.session = requests.Session()

    def run(self, arguments):
        query = arguments.get("query", "")
        limit = arguments.get("limit", 10)

        if not query:
            return {"error": "Query parameter is required"}

        return self._search_europepmc_guidelines(query, limit)

    def _search_europepmc_guidelines(self, query, limit):
        """Search Europe PMC for guideline publications."""
        try:
            # More specific guideline search query
            guideline_query = f'"{query}" AND (guideline OR "practice guideline" OR "clinical guideline" OR recommendation OR "consensus statement")'

            params = {
                "query": guideline_query,
                "format": "json",
                "pageSize": limit * 2,
            }  # Get more to filter

            response = self.session.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()

            results_list = data.get("resultList", {}).get("result", [])

            if not results_list:
                return []

            # Process results with stricter filtering
            results = []
            for result in results_list:
                title = result.get("title", "")
                pub_type = result.get("pubType", "")

                # Get abstract from detailed API call
                abstract = self._get_europepmc_abstract(result.get("pmid", ""))

                # If abstract is too short or just a question, try to get more content
                if len(abstract) < 200 or abstract.endswith("?"):
                    # Try to get full text or more detailed content
                    abstract = self._get_europepmc_full_content(
                        result.get("pmid", ""), result.get("pmcid", "")
                    )

                # More strict guideline detection
                title_lower = title.lower()
                abstract_lower = abstract.lower()

                # Must contain guideline-related keywords in title or abstract
                guideline_keywords = [
                    "guideline",
                    "practice guideline",
                    "clinical guideline",
                    "recommendation",
                    "consensus statement",
                    "position statement",
                    "clinical practice",
                    "best practice",
                ]

                has_guideline_keywords = any(
                    keyword in title_lower or keyword in abstract_lower
                    for keyword in guideline_keywords
                )

                # Exclude research papers and studies
                exclude_keywords = [
                    "study",
                    "trial",
                    "analysis",
                    "evaluation",
                    "assessment",
                    "effectiveness",
                    "efficacy",
                    "outcome",
                    "result",
                    "finding",
                ]

                is_research = any(
                    keyword in title_lower for keyword in exclude_keywords
                )

                # Publication type must confirm guideline nature
                pub_type_tokens = []
                if isinstance(pub_type, str):
                    pub_type_tokens.append(pub_type.lower())

                pub_type_list = result.get("pubTypeList", {}).get("pubType", [])
                if isinstance(pub_type_list, str):
                    pub_type_list = [pub_type_list]

                if isinstance(pub_type_list, list):
                    for entry in pub_type_list:
                        if isinstance(entry, str):
                            pub_type_tokens.append(entry.lower())
                        elif isinstance(entry, dict):
                            label = (
                                entry.get("text")
                                or entry.get("name")
                                or entry.get("value")
                            )
                            if label:
                                pub_type_tokens.append(str(label).lower())

                pub_type_combined = " ".join(pub_type_tokens)

                pub_type_has_guideline = any(
                    term in pub_type_combined
                    for term in [
                        "guideline",
                        "practice guideline",
                        "consensus",
                        "recommendation",
                    ]
                )

                # Determine if it's a guideline
                is_guideline = (
                    has_guideline_keywords
                    and pub_type_has_guideline
                    and not is_research
                    and len(title) > 20
                )

                # Build URL
                pmid = result.get("pmid", "")
                pmcid = result.get("pmcid", "")
                doi = result.get("doi", "")

                url = ""
                if pmid:
                    url = f"https://europepmc.org/article/MED/{pmid}"
                elif pmcid:
                    url = f"https://europepmc.org/article/{pmcid}"
                elif doi:
                    url = f"https://doi.org/{doi}"

                abstract_text = (
                    abstract[:500] + "..." if len(abstract) > 500 else abstract
                )

                # Only add if it's actually a guideline
                if is_guideline:
                    guideline_result = {
                        "title": title,
                        "pmid": pmid,
                        "pmcid": pmcid,
                        "doi": doi,
                        "authors": result.get("authorString", ""),
                        "journal": result.get("journalTitle", ""),
                        "publication_date": result.get("firstPublicationDate", ""),
                        "publication_type": pub_type,
                        "abstract": abstract_text,
                        "content": abstract_text,  # Copy abstract to content field
                        "is_guideline": is_guideline,
                        "url": url,
                        "source": "Europe PMC",
                    }

                    results.append(guideline_result)

                    # Stop when we have enough guidelines
                    if len(results) >= limit:
                        break

            return results

        except requests.exceptions.RequestException as e:
            return {
                "error": f"Failed to search Europe PMC: {str(e)}",
                "source": "Europe PMC",
            }
        except Exception as e:
            return {
                "error": f"Error processing Europe PMC response: {str(e)}",
                "source": "Europe PMC",
            }
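
    # The two helpers below are content fallbacks for sparse search hits:
    # _get_europepmc_abstract pulls the abstract via PubMed's efetch endpoint,
    # and _get_europepmc_full_content tries Europe PMC's fullTextXML endpoint,
    # which serves full text only for the open-access subset (articles with an
    # indexed PMCID or MED record).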
    def _get_europepmc_abstract(self, pmid):
        """Get abstract for a specific PMID using PubMed API."""
        if not pmid:
            return ""

        try:
            # Use PubMed's E-utilities API
            base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            params = {
                "db": "pubmed",
                "id": pmid,
                "retmode": "xml",
                "rettype": "abstract",
            }

            response = self.session.get(base_url, params=params, timeout=15)
            response.raise_for_status()

            # Parse XML response
            import xml.etree.ElementTree as ET

            root = ET.fromstring(response.content)

            # Find abstract text
            abstract_elem = root.find(".//AbstractText")
            if abstract_elem is not None:
                return abstract_elem.text or ""

            # Try alternative path
            abstract_elem = root.find(".//abstract")
            if abstract_elem is not None:
                return abstract_elem.text or ""

            return ""

        except Exception as e:
            return f"Error fetching abstract: {str(e)}"

    def _get_europepmc_full_content(self, pmid, pmcid):
        """Get more detailed content from Europe PMC."""
        if not pmid and not pmcid:
            return ""

        try:
            # Try to get full text from Europe PMC
            if pmcid:
                full_text_url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/{pmcid}/fullTextXML"
            else:
                full_text_url = f"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/{pmid}/fullTextXML"

            response = self.session.get(full_text_url, timeout=15)
            if response.status_code == 200:
                # Parse XML to extract meaningful content
                import xml.etree.ElementTree as ET

                root = ET.fromstring(response.content)

                # Extract sections that might contain clinical recommendations
                content_parts = []

                # Look for methods, results, conclusions, recommendations
                for section in root.findall(".//sec"):
                    title_elem = section.find("title")
                    if title_elem is not None:
                        title = title_elem.text or ""
                        if any(
                            keyword in title.lower()
                            for keyword in [
                                "recommendation",
                                "conclusion",
                                "method",
                                "result",
                                "guideline",
                                "clinical",
                            ]
                        ):
                            # Extract text from this section
                            text_content = ""
                            for p in section.findall(".//p"):
                                if p.text:
                                    text_content += p.text + " "

                            if text_content.strip():
                                content_parts.append(f"{title}: {text_content.strip()}")

                if content_parts:
                    return " ".join(
                        content_parts[:3]
                    )  # Limit to first 3 relevant sections

            return ""

        except Exception as e:
            return f"Error fetching full content: {str(e)}"


@register_tool()
class TRIPDatabaseTool(BaseTool):
    """
    Search TRIP Database (Turning Research into Practice).
    Specialized evidence-based medicine database with a clinical guidelines filter.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://www.tripdatabase.com/api/search"
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            }
        )

    def run(self, arguments):
        query = arguments.get("query", "")
        limit = arguments.get("limit", 10)
        search_type = arguments.get("search_type", "guideline")

        if not query:
            return {"error": "Query parameter is required"}

        return self._search_trip_database(query, limit, search_type)

    def _search_trip_database(self, query, limit, search_type):
        """Search TRIP Database for clinical guidelines."""
        try:
            params = {"criteria": query, "searchType": search_type, "limit": limit}

            response = self.session.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()

            # Parse XML response
            root = ET.fromstring(response.content)

            documents = root.findall("document")

            if not documents:
                return []

            # Process results
            results = []
            for doc in documents[:limit]:
                title_elem = doc.find("title")
                link_elem = doc.find("link")
                publication_elem = doc.find("publication")
                category_elem = doc.find("category")
                description_elem = doc.find("description")

                description_text = (
                    description_elem.text if description_elem is not None else ""
                )
                url = link_elem.text if link_elem is not None else ""

                key_recommendations = []
                evidence_strength = []

                fetched_content = None
                requires_detailed_fetch = url and any(
                    domain in url for domain in ["bmj.com/content/", "e-dmj.org"]
                )

                if (not description_text and url) or requires_detailed_fetch:
                    fetched_content = self._fetch_guideline_content(url)

                if isinstance(fetched_content, dict):
                    description_text = (
                        fetched_content.get("content", "") or description_text
                    )
                    key_recommendations = fetched_content.get("key_recommendations", [])
                    evidence_strength = fetched_content.get("evidence_strength", [])
                elif isinstance(fetched_content, str) and fetched_content:
                    description_text = fetched_content

                category_text = (
                    category_elem.text.lower()
                    if category_elem is not None and category_elem.text
                    else ""
                )

                if category_text and "guideline" not in category_text:
                    # Skip clearly non-guideline categories such as news or trials
                    continue

                description_lower = description_text.lower()
                if any(
                    phrase in description_lower
                    for phrase in [
                        "login required",
                        "temporarily unavailable",
                        "subscription required",
                        "no results",
                    ]
                ):
                    continue

                guideline_result = {
                    "title": title_elem.text if title_elem is not None else "",
                    "url": url,
                    "description": description_text,
                    "content": description_text,  # Copy description to content field
                    "publication": (
                        publication_elem.text if publication_elem is not None else ""
                    ),
                    "category": category_elem.text if category_elem is not None else "",
                    "is_guideline": True,  # TRIP returns filtered results
                    "source": "TRIP Database",
                }

                if key_recommendations:
                    guideline_result["key_recommendations"] = key_recommendations
                if evidence_strength:
                    guideline_result["evidence_strength"] = evidence_strength

                results.append(guideline_result)

            return results

        except requests.exceptions.RequestException as e:
            return {
                "error": f"Failed to search TRIP Database: {str(e)}",
                "source": "TRIP Database",
            }
        except ET.ParseError as e:
            return {
                "error": f"Failed to parse TRIP Database response: {str(e)}",
                "source": "TRIP Database",
            }
        except Exception as e:
            return {
                "error": f"Error processing TRIP Database response: {str(e)}",
                "source": "TRIP Database",
            }
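
    # The parser above expects a TRIP XML payload shaped roughly like this
    # (element names are the ones read above; the root element name and all
    # values are invented for illustration):
    #
    #     <documents>
    #       <document>
    #         <title>Hypertension in adults: diagnosis and management</title>
    #         <link>https://www.nice.org.uk/guidance/ng136</link>
    #         <publication>NICE</publication>
    #         <category>Guidelines</category>
    #         <description>...</description>
    #       </document>
    #     </documents>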
    def _fetch_guideline_content(self, url):
        """Extract content from a guideline URL using targeted parsers when available."""
        try:
            time.sleep(0.5)  # Be respectful

            if "bmj.com/content/" in url:
                return self._extract_bmj_guideline_content(url)

            if "e-dmj.org" in url:
                return self._extract_dmj_guideline_content(url)

            # Fallback: generic MarkItDown extraction
            md = MarkItDown()
            result = md.convert(url)

            if not result or not getattr(result, "text_content", None):
                return f"Content extraction failed. Document available at: {url}"

            content = self._clean_generic_content(result.text_content)
            return content

        except Exception as e:
            return f"Error extracting content: {str(e)}"

    def _clean_generic_content(self, raw_text):
        """Clean generic text content to emphasise clinical lines."""
        content = raw_text.strip()
        content = re.sub(r"\n\s*\n", "\n\n", content)
        content = re.sub(r" +", " ", content)

        meaningful_lines = []
        for line in content.split("\n"):
            line = line.strip()
            if len(line) < 20:
                continue
            if line.count("[") > 0 or line.count("]") > 0:
                continue
            if "http" in line or "//" in line:
                continue

            skip_keywords = [
                "copyright",
                "rights reserved",
                "notice of rights",
                "terms and conditions",
                "your responsibility",
                "local commissioners",
                "environmental impact",
                "medicines and healthcare",
                "yellow card scheme",
                "©",
                "all rights reserved",
            ]
            if any(keyword in line.lower() for keyword in skip_keywords):
                continue

            clinical_keywords = [
                "recommendation",
                "recommendations",
                "should",
                "strong recommendation",
                "conditional recommendation",
                "clinicians",
                "patients",
                "treatment",
                "management",
                "diagnosis",
                "assessment",
                "therapy",
                "intervention",
                "pharmacologic",
                "monitoring",
                "screening",
                "diabetes",
                "glycaemic",
            ]
            if any(keyword in line.lower() for keyword in clinical_keywords):
                meaningful_lines.append(line)

        if meaningful_lines:
            content = "\n".join(meaningful_lines[:8])
        else:
            content = content[:1000]

        if len(content) > 2000:
            truncated = content[:2000]
            last_period = truncated.rfind(".")
            if last_period > 1000:
                content = truncated[: last_period + 1] + "..."
            else:
                content = truncated + "..."

        return content
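
    # Example of the filter above: given MarkItDown output like
    #
    #     "© NICE 2022. All rights reserved."          -> dropped (skip keyword)
    #     "See https://example.org for full details"   -> dropped (contains URL)
    #     "Offer lifestyle advice before treatment."   -> kept (clinical keyword)
    #
    # only the clinically flavoured line survives; at most eight such lines
    # (or a 1000-character head when none match) are returned, capped at
    # roughly 2000 characters on a sentence boundary.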
    def _extract_bmj_guideline_content(self, url):
        """Fetch BMJ Rapid Recommendation content with key recommendations."""
        try:
            md = MarkItDown()
            result = md.convert(url)
            if not result or not getattr(result, "text_content", None):
                return {
                    "content": f"Content extraction failed. Document available at: {url}",
                    "key_recommendations": [],
                    "evidence_strength": [],
                }

            text = result.text_content
            content = self._clean_generic_content(text)

            lines = [line.strip() for line in text.splitlines() if line.strip()]
            recommendations = []
            grading = []
            tokens = [
                "strong recommendation",
                "conditional recommendation",
                "weak recommendation",
                "good practice statement",
            ]

            for idx, line in enumerate(lines):
                lower = line.lower()
                if "recommendation" not in lower:
                    continue
                if len(line) > 180:
                    continue

                title_clean = line.lstrip("#").strip()
                if title_clean.startswith("+"):
                    continue
                if title_clean.lower().startswith("rapid recommendations"):
                    continue

                summary_lines = []
                for following in lines[idx + 1 : idx + 10]:
                    if "recommendation" in following.lower() and len(following) < 180:
                        break
                    if len(following) < 40:
                        continue
                    summary_lines.append(following)
                    if len(summary_lines) >= 3:
                        break

                summary = " ".join(summary_lines)
                if summary:
                    recommendations.append(
                        {"title": title_clean, "summary": summary[:400]}
                    )

                strength = None
                for token in tokens:
                    if token in lower or any(token in s.lower() for s in summary_lines):
                        strength = token.title()
                        break

                if not strength:
                    grade_match = re.search(r"grade\s+[A-D1-9]+", lower)
                    if grade_match:
                        strength = grade_match.group(0).title()

                if strength and not any(
                    entry.get("section") == title_clean for entry in grading
                ):
                    grading.append({"section": title_clean, "strength": strength})

            return {
                "content": content,
                "key_recommendations": recommendations[:5],
                "evidence_strength": grading,
            }

        except Exception as e:
            return {
                "content": f"Error extracting BMJ content: {str(e)}",
                "key_recommendations": [],
                "evidence_strength": [],
            }

    def _extract_dmj_guideline_content(self, url):
        """Fetch Diabetes & Metabolism Journal guideline content and GRADE statements."""
        try:
            md = MarkItDown()
            result = md.convert(url)
            if not result or not getattr(result, "text_content", None):
                return {
                    "content": f"Content extraction failed. Document available at: {url}",
                    "key_recommendations": [],
                    "evidence_strength": [],
                }

            text = result.text_content
            content = self._clean_generic_content(text)

            lines = [line.strip() for line in text.splitlines() if line.strip()]
            recommendations = []
            grading = []

            for idx, line in enumerate(lines):
                lower = line.lower()
                if not any(
                    keyword in lower
                    for keyword in ["recommendation", "statement", "guideline"]
                ):
                    continue
                if len(line) > 200:
                    continue

                title_clean = line.lstrip("#").strip()
                if title_clean.startswith("+") or title_clean.startswith("Table"):
                    continue

                summary_lines = []
                for following in lines[idx + 1 : idx + 10]:
                    if (
                        any(
                            keyword in following.lower()
                            for keyword in ["recommendation", "statement", "guideline"]
                        )
                        and len(following) < 200
                    ):
                        break
                    if len(following) < 30:
                        continue
                    summary_lines.append(following)
                    if len(summary_lines) >= 3:
                        break

                summary = " ".join(summary_lines)
                if summary:
                    recommendations.append(
                        {"title": title_clean, "summary": summary[:400]}
                    )

                strength = None
                grade_match = re.search(r"grade\s+[A-E]\b", lower)
                if grade_match:
                    strength = grade_match.group(0).title()
                level_match = re.search(r"level\s+[0-4]", lower)
                if level_match:
                    level_text = level_match.group(0).title()
                    strength = f"{strength} ({level_text})" if strength else level_text

                for line_text in summary_lines:
                    lower_line = line_text.lower()
                    if "strong" in lower_line and "recommendation" in lower_line:
                        strength = "Strong recommendation"
                        break
                    if "conditional" in lower_line and "recommendation" in lower_line:
                        strength = "Conditional recommendation"
                        break

                if strength and not any(
                    entry.get("section") == title_clean for entry in grading
                ):
                    grading.append({"section": title_clean, "strength": strength})

            return {
                "content": content,
                "key_recommendations": recommendations[:5],
                "evidence_strength": grading,
            }

        except Exception as e:
            return {
                "content": f"Error extracting DMJ content: {str(e)}",
                "key_recommendations": [],
                "evidence_strength": [],
            }
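

# All three extractors above funnel page content through MarkItDown before the
# keyword filtering in _clean_generic_content. A usage sketch for the TRIP
# tool itself (hypothetical tool_config):
#
#     tool = TRIPDatabaseTool({"name": "trip_guidelines"})
#     results = tool.run({"query": "atrial fibrillation", "search_type": "guideline"})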


@register_tool()
class WHOGuidelinesTool(BaseTool):
    """
    WHO (World Health Organization) Guidelines Search Tool.
    Searches WHO official guidelines from their publications website.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://www.who.int"
        self.guidelines_url = f"{self.base_url}/publications/who-guidelines"
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            }
        )

    def run(self, arguments):
        query = arguments.get("query", "")
        limit = arguments.get("limit", 10)

        if not query:
            return {"error": "Query parameter is required"}

        return self._search_who_guidelines(query, limit)

    def _fetch_guideline_description(self, url):
        """Fetch description from a WHO guideline detail page."""
        try:
            time.sleep(0.5)  # Be respectful
            response = self.session.get(url, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")

            # Try to find overview or description
            overview = soup.find("div", {"class": "overview"}) or soup.find(
                "div", {"class": "description"}
            )
            if overview:
                paragraphs = overview.find_all("p")
                if paragraphs:
                    return " ".join([p.get_text().strip() for p in paragraphs[:2]])

            # Try meta description
            meta_desc = soup.find("meta", {"name": "description"}) or soup.find(
                "meta", {"property": "og:description"}
            )
            if meta_desc and meta_desc.get("content"):
                return meta_desc.get("content")

            # Try first few paragraphs in main content
            main_content = (
                soup.find("div", {"class": "content"})
                or soup.find("main")
                or soup.find("article")
            )
            if main_content:
                paragraphs = main_content.find_all("p", recursive=True)
                if paragraphs:
                    text_parts = []
                    for p in paragraphs[:3]:
                        text = p.get_text().strip()
                        if len(text) > 30:  # Skip very short paragraphs
                            text_parts.append(text)
                        if len(" ".join(text_parts)) > 300:  # Limit total length
                            break
                    if text_parts:
                        return " ".join(text_parts)

            return ""
        except Exception:
            return ""

    def _search_who_guidelines(self, query, limit):
        """Search WHO guidelines by scraping their official website."""
        try:
            # Add delay to be respectful
            time.sleep(1)

            # First, get the guidelines page
            response = self.session.get(self.guidelines_url, timeout=30)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")

            # Find all publication links
            all_links = soup.find_all("a", href=True)
            guidelines = []

            query_lower = query.lower()
            query_terms = _extract_meaningful_terms(query)

            for link in all_links:
                href = link["href"]
                text = link.get_text().strip()

                # Filter for actual guideline publications
                if (
                    ("/publications/i/item/" in href or "/publications/m/item/" in href)
                    and text
                    and len(text) > 10
                ):
                    # Check if query matches the title
                    if query_lower in text.lower():
                        full_url = (
                            href if href.startswith("http") else self.base_url + href
                        )

                        # Avoid duplicates
                        if not any(g["url"] == full_url for g in guidelines):
                            # Fetch description from detail page
                            description = self._fetch_guideline_description(full_url)

                            searchable_text = (text + " " + (description or "")).lower()
                            if query_terms and not any(
                                term in searchable_text for term in query_terms
                            ):
                                continue

                            guidelines.append(
                                {
                                    "title": text,
                                    "url": full_url,
                                    "description": description,
                                    "content": description,  # Copy description to content field
                                    "source": "WHO",
                                    "organization": "World Health Organization",
                                    "is_guideline": True,
                                    "official": True,
                                }
                            )

                            if len(guidelines) >= limit:
                                break

            # If no results with strict matching, get all WHO guidelines from page
            if len(guidelines) == 0:
                print(
                    f"No exact matches for '{query}', retrieving latest WHO guidelines..."
                )

                all_guidelines = []
                for link in all_links:
                    href = link["href"]
                    text = link.get_text().strip()

                    if (
                        (
                            "/publications/i/item/" in href
                            or "/publications/m/item/" in href
                        )
                        and text
                        and len(text) > 10
                    ):
                        full_url = (
                            href if href.startswith("http") else self.base_url + href
                        )

                        if not any(g["url"] == full_url for g in all_guidelines):
                            # Fetch description from detail page
                            description = self._fetch_guideline_description(full_url)

                            searchable_text = (text + " " + (description or "")).lower()
                            if query_terms and not any(
                                term in searchable_text for term in query_terms
                            ):
                                continue

                            all_guidelines.append(
                                {
                                    "title": text,
                                    "url": full_url,
                                    "description": description,
                                    "content": description,  # Copy description to content field
                                    "source": "WHO",
                                    "organization": "World Health Organization",
                                    "is_guideline": True,
                                    "official": True,
                                }
                            )

                guidelines = all_guidelines[:limit]

            return guidelines

        except requests.exceptions.RequestException as e:
            return {
                "error": f"Failed to access WHO guidelines: {str(e)}",
                "source": "WHO",
            }
        except Exception as e:
            return {
                "error": f"Error processing WHO guidelines: {str(e)}",
                "source": "WHO",
            }
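

# Unlike the API-backed tools above, the WHO tool is a pure scrape of the
# guidelines listing page, so each candidate link costs an extra request for
# its description; with the 0.5 s courtesy delay in
# _fetch_guideline_description, a limit of 10 can take several seconds.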


@register_tool()
class OpenAlexGuidelinesTool(BaseTool):
    """
    OpenAlex Guidelines Search Tool.
    Specialized tool for searching clinical practice guidelines using OpenAlex API.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://api.openalex.org/works"

    def run(self, arguments):
        query = arguments.get("query", "")
        limit = arguments.get("limit", 10)
        year_from = arguments.get("year_from", None)
        year_to = arguments.get("year_to", None)

        if not query:
            return {"error": "Query parameter is required"}

        return self._search_openalex_guidelines(query, limit, year_from, year_to)

    def _search_openalex_guidelines(self, query, limit, year_from=None, year_to=None):
        """Search for clinical guidelines using OpenAlex API."""
        try:
            # Build search query to focus on guidelines
            search_query = (
                f'"{query}" AND (guideline OR "clinical practice" OR recommendation)'
            )

            # Build parameters
            params = {
                "search": search_query,
                "per_page": min(limit, 50),
                "sort": "cited_by_count:desc",  # Sort by citations
            }

            # Add year filters
            filters = []
            if year_from and year_to:
                filters.append(f"publication_year:{year_from}-{year_to}")
            elif year_from:
                filters.append(f"from_publication_date:{year_from}-01-01")
            elif year_to:
                filters.append(f"to_publication_date:{year_to}-12-31")

            # Filter for articles
            filters.append("type:article")

            if filters:
                params["filter"] = ",".join(filters)

            response = requests.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()

            data = response.json()
            results = data.get("results", [])

            guidelines = []
            for work in results:
                # Extract information
                title = work.get("title") or "N/A"  # guard: OpenAlex can return null titles
                year = work.get("publication_year", "N/A")
                doi = work.get("doi", "")
                openalex_id = work.get("id", "")
                cited_by = work.get("cited_by_count", 0)

                # Extract authors
                authors = []
                authorships = work.get("authorships", [])
                for authorship in authorships[:5]:
                    author = authorship.get("author", {})
                    author_name = author.get("display_name", "")
                    if author_name:
                        authors.append(author_name)

                # Extract institutions
                institutions = []
                for authorship in authorships[:3]:
                    for inst in authorship.get("institutions", []):
                        inst_name = inst.get("display_name", "")
                        if inst_name and inst_name not in institutions:
                            institutions.append(inst_name)

                # Extract abstract
                abstract_inverted = work.get("abstract_inverted_index", {})
                abstract = (
                    self._reconstruct_abstract(abstract_inverted)
                    if abstract_inverted
                    else None
                )

                # More strict guideline detection
                title_lower = title.lower()
                abstract_lower = abstract.lower() if abstract else ""

                # Must contain specific guideline keywords
                guideline_keywords = [
                    "guideline",
                    "practice guideline",
                    "clinical guideline",
                    "recommendation",
                    "consensus statement",
                    "position statement",
                    "clinical practice",
                    "best practice",
                ]

                has_guideline_keywords = any(
                    keyword in title_lower or keyword in abstract_lower
                    for keyword in guideline_keywords
                )

                # Check structured concepts from OpenAlex for guideline markers
                concepts = work.get("concepts", []) or []
                has_guideline_concept = False
                for concept in concepts:
                    display_name = concept.get("display_name", "").lower()
                    if any(
                        term in display_name
                        for term in [
                            "guideline",
                            "clinical practice",
                            "recommendation",
                            "consensus",
                        ]
                    ):
                        has_guideline_concept = True
                        break

                primary_topic = work.get("primary_topic", {}) or {}
                primary_topic_name = primary_topic.get("display_name", "").lower()
                if any(
                    term in primary_topic_name
                    for term in ["guideline", "clinical practice", "recommendation"]
                ):
                    has_guideline_concept = True

                # Exclude research papers and studies (but be less strict)
                exclude_keywords = [
                    "statistics",
                    "data",
                    "survey",
                    "meta-analysis",
                    "systematic review",
                ]

                is_research = any(
                    keyword in title_lower for keyword in exclude_keywords
                )

                # Determine if it's a guideline
                is_guideline = (
                    has_guideline_keywords
                    and has_guideline_concept
                    and not is_research
                    and len(title) > 20
                )

                # Build URL
                url = (
                    doi
                    if doi and doi.startswith("http")
                    else (
                        f"https://doi.org/{doi.replace('https://doi.org/', '')}"
                        if doi
                        else openalex_id
                    )
                )

                # Only add if it's actually a guideline
                if is_guideline:
                    abstract_text = abstract[:500] if abstract else None
                    guideline = {
                        "title": title,
                        "authors": authors,
                        "institutions": institutions[:3],
                        "year": year,
                        "doi": doi,
                        "url": url,
                        "openalex_id": openalex_id,
                        "cited_by_count": cited_by,
                        "is_guideline": is_guideline,
                        "source": "OpenAlex",
                        "abstract": abstract_text,
                        "content": abstract_text,  # Copy abstract to content field
                    }

                    guidelines.append(guideline)

                    # Stop when we have enough guidelines
                    if len(guidelines) >= limit:
                        break

            return guidelines

        except requests.exceptions.RequestException as e:
            return {
                "error": f"Failed to search OpenAlex: {str(e)}",
                "source": "OpenAlex",
            }
        except Exception as e:
            return {
                "error": f"Error processing OpenAlex response: {str(e)}",
                "source": "OpenAlex",
            }
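
    # OpenAlex ships abstracts as an inverted index (word -> positions) rather
    # than plain text; the helper below undoes that. Tiny worked example with
    # made-up input:
    #
    #     {"clinical": [0], "practice": [1], "guidelines": [2, 4], "for": [3]}
    #
    # placing each word at its position(s) and joining yields
    # "clinical practice guidelines for guidelines".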
    def _reconstruct_abstract(self, abstract_inverted_index):
        """Reconstruct abstract from inverted index."""
        if not abstract_inverted_index:
            return None

        try:
            # Create a list to hold words at their positions
            max_position = max(
                max(positions) for positions in abstract_inverted_index.values()
            )
            words = [""] * (max_position + 1)

            # Place each word at its positions
            for word, positions in abstract_inverted_index.items():
                for pos in positions:
                    words[pos] = word

            # Join words to form abstract
            abstract = " ".join(words).strip()
            return abstract

        except Exception:
            return None
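

# Design note: the OpenAlex search above leans on cited_by_count sorting plus
# agreement between two signals (guideline keywords in the title/abstract AND
# a guideline-flavoured concept or primary topic) to separate true guidelines
# from papers that merely mention them; both signals must fire before a work
# is returned.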


@register_tool()
class NICEGuidelineFullTextTool(BaseTool):
    """
    Fetch full text content from NICE guideline pages.
    Takes a NICE guideline URL and extracts the complete guideline content.
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://www.nice.org.uk"
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            }
        )

    def run(self, arguments):
        url = arguments.get("url", "")

        if not url:
            return {"error": "URL parameter is required"}

        # Ensure it's a NICE URL
        if "nice.org.uk" not in url:
            return {"error": "URL must be a NICE guideline URL (nice.org.uk)"}

        return self._fetch_full_guideline(url)

    def _fetch_full_guideline(self, url):
        """Fetch complete guideline content from NICE page."""
        try:
            time.sleep(1)  # Be respectful
            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")

            # Extract title
            title_elem = soup.find("h1") or soup.find("title")
            title = title_elem.get_text().strip() if title_elem else "Unknown Title"

            # Extract guideline metadata
            metadata = {}

            # Published date
            date_elem = soup.find("time") or soup.find(
                "span", {"class": "published-date"}
            )
            if date_elem:
                metadata["published_date"] = date_elem.get_text().strip()

            # Guideline code (e.g., NG28)
            code_match = re.search(r"\(([A-Z]{2,3}\d+)\)", title)
            if code_match:
                metadata["guideline_code"] = code_match.group(1)

            # Extract main content sections
            content_sections = []

            # Find main content div - NICE uses specific structure
            main_content = (
                soup.find("div", {"class": "content"})
                or soup.find("main")
                or soup.find("article")
            )

            if main_content:
                # Extract all headings and their content
                all_headings = main_content.find_all(["h1", "h2", "h3", "h4", "h5"])

                for heading in all_headings:
                    heading_text = heading.get_text().strip()

                    # Find content between this heading and the next
                    content_parts = []
                    current = heading.find_next_sibling()

                    while current and current.name not in [
                        "h1",
                        "h2",
                        "h3",
                        "h4",
                        "h5",
                    ]:
                        if current.name == "p":
                            text = current.get_text().strip()
                            if text:
                                content_parts.append(text)
                        elif current.name in ["ul", "ol"]:
                            items = current.find_all("li")
                            for li in items:
                                content_parts.append(f" • {li.get_text().strip()}")
                        elif current.name == "div":
                            # Check if div has paragraphs
                            paras = current.find_all("p", recursive=False)
                            for p in paras:
                                text = p.get_text().strip()
                                if text:
                                    content_parts.append(text)

                        current = current.find_next_sibling()

                    if content_parts:
                        content_sections.append(
                            {
                                "heading": heading_text,
                                "content": "\n\n".join(content_parts),
                            }
                        )

                # If no sections found with headings, extract all paragraphs
                if not content_sections:
                    all_paragraphs = main_content.find_all("p")
                    all_text = "\n\n".join(
                        [
                            p.get_text().strip()
                            for p in all_paragraphs
                            if p.get_text().strip()
                        ]
                    )
                    if all_text:
                        content_sections.append(
                            {"heading": "Content", "content": all_text}
                        )

            # Compile full text
            full_text_parts = []
            for section in content_sections:
                if section["heading"]:
                    full_text_parts.append(f"## {section['heading']}")
                full_text_parts.append(section["content"])

            full_text = "\n\n".join(full_text_parts)

            # Extract recommendations specifically
            recommendations = []
            rec_sections = soup.find_all(
                ["div", "section"], class_=re.compile(r"recommendation")
            )
            for rec in rec_sections[:20]:  # Limit to first 20 recommendations
                rec_text = rec.get_text().strip()
                if rec_text and len(rec_text) > 20:
                    recommendations.append(rec_text)

            return {
                "url": url,
                "title": title,
                "metadata": metadata,
                "full_text": full_text,
                "full_text_length": len(full_text),
                "sections_count": len(content_sections),
                "recommendations": recommendations[:20] if recommendations else None,
                "recommendations_count": len(recommendations) if recommendations else 0,
                "source": "NICE",
                "content_type": "full_guideline",
                "success": len(full_text) > 500,
            }

        except requests.exceptions.RequestException as e:
            return {"error": f"Failed to fetch NICE guideline: {str(e)}", "url": url}
        except Exception as e:
            return {"error": f"Error parsing NICE guideline: {str(e)}", "url": url}
|
|
1700
|
+
|
|
1701
|
+
|
|
1702
|
+
+@register_tool()
+class WHOGuidelineFullTextTool(BaseTool):
+    """
+    Fetch full text content from WHO guideline pages.
+    Takes a WHO publication URL and extracts content or PDF download link.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://www.who.int"
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+            }
+        )
+
+    def run(self, arguments):
+        url = arguments.get("url", "")
+
+        if not url:
+            return {"error": "URL parameter is required"}
+
+        # Ensure it's a WHO URL
+        if "who.int" not in url:
+            return {"error": "URL must be a WHO publication URL (who.int)"}
+
+        return self._fetch_who_guideline(url)
+
+    def _fetch_who_guideline(self, url):
+        """Fetch WHO guideline content and PDF link."""
+        try:
+            time.sleep(1)  # Be respectful
+            response = self.session.get(url, timeout=30)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, "html.parser")
+
+            # Extract title
+            title_elem = soup.find("h1") or soup.find("title")
+            title = title_elem.get_text().strip() if title_elem else "Unknown Title"
+
+            # Extract metadata
+            metadata = {}
+
+            # Publication date
+            date_elem = soup.find("time") or soup.find(
+                "span", class_=re.compile(r"date")
+            )
+            if date_elem:
+                metadata["published_date"] = date_elem.get_text().strip()
+
+            # ISBN
+            isbn_elem = soup.find(text=re.compile(r"ISBN"))
+            if isbn_elem:
+                isbn_match = re.search(r"ISBN[:\s]*([\d\-]+)", isbn_elem)
+                if isbn_match:
+                    metadata["isbn"] = isbn_match.group(1)
+
+            # Find PDF download link
+            pdf_link = None
+            pdf_links = soup.find_all("a", href=re.compile(r"\.pdf$", re.I))
+
+            for link in pdf_links:
+                href = link.get("href", "")
+                if href:
+                    # Make absolute URL
+                    if href.startswith("http"):
+                        pdf_link = href
+                    elif href.startswith("//"):
+                        pdf_link = "https:" + href
+                    elif href.startswith("/"):
+                        pdf_link = self.base_url + href
+                    else:
+                        pdf_link = self.base_url + "/" + href
+
+                    # Prefer full document over excerpts
+                    link_text = link.get_text().lower()
+                    if "full" in link_text or "complete" in link_text:
+                        break
+
+            # Extract overview/description
+            overview = ""
+            overview_section = soup.find(
+                "div", class_=re.compile(r"overview|description|summary")
+            ) or soup.find(
+                "section", class_=re.compile(r"overview|description|summary")
+            )
+
+            if overview_section:
+                paragraphs = overview_section.find_all("p")
+                overview = "\n\n".join(
+                    [p.get_text().strip() for p in paragraphs if p.get_text().strip()]
+                )
+
+            # Extract key facts/highlights
+            key_facts = []
+            facts_section = soup.find(
+                ["div", "section"], class_=re.compile(r"key.*facts|highlights")
+            )
+            if facts_section:
+                items = facts_section.find_all("li")
+                key_facts = [
+                    li.get_text().strip() for li in items if li.get_text().strip()
+                ]
+
+            # Try to extract main content
+            main_content = ""
+            content_div = (
+                soup.find("div", {"class": "content"})
+                or soup.find("main")
+                or soup.find("article")
+            )
+
+            if content_div:
+                # Get all paragraphs
+                paragraphs = content_div.find_all("p")
+                content_parts = []
+                for p in paragraphs[:50]:  # Limit to avoid too much content
+                    text = p.get_text().strip()
+                    if len(text) > 30:  # Skip very short paragraphs
+                        content_parts.append(text)
+
+                main_content = "\n\n".join(content_parts)
+
+            return {
+                "url": url,
+                "title": title,
+                "metadata": metadata,
+                "overview": overview,
+                "main_content": main_content,
+                "content_length": len(main_content),
+                "key_facts": key_facts if key_facts else None,
+                "pdf_download_url": pdf_link,
+                "has_pdf": pdf_link is not None,
+                "source": "WHO",
+                "content_type": "guideline_page",
+                "success": len(overview) > 100 or pdf_link is not None,
+                "note": (
+                    "Full text available as PDF download"
+                    if pdf_link
+                    else "Limited web content available"
+                ),
+            }
+
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Failed to fetch WHO guideline: {str(e)}", "url": url}
+        except Exception as e:
+            return {"error": f"Error parsing WHO guideline: {str(e)}", "url": url}
+
+
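The four-way `href` branch above hand-rolls URL absolutization. For reference, `urllib.parse.urljoin` from the standard library covers the same absolute, scheme-relative, root-relative, and relative cases in one call; a sketch, where the page URL is a made-up placeholder:

    from urllib.parse import urljoin

    page_url = "https://www.who.int/publications/i/item/example"  # placeholder, not a real page
    for href in [
        "https://cdn.who.int/doc.pdf",  # already absolute -> unchanged
        "//cdn.who.int/doc.pdf",        # scheme-relative  -> "https:" prepended
        "/docs/doc.pdf",                # root-relative    -> joined to the host
        "files/doc.pdf",                # relative         -> joined to the page path
    ]:
        print(urljoin(page_url, href))

One behavioral difference to note: the code above joins bare relative paths to the site root (`self.base_url + "/" + href`), whereas `urljoin` resolves them against the current page's directory, which is usually what the link author intended.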
+@register_tool()
+class GINGuidelinesTool(BaseTool):
+    """
+    Guidelines International Network (GIN) Guidelines Search Tool.
+    Searches the global guidelines database with 6400+ guidelines from various organizations.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://www.g-i-n.net"
+        self.search_url = f"{self.base_url}/library/international-guidelines-library"
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+                "Accept-Encoding": "gzip, deflate",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1",
+            }
+        )
+
+    def run(self, arguments):
+        query = arguments.get("query", "")
+        limit = arguments.get("limit", 10)
+
+        if not query:
+            return {"error": "Query parameter is required"}
+
+        return self._search_gin_guidelines(query, limit)
+
+    def _search_gin_guidelines(self, query, limit):
+        """Search GIN guidelines using web scraping."""
+        try:
+            time.sleep(1)  # Be respectful
+
+            # Try to search GIN guidelines
+            try:
+                # GIN search typically uses form parameters
+                search_params = {"search": query, "type": "guideline", "limit": limit}
+
+                response = self.session.get(
+                    self.search_url, params=search_params, timeout=30
+                )
+                response.raise_for_status()
+
+                soup = BeautifulSoup(response.content, "html.parser")
+
+                # Find guideline results - common selectors for guideline databases
+                guidelines = []
+
+                # Try different selectors for guideline results
+                result_selectors = [
+                    "div.guideline-item",
+                    "div.search-result",
+                    "div.result-item",
+                    "article.guideline",
+                    "div.item",
+                    "li.guideline",
+                ]
+
+                results = []
+                for selector in result_selectors:
+                    results = soup.select(selector)
+                    if results:
+                        break
+
+                if not results:
+                    # Fallback: look for any div with guideline-related content
+                    results = soup.find_all(
+                        "div",
+                        class_=lambda x: x
+                        and any(
+                            keyword in x.lower()
+                            for keyword in ["guideline", "result", "item", "card"]
+                        ),
+                    )
+
+                for result in results[:limit]:
+                    try:
+                        # Extract title
+                        title_elem = (
+                            result.find("h3")
+                            or result.find("h2")
+                            or result.find("a", class_="title")
+                            or result.find("a")
+                        )
+                        if not title_elem:
+                            continue
+
+                        title = title_elem.get_text().strip()
+                        if not title or len(title) < 10:
+                            continue
+
+                        # Extract URL
+                        link_elem = result.find("a", href=True)
+                        if not link_elem:
+                            continue
+
+                        url = link_elem.get("href", "")
+                        if url.startswith("/"):
+                            url = self.base_url + url
+                        elif not url.startswith("http"):
+                            continue
+
+                        # Extract description/summary
+                        desc_elem = (
+                            result.find("p")
+                            or result.find("div", class_="description")
+                            or result.find("div", class_="summary")
+                        )
+                        description = desc_elem.get_text().strip() if desc_elem else ""
+
+                        # Extract organization
+                        org_elem = (
+                            result.find("span", class_="organization")
+                            or result.find("div", class_="org")
+                            or result.find("cite")
+                        )
+                        organization = (
+                            org_elem.get_text().strip()
+                            if org_elem
+                            else "GIN Member Organization"
+                        )
+
+                        # Extract date
+                        date_elem = (
+                            result.find("time")
+                            or result.find("span", class_="date")
+                            or result.find("div", class_="date")
+                        )
+                        date = date_elem.get_text().strip() if date_elem else ""
+
+                        # Extract content from the guideline page
+                        content = self._extract_guideline_content(url)
+
+                        guidelines.append(
+                            {
+                                "title": title,
+                                "url": url,
+                                "description": description,
+                                "content": content,
+                                "date": date,
+                                "source": "GIN",
+                                "organization": organization,
+                                "is_guideline": True,
+                                "official": True,
+                            }
+                        )
+
+                    except Exception:
+                        continue
+
+                if guidelines:
+                    return guidelines
+
+            except requests.exceptions.RequestException as e:
+                print(f"GIN website access failed: {e}, trying fallback search...")
+
+            # Fallback: Return sample guidelines based on query
+            return self._get_fallback_gin_guidelines(query, limit)
+
+        except Exception as e:
+            return {
+                "error": f"Error processing GIN guidelines: {str(e)}",
+                "source": "GIN",
+            }
+
+    def _get_fallback_gin_guidelines(self, query, limit):
+        """Provide fallback guidelines when direct access fails."""
+        # This would contain sample guidelines based on common queries
+        # For now, return a message indicating the issue
+        return [
+            {
+                "title": f"GIN Guidelines Search for '{query}'",
+                "url": self.search_url,
+                "description": "GIN guidelines database access temporarily unavailable. Please try again later or visit the GIN website directly.",
+                "content": "The Guidelines International Network (GIN) maintains the world's largest database of clinical guidelines with over 6400 guidelines from various organizations worldwide.",
+                "date": "",
+                "source": "GIN",
+                "organization": "Guidelines International Network",
+                "is_guideline": False,
+                "official": True,
+                "is_placeholder": True,
+                "note": "Direct access to GIN database failed. Please visit g-i-n.net for full access.",
+            }
+        ]
+
+    def _extract_guideline_content(self, url):
+        """Extract actual content from a guideline URL."""
+        try:
+            time.sleep(0.5)  # Be respectful
+            response = self.session.get(url, timeout=15)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, "html.parser")
+
+            # Extract main content
+            content_selectors = [
+                "main",
+                ".content",
+                ".article-content",
+                ".guideline-content",
+                "article",
+                ".main-content",
+            ]
+
+            content_text = ""
+            for selector in content_selectors:
+                content_elem = soup.select_one(selector)
+                if content_elem:
+                    # Get all text content
+                    paragraphs = content_elem.find_all("p")
+                    content_parts = []
+                    for p in paragraphs:
+                        text = p.get_text().strip()
+                        if len(text) > 20:  # Skip very short paragraphs
+                            content_parts.append(text)
+
+                    if content_parts:
+                        content_text = "\n\n".join(
+                            content_parts[:10]
+                        )  # Limit to first 10 paragraphs
+                        break
+
+            # If no main content found, try to get any meaningful text
+            if not content_text:
+                all_text = soup.get_text()
+                # Clean up the text
+                lines = [line.strip() for line in all_text.split("\n") if line.strip()]
+                content_text = "\n".join(lines[:20])  # First 20 meaningful lines
+
+            return content_text[:2000]  # Limit content length
+
+        except Exception as e:
+            return f"Error extracting content: {str(e)}"
+
+
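GINGuidelinesTool's result discovery is a two-stage cascade: a list of specific CSS selectors tried in order, then a looser class-keyword scan as a catch-all. The same idea in isolation, where the HTML snippet and helper name are illustrative rather than from the package:

    from bs4 import BeautifulSoup

    def find_results(soup, selectors, keywords):
        # Stage 1: precise CSS selectors, first hit wins
        for selector in selectors:
            results = soup.select(selector)
            if results:
                return results
        # Stage 2: any div whose class mentions a result-ish keyword
        return soup.find_all(
            "div",
            class_=lambda c: c and any(k in c.lower() for k in keywords),
        )

    html = '<div class="SearchResultCard"><a href="/g/1">Guideline on X</a></div>'
    soup = BeautifulSoup(html, "html.parser")
    hits = find_results(soup, ["div.guideline-item"], ["result", "card"])
    print([h.get_text() for h in hits])  # ['Guideline on X']

The trade-off is the usual one for scrapers: the keyword fallback keeps the tool working when the site's markup changes, at the cost of occasionally matching navigation or promotional blocks, which is why the per-result loop then filters out short titles and non-HTTP links.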
+@register_tool()
+class CMAGuidelinesTool(BaseTool):
+    """
+    Canadian Medical Association (CMA) Infobase Guidelines Search Tool.
+    Searches the CMA Infobase with 1200+ Canadian clinical practice guidelines.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://joulecma.ca"
+        self.search_url = f"{self.base_url}/infobase"
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+                "Accept-Encoding": "gzip, deflate",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1",
+            }
+        )
+
+    def run(self, arguments):
+        query = arguments.get("query", "")
+        limit = arguments.get("limit", 10)
+
+        if not query:
+            return {"error": "Query parameter is required"}
+
+        return self._search_cma_guidelines(query, limit)
+
+    def _search_cma_guidelines(self, query, limit):
+        """Search CMA Infobase guidelines using web scraping."""
+        try:
+            time.sleep(1)  # Be respectful
+
+            # Try to search CMA Infobase
+            try:
+                # CMA search typically uses form parameters
+                search_params = {"search": query, "type": "guideline", "limit": limit}
+
+                response = self.session.get(
+                    self.search_url, params=search_params, timeout=30
+                )
+                response.raise_for_status()
+
+                soup = BeautifulSoup(response.content, "html.parser")
+
+                # Find guideline results
+                guidelines = []
+
+                # Try different selectors for guideline results
+                result_selectors = [
+                    "div.guideline-item",
+                    "div.search-result",
+                    "div.result-item",
+                    "article.guideline",
+                    "div.item",
+                    "li.guideline",
+                ]
+
+                results = []
+                for selector in result_selectors:
+                    results = soup.select(selector)
+                    if results:
+                        break
+
+                if not results:
+                    # Fallback: look for any div with guideline-related content
+                    results = soup.find_all(
+                        "div",
+                        class_=lambda x: x
+                        and any(
+                            keyword in x.lower()
+                            for keyword in ["guideline", "result", "item", "card"]
+                        ),
+                    )
+
+                for result in results[:limit]:
+                    try:
+                        # Extract title
+                        title_elem = (
+                            result.find("h3")
+                            or result.find("h2")
+                            or result.find("a", class_="title")
+                            or result.find("a")
+                        )
+                        if not title_elem:
+                            continue
+
+                        title = title_elem.get_text().strip()
+                        if not title or len(title) < 10:
+                            continue
+
+                        # Extract URL
+                        link_elem = result.find("a", href=True)
+                        if not link_elem:
+                            continue
+
+                        url = link_elem.get("href", "")
+                        if url.startswith("/"):
+                            url = self.base_url + url
+                        elif not url.startswith("http"):
+                            continue
+
+                        # Extract description/summary
+                        desc_elem = (
+                            result.find("p")
+                            or result.find("div", class_="description")
+                            or result.find("div", class_="summary")
+                        )
+                        description = desc_elem.get_text().strip() if desc_elem else ""
+
+                        # Extract organization
+                        org_elem = (
+                            result.find("span", class_="organization")
+                            or result.find("div", class_="org")
+                            or result.find("cite")
+                        )
+                        organization = (
+                            org_elem.get_text().strip()
+                            if org_elem
+                            else "Canadian Medical Association"
+                        )
+
+                        # Extract date
+                        date_elem = (
+                            result.find("time")
+                            or result.find("span", class_="date")
+                            or result.find("div", class_="date")
+                        )
+                        date = date_elem.get_text().strip() if date_elem else ""
+
+                        # Extract content from the guideline page
+                        content = self._extract_guideline_content(url)
+
+                        guidelines.append(
+                            {
+                                "title": title,
+                                "url": url,
+                                "description": description,
+                                "content": content,
+                                "date": date,
+                                "source": "CMA",
+                                "organization": organization,
+                                "is_guideline": True,
+                                "official": True,
+                            }
+                        )
+
+                    except Exception:
+                        continue
+
+                if guidelines:
+                    return guidelines
+
+            except requests.exceptions.RequestException as e:
+                print(f"CMA Infobase access failed: {e}, trying fallback search...")
+
+            # Fallback: Return sample guidelines based on query
+            return self._get_fallback_cma_guidelines(query, limit)
+
+        except Exception as e:
+            return {
+                "error": f"Error processing CMA guidelines: {str(e)}",
+                "source": "CMA",
+            }
+
+    def _get_fallback_cma_guidelines(self, query, limit):
+        """Provide fallback guidelines when direct access fails."""
+        # This would contain sample guidelines based on common queries
+        # For now, return a message indicating the issue
+        return [
+            {
+                "title": f"CMA Infobase Guidelines Search for '{query}'",
+                "url": self.search_url,
+                "description": "CMA Infobase access temporarily unavailable. Please try again later or visit the CMA website directly.",
+                "content": "The Canadian Medical Association Infobase contains over 1200 evidence-based clinical practice guidelines developed or endorsed by Canadian healthcare organizations.",
+                "date": "",
+                "source": "CMA",
+                "organization": "Canadian Medical Association",
+                "is_guideline": False,
+                "official": True,
+                "is_placeholder": True,
+                "note": "Direct access to CMA Infobase failed. Please visit joulecma.ca/infobase for full access.",
+            }
+        ]
+
+    def _extract_guideline_content(self, url):
+        """Extract actual content from a guideline URL."""
+        try:
+            time.sleep(0.5)  # Be respectful
+            response = self.session.get(url, timeout=15)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, "html.parser")
+
+            # Extract main content
+            content_selectors = [
+                "main",
+                ".content",
+                ".article-content",
+                ".guideline-content",
+                "article",
+                ".main-content",
+            ]
+
+            content_text = ""
+            for selector in content_selectors:
+                content_elem = soup.select_one(selector)
+                if content_elem:
+                    # Get all text content
+                    paragraphs = content_elem.find_all("p")
+                    content_parts = []
+                    for p in paragraphs:
+                        text = p.get_text().strip()
+                        if len(text) > 20:  # Skip very short paragraphs
+                            content_parts.append(text)
+
+                    if content_parts:
+                        content_text = "\n\n".join(
+                            content_parts[:10]
+                        )  # Limit to first 10 paragraphs
+                        break
+
+            # If no main content found, try to get any meaningful text
+            if not content_text:
+                all_text = soup.get_text()
+                # Clean up the text
+                lines = [line.strip() for line in all_text.split("\n") if line.strip()]
+                content_text = "\n".join(lines[:20])  # First 20 meaningful lines
+
+            return content_text[:2000]  # Limit content length
+
+        except Exception as e:
+            return f"Error extracting content: {str(e)}"
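CMAGuidelinesTool is a near line-for-line copy of GINGuidelinesTool: only the endpoints, the source label, and the fallback wording differ, and `_extract_guideline_content` is duplicated verbatim. A hypothetical refactoring sketch (the class and attribute names below are invented, not part of the package) showing how the shared logic could be hoisted out:

    import requests

    class GuidelineScraperBase:
        """Shared scraping skeleton; subclasses supply the constants."""

        source = "GENERIC"
        base_url = "https://example.org"          # placeholder endpoint
        search_url = "https://example.org/search"
        default_org = "Unknown Organization"
        fallback_blurb = "Database access temporarily unavailable."

        def __init__(self):
            self.session = requests.Session()

        def _fallback_results(self, query):
            # One implementation replaces both _get_fallback_*_guidelines copies
            return [
                {
                    "title": f"{self.source} Guidelines Search for '{query}'",
                    "url": self.search_url,
                    "description": self.fallback_blurb,
                    "source": self.source,
                    "organization": self.default_org,
                    "is_guideline": False,
                    "official": True,
                    "is_placeholder": True,
                }
            ]

    class CMAScraper(GuidelineScraperBase):
        source = "CMA"
        base_url = "https://joulecma.ca"
        search_url = "https://joulecma.ca/infobase"
        default_org = "Canadian Medical Association"

With that split, the search loop and `_extract_guideline_content` would each exist once, and adding another national guideline registry would mean declaring a handful of constants rather than copying two hundred lines.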