valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# OAK adapter configuration for ontology validation
|
|
2
|
+
# Prefixes listed here will use strict validation mode
|
|
3
|
+
# - Missing terms will be treated as errors (not just INFO)
|
|
4
|
+
# - Terms will be cached locally for consistency
|
|
5
|
+
|
|
6
|
+
ontology_adapters:
|
|
7
|
+
# Core biological ontologies
|
|
8
|
+
GO: sqlite:obo:go
|
|
9
|
+
SO: sqlite:obo:so
|
|
10
|
+
CHEBI: sqlite:obo:chebi
|
|
11
|
+
CL: sqlite:obo:cl
|
|
12
|
+
UBERON: sqlite:obo:uberon
|
|
13
|
+
PATO: sqlite:obo:pato
|
|
14
|
+
|
|
15
|
+
# Investigations, experiments, and protocols
|
|
16
|
+
OBI: sqlite:obo:obi
|
|
17
|
+
MS: sqlite:obo:ms
|
|
18
|
+
MIXS: sqlite:obo:mixs
|
|
19
|
+
#ALLOTROPE: sqlite:obo:allotrope
|
|
20
|
+
AFO:
|
|
21
|
+
SIO: sqlite:obo:sio
|
|
22
|
+
MSIO: sqlite:obo:msio
|
|
23
|
+
GENO: sqlite:obo:geno
|
|
24
|
+
CHMO: sqlite:obo:chmo
|
|
25
|
+
|
|
26
|
+
# Data format and computation ontologies
|
|
27
|
+
EDAM: sqlite:obo:edam
|
|
28
|
+
|
|
29
|
+
MI: sqlite:obo:mi
|
|
30
|
+
|
|
31
|
+
# Medical/clinical ontologies
|
|
32
|
+
MONDO: sqlite:obo:mondo
|
|
33
|
+
HP: sqlite:obo:hp
|
|
34
|
+
NCIT: sqlite:obo:ncit
|
|
35
|
+
VO: sqlite:obo:vo
|
|
36
|
+
|
|
37
|
+
# Organism taxonomies
|
|
38
|
+
NCBITaxon: sqlite:obo:ncbitaxon
|
|
39
|
+
|
|
40
|
+
# Units and measurements
|
|
41
|
+
UO: sqlite:obo:uo
|
|
42
|
+
|
|
43
|
+
# Additional common prefixes
|
|
44
|
+
ENVO: sqlite:obo:envo
|
|
45
|
+
RO: sqlite:obo:ro
|
|
46
|
+
BFO: sqlite:obo:bfo
|
|
47
|
+
IAO: sqlite:obo:iao
|
|
48
|
+
PROV:
|
|
49
|
+
|
|
50
|
+
# Nulls
|
|
51
|
+
FHIR_DATA_ABSENT_REASON:
|
|
52
|
+
ENM: sqlite:obo:enanomapper
|
|
53
|
+
FABIO:
|
|
54
|
+
GC:
|
|
55
|
+
GEONAMES:
|
|
56
|
+
GO_REF:
|
|
57
|
+
GREG:
|
|
58
|
+
HEX:
|
|
59
|
+
HL7:
|
|
60
|
+
IANA:
|
|
61
|
+
ISO3166LOC:
|
|
62
|
+
LOINC:
|
|
63
|
+
QUDT:
|
|
64
|
+
SCHEMA: sqlite:obo:schema
|
|
65
|
+
SNOMED:
|
|
66
|
+
SPDX:
|
|
67
|
+
SLM: sqlite:obo:swisslipid
|
|
68
|
+
TIME:
|
|
69
|
+
DAMLPT:
|
|
70
|
+
DCMITYPE:
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Direct validation script using OLS MCP for ontology term validation.
|
|
4
|
+
|
|
5
|
+
This script is designed to work with the actual MCP OLS tool when available.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Dict, Any, Optional
|
|
11
|
+
|
|
12
|
+
# Resolve the evaluator from the installed package first. The ``src.`` prefix
# only exists in a source checkout, so it is kept purely as a fallback.
try:
    from valuesets.validators.enum_evaluator import (
        EnumEvaluator,
        ValidationResult,
        ValidationIssue
    )
except ImportError:
    # Source-checkout layout: put the directory four levels above this file
    # on sys.path so that the ``src`` package becomes importable.
    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))

    from src.valuesets.validators.enum_evaluator import (
        EnumEvaluator,
        ValidationResult,
        ValidationIssue
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def validate_schema_with_ols(
    schema_path: Path,
    ols_search_func,
    verbose: bool = False
) -> ValidationResult:
    """
    Run the enum evaluator over a single schema using an OLS search callable.

    Args:
        schema_path: Location of the LinkML schema file to check
        ols_search_func: Search callable handed to the evaluator (the
            mcp__ols__search_all_ontologies function)
        verbose: When True, print progress lines to stdout

    Returns:
        The ValidationResult returned by the evaluator
    """
    if verbose:
        print(f"Validating {schema_path.name} with OLS...")

    # Build the evaluator around the supplied search callable and run it
    checker = EnumEvaluator(ols_search_func=ols_search_func)
    outcome = checker.validate_schema(schema_path)

    # The mapping count is only consulted (and reported) in verbose mode
    if verbose:
        if outcome.total_mappings_checked > 0:
            print(f" Checked {outcome.total_mappings_checked} ontology mappings")

    return outcome
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_and_report(
    schema_path: Path,
    ols_search_func,
    show_all: bool = False
) -> bool:
    """
    Validate a schema and print a detailed report to stdout.

    Args:
        schema_path: Path to the schema file
        ols_search_func: The OLS search function
        show_all: If True, list every error and warning (not just the
            first 10) and also print up to 5 INFO messages

    Returns:
        True if no ERROR-severity issues were found (warnings alone still
        count as a pass), False otherwise
    """
    max_listed = 10  # errors/warnings listed per section unless show_all
    max_info = 5  # INFO entries listed; capped even when show_all is set
    limit = None if show_all else max_listed  # a None slice bound keeps everything

    print(f"\n{'='*70}")
    print(f"Validating: {schema_path.name}")
    print(f"{'='*70}")

    result = validate_schema_with_ols(schema_path, ols_search_func, verbose=True)

    # Print summary stats
    print("\nSummary:")
    print(f" Enums processed: {result.total_enums_checked}")
    print(f" Values checked: {result.total_values_checked}")
    print(f" Mappings validated: {result.total_mappings_checked}")

    # Categorize issues
    errors = [i for i in result.issues if i.severity == "ERROR"]
    warnings = [i for i in result.issues if i.severity == "WARNING"]
    info = [i for i in result.issues if i.severity == "INFO"]

    print("\nIssues:")
    print(f" Errors: {len(errors)}")
    print(f" Warnings: {len(warnings)}")
    print(f" Info: {len(info)}")

    # Show errors
    if errors:
        print("\n❌ ERRORS:")
        print("-" * 60)
        for i, issue in enumerate(errors[:limit], 1):
            print(f"{i}. {issue.enum_name}.{issue.value_name}")
            print(f" Issue: {issue.message}")
            if issue.meaning:
                print(f" CURIE: {issue.meaning}")
        if not show_all and len(errors) > max_listed:
            print(f" ... and {len(errors) - max_listed} more errors")

    # Show warnings (these additionally report the label mismatch, if any)
    if warnings:
        print("\n⚠️ WARNINGS:")
        print("-" * 60)
        for i, issue in enumerate(warnings[:limit], 1):
            print(f"{i}. {issue.enum_name}.{issue.value_name}")
            print(f" Issue: {issue.message}")
            if issue.meaning:
                print(f" CURIE: {issue.meaning}")
            if issue.expected_label and issue.actual_label:
                print(f" Expected: '{issue.expected_label}'")
                print(f" Actual: '{issue.actual_label}'")
        if not show_all and len(warnings) > max_listed:
            print(f" ... and {len(warnings) - max_listed} more warnings")

    # Show info messages (only when show_all is set)
    if info and show_all:
        print("\nℹ️ INFO:")
        print("-" * 60)
        for i, issue in enumerate(info[:max_info], 1):
            print(f"{i}. {issue.enum_name}.{issue.value_name}: {issue.message}")
        if len(info) > max_info:
            print(f" ... and {len(info) - max_info} more info messages")

    # Final status: only errors make the validation fail
    if errors:
        print("\n❌ Validation FAILED - Errors found")
        return False
    if warnings:
        print("\n⚠️ Validation PASSED with warnings")
        return True
    print("\n✅ Validation PASSED - All ontology mappings are correct!")
    return True
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def run_validation_with_ols_mcp(argv: Optional[List[str]] = None) -> int:
    """
    Main function to run validation using the actual OLS MCP tool.

    This function expects to be run in an environment where the
    mcp__ols__search_all_ontologies function is importable from ``__main__``.

    Args:
        argv: Command-line arguments to parse. When None (the default),
            argparse reads them from sys.argv, as before.

    Returns:
        Process exit code: 0 when validation succeeds, 1 when the OLS tool
        is unavailable, the path is invalid or contains no YAML files, or
        any schema has errors.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Validate LinkML enum ontology mappings using OLS"
    )
    parser.add_argument(
        "path",
        type=Path,
        help="Path to schema file or directory"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Show all issues (not just first 10)"
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Only show summary, not detailed issues"
    )

    args = parser.parse_args(argv)

    # Import the OLS function - this should be available in the MCP environment
    try:
        from __main__ import mcp__ols__search_all_ontologies as ols_search
    except ImportError:
        print("❌ Error: OLS MCP tool (mcp__ols__search_all_ontologies) is not available")
        print("This script must be run in an MCP-enabled environment")
        return 1
    print("✓ OLS MCP tool is available")

    path = args.path

    if path.is_file():
        # Validate single file (--quiet is not consulted in this mode)
        success = validate_and_report(path, ols_search, show_all=args.all)
        return 0 if success else 1

    elif path.is_dir():
        # Validate all schemas in directory (recursively)
        schema_files = list(path.rglob("*.yaml"))
        if not schema_files:
            print(f"No YAML files found in {path}")
            return 1

        print(f"Found {len(schema_files)} schema files to validate")

        all_success = True
        total_errors = 0
        total_warnings = 0

        for schema_file in sorted(schema_files):
            result = validate_schema_with_ols(schema_file, ols_search, verbose=not args.quiet)

            errors = sum(1 for i in result.issues if i.severity == "ERROR")
            warnings = sum(1 for i in result.issues if i.severity == "WARNING")

            total_errors += errors
            total_warnings += warnings

            # Only errors flip the overall result; warnings are reported but pass
            if errors > 0:
                all_success = False
                status = "❌ FAILED"
            elif warnings > 0:
                status = "⚠️ WARNING"
            else:
                status = "✅ OK"

            if not args.quiet:
                print(f"{status} {schema_file.relative_to(path)}: "
                      f"{errors} errors, {warnings} warnings")

        print(f"\n{'='*70}")
        print("Overall Summary:")
        print(f" Files validated: {len(schema_files)}")
        print(f" Total errors: {total_errors}")
        print(f" Total warnings: {total_warnings}")

        if all_success:
            print("\n✅ All schemas validated successfully!")
            return 0
        else:
            print("\n❌ Validation failed for some schemas")
            return 1

    else:
        print(f"Error: {path} is neither a file nor a directory")
        return 1
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
if __name__ == "__main__":
|
|
241
|
+
sys.exit(run_validation_with_ols_mcp())
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: valuesets
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: A collection of commonly used value sets
|
|
5
|
+
Author-email: Chris Mungall <cjmungall@lbl.gov>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: <4.0,>=3.9
|
|
9
|
+
Requires-Dist: linkml-runtime>=1.9.4
|
|
10
|
+
Requires-Dist: oaklib>=0.6.23
|
|
11
|
+
Requires-Dist: ruamel-yaml>=0.18.15
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
<a href="https://github.com/dalito/linkml-project-copier"><img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/copier-org/copier/master/img/badge/badge-grayscale-inverted-border-teal.json" alt="Copier Badge" style="max-width:100%;"/></a>
|
|
15
|
+
|
|
16
|
+
# Common Value Sets
|
|
17
|
+
|
|
18
|
+
A comprehensive collection of standardized enumerations and value sets for data science, bioinformatics, materials science, and beyond.
|
|
19
|
+
|
|
20
|
+
## 🎯 Why Common Value Sets?
|
|
21
|
+
|
|
22
|
+
Data standardization is hard. Every project reinvents the wheel with custom enums, inconsistent naming, and no semantic meaning.
|
|
23
|
+
**Common Value Sets** solves this by providing:
|
|
24
|
+
|
|
25
|
+
- ๐ **Rich, standardized enumerations** โ Pre-defined value sets across multiple domains
|
|
26
|
+
- 🧬 **Semantic meaning** — Every value is linked to ontology terms (when possible)
|
|
27
|
+
- ๐ **Python-first convenience** โ Work with simple enums, get semantics for free
|
|
28
|
+
- ๐ **Multi-language support** โ Generate JSON Schema, TypeScript, and more
|
|
29
|
+
- ๐ **Interoperability** โ Built on LinkML standards for maximum compatibility
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
### ๐ A Simple Example
|
|
34
|
+
|
|
35
|
+
Different datasets often represent the same concept in incompatible ways:
|
|
36
|
+
|
|
37
|
+
- `M` / `F`
|
|
38
|
+
- `male` / `female`
|
|
39
|
+
- `1` / `2`
|
|
40
|
+
|
|
41
|
+
They all mean the same thing, but they don't interoperate.
|
|
42
|
+
With **Common Value Sets**, you can instead use a shared enum:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from valuesets.enums.core import SexEnum
|
|
46
|
+
|
|
47
|
+
s = SexEnum.MALE
|
|
48
|
+
print(s.value) # "MALE"
|
|
49
|
+
print(s.get_meaning()) # "NCIT:C20197"
|
|
50
|
+
print(s.get_description())# "Male sex"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## ⚡ Quick Start
|
|
54
|
+
|
|
55
|
+
### For Python Developers
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from valuesets.enums.bio.structural_biology import StructuralBiologyTechnique
|
|
59
|
+
from valuesets.enums.spatial.spatial_qualifiers import AnatomicalSide
|
|
60
|
+
|
|
61
|
+
# Rich enums with metadata and ontology mappings
|
|
62
|
+
technique = StructuralBiologyTechnique.CRYO_EM
|
|
63
|
+
print(technique.value) # "CRYO_EM"
|
|
64
|
+
print(technique.get_description()) # "Cryo-electron microscopy"
|
|
65
|
+
print(technique.get_meaning()) # "CHMO:0002413" (Chemical Methods Ontology)
|
|
66
|
+
print(technique.get_annotations()) # {'resolution_range': '2-30 Å typical', ...}
|
|
67
|
+
|
|
68
|
+
# Spatial relationships with BSPO mappings
|
|
69
|
+
side = AnatomicalSide.LEFT
|
|
70
|
+
print(side.get_meaning()) # "BSPO:0000000" (Biological Spatial Ontology)
|
|
71
|
+
|
|
72
|
+
# Look up enums by their ontology terms
|
|
73
|
+
found = AnatomicalSide.from_meaning("BSPO:0000000") # Returns LEFT
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### For Data Scientists
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from valuesets.enums.statistics import StatisticalTest, PValueThreshold
|
|
80
|
+
from valuesets.enums.data_science import DatasetSplitType, ModelType
|
|
81
|
+
|
|
82
|
+
# Standardized statistical tests with STATO ontology mappings
|
|
83
|
+
test = StatisticalTest.STUDENTS_T_TEST
|
|
84
|
+
print(test.get_meaning()) # "STATO:0000176"
|
|
85
|
+
print(test.get_description()) # "Student's t-test for comparing means"
|
|
86
|
+
|
|
87
|
+
# ML pipeline with standard splits
|
|
88
|
+
split = DatasetSplitType.TRAIN
|
|
89
|
+
model = ModelType.RANDOM_FOREST
|
|
90
|
+
|
|
91
|
+
# P-value thresholds with clear semantics
|
|
92
|
+
threshold = PValueThreshold.SIGNIFICANT
|
|
93
|
+
print(threshold.get_annotations()) # {'value': 0.05, 'symbol': '*'}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### For Bioinformaticians
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from valuesets.enums.bio.taxonomy import CommonOrganismTaxaEnum, BiologicalKingdom
|
|
100
|
+
from valuesets.enums.bio.cell_biology import CellCyclePhase, CellType
|
|
101
|
+
|
|
102
|
+
# Model organisms with NCBI Taxonomy IDs
|
|
103
|
+
human = CommonOrganismTaxaEnum.HUMAN
|
|
104
|
+
print(human.get_meaning()) # "NCBITaxon:9606"
|
|
105
|
+
print(human.get_description()) # "Homo sapiens (human)"
|
|
106
|
+
|
|
107
|
+
# Cell biology with CL and GO mappings
|
|
108
|
+
phase = CellCyclePhase.S_PHASE
|
|
109
|
+
print(phase.get_meaning()) # "GO:0000084"
|
|
110
|
+
|
|
111
|
+
neuron = CellType.NEURON
|
|
112
|
+
print(neuron.get_meaning()) # "CL:0000540"
|
|
113
|
+
|
|
114
|
+
# Get all organisms at a specific taxonomic level
|
|
115
|
+
mammals = [org for org in CommonOrganismTaxaEnum
|
|
116
|
+
if 'MAMMALIA' in str(org)]
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## ๐๏ธ Available Domains
|
|
120
|
+
|
|
121
|
+
### Core Domains (Most Mature)
|
|
122
|
+
- **🧬 Biology**:
|
|
123
|
+
- **Structural Biology**: Cryo-EM techniques, crystallization methods, detectors
|
|
124
|
+
- **Cell Biology**: Cell types, cell cycle phases, organelles
|
|
125
|
+
- **Taxonomy**: Model organisms (all with NCBI Taxonomy IDs)
|
|
126
|
+
- **๐ Spatial**: Anatomical directions, planes, relationships (BSPO mapped)
|
|
127
|
+
- **๐ Statistics**: Statistical tests (STATO mapped), p-value thresholds
|
|
128
|
+
|
|
129
|
+
### Expanding Domains
|
|
130
|
+
- **🧪 Data Science**: ML model types, dataset splits, metrics
|
|
131
|
+
- **โ๏ธ Materials Science**: Crystal structures, characterization methods
|
|
132
|
+
- **🏥 Clinical/Medical**: Blood types (SNOMED), vital status
|
|
133
|
+
- **๐ Environmental**: Exposure routes, pollutants
|
|
134
|
+
- **⚡ Energy**: Sources, storage methods, efficiency ratings
|
|
135
|
+
|
|
136
|
+
### Coming Soon
|
|
137
|
+
- **🧭 Geography**: Country codes (ISO), time zones, coordinate systems
|
|
138
|
+
- **⏰ Time**: Temporal relationships, periods, frequencies
|
|
139
|
+
- **💼 Academic**: Publication types, research roles, funding sources
|
|
140
|
+
- **🏭 Industrial**: Manufacturing processes, quality standards
|
|
141
|
+
|
|
142
|
+
## ๐ Multiple Use Cases
|
|
143
|
+
|
|
144
|
+
### 1. **LinkML Standards** (YAML schemas)
|
|
145
|
+
Use the raw LinkML schemas for data modeling, validation, and documentation:
|
|
146
|
+
```yaml
|
|
147
|
+
# Direct schema usage
|
|
148
|
+
Person:
|
|
149
|
+
attributes:
|
|
150
|
+
vital_status:
|
|
151
|
+
range: VitalStatusEnum # ALIVE, DECEASED, UNKNOWN
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### 2. **Python Programming** (Rich Enums)
|
|
155
|
+
Get Python enums with full IDE support, type checking, and semantic metadata:
|
|
156
|
+
```python
|
|
157
|
+
# Type-safe enums with ontology mappings
|
|
158
|
+
status = VitalStatusEnum.ALIVE
|
|
159
|
+
print(status.meaning) # "NCIT:C37987"
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### 3. **"Stealth Semantics"**
|
|
163
|
+
Write simple code, get semantic meaning automatically:
|
|
164
|
+
```python
|
|
165
|
+
# Example: Different systems use different names for the same concept
|
|
166
|
+
from valuesets.enums.medical import BloodTypeEnum
|
|
167
|
+
from external_system import PatientBloodType # Third-party enum
|
|
168
|
+
|
|
169
|
+
# Even though the enum values might be named differently:
|
|
170
|
+
# BloodTypeEnum.A_POSITIVE vs PatientBloodType.A_POS
|
|
171
|
+
# They map to the same SNOMED code: SNOMED:278149003
|
|
172
|
+
|
|
173
|
+
if blood_type.get_meaning() == patient_blood.get_meaning():
|
|
174
|
+
    # Semantic interoperability - works across different naming conventions
|
|
175
|
+
    process_compatible_blood_type()
|
|
176
|
+
|
|
177
|
+
# Or use the utility function
|
|
178
|
+
if same_meaning_as(blood_type, patient_blood):
|
|
179
|
+
    process_compatible_blood_type()
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### 4. **Multi-language Interoperability**
|
|
183
|
+
Generate schemas and types for any language:
|
|
184
|
+
```bash
|
|
185
|
+
# Generate JSON Schema for web apps
|
|
186
|
+
gen-json-schema schema.yaml
|
|
187
|
+
|
|
188
|
+
# Generate TypeScript definitions
|
|
189
|
+
gen-typescript schema.yaml
|
|
190
|
+
|
|
191
|
+
# Generate SQL DDL
|
|
192
|
+
gen-sqltables schema.yaml
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### 5. **Integration & Tooling**
|
|
196
|
+
- **Excel/Google Sheets**: Generate dropdown validation lists
|
|
197
|
+
- **Web forms**: Auto-generate select options with descriptions
|
|
198
|
+
- **APIs**: Standardized response codes and classifications
|
|
199
|
+
- **Databases**: Consistent foreign key constraints
|
|
200
|
+
|
|
201
|
+
## 🛠️ Advanced Features
|
|
202
|
+
|
|
203
|
+
### Hierarchical Relationships
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
# Some enums support hierarchical is_a relationships
|
|
207
|
+
from valuesets.enums import ViralGenomeTypeEnum
|
|
208
|
+
|
|
209
|
+
# Baltimore classification with hierarchy
|
|
210
|
+
positive_rna = ViralGenomeTypeEnum.SSRNA_POSITIVE # Group IV
|
|
211
|
+
# inherits from SSRNA (single-stranded RNA)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Rich Metadata
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
from valuesets.enums.bio.structural_biology import CryoEMGridType
|
|
218
|
+
|
|
219
|
+
grid = CryoEMGridType.QUANTIFOIL
|
|
220
|
+
metadata = grid.get_metadata()
|
|
221
|
+
print(metadata)
|
|
222
|
+
# {
|
|
223
|
+
# 'name': 'QUANTIFOIL',
|
|
224
|
+
# 'value': 'QUANTIFOIL',
|
|
225
|
+
# 'description': 'Quantifoil holey carbon grid',
|
|
226
|
+
# 'annotations': {
|
|
227
|
+
# 'hole_sizes': '1.2/1.3, 2/1, 2/2 μm common',
|
|
228
|
+
# 'manufacturer': 'Quantifoil'
|
|
229
|
+
# }
|
|
230
|
+
# }
|
|
231
|
+
|
|
232
|
+
# Get all grid types with their descriptions at once
|
|
233
|
+
all_grids = CryoEMGridType.get_all_descriptions()
|
|
234
|
+
# {'C_FLAT': 'C-flat holey carbon grid', 'QUANTIFOIL': ...}
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### Utility Functions
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from valuesets.enums.spatial import AnatomicalPlane
|
|
241
|
+
|
|
242
|
+
# Get all ontology mappings for an enum
|
|
243
|
+
mappings = AnatomicalPlane.get_all_meanings()
|
|
244
|
+
print(mappings)
|
|
245
|
+
# {'SAGITTAL': 'BSPO:0000417', 'CORONAL': 'BSPO:0000019', ...}
|
|
246
|
+
|
|
247
|
+
# List all metadata for every value in an enum
|
|
248
|
+
all_metadata = AnatomicalPlane.list_metadata()
|
|
249
|
+
for name, meta in all_metadata.items():
|
|
250
|
+
    print(f"{name}: {meta.get('description', 'No description')}")
|
|
251
|
+
|
|
252
|
+
# Find enum by ontology term (useful for data integration)
|
|
253
|
+
plane = AnatomicalPlane.from_meaning("BSPO:0000417") # Returns SAGITTAL
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### Dynamic Enums
|
|
257
|
+
|
|
258
|
+
Some enums in this collection are **dynamic enums** that can be expanded at runtime by querying ontologies. This uses LinkML's [Dynamic Enum](https://linkml.io/linkml/schemas/enums.html#dynamic-enums) feature.
|
|
259
|
+
|
|
260
|
+
```yaml
|
|
261
|
+
# Example: A dynamic enum that pulls values from an ontology
|
|
262
|
+
CellTypeEnum:
|
|
263
|
+
  permissible_values:
|
|
264
|
+
    NEURON:
|
|
265
|
+
      meaning: CL:0000540
|
|
266
|
+
    ASTROCYTE:
|
|
267
|
+
      meaning: CL:0002585
|
|
268
|
+
  # Dynamic expansion from Cell Ontology
|
|
269
|
+
  reachable_from:
|
|
270
|
+
    source_ontology: obo:cl
|
|
271
|
+
    source_nodes:
|
|
272
|
+
      - CL:0000540  # neuron
|
|
273
|
+
    include_self: false
|
|
274
|
+
    relationship_types:
|
|
275
|
+
      - rdfs:subClassOf
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**Note**: Runtime expansion support is coming soon! Currently, dynamic enums provide:
|
|
279
|
+
- ✅ Static values with ontology mappings
|
|
280
|
+
- ✅ Metadata and descriptions
|
|
281
|
+
- 🚧 Runtime expansion from ontologies (coming in next release)
|
|
282
|
+
|
|
283
|
+
When runtime expansion is available, you'll be able to:
|
|
284
|
+
```python
|
|
285
|
+
# Future: Dynamically expand enum with all neuron subtypes
|
|
286
|
+
cell_types = CellTypeEnum.expand_from_ontology()
|
|
287
|
+
# Would add: MOTOR_NEURON, SENSORY_NEURON, INTERNEURON, etc.
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## 📚 Documentation
|
|
291
|
+
|
|
292
|
+
[**Full Documentation Website →**](https://linkml.io/valuesets/)
|
|
293
|
+
|
|
294
|
+
### OWL Ontology
|
|
295
|
+
|
|
296
|
+
**TODO**: The OWL artifact generated from these value sets will be available soon on:
|
|
297
|
+
- [BioPortal](https://bioportal.bioontology.org/)
|
|
298
|
+
- [Ontology Lookup Service (OLS)](https://www.ebi.ac.uk/ols/)
|
|
299
|
+
|
|
300
|
+
## 🚀 Future Directions
|
|
301
|
+
|
|
302
|
+
### Maturity Levels
|
|
303
|
+
We plan to add maturity level metadata to each enum to help users understand their readiness:
|
|
304
|
+
|
|
305
|
+
- **🟢 Stable**: Production-ready, well-tested, unlikely to change
|
|
306
|
+
- **🟡 Beta**: Usable but may have minor changes
|
|
307
|
+
- **🔴 Draft**: Under development, expect changes
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
# Future: Check maturity before use
|
|
311
|
+
if enum_def.maturity_level == MaturityLevel.STABLE:
|
|
312
|
+
    use_in_production()
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### Modularization
|
|
316
|
+
Split the package into domain-specific modules for lighter installs:
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
# Future: Install only what you need
|
|
320
|
+
pip install valuesets-core # Core functionality
|
|
321
|
+
pip install valuesets-bio # Biological domains
|
|
322
|
+
pip install valuesets-materials # Materials science
|
|
323
|
+
pip install valuesets-clinical # Clinical/medical
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Community Extensions
|
|
327
|
+
- **Domain Packages**: Community-maintained domain-specific value sets
|
|
328
|
+
- **Organization Standards**: Company/institution-specific enums that extend base sets
|
|
329
|
+
- **Mapping Tables**: Cross-ontology and cross-standard mappings
|
|
330
|
+
|
|
331
|
+
### Advanced Features
|
|
332
|
+
- **🤖 AI/LLM Integration**: Semantic annotations optimized for language models
|
|
333
|
+
- **📊 Usage Analytics**: Track which enums are most used, identify gaps
|
|
334
|
+
- **🔄 Version Management**: Handle enum evolution with deprecation warnings
|
|
335
|
+
- **🌐 Multi-ontology Support**: Map single values to multiple ontologies
|
|
336
|
+
- **🔍 Fuzzy Matching**: Find enums by approximate string matching
|
|
337
|
+
|
|
338
|
+
## 🏗️ Development
|
|
339
|
+
|
|
340
|
+
### Installation
|
|
341
|
+
```bash
|
|
342
|
+
git clone https://github.com/linkml/valuesets
|
|
343
|
+
cd valuesets
|
|
344
|
+
uv sync
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
### Available Commands
|
|
348
|
+
```bash
|
|
349
|
+
just --list # Show all available commands
|
|
350
|
+
just test # Run tests
|
|
351
|
+
just doctest # Run doctests
|
|
352
|
+
just lint # Run linting
|
|
353
|
+
just site # Build documentation site
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
## 🤝 Contributing
|
|
357
|
+
|
|
358
|
+
We welcome contributions! Whether you're adding new domains, improving existing enums, or fixing bugs:
|
|
359
|
+
|
|
360
|
+
1. **Domain Experts**: Contribute standardized value sets for your field
|
|
361
|
+
2. **Developers**: Add utility functions, improve tooling, fix issues
|
|
362
|
+
3. **Users**: Report missing enums, suggest improvements, share use cases
|
|
363
|
+
|
|
364
|
+
## 📁 Repository Structure
|
|
365
|
+
|
|
366
|
+
```
|
|
367
|
+
├── src/valuesets/
|
|
368
|
+
│   ├── schema/              # 📋 LinkML YAML schemas (source of truth)
|
|
369
|
+
│   │   ├── bio/             # Biological domains
|
|
370
|
+
│   │   │   ├── cell_biology.yaml
|
|
371
|
+
│   │   │   ├── structural_biology.yaml
|
|
372
|
+
│   │   │   └── taxonomy.yaml
|
|
373
|
+
│   │   ├── spatial/         # Spatial and anatomical
|
|
374
|
+
│   │   │   └── spatial_qualifiers.yaml
|
|
375
|
+
│   │   ├── statistics.yaml
|
|
376
|
+
│   │   └── core.yaml
|
|
377
|
+
│   ├── enums/               # 🐍 Generated Python enums
|
|
378
|
+
│   │   └── <auto-generated from schemas>
|
|
379
|
+
│   ├── generators/          # 🔧 Rich enum generator
|
|
380
|
+
│   │   └── rich_enum.py
|
|
381
|
+
│   └── validators/          # ✓ Ontology validation
|
|
382
|
+
│       └── enum_evaluator.py
|
|
383
|
+
├── docs/                    # 📚 Documentation
|
|
384
|
+
└── tests/                   # 🧪 Test cases
|
|
385
|
+
    ├── test_rich_enums.py   # Rich enum functionality
|
|
386
|
+
    └── validators/          # Ontology validation tests
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
## 🙏 Credits
|
|
390
|
+
|
|
391
|
+
Built with [LinkML](https://linkml.io/) and the [linkml-project-copier](https://github.com/dalito/linkml-project-copier) template.
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
|
|
395
|
+
*Making data standardization simple, semantic, and scalable* 🌐
|