valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enum evaluator for validating ontology mappings in LinkML schemas.
|
|
3
|
+
|
|
4
|
+
This module validates that ontology term mappings (meanings) in enum definitions
|
|
5
|
+
match the expected labels from the ontology.
|
|
6
|
+
|
|
7
|
+
Uses OAK (Ontology Access Kit) as the abstraction layer for all ontology access.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
import os
|
|
14
|
+
import warnings
|
|
15
|
+
import csv
|
|
16
|
+
import yaml
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import List, Optional, Dict, Set
|
|
20
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
21
|
+
from linkml_runtime.utils.schemaview import SchemaView
|
|
22
|
+
from linkml_runtime.linkml_model import EnumDefinition, PermissibleValue
|
|
23
|
+
|
|
24
|
+
# presumably a cap on lookup/result counts — LIMIT is not referenced anywhere
# in the visible portion of this module; TODO confirm it is used elsewhere
# before removing.
LIMIT = 300

# OAK (oaklib) is an optional dependency: when it is missing the evaluator
# degrades gracefully and skips ontology lookups (see _initialize_oak).
try:
    from oaklib import get_adapter
    HAS_OAK = True
except ImportError:
    HAS_OAK = False

# Module-level logger; basicConfig makes INFO-level progress messages
# visible when this module is run as a script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ValidationConfig(BaseModel):
    """Configuration for validation.

    All fields have defaults; unknown fields are rejected
    (``extra="forbid"``) so typos in a config fail loudly.
    """
    model_config = ConfigDict(extra="forbid")

    # Adapter used when a CURIE prefix has no per-prefix configuration.
    # The default "sqlite:obo:" enables dynamic per-prefix SemSQL adapter
    # selection (see EnumEvaluator._initialize_oak).
    oak_adapter_string: str = Field(
        default="sqlite:obo:",
        description="OAK adapter string (e.g., sqlite:obo:, ols:, bioportal:)"
    )
    # When True, label mismatches are reported as ERROR instead of WARNING
    # (see EnumEvaluator.validate_enum).
    strict_mode: bool = Field(
        default=False,
        description="Treat warnings as errors"
    )
    # When False, the in-memory label cache is disabled entirely.
    cache_labels: bool = Field(
        default=True,
        description="Cache ontology labels to avoid redundant lookups"
    )
    # When None, an oak_config.yaml located next to this module is used
    # if present (see EnumEvaluator._load_oak_config).
    oak_config_path: Optional[Path] = Field(
        default=None,
        description="Path to OAK configuration YAML file"
    )
    # Root directory for the per-prefix terms.csv file caches.
    cache_dir: Path = Field(
        default=Path("cache"),
        description="Directory for storing cached terms"
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ValidationIssue(BaseModel):
    """Represents a single validation issue."""
    model_config = ConfigDict(extra="forbid")

    # Name of the enum the issue was found in ("<schema>" for file-level errors).
    enum_name: str
    # Name of the permissible value ("<error>" for file-level errors).
    value_name: str
    # One of ERROR / WARNING / INFO, enforced by the regex pattern.
    severity: str = Field(pattern="^(ERROR|WARNING|INFO)$")
    # Human-readable description of the problem.
    message: str
    # The ontology CURIE (the permissible value's `meaning`), when relevant.
    meaning: Optional[str] = None
    # Label the schema implied vs. label the ontology actually reports.
    expected_label: Optional[str] = None
    actual_label: Optional[str] = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ValidationResult(BaseModel):
    """Results from validating a schema.

    Aggregates the :class:`ValidationIssue` records found, together with
    counters describing how much of the schema was examined.
    """
    model_config = ConfigDict(extra="forbid")

    # Path of the schema that was validated (None when not file-based).
    schema_path: Optional[Path] = None
    # All issues discovered, in discovery order.
    issues: List[ValidationIssue] = Field(default_factory=list)
    total_enums_checked: int = 0
    total_values_checked: int = 0
    total_mappings_checked: int = 0

    def has_errors(self) -> bool:
        """Check if there are any errors."""
        return any(i.severity == "ERROR" for i in self.issues)

    def has_warnings(self) -> bool:
        """Check if there are any warnings."""
        return any(i.severity == "WARNING" for i in self.issues)

    def print_summary(self):
        """Print a summary of validation results to stdout."""
        # Plain strings here: the originals were f-strings with no
        # placeholders (lint F541).
        print("\nValidation Summary:")
        print(f" Enums checked: {self.total_enums_checked}")
        print(f" Values checked: {self.total_values_checked}")
        print(f" Mappings checked: {self.total_mappings_checked}")

        # Count severities in a single pass instead of building three
        # throwaway lists; this also avoids shadowing the imported
        # `warnings` module with a local variable.
        counts = {"ERROR": 0, "WARNING": 0, "INFO": 0}
        for issue in self.issues:
            counts[issue.severity] = counts.get(issue.severity, 0) + 1

        print(f" Errors: {counts['ERROR']}")
        print(f" Warnings: {counts['WARNING']}")
        print(f" Info: {counts['INFO']}")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class EnumEvaluator:
    """Evaluator for validating ontology mappings in enums."""

    def __init__(self, config: Optional[ValidationConfig] = None):
        """
        Initialize the evaluator.

        Args:
            config: Validation configuration; a default ValidationConfig
                is created when omitted.
        """
        self.config = config or ValidationConfig()
        # In-memory CURIE -> label cache; None disables caching entirely.
        self._label_cache = {} if self.config.cache_labels else None
        self._per_prefix_adapters = {}  # Cache of per-ontology adapters
        # prefix (lowercase) -> adapter string, loaded from oak_config.yaml.
        self._oak_config = self._load_oak_config()
        self._prefix_caches = {}  # Initialize here to avoid AttributeError
        self._warned_prefixes = set()  # Track prefixes we've already warned about
        self._initialize_oak()
|
|
126
|
+
|
|
127
|
+
    def _load_oak_config(self) -> Dict[str, str]:
        """Load OAK configuration from YAML file.

        Returns a mapping of ontology prefix (lower-cased) to OAK adapter
        string, read from the ``ontology_adapters`` section of the config
        file. A missing or unreadable config is tolerated and yields an
        empty mapping, so validation falls back to dynamic adapters.
        """
        config_path = self.config.oak_config_path
        if not config_path:
            # Default to config file next to this module
            config_path = Path(__file__).parent / "oak_config.yaml"

        if not config_path.exists():
            logger.warning(f"OAK config file not found: {config_path}")
            return {}

        try:
            with open(config_path, 'r') as f:
                config_data = yaml.safe_load(f)
            adapters = config_data.get('ontology_adapters', {})
            # Convert keys to lowercase for case-insensitive lookup
            return {k.lower(): v for k, v in adapters.items()}
        except Exception as e:
            # Broad catch is deliberate: a malformed config should degrade
            # to "no configured prefixes", not abort validation.
            logger.warning(f"Could not load OAK config: {e}")
            return {}
|
|
147
|
+
|
|
148
|
+
def _get_cache_file(self, prefix: str) -> Path:
|
|
149
|
+
"""Get the cache file path for a given prefix."""
|
|
150
|
+
cache_dir = self.config.cache_dir / prefix.lower()
|
|
151
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
152
|
+
return cache_dir / "terms.csv"
|
|
153
|
+
|
|
154
|
+
def _load_cache(self, prefix: str) -> Dict[str, str]:
|
|
155
|
+
"""Load cached terms for a prefix."""
|
|
156
|
+
cache_file = self._get_cache_file(prefix)
|
|
157
|
+
cache = {}
|
|
158
|
+
|
|
159
|
+
if cache_file.exists():
|
|
160
|
+
try:
|
|
161
|
+
with open(cache_file, 'r', newline='') as f:
|
|
162
|
+
reader = csv.reader(f)
|
|
163
|
+
next(reader) # Skip header
|
|
164
|
+
for row in reader:
|
|
165
|
+
if len(row) >= 2:
|
|
166
|
+
cache[row[0]] = row[1] # curie -> label
|
|
167
|
+
except Exception as e:
|
|
168
|
+
logger.warning(f"Could not load cache for {prefix}: {e}")
|
|
169
|
+
|
|
170
|
+
return cache
|
|
171
|
+
|
|
172
|
+
    def _save_to_cache(self, prefix: str, curie: str, label: Optional[str]):
        """Save a term to cache.

        Appends one ``curie,label,timestamp`` row to the prefix's
        terms.csv, creating the file (with header) on first use. Only
        prefixes present in the OAK config are persisted; other lookups
        stay in memory only. Duplicate CURIEs are never re-written.
        """
        if prefix.lower() not in self._oak_config:
            return  # Only cache for configured prefixes

        cache_file = self._get_cache_file(prefix)

        # Read existing cache
        # NOTE(review): the whole file is re-read on every save, making each
        # call O(n) in cache size — consider consulting self._prefix_caches
        # for the duplicate check if this becomes a hot path.
        existing_cache = set()
        if cache_file.exists():
            try:
                with open(cache_file, 'r', newline='') as f:
                    reader = csv.reader(f)
                    next(reader)  # Skip header
                    for row in reader:
                        if len(row) >= 1:
                            existing_cache.add(row[0])
            except Exception:
                # Unreadable cache: treat as empty and fall through to append.
                pass

        # Don't add if already exists
        if curie in existing_cache:
            return

        # Append new entry
        try:
            # Create file with header if it doesn't exist
            if not cache_file.exists():
                with open(cache_file, 'w', newline='') as f:
                    writer = csv.writer(f)
                    writer.writerow(['curie', 'label', 'retrieved_at'])

            # Append new row; a failed lookup is stored as an empty label
            # so the negative result is also cached.
            with open(cache_file, 'a', newline='') as f:
                writer = csv.writer(f)
                timestamp = datetime.now().isoformat()
                writer.writerow([curie, label or '', timestamp])
        except Exception as e:
            logger.warning(f"Could not save to cache for {prefix}: {e}")
|
|
211
|
+
|
|
212
|
+
    def _initialize_oak(self):
        """Initialize OAK adapters dynamically based on usage.

        With the default "sqlite:obo:" adapter string no adapter is
        created here; per-prefix adapters are built on demand in
        get_ontology_label(). Any other adapter string (ols:, bioportal:,
        ...) is instantiated once and shared under the '_default' key of
        self._per_prefix_adapters. No-op when oaklib is not installed.
        """
        if not HAS_OAK:
            logger.warning("OAK is not installed. Install with: pip install oaklib")
            return

        # Don't initialize a main adapter if using dynamic sqlite:obo:
        # We'll create per-prefix adapters on demand
        if self.config.oak_adapter_string == "sqlite:obo:":
            logger.info("Using dynamic SemSQL adapter selection based on CURIE prefix")
            return

        # For other adapter types (ols:, bioportal:, etc), create a single adapter
        try:
            self._per_prefix_adapters['_default'] = get_adapter(self.config.oak_adapter_string)
            logger.info(f"Initialized OAK adapter: {self.config.oak_adapter_string}")
        except Exception as e:
            # Adapter failure is non-fatal: lookups will simply return None.
            logger.warning(f"Could not initialize OAK adapter: {e}")
|
|
230
|
+
|
|
231
|
+
    def get_ontology_label(self, curie: str) -> Optional[str]:
        """
        Get the label for an ontology term using OAK.

        Checks local cache first, then tries OAK lookup, then saves to cache.

        Args:
            curie: A prefixed identifier such as ``GO:0008150``.

        Returns:
            The ontology's label for the term, or None when the CURIE is
            malformed, the prefix is configured to be skipped, no adapter
            could be created, or the term is unknown.
        """
        # Check in-memory cache first
        if self._label_cache is not None and curie in self._label_cache:
            return self._label_cache[curie]

        # Parse the CURIE to get the prefix
        prefix = curie.split(":")[0] if ":" in curie else None
        if not prefix:
            return None

        prefix_lower = prefix.lower()

        # Check file cache for configured prefixes.
        # NOTE(review): nothing in the visible portion of this module
        # populates self._prefix_caches from _load_cache(); confirm that
        # loading happens elsewhere (e.g. in main()) or this branch is dead.
        if prefix_lower in self._prefix_caches:
            if curie in self._prefix_caches[prefix_lower]:
                label = self._prefix_caches[prefix_lower][curie]
                # Also cache in memory
                if self._label_cache is not None:
                    self._label_cache[curie] = label
                # Empty string means "cached negative result" -> None.
                return label if label else None

        label = None
        adapter = None

        # Try configured adapter first for this prefix
        if prefix_lower in self._oak_config:
            adapter_string = self._oak_config[prefix_lower]

            # If adapter string is empty or None, skip validation entirely
            if not adapter_string:
                logger.debug(f"Skipping validation for {prefix} (empty adapter string in config)")
                self._per_prefix_adapters[prefix_lower] = None
                return None

            if prefix_lower not in self._per_prefix_adapters:
                try:
                    self._per_prefix_adapters[prefix_lower] = get_adapter(adapter_string)
                    logger.info(f"Created configured adapter for {prefix} ontology")
                except Exception as e:
                    # Remember the failure (None) so we don't retry per term.
                    logger.warning(f"Could not create configured adapter for {prefix}: {e}")
                    self._per_prefix_adapters[prefix_lower] = None

            adapter = self._per_prefix_adapters.get(prefix_lower)
        elif self.config.oak_adapter_string == "sqlite:obo:" and prefix:
            # Dynamic mode: create per-ontology adapter on demand
            if prefix_lower not in self._per_prefix_adapters:
                try:
                    adapter_string = f"sqlite:obo:{prefix_lower}"
                    self._per_prefix_adapters[prefix_lower] = get_adapter(adapter_string)
                    logger.info(f"Created adapter for {prefix} ontology")
                except Exception as e:
                    logger.debug(f"Could not create adapter for {prefix}: {e}")
                    # Track unknown prefix for end-of-run reporting
                    if prefix_lower not in self._warned_prefixes:
                        self._warned_prefixes.add(prefix_lower)
                    self._per_prefix_adapters[prefix_lower] = None

            adapter = self._per_prefix_adapters.get(prefix_lower)
        else:
            # Use default adapter for other configurations
            adapter = self._per_prefix_adapters.get('_default')

        # Get the label
        if adapter:
            try:
                label = adapter.label(curie)
            except Exception as e:
                logger.debug(f"Could not get label for {curie}: {e}")

        # Cache the result in memory (including None, a negative result)
        if self._label_cache is not None:
            self._label_cache[curie] = label

        # Save to file cache for configured prefixes
        if prefix_lower in self._oak_config:
            self._save_to_cache(prefix, curie, label)
            # Also update in-memory cache
            if prefix_lower in self._prefix_caches:
                self._prefix_caches[prefix_lower][curie] = label or ''

        return label
|
|
317
|
+
|
|
318
|
+
def is_prefix_configured(self, prefix: str) -> bool:
|
|
319
|
+
"""Check if a prefix is configured for strict validation."""
|
|
320
|
+
prefix_lower = prefix.lower()
|
|
321
|
+
return (prefix_lower in self._oak_config and
|
|
322
|
+
bool(self._oak_config[prefix_lower]))
|
|
323
|
+
|
|
324
|
+
def normalize_string(self, s: str) -> str:
|
|
325
|
+
"""
|
|
326
|
+
Normalize a string for comparison by removing non-alphanumeric chars
|
|
327
|
+
and converting to lowercase.
|
|
328
|
+
"""
|
|
329
|
+
if not s:
|
|
330
|
+
return ""
|
|
331
|
+
# Remove non-alphanumeric characters
|
|
332
|
+
s = re.sub(r'[^a-zA-Z0-9\s]', ' ', s)
|
|
333
|
+
# Collapse multiple spaces
|
|
334
|
+
s = re.sub(r'\s+', ' ', s)
|
|
335
|
+
return s.strip().lower()
|
|
336
|
+
|
|
337
|
+
def extract_aliases(self, pv: PermissibleValue, value_name: str) -> Set[str]:
|
|
338
|
+
"""
|
|
339
|
+
Extract all possible aliases for a permissible value.
|
|
340
|
+
|
|
341
|
+
This includes:
|
|
342
|
+
- The value name itself
|
|
343
|
+
- The title (if present)
|
|
344
|
+
- Any aliases (if present)
|
|
345
|
+
- Annotations that might contain display names
|
|
346
|
+
"""
|
|
347
|
+
aliases = {value_name}
|
|
348
|
+
|
|
349
|
+
if pv.title:
|
|
350
|
+
aliases.add(pv.title)
|
|
351
|
+
|
|
352
|
+
if pv.aliases:
|
|
353
|
+
aliases.update(pv.aliases)
|
|
354
|
+
|
|
355
|
+
# Add structured_aliases if present
|
|
356
|
+
if hasattr(pv, 'structured_aliases') and pv.structured_aliases:
|
|
357
|
+
for struct_alias in pv.structured_aliases:
|
|
358
|
+
if hasattr(struct_alias, 'literal_form') and struct_alias.literal_form:
|
|
359
|
+
aliases.add(struct_alias.literal_form)
|
|
360
|
+
|
|
361
|
+
# Check annotations for common alias fields
|
|
362
|
+
if pv.annotations:
|
|
363
|
+
for key in ['label', 'display_name', 'preferred_name', 'synonym']:
|
|
364
|
+
if key in pv.annotations:
|
|
365
|
+
val = pv.annotations[key]
|
|
366
|
+
if val and hasattr(val, 'value'):
|
|
367
|
+
aliases.add(str(val.value))
|
|
368
|
+
elif val:
|
|
369
|
+
aliases.add(str(val))
|
|
370
|
+
|
|
371
|
+
return aliases
|
|
372
|
+
|
|
373
|
+
    def validate_enum(self, enum_def: EnumDefinition, enum_name: str) -> List[ValidationIssue]:
        """
        Validate a single enum definition.

        For every permissible value that carries a `meaning` CURIE, the
        ontology label is fetched and compared (after normalization)
        against the value name, title, and aliases. Returns the list of
        issues found; an empty list means the enum is clean.
        """
        issues = []

        if not enum_def.permissible_values:
            return issues

        for value_name, pv in enum_def.permissible_values.items():
            # Check if there's a meaning (ontology mapping)
            meaning = pv.meaning
            if not meaning:
                continue

            # Check if this prefix has an empty adapter string (skip validation)
            prefix = meaning.split(":")[0] if ":" in meaning else None
            if prefix and prefix.lower() in self._oak_config and not self._oak_config[prefix.lower()]:
                logger.debug(f"Skipping validation for {meaning} (empty adapter string in config)")
                continue

            # Get the actual label from ontology
            actual_label = self.get_ontology_label(meaning)

            # Get all possible expected labels
            expected_labels = self.extract_aliases(pv, value_name)

            # Normalize for comparison
            normalized_expected = {self.normalize_string(label) for label in expected_labels}
            normalized_actual = self.normalize_string(actual_label) if actual_label else None

            # Check if actual label matches any expected label
            if actual_label is None:
                # Could not retrieve label - severity depends on whether prefix is configured
                prefix = meaning.split(":")[0] if ":" in meaning else None
                if prefix and self.is_prefix_configured(prefix):
                    # Strict mode for configured prefixes
                    severity = "ERROR"
                    message = f"Could not retrieve label for configured ontology term {meaning}"
                else:
                    # Lenient mode for unconfigured prefixes
                    severity = "INFO"
                    message = f"Could not retrieve label for {meaning}"

                issue = ValidationIssue(
                    enum_name=enum_name,
                    value_name=value_name,
                    severity=severity,
                    message=message,
                    meaning=meaning
                )
                issues.append(issue)
            elif normalized_actual not in normalized_expected:
                # Label mismatch - treat as ERROR for configured prefixes or in strict mode
                prefix = meaning.split(":")[0] if ":" in meaning else None
                is_configured = prefix and self.is_prefix_configured(prefix)
                severity = "ERROR" if (self.config.strict_mode or is_configured) else "WARNING"
                # NOTE(review): expected_label records only the bare value
                # name even though the message lists every alias — consider
                # recording the full alias set for consistency.
                issue = ValidationIssue(
                    enum_name=enum_name,
                    value_name=value_name,
                    severity=severity,
                    message=f"Ontology label mismatch: expected one of {expected_labels}, got '{actual_label}'",
                    meaning=meaning,
                    expected_label=value_name,
                    actual_label=actual_label
                )
                issues.append(issue)

        return issues
|
|
442
|
+
|
|
443
|
+
def validate_schema(self, schema_path: Path) -> ValidationResult:
    """Validate the ontology mappings of every enum defined in one schema.

    Loads the schema with SchemaView, walks all enum definitions, tallies
    the enums / permissible values / 'meaning' mappings inspected, and
    collects the per-enum issues produced by validate_enum().

    Args:
        schema_path: Path to the LinkML schema YAML file.

    Returns:
        A ValidationResult holding the counters and every ValidationIssue
        found. Never raises: any loading/parsing failure is recorded as a
        single schema-level ERROR issue instead of propagating.
    """
    result = ValidationResult(schema_path=schema_path)

    try:
        schema_view = SchemaView(str(schema_path))

        for name, definition in schema_view.all_enums().items():
            result.total_enums_checked += 1

            permissible = definition.permissible_values
            if permissible:
                result.total_values_checked += len(permissible)
                # Only values carrying a 'meaning' CURIE count as mappings.
                result.total_mappings_checked += sum(
                    1 for pv in permissible.values() if pv.meaning
                )

            result.issues.extend(self.validate_enum(definition, name))

    except Exception as e:
        # Deliberate catch-all boundary: an unreadable or malformed schema
        # becomes one reportable ERROR rather than aborting a batch run.
        logger.error(f"Error validating schema {schema_path}: {e}")
        result.issues.append(ValidationIssue(
            enum_name="<schema>",
            value_name="<error>",
            severity="ERROR",
            message=f"Failed to validate schema: {e}",
            meaning=None
        ))

    return result
|
|
481
|
+
|
|
482
|
+
def report_unknown_prefixes(self) -> None:
    """Print the ontology prefixes seen during validation that lack an adapter.

    Suggests an oak_config.yaml entry for each prefix collected in
    self._warned_prefixes; prints nothing when that set is empty.
    """
    if not self._warned_prefixes:
        return

    print("\n📋 Unknown ontology prefixes encountered:")
    print(" Consider adding these to oak_config.yaml if they are valid ontologies:")
    for prefix in sorted(self._warned_prefixes):
        print(f" • {prefix.upper()}: sqlite:obo:{prefix}")
    print(" Or remove the 'meaning:' mappings if these are not valid ontology terms.")
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _configure_logging(verbose: bool) -> None:
    """Set up logging: chatty in verbose mode, fully silenced otherwise."""
    if verbose:
        logging.basicConfig(level=logging.INFO, force=True)
        return

    # Suppress all logging output in non-verbose mode.
    logging.basicConfig(level=logging.CRITICAL, force=True)
    # Also silence oaklib and other noisy library loggers.
    for logger_name in ['oaklib', 'root', 'pystow', 'linkml_runtime', 'urllib3', 'httpx', 'httpcore']:
        logging.getLogger(logger_name).setLevel(logging.CRITICAL)

    # Suppress pystow progress bars.
    import os
    os.environ['PYSTOW_NO_PROGRESS'] = '1'


def _validate_single_file(args, evaluator) -> int:
    """Validate one schema file, print the outcome, and return an exit code."""
    result = evaluator.validate_schema(args.path)

    if not result.has_errors() and not result.has_warnings():
        if args.verbose:
            result.print_summary()
        else:
            print("✅")  # Just a checkmark for success
        # Report unknown prefixes even on success.
        evaluator.report_unknown_prefixes()
        return 0

    # Always show errors and warnings, formatted per verbosity.
    if args.verbose:
        result.print_summary()
        for issue in result.issues:
            print(f"\n{issue.severity}: {issue.enum_name}.{issue.value_name}")
            print(f" {issue.message}")
            if issue.meaning:
                print(f" CURIE: {issue.meaning}")
    else:
        errors = [i for i in result.issues if i.severity == "ERROR"]
        warnings = [i for i in result.issues if i.severity == "WARNING"]

        if errors:
            print(f"❌ Validation failed with {len(errors)} error(s)\n")
            print("ERRORS:")
            for issue in errors:
                print(f" • {args.path.name}:{issue.enum_name}.{issue.value_name}: {issue.message}")
                if issue.meaning:
                    print(f" Fix: Check CURIE {issue.meaning}")

        if warnings and not args.strict:
            print(f"\n⚠️ {len(warnings)} warning(s):")
            for issue in warnings[:LIMIT]:  # Show at most LIMIT warnings
                id_info = f" [{issue.meaning}]" if issue.meaning else ""
                print(f" • {issue.enum_name}.{issue.value_name}{id_info}: {issue.message}")
            if len(warnings) > LIMIT:
                print(f" ... and {len(warnings) - LIMIT} more warnings")

    evaluator.report_unknown_prefixes()
    return 1 if result.has_errors() or (args.strict and result.has_warnings()) else 0


def _validate_directory(args, evaluator) -> int:
    """Validate every *.yaml schema under a directory tree; return an exit code."""
    # linkml_model files are vendored copies of the metamodel, not ours to check.
    schema_files = sorted([f for f in args.path.rglob("*.yaml")
                           if "linkml_model" not in str(f)])

    if args.verbose:
        print(f"🔍 Validating {len(schema_files)} schema files...\n")

    all_results = []
    for schema_file in schema_files:
        if args.verbose:
            print(f"Validating {schema_file.name}...")
        result = evaluator.validate_schema(schema_file)
        result.schema_path = schema_file  # Store path for error reporting
        all_results.append(result)
        if args.verbose:
            result.print_summary()

    total_errors = sum(len([i for i in r.issues if i.severity == "ERROR"]) for r in all_results)
    total_warnings = sum(len([i for i in r.issues if i.severity == "WARNING"]) for r in all_results)

    if total_errors == 0 and total_warnings == 0:
        if args.verbose:
            print(f"\n{'='*60}")
            print(f"✅ All {len(schema_files)} schemas validated successfully!")
        else:
            print("✅")  # Just a checkmark for complete success
        evaluator.report_unknown_prefixes()
        return 0

    if not args.verbose:
        # Concise error listing.
        if total_errors > 0:
            print(f"❌ Validation failed with {total_errors} error(s) in {sum(1 for r in all_results if r.has_errors())} file(s)\n")
            print("ERRORS:")
            for result in all_results:
                for issue in (i for i in result.issues if i.severity == "ERROR"):
                    schema_name = result.schema_path.name if hasattr(result, 'schema_path') else 'unknown'
                    print(f" • {schema_name}:{issue.enum_name}.{issue.value_name}: {issue.message}")
                    if issue.meaning:
                        print(f" Fix: Check CURIE {issue.meaning}")

        if total_warnings > 0 and not args.strict:
            print(f"\n⚠️ {total_warnings} warning(s) in {sum(1 for r in all_results if r.has_warnings())} file(s)")
            # Cap warning output at LIMIT, the same cap the single-file path
            # uses (was a hard-coded 100 here, inconsistent with LIMIT).
            shown = 0
            for result in all_results:
                if shown >= LIMIT:
                    break
                for issue in (i for i in result.issues if i.severity == "WARNING"):
                    if shown >= LIMIT:
                        break
                    schema_name = result.schema_path.name if hasattr(result, 'schema_path') else 'unknown'
                    id_info = f" [{issue.meaning}]" if issue.meaning else ""
                    print(f" • {schema_name}:{issue.enum_name}.{issue.value_name}{id_info}: {issue.message}")
                    shown += 1
            if total_warnings > LIMIT:
                print(f" ... and {total_warnings - LIMIT} more warnings")
    else:
        # Verbose summary footer.
        print(f"\n{'='*60}")
        print(f"Overall: {total_errors} errors, {total_warnings} warnings in {len(schema_files)} files")

    evaluator.report_unknown_prefixes()
    return 1 if total_errors > 0 or (args.strict and total_warnings > 0) else 0


def main():
    """CLI entry point: validate LinkML enum ontology mappings.

    Accepts a schema file or a directory of schemas, builds a
    ValidationConfig from the command-line flags, and dispatches to the
    single-file or directory validator.

    Returns:
        0 on success; 1 when errors were found (or warnings, with --strict),
        when OAK is missing, or when the path is neither file nor directory.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Validate LinkML enum ontology mappings")
    parser.add_argument("path", type=Path, help="Path to schema file or directory")
    parser.add_argument("--adapter", default="sqlite:obo:",
                        help="OAK adapter string (e.g., sqlite:obo:, sqlite:obo:merged, ols:, bioportal:)")
    parser.add_argument("--strict", action="store_true", help="Treat warnings as errors")
    parser.add_argument("--no-cache", action="store_true", help="Disable label caching")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output with detailed information")

    args = parser.parse_args()

    # Build configuration from flags.
    config = ValidationConfig(
        oak_adapter_string=args.adapter,
        strict_mode=args.strict,
        cache_labels=not args.no_cache
    )

    _configure_logging(args.verbose)

    # Fail fast before constructing the evaluator if OAK is unavailable
    # (previously the evaluator was built first, wasting work on this path).
    if not HAS_OAK:
        print("Error: OAK is not installed. Please install with: pip install oaklib")
        return 1

    evaluator = EnumEvaluator(config)

    if args.path.is_file():
        return _validate_single_file(args, evaluator)
    elif args.path.is_dir():
        return _validate_directory(args, evaluator)
    else:
        print(f"Error: {args.path} is not a file or directory")
        return 1
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
if __name__ == "__main__":
|
|
668
|
+
import sys
|
|
669
|
+
sys.exit(main())
|