valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SSSOM TSV generator for LinkML enum mappings.
|
|
4
|
+
|
|
5
|
+
Generates Simple Standard for Sharing Ontological Mappings (SSSOM) TSV files
|
|
6
|
+
from LinkML enum definitions with ontology mappings.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import csv
|
|
10
|
+
import logging
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Dict, Optional, Any
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
|
|
15
|
+
from linkml_runtime.utils.schemaview import SchemaView
|
|
16
|
+
from linkml_runtime.linkml_model import EnumDefinition, PermissibleValue
|
|
17
|
+
|
|
18
|
+
# Import shared mapping utilities
|
|
19
|
+
try:
|
|
20
|
+
from ..utils.mapping_utils import extract_all_mappings, deduplicate_mappings
|
|
21
|
+
except ImportError:
|
|
22
|
+
# Fallback for running as script
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
26
|
+
from valuesets.utils.mapping_utils import extract_all_mappings, deduplicate_mappings
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
from oaklib import get_adapter
|
|
30
|
+
HAS_OAK = True
|
|
31
|
+
except ImportError:
|
|
32
|
+
HAS_OAK = False
|
|
33
|
+
|
|
34
|
+
logging.basicConfig(level=logging.INFO)
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
# Prefix -> namespace IRI pairs that SSSOMGenerator.write_sssom_tsv emits
# under the "#curie_map:" header of every generated file.
SSSOM_PREFIXES = dict(
    rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs="http://www.w3.org/2000/01/rdf-schema#",
    owl="http://www.w3.org/2002/07/owl#",
    skos="http://www.w3.org/2004/02/skos/core#",
    sssom="https://w3id.org/sssom/",
    dcterms="http://purl.org/dc/terms/",
    semapv="https://w3id.org/semapv/",
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SSSOMGenerator:
    """Generator for SSSOM TSV files from LinkML schemas.

    For every permissible value of every enum in a schema, the mappings
    returned by ``extract_all_mappings`` are turned into SSSOM rows.
    Object labels are looked up through OAK when it is installed and an
    adapter string is configured; otherwise labels are left empty.
    """

    # Confidence assigned to each mapping predicate; predicates not listed
    # here get a confidence of 1.0.
    _PREDICATE_CONFIDENCE = {
        "skos:closeMatch": 0.9,
        "skos:narrowMatch": 0.8,
        "skos:broadMatch": 0.8,
        "skos:relatedMatch": 0.7,
    }

    # CURIE prefix -> ontology source name. Prefixes not listed here fall
    # back to the lower-cased prefix. Built once instead of on every call.
    _ONTOLOGY_SOURCES = {
        "NCIT": "ncit",
        "CHEBI": "chebi",
        "GO": "go",
        "UBERON": "uberon",
        "HP": "hp",
        "MONDO": "mondo",
        "ENVO": "envo",
        "OBI": "obi",
        "SNOMED": "snomed",
        "LOINC": "loinc",
        "MSIO": "msio",
        "mesh": "mesh",
        "IAO": "iao",
        "FABIO": "fabio",
        "PATO": "pato",
        "GENO": "geno",
        "GSSO": "gsso",
        "MS": "ms",
        "CRediT": "credit",
        "TIME": "time",
        "greg": "gregorian",
    }

    def __init__(self, oak_adapter_string: Optional[str] = "sqlite:obo:", cache_labels: bool = True):
        """
        Initialize the SSSOM generator.

        Args:
            oak_adapter_string: OAK adapter configuration. The default
                ``"sqlite:obo:"`` selects one adapter per CURIE prefix on
                demand. ``None`` (or an empty string) disables label lookups.
            cache_labels: Whether to cache ontology labels
        """
        self.oak_adapter_string = oak_adapter_string
        self._label_cache = {} if cache_labels else None
        self._per_prefix_adapters = {}
        self._initialize_oak()

    def _initialize_oak(self):
        """Initialize OAK for label lookups."""
        if not self.oak_adapter_string:
            # Lookups were explicitly disabled; do not try to build an
            # adapter from None, which only produced a misleading warning.
            logger.debug("No OAK adapter configured - labels will not be retrieved")
            return

        if not HAS_OAK:
            logger.warning("OAK not installed - labels will not be retrieved")
            return

        # Similar to enum_evaluator, we'll create adapters on demand
        if self.oak_adapter_string == "sqlite:obo:":
            logger.info("Using dynamic OAK adapter selection")
            return

        try:
            self._per_prefix_adapters['_default'] = get_adapter(self.oak_adapter_string)
            logger.info(f"Initialized OAK adapter: {self.oak_adapter_string}")
        except Exception as e:
            logger.warning(f"Could not initialize OAK: {e}")

    def _adapter_for_prefix(self, prefix: str):
        """Return the adapter cached for ``prefix``, creating it on first use.

        Tries ``sqlite:obo:<prefix>`` first and ``sqlite:obo:merged`` as a
        fallback. If both fail, ``None`` is cached so the failing lookup is
        not retried for every CURIE with the same prefix.
        """
        if prefix not in self._per_prefix_adapters:
            adapter = None
            for descriptor in (f"sqlite:obo:{prefix}", "sqlite:obo:merged"):
                try:
                    adapter = get_adapter(descriptor)
                except Exception as e:
                    # Narrower than a bare except so Ctrl-C still interrupts.
                    logger.debug(f"Could not create adapter {descriptor}: {e}")
                    continue
                logger.debug(f"Created adapter for {prefix}")
                break
            self._per_prefix_adapters[prefix] = adapter
        return self._per_prefix_adapters[prefix]

    def get_ontology_label(self, curie: str) -> Optional[str]:
        """Get label for an ontology term.

        Args:
            curie: Term identifier, normally of the form ``PREFIX:local``.

        Returns:
            The label reported by the OAK adapter, or ``None`` when OAK is
            unavailable, lookups are disabled, no adapter could be created,
            or the lookup raised.
        """
        if not HAS_OAK or not self.oak_adapter_string:
            return None

        # Check cache
        if self._label_cache is not None and curie in self._label_cache:
            return self._label_cache[curie]

        prefix = curie.split(":")[0].lower() if ":" in curie else None

        # Get or create adapter
        if self.oak_adapter_string == "sqlite:obo:" and prefix:
            adapter = self._adapter_for_prefix(prefix)
        else:
            adapter = self._per_prefix_adapters.get('_default')

        # Get label
        label = None
        if adapter:
            try:
                label = adapter.label(curie)
            except Exception as e:
                logger.debug(f"Could not get label for {curie}: {e}")

        # Cache result (including misses, so they are not looked up again)
        if self._label_cache is not None:
            self._label_cache[curie] = label

        return label

    @classmethod
    def _confidence_for(cls, predicate: str) -> float:
        """Return the confidence score assigned to a mapping predicate."""
        return cls._PREDICATE_CONFIDENCE.get(predicate, 1.0)

    def generate_mappings(self, schema_path: Path) -> List[Dict[str, Any]]:
        """
        Generate SSSOM mappings from a LinkML schema.

        Any exception raised while processing the schema is logged and
        swallowed; mappings collected before the failure are still returned.

        Args:
            schema_path: Path to LinkML schema file

        Returns:
            List of mapping dictionaries
        """
        mappings: List[Dict[str, Any]] = []

        try:
            sv = SchemaView(str(schema_path))
            schema_id = sv.schema.id or str(schema_path)
            default_prefix = sv.schema.default_prefix

            # Process each enum
            for enum_name, enum_def in sv.all_enums().items():
                if not enum_def.permissible_values:
                    continue

                # Build enum URI
                if default_prefix:
                    enum_uri = f"{default_prefix}:{enum_name}"
                else:
                    enum_uri = f"{schema_id}#{enum_name}"

                # Process each permissible value
                for value_name, pv in enum_def.permissible_values.items():
                    # Extract all mappings using shared utility
                    pv_mappings = extract_all_mappings(pv, include_meaning=True, include_annotations=True)

                    # Skip if no mappings
                    if not pv_mappings:
                        continue

                    # Deduplicate mappings
                    pv_mappings = deduplicate_mappings(pv_mappings)

                    # Build subject URI
                    subject_id = f"{enum_uri}.{value_name}"

                    # Process each mapping
                    for object_id, predicate, mapping_comment in pv_mappings:
                        # Value description first, then the mapping's own note
                        comment = "; ".join(
                            part for part in (pv.description, mapping_comment) if part
                        )

                        mappings.append({
                            "subject_id": subject_id,
                            "subject_label": pv.title or value_name,
                            "predicate_id": predicate,
                            "object_id": object_id,
                            "object_label": self.get_ontology_label(object_id) or "",
                            "mapping_justification": "semapv:ManualMappingCuration",
                            "subject_source": schema_id,
                            "object_source": self._extract_ontology_source(object_id),
                            "mapping_tool": "linkml-valuesets",
                            "confidence": self._confidence_for(predicate),
                            "subject_type": "enum_value",
                            "object_type": "ontology_class",
                            "comment": comment,
                        })

        except Exception as e:
            logger.error(f"Error processing schema {schema_path}: {e}")

        return mappings

    def _extract_ontology_source(self, curie: str) -> str:
        """Extract ontology source from CURIE.

        Returns the mapped source name for known prefixes, the lower-cased
        prefix for unknown ones, and ``""`` when ``curie`` has no colon.
        """
        if ":" not in curie:
            return ""
        prefix = curie.split(":")[0]
        return self._ONTOLOGY_SOURCES.get(prefix, prefix.lower())

    @staticmethod
    def _prepare_metadata(metadata: Optional[Dict[str, str]]) -> Dict[str, str]:
        """Return a copy of ``metadata`` with the default header fields filled in.

        A copy is used so the caller's dictionary is never modified.
        """
        meta = dict(metadata) if metadata else {}
        meta.setdefault("mapping_set_id", "https://w3id.org/linkml/valuesets/mappings")
        meta.setdefault("mapping_set_version", datetime.now().strftime("%Y-%m-%d"))
        meta.setdefault("license", "https://creativecommons.org/publicdomain/zero/1.0/")
        meta.setdefault("creator_id", "https://github.com/linkml/linkml-valuesets")
        return meta

    def write_sssom_tsv(self, mappings: List[Dict[str, Any]], output_path: Path,
                        metadata: Optional[Dict[str, str]] = None):
        """
        Write mappings to SSSOM TSV file.

        Nothing is written (and a warning is logged) when ``mappings`` is
        empty. The ``metadata`` argument is not modified.

        Args:
            mappings: List of mapping dictionaries
            output_path: Output file path
            metadata: Optional metadata for SSSOM header
        """
        if not mappings:
            logger.warning("No mappings to write")
            return

        # Prepare metadata
        meta = self._prepare_metadata(metadata)

        # Define field order (SSSOM standard order)
        fieldnames = [
            "subject_id",
            "subject_label",
            "predicate_id",
            "object_id",
            "object_label",
            "mapping_justification",
            "subject_source",
            "object_source",
            "mapping_tool",
            "confidence",
            "subject_type",
            "object_type",
            "comment",
        ]

        # Explicit encoding: labels and descriptions may contain non-ASCII
        # text, and the platform default encoding is not always UTF-8.
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            # Write metadata header
            f.write("#curie_map:\n")
            for prefix, uri in SSSOM_PREFIXES.items():
                f.write(f"# {prefix}: \"{uri}\"\n")
            f.write("#\n")

            for key, value in meta.items():
                f.write(f"#{key}: {value}\n")
            f.write("#\n")

            # Write TSV data; keys outside ``fieldnames`` are dropped
            writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t',
                                    extrasaction='ignore')
            writer.writeheader()
            writer.writerows(mappings)

        logger.info(f"Wrote {len(mappings)} mappings to {output_path}")

    def generate_from_directory(self, schema_dir: Path, output_path: Path,
                                metadata: Optional[Dict[str, str]] = None):
        """
        Generate SSSOM TSV from all schemas in a directory.

        The directory is searched recursively for ``*.yaml`` files, in sorted
        order; paths containing ``linkml_model`` are skipped.

        Args:
            schema_dir: Directory containing LinkML schemas
            output_path: Output TSV file path
            metadata: Optional SSSOM metadata

        Returns:
            The combined list of mapping dictionaries from all schemas.
        """
        all_mappings = []

        for schema_file in sorted(schema_dir.rglob("*.yaml")):
            # Skip linkml model files
            if "linkml_model" in str(schema_file):
                continue

            logger.info(f"Processing {schema_file.name}")
            all_mappings.extend(self.generate_mappings(schema_file))

        self.write_sssom_tsv(all_mappings, output_path, metadata)
        return all_mappings
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def main(argv=None):
    """CLI entry point.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        Process exit code: 1 when the input path is neither a file nor a
        directory, 0 otherwise.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Generate SSSOM TSV from LinkML enum mappings"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Input LinkML schema file or directory"
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=Path("mappings.sssom.tsv"),
        help="Output SSSOM TSV file (default: mappings.sssom.tsv)"
    )
    parser.add_argument(
        "--adapter",
        default="sqlite:obo:",
        help="OAK adapter string for label lookups"
    )
    parser.add_argument(
        "--no-labels",
        action="store_true",
        help="Skip ontology label lookups"
    )
    parser.add_argument(
        "--mapping-set-id",
        help="Mapping set ID for SSSOM metadata"
    )
    parser.add_argument(
        "--license",
        help="License URL for SSSOM metadata"
    )

    args = parser.parse_args(argv)

    # Validate the input first so a bad path fails before any generator
    # (and possibly an OAK adapter) is set up.
    input_is_file = args.input.is_file()
    if not input_is_file and not args.input.is_dir():
        print(f"Error: {args.input} is not a file or directory", file=sys.stderr)
        return 1

    # Create generator; a None adapter string means no label lookups
    generator = SSSOMGenerator(
        oak_adapter_string=None if args.no_labels else args.adapter
    )

    # Prepare metadata from the options that were actually supplied
    metadata = {}
    if args.mapping_set_id:
        metadata["mapping_set_id"] = args.mapping_set_id
    if args.license:
        metadata["license"] = args.license

    # Generate mappings
    if input_is_file:
        mappings = generator.generate_mappings(args.input)
        generator.write_sssom_tsv(mappings, args.output, metadata)
    else:
        mappings = generator.generate_from_directory(args.input, args.output, metadata)

    if not mappings:
        # write_sssom_tsv writes nothing for an empty mapping list, so do
        # not claim that an output file was generated.
        print(f"No mappings found in {args.input}; {args.output} was not written",
              file=sys.stderr)
        return 0

    print(f"Generated SSSOM TSV: {args.output}")
    return 0
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
# Script entry: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|