valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Automatically inject slot definitions for enums into LinkML schemas.
|
|
4
|
+
|
|
5
|
+
This script can:
|
|
6
|
+
1. Add a slots section to schemas that define enums
|
|
7
|
+
2. Generate appropriate slot definitions with correct ranges
|
|
8
|
+
3. Optionally create mixin classes that bundle related slots
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, Any, List, Optional, Set
|
|
14
|
+
import re
|
|
15
|
+
import click
|
|
16
|
+
from collections import OrderedDict
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SlotInjector:
    """Utility to inject slots for enums into LinkML schemas.

    Every enum found under a schema's ``enums`` section gets a slot whose
    ``range`` is that enum. Slot names are derived from the enum name by
    stripping one common suffix and converting the rest to snake_case.
    """

    @staticmethod
    def camel_to_snake(name: str) -> str:
        """Convert CamelCase to snake_case."""
        # First pass splits before a capitalised word ("HTTPResponse" ->
        # "HTTP_Response"); second pass splits a lowercase letter or digit
        # from a following capital ("ResponseCode" -> "Response_Code").
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    @staticmethod
    def snake_to_words(name: str) -> str:
        """Convert snake_case to human readable words."""
        return name.replace('_', ' ')

    def generate_slot_name(self, enum_name: str) -> str:
        """Generate slot name from enum name.

        At most one of the suffixes ``Enum``, ``Type``, ``Class`` or ``Code``
        is removed (the first that matches, in that order) before the name is
        converted to snake_case.
        """
        slot_name = enum_name
        for suffix in ['Enum', 'Type', 'Class', 'Code']:
            # Never strip the whole name: an enum called exactly "Type"
            # would otherwise produce an empty, unusable slot name.
            if slot_name.endswith(suffix) and len(slot_name) > len(suffix):
                slot_name = slot_name[:-len(suffix)]
                break

        return self.camel_to_snake(slot_name)

    def generate_slot_definition(self, enum_name: str,
                                 enum_def: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate a complete slot definition for an enum.

        Args:
            enum_name: Name of the enum; used as the slot's ``range``.
            enum_def: The enum's mapping from the schema. ``None`` (an enum
                declared with an empty body in YAML) is treated as ``{}``.

        Returns:
            A dict with ``description`` and ``range``, plus ``multivalued``
            and ``comments`` when the slot name suggests several values may
            apply.
        """
        enum_def = enum_def or {}
        slot_name = self.generate_slot_name(enum_name)

        # Prefer the first sentence of the enum's own description, with
        # whitespace collapsed so multi-line YAML text becomes one line.
        slot_desc = ''
        enum_desc = enum_def.get('description', '')
        if enum_desc:
            first_sentence = ' '.join(enum_desc.split()).split('.')[0]
            slot_desc = first_sentence[:1].upper() + first_sentence[1:]

        # Fall back to a generic description when there is no usable text
        # (missing, whitespace-only, or starting with a period).
        if not slot_desc:
            slot_desc = f"The {self.snake_to_words(slot_name)} classification"

        slot_def = {
            'description': slot_desc,
            'range': enum_name
        }

        # Heuristic: target/feature/metric/constraint style enums that list
        # permissible values are assumed to allow several values at once.
        if 'permissible_values' in enum_def:
            if any(keyword in slot_name for keyword in ['target', 'feature', 'metric', 'constraint']):
                slot_def['multivalued'] = True
                slot_def['comments'] = [f"Multiple {self.snake_to_words(slot_name)}s may apply"]

        return slot_def

    def inject_slots_into_schema(self, schema_path: Path,
                                 output_path: Optional[Path] = None,
                                 preserve_existing: bool = True) -> Dict[str, Any]:
        """
        Inject slots into a schema file.

        Args:
            schema_path: Path to the input schema
            output_path: Path to write modified schema (if None, nothing is
                written and the generated slots are only returned)
            preserve_existing: If True, don't override existing slots

        Returns:
            Dictionary of generated slots (empty when the file is empty, is
            not a mapping, or has no ``enums`` key)
        """
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_data = yaml.safe_load(f)

        # safe_load returns None for an empty document; anything that is not
        # a mapping cannot carry an 'enums' section either.
        if not isinstance(schema_data, dict) or 'enums' not in schema_data:
            return {}

        # A bare "enums:" / "slots:" key loads as None; treat it as empty.
        enums = schema_data['enums'] or {}
        if schema_data.get('slots') is None:
            schema_data['slots'] = {}

        generated_slots = {}

        for enum_name, enum_def in enums.items():
            slot_name = self.generate_slot_name(enum_name)

            # Skip if exists and preserving
            if preserve_existing and slot_name in schema_data['slots']:
                continue

            slot_def = self.generate_slot_definition(enum_name, enum_def)
            generated_slots[slot_name] = slot_def
            schema_data['slots'][slot_name] = slot_def

        if output_path:
            self.write_schema(schema_data, output_path)

        return generated_slots

    def write_schema(self, schema_data: Dict[str, Any], output_path: Path):
        """Write schema as YAML, emitting well-known top-level keys first.

        Keys listed in the preferred order come first (when present); any
        other keys follow in their existing order.
        """
        key_order = [
            'name', 'title', 'description', 'id', 'version', 'status',
            'imports', 'prefixes', 'default_prefix', 'default_curi_maps',
            'slots', 'classes', 'enums'
        ]

        ordered_data = {key: schema_data[key] for key in key_order if key in schema_data}
        for key in schema_data:
            if key not in ordered_data:
                ordered_data[key] = schema_data[key]

        # allow_unicode=True emits non-ASCII characters verbatim, so the file
        # must be opened as UTF-8 rather than the platform default encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(ordered_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)

    def generate_typed_slots_schema(self, schema_dir: Path,
                                    output_path: Path) -> None:
        """
        Generate a comprehensive slots schema from all enums in a directory.

        This creates a single schema file with all slot definitions that
        reference the appropriate enums. Files are visited in sorted path
        order so the output is reproducible; when two enums map to the same
        slot name the later one wins and a warning is printed.
        """
        all_slots: Dict[str, Any] = {}
        slot_sources: Dict[str, str] = {}  # slot name -> module that defined it

        for yaml_file in sorted(schema_dir.rglob("*.yaml")):
            # Skip meta files
            if yaml_file.name in ['linkml-meta.yaml', 'types.yaml', 'slot_mixins.yaml']:
                continue

            try:
                with open(yaml_file, 'r', encoding='utf-8') as f:
                    schema_data = yaml.safe_load(f)

                # Empty documents, non-mapping documents and schemas without
                # enums contribute nothing.
                if not isinstance(schema_data, dict) or not schema_data.get('enums'):
                    continue

                module_name = schema_data.get('name', yaml_file.stem)

                for enum_name, enum_def in schema_data['enums'].items():
                    slot_name = self.generate_slot_name(enum_name)
                    slot_def = self.generate_slot_definition(enum_name, enum_def)

                    # Record where the enum lives
                    slot_def.setdefault('comments', []).append(
                        f"Defined in module: {module_name}")

                    if slot_name in slot_sources:
                        print(f"Warning: slot '{slot_name}' from {module_name} "
                              f"replaces the one from {slot_sources[slot_name]}")

                    all_slots[slot_name] = slot_def
                    slot_sources[slot_name] = module_name

            except Exception as e:
                # Deliberately best-effort: one unreadable or malformed file
                # must not abort the whole directory scan.
                print(f"Error processing {yaml_file}: {e}")
                continue

        slots_schema = {
            'name': 'generated_slots',
            'title': 'Auto-generated Slots for Value Sets',
            'description': 'Automatically generated slot definitions for all enums in the value sets collection.',
            'id': 'https://w3id.org/linkml-common/generated-slots',
            'version': '1.0.0',
            'status': 'release',
            'imports': ['linkml:types'],
            'prefixes': {
                'linkml': 'https://w3id.org/linkml/',
                'cval': 'https://w3id.org/linkml-common/'
            },
            'default_prefix': 'cval',
            'default_curi_maps': ['semweb_context'],
            'slots': all_slots
        }

        self.write_schema(slots_schema, output_path)
        print(f"Generated {len(all_slots)} slot definitions in {output_path}")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--output', '-o', type=click.Path(path_type=Path),
              help='Output path for modified schema or generated slots file')
@click.option('--mode', '-m',
              type=click.Choice(['inject', 'generate', 'preview']),
              default='preview',
              help='Mode: inject (modify files), generate (create slots file), preview (dry run)')
@click.option('--preserve/--overwrite', default=True,
              help='Preserve existing slots when injecting')
def main(schema_path: Path, output: Optional[Path], mode: str, preserve: bool):
    """
    Generate or inject LinkML slots for enums.

    SCHEMA_PATH: Path to schema file or directory
    """
    # The docstring above doubles as the command's --help text, so it is
    # kept short and user-facing; implementation notes live in comments.
    injector = SlotInjector()

    if mode == 'inject':
        # Injection rewrites one schema; without --output it edits in place.
        if not schema_path.is_file():
            print("Inject mode requires a single file. Use generate mode for directories.")
            return
        target = output if output else schema_path
        injected = injector.inject_slots_into_schema(
            schema_path, target, preserve_existing=preserve
        )
        print(f"Injected {len(injected)} slots into {target}")
        return

    if mode == 'generate':
        # Generation scans a directory and writes one combined slots file,
        # defaulting to generated_slots.yaml inside that directory.
        if not schema_path.is_dir():
            print("Generate mode requires a directory.")
            return
        destination = output if output else schema_path / 'generated_slots.yaml'
        injector.generate_typed_slots_schema(schema_path, destination)
        return

    if mode == 'preview':
        # Dry run: passing no output path means nothing is written.
        if not schema_path.is_file():
            print("Preview mode requires a single file.")
            return
        preview = injector.inject_slots_into_schema(
            schema_path, None, preserve_existing=preserve
        )
        print(f"Would generate {len(preview)} slots:")
        for slot_name, slot_def in preview.items():
            summary = slot_def.get('description', 'No description')
            print(f" - {slot_name}: {summary}")
            print(f" Range: {slot_def.get('range')}")
            if slot_def.get('multivalued'):
                print(" Multivalued: true")


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced Pydantic Generator that includes metadata (meanings, annotations) for enum values.
|
|
3
|
+
|
|
4
|
+
This custom generator extends the LinkML PydanticGenerator to pass additional
|
|
5
|
+
metadata fields to the templates, enabling rich enum generation with ontology
|
|
6
|
+
mappings and annotations.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Dict, Any, Optional
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from linkml.generators.pydanticgen import PydanticGenerator
|
|
12
|
+
from linkml.generators.pydanticgen.template import PydanticEnum, EnumValue
|
|
13
|
+
from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# NOTE(review): EnumValue is imported from linkml's pydanticgen templates; if
# it is a pydantic model rather than a plain dataclass, stacking @dataclass on
# a subclass may not behave as intended — confirm against the installed linkml.
@dataclass
class EnhancedEnumValue(EnumValue):
    """Extended EnumValue that includes meaning and annotations fields.

    Instances are built by EnhancedPydanticGenerator.generate_enums, one per
    permissible value, so the extra metadata is available alongside the
    label, value and description.
    """
    # The permissible value's `meaning` (its ontology mapping), if any.
    meaning: Optional[str] = None
    # Plain-dict copy of the permissible value's annotations, or None when
    # the permissible value has none.
    annotations: Optional[Dict[str, Any]] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class EnhancedPydanticGenerator(PydanticGenerator):
    """
    Enhanced Pydantic generator that preserves enum metadata.

    This generator extends the base PydanticGenerator to include
    meaning (ontology mappings) and annotations in the enum template context.
    """

    def generate_enums(self) -> None:
        """Generate enums with enhanced metadata.

        For every enum in the schema view that has permissible values, builds
        a PydanticEnum whose values are EnhancedEnumValue objects and stores
        it in ``self.enums`` under the enum's name. Enums without permissible
        values are skipped.

        NOTE(review): assumes the base generator provides a mutable
        ``self.enums`` mapping and calls this method — confirm against the
        installed linkml version.
        """
        for enum_name, enum_def in self.schemaview.all_enums().items():
            if not enum_def.permissible_values:
                continue

            enum_values = {}
            for pv_name, pv in enum_def.permissible_values.items():
                enum_values[pv_name] = EnhancedEnumValue(
                    label=self._get_enum_label(pv_name, pv),
                    # Fall back to the mapping key when no explicit text is set.
                    value=pv.text if pv.text is not None else pv_name,
                    description=pv.description,
                    meaning=pv.meaning,
                    annotations=dict(pv.annotations) if pv.annotations else None
                )

            self.enums[enum_name] = PydanticEnum(
                name=self._get_class_name(enum_name),
                description=enum_def.description,
                values=enum_values
            )

    def _get_enum_label(self, pv_name: str, pv: PermissibleValue) -> str:
        """Get the label for an enum value.

        Args:
            pv_name: Name (mapping key) of the permissible value.
            pv: The permissible value itself; not consulted here.

        Returns:
            ``camelcase(pv_name)`` when the generator has a truthy
            ``camelcase_enums`` attribute; otherwise ``pv_name`` with every
            character outside ``[a-zA-Z0-9_]`` replaced by ``_`` and a
            leading ``_`` added when the result is empty or starts with a
            digit.
        """
        if getattr(self, 'camelcase_enums', False):
            from linkml.utils.formatutils import camelcase
            return camelcase(pv_name)

        # Local import: this module has no top-level `import re`.
        import re
        label = re.sub(r'[^a-zA-Z0-9_]', '_', pv_name)
        # An empty name would make label[0] raise IndexError, and neither an
        # empty string nor a leading digit is a valid identifier.
        if not label or label[0].isdigit():
            label = f'_{label}'
        return label

    def _get_class_name(self, name: str) -> str:
        """Get the class name for an enum.

        Delegates to the base generator's ``_get_class_name`` when it has
        one; otherwise returns ``name`` unchanged.
        """
        base_impl = getattr(super(), '_get_class_name', None)
        if base_impl is not None:
            return base_impl(name)
        return name
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def generate_enhanced_pydantic(schema_path: str, **kwargs) -> str:
    """
    Build Pydantic source code for a schema using the enhanced generator.

    Args:
        schema_path: Path to the LinkML schema file
        **kwargs: Forwarded unchanged to EnhancedPydanticGenerator

    Returns:
        The result of the generator's serialize() call (the generated
        Python code as a string)
    """
    # Construct and serialize in one step; the generator is not reused.
    return EnhancedPydanticGenerator(schema_path, **kwargs).serialize()
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Utility to generate LinkML slots for each enum in a schema.
|
|
4
|
+
|
|
5
|
+
For each enum, creates a corresponding slot with:
|
|
6
|
+
- Slot name: enum name converted to snake_case (removing a trailing 'Enum' or 'Type' suffix if present)
|
|
7
|
+
- Range: the enum itself
|
|
8
|
+
- Description: auto-generated from the enum name (wording depends on whether the enum has a description)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, Any, List, Optional
|
|
14
|
+
import re
|
|
15
|
+
import click
|
|
16
|
+
from linkml_runtime.utils.schemaview import SchemaView
|
|
17
|
+
from linkml_runtime.linkml_model import SchemaDefinition
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def camel_to_snake(name: str) -> str:
    """Return *name* rewritten from CamelCase to lower snake_case."""
    # Pass 1: put an underscore between any character and a following
    # capitalised word (an uppercase letter plus one or more lowercase ones).
    word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Pass 2: put an underscore between a lowercase letter or digit and the
    # uppercase letter that follows it.
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
    return fully_split.lower()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def generate_slot_name(enum_name: str) -> str:
    """
    Derive a snake_case slot name from an enum name.

    A trailing 'Enum' is dropped if present; otherwise a trailing 'Type' is
    dropped if present. At most one suffix is removed, and the remainder is
    converted to snake_case.
    """
    base = enum_name
    # Order matters: 'Enum' is checked first and only one suffix is stripped.
    for suffix in ('Enum', 'Type'):
        if base.endswith(suffix):
            base = base[:-len(suffix)]
            break

    return camel_to_snake(base)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def generate_slot_description(enum_name: str, enum_desc: Optional[str]) -> str:
    """Generate a description for the slot based on the enum.

    The text is built from the slot name derived from *enum_name*. The
    enum's own description is not quoted; it only selects the wording.

    Args:
        enum_name: Name of the enum the slot is generated for.
        enum_desc: The enum's description, or None/empty when it has none.

    Returns:
        "The <slot words> classification" when *enum_desc* is truthy,
        otherwise "The <slot words> for this entity".
    """
    # Underscores become spaces so the slot name reads as plain words.
    readable_name = generate_slot_name(enum_name).replace('_', ' ')
    ending = 'classification' if enum_desc else 'for this entity'
    return f"The {readable_name} {ending}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def generate_slots_for_schema(schema_path: Path, in_place: bool = False,
                              output_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Generate slots for all enums in a schema.

    One slot is created per enum, named by generate_slot_name, with the enum
    as its range. Enums whose slot name already exists under ``slots`` are
    skipped. Progress is reported with print().

    Args:
        schema_path: Path to the LinkML schema YAML file
        in_place: If True, modify the schema file in place
        output_path: If provided, write to this path instead
            (ignored when in_place is True)

    Returns:
        Dictionary of generated slots (slot name -> slot definition);
        empty when the file declares no enums.
    """
    # Read and write explicitly as UTF-8: the dump below uses
    # allow_unicode=True, which emits raw non-ASCII characters that a
    # locale-dependent default encoding may be unable to represent.
    with open(schema_path, 'r', encoding='utf-8') as f:
        schema_data = yaml.safe_load(f)

    # An empty document loads as None and a top-level list/scalar is not a
    # schema mapping; treat both the same as "no enums".
    if not isinstance(schema_data, dict) or not schema_data.get('enums'):
        print(f"No enums found in {schema_path}")
        return {}

    # Create the slots section if it is absent, or present but empty
    # (a bare `slots:` key loads as None).
    if not schema_data.get('slots'):
        schema_data['slots'] = {}
    existing_slots = schema_data['slots']

    generated_slots = {}

    for enum_name, enum_def in schema_data['enums'].items():
        slot_name = generate_slot_name(enum_name)

        # Never overwrite a slot that is already defined; this also covers
        # two enums that map to the same slot name.
        if slot_name in existing_slots:
            print(f" Slot '{slot_name}' already exists, skipping")
            continue

        # An enum declared with an empty body loads as None.
        enum_desc = (enum_def or {}).get('description')

        slot_def = {
            'description': generate_slot_description(enum_name, enum_desc),
            'range': enum_name
        }

        # Record the source value set only for enums that are described.
        if enum_desc:
            slot_def['comments'] = [f"Value set: {enum_name}"]

        generated_slots[slot_name] = slot_def
        existing_slots[slot_name] = slot_def
        print(f" Generated slot '{slot_name}' for enum '{enum_name}'")

    # Write output if requested; in_place takes precedence over output_path.
    if in_place or output_path:
        output_file = schema_path if in_place else output_path

        # sort_keys=False keeps the key order of the loaded document.
        # NOTE: comments in the source YAML are not carried through a
        # PyYAML load/dump round trip.
        with open(output_file, 'w', encoding='utf-8') as f:
            yaml.dump(schema_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)
        print(f"Updated schema written to {output_file}")

    return generated_slots
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def process_directory(schema_dir: Path, in_place: bool = False,
                      output_dir: Optional[Path] = None) -> None:
    """
    Run slot generation over every YAML schema found under a directory.

    The directory is searched recursively for ``*.yaml`` and ``*.yml``
    files. A failure in one file is printed and does not stop the run.
    A summary line is printed at the end.

    Args:
        schema_dir: Directory containing LinkML schema files
        in_place: If True, modify files in place
        output_dir: If provided (and in_place is False), write modified
            schemas beneath this directory, mirroring the relative layout
    """
    # File names that are never processed.
    skipped_names = ('linkml-meta.yaml', 'meta.yaml', 'types.yaml')

    candidates = [*schema_dir.rglob("*.yaml"), *schema_dir.rglob("*.yml")]
    print(f"Found {len(candidates)} YAML files in {schema_dir}")

    slot_count = 0
    file_count = 0

    for candidate in candidates:
        if candidate.name in skipped_names:
            continue

        relative = candidate.relative_to(schema_dir)
        print(f"\nProcessing {relative}...")

        try:
            if in_place or not output_dir:
                target = None
            else:
                # Mirror the source layout beneath output_dir.
                target = output_dir / relative
                target.parent.mkdir(parents=True, exist_ok=True)

            new_slots = generate_slots_for_schema(candidate, in_place=in_place,
                                                  output_path=target)

            # Only files that actually gained slots count towards the totals.
            if new_slots:
                slot_count += len(new_slots)
                file_count += 1

        except Exception as e:
            print(f" Error processing {candidate}: {e}")

    print(f"\n{'='*50}")
    print(f"Summary: Generated {slot_count} slots across {file_count} files")
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--in-place', '-i', is_flag=True,
              help='Modify schema files in place')
@click.option('--output', '-o', type=click.Path(path_type=Path),
              help='Output directory for modified schemas')
@click.option('--single-file', '-s', is_flag=True,
              help='Process single file instead of directory')
def main(schema_path: Path, in_place: bool, output: Optional[Path], single_file: bool):
    """
    Generate LinkML slots for enums in schema files.

    SCHEMA_PATH: Path to schema file or directory
    """
    # The docstring above is the command's --help text; implementation notes
    # are kept in comments so they do not leak into it.
    if single_file and schema_path.is_dir():
        # Fail with a usage message instead of an open() error on a directory.
        raise click.UsageError(
            f"--single-file was given but {schema_path} is a directory")

    if single_file or schema_path.is_file():
        # --output is documented as a directory. When it names an existing
        # directory, write the result inside it under the schema's own file
        # name; any other value is still used as the output file path.
        if output is not None and not in_place and output.is_dir():
            output = output / schema_path.name

        print(f"Processing single file: {schema_path}")
        slots = generate_slots_for_schema(schema_path, in_place=in_place,
                                          output_path=output)
        print(f"Generated {len(slots)} slots")
    else:
        # Process directory
        process_directory(schema_path, in_place=in_place, output_dir=output)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# Invoke the click command only when this module is executed as a script,
# so importing it has no side effects.
if __name__ == "__main__":
    main()
|