valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Smart slot synchronizer for LinkML schemas with enums.
|
|
4
|
+
|
|
5
|
+
Designed for periodic synchronization with --in-place option to:
|
|
6
|
+
1. Add new slots for newly added enums
|
|
7
|
+
2. Update slot ranges when enum names change
|
|
8
|
+
3. Preserve manual customizations to slots
|
|
9
|
+
4. Remove orphaned slots for deleted enums (optional)
|
|
10
|
+
5. Track changes for review
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, Any, List, Optional, Set, Tuple
|
|
16
|
+
import re
|
|
17
|
+
import click
|
|
18
|
+
from collections import OrderedDict
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
import json
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SmartSlotSyncer:
    """Keep the ``slots`` section of a LinkML schema in sync with its ``enums``.

    One slot is derived per enum: the slot name comes from the enum name and
    the slot ``range`` is the enum itself.  Depending on the sync mode,
    existing slots are left alone, regenerated, or updated while slots that
    look hand-edited are preserved.  Every applied change is appended to
    ``self.changes`` so it can be written out as a changelog.
    """

    # Suffixes stripped from an enum name when deriving its slot name.
    _NAME_SUFFIXES = ('Enum', 'Type', 'Class', 'Code')

    # Range suffixes that make a slot look enum-related during orphan detection.
    # NOTE(review): 'Code' is stripped by generate_slot_name() but is not
    # matched here - confirm whether that asymmetry is intentional.
    _ORPHAN_RANGE_SUFFIXES = ('Enum', 'Type', 'Class')

    # Fields an auto-generated slot may carry; anything else means a person
    # (or another tool) added information that must not be thrown away.
    _AUTO_FIELDS = frozenset({'description', 'range', 'multivalued', 'comments'})

    # Case-insensitive substrings in a comment that mark a slot as hand-edited.
    _MANUAL_MARKERS = ('manual', 'custom', 'do not modify')

    # Substrings of a slot name that make the generated slot multivalued.
    _MULTIVALUED_KEYWORDS = ('target', 'feature', 'metric', 'constraint')

    # Preferred order of top-level schema keys when writing the file back.
    _KEY_ORDER = (
        'name', 'title', 'description', 'id', 'version', 'status',
        'imports', 'prefixes', 'default_prefix', 'default_curi_maps',
        'slots', 'classes', 'enums',
    )

    def __init__(self, verbose: bool = False):
        """Create a syncer.

        Args:
            verbose: When true, every logged change is also printed.
        """
        self.verbose = verbose
        self.changes = []  # Audit trail: one {'timestamp', 'change'} dict per change

    @staticmethod
    def camel_to_snake(name: str) -> str:
        """Convert CamelCase to snake_case."""
        partially_split = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name)
        return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', partially_split).lower()

    def generate_slot_name(self, enum_name: str) -> str:
        """Generate the snake_case slot name for an enum name.

        At most one trailing suffix (``Enum``, ``Type``, ``Class``, ``Code``)
        is removed.  The suffix is kept when it is the whole name, so an enum
        called ``Type`` yields ``type`` rather than an empty slot name.
        """
        slot_name = enum_name
        for suffix in self._NAME_SUFFIXES:
            if slot_name.endswith(suffix) and len(slot_name) > len(suffix):
                slot_name = slot_name[:-len(suffix)]
                break
        return self.camel_to_snake(slot_name)

    def generate_slot_definition(self, enum_name: str,
                                 enum_def: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a complete slot definition for an enum.

        Args:
            enum_name: Name of the enum; becomes the slot's ``range``.
            enum_def: The enum's definition.  May be ``None`` or lack a
                description (an enum declared with an empty body).

        Returns:
            A dict with ``description`` and ``range``, plus
            ``multivalued: True`` when the slot name contains one of the
            multivalued keywords.
        """
        slot_name = self.generate_slot_name(enum_name)

        # Collapse all whitespace runs so multi-line YAML text becomes one line.
        raw_desc = (enum_def or {}).get('description') or ''
        enum_desc = ' '.join(str(raw_desc).split())

        # Use the text up to the first period, capitalised.
        first_sentence = enum_desc.split('.')[0].strip()
        if first_sentence:
            slot_desc = first_sentence[:1].upper() + first_sentence[1:]
        else:
            # No usable description: fall back to a generic one instead of ''.
            readable_name = slot_name.replace('_', ' ')
            slot_desc = f"The {readable_name} classification"

        slot_def = {
            'description': slot_desc,
            'range': enum_name
        }

        if any(keyword in slot_name for keyword in self._MULTIVALUED_KEYWORDS):
            slot_def['multivalued'] = True

        return slot_def

    def detect_changes(self, existing_slot: Dict[str, Any],
                       new_slot: Dict[str, Any]) -> List[str]:
        """Detect what changed between existing and new slot definitions.

        Returns:
            Human-readable descriptions of the differences in ``range``,
            ``multivalued`` and ``description``; empty when nothing differs.
        """
        changes = []

        # Range change (enum rename)
        old_range, new_range = existing_slot.get('range'), new_slot.get('range')
        if old_range != new_range:
            changes.append(f"range: {old_range} → {new_range}")

        # Compare truthiness so an explicit ``multivalued: false`` is not
        # reported as different from an absent key.
        old_multi, new_multi = existing_slot.get('multivalued'), new_slot.get('multivalued')
        if bool(old_multi) != bool(new_multi):
            changes.append(f"multivalued: {old_multi} → {new_multi}")

        # Description differs and was not flagged as hand-written
        if (not existing_slot.get('_manual_description') and
                existing_slot.get('description') != new_slot.get('description')):
            changes.append("description updated from enum")

        return changes

    def has_manual_customizations(self, slot_def: Dict[str, Any]) -> bool:
        """Detect if a slot has manual customizations beyond auto-generation.

        A slot counts as customized when:
        - it has any field other than ``description``, ``range``,
          ``multivalued`` and ``comments`` (this covers constraint fields
          such as ``required`` or ``pattern`` as well as ``title``,
          ``slot_uri``, mappings, ...), or
        - one of its comments contains 'manual', 'custom' or
          'do not modify' (case-insensitive).

        An empty or missing definition is never customized.
        """
        if not slot_def:
            return False

        if any(field not in self._AUTO_FIELDS for field in slot_def):
            return True

        comments = slot_def.get('comments') or []
        if isinstance(comments, str):
            # A lone string must be checked as one comment, not per character.
            comments = [comments]

        return any(
            marker in str(comment).lower()
            for comment in comments
            for marker in self._MANUAL_MARKERS
        )

    def sync_slots(self, schema_path: Path,
                   mode: str = 'update',
                   remove_orphans: bool = False,
                   dry_run: bool = False) -> Dict[str, Any]:
        """Synchronize slots with enums in a schema.

        Args:
            schema_path: Path to the schema file
            mode: 'update' (preserve customizations), 'refresh' (regenerate all),
                'conservative' (only add new)
            remove_orphans: Remove slots for deleted enums
            dry_run: Preview changes without modifying file

        Returns:
            Summary dict with 'added', 'updated', 'preserved', 'removed' and
            'warnings' lists, or ``{'status': 'no_enums', 'changes': []}``
            when the file has no ``enums`` section.
        """
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_data = yaml.safe_load(f)

        # Empty files load as None; such files have nothing to sync.
        if not isinstance(schema_data, dict) or 'enums' not in schema_data:
            return {'status': 'no_enums', 'changes': []}

        summary = self._sync_schema_data(schema_data, mode, remove_orphans)

        # Re-dumping loses YAML comments and layout, so only rewrite the file
        # when something actually changed.
        has_changes = bool(summary['added'] or summary['updated'] or summary['removed'])
        if has_changes and not dry_run:
            self.write_schema(schema_data, schema_path)

        return summary

    def _sync_schema_data(self, schema_data: Dict[str, Any], mode: str,
                          remove_orphans: bool) -> Dict[str, Any]:
        """Apply the sync to an already loaded schema dict (mutated in place)."""
        enums = schema_data.get('enums') or {}
        if not schema_data.get('slots'):
            # Covers both a missing key and ``slots:`` with a null body.
            schema_data['slots'] = {}
        slots = schema_data['slots']

        summary = {
            'added': [],
            'updated': [],
            'preserved': [],
            'removed': [],
            'warnings': []
        }

        # Generate expected slots from enums
        expected_slots = {}
        source_enum = {}
        for enum_name, enum_def in enums.items():
            slot_name = self.generate_slot_name(enum_name)
            if slot_name in expected_slots:
                # e.g. FooEnum and FooType both map to 'foo'; the later one wins.
                summary['warnings'].append(
                    f"{slot_name}: generated from both {source_enum[slot_name]} "
                    f"and {enum_name}; using {enum_name}"
                )
            expected_slots[slot_name] = self.generate_slot_definition(enum_name, enum_def)
            source_enum[slot_name] = enum_name

        for slot_name, new_slot_def in expected_slots.items():
            existing_slot = slots.get(slot_name)
            new_range = new_slot_def['range']

            if not existing_slot:
                # New slot - add it
                slots[slot_name] = new_slot_def
                summary['added'].append(slot_name)
                self.log_change(f"ADD: {slot_name} (range: {new_range})")

            elif mode == 'conservative':
                # Only add new, never modify existing
                summary['preserved'].append(slot_name)

            elif mode == 'refresh':
                # Regenerate unconditionally
                slots[slot_name] = new_slot_def
                summary['updated'].append(slot_name)
                self.log_change(f"REFRESH: {slot_name}")

            elif self.has_manual_customizations(existing_slot):
                # 'update' mode, hand-edited slot: touch nothing but the range
                old_range = existing_slot.get('range')
                if old_range == new_range:
                    summary['preserved'].append(slot_name)
                else:
                    existing_slot['range'] = new_range
                    summary['updated'].append(f"{slot_name} (range only)")
                    summary['warnings'].append(
                        f"{slot_name}: Updated range {old_range} → {new_range}, preserved customizations"
                    )
                    self.log_change(f"UPDATE: {slot_name} range: {old_range} → {new_range}")

            else:
                # 'update' mode, purely generated slot: safe to replace
                changes = self.detect_changes(existing_slot, new_slot_def)
                if changes:
                    slots[slot_name] = new_slot_def
                    summary['updated'].append(f"{slot_name} ({', '.join(changes)})")
                    self.log_change(f"UPDATE: {slot_name} - {', '.join(changes)}")
                else:
                    summary['preserved'].append(slot_name)

        if remove_orphans:
            self._remove_orphaned_slots(schema_data, set(enums), expected_slots, summary)

        return summary

    def _remove_orphaned_slots(self, schema_data: Dict[str, Any], enum_names: set,
                               expected_slots: Dict[str, Any],
                               summary: Dict[str, Any]) -> None:
        """Drop slots whose range looks like an enum that no longer exists."""
        slots = schema_data['slots']
        # A range naming a class or type defined in this schema is a valid
        # reference even if it happens to end in 'Class' or 'Type'.
        non_enum_ranges = set(schema_data.get('classes') or {}) | set(schema_data.get('types') or {})

        # Iterate over a snapshot because entries are deleted inside the loop.
        for slot_name in list(slots):
            if slot_name in expected_slots:
                continue  # Slot is backed by a current enum

            slot_def = slots[slot_name] or {}
            slot_range = slot_def.get('range')
            if not isinstance(slot_range, str):
                continue  # No usable range: cannot be tied to an enum
            if slot_range in enum_names or slot_range in non_enum_ranges:
                continue  # Range still resolves inside this schema
            if not slot_range.endswith(self._ORPHAN_RANGE_SUFFIXES):
                continue  # Does not look enum-related

            if self.has_manual_customizations(slot_def):
                summary['warnings'].append(
                    f"{slot_name}: Orphaned slot with customizations (range: {slot_range})"
                )
            else:
                del slots[slot_name]
                summary['removed'].append(slot_name)
                self.log_change(f"REMOVE: {slot_name} (orphaned, range: {slot_range})")

    def write_schema(self, schema_data: Dict[str, Any], output_path: Path):
        """Write the schema as YAML with well-known top-level keys first.

        Keys listed in ``_KEY_ORDER`` come first in that order; all remaining
        keys follow in their existing order.
        """
        ordered_data = {key: schema_data[key] for key in self._KEY_ORDER if key in schema_data}
        for key, value in schema_data.items():
            ordered_data.setdefault(key, value)

        # allow_unicode=True emits raw non-ASCII characters, so the encoding
        # must not depend on the platform default.
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(ordered_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)

    def log_change(self, message: str):
        """Record a change for the audit trail; echo it when verbose."""
        self.changes.append({
            'timestamp': datetime.now().isoformat(),
            'change': message
        })
        if self.verbose:
            print(f" {message}")

    def save_changelog(self, path: Path):
        """Save the recorded changes to ``path`` as indented JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.changes, f, indent=2)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--in-place', '-i', is_flag=True,
              help='Modify schema file in place')
@click.option('--mode', '-m',
              type=click.Choice(['update', 'refresh', 'conservative']),
              default='update',
              help='Sync mode: update (smart), refresh (regenerate), conservative (only add)')
@click.option('--remove-orphans', '-r', is_flag=True,
              help='Remove slots for deleted enums')
@click.option('--dry-run', '-n', is_flag=True,
              help='Preview changes without modifying files')
@click.option('--verbose', '-v', is_flag=True,
              help='Show detailed change information')
@click.option('--changelog', '-c', type=click.Path(path_type=Path),
              help='Save detailed changelog to file')
@click.option('--batch', '-b', is_flag=True,
              help='Process all schemas in directory')
def main(schema_path: Path, in_place: bool, mode: str,
         remove_orphans: bool, dry_run: bool, verbose: bool,
         changelog: Optional[Path], batch: bool):
    """
    Smart synchronization of LinkML slots with enums.

    Designed for periodic updates with --in-place option.

    Modes:
    - update: Smart updates preserving manual customizations (default)
    - refresh: Regenerate all enum-based slots
    - conservative: Only add new slots, never modify existing

    Examples:

    \b
    # Preview changes for a single file
    smart_slot_syncer.py schema.yaml --dry-run -v

    \b
    # Update file in place, preserving customizations
    smart_slot_syncer.py schema.yaml --in-place

    \b
    # Refresh all slots and remove orphans
    smart_slot_syncer.py schema.yaml --in-place --mode refresh --remove-orphans

    \b
    # Batch process all schemas in directory
    smart_slot_syncer.py src/valuesets/schema --batch --in-place

    \b
    # Conservative update with changelog
    smart_slot_syncer.py schema.yaml --in-place --mode conservative --changelog changes.json
    """
    # Validate once for both modes.  UsageError makes click print the message
    # and exit with a non-zero status instead of "succeeding" silently.
    if not in_place and not dry_run:
        raise click.UsageError("Must use either --in-place or --dry-run")
    if schema_path.is_dir() and not batch:
        raise click.UsageError(f"{schema_path} is a directory; use --batch to process it")

    syncer = SmartSlotSyncer(verbose=verbose)

    if batch and schema_path.is_dir():
        # Files that hold shared/meta definitions rather than enum schemas.
        skipped_names = {'linkml-meta.yaml', 'types.yaml',
                         'slot_mixins.yaml', 'generated_slots.yaml'}
        total_summary = {
            'files_processed': 0,
            'total_added': 0,
            'total_updated': 0,
            'total_removed': 0
        }

        # Sorted so output and changelog order are reproducible between runs.
        for yaml_file in sorted(schema_path.rglob("*.yaml")):
            if yaml_file.name in skipped_names:
                continue

            print(f"\nProcessing {yaml_file.relative_to(schema_path)}...")

            try:
                summary = syncer.sync_slots(yaml_file, mode=mode,
                                            remove_orphans=remove_orphans,
                                            dry_run=dry_run)
            except (OSError, yaml.YAMLError) as err:
                # One unreadable file should not abort the rest of the batch.
                print(f" Skipped: {err}")
                continue

            added = summary.get('added', [])
            updated = summary.get('updated', [])
            removed = summary.get('removed', [])

            if added or updated or removed:
                total_summary['files_processed'] += 1
                total_summary['total_added'] += len(added)
                total_summary['total_updated'] += len(updated)
                total_summary['total_removed'] += len(removed)

                print(f" Added: {len(added)}")
                print(f" Updated: {len(updated)}")
                print(f" Preserved: {len(summary.get('preserved', []))}")
                print(f" Removed: {len(removed)}")

            # Warnings can occur without any change (e.g. a customized orphan).
            if summary.get('warnings'):
                print(" Warnings:")
                for warning in summary['warnings']:
                    print(f" - {warning}")

        print(f"\n{'='*50}")
        print(f"Batch Summary: {total_summary['files_processed']} files modified")
        print(f" Total added: {total_summary['total_added']}")
        print(f" Total updated: {total_summary['total_updated']}")
        print(f" Total removed: {total_summary['total_removed']}")

    else:
        # Single file processing
        summary = syncer.sync_slots(schema_path, mode=mode,
                                    remove_orphans=remove_orphans,
                                    dry_run=dry_run)

        print(f"\n{'DRY RUN - ' if dry_run else ''}Summary for {schema_path.name}:")
        print(f" Mode: {mode}")

        # (label, summary key, bullet used when listing items in verbose mode)
        sections = (
            ('Added', 'added', '+'),
            ('Updated', 'updated', '~'),
            ('Preserved', 'preserved', '='),
            ('Removed', 'removed', '-'),
        )
        for label, key, bullet in sections:
            items = summary.get(key, [])
            print(f" {label}: {len(items)}")
            if verbose:
                for item in items:
                    print(f" {bullet} {item}")

        if summary.get('warnings'):
            print("\nWarnings:")
            for warning in summary['warnings']:
                print(f" ⚠ {warning}")

    # Save changelog if requested
    if changelog and syncer.changes:
        syncer.save_changelog(changelog)
        print(f"\nChangelog saved to {changelog}")
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|