valuesets 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valuesets might be problematic. Click here for more details.

Files changed (248) hide show
  1. valuesets/__init__.py +7 -0
  2. valuesets/_version.py +8 -0
  3. valuesets/datamodel/valuesets.py +13796 -0
  4. valuesets/datamodel/valuesets_dataclass.py +24503 -0
  5. valuesets/datamodel/valuesets_pydantic.py +13796 -0
  6. valuesets/enums/__init__.py +590 -0
  7. valuesets/enums/academic/__init__.py +1 -0
  8. valuesets/enums/academic/research.py +559 -0
  9. valuesets/enums/analytical_chemistry/__init__.py +1 -0
  10. valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
  11. valuesets/enums/bio/__init__.py +1 -0
  12. valuesets/enums/bio/biological_colors.py +238 -0
  13. valuesets/enums/bio/cell_cycle.py +180 -0
  14. valuesets/enums/bio/currency_chemicals.py +52 -0
  15. valuesets/enums/bio/developmental_stages.py +103 -0
  16. valuesets/enums/bio/genome_features.py +182 -0
  17. valuesets/enums/bio/genomics.py +91 -0
  18. valuesets/enums/bio/go_aspect.py +32 -0
  19. valuesets/enums/bio/go_causality.py +58 -0
  20. valuesets/enums/bio/go_evidence.py +129 -0
  21. valuesets/enums/bio/human_developmental_stages.py +62 -0
  22. valuesets/enums/bio/insdc_geographic_locations.py +591 -0
  23. valuesets/enums/bio/insdc_missing_values.py +49 -0
  24. valuesets/enums/bio/lipid_categories.py +67 -0
  25. valuesets/enums/bio/mouse_developmental_stages.py +62 -0
  26. valuesets/enums/bio/plant_biology.py +86 -0
  27. valuesets/enums/bio/plant_developmental_stages.py +54 -0
  28. valuesets/enums/bio/plant_sex.py +81 -0
  29. valuesets/enums/bio/protein_evidence.py +61 -0
  30. valuesets/enums/bio/proteomics_standards.py +123 -0
  31. valuesets/enums/bio/psi_mi.py +306 -0
  32. valuesets/enums/bio/relationship_to_oxygen.py +37 -0
  33. valuesets/enums/bio/sequence_alphabets.py +449 -0
  34. valuesets/enums/bio/sequence_chemistry.py +357 -0
  35. valuesets/enums/bio/sequencing_platforms.py +302 -0
  36. valuesets/enums/bio/structural_biology.py +320 -0
  37. valuesets/enums/bio/taxonomy.py +238 -0
  38. valuesets/enums/bio/trophic_levels.py +85 -0
  39. valuesets/enums/bio/uniprot_species.py +344 -0
  40. valuesets/enums/bio/viral_genome_types.py +47 -0
  41. valuesets/enums/bioprocessing/__init__.py +1 -0
  42. valuesets/enums/bioprocessing/scale_up.py +249 -0
  43. valuesets/enums/business/__init__.py +1 -0
  44. valuesets/enums/business/human_resources.py +275 -0
  45. valuesets/enums/business/industry_classifications.py +181 -0
  46. valuesets/enums/business/management_operations.py +228 -0
  47. valuesets/enums/business/organizational_structures.py +236 -0
  48. valuesets/enums/business/quality_management.py +181 -0
  49. valuesets/enums/business/supply_chain.py +232 -0
  50. valuesets/enums/chemistry/__init__.py +1 -0
  51. valuesets/enums/chemistry/chemical_entities.py +315 -0
  52. valuesets/enums/chemistry/reaction_directionality.py +65 -0
  53. valuesets/enums/chemistry/reactions.py +256 -0
  54. valuesets/enums/clinical/__init__.py +1 -0
  55. valuesets/enums/clinical/nih_demographics.py +177 -0
  56. valuesets/enums/clinical/phenopackets.py +254 -0
  57. valuesets/enums/common_value_sets.py +8791 -0
  58. valuesets/enums/computing/__init__.py +1 -0
  59. valuesets/enums/computing/file_formats.py +294 -0
  60. valuesets/enums/computing/maturity_levels.py +196 -0
  61. valuesets/enums/computing/mime_types.py +227 -0
  62. valuesets/enums/confidence_levels.py +168 -0
  63. valuesets/enums/contributor.py +30 -0
  64. valuesets/enums/core.py +42 -0
  65. valuesets/enums/data/__init__.py +1 -0
  66. valuesets/enums/data/data_absent_reason.py +53 -0
  67. valuesets/enums/data_science/__init__.py +1 -0
  68. valuesets/enums/data_science/binary_classification.py +87 -0
  69. valuesets/enums/data_science/emotion_classification.py +66 -0
  70. valuesets/enums/data_science/priority_severity.py +73 -0
  71. valuesets/enums/data_science/quality_control.py +46 -0
  72. valuesets/enums/data_science/sentiment_analysis.py +50 -0
  73. valuesets/enums/data_science/text_classification.py +97 -0
  74. valuesets/enums/demographics.py +206 -0
  75. valuesets/enums/ecological_interactions.py +151 -0
  76. valuesets/enums/energy/__init__.py +1 -0
  77. valuesets/enums/energy/energy.py +343 -0
  78. valuesets/enums/energy/fossil_fuels.py +29 -0
  79. valuesets/enums/energy/nuclear/__init__.py +1 -0
  80. valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
  81. valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
  82. valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
  83. valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
  84. valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
  85. valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
  86. valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
  87. valuesets/enums/energy/nuclear/reactor_types.py +163 -0
  88. valuesets/enums/environmental_health/__init__.py +1 -0
  89. valuesets/enums/environmental_health/exposures.py +265 -0
  90. valuesets/enums/geography/__init__.py +1 -0
  91. valuesets/enums/geography/geographic_codes.py +741 -0
  92. valuesets/enums/health/__init__.py +12 -0
  93. valuesets/enums/health/vaccination.py +98 -0
  94. valuesets/enums/health.py +36 -0
  95. valuesets/enums/health_base.py +36 -0
  96. valuesets/enums/healthcare.py +45 -0
  97. valuesets/enums/industry/__init__.py +1 -0
  98. valuesets/enums/industry/extractive_industry.py +94 -0
  99. valuesets/enums/industry/mining.py +388 -0
  100. valuesets/enums/industry/safety_colors.py +201 -0
  101. valuesets/enums/investigation.py +27 -0
  102. valuesets/enums/materials_science/__init__.py +1 -0
  103. valuesets/enums/materials_science/characterization_methods.py +112 -0
  104. valuesets/enums/materials_science/crystal_structures.py +76 -0
  105. valuesets/enums/materials_science/material_properties.py +119 -0
  106. valuesets/enums/materials_science/material_types.py +104 -0
  107. valuesets/enums/materials_science/pigments_dyes.py +198 -0
  108. valuesets/enums/materials_science/synthesis_methods.py +109 -0
  109. valuesets/enums/medical/__init__.py +1 -0
  110. valuesets/enums/medical/clinical.py +277 -0
  111. valuesets/enums/medical/neuroimaging.py +119 -0
  112. valuesets/enums/mining_processing.py +302 -0
  113. valuesets/enums/physics/__init__.py +1 -0
  114. valuesets/enums/physics/states_of_matter.py +46 -0
  115. valuesets/enums/social/__init__.py +1 -0
  116. valuesets/enums/social/person_status.py +29 -0
  117. valuesets/enums/spatial/__init__.py +1 -0
  118. valuesets/enums/spatial/spatial_qualifiers.py +246 -0
  119. valuesets/enums/statistics/__init__.py +5 -0
  120. valuesets/enums/statistics/prediction_outcomes.py +31 -0
  121. valuesets/enums/statistics.py +31 -0
  122. valuesets/enums/time/__init__.py +1 -0
  123. valuesets/enums/time/temporal.py +254 -0
  124. valuesets/enums/units/__init__.py +1 -0
  125. valuesets/enums/units/measurements.py +310 -0
  126. valuesets/enums/visual/__init__.py +1 -0
  127. valuesets/enums/visual/colors.py +376 -0
  128. valuesets/generators/__init__.py +19 -0
  129. valuesets/generators/auto_slot_injector.py +280 -0
  130. valuesets/generators/enhanced_pydantic_generator.py +100 -0
  131. valuesets/generators/enum_slot_generator.py +201 -0
  132. valuesets/generators/modular_rich_generator.py +353 -0
  133. valuesets/generators/prefix_standardizer.py +198 -0
  134. valuesets/generators/rich_enum.py +127 -0
  135. valuesets/generators/rich_pydantic_generator.py +310 -0
  136. valuesets/generators/smart_slot_syncer.py +428 -0
  137. valuesets/generators/sssom_generator.py +394 -0
  138. valuesets/merged/merged_hierarchy.yaml +21649 -0
  139. valuesets/schema/README.md +3 -0
  140. valuesets/schema/academic/research.yaml +911 -0
  141. valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
  142. valuesets/schema/bio/bio_entities.yaml +364 -0
  143. valuesets/schema/bio/biological_colors.yaml +434 -0
  144. valuesets/schema/bio/cell_cycle.yaml +309 -0
  145. valuesets/schema/bio/currency_chemicals.yaml +70 -0
  146. valuesets/schema/bio/developmental_stages.yaml +226 -0
  147. valuesets/schema/bio/genome_features.yaml +342 -0
  148. valuesets/schema/bio/genomics.yaml +101 -0
  149. valuesets/schema/bio/go_aspect.yaml +39 -0
  150. valuesets/schema/bio/go_causality.yaml +119 -0
  151. valuesets/schema/bio/go_evidence.yaml +215 -0
  152. valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
  153. valuesets/schema/bio/insdc_missing_values.yaml +85 -0
  154. valuesets/schema/bio/lipid_categories.yaml +72 -0
  155. valuesets/schema/bio/plant_biology.yaml +125 -0
  156. valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
  157. valuesets/schema/bio/plant_sex.yaml +108 -0
  158. valuesets/schema/bio/protein_evidence.yaml +63 -0
  159. valuesets/schema/bio/proteomics_standards.yaml +116 -0
  160. valuesets/schema/bio/psi_mi.yaml +400 -0
  161. valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
  162. valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
  163. valuesets/schema/bio/sequence_chemistry.yaml +477 -0
  164. valuesets/schema/bio/sequencing_platforms.yaml +515 -0
  165. valuesets/schema/bio/structural_biology.yaml +428 -0
  166. valuesets/schema/bio/taxonomy.yaml +453 -0
  167. valuesets/schema/bio/trophic_levels.yaml +118 -0
  168. valuesets/schema/bio/uniprot_species.yaml +1209 -0
  169. valuesets/schema/bio/viral_genome_types.yaml +99 -0
  170. valuesets/schema/bioprocessing/scale_up.yaml +458 -0
  171. valuesets/schema/business/human_resources.yaml +752 -0
  172. valuesets/schema/business/industry_classifications.yaml +448 -0
  173. valuesets/schema/business/management_operations.yaml +602 -0
  174. valuesets/schema/business/organizational_structures.yaml +645 -0
  175. valuesets/schema/business/quality_management.yaml +502 -0
  176. valuesets/schema/business/supply_chain.yaml +688 -0
  177. valuesets/schema/chemistry/chemical_entities.yaml +639 -0
  178. valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
  179. valuesets/schema/chemistry/reactions.yaml +442 -0
  180. valuesets/schema/clinical/nih_demographics.yaml +285 -0
  181. valuesets/schema/clinical/phenopackets.yaml +429 -0
  182. valuesets/schema/computing/file_formats.yaml +631 -0
  183. valuesets/schema/computing/maturity_levels.yaml +229 -0
  184. valuesets/schema/computing/mime_types.yaml +266 -0
  185. valuesets/schema/confidence_levels.yaml +206 -0
  186. valuesets/schema/contributor.yaml +30 -0
  187. valuesets/schema/core.yaml +55 -0
  188. valuesets/schema/data/data_absent_reason.yaml +82 -0
  189. valuesets/schema/data_science/binary_classification.yaml +125 -0
  190. valuesets/schema/data_science/emotion_classification.yaml +109 -0
  191. valuesets/schema/data_science/priority_severity.yaml +122 -0
  192. valuesets/schema/data_science/quality_control.yaml +68 -0
  193. valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
  194. valuesets/schema/data_science/text_classification.yaml +135 -0
  195. valuesets/schema/demographics.yaml +238 -0
  196. valuesets/schema/ecological_interactions.yaml +298 -0
  197. valuesets/schema/energy/energy.yaml +595 -0
  198. valuesets/schema/energy/fossil_fuels.yaml +28 -0
  199. valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
  200. valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
  201. valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
  202. valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
  203. valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
  204. valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
  205. valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
  206. valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
  207. valuesets/schema/environmental_health/exposures.yaml +355 -0
  208. valuesets/schema/generated_slots.yaml +1828 -0
  209. valuesets/schema/geography/geographic_codes.yaml +1018 -0
  210. valuesets/schema/health/vaccination.yaml +102 -0
  211. valuesets/schema/health.yaml +38 -0
  212. valuesets/schema/healthcare.yaml +53 -0
  213. valuesets/schema/industry/extractive_industry.yaml +89 -0
  214. valuesets/schema/industry/mining.yaml +888 -0
  215. valuesets/schema/industry/safety_colors.yaml +375 -0
  216. valuesets/schema/investigation.yaml +64 -0
  217. valuesets/schema/materials_science/characterization_methods.yaml +193 -0
  218. valuesets/schema/materials_science/crystal_structures.yaml +138 -0
  219. valuesets/schema/materials_science/material_properties.yaml +135 -0
  220. valuesets/schema/materials_science/material_types.yaml +151 -0
  221. valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
  222. valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
  223. valuesets/schema/medical/clinical.yaml +610 -0
  224. valuesets/schema/medical/neuroimaging.yaml +325 -0
  225. valuesets/schema/mining_processing.yaml +295 -0
  226. valuesets/schema/physics/states_of_matter.yaml +46 -0
  227. valuesets/schema/slot_mixins.yaml +143 -0
  228. valuesets/schema/social/person_status.yaml +28 -0
  229. valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
  230. valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
  231. valuesets/schema/statistics.yaml +34 -0
  232. valuesets/schema/time/temporal.yaml +435 -0
  233. valuesets/schema/types.yaml +15 -0
  234. valuesets/schema/units/measurements.yaml +675 -0
  235. valuesets/schema/valuesets.yaml +100 -0
  236. valuesets/schema/visual/colors.yaml +778 -0
  237. valuesets/utils/__init__.py +6 -0
  238. valuesets/utils/comparison.py +102 -0
  239. valuesets/utils/expand_dynamic_enums.py +414 -0
  240. valuesets/utils/mapping_utils.py +236 -0
  241. valuesets/validators/__init__.py +11 -0
  242. valuesets/validators/enum_evaluator.py +669 -0
  243. valuesets/validators/oak_config.yaml +70 -0
  244. valuesets/validators/validate_with_ols.py +241 -0
  245. valuesets-0.3.1.dist-info/METADATA +395 -0
  246. valuesets-0.3.1.dist-info/RECORD +248 -0
  247. valuesets-0.3.1.dist-info/WHEEL +4 -0
  248. valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,428 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Smart slot synchronizer for LinkML schemas with enums.
4
+
5
+ Designed for periodic synchronization with --in-place option to:
6
+ 1. Add new slots for newly added enums
7
+ 2. Update slot ranges when enum names change
8
+ 3. Preserve manual customizations to slots
9
+ 4. Remove orphaned slots for deleted enums (optional)
10
+ 5. Track changes for review
11
+ """
12
+
13
+ import yaml
14
+ from pathlib import Path
15
+ from typing import Dict, Any, List, Optional, Set, Tuple
16
+ import re
17
+ import click
18
+ from collections import OrderedDict
19
+ from datetime import datetime
20
+ import json
21
+
22
+
23
class SmartSlotSyncer:
    """Keep the ``slots`` section of a LinkML schema in step with its ``enums``.

    One slot is derived per enum (name, description, range and an optional
    ``multivalued`` flag).  Depending on the chosen mode, existing slots are
    left alone, selectively updated, or regenerated.  Every modification is
    recorded in ``self.changes`` so it can be reviewed or saved as a changelog.
    """

    # Suffixes stripped from an enum name when deriving its slot name.
    _NAME_SUFFIXES = ('Enum', 'Type', 'Class', 'Code')
    # An unmatched slot is only considered enum-derived (and thus removable)
    # when its range ends with one of these suffixes.
    _ORPHAN_RANGE_SUFFIXES = ('Enum', 'Type', 'Class')
    # Slot names containing any of these substrings are generated as multivalued.
    _MULTIVALUED_KEYWORDS = ('target', 'feature', 'metric', 'constraint')
    # Slot fields that are never auto-generated; their presence marks a manual edit.
    _MANUAL_INDICATORS = frozenset({
        'required', 'identifier', 'pattern', 'minimum_value', 'maximum_value',
        'equals_string', 'equals_number', 'minimum_cardinality', 'maximum_cardinality',
        'annotations', 'see_also', 'examples', 'in_subset', 'domain', 'subproperty_of',
        'symmetric', 'transitive', 'reflexive', 'locally_reflexive', 'irreflexive',
        'asymmetric', 'inverse', 'is_a', 'mixins',
    })
    # Case-insensitive substrings in a slot comment that mark it as hand-edited.
    _MANUAL_MARKERS = ('manual', 'custom', 'do not modify')

    def __init__(self, verbose: bool = False):
        """Create a syncer.

        Args:
            verbose: When true, every logged change is also printed.
        """
        self.verbose = verbose
        self.changes = []  # Track all changes made

    @staticmethod
    def camel_to_snake(name: str) -> str:
        """Convert CamelCase to snake_case."""
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    def generate_slot_name(self, enum_name: str) -> str:
        """Derive the snake_case slot name for an enum.

        The first matching suffix from ``_NAME_SUFFIXES`` is stripped, unless
        that would leave nothing (an enum literally named ``Type`` keeps its
        name instead of producing an empty slot name).
        """
        slot_name = enum_name
        for suffix in self._NAME_SUFFIXES:
            if slot_name.endswith(suffix):
                stem = slot_name[:-len(suffix)]
                if stem:
                    slot_name = stem
                break
        return self.camel_to_snake(slot_name)

    def generate_slot_definition(self, enum_name: str,
                                 enum_def: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a complete slot definition for an enum.

        Args:
            enum_name: Name of the enum; becomes the slot's ``range``.
            enum_def: The enum's definition mapping.  ``None`` (an enum
                declared without a body) is treated as an empty mapping.

        Returns:
            A dict with ``description`` and ``range``, plus
            ``multivalued: True`` when the slot name contains one of
            ``_MULTIVALUED_KEYWORDS``.
        """
        slot_name = self.generate_slot_name(enum_name)

        # Use the first sentence of the enum description, whitespace-normalised.
        slot_desc = ''
        enum_desc = (enum_def or {}).get('description', '')
        if enum_desc:
            first_sentence = ' '.join(enum_desc.split()).split('.')[0]
            slot_desc = first_sentence[:1].upper() + first_sentence[1:]
        if not slot_desc:
            # No usable description text: fall back to a generic phrase.
            readable_name = slot_name.replace('_', ' ')
            slot_desc = f"The {readable_name} classification"

        slot_def = {
            'description': slot_desc,
            'range': enum_name
        }

        if any(keyword in slot_name for keyword in self._MULTIVALUED_KEYWORDS):
            slot_def['multivalued'] = True

        return slot_def

    def detect_changes(self, existing_slot: Dict[str, Any],
                       new_slot: Dict[str, Any]) -> List[str]:
        """Describe how a freshly generated slot differs from the existing one.

        Compares ``range`` and ``multivalued``; ``description`` is compared
        only when the existing slot is not flagged ``_manual_description``.

        Returns:
            Human-readable change descriptions; empty when nothing differs.
        """
        changes = []

        # Check range change (enum rename)
        if existing_slot.get('range') != new_slot.get('range'):
            changes.append(f"range: {existing_slot.get('range')} → {new_slot.get('range')}")

        # Check multivalued change
        if existing_slot.get('multivalued') != new_slot.get('multivalued'):
            changes.append(f"multivalued: {existing_slot.get('multivalued')} → {new_slot.get('multivalued')}")

        # Check if description was auto-generated and enum description changed
        if (not existing_slot.get('_manual_description') and
                existing_slot.get('description') != new_slot.get('description')):
            changes.append("description updated from enum")

        return changes

    def has_manual_customizations(self, slot_def: Dict[str, Any]) -> bool:
        """Report whether a slot carries edits beyond what is auto-generated.

        A slot counts as customised when it has any field listed in
        ``_MANUAL_INDICATORS``, or when one of its comments contains a marker
        from ``_MANUAL_MARKERS`` (case-insensitive).  ``comments`` may be a
        list, a single string, or absent/``None``.
        """
        if any(field in self._MANUAL_INDICATORS for field in slot_def):
            return True

        comments = slot_def.get('comments')
        if comments is None:
            return False
        if not isinstance(comments, (list, tuple)):
            # A scalar comment must be checked as a whole, not per character.
            comments = [comments]

        for comment in comments:
            text = str(comment).lower()
            if any(marker in text for marker in self._MANUAL_MARKERS):
                return True

        return False

    def sync_slots(self, schema_path: Path,
                   mode: str = 'update',
                   remove_orphans: bool = False,
                   dry_run: bool = False) -> Dict[str, Any]:
        """
        Synchronize slots with enums in a schema.

        The file is rewritten only when at least one slot was added, updated
        or removed, because dumping the YAML discards comments and layout.

        Args:
            schema_path: Path to the schema file
            mode: 'update' (preserve customizations), 'refresh' (regenerate all), 'conservative' (only add new)
            remove_orphans: Remove slots for deleted enums
            dry_run: Preview changes without modifying file

        Returns:
            Summary dict with 'added', 'updated', 'preserved', 'removed' and
            'warnings' lists, or ``{'status': 'no_enums', 'changes': []}``
            when the file is empty, is not a mapping, or has no 'enums' key.
        """
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_data = yaml.safe_load(f)

        # safe_load returns None for an empty document.
        if not isinstance(schema_data, dict) or 'enums' not in schema_data:
            return {'status': 'no_enums', 'changes': []}

        summary = self._sync_schema_data(schema_data, mode, remove_orphans)

        has_changes = bool(summary['added'] or summary['updated'] or summary['removed'])
        if has_changes and not dry_run:
            self.write_schema(schema_data, schema_path)

        return summary

    def _sync_schema_data(self, schema_data: Dict[str, Any],
                          mode: str = 'update',
                          remove_orphans: bool = False) -> Dict[str, Any]:
        """Apply the sync to an already-loaded schema mapping, in place."""
        # A bare "enums:" / "slots:" key loads as None; treat both as empty.
        enums = schema_data.get('enums') or {}
        if schema_data.get('slots') is None:
            schema_data['slots'] = {}
        slots = schema_data['slots']

        summary = {
            'added': [],
            'updated': [],
            'preserved': [],
            'removed': [],
            'warnings': []
        }

        # Generate expected slots from enums
        expected_slots = {}
        for enum_name, enum_def in enums.items():
            slot_name = self.generate_slot_name(enum_name)
            if slot_name in expected_slots:
                # e.g. FooEnum and FooType both map to "foo"; the later enum wins.
                summary['warnings'].append(
                    f"{slot_name}: enums {expected_slots[slot_name]['range']} and {enum_name} "
                    f"map to the same slot name; using {enum_name}"
                )
            expected_slots[slot_name] = self.generate_slot_definition(enum_name, enum_def)

        for slot_name, new_slot_def in expected_slots.items():
            existing_slot = slots.get(slot_name)

            if not existing_slot:
                # New slot - add it
                slots[slot_name] = new_slot_def
                summary['added'].append(slot_name)
                self.log_change(f"ADD: {slot_name} (range: {new_slot_def['range']})")

            elif mode == 'conservative':
                # Conservative mode - only add new, never modify existing
                summary['preserved'].append(slot_name)

            elif mode == 'refresh':
                # Refresh mode - regenerate all
                slots[slot_name] = new_slot_def
                summary['updated'].append(slot_name)
                self.log_change(f"REFRESH: {slot_name}")

            elif self.has_manual_customizations(existing_slot):
                # Customised slot: only follow an enum rename, touch nothing else.
                old_range = existing_slot.get('range')
                if old_range != new_slot_def['range']:
                    existing_slot['range'] = new_slot_def['range']
                    summary['updated'].append(f"{slot_name} (range only)")
                    summary['warnings'].append(
                        f"{slot_name}: Updated range {old_range} → {new_slot_def['range']}, preserved customizations"
                    )
                    self.log_change(f"UPDATE: {slot_name} range: {old_range} → {new_slot_def['range']}")
                else:
                    summary['preserved'].append(slot_name)

            else:
                changes = self.detect_changes(existing_slot, new_slot_def)
                if changes:
                    self._merge_generated_fields(existing_slot, new_slot_def)
                    summary['updated'].append(f"{slot_name} ({', '.join(changes)})")
                    self.log_change(f"UPDATE: {slot_name} - {', '.join(changes)}")
                else:
                    summary['preserved'].append(slot_name)

        if remove_orphans:
            self._remove_orphaned_slots(schema_data, expected_slots, summary)

        return summary

    @staticmethod
    def _merge_generated_fields(existing_slot: Dict[str, Any],
                                new_slot_def: Dict[str, Any]) -> None:
        """Refresh only the auto-generated fields of a slot, keeping all others."""
        existing_slot['range'] = new_slot_def['range']
        if not existing_slot.get('_manual_description'):
            existing_slot['description'] = new_slot_def['description']
        if new_slot_def.get('multivalued'):
            existing_slot['multivalued'] = True
        else:
            existing_slot.pop('multivalued', None)

    def _remove_orphaned_slots(self, schema_data: Dict[str, Any],
                               expected_slots: Dict[str, Any],
                               summary: Dict[str, Any]) -> None:
        """Delete enum-looking slots whose range no longer exists in the schema."""
        slots = schema_data['slots']
        # A range that names an enum, class or type defined here is still valid.
        known_ranges = set(schema_data.get('enums') or {})
        known_ranges.update(schema_data.get('classes') or {})
        known_ranges.update(schema_data.get('types') or {})

        # Iterate over a snapshot because slots are deleted along the way.
        for slot_name in list(slots):
            slot_def = slots[slot_name]
            if slot_name in expected_slots or not isinstance(slot_def, dict):
                continue

            slot_range = slot_def.get('range')
            if not isinstance(slot_range, str) or slot_range in known_ranges:
                continue
            if not slot_range.endswith(self._ORPHAN_RANGE_SUFFIXES):
                continue  # Does not look enum-derived; leave it alone.

            if self.has_manual_customizations(slot_def):
                summary['warnings'].append(
                    f"{slot_name}: Orphaned slot with customizations (range: {slot_range})"
                )
            else:
                del slots[slot_name]
                summary['removed'].append(slot_name)
                self.log_change(f"REMOVE: {slot_name} (orphaned, range: {slot_range})")

    def write_schema(self, schema_data: Dict[str, Any], output_path: Path):
        """Dump the schema as UTF-8 YAML with well-known top-level keys first.

        Keys listed in ``key_order`` come first in that order; remaining keys
        keep their existing relative order.  The file is regenerated by
        ``yaml.dump``, so comments and original layout are NOT preserved.
        """
        key_order = [
            'name', 'title', 'description', 'id', 'version', 'status',
            'imports', 'prefixes', 'default_prefix', 'default_curi_maps',
            'slots', 'classes', 'enums'
        ]

        ordered_data = {key: schema_data[key] for key in key_order if key in schema_data}
        for key, value in schema_data.items():
            ordered_data.setdefault(key, value)

        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(ordered_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)

    def log_change(self, message: str):
        """Record a timestamped change entry; print it too when verbose."""
        self.changes.append({
            'timestamp': datetime.now().isoformat(),
            'change': message
        })
        if self.verbose:
            print(f" {message}")

    def save_changelog(self, path: Path):
        """Write all recorded changes to ``path`` as indented JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.changes, f, indent=2)
281
+
282
+
283
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--in-place', '-i', is_flag=True,
              help='Modify schema file in place')
@click.option('--mode', '-m',
              type=click.Choice(['update', 'refresh', 'conservative']),
              default='update',
              help='Sync mode: update (smart), refresh (regenerate), conservative (only add)')
@click.option('--remove-orphans', '-r', is_flag=True,
              help='Remove slots for deleted enums')
@click.option('--dry-run', '-n', is_flag=True,
              help='Preview changes without modifying files')
@click.option('--verbose', '-v', is_flag=True,
              help='Show detailed change information')
@click.option('--changelog', '-c', type=click.Path(path_type=Path),
              help='Save detailed changelog to file')
@click.option('--batch', '-b', is_flag=True,
              help='Process all schemas in directory')
def main(schema_path: Path, in_place: bool, mode: str,
         remove_orphans: bool, dry_run: bool, verbose: bool,
         changelog: Optional[Path], batch: bool):
    """
    Smart synchronization of LinkML slots with enums.

    Designed for periodic updates with --in-place option.

    Modes:
    - update: Smart updates preserving manual customizations (default)
    - refresh: Regenerate all enum-based slots
    - conservative: Only add new slots, never modify existing

    Examples:

    \b
    # Preview changes for a single file
    smart_slot_syncer.py schema.yaml --dry-run -v

    \b
    # Update file in place, preserving customizations
    smart_slot_syncer.py schema.yaml --in-place

    \b
    # Refresh all slots and remove orphans
    smart_slot_syncer.py schema.yaml --in-place --mode refresh --remove-orphans

    \b
    # Batch process all schemas in directory
    smart_slot_syncer.py src/valuesets/schema --batch --in-place

    \b
    # Conservative update with changelog
    smart_slot_syncer.py schema.yaml --in-place --mode conservative --changelog changes.json
    """
    # Validate up front so single-file and batch runs fail the same way, with
    # a non-zero exit status, instead of silently doing nothing.
    if not in_place and not dry_run:
        raise click.UsageError("Must use either --in-place or --dry-run")
    if schema_path.is_dir() and not batch:
        raise click.UsageError(
            f"{schema_path} is a directory; use --batch to process all schemas in it"
        )

    syncer = SmartSlotSyncer(verbose=verbose)

    if batch and schema_path.is_dir():
        # Meta files that define no enum-driven slots of their own.
        skipped_names = {'linkml-meta.yaml', 'types.yaml',
                         'slot_mixins.yaml', 'generated_slots.yaml'}
        total_summary = {
            'files_processed': 0,
            'total_added': 0,
            'total_updated': 0,
            'total_removed': 0
        }

        # Sorted so output and changelog order do not depend on the filesystem.
        for yaml_file in sorted(schema_path.rglob("*.yaml")):
            if yaml_file.name in skipped_names:
                continue

            print(f"\nProcessing {yaml_file.relative_to(schema_path)}...")

            summary = syncer.sync_slots(yaml_file, mode=mode,
                                        remove_orphans=remove_orphans,
                                        dry_run=dry_run)
            added = summary.get('added', [])
            updated = summary.get('updated', [])
            removed = summary.get('removed', [])

            # Only files with at least one change count towards the totals.
            if added or updated or removed:
                total_summary['files_processed'] += 1
                total_summary['total_added'] += len(added)
                total_summary['total_updated'] += len(updated)
                total_summary['total_removed'] += len(removed)

            print(f" Added: {len(added)}")
            print(f" Updated: {len(updated)}")
            print(f" Preserved: {len(summary.get('preserved', []))}")
            print(f" Removed: {len(removed)}")

            if summary.get('warnings'):
                print(" Warnings:")
                for warning in summary['warnings']:
                    print(f" - {warning}")

        print(f"\n{'='*50}")
        print(f"Batch Summary: {total_summary['files_processed']} files modified")
        print(f" Total added: {total_summary['total_added']}")
        print(f" Total updated: {total_summary['total_updated']}")
        print(f" Total removed: {total_summary['total_removed']}")

    else:
        # Single file processing
        summary = syncer.sync_slots(schema_path, mode=mode,
                                    remove_orphans=remove_orphans,
                                    dry_run=dry_run)

        print(f"\n{'DRY RUN - ' if dry_run else ''}Summary for {schema_path.name}:")
        print(f" Mode: {mode}")

        # One section per outcome; individual items are listed only with --verbose.
        sections = (
            ('Added', 'added', '+'),
            ('Updated', 'updated', '~'),
            ('Preserved', 'preserved', '='),
            ('Removed', 'removed', '-'),
        )
        for label, key, bullet in sections:
            items = summary.get(key, [])
            print(f" {label}: {len(items)}")
            if verbose:
                for item in items:
                    print(f" {bullet} {item}")

        if summary.get('warnings'):
            print("\nWarnings:")
            for warning in summary['warnings']:
                print(f" ⚠ {warning}")

    # Save changelog if requested
    if changelog and syncer.changes:
        syncer.save_changelog(changelog)
        print(f"\nChangelog saved to {changelog}")
425
+
426
+
427
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()