valuesets 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valuesets might be problematic. Click here for more details.

Files changed (248) hide show
  1. valuesets/__init__.py +7 -0
  2. valuesets/_version.py +8 -0
  3. valuesets/datamodel/valuesets.py +13796 -0
  4. valuesets/datamodel/valuesets_dataclass.py +24503 -0
  5. valuesets/datamodel/valuesets_pydantic.py +13796 -0
  6. valuesets/enums/__init__.py +590 -0
  7. valuesets/enums/academic/__init__.py +1 -0
  8. valuesets/enums/academic/research.py +559 -0
  9. valuesets/enums/analytical_chemistry/__init__.py +1 -0
  10. valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
  11. valuesets/enums/bio/__init__.py +1 -0
  12. valuesets/enums/bio/biological_colors.py +238 -0
  13. valuesets/enums/bio/cell_cycle.py +180 -0
  14. valuesets/enums/bio/currency_chemicals.py +52 -0
  15. valuesets/enums/bio/developmental_stages.py +103 -0
  16. valuesets/enums/bio/genome_features.py +182 -0
  17. valuesets/enums/bio/genomics.py +91 -0
  18. valuesets/enums/bio/go_aspect.py +32 -0
  19. valuesets/enums/bio/go_causality.py +58 -0
  20. valuesets/enums/bio/go_evidence.py +129 -0
  21. valuesets/enums/bio/human_developmental_stages.py +62 -0
  22. valuesets/enums/bio/insdc_geographic_locations.py +591 -0
  23. valuesets/enums/bio/insdc_missing_values.py +49 -0
  24. valuesets/enums/bio/lipid_categories.py +67 -0
  25. valuesets/enums/bio/mouse_developmental_stages.py +62 -0
  26. valuesets/enums/bio/plant_biology.py +86 -0
  27. valuesets/enums/bio/plant_developmental_stages.py +54 -0
  28. valuesets/enums/bio/plant_sex.py +81 -0
  29. valuesets/enums/bio/protein_evidence.py +61 -0
  30. valuesets/enums/bio/proteomics_standards.py +123 -0
  31. valuesets/enums/bio/psi_mi.py +306 -0
  32. valuesets/enums/bio/relationship_to_oxygen.py +37 -0
  33. valuesets/enums/bio/sequence_alphabets.py +449 -0
  34. valuesets/enums/bio/sequence_chemistry.py +357 -0
  35. valuesets/enums/bio/sequencing_platforms.py +302 -0
  36. valuesets/enums/bio/structural_biology.py +320 -0
  37. valuesets/enums/bio/taxonomy.py +238 -0
  38. valuesets/enums/bio/trophic_levels.py +85 -0
  39. valuesets/enums/bio/uniprot_species.py +344 -0
  40. valuesets/enums/bio/viral_genome_types.py +47 -0
  41. valuesets/enums/bioprocessing/__init__.py +1 -0
  42. valuesets/enums/bioprocessing/scale_up.py +249 -0
  43. valuesets/enums/business/__init__.py +1 -0
  44. valuesets/enums/business/human_resources.py +275 -0
  45. valuesets/enums/business/industry_classifications.py +181 -0
  46. valuesets/enums/business/management_operations.py +228 -0
  47. valuesets/enums/business/organizational_structures.py +236 -0
  48. valuesets/enums/business/quality_management.py +181 -0
  49. valuesets/enums/business/supply_chain.py +232 -0
  50. valuesets/enums/chemistry/__init__.py +1 -0
  51. valuesets/enums/chemistry/chemical_entities.py +315 -0
  52. valuesets/enums/chemistry/reaction_directionality.py +65 -0
  53. valuesets/enums/chemistry/reactions.py +256 -0
  54. valuesets/enums/clinical/__init__.py +1 -0
  55. valuesets/enums/clinical/nih_demographics.py +177 -0
  56. valuesets/enums/clinical/phenopackets.py +254 -0
  57. valuesets/enums/common_value_sets.py +8791 -0
  58. valuesets/enums/computing/__init__.py +1 -0
  59. valuesets/enums/computing/file_formats.py +294 -0
  60. valuesets/enums/computing/maturity_levels.py +196 -0
  61. valuesets/enums/computing/mime_types.py +227 -0
  62. valuesets/enums/confidence_levels.py +168 -0
  63. valuesets/enums/contributor.py +30 -0
  64. valuesets/enums/core.py +42 -0
  65. valuesets/enums/data/__init__.py +1 -0
  66. valuesets/enums/data/data_absent_reason.py +53 -0
  67. valuesets/enums/data_science/__init__.py +1 -0
  68. valuesets/enums/data_science/binary_classification.py +87 -0
  69. valuesets/enums/data_science/emotion_classification.py +66 -0
  70. valuesets/enums/data_science/priority_severity.py +73 -0
  71. valuesets/enums/data_science/quality_control.py +46 -0
  72. valuesets/enums/data_science/sentiment_analysis.py +50 -0
  73. valuesets/enums/data_science/text_classification.py +97 -0
  74. valuesets/enums/demographics.py +206 -0
  75. valuesets/enums/ecological_interactions.py +151 -0
  76. valuesets/enums/energy/__init__.py +1 -0
  77. valuesets/enums/energy/energy.py +343 -0
  78. valuesets/enums/energy/fossil_fuels.py +29 -0
  79. valuesets/enums/energy/nuclear/__init__.py +1 -0
  80. valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
  81. valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
  82. valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
  83. valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
  84. valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
  85. valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
  86. valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
  87. valuesets/enums/energy/nuclear/reactor_types.py +163 -0
  88. valuesets/enums/environmental_health/__init__.py +1 -0
  89. valuesets/enums/environmental_health/exposures.py +265 -0
  90. valuesets/enums/geography/__init__.py +1 -0
  91. valuesets/enums/geography/geographic_codes.py +741 -0
  92. valuesets/enums/health/__init__.py +12 -0
  93. valuesets/enums/health/vaccination.py +98 -0
  94. valuesets/enums/health.py +36 -0
  95. valuesets/enums/health_base.py +36 -0
  96. valuesets/enums/healthcare.py +45 -0
  97. valuesets/enums/industry/__init__.py +1 -0
  98. valuesets/enums/industry/extractive_industry.py +94 -0
  99. valuesets/enums/industry/mining.py +388 -0
  100. valuesets/enums/industry/safety_colors.py +201 -0
  101. valuesets/enums/investigation.py +27 -0
  102. valuesets/enums/materials_science/__init__.py +1 -0
  103. valuesets/enums/materials_science/characterization_methods.py +112 -0
  104. valuesets/enums/materials_science/crystal_structures.py +76 -0
  105. valuesets/enums/materials_science/material_properties.py +119 -0
  106. valuesets/enums/materials_science/material_types.py +104 -0
  107. valuesets/enums/materials_science/pigments_dyes.py +198 -0
  108. valuesets/enums/materials_science/synthesis_methods.py +109 -0
  109. valuesets/enums/medical/__init__.py +1 -0
  110. valuesets/enums/medical/clinical.py +277 -0
  111. valuesets/enums/medical/neuroimaging.py +119 -0
  112. valuesets/enums/mining_processing.py +302 -0
  113. valuesets/enums/physics/__init__.py +1 -0
  114. valuesets/enums/physics/states_of_matter.py +46 -0
  115. valuesets/enums/social/__init__.py +1 -0
  116. valuesets/enums/social/person_status.py +29 -0
  117. valuesets/enums/spatial/__init__.py +1 -0
  118. valuesets/enums/spatial/spatial_qualifiers.py +246 -0
  119. valuesets/enums/statistics/__init__.py +5 -0
  120. valuesets/enums/statistics/prediction_outcomes.py +31 -0
  121. valuesets/enums/statistics.py +31 -0
  122. valuesets/enums/time/__init__.py +1 -0
  123. valuesets/enums/time/temporal.py +254 -0
  124. valuesets/enums/units/__init__.py +1 -0
  125. valuesets/enums/units/measurements.py +310 -0
  126. valuesets/enums/visual/__init__.py +1 -0
  127. valuesets/enums/visual/colors.py +376 -0
  128. valuesets/generators/__init__.py +19 -0
  129. valuesets/generators/auto_slot_injector.py +280 -0
  130. valuesets/generators/enhanced_pydantic_generator.py +100 -0
  131. valuesets/generators/enum_slot_generator.py +201 -0
  132. valuesets/generators/modular_rich_generator.py +353 -0
  133. valuesets/generators/prefix_standardizer.py +198 -0
  134. valuesets/generators/rich_enum.py +127 -0
  135. valuesets/generators/rich_pydantic_generator.py +310 -0
  136. valuesets/generators/smart_slot_syncer.py +428 -0
  137. valuesets/generators/sssom_generator.py +394 -0
  138. valuesets/merged/merged_hierarchy.yaml +21649 -0
  139. valuesets/schema/README.md +3 -0
  140. valuesets/schema/academic/research.yaml +911 -0
  141. valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
  142. valuesets/schema/bio/bio_entities.yaml +364 -0
  143. valuesets/schema/bio/biological_colors.yaml +434 -0
  144. valuesets/schema/bio/cell_cycle.yaml +309 -0
  145. valuesets/schema/bio/currency_chemicals.yaml +70 -0
  146. valuesets/schema/bio/developmental_stages.yaml +226 -0
  147. valuesets/schema/bio/genome_features.yaml +342 -0
  148. valuesets/schema/bio/genomics.yaml +101 -0
  149. valuesets/schema/bio/go_aspect.yaml +39 -0
  150. valuesets/schema/bio/go_causality.yaml +119 -0
  151. valuesets/schema/bio/go_evidence.yaml +215 -0
  152. valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
  153. valuesets/schema/bio/insdc_missing_values.yaml +85 -0
  154. valuesets/schema/bio/lipid_categories.yaml +72 -0
  155. valuesets/schema/bio/plant_biology.yaml +125 -0
  156. valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
  157. valuesets/schema/bio/plant_sex.yaml +108 -0
  158. valuesets/schema/bio/protein_evidence.yaml +63 -0
  159. valuesets/schema/bio/proteomics_standards.yaml +116 -0
  160. valuesets/schema/bio/psi_mi.yaml +400 -0
  161. valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
  162. valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
  163. valuesets/schema/bio/sequence_chemistry.yaml +477 -0
  164. valuesets/schema/bio/sequencing_platforms.yaml +515 -0
  165. valuesets/schema/bio/structural_biology.yaml +428 -0
  166. valuesets/schema/bio/taxonomy.yaml +453 -0
  167. valuesets/schema/bio/trophic_levels.yaml +118 -0
  168. valuesets/schema/bio/uniprot_species.yaml +1209 -0
  169. valuesets/schema/bio/viral_genome_types.yaml +99 -0
  170. valuesets/schema/bioprocessing/scale_up.yaml +458 -0
  171. valuesets/schema/business/human_resources.yaml +752 -0
  172. valuesets/schema/business/industry_classifications.yaml +448 -0
  173. valuesets/schema/business/management_operations.yaml +602 -0
  174. valuesets/schema/business/organizational_structures.yaml +645 -0
  175. valuesets/schema/business/quality_management.yaml +502 -0
  176. valuesets/schema/business/supply_chain.yaml +688 -0
  177. valuesets/schema/chemistry/chemical_entities.yaml +639 -0
  178. valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
  179. valuesets/schema/chemistry/reactions.yaml +442 -0
  180. valuesets/schema/clinical/nih_demographics.yaml +285 -0
  181. valuesets/schema/clinical/phenopackets.yaml +429 -0
  182. valuesets/schema/computing/file_formats.yaml +631 -0
  183. valuesets/schema/computing/maturity_levels.yaml +229 -0
  184. valuesets/schema/computing/mime_types.yaml +266 -0
  185. valuesets/schema/confidence_levels.yaml +206 -0
  186. valuesets/schema/contributor.yaml +30 -0
  187. valuesets/schema/core.yaml +55 -0
  188. valuesets/schema/data/data_absent_reason.yaml +82 -0
  189. valuesets/schema/data_science/binary_classification.yaml +125 -0
  190. valuesets/schema/data_science/emotion_classification.yaml +109 -0
  191. valuesets/schema/data_science/priority_severity.yaml +122 -0
  192. valuesets/schema/data_science/quality_control.yaml +68 -0
  193. valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
  194. valuesets/schema/data_science/text_classification.yaml +135 -0
  195. valuesets/schema/demographics.yaml +238 -0
  196. valuesets/schema/ecological_interactions.yaml +298 -0
  197. valuesets/schema/energy/energy.yaml +595 -0
  198. valuesets/schema/energy/fossil_fuels.yaml +28 -0
  199. valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
  200. valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
  201. valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
  202. valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
  203. valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
  204. valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
  205. valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
  206. valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
  207. valuesets/schema/environmental_health/exposures.yaml +355 -0
  208. valuesets/schema/generated_slots.yaml +1828 -0
  209. valuesets/schema/geography/geographic_codes.yaml +1018 -0
  210. valuesets/schema/health/vaccination.yaml +102 -0
  211. valuesets/schema/health.yaml +38 -0
  212. valuesets/schema/healthcare.yaml +53 -0
  213. valuesets/schema/industry/extractive_industry.yaml +89 -0
  214. valuesets/schema/industry/mining.yaml +888 -0
  215. valuesets/schema/industry/safety_colors.yaml +375 -0
  216. valuesets/schema/investigation.yaml +64 -0
  217. valuesets/schema/materials_science/characterization_methods.yaml +193 -0
  218. valuesets/schema/materials_science/crystal_structures.yaml +138 -0
  219. valuesets/schema/materials_science/material_properties.yaml +135 -0
  220. valuesets/schema/materials_science/material_types.yaml +151 -0
  221. valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
  222. valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
  223. valuesets/schema/medical/clinical.yaml +610 -0
  224. valuesets/schema/medical/neuroimaging.yaml +325 -0
  225. valuesets/schema/mining_processing.yaml +295 -0
  226. valuesets/schema/physics/states_of_matter.yaml +46 -0
  227. valuesets/schema/slot_mixins.yaml +143 -0
  228. valuesets/schema/social/person_status.yaml +28 -0
  229. valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
  230. valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
  231. valuesets/schema/statistics.yaml +34 -0
  232. valuesets/schema/time/temporal.yaml +435 -0
  233. valuesets/schema/types.yaml +15 -0
  234. valuesets/schema/units/measurements.yaml +675 -0
  235. valuesets/schema/valuesets.yaml +100 -0
  236. valuesets/schema/visual/colors.yaml +778 -0
  237. valuesets/utils/__init__.py +6 -0
  238. valuesets/utils/comparison.py +102 -0
  239. valuesets/utils/expand_dynamic_enums.py +414 -0
  240. valuesets/utils/mapping_utils.py +236 -0
  241. valuesets/validators/__init__.py +11 -0
  242. valuesets/validators/enum_evaluator.py +669 -0
  243. valuesets/validators/oak_config.yaml +70 -0
  244. valuesets/validators/validate_with_ols.py +241 -0
  245. valuesets-0.3.1.dist-info/METADATA +395 -0
  246. valuesets-0.3.1.dist-info/RECORD +248 -0
  247. valuesets-0.3.1.dist-info/WHEEL +4 -0
  248. valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,280 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Automatically inject slot definitions for enums into LinkML schemas.
4
+
5
+ This script can:
6
+ 1. Add a slots section to schemas that define enums
7
+ 2. Generate appropriate slot definitions with correct ranges
8
+ 3. Optionally create mixin classes that bundle related slots
9
+ """
10
+
11
+ import yaml
12
+ from pathlib import Path
13
+ from typing import Dict, Any, List, Optional, Set
14
+ import re
15
+ import click
16
+ from collections import OrderedDict
17
+
18
+
19
class SlotInjector:
    """Utility to inject slots for enums into LinkML schemas.

    For every enum in a schema one slot is produced: its name is the enum
    name with a single common suffix removed and converted to snake_case,
    and its ``range`` is the enum itself.
    """

    @staticmethod
    def camel_to_snake(name: str) -> str:
        """Convert CamelCase to snake_case."""
        # Pass 1: underscore before a capitalised word (upper + lowercase run).
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        # Pass 2: underscore between a lowercase letter/digit and an uppercase letter.
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    @staticmethod
    def snake_to_words(name: str) -> str:
        """Convert snake_case to human readable words."""
        return name.replace('_', ' ')

    def generate_slot_name(self, enum_name: str) -> str:
        """Generate slot name from enum name.

        Only the first matching suffix of ``Enum``, ``Type``, ``Class`` or
        ``Code`` is stripped; the remainder is converted to snake_case.
        """
        slot_name = enum_name
        for suffix in ['Enum', 'Type', 'Class', 'Code']:
            if slot_name.endswith(suffix):
                slot_name = slot_name[:-len(suffix)]
                break

        return self.camel_to_snake(slot_name)

    def generate_slot_definition(self, enum_name: str,
                                 enum_def: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate a complete slot definition for an enum.

        Args:
            enum_name: Name of the enum; becomes the slot's ``range``.
            enum_def: The enum's mapping as loaded from YAML. ``None`` (an
                enum declared without a body) is treated as an empty mapping.

        Returns:
            A dict with ``description`` and ``range`` and, for enums with a
            ``permissible_values`` key whose slot name contains one of
            ``target``/``feature``/``metric``/``constraint``, also
            ``multivalued`` and ``comments``.
        """
        # An enum written as a bare key in YAML loads as None.
        enum_def = enum_def or {}
        slot_name = self.generate_slot_name(enum_name)

        enum_desc = enum_def.get('description', '')
        if enum_desc:
            # Collapse multi-line descriptions, then keep the first sentence.
            enum_desc = ' '.join(enum_desc.split())
            first_sentence = enum_desc.split('.')[0]
            slot_desc = first_sentence[:1].upper() + first_sentence[1:] if first_sentence else ''
        else:
            # No description available: fall back to a generic one.
            readable_name = self.snake_to_words(slot_name)
            slot_desc = f"The {readable_name} classification"

        slot_def = {
            'description': slot_desc,
            'range': enum_name
        }

        # Name-based heuristic: target/feature/metric/constraint enums are
        # treated as multivalued. The number of values is not consulted.
        if 'permissible_values' in enum_def:
            if any(keyword in slot_name for keyword in ['target', 'feature', 'metric', 'constraint']):
                slot_def['multivalued'] = True
                slot_def['comments'] = [f"Multiple {self.snake_to_words(slot_name)}s may apply"]

        return slot_def

    def inject_slots_into_schema(self, schema_path: Path,
                                 output_path: Optional[Path] = None,
                                 preserve_existing: bool = True) -> Dict[str, Any]:
        """
        Inject slots into a schema file.

        Args:
            schema_path: Path to the input schema
            output_path: Path to write modified schema (if None, nothing is
                written and the generated slots are only returned)
            preserve_existing: If True, don't override existing slots

        Returns:
            Dictionary of generated slots (empty when the schema has no
            ``enums`` key or the file is empty)
        """
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_data = yaml.safe_load(f)

        # safe_load returns None for an empty document; treat it like a
        # schema without enums instead of failing on the membership test.
        if not isinstance(schema_data, dict) or 'enums' not in schema_data:
            return {}

        # ``enums:`` / ``slots:`` with no content load as None.
        enums = schema_data['enums'] or {}
        if schema_data.get('slots') is None:
            schema_data['slots'] = {}
        existing_slots = schema_data['slots']

        generated_slots = {}

        for enum_name, enum_def in enums.items():
            slot_name = self.generate_slot_name(enum_name)

            # Skip if exists and preserving
            if preserve_existing and slot_name in existing_slots:
                continue

            slot_def = self.generate_slot_definition(enum_name, enum_def)
            generated_slots[slot_name] = slot_def
            existing_slots[slot_name] = slot_def

        # Without an output path this is a preview: nothing is written.
        if output_path:
            self.write_schema(schema_data, output_path)

        return generated_slots

    def write_schema(self, schema_data: Dict[str, Any], output_path: Path):
        """Write schema to ``output_path`` with top-level keys in a preferred order.

        Known keys come first in the order listed below; any other keys
        follow in their original order.
        """
        key_order = [
            'name', 'title', 'description', 'id', 'version', 'status',
            'imports', 'prefixes', 'default_prefix', 'default_curi_maps',
            'slots', 'classes', 'enums'
        ]

        # Plain dicts keep insertion order, which is all that is needed here.
        ordered_data = {key: schema_data[key] for key in key_order if key in schema_data}
        for key, value in schema_data.items():
            ordered_data.setdefault(key, value)

        # allow_unicode=True emits non-ASCII characters verbatim, so the file
        # must be opened as UTF-8 regardless of the platform default.
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(ordered_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)

    def generate_typed_slots_schema(self, schema_dir: Path,
                                    output_path: Path) -> None:
        """
        Generate a comprehensive slots schema from all enums in a directory.

        This creates a single schema file with all slot definitions that
        reference the appropriate enums. Files that fail to load or process
        are reported on stdout and skipped. When two enums map to the same
        slot name, the one processed later replaces the earlier one.
        """
        all_slots = {}

        for yaml_file in schema_dir.rglob("*.yaml"):
            # Skip meta files
            if yaml_file.name in ['linkml-meta.yaml', 'types.yaml', 'slot_mixins.yaml']:
                continue

            try:
                with open(yaml_file, 'r', encoding='utf-8') as f:
                    schema_data = yaml.safe_load(f)

                # Empty documents load as None; files without enums are skipped.
                if not isinstance(schema_data, dict) or 'enums' not in schema_data:
                    continue

                # Get module name from schema
                module_name = schema_data.get('name', yaml_file.stem)

                for enum_name, enum_def in (schema_data['enums'] or {}).items():
                    slot_name = self.generate_slot_name(enum_name)
                    slot_def = self.generate_slot_definition(enum_name, enum_def)

                    # Record which module the enum came from.
                    slot_def.setdefault('comments', []).append(f"Defined in module: {module_name}")

                    all_slots[slot_name] = slot_def

            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Error processing {yaml_file}: {e}")

        slots_schema = {
            'name': 'generated_slots',
            'title': 'Auto-generated Slots for Value Sets',
            'description': 'Automatically generated slot definitions for all enums in the value sets collection.',
            'id': 'https://w3id.org/linkml-common/generated-slots',
            'version': '1.0.0',
            'status': 'release',
            'imports': ['linkml:types'],
            'prefixes': {
                'linkml': 'https://w3id.org/linkml/',
                'cval': 'https://w3id.org/linkml-common/'
            },
            'default_prefix': 'cval',
            'default_curi_maps': ['semweb_context'],
            'slots': all_slots
        }

        self.write_schema(slots_schema, output_path)
        print(f"Generated {len(all_slots)} slot definitions in {output_path}")
222
+
223
+
224
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--output', '-o', type=click.Path(path_type=Path),
              help='Output path for modified schema or generated slots file')
@click.option('--mode', '-m',
              type=click.Choice(['inject', 'generate', 'preview']),
              default='preview',
              help='Mode: inject (modify files), generate (create slots file), preview (dry run)')
@click.option('--preserve/--overwrite', default=True,
              help='Preserve existing slots when injecting')
def main(schema_path: Path, output: Optional[Path], mode: str, preserve: bool):
    """
    Generate or inject LinkML slots for enums.

    SCHEMA_PATH: Path to schema file or directory
    """
    # The docstring above doubles as the command's --help text.
    injector = SlotInjector()

    if mode == 'inject':
        # Injection works on exactly one schema file.
        if not schema_path.is_file():
            print("Inject mode requires a single file. Use generate mode for directories.")
            return
        # No explicit output means the input file is rewritten in place.
        output = output or schema_path
        injected = injector.inject_slots_into_schema(
            schema_path, output, preserve_existing=preserve
        )
        print(f"Injected {len(injected)} slots into {output}")
        return

    if mode == 'generate':
        # Generation scans a directory and writes one combined slots file.
        if not schema_path.is_dir():
            print("Generate mode requires a directory.")
            return
        output = output or schema_path / 'generated_slots.yaml'
        injector.generate_typed_slots_schema(schema_path, output)
        return

    if mode == 'preview':
        # Dry run: compute the slots for one file and list them.
        if not schema_path.is_file():
            print("Preview mode requires a single file.")
            return
        preview = injector.inject_slots_into_schema(
            schema_path, None, preserve_existing=preserve
        )
        print(f"Would generate {len(preview)} slots:")
        for name, definition in preview.items():
            print(f" - {name}: {definition.get('description', 'No description')}")
            print(f" Range: {definition.get('range')}")
            if definition.get('multivalued'):
                print(f" Multivalued: true")
277
+
278
+
279
+ if __name__ == '__main__':
280
+ main()
@@ -0,0 +1,100 @@
1
+ """
2
+ Enhanced Pydantic Generator that includes metadata (meanings, annotations) for enum values.
3
+
4
+ This custom generator extends the LinkML PydanticGenerator to pass additional
5
+ metadata fields to the templates, enabling rich enum generation with ontology
6
+ mappings and annotations.
7
+ """
8
+
9
+ from typing import Dict, Any, Optional
10
+ from dataclasses import dataclass
11
+ from linkml.generators.pydanticgen import PydanticGenerator
12
+ from linkml.generators.pydanticgen.template import PydanticEnum, EnumValue
13
+ from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue
14
+
15
+
16
@dataclass
class EnhancedEnumValue(EnumValue):
    """EnumValue variant carrying two extra optional metadata fields.

    NOTE(review): this applies the stdlib ``@dataclass`` decorator to a
    subclass of LinkML's ``EnumValue`` -- confirm the base class is
    compatible with that decorator.
    """
    # Filled from the permissible value's ``meaning`` by the generator.
    meaning: Optional[str] = None
    # Filled from the permissible value's ``annotations``, copied into a dict.
    annotations: Optional[Dict[str, Any]] = None
21
+
22
+
23
class EnhancedPydanticGenerator(PydanticGenerator):
    """
    Enhanced Pydantic generator that preserves enum metadata.

    This generator extends the base PydanticGenerator to include
    meaning (ontology mappings) and annotations in the enum template context.
    """

    def generate_enums(self) -> None:
        """Generate enums with enhanced metadata.

        Builds one ``PydanticEnum`` per enum that has permissible values and
        stores it in ``self.enums`` under the enum's name. Enums without
        permissible values are skipped.
        """
        # NOTE(review): assumes ``self.enums`` is a mapping provided by the
        # base generator -- confirm against the installed LinkML version.
        for enum_name, enum_def in self.schemaview.all_enums().items():
            if not enum_def.permissible_values:
                continue

            enum_values = {}
            for pv_name, pv in enum_def.permissible_values.items():
                enum_values[pv_name] = EnhancedEnumValue(
                    label=self._get_enum_label(pv_name, pv),
                    # Fall back to the key when the value has no explicit text.
                    value=pv.text if pv.text is not None else pv_name,
                    description=pv.description,
                    meaning=pv.meaning,
                    annotations=dict(pv.annotations) if pv.annotations else None
                )

            self.enums[enum_name] = PydanticEnum(
                name=self._get_class_name(enum_name),
                description=enum_def.description,
                values=enum_values
            )

    def _get_enum_label(self, pv_name: str, pv: PermissibleValue) -> str:
        """Get the label for an enum value.

        When ``self.camelcase_enums`` is set and truthy the name is
        camel-cased; otherwise every character outside ``[a-zA-Z0-9_]`` is
        replaced by ``_`` and a leading ``_`` is added when the result is
        empty or starts with a digit, so the label is never empty.
        """
        if hasattr(self, 'camelcase_enums') and self.camelcase_enums:
            from linkml.utils.formatutils import camelcase
            return camelcase(pv_name)

        import re
        label = re.sub(r'[^a-zA-Z0-9_]', '_', pv_name)
        # An empty name would make ``label[0]`` raise IndexError, so the
        # emptiness check comes first.
        if not label or label[0].isdigit():
            label = f'_{label}'
        return label

    def _get_class_name(self, name: str) -> str:
        """Get the class name for an enum.

        Delegates to the base generator's ``_get_class_name`` when it has
        one; otherwise returns ``name`` unchanged.
        """
        if hasattr(super(), '_get_class_name'):
            return super()._get_class_name(name)
        return name
86
+
87
+
88
def generate_enhanced_pydantic(schema_path: str, **kwargs) -> str:
    """
    Build an EnhancedPydanticGenerator for a schema and serialize it.

    Args:
        schema_path: Path to the LinkML schema file
        **kwargs: Forwarded unchanged to the generator's constructor

    Returns:
        The string returned by the generator's ``serialize()``
    """
    return EnhancedPydanticGenerator(schema_path, **kwargs).serialize()
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Utility to generate LinkML slots for each enum in a schema.
4
+
5
+ For each enum, creates a corresponding slot with:
6
+ - Slot name: enum name (removing 'Enum' suffix if present)
7
+ - Range: the enum itself
8
+ - Description: auto-generated from enum description
9
+ """
10
+
11
+ import yaml
12
+ from pathlib import Path
13
+ from typing import Dict, Any, List, Optional
14
+ import re
15
+ import click
16
+ from linkml_runtime.utils.schemaview import SchemaView
17
+ from linkml_runtime.linkml_model import SchemaDefinition
18
+
19
+
20
def camel_to_snake(name: str) -> str:
    """Return ``name`` converted from CamelCase to lower snake_case."""
    # Step 1: put an underscore in front of every capitalised word (an
    # uppercase letter followed by lowercase letters) that has a character
    # before it.
    word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Step 2: put an underscore between a lowercase letter or digit and the
    # uppercase letter that directly follows it.
    boundary_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
    return boundary_split.lower()
26
+
27
+
28
def generate_slot_name(enum_name: str) -> str:
    """
    Derive a slot name from an enum name.

    A trailing 'Enum' or 'Type' (at most one of them, both four characters
    long) is dropped, and the remainder is converted to snake_case.
    """
    has_suffix = enum_name.endswith(('Enum', 'Type'))
    stem = enum_name[:-4] if has_suffix else enum_name
    return camel_to_snake(stem)
46
+
47
+
48
def generate_slot_description(enum_name: str, enum_desc: Optional[str]) -> str:
    """Generate a description for the slot based on the enum.

    The text is built from the slot name only. ``enum_desc`` merely selects
    the wording: "The <name> classification" when the enum has a
    description, "The <name> for this entity" otherwise.

    Args:
        enum_name: Name of the enum the slot is generated for.
        enum_desc: The enum's description, or None/empty if it has none.

    Returns:
        The generated slot description.
    """
    # The description's content is not used; only its presence matters.
    readable_name = generate_slot_name(enum_name).replace('_', ' ')
    if enum_desc:
        return f"The {readable_name} classification"
    return f"The {readable_name} for this entity"
56
+
57
+
58
def generate_slots_for_schema(schema_path: Path, in_place: bool = False,
                              output_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Generate slots for all enums in a schema.

    Args:
        schema_path: Path to the LinkML schema YAML file
        in_place: If True, modify the schema file in place (takes precedence
            over ``output_path``)
        output_path: If provided, write to this path instead

    Returns:
        Dictionary of generated slots; empty when the file is empty or has
        no enums. Slots that already exist in the schema are not included.
    """
    with open(schema_path, 'r', encoding='utf-8') as f:
        schema_data = yaml.safe_load(f)

    # safe_load yields None for an empty document and ``enums:`` without
    # content loads as None; both count as "no enums".
    enums = schema_data.get('enums') if isinstance(schema_data, dict) else None
    if not enums:
        print(f"No enums found in {schema_path}")
        return {}

    # Initialize the slots section if absent or declared without content.
    if schema_data.get('slots') is None:
        schema_data['slots'] = {}
    existing_slots = schema_data['slots']

    generated_slots = {}

    for enum_name, enum_def in enums.items():
        slot_name = generate_slot_name(enum_name)

        # Never overwrite a slot that is already defined.
        if slot_name in existing_slots:
            print(f" Slot '{slot_name}' already exists, skipping")
            continue

        # An enum declared as a bare key loads as None.
        enum_description = (enum_def or {}).get('description')

        slot_def = {
            'description': generate_slot_description(enum_name, enum_description),
            'range': enum_name
        }

        # Described enums additionally get a pointer back to the value set.
        if enum_description:
            slot_def['comments'] = [f"Value set: {enum_name}"]

        generated_slots[slot_name] = slot_def
        existing_slots[slot_name] = slot_def
        print(f" Generated slot '{slot_name}' for enum '{enum_name}'")

    # Write output if requested
    if in_place or output_path:
        output_file = schema_path if in_place else output_path

        # allow_unicode=True emits non-ASCII characters verbatim, so the file
        # must be opened as UTF-8 regardless of the platform default.
        with open(output_file, 'w', encoding='utf-8') as f:
            yaml.dump(schema_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)
        print(f"Updated schema written to {output_file}")

    return generated_slots
124
+
125
+
126
def process_directory(schema_dir: Path, in_place: bool = False,
                      output_dir: Optional[Path] = None) -> None:
    """
    Run slot generation over every YAML schema below a directory.

    Args:
        schema_dir: Directory searched recursively for ``*.yaml`` / ``*.yml``
        in_place: If True, each schema file is rewritten in place
        output_dir: If given (and not in_place), modified schemas are written
            here under the same relative paths

    A per-file failure is printed and does not stop the run; a summary is
    printed at the end.
    """
    skipped_names = ['linkml-meta.yaml', 'meta.yaml', 'types.yaml']
    yaml_files = [*schema_dir.rglob("*.yaml"), *schema_dir.rglob("*.yml")]

    print(f"Found {len(yaml_files)} YAML files in {schema_dir}")

    total_slots = 0
    processed_files = 0

    for yaml_file in yaml_files:
        if yaml_file.name in skipped_names:
            continue

        print(f"\nProcessing {yaml_file.relative_to(schema_dir)}...")

        try:
            if output_dir and not in_place:
                # Mirror the input's directory layout below output_dir.
                output_path = output_dir / yaml_file.relative_to(schema_dir)
                output_path.parent.mkdir(parents=True, exist_ok=True)
            else:
                output_path = None

            slots = generate_slots_for_schema(yaml_file, in_place=in_place,
                                              output_path=output_path)

            # Only files that produced at least one slot are counted.
            if not slots:
                continue
            total_slots += len(slots)
            processed_files += 1

        except Exception as e:
            print(f" Error processing {yaml_file}: {e}")

    print(f"\n{'='*50}")
    print(f"Summary: Generated {total_slots} slots across {processed_files} files")
173
+
174
+
175
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--in-place', '-i', is_flag=True,
              help='Modify schema files in place')
@click.option('--output', '-o', type=click.Path(path_type=Path),
              help='Output directory for modified schemas')
@click.option('--single-file', '-s', is_flag=True,
              help='Process single file instead of directory')
def main(schema_path: Path, in_place: bool, output: Optional[Path], single_file: bool):
    """
    Generate LinkML slots for enums in schema files.

    SCHEMA_PATH: Path to schema file or directory
    """
    # The docstring above doubles as the command's --help text.
    treat_as_file = single_file or schema_path.is_file()

    if not treat_as_file:
        # Directory mode: walk the tree; --output names a directory.
        process_directory(schema_path, in_place=in_place, output_dir=output)
        return

    # Single-file mode: --output is passed through as the output file path.
    print(f"Processing single file: {schema_path}")
    generated = generate_slots_for_schema(schema_path, in_place=in_place,
                                          output_path=output)
    print(f"Generated {len(generated)} slots")
198
+
199
+
200
+ if __name__ == '__main__':
201
+ main()