valuesets 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valuesets might be problematic. Click here for more details.

Files changed (248) hide show
  1. valuesets/__init__.py +7 -0
  2. valuesets/_version.py +8 -0
  3. valuesets/datamodel/valuesets.py +13796 -0
  4. valuesets/datamodel/valuesets_dataclass.py +24503 -0
  5. valuesets/datamodel/valuesets_pydantic.py +13796 -0
  6. valuesets/enums/__init__.py +590 -0
  7. valuesets/enums/academic/__init__.py +1 -0
  8. valuesets/enums/academic/research.py +559 -0
  9. valuesets/enums/analytical_chemistry/__init__.py +1 -0
  10. valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
  11. valuesets/enums/bio/__init__.py +1 -0
  12. valuesets/enums/bio/biological_colors.py +238 -0
  13. valuesets/enums/bio/cell_cycle.py +180 -0
  14. valuesets/enums/bio/currency_chemicals.py +52 -0
  15. valuesets/enums/bio/developmental_stages.py +103 -0
  16. valuesets/enums/bio/genome_features.py +182 -0
  17. valuesets/enums/bio/genomics.py +91 -0
  18. valuesets/enums/bio/go_aspect.py +32 -0
  19. valuesets/enums/bio/go_causality.py +58 -0
  20. valuesets/enums/bio/go_evidence.py +129 -0
  21. valuesets/enums/bio/human_developmental_stages.py +62 -0
  22. valuesets/enums/bio/insdc_geographic_locations.py +591 -0
  23. valuesets/enums/bio/insdc_missing_values.py +49 -0
  24. valuesets/enums/bio/lipid_categories.py +67 -0
  25. valuesets/enums/bio/mouse_developmental_stages.py +62 -0
  26. valuesets/enums/bio/plant_biology.py +86 -0
  27. valuesets/enums/bio/plant_developmental_stages.py +54 -0
  28. valuesets/enums/bio/plant_sex.py +81 -0
  29. valuesets/enums/bio/protein_evidence.py +61 -0
  30. valuesets/enums/bio/proteomics_standards.py +123 -0
  31. valuesets/enums/bio/psi_mi.py +306 -0
  32. valuesets/enums/bio/relationship_to_oxygen.py +37 -0
  33. valuesets/enums/bio/sequence_alphabets.py +449 -0
  34. valuesets/enums/bio/sequence_chemistry.py +357 -0
  35. valuesets/enums/bio/sequencing_platforms.py +302 -0
  36. valuesets/enums/bio/structural_biology.py +320 -0
  37. valuesets/enums/bio/taxonomy.py +238 -0
  38. valuesets/enums/bio/trophic_levels.py +85 -0
  39. valuesets/enums/bio/uniprot_species.py +344 -0
  40. valuesets/enums/bio/viral_genome_types.py +47 -0
  41. valuesets/enums/bioprocessing/__init__.py +1 -0
  42. valuesets/enums/bioprocessing/scale_up.py +249 -0
  43. valuesets/enums/business/__init__.py +1 -0
  44. valuesets/enums/business/human_resources.py +275 -0
  45. valuesets/enums/business/industry_classifications.py +181 -0
  46. valuesets/enums/business/management_operations.py +228 -0
  47. valuesets/enums/business/organizational_structures.py +236 -0
  48. valuesets/enums/business/quality_management.py +181 -0
  49. valuesets/enums/business/supply_chain.py +232 -0
  50. valuesets/enums/chemistry/__init__.py +1 -0
  51. valuesets/enums/chemistry/chemical_entities.py +315 -0
  52. valuesets/enums/chemistry/reaction_directionality.py +65 -0
  53. valuesets/enums/chemistry/reactions.py +256 -0
  54. valuesets/enums/clinical/__init__.py +1 -0
  55. valuesets/enums/clinical/nih_demographics.py +177 -0
  56. valuesets/enums/clinical/phenopackets.py +254 -0
  57. valuesets/enums/common_value_sets.py +8791 -0
  58. valuesets/enums/computing/__init__.py +1 -0
  59. valuesets/enums/computing/file_formats.py +294 -0
  60. valuesets/enums/computing/maturity_levels.py +196 -0
  61. valuesets/enums/computing/mime_types.py +227 -0
  62. valuesets/enums/confidence_levels.py +168 -0
  63. valuesets/enums/contributor.py +30 -0
  64. valuesets/enums/core.py +42 -0
  65. valuesets/enums/data/__init__.py +1 -0
  66. valuesets/enums/data/data_absent_reason.py +53 -0
  67. valuesets/enums/data_science/__init__.py +1 -0
  68. valuesets/enums/data_science/binary_classification.py +87 -0
  69. valuesets/enums/data_science/emotion_classification.py +66 -0
  70. valuesets/enums/data_science/priority_severity.py +73 -0
  71. valuesets/enums/data_science/quality_control.py +46 -0
  72. valuesets/enums/data_science/sentiment_analysis.py +50 -0
  73. valuesets/enums/data_science/text_classification.py +97 -0
  74. valuesets/enums/demographics.py +206 -0
  75. valuesets/enums/ecological_interactions.py +151 -0
  76. valuesets/enums/energy/__init__.py +1 -0
  77. valuesets/enums/energy/energy.py +343 -0
  78. valuesets/enums/energy/fossil_fuels.py +29 -0
  79. valuesets/enums/energy/nuclear/__init__.py +1 -0
  80. valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
  81. valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
  82. valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
  83. valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
  84. valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
  85. valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
  86. valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
  87. valuesets/enums/energy/nuclear/reactor_types.py +163 -0
  88. valuesets/enums/environmental_health/__init__.py +1 -0
  89. valuesets/enums/environmental_health/exposures.py +265 -0
  90. valuesets/enums/geography/__init__.py +1 -0
  91. valuesets/enums/geography/geographic_codes.py +741 -0
  92. valuesets/enums/health/__init__.py +12 -0
  93. valuesets/enums/health/vaccination.py +98 -0
  94. valuesets/enums/health.py +36 -0
  95. valuesets/enums/health_base.py +36 -0
  96. valuesets/enums/healthcare.py +45 -0
  97. valuesets/enums/industry/__init__.py +1 -0
  98. valuesets/enums/industry/extractive_industry.py +94 -0
  99. valuesets/enums/industry/mining.py +388 -0
  100. valuesets/enums/industry/safety_colors.py +201 -0
  101. valuesets/enums/investigation.py +27 -0
  102. valuesets/enums/materials_science/__init__.py +1 -0
  103. valuesets/enums/materials_science/characterization_methods.py +112 -0
  104. valuesets/enums/materials_science/crystal_structures.py +76 -0
  105. valuesets/enums/materials_science/material_properties.py +119 -0
  106. valuesets/enums/materials_science/material_types.py +104 -0
  107. valuesets/enums/materials_science/pigments_dyes.py +198 -0
  108. valuesets/enums/materials_science/synthesis_methods.py +109 -0
  109. valuesets/enums/medical/__init__.py +1 -0
  110. valuesets/enums/medical/clinical.py +277 -0
  111. valuesets/enums/medical/neuroimaging.py +119 -0
  112. valuesets/enums/mining_processing.py +302 -0
  113. valuesets/enums/physics/__init__.py +1 -0
  114. valuesets/enums/physics/states_of_matter.py +46 -0
  115. valuesets/enums/social/__init__.py +1 -0
  116. valuesets/enums/social/person_status.py +29 -0
  117. valuesets/enums/spatial/__init__.py +1 -0
  118. valuesets/enums/spatial/spatial_qualifiers.py +246 -0
  119. valuesets/enums/statistics/__init__.py +5 -0
  120. valuesets/enums/statistics/prediction_outcomes.py +31 -0
  121. valuesets/enums/statistics.py +31 -0
  122. valuesets/enums/time/__init__.py +1 -0
  123. valuesets/enums/time/temporal.py +254 -0
  124. valuesets/enums/units/__init__.py +1 -0
  125. valuesets/enums/units/measurements.py +310 -0
  126. valuesets/enums/visual/__init__.py +1 -0
  127. valuesets/enums/visual/colors.py +376 -0
  128. valuesets/generators/__init__.py +19 -0
  129. valuesets/generators/auto_slot_injector.py +280 -0
  130. valuesets/generators/enhanced_pydantic_generator.py +100 -0
  131. valuesets/generators/enum_slot_generator.py +201 -0
  132. valuesets/generators/modular_rich_generator.py +353 -0
  133. valuesets/generators/prefix_standardizer.py +198 -0
  134. valuesets/generators/rich_enum.py +127 -0
  135. valuesets/generators/rich_pydantic_generator.py +310 -0
  136. valuesets/generators/smart_slot_syncer.py +428 -0
  137. valuesets/generators/sssom_generator.py +394 -0
  138. valuesets/merged/merged_hierarchy.yaml +21649 -0
  139. valuesets/schema/README.md +3 -0
  140. valuesets/schema/academic/research.yaml +911 -0
  141. valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
  142. valuesets/schema/bio/bio_entities.yaml +364 -0
  143. valuesets/schema/bio/biological_colors.yaml +434 -0
  144. valuesets/schema/bio/cell_cycle.yaml +309 -0
  145. valuesets/schema/bio/currency_chemicals.yaml +70 -0
  146. valuesets/schema/bio/developmental_stages.yaml +226 -0
  147. valuesets/schema/bio/genome_features.yaml +342 -0
  148. valuesets/schema/bio/genomics.yaml +101 -0
  149. valuesets/schema/bio/go_aspect.yaml +39 -0
  150. valuesets/schema/bio/go_causality.yaml +119 -0
  151. valuesets/schema/bio/go_evidence.yaml +215 -0
  152. valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
  153. valuesets/schema/bio/insdc_missing_values.yaml +85 -0
  154. valuesets/schema/bio/lipid_categories.yaml +72 -0
  155. valuesets/schema/bio/plant_biology.yaml +125 -0
  156. valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
  157. valuesets/schema/bio/plant_sex.yaml +108 -0
  158. valuesets/schema/bio/protein_evidence.yaml +63 -0
  159. valuesets/schema/bio/proteomics_standards.yaml +116 -0
  160. valuesets/schema/bio/psi_mi.yaml +400 -0
  161. valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
  162. valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
  163. valuesets/schema/bio/sequence_chemistry.yaml +477 -0
  164. valuesets/schema/bio/sequencing_platforms.yaml +515 -0
  165. valuesets/schema/bio/structural_biology.yaml +428 -0
  166. valuesets/schema/bio/taxonomy.yaml +453 -0
  167. valuesets/schema/bio/trophic_levels.yaml +118 -0
  168. valuesets/schema/bio/uniprot_species.yaml +1209 -0
  169. valuesets/schema/bio/viral_genome_types.yaml +99 -0
  170. valuesets/schema/bioprocessing/scale_up.yaml +458 -0
  171. valuesets/schema/business/human_resources.yaml +752 -0
  172. valuesets/schema/business/industry_classifications.yaml +448 -0
  173. valuesets/schema/business/management_operations.yaml +602 -0
  174. valuesets/schema/business/organizational_structures.yaml +645 -0
  175. valuesets/schema/business/quality_management.yaml +502 -0
  176. valuesets/schema/business/supply_chain.yaml +688 -0
  177. valuesets/schema/chemistry/chemical_entities.yaml +639 -0
  178. valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
  179. valuesets/schema/chemistry/reactions.yaml +442 -0
  180. valuesets/schema/clinical/nih_demographics.yaml +285 -0
  181. valuesets/schema/clinical/phenopackets.yaml +429 -0
  182. valuesets/schema/computing/file_formats.yaml +631 -0
  183. valuesets/schema/computing/maturity_levels.yaml +229 -0
  184. valuesets/schema/computing/mime_types.yaml +266 -0
  185. valuesets/schema/confidence_levels.yaml +206 -0
  186. valuesets/schema/contributor.yaml +30 -0
  187. valuesets/schema/core.yaml +55 -0
  188. valuesets/schema/data/data_absent_reason.yaml +82 -0
  189. valuesets/schema/data_science/binary_classification.yaml +125 -0
  190. valuesets/schema/data_science/emotion_classification.yaml +109 -0
  191. valuesets/schema/data_science/priority_severity.yaml +122 -0
  192. valuesets/schema/data_science/quality_control.yaml +68 -0
  193. valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
  194. valuesets/schema/data_science/text_classification.yaml +135 -0
  195. valuesets/schema/demographics.yaml +238 -0
  196. valuesets/schema/ecological_interactions.yaml +298 -0
  197. valuesets/schema/energy/energy.yaml +595 -0
  198. valuesets/schema/energy/fossil_fuels.yaml +28 -0
  199. valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
  200. valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
  201. valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
  202. valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
  203. valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
  204. valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
  205. valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
  206. valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
  207. valuesets/schema/environmental_health/exposures.yaml +355 -0
  208. valuesets/schema/generated_slots.yaml +1828 -0
  209. valuesets/schema/geography/geographic_codes.yaml +1018 -0
  210. valuesets/schema/health/vaccination.yaml +102 -0
  211. valuesets/schema/health.yaml +38 -0
  212. valuesets/schema/healthcare.yaml +53 -0
  213. valuesets/schema/industry/extractive_industry.yaml +89 -0
  214. valuesets/schema/industry/mining.yaml +888 -0
  215. valuesets/schema/industry/safety_colors.yaml +375 -0
  216. valuesets/schema/investigation.yaml +64 -0
  217. valuesets/schema/materials_science/characterization_methods.yaml +193 -0
  218. valuesets/schema/materials_science/crystal_structures.yaml +138 -0
  219. valuesets/schema/materials_science/material_properties.yaml +135 -0
  220. valuesets/schema/materials_science/material_types.yaml +151 -0
  221. valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
  222. valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
  223. valuesets/schema/medical/clinical.yaml +610 -0
  224. valuesets/schema/medical/neuroimaging.yaml +325 -0
  225. valuesets/schema/mining_processing.yaml +295 -0
  226. valuesets/schema/physics/states_of_matter.yaml +46 -0
  227. valuesets/schema/slot_mixins.yaml +143 -0
  228. valuesets/schema/social/person_status.yaml +28 -0
  229. valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
  230. valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
  231. valuesets/schema/statistics.yaml +34 -0
  232. valuesets/schema/time/temporal.yaml +435 -0
  233. valuesets/schema/types.yaml +15 -0
  234. valuesets/schema/units/measurements.yaml +675 -0
  235. valuesets/schema/valuesets.yaml +100 -0
  236. valuesets/schema/visual/colors.yaml +778 -0
  237. valuesets/utils/__init__.py +6 -0
  238. valuesets/utils/comparison.py +102 -0
  239. valuesets/utils/expand_dynamic_enums.py +414 -0
  240. valuesets/utils/mapping_utils.py +236 -0
  241. valuesets/validators/__init__.py +11 -0
  242. valuesets/validators/enum_evaluator.py +669 -0
  243. valuesets/validators/oak_config.yaml +70 -0
  244. valuesets/validators/validate_with_ols.py +241 -0
  245. valuesets-0.3.1.dist-info/METADATA +395 -0
  246. valuesets-0.3.1.dist-info/RECORD +248 -0
  247. valuesets-0.3.1.dist-info/WHEEL +4 -0
  248. valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,353 @@
1
+ """
2
+ Modular Rich Enum Generator for LinkML Schemas
3
+
4
+ This generator creates modular Python enum files from LinkML schemas,
5
+ maintaining the directory structure and generating one Python module per schema file.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, Any, List, Optional, Set
12
+ from linkml_runtime.utils.schemaview import SchemaView
13
+ from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue
14
+ import logging
15
+
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class ModularRichEnumGenerator:
    """
    Generate modular Python enum files with rich metadata support.

    One Python module is written per LinkML schema file, mirroring the
    directory layout of ``schema_dir`` under ``output_dir``.  Every emitted
    class derives from ``RichEnum`` and is followed by a ``_metadata``
    assignment holding per-member descriptions, meanings and annotations.
    """

    def __init__(self, schema_dir: str, output_dir: str):
        """
        Args:
            schema_dir: Root directory that is searched recursively for ``*.yaml``.
            output_dir: Root directory that receives the generated modules.
        """
        self.schema_dir = Path(schema_dir)
        self.output_dir = Path(output_dir)
        # Dotted module key -> {'path': relative schema path, 'enums': [class names]}.
        # Filled by process_schema_file(), consumed by generate_init_file().
        self.generated_modules = {}

    def generate_all(self):
        """Process all schema files and generate corresponding Python modules."""
        # The aggregate valuesets.yaml is skipped; only the per-domain files are used.
        # Sorted so that processing order (and the log) is reproducible.
        schema_files = sorted(
            f for f in self.schema_dir.rglob("*.yaml") if f.name != "valuesets.yaml"
        )

        logger.info(f"Found {len(schema_files)} schema files to process")

        for schema_file in schema_files:
            self.process_schema_file(schema_file)

        # Generate top-level __init__.py
        self.generate_init_file()

    @staticmethod
    def _module_key(relative_path) -> str:
        """Return the dotted module key for a schema path relative to the schema root.

        Built from the path components rather than by replacing ``/`` in the
        string form, so the result is the same regardless of the OS separator.
        """
        return '.'.join(relative_path.with_suffix('').parts)

    @staticmethod
    def _escape_docstring(text: str) -> str:
        """Escape text so it can be placed inside a generated triple-quoted docstring."""
        # Backslashes first, otherwise the escapes added for the quotes get doubled.
        return str(text).replace('\\', '\\\\').replace('"""', '\\"\\"\\"')

    @staticmethod
    def _string_literal(value) -> str:
        """Return a double-quoted Python string literal for ``value``.

        JSON string escaping is used because every escape it produces is also
        a valid Python escape, and plain values keep the ``"value"`` form.
        """
        import json

        return json.dumps(str(value), ensure_ascii=False)

    def process_schema_file(self, schema_path: Path):
        """Process a single schema file and generate corresponding Python module.

        Errors while loading or generating are logged and swallowed so that one
        broken schema does not abort the whole run.
        """
        relative_path = schema_path.relative_to(self.schema_dir)
        output_path = self.output_dir / relative_path.with_suffix('.py')

        logger.info(f"Processing {relative_path} -> {output_path.relative_to(self.output_dir.parent.parent)}")

        try:
            schema_view = SchemaView(str(schema_path))
            module_content = self.generate_module(schema_view, relative_path)

            if module_content:
                output_path.parent.mkdir(parents=True, exist_ok=True)

                # Explicit UTF-8: descriptions may contain non-ASCII characters.
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(module_content)

                # Track for __init__.py generation
                self.generated_modules[self._module_key(relative_path)] = {
                    'path': relative_path,
                    'enums': self._get_enum_names(schema_view),
                }

                # Also create __init__.py for subdirectories
                self._ensure_package_structure(output_path.parent)

        except Exception as e:
            logger.error(f"Error processing {schema_path}: {e}")

    def generate_module(self, schema_view: "SchemaView", relative_path: Path) -> Optional[str]:
        """Generate Python module content for a schema.

        Returns:
            The module source text, or ``None`` when the schema yields no
            generated enum (no enums at all, or only dynamic/empty ones).
        """
        enum_names = schema_view.all_enums()
        if not enum_names:
            logger.info(f"No enums found in {relative_path}")
            return None

        schema = schema_view.schema

        # Module docstring header
        output = ['"""']
        if schema.title:
            output.append(self._escape_docstring(schema.title))
        if schema.description:
            output.append('')
            output.append(self._escape_docstring(schema.description))
        output.append('')
        # POSIX form keeps backslashes (Windows separators) out of the docstring.
        output.append(f'Generated from: {relative_path.as_posix()}')
        output.append('"""')
        output.append('')
        output.append('from __future__ import annotations')
        output.append('')
        output.append('from typing import Dict, Any, Optional')
        output.append('from valuesets.generators.rich_enum import RichEnum')
        output.append('')

        # Generate each enum and track which ones were actually emitted
        generated_enums = []
        for enum_name in enum_names:
            enum_def = schema_view.get_enum(enum_name)
            if not enum_def:
                continue
            # Skip dynamic enums
            if getattr(enum_def, 'reachable_from', None):
                continue
            if enum_def.permissible_values:
                output.extend(self._generate_enum(enum_name, enum_def))
                output.append('')
                generated_enums.append(self._get_class_name(enum_name))

        if not generated_enums:
            # No enums generated for this module
            return None

        # __all__ lists the generated enums only
        output.append('__all__ = [')
        for class_name in generated_enums:
            output.append(f'    "{class_name}",')
        output.append(']')

        return '\n'.join(output)

    def _generate_enum(self, enum_name: str, enum_def: "EnumDefinition") -> List[str]:
        """Generate a single enum class.

        Returns:
            Source lines for the class and its ``_metadata`` assignment, or an
            empty list for a dynamic enum (one with ``reachable_from`` set).
        """
        if getattr(enum_def, 'reachable_from', None):
            return []  # Dynamic enums are not generated

        class_name = self._get_class_name(enum_name)
        output = [f'class {class_name}(RichEnum):']

        if enum_def.description:
            output.append('    """')
            # Handle multi-line descriptions
            for line in self._escape_docstring(enum_def.description).split('\n'):
                output.append(f'    {line}')
            output.append('    """')

        output.append('    # Enum members')

        if not enum_def.permissible_values:
            # Empty enum - the class body still needs a statement
            output.append('    pass')
            output.append('')
            return output

        for pv_name, pv in enum_def.permissible_values.items():
            member_name = self._get_enum_member_name(pv_name)
            member_value = pv.text if pv.text is not None else pv_name
            # Escaped literal: a quote or backslash in the value must not break the module.
            output.append(f'    {member_name} = {self._string_literal(member_value)}')

        output.append('')

        # Metadata is assigned after the class body so it does not become a member
        output.append('# Set metadata after class creation')
        output.append(f'{class_name}._metadata = {{')

        for pv_name, pv in enum_def.permissible_values.items():
            member_name = self._get_enum_member_name(pv_name)
            metadata = self._build_metadata(pv)
            if metadata:
                output.append(f'    "{member_name}": {repr(metadata)},')

        output.append('}')

        return output

    def _build_metadata(self, pv: "PermissibleValue") -> Dict[str, Any]:
        """Build metadata dictionary for a permissible value.

        Only populated fields are included, so a value without any metadata
        yields an empty dict.
        """
        metadata = {}

        if pv.description:
            metadata['description'] = pv.description

        if pv.meaning:
            metadata['meaning'] = pv.meaning

        if pv.annotations:
            # Annotation objects expose .value; anything else is stringified.
            metadata['annotations'] = {
                key: annotation.value if hasattr(annotation, 'value') else str(annotation)
                for key, annotation in pv.annotations.items()
            }

        aliases = getattr(pv, 'aliases', None)
        if aliases:
            metadata['aliases'] = list(aliases)

        deprecated = getattr(pv, 'deprecated', None)
        if deprecated:
            metadata['deprecated'] = deprecated

        return metadata

    def _get_enum_names(self, schema_view: "SchemaView") -> List[str]:
        """Get the class names that generate_module() emits for this schema.

        Uses the same filter as generate_module(): dynamic enums and enums
        without permissible values are excluded, so the top-level ``__init__``
        never imports a name the module does not define.
        """
        result = []
        for enum_name in schema_view.all_enums():
            enum_def = schema_view.get_enum(enum_name)
            if not enum_def:
                continue
            if getattr(enum_def, 'reachable_from', None):
                continue
            if not enum_def.permissible_values:
                continue
            result.append(self._get_class_name(enum_name))
        return result

    def _ensure_package_structure(self, directory: Path):
        """Ensure __init__.py files exist for package structure.

        Walks from ``directory`` up to, but not including, ``output_dir`` and
        creates a stub ``__init__.py`` wherever one is missing.
        """
        current = directory
        # The second condition stops at the filesystem root.
        while current != self.output_dir and current != current.parent:
            init_file = current / '__init__.py'
            if not init_file.exists():
                init_file.write_text('"""Auto-generated package."""\n', encoding='utf-8')
            current = current.parent

    def generate_init_file(self):
        """Generate top-level __init__.py for convenient imports."""
        output = [
            '"""',
            'Common Value Sets - Rich Enum Collection',
            '',
            'This module provides convenient access to all enum definitions.',
            'Each enum includes rich metadata (descriptions, ontology mappings, annotations)',
            'while maintaining full Python enum compatibility.',
            '',
            'Usage:',
            ' from valuesets.enums import Presenceenum, AnatomicalSide',
            ' ',
            ' # Or import everything',
            ' from valuesets.enums import *',
            '"""',
            '',
            '# flake8: noqa',
            '',
        ]

        # Collect all enums from all modules
        all_enums = []
        imports_by_module = {}

        for module_key, info in sorted(self.generated_modules.items()):
            if info['enums']:
                module_path = module_key.replace('/', '.')
                imports_by_module[module_path] = info['enums']
                all_enums.extend(info['enums'])

        # Group imports by domain: the first package component, or 'core'
        # for modules that sit directly in the output root.
        domains = {}
        for module_path, enums in imports_by_module.items():
            parts = module_path.split('.')
            domain = parts[0] if len(parts) > 1 else 'core'
            domains.setdefault(domain, {})[module_path] = enums

        # Write imports organized by domain
        for domain in sorted(domains):
            output.append(f'# {domain.title()} domain')
            for module_path, enums in sorted(domains[domain].items()):
                if enums:
                    enum_list = ', '.join(enums)
                    output.append(f'from .{module_path} import {enum_list}')
            output.append('')

        # Generate __all__
        output.append('__all__ = [')
        for enum in sorted(set(all_enums)):
            output.append(f'    "{enum}",')
        output.append(']')

        # Write the init file
        init_path = self.output_dir / '__init__.py'
        init_path.parent.mkdir(parents=True, exist_ok=True)
        with open(init_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(output))

        logger.info(f"Generated {init_path} with {len(all_enums)} enum exports")

    def _get_class_name(self, name: str) -> str:
        """Convert LinkML name to Python class name with proper CamelCase."""
        # No separators: treat as already CamelCase and only force a leading capital.
        if not any(c in name for c in ['_', '-', ' ']):
            return name[0].upper() + name[1:] if name else ''

        # Convert snake_case, kebab-case, or space-separated to CamelCase
        result = []
        for word in re.split(r'[_\s-]+', name):
            if not word:
                continue
            if word.isupper():
                # All caps (like "ISO") is kept as is
                result.append(word)
            elif word[0].isupper() and len(word) > 1:
                # Already starts with a capital: preserve the casing
                result.append(word)
            else:
                result.append(word[0].upper() + word[1:].lower())

        return ''.join(result)

    def _get_enum_member_name(self, name: str) -> str:
        """Convert permissible value name to Python enum member name.

        Characters outside ``[a-zA-Z0-9_]`` become ``_``, the result is
        upper-cased, and a leading digit is prefixed with ``_``.
        """
        member_name = re.sub(r'[^a-zA-Z0-9_]', '_', name).upper()
        if member_name and member_name[0].isdigit():
            member_name = f'_{member_name}'
        return member_name
336
+
337
+
338
def main():
    """Command-line entry point.

    Takes the schema directory as a positional argument and the output
    directory via ``-o/--output-dir`` (required), then runs the generator
    over every schema found.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Generate modular rich enums from LinkML schemas')
    cli.add_argument('schema_dir', help='Directory containing LinkML schema files')
    cli.add_argument('-o', '--output-dir', required=True, help='Output directory for Python modules')
    options = cli.parse_args()

    ModularRichEnumGenerator(options.schema_dir, options.output_dir).generate_all()


if __name__ == '__main__':
    main()
@@ -0,0 +1,198 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Standardize prefixes across all LinkML schemas to use consistent valuesets prefix.
4
+
5
+ Sets:
6
+ - default_prefix: valuesets
7
+ - valuesets: https://w3id.org/valuesets/
8
+
9
+ Updates schemas in place with consistent prefixing.
10
+ """
11
+
12
+ import yaml
13
+ from pathlib import Path
14
+ from typing import Dict, Any
15
+ import click
16
+ from collections import OrderedDict
17
+
18
+
19
class PrefixStandardizer:
    """Utility to standardize prefixes across LinkML schemas.

    For each schema it makes sure that ``prefixes[target_prefix]`` equals
    ``target_uri`` and that ``default_prefix`` equals ``target_prefix``.
    """

    def __init__(self, target_prefix: str = "valuesets",
                 target_uri: str = "https://w3id.org/valuesets/"):
        """
        Args:
            target_prefix: Prefix name every schema should declare and default to.
            target_uri: URI the prefix must expand to.
        """
        self.target_prefix = target_prefix
        self.target_uri = target_uri

    def _apply_prefix_changes(self, schema_data: Dict[str, Any]) -> list:
        """Update ``schema_data`` in place and return a description of each change.

        Returns an empty list when the schema already matches the target.
        """
        changes = []

        # A missing key and an explicit ``prefixes:`` with no value (None) are
        # treated alike, so the lookup below never runs against None.
        if schema_data.get('prefixes') is None:
            schema_data['prefixes'] = {}
        prefixes = schema_data['prefixes']

        # Update/add the target prefix
        current_uri = prefixes.get(self.target_prefix)
        if current_uri != self.target_uri:
            old_uri = current_uri or "not defined"
            prefixes[self.target_prefix] = self.target_uri
            changes.append(f"Updated {self.target_prefix} prefix: {old_uri} → {self.target_uri}")

        # Update default_prefix
        current_default = schema_data.get('default_prefix')
        if current_default != self.target_prefix:
            old_default = current_default or "not defined"
            schema_data['default_prefix'] = self.target_prefix
            changes.append(f"Updated default_prefix: {old_default} → {self.target_prefix}")

        return changes

    def standardize_schema_prefixes(self, schema_path: Path, dry_run: bool = False) -> Dict[str, Any]:
        """
        Standardize prefixes in a schema file.

        Args:
            schema_path: Path to the schema file
            dry_run: If True, only show what would be changed

        Returns:
            Summary of changes made: ``file`` (file name), ``changes`` (list of
            messages) and ``modified`` (True when there is at least one change;
            this is also True in a dry run, where nothing is written).

        Raises:
            ValueError: If the file is empty or its top level is not a mapping.
        """
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_data = yaml.safe_load(f)

        # safe_load returns None for an empty file and a list/scalar for other
        # documents; fail with a clear message instead of an opaque TypeError.
        if not isinstance(schema_data, dict):
            raise ValueError(f"{schema_path} does not contain a YAML mapping")

        changes = self._apply_prefix_changes(schema_data)

        # Write changes if not dry run
        if not dry_run and changes:
            self.write_schema(schema_data, schema_path)

        return {
            'file': schema_path.name,
            'changes': changes,
            'modified': len(changes) > 0
        }

    def write_schema(self, schema_data: Dict[str, Any], output_path: Path):
        """Write the schema with its top-level keys in a preferred order.

        Known keys come first in a fixed order; any other keys follow in their
        existing order.  The file is re-serialized by ``yaml.dump``, so YAML
        comments and the original layout are not retained.
        """
        key_order = [
            'name', 'title', 'description', 'id', 'version', 'status',
            'imports', 'prefixes', 'default_prefix', 'default_curi_maps',
            'slots', 'classes', 'enums', 'types', 'subsets', 'license', 'see_also'
        ]

        # Plain dicts keep insertion order, which sort_keys=False preserves on dump.
        ordered_data = {key: schema_data[key] for key in key_order if key in schema_data}
        for key in schema_data:
            if key not in ordered_data:
                ordered_data[key] = schema_data[key]

        # UTF-8 is required because allow_unicode=True emits non-ASCII text unescaped.
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(ordered_data, f,
                      default_flow_style=False,
                      sort_keys=False,
                      allow_unicode=True,
                      width=120)

    def standardize_directory(self, schema_dir: Path, dry_run: bool = False) -> None:
        """
        Standardize prefixes for all schemas in a directory.

        Progress and a final summary are printed to stdout.  A failure on one
        file is reported and does not stop the remaining files.

        Args:
            schema_dir: Directory containing LinkML schema files
            dry_run: If True, only show what would be changed
        """
        # Sorted so the report order is reproducible.
        yaml_files = sorted(schema_dir.rglob("*.yaml"))

        print(f"{'DRY RUN - ' if dry_run else ''}Standardizing prefixes in {len(yaml_files)} files")
        print(f"Target: {self.target_prefix}: {self.target_uri}")
        print()

        total_modified = 0
        total_changes = 0

        for yaml_file in yaml_files:
            # Skip certain files
            if yaml_file.name in ['linkml-meta.yaml', 'meta.yaml']:
                continue

            try:
                result = self.standardize_schema_prefixes(yaml_file, dry_run=dry_run)

                if result['changes']:
                    total_modified += 1
                    total_changes += len(result['changes'])

                    print(f"{'[DRY RUN] ' if dry_run else ''}{result['file']}:")
                    for change in result['changes']:
                        print(f" - {change}")
                    print()

            except Exception as e:
                print(f"Error processing {yaml_file}: {e}")

        print(f"{'='*50}")
        print(f"{'DRY RUN - ' if dry_run else ''}Summary:")
        print(f" Files modified: {total_modified}")
        print(f" Total changes: {total_changes}")
144
+
145
+
146
@click.command()
@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
@click.option('--dry-run', '-n', is_flag=True,
              help='Preview changes without modifying files')
@click.option('--prefix', '-p', default='valuesets',
              help='Target prefix name (default: valuesets)')
@click.option('--uri', '-u', default='https://w3id.org/valuesets/',
              help='Target prefix URI (default: https://w3id.org/valuesets/)')
@click.option('--single-file', '-s', is_flag=True,
              help='Process single file instead of directory')
def main(schema_path: Path, dry_run: bool, prefix: str, uri: str, single_file: bool):
    """
    Standardize prefixes across LinkML schemas.

    SCHEMA_PATH: Path to schema file or directory

    Examples:

    \b
    # Preview changes for all schemas
    prefix_standardizer.py src/valuesets/schema --dry-run

    \b
    # Standardize all schemas
    prefix_standardizer.py src/valuesets/schema

    \b
    # Single file
    prefix_standardizer.py schema.yaml --single-file

    \b
    # Custom prefix
    prefix_standardizer.py src/valuesets/schema --prefix cval --uri https://w3id.org/linkml-common/
    """
    # --single-file on a directory would otherwise reach open() on the
    # directory and end in a traceback; report it as a usage error instead.
    if single_file and schema_path.is_dir():
        raise click.UsageError(
            f"--single-file was given but {schema_path} is a directory"
        )

    standardizer = PrefixStandardizer(target_prefix=prefix, target_uri=uri)

    if single_file or schema_path.is_file():
        # Process single file
        result = standardizer.standardize_schema_prefixes(schema_path, dry_run=dry_run)

        print(f"{'DRY RUN - ' if dry_run else ''}Results for {result['file']}:")
        if result['changes']:
            for change in result['changes']:
                print(f" - {change}")
        else:
            print(" No changes needed")
    else:
        # Process directory
        standardizer.standardize_directory(schema_path, dry_run=dry_run)


if __name__ == '__main__':
    main()
@@ -0,0 +1,127 @@
1
+ """
2
+ Rich Enum Implementation with Metadata Support
3
+
4
+ This module provides enums that maintain full compatibility with standard Python
5
+ enums while adding metadata support using __init_subclass__.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Dict, Any, Optional, Type
10
+
11
+
12
class RichEnum(str, Enum):
    """
    Base class for string-valued enums with per-member metadata.

    Enums derived from this class:
    1. Are fully compatible with standard Python enums
    2. Support string values (inherit from str)
    3. Have metadata access methods
    4. Can be looked up by ontology meaning

    Metadata is read from a ``_metadata`` dict stored on the concrete enum
    class and keyed by member *name*. It should be assigned AFTER class
    creation to avoid it becoming an enum member.

    The accessors are ordinary methods defined once on this base class, so a
    subclass may override any of them in its own body.

    Usage:
        class MyEnum(RichEnum):
            VALUE1 = "value1"
            VALUE2 = "value2"

        # Set metadata after class creation
        MyEnum._metadata = {
            "VALUE1": {
                "description": "First value",
                "meaning": "ONTO:0000001",
                "annotations": {"category": "group1"}
            },
            "VALUE2": {
                "description": "Second value",
                "meaning": "ONTO:0000002"
            }
        }
    """

    def _member_metadata(self) -> Dict[str, Any]:
        """Return the raw metadata entry for this member (``{}`` if none)."""
        # Only the member's own class namespace is consulted; ``_metadata``
        # defined on a base class is deliberately not inherited.
        class_metadata = vars(type(self)).get('_metadata', {})
        return class_metadata.get(self.name, {})

    def get_description(self) -> Optional[str]:
        """Get the description for this enum member, or None if unset."""
        return self._member_metadata().get("description")

    def get_meaning(self) -> Optional[str]:
        """Get the ontology meaning/mapping for this enum member, or None."""
        return self._member_metadata().get("meaning")

    def get_annotations(self) -> Dict[str, Any]:
        """Get the annotations dictionary for this enum member (``{}`` if unset)."""
        return self._member_metadata().get("annotations", {})

    def get_metadata(self) -> Dict[str, Any]:
        """
        Get all metadata for this enum member.

        Returns:
            A new dict holding ``name`` and ``value`` plus every key of the
            member's metadata entry (entry keys win on collision).
        """
        merged = {"name": self.name, "value": self.value}
        merged.update(self._member_metadata())
        return merged

    @classmethod
    def from_meaning(cls, meaning: str) -> Optional['RichEnum']:
        """
        Find an enum member by its ontology meaning.

        Args:
            meaning: The ontology term (e.g., "BSPO:0000000")

        Returns:
            The first member (in definition order) with the given meaning,
            or None if not found
        """
        for member in cls:
            if member.get_meaning() == meaning:
                return member
        return None

    @classmethod
    def get_all_meanings(cls) -> Dict[str, str]:
        """Get a mapping of member names to meanings, skipping empty ones."""
        pairs = ((member.name, member.get_meaning()) for member in cls)
        return {name: meaning for name, meaning in pairs if meaning}

    @classmethod
    def get_all_descriptions(cls) -> Dict[str, str]:
        """Get a mapping of member names to descriptions, skipping empty ones."""
        pairs = ((member.name, member.get_description()) for member in cls)
        return {name: text for name, text in pairs if text}

    @classmethod
    def list_metadata(cls) -> Dict[str, Dict[str, Any]]:
        """Get all metadata for all members, keyed by member name."""
        return {member.name: member.get_metadata() for member in cls}
123
+
124
+
125
# Placeholder retained for backwards compatibility.
RichEnumMeta = None

# Alias for annotating a RichEnum class (or subclass) rather than a member.
RichEnumType = Type[RichEnum]