PyPI - valuesets - Versions diffs - 0.3.1__py3-none-any.whl - Mend

valuesets 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of valuesets might be problematic. Click here for more details.

Files changed (248) hide show

valuesets/__init__.py +7 -0
valuesets/_version.py +8 -0
valuesets/datamodel/valuesets.py +13796 -0
valuesets/datamodel/valuesets_dataclass.py +24503 -0
valuesets/datamodel/valuesets_pydantic.py +13796 -0
valuesets/enums/__init__.py +590 -0
valuesets/enums/academic/__init__.py +1 -0
valuesets/enums/academic/research.py +559 -0
valuesets/enums/analytical_chemistry/__init__.py +1 -0
valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
valuesets/enums/bio/__init__.py +1 -0
valuesets/enums/bio/biological_colors.py +238 -0
valuesets/enums/bio/cell_cycle.py +180 -0
valuesets/enums/bio/currency_chemicals.py +52 -0
valuesets/enums/bio/developmental_stages.py +103 -0
valuesets/enums/bio/genome_features.py +182 -0
valuesets/enums/bio/genomics.py +91 -0
valuesets/enums/bio/go_aspect.py +32 -0
valuesets/enums/bio/go_causality.py +58 -0
valuesets/enums/bio/go_evidence.py +129 -0
valuesets/enums/bio/human_developmental_stages.py +62 -0
valuesets/enums/bio/insdc_geographic_locations.py +591 -0
valuesets/enums/bio/insdc_missing_values.py +49 -0
valuesets/enums/bio/lipid_categories.py +67 -0
valuesets/enums/bio/mouse_developmental_stages.py +62 -0
valuesets/enums/bio/plant_biology.py +86 -0
valuesets/enums/bio/plant_developmental_stages.py +54 -0
valuesets/enums/bio/plant_sex.py +81 -0
valuesets/enums/bio/protein_evidence.py +61 -0
valuesets/enums/bio/proteomics_standards.py +123 -0
valuesets/enums/bio/psi_mi.py +306 -0
valuesets/enums/bio/relationship_to_oxygen.py +37 -0
valuesets/enums/bio/sequence_alphabets.py +449 -0
valuesets/enums/bio/sequence_chemistry.py +357 -0
valuesets/enums/bio/sequencing_platforms.py +302 -0
valuesets/enums/bio/structural_biology.py +320 -0
valuesets/enums/bio/taxonomy.py +238 -0
valuesets/enums/bio/trophic_levels.py +85 -0
valuesets/enums/bio/uniprot_species.py +344 -0
valuesets/enums/bio/viral_genome_types.py +47 -0
valuesets/enums/bioprocessing/__init__.py +1 -0
valuesets/enums/bioprocessing/scale_up.py +249 -0
valuesets/enums/business/__init__.py +1 -0
valuesets/enums/business/human_resources.py +275 -0
valuesets/enums/business/industry_classifications.py +181 -0
valuesets/enums/business/management_operations.py +228 -0
valuesets/enums/business/organizational_structures.py +236 -0
valuesets/enums/business/quality_management.py +181 -0
valuesets/enums/business/supply_chain.py +232 -0
valuesets/enums/chemistry/__init__.py +1 -0
valuesets/enums/chemistry/chemical_entities.py +315 -0
valuesets/enums/chemistry/reaction_directionality.py +65 -0
valuesets/enums/chemistry/reactions.py +256 -0
valuesets/enums/clinical/__init__.py +1 -0
valuesets/enums/clinical/nih_demographics.py +177 -0
valuesets/enums/clinical/phenopackets.py +254 -0
valuesets/enums/common_value_sets.py +8791 -0
valuesets/enums/computing/__init__.py +1 -0
valuesets/enums/computing/file_formats.py +294 -0
valuesets/enums/computing/maturity_levels.py +196 -0
valuesets/enums/computing/mime_types.py +227 -0
valuesets/enums/confidence_levels.py +168 -0
valuesets/enums/contributor.py +30 -0
valuesets/enums/core.py +42 -0
valuesets/enums/data/__init__.py +1 -0
valuesets/enums/data/data_absent_reason.py +53 -0
valuesets/enums/data_science/__init__.py +1 -0
valuesets/enums/data_science/binary_classification.py +87 -0
valuesets/enums/data_science/emotion_classification.py +66 -0
valuesets/enums/data_science/priority_severity.py +73 -0
valuesets/enums/data_science/quality_control.py +46 -0
valuesets/enums/data_science/sentiment_analysis.py +50 -0
valuesets/enums/data_science/text_classification.py +97 -0
valuesets/enums/demographics.py +206 -0
valuesets/enums/ecological_interactions.py +151 -0
valuesets/enums/energy/__init__.py +1 -0
valuesets/enums/energy/energy.py +343 -0
valuesets/enums/energy/fossil_fuels.py +29 -0
valuesets/enums/energy/nuclear/__init__.py +1 -0
valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
valuesets/enums/energy/nuclear/reactor_types.py +163 -0
valuesets/enums/environmental_health/__init__.py +1 -0
valuesets/enums/environmental_health/exposures.py +265 -0
valuesets/enums/geography/__init__.py +1 -0
valuesets/enums/geography/geographic_codes.py +741 -0
valuesets/enums/health/__init__.py +12 -0
valuesets/enums/health/vaccination.py +98 -0
valuesets/enums/health.py +36 -0
valuesets/enums/health_base.py +36 -0
valuesets/enums/healthcare.py +45 -0
valuesets/enums/industry/__init__.py +1 -0
valuesets/enums/industry/extractive_industry.py +94 -0
valuesets/enums/industry/mining.py +388 -0
valuesets/enums/industry/safety_colors.py +201 -0
valuesets/enums/investigation.py +27 -0
valuesets/enums/materials_science/__init__.py +1 -0
valuesets/enums/materials_science/characterization_methods.py +112 -0
valuesets/enums/materials_science/crystal_structures.py +76 -0
valuesets/enums/materials_science/material_properties.py +119 -0
valuesets/enums/materials_science/material_types.py +104 -0
valuesets/enums/materials_science/pigments_dyes.py +198 -0
valuesets/enums/materials_science/synthesis_methods.py +109 -0
valuesets/enums/medical/__init__.py +1 -0
valuesets/enums/medical/clinical.py +277 -0
valuesets/enums/medical/neuroimaging.py +119 -0
valuesets/enums/mining_processing.py +302 -0
valuesets/enums/physics/__init__.py +1 -0
valuesets/enums/physics/states_of_matter.py +46 -0
valuesets/enums/social/__init__.py +1 -0
valuesets/enums/social/person_status.py +29 -0
valuesets/enums/spatial/__init__.py +1 -0
valuesets/enums/spatial/spatial_qualifiers.py +246 -0
valuesets/enums/statistics/__init__.py +5 -0
valuesets/enums/statistics/prediction_outcomes.py +31 -0
valuesets/enums/statistics.py +31 -0
valuesets/enums/time/__init__.py +1 -0
valuesets/enums/time/temporal.py +254 -0
valuesets/enums/units/__init__.py +1 -0
valuesets/enums/units/measurements.py +310 -0
valuesets/enums/visual/__init__.py +1 -0
valuesets/enums/visual/colors.py +376 -0
valuesets/generators/__init__.py +19 -0
valuesets/generators/auto_slot_injector.py +280 -0
valuesets/generators/enhanced_pydantic_generator.py +100 -0
valuesets/generators/enum_slot_generator.py +201 -0
valuesets/generators/modular_rich_generator.py +353 -0
valuesets/generators/prefix_standardizer.py +198 -0
valuesets/generators/rich_enum.py +127 -0
valuesets/generators/rich_pydantic_generator.py +310 -0
valuesets/generators/smart_slot_syncer.py +428 -0
valuesets/generators/sssom_generator.py +394 -0
valuesets/merged/merged_hierarchy.yaml +21649 -0
valuesets/schema/README.md +3 -0
valuesets/schema/academic/research.yaml +911 -0
valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
valuesets/schema/bio/bio_entities.yaml +364 -0
valuesets/schema/bio/biological_colors.yaml +434 -0
valuesets/schema/bio/cell_cycle.yaml +309 -0
valuesets/schema/bio/currency_chemicals.yaml +70 -0
valuesets/schema/bio/developmental_stages.yaml +226 -0
valuesets/schema/bio/genome_features.yaml +342 -0
valuesets/schema/bio/genomics.yaml +101 -0
valuesets/schema/bio/go_aspect.yaml +39 -0
valuesets/schema/bio/go_causality.yaml +119 -0
valuesets/schema/bio/go_evidence.yaml +215 -0
valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
valuesets/schema/bio/insdc_missing_values.yaml +85 -0
valuesets/schema/bio/lipid_categories.yaml +72 -0
valuesets/schema/bio/plant_biology.yaml +125 -0
valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
valuesets/schema/bio/plant_sex.yaml +108 -0
valuesets/schema/bio/protein_evidence.yaml +63 -0
valuesets/schema/bio/proteomics_standards.yaml +116 -0
valuesets/schema/bio/psi_mi.yaml +400 -0
valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
valuesets/schema/bio/sequence_chemistry.yaml +477 -0
valuesets/schema/bio/sequencing_platforms.yaml +515 -0
valuesets/schema/bio/structural_biology.yaml +428 -0
valuesets/schema/bio/taxonomy.yaml +453 -0
valuesets/schema/bio/trophic_levels.yaml +118 -0
valuesets/schema/bio/uniprot_species.yaml +1209 -0
valuesets/schema/bio/viral_genome_types.yaml +99 -0
valuesets/schema/bioprocessing/scale_up.yaml +458 -0
valuesets/schema/business/human_resources.yaml +752 -0
valuesets/schema/business/industry_classifications.yaml +448 -0
valuesets/schema/business/management_operations.yaml +602 -0
valuesets/schema/business/organizational_structures.yaml +645 -0
valuesets/schema/business/quality_management.yaml +502 -0
valuesets/schema/business/supply_chain.yaml +688 -0
valuesets/schema/chemistry/chemical_entities.yaml +639 -0
valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
valuesets/schema/chemistry/reactions.yaml +442 -0
valuesets/schema/clinical/nih_demographics.yaml +285 -0
valuesets/schema/clinical/phenopackets.yaml +429 -0
valuesets/schema/computing/file_formats.yaml +631 -0
valuesets/schema/computing/maturity_levels.yaml +229 -0
valuesets/schema/computing/mime_types.yaml +266 -0
valuesets/schema/confidence_levels.yaml +206 -0
valuesets/schema/contributor.yaml +30 -0
valuesets/schema/core.yaml +55 -0
valuesets/schema/data/data_absent_reason.yaml +82 -0
valuesets/schema/data_science/binary_classification.yaml +125 -0
valuesets/schema/data_science/emotion_classification.yaml +109 -0
valuesets/schema/data_science/priority_severity.yaml +122 -0
valuesets/schema/data_science/quality_control.yaml +68 -0
valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
valuesets/schema/data_science/text_classification.yaml +135 -0
valuesets/schema/demographics.yaml +238 -0
valuesets/schema/ecological_interactions.yaml +298 -0
valuesets/schema/energy/energy.yaml +595 -0
valuesets/schema/energy/fossil_fuels.yaml +28 -0
valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
valuesets/schema/environmental_health/exposures.yaml +355 -0
valuesets/schema/generated_slots.yaml +1828 -0
valuesets/schema/geography/geographic_codes.yaml +1018 -0
valuesets/schema/health/vaccination.yaml +102 -0
valuesets/schema/health.yaml +38 -0
valuesets/schema/healthcare.yaml +53 -0
valuesets/schema/industry/extractive_industry.yaml +89 -0
valuesets/schema/industry/mining.yaml +888 -0
valuesets/schema/industry/safety_colors.yaml +375 -0
valuesets/schema/investigation.yaml +64 -0
valuesets/schema/materials_science/characterization_methods.yaml +193 -0
valuesets/schema/materials_science/crystal_structures.yaml +138 -0
valuesets/schema/materials_science/material_properties.yaml +135 -0
valuesets/schema/materials_science/material_types.yaml +151 -0
valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
valuesets/schema/medical/clinical.yaml +610 -0
valuesets/schema/medical/neuroimaging.yaml +325 -0
valuesets/schema/mining_processing.yaml +295 -0
valuesets/schema/physics/states_of_matter.yaml +46 -0
valuesets/schema/slot_mixins.yaml +143 -0
valuesets/schema/social/person_status.yaml +28 -0
valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
valuesets/schema/statistics.yaml +34 -0
valuesets/schema/time/temporal.yaml +435 -0
valuesets/schema/types.yaml +15 -0
valuesets/schema/units/measurements.yaml +675 -0
valuesets/schema/valuesets.yaml +100 -0
valuesets/schema/visual/colors.yaml +778 -0
valuesets/utils/__init__.py +6 -0
valuesets/utils/comparison.py +102 -0
valuesets/utils/expand_dynamic_enums.py +414 -0
valuesets/utils/mapping_utils.py +236 -0
valuesets/validators/__init__.py +11 -0
valuesets/validators/enum_evaluator.py +669 -0
valuesets/validators/oak_config.yaml +70 -0
valuesets/validators/validate_with_ols.py +241 -0
valuesets-0.3.1.dist-info/METADATA +395 -0
valuesets-0.3.1.dist-info/RECORD +248 -0
valuesets-0.3.1.dist-info/WHEEL +4 -0
valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0

valuesets/generators/modular_rich_generator.py ADDED Viewed

@@ -0,0 +1,353 @@
+"""
+Modular Rich Enum Generator for LinkML Schemas
+This generator creates modular Python enum files from LinkML schemas,
+maintaining the directory structure and generating one Python module per schema file.
+"""
+import os
+import re
+from pathlib import Path
+from typing import Dict, Any, List, Optional, Set
+from linkml_runtime.utils.schemaview import SchemaView
+from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue
+import logging
+# NOTE(review): basicConfig is called at module top level, so merely importing
+# this module configures the root logger at INFO level.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger used by ModularRichEnumGenerator for progress and error messages.
+logger = logging.getLogger(__name__)
+class ModularRichEnumGenerator:
+    """
+    Generate modular Python enum files with rich metadata support.
+    """
+    def __init__(self, schema_dir: str, output_dir: str):
+        self.schema_dir = Path(schema_dir)
+        self.output_dir = Path(output_dir)
+        self.generated_modules = {}  # Track what we generate for __init__.py
+    def generate_all(self):
+        """Process all schema files and generate corresponding Python modules."""
+        # Find all YAML schema files
+        schema_files = list(self.schema_dir.rglob("*.yaml"))
+        # Skip the main valuesets.yaml
+        schema_files = [f for f in schema_files if f.name != "valuesets.yaml"]
+        logger.info(f"Found {len(schema_files)} schema files to process")
+        for schema_file in schema_files:
+            self.process_schema_file(schema_file)
+        # Generate top-level __init__.py
+        self.generate_init_file()
+    def process_schema_file(self, schema_path: Path):
+        """Process a single schema file and generate corresponding Python module."""
+        # Calculate relative path from schema dir
+        relative_path = schema_path.relative_to(self.schema_dir)
+        # Create corresponding output path
+        output_path = self.output_dir / relative_path.with_suffix('.py')
+        logger.info(f"Processing {relative_path} -> {output_path.relative_to(self.output_dir.parent.parent)}")
+        try:
+            # Load schema
+            schema_view = SchemaView(str(schema_path))
+            # Generate Python module
+            module_content = self.generate_module(schema_view, relative_path)
+            if module_content:
+                # Ensure output directory exists
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                # Write module
+                with open(output_path, 'w') as f:
+                    f.write(module_content)
+                # Track for __init__.py generation
+                module_key = str(relative_path.with_suffix('')).replace('/', '.')
+                self.generated_modules[module_key] = {
+                    'path': relative_path,
+                    'enums': self._get_enum_names(schema_view)
+                }
+                # Also create __init__.py for subdirectories
+                self._ensure_package_structure(output_path.parent)
+        except Exception as e:
+            logger.error(f"Error processing {schema_path}: {e}")
+    def generate_module(self, schema_view: SchemaView, relative_path: Path) -> Optional[str]:
+        """Generate Python module content for a schema."""
+        output = []
+        # Header
+        output.append('"""')
+        if schema_view.schema.title:
+            output.append(f'{schema_view.schema.title}')
+        if schema_view.schema.description:
+            output.append('')
+            output.append(schema_view.schema.description)
+        output.append('')
+        output.append(f'Generated from: {relative_path}')
+        output.append('"""')
+        output.append('')
+        output.append('from __future__ import annotations')
+        output.append('')
+        output.append('from typing import Dict, Any, Optional')
+        output.append('from valuesets.generators.rich_enum import RichEnum')
+        output.append('')
+        # Get all enums in this schema
+        enum_names = schema_view.all_enums()
+        if not enum_names:
+            logger.info(f"No enums found in {relative_path}")
+            return None
+        # Generate each enum and track which ones we actually generated
+        generated_enums = []
+        for enum_name in enum_names:
+            enum_def = schema_view.get_enum(enum_name)
+            if enum_def:
+                # Skip dynamic enums
+                if hasattr(enum_def, 'reachable_from') and enum_def.reachable_from:
+                    continue
+                if enum_def.permissible_values:
+                    output.extend(self._generate_enum(enum_name, enum_def))
+                    output.append('')
+                    generated_enums.append(self._get_class_name(enum_name))
+        # Add __all__ export for generated enums only
+        if generated_enums:
+            output.append('__all__ = [')
+            for class_name in generated_enums:
+                output.append(f'    "{class_name}",')
+            output.append(']')
+        else:
+            # No enums generated for this module
+            return None
+        return '\n'.join(output)
+    def _generate_enum(self, enum_name: str, enum_def: EnumDefinition) -> List[str]:
+        """Generate a single enum class."""
+        output = []
+        class_name = self._get_class_name(enum_name)
+        # Check if this is a dynamic enum
+        is_dynamic = hasattr(enum_def, 'reachable_from') and enum_def.reachable_from
+        if is_dynamic:
+            # For dynamic enums, generate a placeholder comment
+            output.append(f'# {class_name} is a dynamic enum')
+            output.append(f'# It would be populated from: {enum_def.reachable_from}')
+            output.append(f'# Skipping generation for dynamic enum')
+            return []  # Don't generate this enum
+        output.append(f'class {class_name}(RichEnum):')
+        # Add docstring
+        if enum_def.description:
+            output.append('    """')
+            # Handle multi-line descriptions
+            for line in enum_def.description.split('\n'):
+                output.append(f'    {line}')
+            output.append('    """')
+        # Generate enum members
+        output.append('    # Enum members')
+        if not enum_def.permissible_values:
+            # Empty enum - add pass statement
+            output.append('    pass')
+            output.append('')
+            return output
+        for pv_name, pv in enum_def.permissible_values.items():
+            member_name = self._get_enum_member_name(pv_name)
+            member_value = pv.text if pv.text is not None else pv_name
+            output.append(f'    {member_name} = "{member_value}"')
+        output.append('')
+        # Generate metadata
+        output.append(f'# Set metadata after class creation')
+        output.append(f'{class_name}._metadata = {{')
+        for pv_name, pv in enum_def.permissible_values.items():
+            member_name = self._get_enum_member_name(pv_name)
+            metadata = self._build_metadata(pv)
+            if metadata:
+                output.append(f'    "{member_name}": {repr(metadata)},')
+        output.append('}')
+        return output
+    def _build_metadata(self, pv: PermissibleValue) -> Dict[str, Any]:
+        """Build metadata dictionary for a permissible value."""
+        metadata = {}
+        if pv.description:
+            metadata['description'] = pv.description
+        if pv.meaning:
+            metadata['meaning'] = pv.meaning
+        if pv.annotations:
+            annotations_dict = {}
+            for key, annotation in pv.annotations.items():
+                if hasattr(annotation, 'value'):
+                    annotations_dict[key] = annotation.value
+                else:
+                    annotations_dict[key] = str(annotation)
+            metadata['annotations'] = annotations_dict
+        if hasattr(pv, 'aliases') and pv.aliases:
+            metadata['aliases'] = list(pv.aliases)
+        if hasattr(pv, 'deprecated') and pv.deprecated:
+            metadata['deprecated'] = pv.deprecated
+        return metadata
+    def _get_enum_names(self, schema_view: SchemaView) -> List[str]:
+        """Get list of enum class names from schema (excluding dynamic enums)."""
+        result = []
+        for enum_name in schema_view.all_enums():
+            enum_def = schema_view.get_enum(enum_name)
+            # Skip dynamic enums
+            if enum_def and not (hasattr(enum_def, 'reachable_from') and enum_def.reachable_from):
+                result.append(self._get_class_name(enum_name))
+        return result
+    def _ensure_package_structure(self, directory: Path):
+        """Ensure __init__.py files exist for package structure."""
+        current = directory
+        while current != self.output_dir and current != current.parent:
+            init_file = current / '__init__.py'
+            if not init_file.exists():
+                init_file.write_text('"""Auto-generated package."""\n')
+            current = current.parent
+    def generate_init_file(self):
+        """Generate top-level __init__.py for convenient imports."""
+        output = []
+        output.append('"""')
+        output.append('Common Value Sets - Rich Enum Collection')
+        output.append('')
+        output.append('This module provides convenient access to all enum definitions.')
+        output.append('Each enum includes rich metadata (descriptions, ontology mappings, annotations)')
+        output.append('while maintaining full Python enum compatibility.')
+        output.append('')
+        output.append('Usage:')
+        output.append('    from valuesets.enums import Presenceenum, AnatomicalSide')
+        output.append('    ')
+        output.append('    # Or import everything')
+        output.append('    from valuesets.enums import *')
+        output.append('"""')
+        output.append('')
+        output.append('# flake8: noqa')
+        output.append('')
+        # Collect all enums from all modules
+        all_enums = []
+        imports_by_module = {}
+        for module_key, info in sorted(self.generated_modules.items()):
+            if info['enums']:
+                module_path = module_key.replace('/', '.')
+                imports_by_module[module_path] = info['enums']
+                all_enums.extend(info['enums'])
+        # Generate imports grouped by domain
+        domains = {}
+        for module_path, enums in imports_by_module.items():
+            parts = module_path.split('.')
+            domain = parts[0] if len(parts) > 1 else 'core'
+            if domain not in domains:
+                domains[domain] = {}
+            domains[domain][module_path] = enums
+        # Write imports organized by domain
+        for domain in sorted(domains.keys()):
+            output.append(f'# {domain.title()} domain')
+            for module_path, enums in sorted(domains[domain].items()):
+                if enums:
+                    enum_list = ', '.join(enums)
+                    output.append(f'from .{module_path} import {enum_list}')
+            output.append('')
+        # Generate __all__
+        output.append('__all__ = [')
+        for enum in sorted(set(all_enums)):
+            output.append(f'    "{enum}",')
+        output.append(']')
+        # Write the init file
+        init_path = self.output_dir / '__init__.py'
+        init_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(init_path, 'w') as f:
+            f.write('\n'.join(output))
+        logger.info(f"Generated {init_path} with {len(all_enums)} enum exports")
+    def _get_class_name(self, name: str) -> str:
+        """Convert LinkML name to Python class name with proper CamelCase."""
+        # Handle already CamelCase names
+        if not any(c in name for c in ['_', '-', ' ']):
+            # If it's already in some form of CamelCase, preserve it
+            # Just ensure first letter is capitalized
+            return name[0].upper() + name[1:] if name else ''
+        # Convert snake_case, kebab-case, or space-separated to CamelCase
+        words = re.split(r'[_\s-]+', name)
+        # Properly capitalize each word, preserving existing caps when appropriate
+        result = []
+        for word in words:
+            if word:
+                if word.isupper():
+                    # If the word is all caps (like "ISO"), keep it that way
+                    result.append(word)
+                elif word[0].isupper() and len(word) > 1:
+                    # If already starts with capital, preserve the casing
+                    result.append(word)
+                else:
+                    # Otherwise, capitalize first letter
+                    result.append(word[0].upper() + word[1:].lower())
+        return ''.join(result)
+    def _get_enum_member_name(self, name: str) -> str:
+        """Convert permissible value name to Python enum member name."""
+        member_name = re.sub(r'[^a-zA-Z0-9_]', '_', name).upper()
+        if member_name and member_name[0].isdigit():
+            member_name = f'_{member_name}'
+        return member_name
+def main():
+    """CLI entry point."""
+    import argparse
+    parser = argparse.ArgumentParser(description='Generate modular rich enums from LinkML schemas')
+    parser.add_argument('schema_dir', help='Directory containing LinkML schema files')
+    parser.add_argument('-o', '--output-dir', required=True, help='Output directory for Python modules')
+    args = parser.parse_args()
+    generator = ModularRichEnumGenerator(args.schema_dir, args.output_dir)
+    generator.generate_all()
+# Run the CLI only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()

valuesets/generators/prefix_standardizer.py ADDED Viewed

@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""
+Standardize prefixes across all LinkML schemas to use consistent valuesets prefix.
+Sets:
+- default_prefix: valuesets
+- valuesets: https://w3id.org/valuesets/
+Updates schemas in place with consistent prefixing.
+"""
+import yaml
+from pathlib import Path
+from typing import Dict, Any
+import click
+from collections import OrderedDict
+class PrefixStandardizer:
+    """Utility to standardize prefixes across LinkML schemas."""
+    def __init__(self, target_prefix: str = "valuesets",
+                 target_uri: str = "https://w3id.org/valuesets/"):
+        self.target_prefix = target_prefix
+        self.target_uri = target_uri
+    def standardize_schema_prefixes(self, schema_path: Path, dry_run: bool = False) -> Dict[str, Any]:
+        """
+        Standardize prefixes in a schema file.
+        Args:
+            schema_path: Path to the schema file
+            dry_run: If True, only show what would be changed
+        Returns:
+            Summary of changes made
+        """
+        # Load schema
+        with open(schema_path, 'r') as f:
+            schema_data = yaml.safe_load(f)
+        changes = []
+        # Ensure prefixes section exists
+        if 'prefixes' not in schema_data:
+            schema_data['prefixes'] = {}
+        # Update/add the target prefix
+        current_valuesets_uri = schema_data['prefixes'].get(self.target_prefix)
+        if current_valuesets_uri != self.target_uri:
+            old_uri = current_valuesets_uri or "not defined"
+            schema_data['prefixes'][self.target_prefix] = self.target_uri
+            changes.append(f"Updated {self.target_prefix} prefix: {old_uri} → {self.target_uri}")
+        # Update default_prefix
+        current_default = schema_data.get('default_prefix')
+        if current_default != self.target_prefix:
+            old_default = current_default or "not defined"
+            schema_data['default_prefix'] = self.target_prefix
+            changes.append(f"Updated default_prefix: {old_default} → {self.target_prefix}")
+        # Write changes if not dry run
+        if not dry_run and changes:
+            self.write_schema(schema_data, schema_path)
+        return {
+            'file': schema_path.name,
+            'changes': changes,
+            'modified': len(changes) > 0
+        }
+    def write_schema(self, schema_data: Dict[str, Any], output_path: Path):
+        """Write schema preserving key order and formatting."""
+        # Define preferred key order
+        key_order = [
+            'name', 'title', 'description', 'id', 'version', 'status',
+            'imports', 'prefixes', 'default_prefix', 'default_curi_maps',
+            'slots', 'classes', 'enums', 'types', 'subsets', 'license', 'see_also'
+        ]
+        # Create ordered dict
+        ordered_data = OrderedDict()
+        # Add keys in preferred order
+        for key in key_order:
+            if key in schema_data:
+                ordered_data[key] = schema_data[key]
+        # Add any remaining keys
+        for key in schema_data:
+            if key not in ordered_data:
+                ordered_data[key] = schema_data[key]
+        # Write with nice formatting
+        with open(output_path, 'w') as f:
+            yaml.dump(dict(ordered_data), f,
+                     default_flow_style=False,
+                     sort_keys=False,
+                     allow_unicode=True,
+                     width=120)
+    def standardize_directory(self, schema_dir: Path, dry_run: bool = False) -> None:
+        """
+        Standardize prefixes for all schemas in a directory.
+        Args:
+            schema_dir: Directory containing LinkML schema files
+            dry_run: If True, only show what would be changed
+        """
+        # Find all YAML files
+        yaml_files = list(schema_dir.rglob("*.yaml"))
+        print(f"{'DRY RUN - ' if dry_run else ''}Standardizing prefixes in {len(yaml_files)} files")
+        print(f"Target: {self.target_prefix}: {self.target_uri}")
+        print()
+        total_modified = 0
+        total_changes = 0
+        for yaml_file in yaml_files:
+            # Skip certain files
+            if yaml_file.name in ['linkml-meta.yaml', 'meta.yaml']:
+                continue
+            try:
+                result = self.standardize_schema_prefixes(yaml_file, dry_run=dry_run)
+                if result['changes']:
+                    total_modified += 1
+                    total_changes += len(result['changes'])
+                    print(f"{'[DRY RUN] ' if dry_run else ''}{result['file']}:")
+                    for change in result['changes']:
+                        print(f"  - {change}")
+                    print()
+            except Exception as e:
+                print(f"Error processing {yaml_file}: {e}")
+        print(f"{'='*50}")
+        print(f"{'DRY RUN - ' if dry_run else ''}Summary:")
+        print(f"  Files modified: {total_modified}")
+        print(f"  Total changes: {total_changes}")
+@click.command()
+@click.argument('schema_path', type=click.Path(exists=True, path_type=Path))
+@click.option('--dry-run', '-n', is_flag=True,
+              help='Preview changes without modifying files')
+@click.option('--prefix', '-p', default='valuesets',
+              help='Target prefix name (default: valuesets)')
+@click.option('--uri', '-u', default='https://w3id.org/valuesets/',
+              help='Target prefix URI (default: https://w3id.org/valuesets/)')
+@click.option('--single-file', '-s', is_flag=True,
+              help='Process single file instead of directory')
+def main(schema_path: Path, dry_run: bool, prefix: str, uri: str, single_file: bool):
+    """
+    Standardize prefixes across LinkML schemas.
+    SCHEMA_PATH: Path to schema file or directory
+    Examples:
+    \b
+    # Preview changes for all schemas
+    prefix_standardizer.py src/valuesets/schema --dry-run
+    \b
+    # Standardize all schemas
+    prefix_standardizer.py src/valuesets/schema
+    \b
+    # Single file
+    prefix_standardizer.py schema.yaml --single-file
+    \b
+    # Custom prefix
+    prefix_standardizer.py src/valuesets/schema --prefix cval --uri https://w3id.org/linkml-common/
+    """
+    standardizer = PrefixStandardizer(target_prefix=prefix, target_uri=uri)
+    if single_file or schema_path.is_file():
+        # Process single file
+        result = standardizer.standardize_schema_prefixes(schema_path, dry_run=dry_run)
+        print(f"{'DRY RUN - ' if dry_run else ''}Results for {result['file']}:")
+        if result['changes']:
+            for change in result['changes']:
+                print(f"  - {change}")
+        else:
+            print("  No changes needed")
+    else:
+        # Process directory
+        standardizer.standardize_directory(schema_path, dry_run=dry_run)
+# Invoke the click command only when this file is executed as a script.
+if __name__ == '__main__':
+    main()

valuesets/generators/rich_enum.py ADDED Viewed

@@ -0,0 +1,127 @@
+"""
+Rich Enum Implementation with Metadata Support
+This module provides enums that maintain full compatibility with standard Python
+enums while adding metadata support using __init_subclass__.
+"""
+from enum import Enum
+from typing import Dict, Any, Optional, Type
class RichEnum(str, Enum):
    """
    Base class for string-valued enums whose members carry optional metadata.

    Subclasses:
    1. Behave like standard Python enums
    2. Have string values (members are also ``str`` instances)
    3. Gain per-member accessors: ``get_description``, ``get_meaning``,
       ``get_annotations`` and ``get_metadata``
    4. Gain class-level helpers: ``from_meaning``, ``get_all_meanings``,
       ``get_all_descriptions`` and ``list_metadata``

    Metadata lives in a ``_metadata`` dict keyed by member name. It should be
    assigned AFTER class creation to avoid it becoming an enum member.

    Usage:
        class MyEnum(RichEnum):
            VALUE1 = "value1"
            VALUE2 = "value2"

        # Set metadata after class creation
        MyEnum._metadata = {
            "VALUE1": {
                "description": "First value",
                "meaning": "ONTO:0000001",
                "annotations": {"category": "group1"}
            },
            "VALUE2": {
                "description": "Second value",
                "meaning": "ONTO:0000002"
            }
        }
    """

    def __init_subclass__(cls, **kwargs):
        """Attach the metadata accessors to each newly created subclass.

        A method that the subclass defines itself under one of the accessor
        names is left in place rather than overwritten.
        """
        super().__init_subclass__(**kwargs)

        def _member_metadata(member) -> Dict[str, Any]:
            """Return the metadata entry for ``member`` ({} when absent)."""
            # Looked up in the class's own __dict__ at call time, so metadata
            # assigned after class creation is picked up.
            metadata = member.__class__.__dict__.get('_metadata', {})
            return metadata.get(member.name, {})

        def get_description(self) -> Optional[str]:
            """Get the description for this enum member."""
            return _member_metadata(self).get("description")

        def get_meaning(self) -> Optional[str]:
            """Get the ontology meaning/mapping for this enum member."""
            return _member_metadata(self).get("meaning")

        def get_annotations(self) -> Dict[str, Any]:
            """Get the annotations dictionary for this enum member.

            When annotations are present the stored dict itself is returned
            (not a copy); otherwise a new empty dict is returned.
            """
            return _member_metadata(self).get("annotations", {})

        def get_metadata(self) -> Dict[str, Any]:
            """Get all metadata for this enum member.

            The result is a new dict that always contains ``name`` and
            ``value``, updated with whatever is stored for the member.
            """
            combined = {"name": self.name, "value": self.value}
            combined.update(_member_metadata(self))
            return combined

        @classmethod
        def from_meaning(cls_inner, meaning: str) -> Optional['RichEnum']:
            """
            Find an enum member by its ontology meaning.

            Args:
                meaning: The ontology term (e.g., "BSPO:0000000")

            Returns:
                The first enum member with the given meaning, or None if not
                found (including when ``meaning`` is None).
            """
            # Members without a meaning report None; without this guard a
            # None query would "match" the first such member.
            if meaning is None:
                return None
            return next(
                (member for member in cls_inner
                 if member.get_meaning() == meaning),
                None,
            )

        @classmethod
        def get_all_meanings(cls_inner) -> Dict[str, str]:
            """Get a mapping of all member names to their meanings.

            Members whose meaning is missing or falsy are omitted.
            """
            pairs = ((member.name, member.get_meaning()) for member in cls_inner)
            return {name: meaning for name, meaning in pairs if meaning}

        @classmethod
        def get_all_descriptions(cls_inner) -> Dict[str, str]:
            """Get a mapping of all member names to their descriptions.

            Members whose description is missing or falsy are omitted.
            """
            pairs = ((member.name, member.get_description()) for member in cls_inner)
            return {name: text for name, text in pairs if text}

        @classmethod
        def list_metadata(cls_inner) -> Dict[str, Dict[str, Any]]:
            """Get all metadata for all members, keyed by member name."""
            return {member.name: member.get_metadata() for member in cls_inner}

        accessors = {
            'get_description': get_description,
            'get_meaning': get_meaning,
            'get_annotations': get_annotations,
            'get_metadata': get_metadata,
            'from_meaning': from_meaning,
            'get_all_meanings': get_all_meanings,
            'get_all_descriptions': get_all_descriptions,
            'list_metadata': list_metadata,
        }
        for attr_name, accessor in accessors.items():
            # Respect a method the subclass body defined under the same name.
            if attr_name not in cls.__dict__:
                setattr(cls, attr_name, accessor)


# Type alias for clarity: the type of a RichEnum subclass (the class itself,
# not one of its members).
RichEnumType = Type[RichEnum]
RichEnumMeta = None  # For backwards compatibility