valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Biological Sequence Alphabet Value Sets
|
|
3
|
+
|
|
4
|
+
Alphabets for biological sequences including DNA, RNA, and protein sequences. Includes standard alphabets and extended versions with ambiguity codes following IUPAC nomenclature and common bioinformatics standards.
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Generated from: bio/sequence_alphabets.yaml
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Dict, Any, Optional
|
|
13
|
+
from valuesets.generators.rich_enum import RichEnum
|
|
14
|
+
|
|
15
|
+
class DNABaseEnum(RichEnum):
    """
    The four canonical DNA nucleotide bases.

    Every member's value is its own single-letter code (A, C, G, T).
    Descriptive data for each base lives in ``_metadata``, which is
    attached right after the class body.
    """

    A = "A"
    C = "C"
    G = "G"
    T = "T"


# Metadata is assigned after class creation and is keyed by member name.
# Each entry carries a CHEBI term, the complementary base, a purine or
# pyrimidine flag, the chemical formula and a plain-English alias.
# Flag values are the string "true", not a Python bool.
DNABaseEnum._metadata = {
    "A": {
        "meaning": "CHEBI:16708",
        "annotations": {
            "complement": "T",
            "purine": "true",
            "chemical_formula": "C5H5N5",
        },
        "aliases": ["adenine"],
    },
    "C": {
        "meaning": "CHEBI:16040",
        "annotations": {
            "complement": "G",
            "pyrimidine": "true",
            "chemical_formula": "C4H5N3O",
        },
        "aliases": ["cytosine"],
    },
    "G": {
        "meaning": "CHEBI:16235",
        "annotations": {
            "complement": "C",
            "purine": "true",
            "chemical_formula": "C5H5N5O",
        },
        "aliases": ["guanine"],
    },
    "T": {
        "meaning": "CHEBI:17821",
        "annotations": {
            "complement": "A",
            "pyrimidine": "true",
            "chemical_formula": "C5H6N2O2",
        },
        "aliases": ["thymine"],
    },
}
|
|
32
|
+
|
|
33
|
+
class DNABaseExtendedEnum(RichEnum):
    """
    DNA alphabet extended with the IUPAC ambiguity codes and a gap member.

    Members are the four concrete bases, the eleven ambiguity letters
    (R, Y, S, W, K, M, B, D, H, V, N) and ``GAP``.  Every member's value
    equals its name; the gap's display symbol ("-") is stored in
    ``_metadata`` rather than in the value.
    """

    # Concrete bases
    A = "A"
    C = "C"
    G = "G"
    T = "T"
    # Two-base ambiguity codes
    R = "R"
    Y = "Y"
    S = "S"
    W = "W"
    K = "K"
    M = "M"
    # Three-base ambiguity codes
    B = "B"
    D = "D"
    H = "H"
    V = "V"
    # Any base
    N = "N"
    # Alignment gap
    GAP = "GAP"


# Metadata is assigned after class creation and is keyed by member name.
# "represents" is a comma-separated string of the bases a code stands for.
# Flag values are the string "true", not a Python bool.
DNABaseExtendedEnum._metadata = {
    "A": {
        "meaning": "CHEBI:16708",
        "annotations": {"represents": "A"},
        "aliases": ["adenine"],
    },
    "C": {
        "meaning": "CHEBI:16040",
        "annotations": {"represents": "C"},
        "aliases": ["cytosine"],
    },
    "G": {
        "meaning": "CHEBI:16235",
        "annotations": {"represents": "G"},
        "aliases": ["guanine"],
    },
    "T": {
        "meaning": "CHEBI:17821",
        "annotations": {"represents": "T"},
        "aliases": ["thymine"],
    },
    "R": {"annotations": {"represents": "A,G", "iupac": "true"}},
    "Y": {"annotations": {"represents": "C,T", "iupac": "true"}},
    "S": {
        "annotations": {
            "represents": "G,C",
            "iupac": "true",
            "bond_strength": "strong (3 H-bonds)",
        },
    },
    "W": {
        "annotations": {
            "represents": "A,T",
            "iupac": "true",
            "bond_strength": "weak (2 H-bonds)",
        },
    },
    "K": {"annotations": {"represents": "G,T", "iupac": "true"}},
    "M": {"annotations": {"represents": "A,C", "iupac": "true"}},
    "B": {"annotations": {"represents": "C,G,T", "iupac": "true"}},
    "D": {"annotations": {"represents": "A,G,T", "iupac": "true"}},
    "H": {"annotations": {"represents": "A,C,T", "iupac": "true"}},
    "V": {"annotations": {"represents": "A,C,G", "iupac": "true"}},
    "N": {"annotations": {"represents": "A,C,G,T", "iupac": "true"}},
    "GAP": {"annotations": {"symbol": "-", "represents": "gap"}},
}
|
|
74
|
+
|
|
75
|
+
class RNABaseEnum(RichEnum):
    """
    The four canonical RNA nucleotide bases.

    Every member's value is its own single-letter code (A, C, G, U).
    Descriptive data for each base lives in ``_metadata``, which is
    attached right after the class body.
    """

    A = "A"
    C = "C"
    G = "G"
    U = "U"


# Metadata is assigned after class creation and is keyed by member name.
# Each entry carries a CHEBI term, the complementary base, a purine or
# pyrimidine flag, the chemical formula and a plain-English alias.
# Flag values are the string "true", not a Python bool.
RNABaseEnum._metadata = {
    "A": {
        "meaning": "CHEBI:16708",
        "annotations": {
            "complement": "U",
            "purine": "true",
            "chemical_formula": "C5H5N5",
        },
        "aliases": ["adenine"],
    },
    "C": {
        "meaning": "CHEBI:16040",
        "annotations": {
            "complement": "G",
            "pyrimidine": "true",
            "chemical_formula": "C4H5N3O",
        },
        "aliases": ["cytosine"],
    },
    "G": {
        "meaning": "CHEBI:16235",
        "annotations": {
            "complement": "C",
            "purine": "true",
            "chemical_formula": "C5H5N5O",
        },
        "aliases": ["guanine"],
    },
    "U": {
        "meaning": "CHEBI:17568",
        "annotations": {
            "complement": "A",
            "pyrimidine": "true",
            "chemical_formula": "C4H4N2O2",
        },
        "aliases": ["uracil"],
    },
}
|
|
92
|
+
|
|
93
|
+
class RNABaseExtendedEnum(RichEnum):
    """
    RNA alphabet extended with the IUPAC ambiguity codes and a gap member.

    Members are the four concrete bases, the eleven ambiguity letters
    (R, Y, S, W, K, M, B, D, H, V, N) and ``GAP``.  Every member's value
    equals its name; the gap's display symbol ("-") is stored in
    ``_metadata`` rather than in the value.
    """

    # Concrete bases
    A = "A"
    C = "C"
    G = "G"
    U = "U"
    # Two-base ambiguity codes
    R = "R"
    Y = "Y"
    S = "S"
    W = "W"
    K = "K"
    M = "M"
    # Three-base ambiguity codes
    B = "B"
    D = "D"
    H = "H"
    V = "V"
    # Any base
    N = "N"
    # Alignment gap
    GAP = "GAP"


# Metadata is assigned after class creation and is keyed by member name.
# "represents" is a comma-separated string of the bases a code stands for,
# written with U in place of T.
# Flag values are the string "true", not a Python bool.
RNABaseExtendedEnum._metadata = {
    "A": {
        "meaning": "CHEBI:16708",
        "annotations": {"represents": "A"},
        "aliases": ["adenine"],
    },
    "C": {
        "meaning": "CHEBI:16040",
        "annotations": {"represents": "C"},
        "aliases": ["cytosine"],
    },
    "G": {
        "meaning": "CHEBI:16235",
        "annotations": {"represents": "G"},
        "aliases": ["guanine"],
    },
    "U": {
        "meaning": "CHEBI:17568",
        "annotations": {"represents": "U"},
        "aliases": ["uracil"],
    },
    "R": {"annotations": {"represents": "A,G", "iupac": "true"}},
    "Y": {"annotations": {"represents": "C,U", "iupac": "true"}},
    "S": {"annotations": {"represents": "G,C", "iupac": "true"}},
    "W": {"annotations": {"represents": "A,U", "iupac": "true"}},
    "K": {"annotations": {"represents": "G,U", "iupac": "true"}},
    "M": {"annotations": {"represents": "A,C", "iupac": "true"}},
    "B": {"annotations": {"represents": "C,G,U", "iupac": "true"}},
    "D": {"annotations": {"represents": "A,G,U", "iupac": "true"}},
    "H": {"annotations": {"represents": "A,C,U", "iupac": "true"}},
    "V": {"annotations": {"represents": "A,C,G", "iupac": "true"}},
    "N": {"annotations": {"represents": "A,C,G,U", "iupac": "true"}},
    "GAP": {"annotations": {"symbol": "-", "represents": "gap"}},
}
|
|
134
|
+
|
|
135
|
+
class AminoAcidEnum(RichEnum):
    """
    The twenty standard amino acids, identified by single-letter code.

    Every member's value equals its name.  Three-letter codes, polarity,
    essentiality, molecular weight and other properties are recorded in
    ``_metadata``, which is attached right after the class body.
    """

    A = "A"
    C = "C"
    D = "D"
    E = "E"
    F = "F"
    G = "G"
    H = "H"
    I = "I"
    K = "K"
    L = "L"
    M = "M"
    N = "N"
    P = "P"
    Q = "Q"
    R = "R"
    S = "S"
    T = "T"
    V = "V"
    W = "W"
    Y = "Y"


# Metadata is assigned after class creation and is keyed by member name.
# All annotation values are strings, including the "true"/"false" flags and
# the molecular weights.
AminoAcidEnum._metadata = {
    "A": {
        "meaning": "CHEBI:16449",
        "annotations": {
            "three_letter": "Ala",
            "polarity": "nonpolar",
            "essential": "false",
            "molecular_weight": "89.09",
        },
        "aliases": ["alanine"],
    },
    "C": {
        "meaning": "CHEBI:17561",
        "annotations": {
            "three_letter": "Cys",
            "polarity": "polar",
            "essential": "false",
            "molecular_weight": "121.15",
            "special": "forms disulfide bonds",
        },
        "aliases": ["L-cysteine"],
    },
    "D": {
        "meaning": "CHEBI:17053",
        "annotations": {
            "three_letter": "Asp",
            "polarity": "acidic",
            "essential": "false",
            "molecular_weight": "133.10",
            "charge": "negative",
        },
        "aliases": ["L-aspartic acid"],
    },
    "E": {
        "meaning": "CHEBI:16015",
        "annotations": {
            "three_letter": "Glu",
            "polarity": "acidic",
            "essential": "false",
            "molecular_weight": "147.13",
            "charge": "negative",
        },
        "aliases": ["L-glutamic acid"],
    },
    "F": {
        "meaning": "CHEBI:17295",
        "annotations": {
            "three_letter": "Phe",
            "polarity": "nonpolar",
            "essential": "true",
            "molecular_weight": "165.19",
            "aromatic": "true",
        },
        "aliases": ["L-phenylalanine"],
    },
    "G": {
        "meaning": "CHEBI:15428",
        "annotations": {
            "three_letter": "Gly",
            "polarity": "nonpolar",
            "essential": "false",
            "molecular_weight": "75.07",
            "special": "smallest, most flexible",
        },
        "aliases": ["glycine"],
    },
    "H": {
        "meaning": "CHEBI:15971",
        "annotations": {
            "three_letter": "His",
            "polarity": "basic",
            "essential": "true",
            "molecular_weight": "155.16",
            "charge": "positive",
        },
        "aliases": ["L-histidine"],
    },
    "I": {
        "meaning": "CHEBI:17191",
        "annotations": {
            "three_letter": "Ile",
            "polarity": "nonpolar",
            "essential": "true",
            "molecular_weight": "131.17",
            "branched": "true",
        },
        "aliases": ["L-isoleucine"],
    },
    "K": {
        "meaning": "CHEBI:18019",
        "annotations": {
            "three_letter": "Lys",
            "polarity": "basic",
            "essential": "true",
            "molecular_weight": "146.19",
            "charge": "positive",
        },
        "aliases": ["L-lysine"],
    },
    "L": {
        "meaning": "CHEBI:15603",
        "annotations": {
            "three_letter": "Leu",
            "polarity": "nonpolar",
            "essential": "true",
            "molecular_weight": "131.17",
            "branched": "true",
        },
        "aliases": ["L-leucine"],
    },
    "M": {
        "meaning": "CHEBI:16643",
        "annotations": {
            "three_letter": "Met",
            "polarity": "nonpolar",
            "essential": "true",
            "molecular_weight": "149.21",
            "special": "start codon",
        },
        "aliases": ["L-methionine"],
    },
    "N": {
        "meaning": "CHEBI:17196",
        "annotations": {
            "three_letter": "Asn",
            "polarity": "polar",
            "essential": "false",
            "molecular_weight": "132.12",
        },
        "aliases": ["L-asparagine"],
    },
    "P": {
        "meaning": "CHEBI:17203",
        "annotations": {
            "three_letter": "Pro",
            "polarity": "nonpolar",
            "essential": "false",
            "molecular_weight": "115.13",
            "special": "helix breaker, rigid",
        },
        "aliases": ["L-proline"],
    },
    "Q": {
        "meaning": "CHEBI:18050",
        "annotations": {
            "three_letter": "Gln",
            "polarity": "polar",
            "essential": "false",
            "molecular_weight": "146.15",
        },
        "aliases": ["L-glutamine"],
    },
    "R": {
        "meaning": "CHEBI:16467",
        "annotations": {
            "three_letter": "Arg",
            "polarity": "basic",
            "essential": "false",
            "molecular_weight": "174.20",
            "charge": "positive",
        },
        "aliases": ["L-arginine"],
    },
    "S": {
        "meaning": "CHEBI:17115",
        "annotations": {
            "three_letter": "Ser",
            "polarity": "polar",
            "essential": "false",
            "molecular_weight": "105.09",
            "hydroxyl": "true",
        },
        "aliases": ["L-serine"],
    },
    "T": {
        "meaning": "CHEBI:16857",
        "annotations": {
            "three_letter": "Thr",
            "polarity": "polar",
            "essential": "true",
            "molecular_weight": "119.12",
            "hydroxyl": "true",
        },
        "aliases": ["L-threonine"],
    },
    "V": {
        "meaning": "CHEBI:16414",
        "annotations": {
            "three_letter": "Val",
            "polarity": "nonpolar",
            "essential": "true",
            "molecular_weight": "117.15",
            "branched": "true",
        },
        "aliases": ["L-valine"],
    },
    "W": {
        "meaning": "CHEBI:16828",
        "annotations": {
            "three_letter": "Trp",
            "polarity": "nonpolar",
            "essential": "true",
            "molecular_weight": "204.23",
            "aromatic": "true",
            "special": "largest",
        },
        "aliases": ["L-tryptophan"],
    },
    "Y": {
        "meaning": "CHEBI:17895",
        "annotations": {
            "three_letter": "Tyr",
            "polarity": "polar",
            "essential": "false",
            "molecular_weight": "181.19",
            "aromatic": "true",
            "hydroxyl": "true",
        },
        "aliases": ["L-tyrosine"],
    },
}
|
|
184
|
+
|
|
185
|
+
class AminoAcidExtendedEnum(RichEnum):
    """
    Amino acid alphabet with ambiguity codes and special symbols.

    On top of the twenty standard residues this adds the ambiguity codes
    B, Z, J and X, the ``STOP`` and ``GAP`` markers, and the residues
    U (selenocysteine) and O (pyrrolysine).  Every member's value equals
    its name; the "*" and "-" display symbols for ``STOP`` and ``GAP`` are
    stored in ``_metadata`` rather than in the value.
    """

    # Standard residues
    A = "A"
    C = "C"
    D = "D"
    E = "E"
    F = "F"
    G = "G"
    H = "H"
    I = "I"
    K = "K"
    L = "L"
    M = "M"
    N = "N"
    P = "P"
    Q = "Q"
    R = "R"
    S = "S"
    T = "T"
    V = "V"
    W = "W"
    Y = "Y"
    # Ambiguity codes
    B = "B"
    Z = "Z"
    J = "J"
    X = "X"
    # Translation stop and alignment gap
    STOP = "STOP"
    GAP = "GAP"
    # Additional residues (selenocysteine, pyrrolysine)
    U = "U"
    O = "O"


# Metadata is assigned after class creation and is keyed by member name.
# Entries here are leaner than the standard-alphabet table: standard
# residues carry only a CHEBI term, a three-letter code and an alias.
# Flag values are the string "true", not a Python bool.
AminoAcidExtendedEnum._metadata = {
    "A": {
        "meaning": "CHEBI:16449",
        "annotations": {"three_letter": "Ala"},
        "aliases": ["alanine"],
    },
    "C": {
        "meaning": "CHEBI:17561",
        "annotations": {"three_letter": "Cys"},
        "aliases": ["L-cysteine"],
    },
    "D": {
        "meaning": "CHEBI:17053",
        "annotations": {"three_letter": "Asp"},
        "aliases": ["L-aspartic acid"],
    },
    "E": {
        "meaning": "CHEBI:16015",
        "annotations": {"three_letter": "Glu"},
        "aliases": ["L-glutamic acid"],
    },
    "F": {
        "meaning": "CHEBI:17295",
        "annotations": {"three_letter": "Phe"},
        "aliases": ["L-phenylalanine"],
    },
    "G": {
        "meaning": "CHEBI:15428",
        "annotations": {"three_letter": "Gly"},
        "aliases": ["glycine"],
    },
    "H": {
        "meaning": "CHEBI:15971",
        "annotations": {"three_letter": "His"},
        "aliases": ["L-histidine"],
    },
    "I": {
        "meaning": "CHEBI:17191",
        "annotations": {"three_letter": "Ile"},
        "aliases": ["L-isoleucine"],
    },
    "K": {
        "meaning": "CHEBI:18019",
        "annotations": {"three_letter": "Lys"},
        "aliases": ["L-lysine"],
    },
    "L": {
        "meaning": "CHEBI:15603",
        "annotations": {"three_letter": "Leu"},
        "aliases": ["L-leucine"],
    },
    "M": {
        "meaning": "CHEBI:16643",
        "annotations": {"three_letter": "Met"},
        "aliases": ["L-methionine"],
    },
    "N": {
        "meaning": "CHEBI:17196",
        "annotations": {"three_letter": "Asn"},
        "aliases": ["L-asparagine"],
    },
    "P": {
        "meaning": "CHEBI:17203",
        "annotations": {"three_letter": "Pro"},
        "aliases": ["L-proline"],
    },
    "Q": {
        "meaning": "CHEBI:18050",
        "annotations": {"three_letter": "Gln"},
        "aliases": ["L-glutamine"],
    },
    "R": {
        "meaning": "CHEBI:16467",
        "annotations": {"three_letter": "Arg"},
        "aliases": ["L-arginine"],
    },
    "S": {
        "meaning": "CHEBI:17115",
        "annotations": {"three_letter": "Ser"},
        "aliases": ["L-serine"],
    },
    "T": {
        "meaning": "CHEBI:16857",
        "annotations": {"three_letter": "Thr"},
        "aliases": ["L-threonine"],
    },
    "V": {
        "meaning": "CHEBI:16414",
        "annotations": {"three_letter": "Val"},
        "aliases": ["L-valine"],
    },
    "W": {
        "meaning": "CHEBI:16828",
        "annotations": {"three_letter": "Trp"},
        "aliases": ["L-tryptophan"],
    },
    "Y": {
        "meaning": "CHEBI:17895",
        "annotations": {"three_letter": "Tyr"},
        "aliases": ["L-tyrosine"],
    },
    "B": {
        "annotations": {
            "three_letter": "Asx",
            "represents": "D,N",
            "ambiguity": "true",
        },
        "aliases": ["L-aspartic acid or Asparagine (D or N)"],
    },
    "Z": {
        "annotations": {
            "three_letter": "Glx",
            "represents": "E,Q",
            "ambiguity": "true",
        },
        "aliases": ["L-glutamic acid or Glutamine (E or Q)"],
    },
    "J": {
        "annotations": {
            "three_letter": "Xle",
            "represents": "L,I",
            "ambiguity": "true",
        },
        "aliases": ["L-leucine or Isoleucine (L or I)"],
    },
    "X": {
        "annotations": {
            "three_letter": "Xaa",
            "represents": "any",
            "ambiguity": "true",
        },
    },
    "STOP": {
        "annotations": {
            "symbol": "*",
            "three_letter": "Ter",
            "represents": "stop codon",
        },
    },
    "GAP": {"annotations": {"symbol": "-", "represents": "gap"}},
    "U": {
        "meaning": "CHEBI:16633",
        "annotations": {
            "three_letter": "Sec",
            "special": "21st amino acid",
            "codon": "UGA with SECIS element",
        },
        "aliases": ["L-selenocysteine"],
    },
    "O": {
        "meaning": "CHEBI:21860",
        "annotations": {
            "three_letter": "Pyl",
            "special": "22nd amino acid",
            "codon": "UAG in certain archaea/bacteria",
        },
        "aliases": ["L-pyrrolysine"],
    },
}
|
|
250
|
+
|
|
251
|
+
class CodonEnum(RichEnum):
    """
    The 64 DNA triplet codons of the standard genetic code.

    Every member's value equals its three-letter name, spelled with the
    DNA bases A, C, G and T.  The residue or stop signal each codon encodes
    is recorded in ``_metadata``, which is assigned after the class body.
    """

    # NOTE(review): declaration order is kept exactly as generated;
    # presumably member iteration order follows it -- confirm against RichEnum.
    TTT = "TTT"
    TTC = "TTC"
    TTA = "TTA"
    TTG = "TTG"
    CTT = "CTT"
    CTC = "CTC"
    CTA = "CTA"
    CTG = "CTG"
    ATT = "ATT"
    ATC = "ATC"
    ATA = "ATA"
    ATG = "ATG"
    GTT = "GTT"
    GTC = "GTC"
    GTA = "GTA"
    GTG = "GTG"
    TCT = "TCT"
    TCC = "TCC"
    TCA = "TCA"
    TCG = "TCG"
    AGT = "AGT"
    AGC = "AGC"
    CCT = "CCT"
    CCC = "CCC"
    CCA = "CCA"
    CCG = "CCG"
    ACT = "ACT"
    ACC = "ACC"
    ACA = "ACA"
    ACG = "ACG"
    GCT = "GCT"
    GCC = "GCC"
    GCA = "GCA"
    GCG = "GCG"
    TAT = "TAT"
    TAC = "TAC"
    TAA = "TAA"
    TAG = "TAG"
    TGA = "TGA"
    CAT = "CAT"
    CAC = "CAC"
    CAA = "CAA"
    CAG = "CAG"
    AAT = "AAT"
    AAC = "AAC"
    AAA = "AAA"
    AAG = "AAG"
    GAT = "GAT"
    GAC = "GAC"
    GAA = "GAA"
    GAG = "GAG"
    TGT = "TGT"
    TGC = "TGC"
    TGG = "TGG"
    CGT = "CGT"
    CGC = "CGC"
    CGA = "CGA"
    CGG = "CGG"
    AGA = "AGA"
    AGG = "AGG"
    GGT = "GGT"
    GGC = "GGC"
    GGA = "GGA"
    GGG = "GGG"
|
|
320
|
+
|
|
321
|
+
# Set metadata after class creation.
# Entries are keyed by the three-letter DNA codon. Sense codons carry the
# one-letter amino acid code and its full name. The three stop codons use "*"
# as the amino acid and store their nickname under "name" (not
# "amino_acid_name"); ATG and the stop codons also carry a "special" note.
CodonEnum._metadata = {
    "TTT": {"annotations": {"amino_acid": "F", "amino_acid_name": "Phenylalanine"}},
    "TTC": {"annotations": {"amino_acid": "F", "amino_acid_name": "Phenylalanine"}},
    "TTA": {"annotations": {"amino_acid": "L", "amino_acid_name": "Leucine"}},
    "TTG": {"annotations": {"amino_acid": "L", "amino_acid_name": "Leucine"}},
    "CTT": {"annotations": {"amino_acid": "L", "amino_acid_name": "Leucine"}},
    "CTC": {"annotations": {"amino_acid": "L", "amino_acid_name": "Leucine"}},
    "CTA": {"annotations": {"amino_acid": "L", "amino_acid_name": "Leucine"}},
    "CTG": {"annotations": {"amino_acid": "L", "amino_acid_name": "Leucine"}},
    "ATT": {"annotations": {"amino_acid": "I", "amino_acid_name": "Isoleucine"}},
    "ATC": {"annotations": {"amino_acid": "I", "amino_acid_name": "Isoleucine"}},
    "ATA": {"annotations": {"amino_acid": "I", "amino_acid_name": "Isoleucine"}},
    "ATG": {"annotations": {"amino_acid": "M", "amino_acid_name": "Methionine", "special": "start codon"}},
    "GTT": {"annotations": {"amino_acid": "V", "amino_acid_name": "Valine"}},
    "GTC": {"annotations": {"amino_acid": "V", "amino_acid_name": "Valine"}},
    "GTA": {"annotations": {"amino_acid": "V", "amino_acid_name": "Valine"}},
    "GTG": {"annotations": {"amino_acid": "V", "amino_acid_name": "Valine"}},
    "TCT": {"annotations": {"amino_acid": "S", "amino_acid_name": "Serine"}},
    "TCC": {"annotations": {"amino_acid": "S", "amino_acid_name": "Serine"}},
    "TCA": {"annotations": {"amino_acid": "S", "amino_acid_name": "Serine"}},
    "TCG": {"annotations": {"amino_acid": "S", "amino_acid_name": "Serine"}},
    "AGT": {"annotations": {"amino_acid": "S", "amino_acid_name": "Serine"}},
    "AGC": {"annotations": {"amino_acid": "S", "amino_acid_name": "Serine"}},
    "CCT": {"annotations": {"amino_acid": "P", "amino_acid_name": "Proline"}},
    "CCC": {"annotations": {"amino_acid": "P", "amino_acid_name": "Proline"}},
    "CCA": {"annotations": {"amino_acid": "P", "amino_acid_name": "Proline"}},
    "CCG": {"annotations": {"amino_acid": "P", "amino_acid_name": "Proline"}},
    "ACT": {"annotations": {"amino_acid": "T", "amino_acid_name": "Threonine"}},
    "ACC": {"annotations": {"amino_acid": "T", "amino_acid_name": "Threonine"}},
    "ACA": {"annotations": {"amino_acid": "T", "amino_acid_name": "Threonine"}},
    "ACG": {"annotations": {"amino_acid": "T", "amino_acid_name": "Threonine"}},
    "GCT": {"annotations": {"amino_acid": "A", "amino_acid_name": "Alanine"}},
    "GCC": {"annotations": {"amino_acid": "A", "amino_acid_name": "Alanine"}},
    "GCA": {"annotations": {"amino_acid": "A", "amino_acid_name": "Alanine"}},
    "GCG": {"annotations": {"amino_acid": "A", "amino_acid_name": "Alanine"}},
    "TAT": {"annotations": {"amino_acid": "Y", "amino_acid_name": "Tyrosine"}},
    "TAC": {"annotations": {"amino_acid": "Y", "amino_acid_name": "Tyrosine"}},
    "TAA": {"annotations": {"amino_acid": "*", "name": "ochre", "special": "stop codon"}},
    "TAG": {"annotations": {"amino_acid": "*", "name": "amber", "special": "stop codon"}},
    "TGA": {"annotations": {"amino_acid": "*", "name": "opal", "special": "stop codon or selenocysteine"}},
    "CAT": {"annotations": {"amino_acid": "H", "amino_acid_name": "Histidine"}},
    "CAC": {"annotations": {"amino_acid": "H", "amino_acid_name": "Histidine"}},
    "CAA": {"annotations": {"amino_acid": "Q", "amino_acid_name": "Glutamine"}},
    "CAG": {"annotations": {"amino_acid": "Q", "amino_acid_name": "Glutamine"}},
    "AAT": {"annotations": {"amino_acid": "N", "amino_acid_name": "Asparagine"}},
    "AAC": {"annotations": {"amino_acid": "N", "amino_acid_name": "Asparagine"}},
    "AAA": {"annotations": {"amino_acid": "K", "amino_acid_name": "Lysine"}},
    "AAG": {"annotations": {"amino_acid": "K", "amino_acid_name": "Lysine"}},
    "GAT": {"annotations": {"amino_acid": "D", "amino_acid_name": "Aspartic acid"}},
    "GAC": {"annotations": {"amino_acid": "D", "amino_acid_name": "Aspartic acid"}},
    "GAA": {"annotations": {"amino_acid": "E", "amino_acid_name": "Glutamic acid"}},
    "GAG": {"annotations": {"amino_acid": "E", "amino_acid_name": "Glutamic acid"}},
    "TGT": {"annotations": {"amino_acid": "C", "amino_acid_name": "Cysteine"}},
    "TGC": {"annotations": {"amino_acid": "C", "amino_acid_name": "Cysteine"}},
    "TGG": {"annotations": {"amino_acid": "W", "amino_acid_name": "Tryptophan"}},
    "CGT": {"annotations": {"amino_acid": "R", "amino_acid_name": "Arginine"}},
    "CGC": {"annotations": {"amino_acid": "R", "amino_acid_name": "Arginine"}},
    "CGA": {"annotations": {"amino_acid": "R", "amino_acid_name": "Arginine"}},
    "CGG": {"annotations": {"amino_acid": "R", "amino_acid_name": "Arginine"}},
    "AGA": {"annotations": {"amino_acid": "R", "amino_acid_name": "Arginine"}},
    "AGG": {"annotations": {"amino_acid": "R", "amino_acid_name": "Arginine"}},
    "GGT": {"annotations": {"amino_acid": "G", "amino_acid_name": "Glycine"}},
    "GGC": {"annotations": {"amino_acid": "G", "amino_acid_name": "Glycine"}},
    "GGA": {"annotations": {"amino_acid": "G", "amino_acid_name": "Glycine"}},
    "GGG": {"annotations": {"amino_acid": "G", "amino_acid_name": "Glycine"}},
}
|
|
388
|
+
|
|
389
|
+
class NucleotideModificationEnum(RichEnum):
    """
    Common nucleotide modifications.

    Each member's value is identical to its name. Member names spell out the
    leading position digit (e.g. ``FIVE_METHYL_C`` for 5-methylcytosine)
    because a Python identifier cannot begin with a digit.
    """

    # Enum members
    FIVE_METHYL_C = "FIVE_METHYL_C"
    SIX_METHYL_A = "SIX_METHYL_A"
    PSEUDOURIDINE = "PSEUDOURIDINE"
    INOSINE = "INOSINE"
    DIHYDROURIDINE = "DIHYDROURIDINE"
    SEVEN_METHYL_G = "SEVEN_METHYL_G"
    FIVE_HYDROXY_METHYL_C = "FIVE_HYDROXY_METHYL_C"
    EIGHT_OXO_G = "EIGHT_OXO_G"
|
|
402
|
+
|
|
403
|
+
# Set metadata after class creation.
# Entries are keyed by member name. Each one holds a human-readable
# description, a CHEBI CURIE under "meaning", and free-form annotations
# (conventional symbol, modification type, and optionally function or
# base-pairing partners).
NucleotideModificationEnum._metadata = {
    "FIVE_METHYL_C": {
        "description": "5-methylcytosine",
        "meaning": "CHEBI:27551",
        "annotations": {
            "symbol": "m5C",
            "type": "DNA methylation",
            "function": "gene regulation",
        },
    },
    "SIX_METHYL_A": {
        "description": "N6-methyladenosine",
        "meaning": "CHEBI:21891",
        "annotations": {
            "symbol": "m6A",
            "type": "RNA modification",
            "function": "RNA stability, translation",
        },
    },
    "PSEUDOURIDINE": {
        "description": "Pseudouridine",
        "meaning": "CHEBI:17802",
        "annotations": {
            "symbol": "Ψ",
            "type": "RNA modification",
            "function": "RNA stability",
        },
    },
    "INOSINE": {
        "description": "Inosine",
        "meaning": "CHEBI:17596",
        "annotations": {
            "symbol": "I",
            "type": "RNA editing",
            "pairs_with": "A, C, U",
        },
    },
    "DIHYDROURIDINE": {
        "description": "Dihydrouridine",
        "meaning": "CHEBI:23774",
        "annotations": {
            "symbol": "D",
            "type": "tRNA modification",
        },
    },
    "SEVEN_METHYL_G": {
        "description": "7-methylguanosine",
        "meaning": "CHEBI:20794",
        "annotations": {
            "symbol": "m7G",
            "type": "mRNA cap",
            "function": "translation initiation",
        },
    },
    "FIVE_HYDROXY_METHYL_C": {
        "description": "5-hydroxymethylcytosine",
        "meaning": "CHEBI:76792",
        "annotations": {
            "symbol": "hmC",
            "type": "DNA modification",
            "function": "demethylation intermediate",
        },
    },
    "EIGHT_OXO_G": {
        "description": "8-oxoguanine",
        "meaning": "CHEBI:44605",
        "annotations": {
            "symbol": "8-oxoG",
            "type": "oxidative damage",
            "pairs_with": "A or C",
        },
    },
}
|
|
414
|
+
|
|
415
|
+
class SequenceQualityEnum(RichEnum):
    """
    Sequence quality indicators (Phred scores).

    Members are named ``Q<score>`` and cover Phred scores 0 through 60 in
    steps of ten. Each member's value is identical to its name.
    """

    # Enum members
    Q0 = "Q0"
    Q10 = "Q10"
    Q20 = "Q20"
    Q30 = "Q30"
    Q40 = "Q40"
    Q50 = "Q50"
    Q60 = "Q60"
|
|
427
|
+
|
|
428
|
+
# Set metadata after class creation.
# Entries are keyed by member name. All annotation values are stored as
# strings, including the numeric ones ("phred_score", "error_probability"),
# so callers must convert before doing arithmetic. "error_probability" is
# 10 ** (-score / 10), and "ascii_char" is the Phred+33 encoding of the
# score, i.e. chr(33 + score).
SequenceQualityEnum._metadata = {
    "Q0": {
        "description": "Phred quality 0 (100% error probability)",
        "annotations": {
            "phred_score": "0",
            "error_probability": "1.0",
            "ascii_char": "!",
        },
    },
    "Q10": {
        "description": "Phred quality 10 (10% error probability)",
        "annotations": {
            "phred_score": "10",
            "error_probability": "0.1",
            "ascii_char": "+",
        },
    },
    "Q20": {
        "description": "Phred quality 20 (1% error probability)",
        "annotations": {
            "phred_score": "20",
            "error_probability": "0.01",
            "ascii_char": "5",
        },
    },
    "Q30": {
        "description": "Phred quality 30 (0.1% error probability)",
        "annotations": {
            "phred_score": "30",
            "error_probability": "0.001",
            "ascii_char": "?",
        },
    },
    "Q40": {
        "description": "Phred quality 40 (0.01% error probability)",
        "annotations": {
            "phred_score": "40",
            "error_probability": "0.0001",
            "ascii_char": "I",
        },
    },
    "Q50": {
        "description": "Phred quality 50 (0.001% error probability)",
        "annotations": {
            "phred_score": "50",
            "error_probability": "0.00001",
            "ascii_char": "S",
        },
    },
    "Q60": {
        "description": "Phred quality 60 (0.0001% error probability)",
        "annotations": {
            "phred_score": "60",
            "error_probability": "0.000001",
            "ascii_char": "]",
        },
    },
}
|
|
438
|
+
|
|
439
|
+
# Public API of this module: the names exported by `from <module> import *`.
__all__ = [
    "DNABaseEnum",
    "DNABaseExtendedEnum",
    "RNABaseEnum",
    "RNABaseExtendedEnum",
    "AminoAcidEnum",
    "AminoAcidExtendedEnum",
    "CodonEnum",
    "NucleotideModificationEnum",
    "SequenceQualityEnum",
]
|