valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Sequence Chemistry Value Sets
|
|
3
|
+
|
|
4
|
+
Value sets for nucleic acid and protein sequence chemistry, including
|
|
5
|
+
standard and extended alphabets, quality encodings, and molecular representations
|
|
6
|
+
|
|
7
|
+
Generated from: bio/sequence_chemistry.yaml
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Dict, Any, Optional
|
|
13
|
+
from valuesets.generators.rich_enum import RichEnum
|
|
14
|
+
|
|
15
|
+
class IUPACNucleotideCode(RichEnum):
    """IUPAC single-letter nucleotide codes for DNA/RNA sequences.

    Contains the unambiguous bases, the ambiguity codes used by FASTA and
    other sequence formats for uncertain positions, and an alignment gap
    symbol. Each member's value is identical to its name.
    """

    # Unambiguous bases
    A = "A"
    T = "T"
    U = "U"
    G = "G"
    C = "C"

    # Two-base ambiguity codes
    R = "R"
    Y = "Y"
    S = "S"
    W = "W"
    K = "K"
    M = "M"

    # Three-base ambiguity codes
    B = "B"
    D = "D"
    H = "H"
    V = "V"

    # Any base, and the alignment gap
    N = "N"
    GAP = "GAP"


# Per-member metadata keyed by member name; attached after class creation.
# Every entry carries only a human-readable description.
IUPACNucleotideCode._metadata = {
    code: {"description": text}
    for code, text in (
        ("A", "Adenine"),
        ("T", "Thymine (DNA)"),
        ("U", "Uracil (RNA)"),
        ("G", "Guanine"),
        ("C", "Cytosine"),
        ("R", "Purine (A or G)"),
        ("Y", "Pyrimidine (C or T/U)"),
        ("S", "Strong interaction (G or C)"),
        ("W", "Weak interaction (A or T/U)"),
        ("K", "Keto (G or T/U)"),
        ("M", "Amino (A or C)"),
        ("B", "Not A (C or G or T/U)"),
        ("D", "Not C (A or G or T/U)"),
        ("H", "Not G (A or C or T/U)"),
        ("V", "Not T/U (A or C or G)"),
        ("N", "Any nucleotide (A or C or G or T/U)"),
        ("GAP", "Gap or deletion in alignment"),
    )
}
|
|
59
|
+
|
|
60
|
+
class StandardAminoAcid(RichEnum):
    """The 20 standard proteinogenic amino acids, by IUPAC one-letter code.

    Each member's value is identical to its name.
    """

    A = "A"
    R = "R"
    N = "N"
    D = "D"
    C = "C"
    E = "E"
    Q = "Q"
    G = "G"
    H = "H"
    I = "I"
    L = "L"
    K = "K"
    M = "M"
    F = "F"
    P = "P"
    S = "S"
    T = "T"
    W = "W"
    Y = "Y"
    V = "V"


# Per-member metadata keyed by member name; attached after class creation.
# The description is the full amino acid name for the one-letter code.
StandardAminoAcid._metadata = {
    letter: {"description": full_name}
    for letter, full_name in (
        ("A", "Alanine"),
        ("R", "Arginine"),
        ("N", "Asparagine"),
        ("D", "Aspartic acid"),
        ("C", "Cysteine"),
        ("E", "Glutamic acid"),
        ("Q", "Glutamine"),
        ("G", "Glycine"),
        ("H", "Histidine"),
        ("I", "Isoleucine"),
        ("L", "Leucine"),
        ("K", "Lysine"),
        ("M", "Methionine"),
        ("F", "Phenylalanine"),
        ("P", "Proline"),
        ("S", "Serine"),
        ("T", "Threonine"),
        ("W", "Tryptophan"),
        ("Y", "Tyrosine"),
        ("V", "Valine"),
    )
}
|
|
109
|
+
|
|
110
|
+
class IUPACAminoAcidCode(RichEnum):
    """Full IUPAC amino acid code set.

    Extends the 20 standard amino acids with the rare amino acids (U, O),
    the ambiguity codes (B, Z, J, X), and symbols for a translation stop
    and an alignment gap. Each member's value is identical to its name.
    """

    # Standard amino acids
    A = "A"
    R = "R"
    N = "N"
    D = "D"
    C = "C"
    E = "E"
    Q = "Q"
    G = "G"
    H = "H"
    I = "I"
    L = "L"
    K = "K"
    M = "M"
    F = "F"
    P = "P"
    S = "S"
    T = "T"
    W = "W"
    Y = "Y"
    V = "V"

    # Rare amino acids
    U = "U"
    O = "O"

    # Ambiguity codes
    B = "B"
    Z = "Z"
    J = "J"
    X = "X"

    # Non-residue symbols
    STOP = "STOP"
    GAP = "GAP"


# Per-member metadata keyed by member name; attached after class creation.
# Only the two rare amino acids carry an extra "aliases" entry.
IUPACAminoAcidCode._metadata = dict(
    A={"description": "Alanine"},
    R={"description": "Arginine"},
    N={"description": "Asparagine"},
    D={"description": "Aspartic acid"},
    C={"description": "Cysteine"},
    E={"description": "Glutamic acid"},
    Q={"description": "Glutamine"},
    G={"description": "Glycine"},
    H={"description": "Histidine"},
    I={"description": "Isoleucine"},
    L={"description": "Leucine"},
    K={"description": "Lysine"},
    M={"description": "Methionine"},
    F={"description": "Phenylalanine"},
    P={"description": "Proline"},
    S={"description": "Serine"},
    T={"description": "Threonine"},
    W={"description": "Tryptophan"},
    Y={"description": "Tyrosine"},
    V={"description": "Valine"},
    U={
        "description": "Selenocysteine (21st amino acid)",
        "aliases": ["Sec"],
    },
    O={
        "description": "Pyrrolysine (22nd amino acid)",
        "aliases": ["Pyl"],
    },
    B={"description": "Asparagine or Aspartic acid (N or D)"},
    Z={"description": "Glutamine or Glutamic acid (Q or E)"},
    J={"description": "Leucine or Isoleucine (L or I)"},
    X={"description": "Any amino acid"},
    STOP={"description": "Translation stop codon"},
    GAP={"description": "Gap or deletion in alignment"},
)
|
|
176
|
+
|
|
177
|
+
class SequenceAlphabet(RichEnum):
    """Kinds of sequence alphabet used in bioinformatics.

    Each member's value is identical to its name.
    """

    # Basic alphabets
    DNA = "DNA"
    RNA = "RNA"
    PROTEIN = "PROTEIN"

    # IUPAC-extended alphabets
    IUPAC_DNA = "IUPAC_DNA"
    IUPAC_RNA = "IUPAC_RNA"
    IUPAC_PROTEIN = "IUPAC_PROTEIN"

    # Unambiguous-only alphabets
    RESTRICTED_DNA = "RESTRICTED_DNA"
    RESTRICTED_RNA = "RESTRICTED_RNA"

    BINARY = "BINARY"


# Per-member metadata keyed by member name; attached after class creation.
SequenceAlphabet._metadata = {
    alphabet: {"description": summary}
    for alphabet, summary in (
        ("DNA", "Deoxyribonucleic acid alphabet (A, T, G, C)"),
        ("RNA", "Ribonucleic acid alphabet (A, U, G, C)"),
        ("PROTEIN", "Protein/amino acid alphabet (20 standard AAs)"),
        ("IUPAC_DNA", "Extended DNA with IUPAC ambiguity codes"),
        ("IUPAC_RNA", "Extended RNA with IUPAC ambiguity codes"),
        ("IUPAC_PROTEIN", "Extended protein with ambiguity codes and rare AAs"),
        ("RESTRICTED_DNA", "Unambiguous DNA bases only (A, T, G, C)"),
        ("RESTRICTED_RNA", "Unambiguous RNA bases only (A, U, G, C)"),
        ("BINARY", "Binary encoding of sequences"),
    )
}
|
|
204
|
+
|
|
205
|
+
class SequenceQualityEncoding(RichEnum):
    """Quality-score encodings found in FASTQ files and sequencing data.

    Platforms and software versions differ in the ASCII offset they use;
    the offset, score range and platforms for each encoding are recorded
    in the ``annotations`` of the member's metadata.
    """

    SANGER = "SANGER"
    SOLEXA = "SOLEXA"
    ILLUMINA_1_3 = "ILLUMINA_1_3"
    ILLUMINA_1_5 = "ILLUMINA_1_5"
    ILLUMINA_1_8 = "ILLUMINA_1_8"


# Per-member metadata keyed by member name; attached after class creation.
# Rows are (member, description, ascii_offset, score_range, platforms).
SequenceQualityEncoding._metadata = {
    member: {
        "description": summary,
        "annotations": {
            "ascii_offset": offset,
            "score_range": scores,
            "platforms": platforms,
        },
    }
    for member, summary, offset, scores, platforms in (
        (
            "SANGER",
            "Sanger/Phred+33 (PHRED scores, ASCII offset 33)",
            33,
            "0-93",
            "NCBI SRA, Illumina 1.8+",
        ),
        (
            "SOLEXA",
            "Solexa+64 (Solexa scores, ASCII offset 64)",
            64,
            "-5-62",
            "Early Solexa/Illumina",
        ),
        (
            "ILLUMINA_1_3",
            "Illumina 1.3+ (PHRED+64, ASCII offset 64)",
            64,
            "0-62",
            "Illumina 1.3-1.7",
        ),
        (
            "ILLUMINA_1_5",
            "Illumina 1.5+ (PHRED+64, special handling for 0-2)",
            64,
            "3-62",
            "Illumina 1.5-1.7",
        ),
        (
            "ILLUMINA_1_8",
            "Illumina 1.8+ (PHRED+33, modern standard)",
            33,
            "0-41",
            "Illumina 1.8+, modern sequencers",
        ),
    )
}
|
|
225
|
+
|
|
226
|
+
class GeneticCodeTable(RichEnum):
    """NCBI genetic code translation tables for different organisms.

    Table 1 is the universal genetic code used by most organisms. Members
    are named ``TABLE_<ncbi id>``; the ids listed here are not contiguous.
    Each member's value is identical to its name.
    """

    TABLE_1 = "TABLE_1"
    TABLE_2 = "TABLE_2"
    TABLE_3 = "TABLE_3"
    TABLE_4 = "TABLE_4"
    TABLE_5 = "TABLE_5"
    TABLE_6 = "TABLE_6"
    TABLE_9 = "TABLE_9"
    TABLE_10 = "TABLE_10"
    TABLE_11 = "TABLE_11"
    TABLE_12 = "TABLE_12"
    TABLE_13 = "TABLE_13"
    TABLE_14 = "TABLE_14"
    TABLE_16 = "TABLE_16"
    TABLE_21 = "TABLE_21"
    TABLE_22 = "TABLE_22"
    TABLE_23 = "TABLE_23"
    TABLE_24 = "TABLE_24"
    TABLE_25 = "TABLE_25"
    TABLE_26 = "TABLE_26"
    TABLE_27 = "TABLE_27"
    TABLE_28 = "TABLE_28"
    TABLE_29 = "TABLE_29"
    TABLE_30 = "TABLE_30"
    TABLE_31 = "TABLE_31"


# Per-member metadata keyed by member name; attached after class creation.
# Rows are (ncbi_id, short name, description); the key is "TABLE_<ncbi_id>".
GeneticCodeTable._metadata = {
    f"TABLE_{table_id}": {
        "description": summary,
        "annotations": {"ncbi_id": table_id, "name": short_name},
    }
    for table_id, short_name, summary in (
        (1, "Standard", "Standard genetic code (universal)"),
        (2, "Vertebrate Mitochondrial", "Vertebrate mitochondrial code"),
        (3, "Yeast Mitochondrial", "Yeast mitochondrial code"),
        (4, "Mold Mitochondrial", "Mold, protozoan, coelenterate mitochondrial"),
        (5, "Invertebrate Mitochondrial", "Invertebrate mitochondrial code"),
        (6, "Ciliate Nuclear", "Ciliate, dasycladacean, hexamita nuclear code"),
        (9, "Echinoderm Mitochondrial", "Echinoderm and flatworm mitochondrial code"),
        (10, "Euplotid Nuclear", "Euplotid nuclear code"),
        (11, "Bacterial", "Bacterial, archaeal and plant plastid code"),
        (12, "Alternative Yeast Nuclear", "Alternative yeast nuclear code"),
        (13, "Ascidian Mitochondrial", "Ascidian mitochondrial code"),
        (
            14,
            "Alternative Flatworm Mitochondrial",
            "Alternative flatworm mitochondrial code",
        ),
        (16, "Chlorophycean Mitochondrial", "Chlorophycean mitochondrial code"),
        (21, "Trematode Mitochondrial", "Trematode mitochondrial code"),
        (22, "Scenedesmus Mitochondrial", "Scenedesmus obliquus mitochondrial code"),
        (23, "Thraustochytrium Mitochondrial", "Thraustochytrium mitochondrial code"),
        (24, "Rhabdopleuridae Mitochondrial", "Rhabdopleuridae mitochondrial code"),
        (
            25,
            "Candidate Division SR1",
            "Candidate division SR1 and gracilibacteria code",
        ),
        (26, "Pachysolen Nuclear", "Pachysolen tannophilus nuclear code"),
        (27, "Karyorelict Nuclear", "Karyorelict nuclear code"),
        (28, "Condylostoma Nuclear", "Condylostoma nuclear code"),
        (29, "Mesodinium Nuclear", "Mesodinium nuclear code"),
        (30, "Peritrich Nuclear", "Peritrich nuclear code"),
        (31, "Blastocrithidia Nuclear", "Blastocrithidia nuclear code"),
    )
}
|
|
284
|
+
|
|
285
|
+
class SequenceStrand(RichEnum):
    """Strand orientation of a nucleic acid sequence.

    Each member's value is identical to its name.
    """

    PLUS = "PLUS"
    MINUS = "MINUS"
    BOTH = "BOTH"
    UNKNOWN = "UNKNOWN"


# Per-member metadata keyed by member name; attached after class creation.
SequenceStrand._metadata = {
    "PLUS": {
        "description": "Plus/forward/sense strand (5' to 3')",
    },
    "MINUS": {
        "description": "Minus/reverse/antisense strand (3' to 5')",
    },
    "BOTH": {
        "description": "Both strands",
    },
    "UNKNOWN": {
        "description": "Strand not specified or unknown",
    },
}
|
|
302
|
+
|
|
303
|
+
class SequenceTopology(RichEnum):
    """Topological structure of a nucleic acid molecule.

    Each member's value is identical to its name.
    """

    LINEAR = "LINEAR"
    CIRCULAR = "CIRCULAR"
    BRANCHED = "BRANCHED"
    UNKNOWN = "UNKNOWN"


# Per-member metadata keyed by member name; attached after class creation.
# Only LINEAR and CIRCULAR carry a "meaning" (an SO: identifier).
SequenceTopology._metadata = {
    "LINEAR": {
        "description": "Linear sequence molecule",
        "meaning": "SO:0000987",
    },
    "CIRCULAR": {
        "description": "Circular sequence molecule",
        "meaning": "SO:0000988",
    },
    "BRANCHED": {
        "description": "Branched sequence structure",
    },
    "UNKNOWN": {
        "description": "Topology not specified",
    },
}
|
|
320
|
+
|
|
321
|
+
class SequenceModality(RichEnum):
    """Kinds of sequence data, classified by experimental method.

    Each member's value is identical to its name.
    """

    # Sample resolution
    SINGLE_CELL = "SINGLE_CELL"
    BULK = "BULK"
    SPATIAL = "SPATIAL"

    # Read length
    LONG_READ = "LONG_READ"
    SHORT_READ = "SHORT_READ"

    # Read / library layout
    PAIRED_END = "PAIRED_END"
    SINGLE_END = "SINGLE_END"
    MATE_PAIR = "MATE_PAIR"


# Per-member metadata keyed by member name; attached after class creation.
SequenceModality._metadata = {
    modality: {"description": summary}
    for modality, summary in (
        ("SINGLE_CELL", "Single-cell sequencing data"),
        ("BULK", "Bulk/population sequencing data"),
        ("SPATIAL", "Spatially-resolved sequencing"),
        ("LONG_READ", "Long-read sequencing (PacBio, Oxford Nanopore)"),
        ("SHORT_READ", "Short-read sequencing (Illumina)"),
        ("PAIRED_END", "Paired-end sequencing reads"),
        ("SINGLE_END", "Single-end sequencing reads"),
        ("MATE_PAIR", "Mate-pair sequencing libraries"),
    )
}
|
|
346
|
+
|
|
347
|
+
# Public API of this module: the value-set enums it defines, in
# definition order.
__all__ = [
    # Residue code sets
    "IUPACNucleotideCode", "StandardAminoAcid", "IUPACAminoAcidCode",
    # Alphabets, encodings and translation tables
    "SequenceAlphabet", "SequenceQualityEncoding", "GeneticCodeTable",
    # Sequence descriptors
    "SequenceStrand", "SequenceTopology", "SequenceModality",
]
|