valuesets 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valuesets might be problematic. Click here for more details.

Files changed (248):
  1. valuesets/__init__.py +7 -0
  2. valuesets/_version.py +8 -0
  3. valuesets/datamodel/valuesets.py +13796 -0
  4. valuesets/datamodel/valuesets_dataclass.py +24503 -0
  5. valuesets/datamodel/valuesets_pydantic.py +13796 -0
  6. valuesets/enums/__init__.py +590 -0
  7. valuesets/enums/academic/__init__.py +1 -0
  8. valuesets/enums/academic/research.py +559 -0
  9. valuesets/enums/analytical_chemistry/__init__.py +1 -0
  10. valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
  11. valuesets/enums/bio/__init__.py +1 -0
  12. valuesets/enums/bio/biological_colors.py +238 -0
  13. valuesets/enums/bio/cell_cycle.py +180 -0
  14. valuesets/enums/bio/currency_chemicals.py +52 -0
  15. valuesets/enums/bio/developmental_stages.py +103 -0
  16. valuesets/enums/bio/genome_features.py +182 -0
  17. valuesets/enums/bio/genomics.py +91 -0
  18. valuesets/enums/bio/go_aspect.py +32 -0
  19. valuesets/enums/bio/go_causality.py +58 -0
  20. valuesets/enums/bio/go_evidence.py +129 -0
  21. valuesets/enums/bio/human_developmental_stages.py +62 -0
  22. valuesets/enums/bio/insdc_geographic_locations.py +591 -0
  23. valuesets/enums/bio/insdc_missing_values.py +49 -0
  24. valuesets/enums/bio/lipid_categories.py +67 -0
  25. valuesets/enums/bio/mouse_developmental_stages.py +62 -0
  26. valuesets/enums/bio/plant_biology.py +86 -0
  27. valuesets/enums/bio/plant_developmental_stages.py +54 -0
  28. valuesets/enums/bio/plant_sex.py +81 -0
  29. valuesets/enums/bio/protein_evidence.py +61 -0
  30. valuesets/enums/bio/proteomics_standards.py +123 -0
  31. valuesets/enums/bio/psi_mi.py +306 -0
  32. valuesets/enums/bio/relationship_to_oxygen.py +37 -0
  33. valuesets/enums/bio/sequence_alphabets.py +449 -0
  34. valuesets/enums/bio/sequence_chemistry.py +357 -0
  35. valuesets/enums/bio/sequencing_platforms.py +302 -0
  36. valuesets/enums/bio/structural_biology.py +320 -0
  37. valuesets/enums/bio/taxonomy.py +238 -0
  38. valuesets/enums/bio/trophic_levels.py +85 -0
  39. valuesets/enums/bio/uniprot_species.py +344 -0
  40. valuesets/enums/bio/viral_genome_types.py +47 -0
  41. valuesets/enums/bioprocessing/__init__.py +1 -0
  42. valuesets/enums/bioprocessing/scale_up.py +249 -0
  43. valuesets/enums/business/__init__.py +1 -0
  44. valuesets/enums/business/human_resources.py +275 -0
  45. valuesets/enums/business/industry_classifications.py +181 -0
  46. valuesets/enums/business/management_operations.py +228 -0
  47. valuesets/enums/business/organizational_structures.py +236 -0
  48. valuesets/enums/business/quality_management.py +181 -0
  49. valuesets/enums/business/supply_chain.py +232 -0
  50. valuesets/enums/chemistry/__init__.py +1 -0
  51. valuesets/enums/chemistry/chemical_entities.py +315 -0
  52. valuesets/enums/chemistry/reaction_directionality.py +65 -0
  53. valuesets/enums/chemistry/reactions.py +256 -0
  54. valuesets/enums/clinical/__init__.py +1 -0
  55. valuesets/enums/clinical/nih_demographics.py +177 -0
  56. valuesets/enums/clinical/phenopackets.py +254 -0
  57. valuesets/enums/common_value_sets.py +8791 -0
  58. valuesets/enums/computing/__init__.py +1 -0
  59. valuesets/enums/computing/file_formats.py +294 -0
  60. valuesets/enums/computing/maturity_levels.py +196 -0
  61. valuesets/enums/computing/mime_types.py +227 -0
  62. valuesets/enums/confidence_levels.py +168 -0
  63. valuesets/enums/contributor.py +30 -0
  64. valuesets/enums/core.py +42 -0
  65. valuesets/enums/data/__init__.py +1 -0
  66. valuesets/enums/data/data_absent_reason.py +53 -0
  67. valuesets/enums/data_science/__init__.py +1 -0
  68. valuesets/enums/data_science/binary_classification.py +87 -0
  69. valuesets/enums/data_science/emotion_classification.py +66 -0
  70. valuesets/enums/data_science/priority_severity.py +73 -0
  71. valuesets/enums/data_science/quality_control.py +46 -0
  72. valuesets/enums/data_science/sentiment_analysis.py +50 -0
  73. valuesets/enums/data_science/text_classification.py +97 -0
  74. valuesets/enums/demographics.py +206 -0
  75. valuesets/enums/ecological_interactions.py +151 -0
  76. valuesets/enums/energy/__init__.py +1 -0
  77. valuesets/enums/energy/energy.py +343 -0
  78. valuesets/enums/energy/fossil_fuels.py +29 -0
  79. valuesets/enums/energy/nuclear/__init__.py +1 -0
  80. valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
  81. valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
  82. valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
  83. valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
  84. valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
  85. valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
  86. valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
  87. valuesets/enums/energy/nuclear/reactor_types.py +163 -0
  88. valuesets/enums/environmental_health/__init__.py +1 -0
  89. valuesets/enums/environmental_health/exposures.py +265 -0
  90. valuesets/enums/geography/__init__.py +1 -0
  91. valuesets/enums/geography/geographic_codes.py +741 -0
  92. valuesets/enums/health/__init__.py +12 -0
  93. valuesets/enums/health/vaccination.py +98 -0
  94. valuesets/enums/health.py +36 -0
  95. valuesets/enums/health_base.py +36 -0
  96. valuesets/enums/healthcare.py +45 -0
  97. valuesets/enums/industry/__init__.py +1 -0
  98. valuesets/enums/industry/extractive_industry.py +94 -0
  99. valuesets/enums/industry/mining.py +388 -0
  100. valuesets/enums/industry/safety_colors.py +201 -0
  101. valuesets/enums/investigation.py +27 -0
  102. valuesets/enums/materials_science/__init__.py +1 -0
  103. valuesets/enums/materials_science/characterization_methods.py +112 -0
  104. valuesets/enums/materials_science/crystal_structures.py +76 -0
  105. valuesets/enums/materials_science/material_properties.py +119 -0
  106. valuesets/enums/materials_science/material_types.py +104 -0
  107. valuesets/enums/materials_science/pigments_dyes.py +198 -0
  108. valuesets/enums/materials_science/synthesis_methods.py +109 -0
  109. valuesets/enums/medical/__init__.py +1 -0
  110. valuesets/enums/medical/clinical.py +277 -0
  111. valuesets/enums/medical/neuroimaging.py +119 -0
  112. valuesets/enums/mining_processing.py +302 -0
  113. valuesets/enums/physics/__init__.py +1 -0
  114. valuesets/enums/physics/states_of_matter.py +46 -0
  115. valuesets/enums/social/__init__.py +1 -0
  116. valuesets/enums/social/person_status.py +29 -0
  117. valuesets/enums/spatial/__init__.py +1 -0
  118. valuesets/enums/spatial/spatial_qualifiers.py +246 -0
  119. valuesets/enums/statistics/__init__.py +5 -0
  120. valuesets/enums/statistics/prediction_outcomes.py +31 -0
  121. valuesets/enums/statistics.py +31 -0
  122. valuesets/enums/time/__init__.py +1 -0
  123. valuesets/enums/time/temporal.py +254 -0
  124. valuesets/enums/units/__init__.py +1 -0
  125. valuesets/enums/units/measurements.py +310 -0
  126. valuesets/enums/visual/__init__.py +1 -0
  127. valuesets/enums/visual/colors.py +376 -0
  128. valuesets/generators/__init__.py +19 -0
  129. valuesets/generators/auto_slot_injector.py +280 -0
  130. valuesets/generators/enhanced_pydantic_generator.py +100 -0
  131. valuesets/generators/enum_slot_generator.py +201 -0
  132. valuesets/generators/modular_rich_generator.py +353 -0
  133. valuesets/generators/prefix_standardizer.py +198 -0
  134. valuesets/generators/rich_enum.py +127 -0
  135. valuesets/generators/rich_pydantic_generator.py +310 -0
  136. valuesets/generators/smart_slot_syncer.py +428 -0
  137. valuesets/generators/sssom_generator.py +394 -0
  138. valuesets/merged/merged_hierarchy.yaml +21649 -0
  139. valuesets/schema/README.md +3 -0
  140. valuesets/schema/academic/research.yaml +911 -0
  141. valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
  142. valuesets/schema/bio/bio_entities.yaml +364 -0
  143. valuesets/schema/bio/biological_colors.yaml +434 -0
  144. valuesets/schema/bio/cell_cycle.yaml +309 -0
  145. valuesets/schema/bio/currency_chemicals.yaml +70 -0
  146. valuesets/schema/bio/developmental_stages.yaml +226 -0
  147. valuesets/schema/bio/genome_features.yaml +342 -0
  148. valuesets/schema/bio/genomics.yaml +101 -0
  149. valuesets/schema/bio/go_aspect.yaml +39 -0
  150. valuesets/schema/bio/go_causality.yaml +119 -0
  151. valuesets/schema/bio/go_evidence.yaml +215 -0
  152. valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
  153. valuesets/schema/bio/insdc_missing_values.yaml +85 -0
  154. valuesets/schema/bio/lipid_categories.yaml +72 -0
  155. valuesets/schema/bio/plant_biology.yaml +125 -0
  156. valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
  157. valuesets/schema/bio/plant_sex.yaml +108 -0
  158. valuesets/schema/bio/protein_evidence.yaml +63 -0
  159. valuesets/schema/bio/proteomics_standards.yaml +116 -0
  160. valuesets/schema/bio/psi_mi.yaml +400 -0
  161. valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
  162. valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
  163. valuesets/schema/bio/sequence_chemistry.yaml +477 -0
  164. valuesets/schema/bio/sequencing_platforms.yaml +515 -0
  165. valuesets/schema/bio/structural_biology.yaml +428 -0
  166. valuesets/schema/bio/taxonomy.yaml +453 -0
  167. valuesets/schema/bio/trophic_levels.yaml +118 -0
  168. valuesets/schema/bio/uniprot_species.yaml +1209 -0
  169. valuesets/schema/bio/viral_genome_types.yaml +99 -0
  170. valuesets/schema/bioprocessing/scale_up.yaml +458 -0
  171. valuesets/schema/business/human_resources.yaml +752 -0
  172. valuesets/schema/business/industry_classifications.yaml +448 -0
  173. valuesets/schema/business/management_operations.yaml +602 -0
  174. valuesets/schema/business/organizational_structures.yaml +645 -0
  175. valuesets/schema/business/quality_management.yaml +502 -0
  176. valuesets/schema/business/supply_chain.yaml +688 -0
  177. valuesets/schema/chemistry/chemical_entities.yaml +639 -0
  178. valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
  179. valuesets/schema/chemistry/reactions.yaml +442 -0
  180. valuesets/schema/clinical/nih_demographics.yaml +285 -0
  181. valuesets/schema/clinical/phenopackets.yaml +429 -0
  182. valuesets/schema/computing/file_formats.yaml +631 -0
  183. valuesets/schema/computing/maturity_levels.yaml +229 -0
  184. valuesets/schema/computing/mime_types.yaml +266 -0
  185. valuesets/schema/confidence_levels.yaml +206 -0
  186. valuesets/schema/contributor.yaml +30 -0
  187. valuesets/schema/core.yaml +55 -0
  188. valuesets/schema/data/data_absent_reason.yaml +82 -0
  189. valuesets/schema/data_science/binary_classification.yaml +125 -0
  190. valuesets/schema/data_science/emotion_classification.yaml +109 -0
  191. valuesets/schema/data_science/priority_severity.yaml +122 -0
  192. valuesets/schema/data_science/quality_control.yaml +68 -0
  193. valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
  194. valuesets/schema/data_science/text_classification.yaml +135 -0
  195. valuesets/schema/demographics.yaml +238 -0
  196. valuesets/schema/ecological_interactions.yaml +298 -0
  197. valuesets/schema/energy/energy.yaml +595 -0
  198. valuesets/schema/energy/fossil_fuels.yaml +28 -0
  199. valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
  200. valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
  201. valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
  202. valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
  203. valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
  204. valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
  205. valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
  206. valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
  207. valuesets/schema/environmental_health/exposures.yaml +355 -0
  208. valuesets/schema/generated_slots.yaml +1828 -0
  209. valuesets/schema/geography/geographic_codes.yaml +1018 -0
  210. valuesets/schema/health/vaccination.yaml +102 -0
  211. valuesets/schema/health.yaml +38 -0
  212. valuesets/schema/healthcare.yaml +53 -0
  213. valuesets/schema/industry/extractive_industry.yaml +89 -0
  214. valuesets/schema/industry/mining.yaml +888 -0
  215. valuesets/schema/industry/safety_colors.yaml +375 -0
  216. valuesets/schema/investigation.yaml +64 -0
  217. valuesets/schema/materials_science/characterization_methods.yaml +193 -0
  218. valuesets/schema/materials_science/crystal_structures.yaml +138 -0
  219. valuesets/schema/materials_science/material_properties.yaml +135 -0
  220. valuesets/schema/materials_science/material_types.yaml +151 -0
  221. valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
  222. valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
  223. valuesets/schema/medical/clinical.yaml +610 -0
  224. valuesets/schema/medical/neuroimaging.yaml +325 -0
  225. valuesets/schema/mining_processing.yaml +295 -0
  226. valuesets/schema/physics/states_of_matter.yaml +46 -0
  227. valuesets/schema/slot_mixins.yaml +143 -0
  228. valuesets/schema/social/person_status.yaml +28 -0
  229. valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
  230. valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
  231. valuesets/schema/statistics.yaml +34 -0
  232. valuesets/schema/time/temporal.yaml +435 -0
  233. valuesets/schema/types.yaml +15 -0
  234. valuesets/schema/units/measurements.yaml +675 -0
  235. valuesets/schema/valuesets.yaml +100 -0
  236. valuesets/schema/visual/colors.yaml +778 -0
  237. valuesets/utils/__init__.py +6 -0
  238. valuesets/utils/comparison.py +102 -0
  239. valuesets/utils/expand_dynamic_enums.py +414 -0
  240. valuesets/utils/mapping_utils.py +236 -0
  241. valuesets/validators/__init__.py +11 -0
  242. valuesets/validators/enum_evaluator.py +669 -0
  243. valuesets/validators/oak_config.yaml +70 -0
  244. valuesets/validators/validate_with_ols.py +241 -0
  245. valuesets-0.3.1.dist-info/METADATA +395 -0
  246. valuesets-0.3.1.dist-info/RECORD +248 -0
  247. valuesets-0.3.1.dist-info/WHEEL +4 -0
  248. valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,357 @@
1
+ """
2
+ Sequence Chemistry Value Sets
3
+
4
+ Value sets for nucleic acid and protein sequence chemistry, including
5
+ standard and extended alphabets, quality encodings, and molecular representations.
6
+
7
+ Generated from: bio/sequence_chemistry.yaml
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Dict, Any, Optional
13
+ from valuesets.generators.rich_enum import RichEnum
14
+
15
class IUPACNucleotideCode(RichEnum):
    """
    IUPAC nucleotide codes for DNA/RNA sequences, ambiguous bases included.

    These symbols are used in FASTA and other sequence formats to represent
    uncertain nucleotides. Each member's value equals its name.
    """
    # Single bases
    A = "A"
    T = "T"
    U = "U"
    G = "G"
    C = "C"
    # Two-base ambiguity codes
    R = "R"
    Y = "Y"
    S = "S"
    W = "W"
    K = "K"
    M = "M"
    # Three-base ambiguity codes
    B = "B"
    D = "D"
    H = "H"
    V = "V"
    # Any nucleotide, and the alignment gap marker
    N = "N"
    GAP = "GAP"


# Per-member metadata, keyed by member name; attached once the class exists.
IUPACNucleotideCode._metadata = {
    code: {"description": text}
    for code, text in (
        ("A", "Adenine"),
        ("T", "Thymine (DNA)"),
        ("U", "Uracil (RNA)"),
        ("G", "Guanine"),
        ("C", "Cytosine"),
        ("R", "Purine (A or G)"),
        ("Y", "Pyrimidine (C or T/U)"),
        ("S", "Strong interaction (G or C)"),
        ("W", "Weak interaction (A or T/U)"),
        ("K", "Keto (G or T/U)"),
        ("M", "Amino (A or C)"),
        ("B", "Not A (C or G or T/U)"),
        ("D", "Not C (A or G or T/U)"),
        ("H", "Not G (A or C or T/U)"),
        ("V", "Not T/U (A or C or G)"),
        ("N", "Any nucleotide (A or C or G or T/U)"),
        ("GAP", "Gap or deletion in alignment"),
    )
}
59
+
60
class StandardAminoAcid(RichEnum):
    """
    The 20 standard proteinogenic amino acids, by IUPAC single-letter code.

    Each member's value equals its name; the full amino acid name is kept in
    the ``_metadata`` mapping assigned below the class.
    """
    A = "A"
    R = "R"
    N = "N"
    D = "D"
    C = "C"
    E = "E"
    Q = "Q"
    G = "G"
    H = "H"
    I = "I"
    L = "L"
    K = "K"
    M = "M"
    F = "F"
    P = "P"
    S = "S"
    T = "T"
    W = "W"
    Y = "Y"
    V = "V"


# Per-member metadata, keyed by member name; attached once the class exists.
StandardAminoAcid._metadata = {
    letter: {"description": full_name}
    for letter, full_name in (
        ("A", "Alanine"),
        ("R", "Arginine"),
        ("N", "Asparagine"),
        ("D", "Aspartic acid"),
        ("C", "Cysteine"),
        ("E", "Glutamic acid"),
        ("Q", "Glutamine"),
        ("G", "Glycine"),
        ("H", "Histidine"),
        ("I", "Isoleucine"),
        ("L", "Leucine"),
        ("K", "Lysine"),
        ("M", "Methionine"),
        ("F", "Phenylalanine"),
        ("P", "Proline"),
        ("S", "Serine"),
        ("T", "Threonine"),
        ("W", "Tryptophan"),
        ("Y", "Tyrosine"),
        ("V", "Valine"),
    )
}
109
+
110
class IUPACAminoAcidCode(RichEnum):
    """
    IUPAC amino acid codes: the standard amino acids, the rare amino acids,
    and the ambiguity codes, plus stop and gap markers.

    Each member's value equals its name.
    """
    # Standard amino acids
    A = "A"
    R = "R"
    N = "N"
    D = "D"
    C = "C"
    E = "E"
    Q = "Q"
    G = "G"
    H = "H"
    I = "I"
    L = "L"
    K = "K"
    M = "M"
    F = "F"
    P = "P"
    S = "S"
    T = "T"
    W = "W"
    Y = "Y"
    V = "V"
    # Rare amino acids
    U = "U"
    O = "O"
    # Ambiguity codes
    B = "B"
    Z = "Z"
    J = "J"
    X = "X"
    # Stop codon and alignment gap markers
    STOP = "STOP"
    GAP = "GAP"


# Per-member metadata, keyed by member name; attached once the class exists.
# Only the two rare amino acids carry an "aliases" list.
IUPACAminoAcidCode._metadata = {
    # Standard amino acids
    "A": {"description": "Alanine"},
    "R": {"description": "Arginine"},
    "N": {"description": "Asparagine"},
    "D": {"description": "Aspartic acid"},
    "C": {"description": "Cysteine"},
    "E": {"description": "Glutamic acid"},
    "Q": {"description": "Glutamine"},
    "G": {"description": "Glycine"},
    "H": {"description": "Histidine"},
    "I": {"description": "Isoleucine"},
    "L": {"description": "Leucine"},
    "K": {"description": "Lysine"},
    "M": {"description": "Methionine"},
    "F": {"description": "Phenylalanine"},
    "P": {"description": "Proline"},
    "S": {"description": "Serine"},
    "T": {"description": "Threonine"},
    "W": {"description": "Tryptophan"},
    "Y": {"description": "Tyrosine"},
    "V": {"description": "Valine"},
    # Rare amino acids
    "U": {
        "description": "Selenocysteine (21st amino acid)",
        "aliases": ["Sec"],
    },
    "O": {
        "description": "Pyrrolysine (22nd amino acid)",
        "aliases": ["Pyl"],
    },
    # Ambiguity codes
    "B": {"description": "Asparagine or Aspartic acid (N or D)"},
    "Z": {"description": "Glutamine or Glutamic acid (Q or E)"},
    "J": {"description": "Leucine or Isoleucine (L or I)"},
    "X": {"description": "Any amino acid"},
    # Markers
    "STOP": {"description": "Translation stop codon"},
    "GAP": {"description": "Gap or deletion in alignment"},
}
176
+
177
class SequenceAlphabet(RichEnum):
    """
    Kinds of sequence alphabet used in bioinformatics.

    Each member's value equals its name.
    """
    DNA = "DNA"
    RNA = "RNA"
    PROTEIN = "PROTEIN"
    IUPAC_DNA = "IUPAC_DNA"
    IUPAC_RNA = "IUPAC_RNA"
    IUPAC_PROTEIN = "IUPAC_PROTEIN"
    RESTRICTED_DNA = "RESTRICTED_DNA"
    RESTRICTED_RNA = "RESTRICTED_RNA"
    BINARY = "BINARY"


# Per-member metadata, keyed by member name; attached once the class exists.
SequenceAlphabet._metadata = {
    alphabet: {"description": text}
    for alphabet, text in (
        ("DNA", "Deoxyribonucleic acid alphabet (A, T, G, C)"),
        ("RNA", "Ribonucleic acid alphabet (A, U, G, C)"),
        ("PROTEIN", "Protein/amino acid alphabet (20 standard AAs)"),
        ("IUPAC_DNA", "Extended DNA with IUPAC ambiguity codes"),
        ("IUPAC_RNA", "Extended RNA with IUPAC ambiguity codes"),
        ("IUPAC_PROTEIN", "Extended protein with ambiguity codes and rare AAs"),
        ("RESTRICTED_DNA", "Unambiguous DNA bases only (A, T, G, C)"),
        ("RESTRICTED_RNA", "Unambiguous RNA bases only (A, U, G, C)"),
        ("BINARY", "Binary encoding of sequences"),
    )
}
204
+
205
class SequenceQualityEncoding(RichEnum):
    """
    Quality score encoding standards for FASTQ files and sequencing data.

    Different platforms and software versions use different ASCII offsets;
    the offset, score range and platforms for each member are recorded in
    the ``_metadata`` annotations assigned below the class.
    """
    SANGER = "SANGER"
    SOLEXA = "SOLEXA"
    ILLUMINA_1_3 = "ILLUMINA_1_3"
    ILLUMINA_1_5 = "ILLUMINA_1_5"
    ILLUMINA_1_8 = "ILLUMINA_1_8"


# Per-member metadata, keyed by member name; attached once the class exists.
# Row layout: (member, description, ascii_offset, score_range, platforms).
SequenceQualityEncoding._metadata = {
    encoding: {
        "description": summary,
        "annotations": {
            "ascii_offset": offset,
            "score_range": scores,
            "platforms": used_by,
        },
    }
    for encoding, summary, offset, scores, used_by in (
        (
            "SANGER",
            "Sanger/Phred+33 (PHRED scores, ASCII offset 33)",
            33,
            "0-93",
            "NCBI SRA, Illumina 1.8+",
        ),
        (
            "SOLEXA",
            "Solexa+64 (Solexa scores, ASCII offset 64)",
            64,
            "-5-62",
            "Early Solexa/Illumina",
        ),
        (
            "ILLUMINA_1_3",
            "Illumina 1.3+ (PHRED+64, ASCII offset 64)",
            64,
            "0-62",
            "Illumina 1.3-1.7",
        ),
        (
            "ILLUMINA_1_5",
            "Illumina 1.5+ (PHRED+64, special handling for 0-2)",
            64,
            "3-62",
            "Illumina 1.5-1.7",
        ),
        (
            "ILLUMINA_1_8",
            "Illumina 1.8+ (PHRED+33, modern standard)",
            33,
            "0-41",
            "Illumina 1.8+, modern sequencers",
        ),
    )
}
225
+
226
class GeneticCodeTable(RichEnum):
    """
    NCBI genetic code translation tables for different organisms.

    Table 1 is the universal genetic code used by most organisms. Members
    are named ``TABLE_<ncbi_id>``; the ids listed here are not contiguous.
    """
    TABLE_1 = "TABLE_1"
    TABLE_2 = "TABLE_2"
    TABLE_3 = "TABLE_3"
    TABLE_4 = "TABLE_4"
    TABLE_5 = "TABLE_5"
    TABLE_6 = "TABLE_6"
    TABLE_9 = "TABLE_9"
    TABLE_10 = "TABLE_10"
    TABLE_11 = "TABLE_11"
    TABLE_12 = "TABLE_12"
    TABLE_13 = "TABLE_13"
    TABLE_14 = "TABLE_14"
    TABLE_16 = "TABLE_16"
    TABLE_21 = "TABLE_21"
    TABLE_22 = "TABLE_22"
    TABLE_23 = "TABLE_23"
    TABLE_24 = "TABLE_24"
    TABLE_25 = "TABLE_25"
    TABLE_26 = "TABLE_26"
    TABLE_27 = "TABLE_27"
    TABLE_28 = "TABLE_28"
    TABLE_29 = "TABLE_29"
    TABLE_30 = "TABLE_30"
    TABLE_31 = "TABLE_31"


# Per-member metadata, keyed by member name; attached once the class exists.
# Row layout: (member, ncbi_id, short name, description).
GeneticCodeTable._metadata = {
    member: {
        "description": summary,
        "annotations": {"ncbi_id": ncbi_id, "name": label},
    }
    for member, ncbi_id, label, summary in (
        ("TABLE_1", 1, "Standard", "Standard genetic code (universal)"),
        ("TABLE_2", 2, "Vertebrate Mitochondrial", "Vertebrate mitochondrial code"),
        ("TABLE_3", 3, "Yeast Mitochondrial", "Yeast mitochondrial code"),
        ("TABLE_4", 4, "Mold Mitochondrial", "Mold, protozoan, coelenterate mitochondrial"),
        ("TABLE_5", 5, "Invertebrate Mitochondrial", "Invertebrate mitochondrial code"),
        ("TABLE_6", 6, "Ciliate Nuclear", "Ciliate, dasycladacean, hexamita nuclear code"),
        ("TABLE_9", 9, "Echinoderm Mitochondrial", "Echinoderm and flatworm mitochondrial code"),
        ("TABLE_10", 10, "Euplotid Nuclear", "Euplotid nuclear code"),
        ("TABLE_11", 11, "Bacterial", "Bacterial, archaeal and plant plastid code"),
        ("TABLE_12", 12, "Alternative Yeast Nuclear", "Alternative yeast nuclear code"),
        ("TABLE_13", 13, "Ascidian Mitochondrial", "Ascidian mitochondrial code"),
        ("TABLE_14", 14, "Alternative Flatworm Mitochondrial", "Alternative flatworm mitochondrial code"),
        ("TABLE_16", 16, "Chlorophycean Mitochondrial", "Chlorophycean mitochondrial code"),
        ("TABLE_21", 21, "Trematode Mitochondrial", "Trematode mitochondrial code"),
        ("TABLE_22", 22, "Scenedesmus Mitochondrial", "Scenedesmus obliquus mitochondrial code"),
        ("TABLE_23", 23, "Thraustochytrium Mitochondrial", "Thraustochytrium mitochondrial code"),
        ("TABLE_24", 24, "Rhabdopleuridae Mitochondrial", "Rhabdopleuridae mitochondrial code"),
        ("TABLE_25", 25, "Candidate Division SR1", "Candidate division SR1 and gracilibacteria code"),
        ("TABLE_26", 26, "Pachysolen Nuclear", "Pachysolen tannophilus nuclear code"),
        ("TABLE_27", 27, "Karyorelict Nuclear", "Karyorelict nuclear code"),
        ("TABLE_28", 28, "Condylostoma Nuclear", "Condylostoma nuclear code"),
        ("TABLE_29", 29, "Mesodinium Nuclear", "Mesodinium nuclear code"),
        ("TABLE_30", 30, "Peritrich Nuclear", "Peritrich nuclear code"),
        ("TABLE_31", 31, "Blastocrithidia Nuclear", "Blastocrithidia nuclear code"),
    )
}
284
+
285
class SequenceStrand(RichEnum):
    """
    Strand orientation of a nucleic acid sequence.

    Each member's value equals its name.
    """
    PLUS = "PLUS"
    MINUS = "MINUS"
    BOTH = "BOTH"
    UNKNOWN = "UNKNOWN"


# Per-member metadata, keyed by member name; attached once the class exists.
SequenceStrand._metadata = {
    strand: {"description": text}
    for strand, text in (
        ("PLUS", "Plus/forward/sense strand (5' to 3')"),
        ("MINUS", "Minus/reverse/antisense strand (3' to 5')"),
        ("BOTH", "Both strands"),
        ("UNKNOWN", "Strand not specified or unknown"),
    )
}
302
+
303
class SequenceTopology(RichEnum):
    """
    Topological structure of a nucleic acid molecule.

    Each member's value equals its name.
    """
    LINEAR = "LINEAR"
    CIRCULAR = "CIRCULAR"
    BRANCHED = "BRANCHED"
    UNKNOWN = "UNKNOWN"


# Per-member metadata, keyed by member name; attached once the class exists.
# Only LINEAR and CIRCULAR carry a "meaning" identifier (SO: prefix).
SequenceTopology._metadata = {
    "LINEAR": {
        "description": "Linear sequence molecule",
        "meaning": "SO:0000987",
    },
    "CIRCULAR": {
        "description": "Circular sequence molecule",
        "meaning": "SO:0000988",
    },
    "BRANCHED": {
        "description": "Branched sequence structure",
    },
    "UNKNOWN": {
        "description": "Topology not specified",
    },
}
320
+
321
class SequenceModality(RichEnum):
    """
    Kinds of sequence data, distinguished by experimental method.

    Each member's value equals its name.
    """
    SINGLE_CELL = "SINGLE_CELL"
    BULK = "BULK"
    SPATIAL = "SPATIAL"
    LONG_READ = "LONG_READ"
    SHORT_READ = "SHORT_READ"
    PAIRED_END = "PAIRED_END"
    SINGLE_END = "SINGLE_END"
    MATE_PAIR = "MATE_PAIR"


# Per-member metadata, keyed by member name; attached once the class exists.
SequenceModality._metadata = {
    modality: {"description": text}
    for modality, text in (
        ("SINGLE_CELL", "Single-cell sequencing data"),
        ("BULK", "Bulk/population sequencing data"),
        ("SPATIAL", "Spatially-resolved sequencing"),
        ("LONG_READ", "Long-read sequencing (PacBio, Oxford Nanopore)"),
        ("SHORT_READ", "Short-read sequencing (Illumina)"),
        ("PAIRED_END", "Paired-end sequencing reads"),
        ("SINGLE_END", "Single-end sequencing reads"),
        ("MATE_PAIR", "Mate-pair sequencing libraries"),
    )
}
346
+
347
# Names exported by ``from <module> import *``: every enum class defined above.
__all__ = [
    # Residue / base codes
    "IUPACNucleotideCode", "StandardAminoAcid", "IUPACAminoAcidCode",
    # Alphabets, quality encodings and translation tables
    "SequenceAlphabet", "SequenceQualityEncoding", "GeneticCodeTable",
    # Sequence-level descriptors
    "SequenceStrand", "SequenceTopology", "SequenceModality",
]