valuesets-0.3.1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valuesets might be problematic. Click here for more details.

Files changed (248)
  1. valuesets/__init__.py +7 -0
  2. valuesets/_version.py +8 -0
  3. valuesets/datamodel/valuesets.py +13796 -0
  4. valuesets/datamodel/valuesets_dataclass.py +24503 -0
  5. valuesets/datamodel/valuesets_pydantic.py +13796 -0
  6. valuesets/enums/__init__.py +590 -0
  7. valuesets/enums/academic/__init__.py +1 -0
  8. valuesets/enums/academic/research.py +559 -0
  9. valuesets/enums/analytical_chemistry/__init__.py +1 -0
  10. valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
  11. valuesets/enums/bio/__init__.py +1 -0
  12. valuesets/enums/bio/biological_colors.py +238 -0
  13. valuesets/enums/bio/cell_cycle.py +180 -0
  14. valuesets/enums/bio/currency_chemicals.py +52 -0
  15. valuesets/enums/bio/developmental_stages.py +103 -0
  16. valuesets/enums/bio/genome_features.py +182 -0
  17. valuesets/enums/bio/genomics.py +91 -0
  18. valuesets/enums/bio/go_aspect.py +32 -0
  19. valuesets/enums/bio/go_causality.py +58 -0
  20. valuesets/enums/bio/go_evidence.py +129 -0
  21. valuesets/enums/bio/human_developmental_stages.py +62 -0
  22. valuesets/enums/bio/insdc_geographic_locations.py +591 -0
  23. valuesets/enums/bio/insdc_missing_values.py +49 -0
  24. valuesets/enums/bio/lipid_categories.py +67 -0
  25. valuesets/enums/bio/mouse_developmental_stages.py +62 -0
  26. valuesets/enums/bio/plant_biology.py +86 -0
  27. valuesets/enums/bio/plant_developmental_stages.py +54 -0
  28. valuesets/enums/bio/plant_sex.py +81 -0
  29. valuesets/enums/bio/protein_evidence.py +61 -0
  30. valuesets/enums/bio/proteomics_standards.py +123 -0
  31. valuesets/enums/bio/psi_mi.py +306 -0
  32. valuesets/enums/bio/relationship_to_oxygen.py +37 -0
  33. valuesets/enums/bio/sequence_alphabets.py +449 -0
  34. valuesets/enums/bio/sequence_chemistry.py +357 -0
  35. valuesets/enums/bio/sequencing_platforms.py +302 -0
  36. valuesets/enums/bio/structural_biology.py +320 -0
  37. valuesets/enums/bio/taxonomy.py +238 -0
  38. valuesets/enums/bio/trophic_levels.py +85 -0
  39. valuesets/enums/bio/uniprot_species.py +344 -0
  40. valuesets/enums/bio/viral_genome_types.py +47 -0
  41. valuesets/enums/bioprocessing/__init__.py +1 -0
  42. valuesets/enums/bioprocessing/scale_up.py +249 -0
  43. valuesets/enums/business/__init__.py +1 -0
  44. valuesets/enums/business/human_resources.py +275 -0
  45. valuesets/enums/business/industry_classifications.py +181 -0
  46. valuesets/enums/business/management_operations.py +228 -0
  47. valuesets/enums/business/organizational_structures.py +236 -0
  48. valuesets/enums/business/quality_management.py +181 -0
  49. valuesets/enums/business/supply_chain.py +232 -0
  50. valuesets/enums/chemistry/__init__.py +1 -0
  51. valuesets/enums/chemistry/chemical_entities.py +315 -0
  52. valuesets/enums/chemistry/reaction_directionality.py +65 -0
  53. valuesets/enums/chemistry/reactions.py +256 -0
  54. valuesets/enums/clinical/__init__.py +1 -0
  55. valuesets/enums/clinical/nih_demographics.py +177 -0
  56. valuesets/enums/clinical/phenopackets.py +254 -0
  57. valuesets/enums/common_value_sets.py +8791 -0
  58. valuesets/enums/computing/__init__.py +1 -0
  59. valuesets/enums/computing/file_formats.py +294 -0
  60. valuesets/enums/computing/maturity_levels.py +196 -0
  61. valuesets/enums/computing/mime_types.py +227 -0
  62. valuesets/enums/confidence_levels.py +168 -0
  63. valuesets/enums/contributor.py +30 -0
  64. valuesets/enums/core.py +42 -0
  65. valuesets/enums/data/__init__.py +1 -0
  66. valuesets/enums/data/data_absent_reason.py +53 -0
  67. valuesets/enums/data_science/__init__.py +1 -0
  68. valuesets/enums/data_science/binary_classification.py +87 -0
  69. valuesets/enums/data_science/emotion_classification.py +66 -0
  70. valuesets/enums/data_science/priority_severity.py +73 -0
  71. valuesets/enums/data_science/quality_control.py +46 -0
  72. valuesets/enums/data_science/sentiment_analysis.py +50 -0
  73. valuesets/enums/data_science/text_classification.py +97 -0
  74. valuesets/enums/demographics.py +206 -0
  75. valuesets/enums/ecological_interactions.py +151 -0
  76. valuesets/enums/energy/__init__.py +1 -0
  77. valuesets/enums/energy/energy.py +343 -0
  78. valuesets/enums/energy/fossil_fuels.py +29 -0
  79. valuesets/enums/energy/nuclear/__init__.py +1 -0
  80. valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
  81. valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
  82. valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
  83. valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
  84. valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
  85. valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
  86. valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
  87. valuesets/enums/energy/nuclear/reactor_types.py +163 -0
  88. valuesets/enums/environmental_health/__init__.py +1 -0
  89. valuesets/enums/environmental_health/exposures.py +265 -0
  90. valuesets/enums/geography/__init__.py +1 -0
  91. valuesets/enums/geography/geographic_codes.py +741 -0
  92. valuesets/enums/health/__init__.py +12 -0
  93. valuesets/enums/health/vaccination.py +98 -0
  94. valuesets/enums/health.py +36 -0
  95. valuesets/enums/health_base.py +36 -0
  96. valuesets/enums/healthcare.py +45 -0
  97. valuesets/enums/industry/__init__.py +1 -0
  98. valuesets/enums/industry/extractive_industry.py +94 -0
  99. valuesets/enums/industry/mining.py +388 -0
  100. valuesets/enums/industry/safety_colors.py +201 -0
  101. valuesets/enums/investigation.py +27 -0
  102. valuesets/enums/materials_science/__init__.py +1 -0
  103. valuesets/enums/materials_science/characterization_methods.py +112 -0
  104. valuesets/enums/materials_science/crystal_structures.py +76 -0
  105. valuesets/enums/materials_science/material_properties.py +119 -0
  106. valuesets/enums/materials_science/material_types.py +104 -0
  107. valuesets/enums/materials_science/pigments_dyes.py +198 -0
  108. valuesets/enums/materials_science/synthesis_methods.py +109 -0
  109. valuesets/enums/medical/__init__.py +1 -0
  110. valuesets/enums/medical/clinical.py +277 -0
  111. valuesets/enums/medical/neuroimaging.py +119 -0
  112. valuesets/enums/mining_processing.py +302 -0
  113. valuesets/enums/physics/__init__.py +1 -0
  114. valuesets/enums/physics/states_of_matter.py +46 -0
  115. valuesets/enums/social/__init__.py +1 -0
  116. valuesets/enums/social/person_status.py +29 -0
  117. valuesets/enums/spatial/__init__.py +1 -0
  118. valuesets/enums/spatial/spatial_qualifiers.py +246 -0
  119. valuesets/enums/statistics/__init__.py +5 -0
  120. valuesets/enums/statistics/prediction_outcomes.py +31 -0
  121. valuesets/enums/statistics.py +31 -0
  122. valuesets/enums/time/__init__.py +1 -0
  123. valuesets/enums/time/temporal.py +254 -0
  124. valuesets/enums/units/__init__.py +1 -0
  125. valuesets/enums/units/measurements.py +310 -0
  126. valuesets/enums/visual/__init__.py +1 -0
  127. valuesets/enums/visual/colors.py +376 -0
  128. valuesets/generators/__init__.py +19 -0
  129. valuesets/generators/auto_slot_injector.py +280 -0
  130. valuesets/generators/enhanced_pydantic_generator.py +100 -0
  131. valuesets/generators/enum_slot_generator.py +201 -0
  132. valuesets/generators/modular_rich_generator.py +353 -0
  133. valuesets/generators/prefix_standardizer.py +198 -0
  134. valuesets/generators/rich_enum.py +127 -0
  135. valuesets/generators/rich_pydantic_generator.py +310 -0
  136. valuesets/generators/smart_slot_syncer.py +428 -0
  137. valuesets/generators/sssom_generator.py +394 -0
  138. valuesets/merged/merged_hierarchy.yaml +21649 -0
  139. valuesets/schema/README.md +3 -0
  140. valuesets/schema/academic/research.yaml +911 -0
  141. valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
  142. valuesets/schema/bio/bio_entities.yaml +364 -0
  143. valuesets/schema/bio/biological_colors.yaml +434 -0
  144. valuesets/schema/bio/cell_cycle.yaml +309 -0
  145. valuesets/schema/bio/currency_chemicals.yaml +70 -0
  146. valuesets/schema/bio/developmental_stages.yaml +226 -0
  147. valuesets/schema/bio/genome_features.yaml +342 -0
  148. valuesets/schema/bio/genomics.yaml +101 -0
  149. valuesets/schema/bio/go_aspect.yaml +39 -0
  150. valuesets/schema/bio/go_causality.yaml +119 -0
  151. valuesets/schema/bio/go_evidence.yaml +215 -0
  152. valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
  153. valuesets/schema/bio/insdc_missing_values.yaml +85 -0
  154. valuesets/schema/bio/lipid_categories.yaml +72 -0
  155. valuesets/schema/bio/plant_biology.yaml +125 -0
  156. valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
  157. valuesets/schema/bio/plant_sex.yaml +108 -0
  158. valuesets/schema/bio/protein_evidence.yaml +63 -0
  159. valuesets/schema/bio/proteomics_standards.yaml +116 -0
  160. valuesets/schema/bio/psi_mi.yaml +400 -0
  161. valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
  162. valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
  163. valuesets/schema/bio/sequence_chemistry.yaml +477 -0
  164. valuesets/schema/bio/sequencing_platforms.yaml +515 -0
  165. valuesets/schema/bio/structural_biology.yaml +428 -0
  166. valuesets/schema/bio/taxonomy.yaml +453 -0
  167. valuesets/schema/bio/trophic_levels.yaml +118 -0
  168. valuesets/schema/bio/uniprot_species.yaml +1209 -0
  169. valuesets/schema/bio/viral_genome_types.yaml +99 -0
  170. valuesets/schema/bioprocessing/scale_up.yaml +458 -0
  171. valuesets/schema/business/human_resources.yaml +752 -0
  172. valuesets/schema/business/industry_classifications.yaml +448 -0
  173. valuesets/schema/business/management_operations.yaml +602 -0
  174. valuesets/schema/business/organizational_structures.yaml +645 -0
  175. valuesets/schema/business/quality_management.yaml +502 -0
  176. valuesets/schema/business/supply_chain.yaml +688 -0
  177. valuesets/schema/chemistry/chemical_entities.yaml +639 -0
  178. valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
  179. valuesets/schema/chemistry/reactions.yaml +442 -0
  180. valuesets/schema/clinical/nih_demographics.yaml +285 -0
  181. valuesets/schema/clinical/phenopackets.yaml +429 -0
  182. valuesets/schema/computing/file_formats.yaml +631 -0
  183. valuesets/schema/computing/maturity_levels.yaml +229 -0
  184. valuesets/schema/computing/mime_types.yaml +266 -0
  185. valuesets/schema/confidence_levels.yaml +206 -0
  186. valuesets/schema/contributor.yaml +30 -0
  187. valuesets/schema/core.yaml +55 -0
  188. valuesets/schema/data/data_absent_reason.yaml +82 -0
  189. valuesets/schema/data_science/binary_classification.yaml +125 -0
  190. valuesets/schema/data_science/emotion_classification.yaml +109 -0
  191. valuesets/schema/data_science/priority_severity.yaml +122 -0
  192. valuesets/schema/data_science/quality_control.yaml +68 -0
  193. valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
  194. valuesets/schema/data_science/text_classification.yaml +135 -0
  195. valuesets/schema/demographics.yaml +238 -0
  196. valuesets/schema/ecological_interactions.yaml +298 -0
  197. valuesets/schema/energy/energy.yaml +595 -0
  198. valuesets/schema/energy/fossil_fuels.yaml +28 -0
  199. valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
  200. valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
  201. valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
  202. valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
  203. valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
  204. valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
  205. valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
  206. valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
  207. valuesets/schema/environmental_health/exposures.yaml +355 -0
  208. valuesets/schema/generated_slots.yaml +1828 -0
  209. valuesets/schema/geography/geographic_codes.yaml +1018 -0
  210. valuesets/schema/health/vaccination.yaml +102 -0
  211. valuesets/schema/health.yaml +38 -0
  212. valuesets/schema/healthcare.yaml +53 -0
  213. valuesets/schema/industry/extractive_industry.yaml +89 -0
  214. valuesets/schema/industry/mining.yaml +888 -0
  215. valuesets/schema/industry/safety_colors.yaml +375 -0
  216. valuesets/schema/investigation.yaml +64 -0
  217. valuesets/schema/materials_science/characterization_methods.yaml +193 -0
  218. valuesets/schema/materials_science/crystal_structures.yaml +138 -0
  219. valuesets/schema/materials_science/material_properties.yaml +135 -0
  220. valuesets/schema/materials_science/material_types.yaml +151 -0
  221. valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
  222. valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
  223. valuesets/schema/medical/clinical.yaml +610 -0
  224. valuesets/schema/medical/neuroimaging.yaml +325 -0
  225. valuesets/schema/mining_processing.yaml +295 -0
  226. valuesets/schema/physics/states_of_matter.yaml +46 -0
  227. valuesets/schema/slot_mixins.yaml +143 -0
  228. valuesets/schema/social/person_status.yaml +28 -0
  229. valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
  230. valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
  231. valuesets/schema/statistics.yaml +34 -0
  232. valuesets/schema/time/temporal.yaml +435 -0
  233. valuesets/schema/types.yaml +15 -0
  234. valuesets/schema/units/measurements.yaml +675 -0
  235. valuesets/schema/valuesets.yaml +100 -0
  236. valuesets/schema/visual/colors.yaml +778 -0
  237. valuesets/utils/__init__.py +6 -0
  238. valuesets/utils/comparison.py +102 -0
  239. valuesets/utils/expand_dynamic_enums.py +414 -0
  240. valuesets/utils/mapping_utils.py +236 -0
  241. valuesets/validators/__init__.py +11 -0
  242. valuesets/validators/enum_evaluator.py +669 -0
  243. valuesets/validators/oak_config.yaml +70 -0
  244. valuesets/validators/validate_with_ols.py +241 -0
  245. valuesets-0.3.1.dist-info/METADATA +395 -0
  246. valuesets-0.3.1.dist-info/RECORD +248 -0
  247. valuesets-0.3.1.dist-info/WHEEL +4 -0
  248. valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,302 @@
1
+ """
2
+ Sequencing Platform Value Sets
3
+
4
+ Value sets for DNA/RNA sequencing platforms, technologies, and methodologies
5
+
6
+ Generated from: bio/sequencing_platforms.yaml
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Dict, Any, Optional
12
+ from valuesets.generators.rich_enum import RichEnum
13
+
14
class SequencingPlatform(RichEnum):
    """
    DNA/RNA sequencing instruments and platforms used in genomics research.

    Each member's value is identical to its name.
    """

    # Illumina instruments
    ILLUMINA_HISEQ_2000 = "ILLUMINA_HISEQ_2000"
    ILLUMINA_HISEQ_2500 = "ILLUMINA_HISEQ_2500"
    ILLUMINA_HISEQ_3000 = "ILLUMINA_HISEQ_3000"
    ILLUMINA_HISEQ_4000 = "ILLUMINA_HISEQ_4000"
    ILLUMINA_HISEQ_X = "ILLUMINA_HISEQ_X"
    ILLUMINA_NOVASEQ_6000 = "ILLUMINA_NOVASEQ_6000"
    ILLUMINA_NEXTSEQ_500 = "ILLUMINA_NEXTSEQ_500"
    ILLUMINA_NEXTSEQ_550 = "ILLUMINA_NEXTSEQ_550"
    ILLUMINA_NEXTSEQ_1000 = "ILLUMINA_NEXTSEQ_1000"
    ILLUMINA_NEXTSEQ_2000 = "ILLUMINA_NEXTSEQ_2000"
    ILLUMINA_MISEQ = "ILLUMINA_MISEQ"
    ILLUMINA_ISEQ_100 = "ILLUMINA_ISEQ_100"

    # Pacific Biosciences instruments
    PACBIO_RS = "PACBIO_RS"
    PACBIO_RS_II = "PACBIO_RS_II"
    PACBIO_SEQUEL = "PACBIO_SEQUEL"
    PACBIO_SEQUEL_II = "PACBIO_SEQUEL_II"
    PACBIO_REVIO = "PACBIO_REVIO"

    # Oxford Nanopore instruments
    NANOPORE_MINION = "NANOPORE_MINION"
    NANOPORE_GRIDION = "NANOPORE_GRIDION"
    NANOPORE_PROMETHION = "NANOPORE_PROMETHION"
    NANOPORE_FLONGLE = "NANOPORE_FLONGLE"

    # Element Biosciences and MGI/BGI instruments
    ELEMENT_AVITI = "ELEMENT_AVITI"
    MGI_DNBSEQ_T7 = "MGI_DNBSEQ_T7"
    MGI_DNBSEQ_G400 = "MGI_DNBSEQ_G400"
    MGI_DNBSEQ_G50 = "MGI_DNBSEQ_G50"

    # Other platforms
    SANGER_SEQUENCING = "SANGER_SEQUENCING"
    ROCHE_454_GS = "ROCHE_454_GS"
    LIFE_TECHNOLOGIES_ION_TORRENT = "LIFE_TECHNOLOGIES_ION_TORRENT"
    ABI_SOLID = "ABI_SOLID"


# Per-member metadata, keyed by member name and attached once the class exists.
# Entries carry a description, optionally an ontology CURIE under "meaning",
# vendor/read-type/chemistry annotations, and optionally a list of aliases.
# NOTE(review): how RichEnum consumes `_metadata` is defined outside this
# file -- confirm before renaming any key.
SequencingPlatform._metadata = {
    "ILLUMINA_HISEQ_2000": {
        "description": "Illumina HiSeq 2000",
        "meaning": "OBI:0002001",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_HISEQ_2500": {
        "description": "Illumina HiSeq 2500",
        "meaning": "OBI:0002002",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_HISEQ_3000": {
        "description": "Illumina HiSeq 3000",
        "meaning": "OBI:0002048",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_HISEQ_4000": {
        "description": "Illumina HiSeq 4000",
        "meaning": "OBI:0002049",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_HISEQ_X": {
        "description": "Illumina HiSeq X",
        "meaning": "OBI:0002129",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
        "aliases": ["Illumina HiSeq X Ten"],
    },
    "ILLUMINA_NOVASEQ_6000": {
        "description": "Illumina NovaSeq 6000",
        "meaning": "OBI:0002630",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_NEXTSEQ_500": {
        "description": "Illumina NextSeq 500",
        "meaning": "OBI:0002021",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_NEXTSEQ_550": {
        "description": "Illumina NextSeq 550",
        "meaning": "OBI:0003387",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_NEXTSEQ_1000": {
        "description": "Illumina NextSeq 1000",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_NEXTSEQ_2000": {
        "description": "Illumina NextSeq 2000",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_MISEQ": {
        "description": "Illumina MiSeq",
        "meaning": "OBI:0002003",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "ILLUMINA_ISEQ_100": {
        "description": "Illumina iSeq 100",
        "annotations": {
            "manufacturer": "Illumina",
            "read_type": "short",
            "chemistry": "sequencing by synthesis",
        },
    },
    "PACBIO_RS": {
        "description": "PacBio RS",
        "annotations": {
            "manufacturer": "Pacific Biosciences",
            "read_type": "long",
            "chemistry": "single molecule real time",
        },
    },
    "PACBIO_RS_II": {
        "description": "PacBio RS II",
        "meaning": "OBI:0002012",
        "annotations": {
            "manufacturer": "Pacific Biosciences",
            "read_type": "long",
            "chemistry": "single molecule real time",
        },
    },
    "PACBIO_SEQUEL": {
        "description": "PacBio Sequel",
        "meaning": "OBI:0002632",
        "annotations": {
            "manufacturer": "Pacific Biosciences",
            "read_type": "long",
            "chemistry": "single molecule real time",
        },
    },
    "PACBIO_SEQUEL_II": {
        "description": "PacBio Sequel II",
        "meaning": "OBI:0002633",
        "annotations": {
            "manufacturer": "Pacific Biosciences",
            "read_type": "long",
            "chemistry": "single molecule real time",
        },
    },
    "PACBIO_REVIO": {
        "description": "PacBio Revio",
        "annotations": {
            "manufacturer": "Pacific Biosciences",
            "read_type": "long",
            "chemistry": "single molecule real time",
        },
    },
    "NANOPORE_MINION": {
        "description": "Oxford Nanopore MinION",
        "meaning": "OBI:0002750",
        "annotations": {
            "manufacturer": "Oxford Nanopore Technologies",
            "read_type": "long",
            "chemistry": "nanopore sequencing",
        },
        "aliases": ["Oxford Nanopore MinION"],
    },
    "NANOPORE_GRIDION": {
        "description": "Oxford Nanopore GridION",
        "meaning": "OBI:0002751",
        "annotations": {
            "manufacturer": "Oxford Nanopore Technologies",
            "read_type": "long",
            "chemistry": "nanopore sequencing",
        },
        "aliases": ["Oxford Nanopore GridION Mk1"],
    },
    "NANOPORE_PROMETHION": {
        "description": "Oxford Nanopore PromethION",
        "meaning": "OBI:0002752",
        "annotations": {
            "manufacturer": "Oxford Nanopore Technologies",
            "read_type": "long",
            "chemistry": "nanopore sequencing",
        },
        "aliases": ["Oxford Nanopore PromethION"],
    },
    "NANOPORE_FLONGLE": {
        "description": "Oxford Nanopore Flongle",
        "annotations": {
            "manufacturer": "Oxford Nanopore Technologies",
            "read_type": "long",
            "chemistry": "nanopore sequencing",
        },
    },
    "ELEMENT_AVITI": {
        "description": "Element Biosciences AVITI",
        "annotations": {
            "manufacturer": "Element Biosciences",
            "read_type": "short",
            "chemistry": "sequencing by avidity",
        },
    },
    "MGI_DNBSEQ_T7": {
        "description": "MGI DNBSEQ-T7",
        "annotations": {
            "manufacturer": "MGI/BGI",
            "read_type": "short",
            "chemistry": "DNA nanoball sequencing",
        },
    },
    "MGI_DNBSEQ_G400": {
        "description": "MGI DNBSEQ-G400",
        "annotations": {
            "manufacturer": "MGI/BGI",
            "read_type": "short",
            "chemistry": "DNA nanoball sequencing",
        },
    },
    "MGI_DNBSEQ_G50": {
        "description": "MGI DNBSEQ-G50",
        "annotations": {
            "manufacturer": "MGI/BGI",
            "read_type": "short",
            "chemistry": "DNA nanoball sequencing",
        },
    },
    "SANGER_SEQUENCING": {
        "description": "Sanger chain termination sequencing",
        "meaning": "OBI:0000695",
        "annotations": {
            "manufacturer": "Various",
            "read_type": "short",
            "chemistry": "chain termination",
        },
        "aliases": ["chain termination sequencing assay"],
    },
    "ROCHE_454_GS": {
        "description": "Roche 454 Genome Sequencer",
        "meaning": "OBI:0000702",
        "annotations": {
            "manufacturer": "Roche/454",
            "read_type": "short",
            "chemistry": "pyrosequencing",
            "status": "discontinued",
        },
        "aliases": ["454 Genome Sequencer FLX"],
    },
    "LIFE_TECHNOLOGIES_ION_TORRENT": {
        "description": "Life Technologies Ion Torrent",
        "annotations": {
            "manufacturer": "Life Technologies/Thermo Fisher",
            "read_type": "short",
            "chemistry": "semiconductor sequencing",
        },
    },
    "ABI_SOLID": {
        "description": "ABI SOLiD",
        "annotations": {
            "manufacturer": "Life Technologies/Applied Biosystems",
            "read_type": "short",
            "chemistry": "sequencing by ligation",
            "status": "discontinued",
        },
    },
}
81
+
82
class SequencingChemistry(RichEnum):
    """
    Underlying chemical methods by which DNA/RNA is sequenced.

    Each member's value is identical to its name.
    """

    SEQUENCING_BY_SYNTHESIS = "SEQUENCING_BY_SYNTHESIS"
    SINGLE_MOLECULE_REAL_TIME = "SINGLE_MOLECULE_REAL_TIME"
    NANOPORE_SEQUENCING = "NANOPORE_SEQUENCING"
    PYROSEQUENCING = "PYROSEQUENCING"
    SEQUENCING_BY_LIGATION = "SEQUENCING_BY_LIGATION"
    CHAIN_TERMINATION = "CHAIN_TERMINATION"
    SEMICONDUCTOR_SEQUENCING = "SEMICONDUCTOR_SEQUENCING"
    DNA_NANOBALL_SEQUENCING = "DNA_NANOBALL_SEQUENCING"
    SEQUENCING_BY_AVIDITY = "SEQUENCING_BY_AVIDITY"


# Per-member metadata, keyed by member name and attached once the class exists.
# Only three chemistries carry an ontology CURIE ("meaning") and aliases; the
# rest have a description only.
SequencingChemistry._metadata = {
    "SEQUENCING_BY_SYNTHESIS": {
        "description": "Sequencing by synthesis (Illumina)",
        "meaning": "OBI:0000734",
        "aliases": ["DNA sequencing by synthesis assay"],
    },
    "SINGLE_MOLECULE_REAL_TIME": {
        "description": "Single molecule real-time sequencing (PacBio)",
    },
    "NANOPORE_SEQUENCING": {
        "description": "Nanopore sequencing (Oxford Nanopore)",
    },
    "PYROSEQUENCING": {
        "description": "Pyrosequencing (454)",
    },
    "SEQUENCING_BY_LIGATION": {
        "description": "Sequencing by ligation (SOLiD)",
        "meaning": "OBI:0000723",
        "aliases": ["DNA sequencing by ligation assay"],
    },
    "CHAIN_TERMINATION": {
        "description": "Chain termination method (Sanger)",
        "meaning": "OBI:0000695",
        "aliases": ["chain termination sequencing assay"],
    },
    "SEMICONDUCTOR_SEQUENCING": {
        "description": "Semiconductor/Ion semiconductor sequencing",
    },
    "DNA_NANOBALL_SEQUENCING": {
        "description": "DNA nanoball sequencing (MGI/BGI)",
    },
    "SEQUENCING_BY_AVIDITY": {
        "description": "Sequencing by avidity (Element Biosciences)",
    },
}
109
+
110
class LibraryPreparation(RichEnum):
    """
    Ways of preparing a sequencing library from a nucleic acid sample.

    Each member's value is identical to its name.
    """

    GENOMIC_DNA = "GENOMIC_DNA"
    WHOLE_GENOME_AMPLIFICATION = "WHOLE_GENOME_AMPLIFICATION"
    PCR_AMPLICON = "PCR_AMPLICON"
    RNA_SEQ = "RNA_SEQ"
    SMALL_RNA_SEQ = "SMALL_RNA_SEQ"
    SINGLE_CELL_RNA_SEQ = "SINGLE_CELL_RNA_SEQ"
    ATAC_SEQ = "ATAC_SEQ"
    CHIP_SEQ = "CHIP_SEQ"
    BISULFITE_SEQ = "BISULFITE_SEQ"
    HI_C = "HI_C"
    CUT_AND_RUN = "CUT_AND_RUN"
    CUT_AND_TAG = "CUT_AND_TAG"
    CAPTURE_SEQUENCING = "CAPTURE_SEQUENCING"
    EXOME_SEQUENCING = "EXOME_SEQUENCING"
    METAGENOMICS = "METAGENOMICS"
    AMPLICON_SEQUENCING = "AMPLICON_SEQUENCING"
    DIRECT_RNA = "DIRECT_RNA"
    CDNA_SEQUENCING = "CDNA_SEQUENCING"
    RIBOSOME_PROFILING = "RIBOSOME_PROFILING"


# Per-member metadata, keyed by member name and attached once the class exists.
# Every entry here holds a human-readable description only.
LibraryPreparation._metadata = {
    "GENOMIC_DNA": {"description": "Genomic DNA library preparation"},
    "WHOLE_GENOME_AMPLIFICATION": {"description": "Whole genome amplification (WGA)"},
    "PCR_AMPLICON": {"description": "PCR amplicon sequencing"},
    "RNA_SEQ": {"description": "RNA sequencing library prep"},
    "SMALL_RNA_SEQ": {"description": "Small RNA sequencing"},
    "SINGLE_CELL_RNA_SEQ": {"description": "Single-cell RNA sequencing"},
    "ATAC_SEQ": {"description": "ATAC-seq (chromatin accessibility)"},
    "CHIP_SEQ": {"description": "ChIP-seq (chromatin immunoprecipitation)"},
    "BISULFITE_SEQ": {"description": "Bisulfite sequencing (methylation)"},
    "HI_C": {"description": "Hi-C (chromosome conformation capture)"},
    "CUT_AND_RUN": {"description": "CUT&RUN (chromatin profiling)"},
    "CUT_AND_TAG": {"description": "CUT&Tag (chromatin profiling)"},
    "CAPTURE_SEQUENCING": {"description": "Target capture/enrichment sequencing"},
    "EXOME_SEQUENCING": {"description": "Whole exome sequencing"},
    "METAGENOMICS": {"description": "Metagenomic sequencing"},
    "AMPLICON_SEQUENCING": {"description": "16S/ITS amplicon sequencing"},
    "DIRECT_RNA": {"description": "Direct RNA sequencing (nanopore)"},
    "CDNA_SEQUENCING": {"description": "cDNA sequencing"},
    "RIBOSOME_PROFILING": {"description": "Ribosome profiling (Ribo-seq)"},
}
157
+
158
class SequencingApplication(RichEnum):
    """
    Main applications and assays that rely on DNA/RNA sequencing.

    Each member's value is identical to its name.
    """

    WHOLE_GENOME_SEQUENCING = "WHOLE_GENOME_SEQUENCING"
    WHOLE_EXOME_SEQUENCING = "WHOLE_EXOME_SEQUENCING"
    TRANSCRIPTOME_SEQUENCING = "TRANSCRIPTOME_SEQUENCING"
    TARGETED_SEQUENCING = "TARGETED_SEQUENCING"
    EPIGENOMICS = "EPIGENOMICS"
    METAGENOMICS = "METAGENOMICS"
    SINGLE_CELL_GENOMICS = "SINGLE_CELL_GENOMICS"
    SINGLE_CELL_TRANSCRIPTOMICS = "SINGLE_CELL_TRANSCRIPTOMICS"
    CHROMATIN_IMMUNOPRECIPITATION = "CHROMATIN_IMMUNOPRECIPITATION"
    CHROMATIN_ACCESSIBILITY = "CHROMATIN_ACCESSIBILITY"
    DNA_METHYLATION = "DNA_METHYLATION"
    CHROMOSOME_CONFORMATION = "CHROMOSOME_CONFORMATION"
    VARIANT_CALLING = "VARIANT_CALLING"
    PHARMACOGENOMICS = "PHARMACOGENOMICS"
    CLINICAL_DIAGNOSTICS = "CLINICAL_DIAGNOSTICS"
    POPULATION_GENOMICS = "POPULATION_GENOMICS"


# Per-member metadata, keyed by member name and attached once the class exists.
# Where present, "meaning" holds an EDAM topic CURIE and "aliases" a list of
# alternative labels; the remaining entries have a description only.
SequencingApplication._metadata = {
    "WHOLE_GENOME_SEQUENCING": {
        "description": "Whole genome sequencing (WGS)",
        "meaning": "EDAM:topic_3673",
    },
    "WHOLE_EXOME_SEQUENCING": {
        "description": "Whole exome sequencing (WES)",
        "meaning": "EDAM:topic_3676",
        "aliases": ["Exome sequencing"],
    },
    "TRANSCRIPTOME_SEQUENCING": {
        "description": "RNA sequencing (RNA-seq)",
        "meaning": "EDAM:topic_3170",
        "aliases": ["RNA-Seq"],
    },
    "TARGETED_SEQUENCING": {
        "description": "Targeted gene panel sequencing",
    },
    "EPIGENOMICS": {
        "description": "Epigenomic profiling",
    },
    "METAGENOMICS": {
        "description": "Metagenomic sequencing",
        "meaning": "EDAM:topic_3837",
        "aliases": ["Metagenomic sequencing"],
    },
    "SINGLE_CELL_GENOMICS": {
        "description": "Single-cell genomics",
    },
    "SINGLE_CELL_TRANSCRIPTOMICS": {
        "description": "Single-cell transcriptomics",
        "meaning": "EDAM:topic_4028",
        "aliases": ["Single-cell sequencing"],
    },
    "CHROMATIN_IMMUNOPRECIPITATION": {
        "description": "ChIP-seq",
        "meaning": "EDAM:topic_3656",
        "aliases": ["Immunoprecipitation experiment"],
    },
    "CHROMATIN_ACCESSIBILITY": {
        "description": "ATAC-seq/FAIRE-seq",
    },
    "DNA_METHYLATION": {
        "description": "Bisulfite/methylation sequencing",
    },
    "CHROMOSOME_CONFORMATION": {
        "description": "Hi-C/3C-seq",
    },
    "VARIANT_CALLING": {
        "description": "Genetic variant discovery",
    },
    "PHARMACOGENOMICS": {
        "description": "Pharmacogenomic sequencing",
    },
    "CLINICAL_DIAGNOSTICS": {
        "description": "Clinical diagnostic sequencing",
    },
    "POPULATION_GENOMICS": {
        "description": "Population-scale genomics",
    },
}
199
+
200
class ReadType(RichEnum):
    """
    Read configurations produced by the various sequencing platforms.

    Each member's value is identical to its name.
    """

    SINGLE_END = "SINGLE_END"
    PAIRED_END = "PAIRED_END"
    MATE_PAIR = "MATE_PAIR"
    LONG_READ = "LONG_READ"
    ULTRA_LONG_READ = "ULTRA_LONG_READ"
    CONTINUOUS_LONG_READ = "CONTINUOUS_LONG_READ"


# Per-member metadata, keyed by member name and attached once the class exists.
# Every entry here holds a human-readable description only.
ReadType._metadata = {
    "SINGLE_END": {"description": "Single-end reads"},
    "PAIRED_END": {"description": "Paired-end reads"},
    "MATE_PAIR": {"description": "Mate-pair reads (large insert)"},
    "LONG_READ": {"description": "Long reads (>1kb typical)"},
    "ULTRA_LONG_READ": {"description": "Ultra-long reads (>10kb)"},
    "CONTINUOUS_LONG_READ": {"description": "Continuous long reads (nanopore)"},
}
221
+
222
class SequenceFileFormat(RichEnum):
    """
    Common file formats for storing sequence data and derived results.

    Each member's value is identical to its name.
    """

    # Raw sequence formats
    FASTA = "FASTA"
    FASTQ = "FASTQ"

    # Alignment formats
    SAM = "SAM"
    BAM = "BAM"
    CRAM = "CRAM"

    # Variant formats
    VCF = "VCF"
    BCF = "BCF"

    # Annotation, interval and signal-track formats
    GFF3 = "GFF3"
    GTF = "GTF"
    BED = "BED"
    BIGWIG = "BIGWIG"
    BIGBED = "BIGBED"

    # Container and instrument-specific formats
    HDF5 = "HDF5"
    SFF = "SFF"
    FAST5 = "FAST5"
    POD5 = "POD5"


# Per-member metadata, keyed by member name and attached once the class exists.
# "extensions" is a single comma-separated string (not a list); "meaning",
# where present, holds an EDAM format CURIE.
SequenceFileFormat._metadata = {
    "FASTA": {
        "description": "FASTA sequence format",
        "meaning": "EDAM:format_1929",
        "annotations": {
            "extensions": ".fa, .fasta, .fna, .ffn, .faa, .frn",
            "content": "sequences only",
        },
    },
    "FASTQ": {
        "description": "FASTQ sequence with quality format",
        "meaning": "EDAM:format_1930",
        "annotations": {
            "extensions": ".fq, .fastq",
            "content": "sequences and quality scores",
        },
    },
    "SAM": {
        "description": "Sequence Alignment Map format",
        "meaning": "EDAM:format_2573",
        "annotations": {
            "extensions": ".sam",
            "content": "aligned sequences (text)",
        },
    },
    "BAM": {
        "description": "Binary Alignment Map format",
        "meaning": "EDAM:format_2572",
        "annotations": {
            "extensions": ".bam",
            "content": "aligned sequences (binary)",
        },
    },
    "CRAM": {
        "description": "Compressed Reference-oriented Alignment Map",
        "annotations": {
            "extensions": ".cram",
            "content": "compressed aligned sequences",
        },
    },
    "VCF": {
        "description": "Variant Call Format",
        "meaning": "EDAM:format_3016",
        "annotations": {
            "extensions": ".vcf",
            "content": "genetic variants",
        },
    },
    "BCF": {
        "description": "Binary Variant Call Format",
        "meaning": "EDAM:format_3020",
        "annotations": {
            "extensions": ".bcf",
            "content": "genetic variants (binary)",
        },
    },
    "GFF3": {
        "description": "Generic Feature Format version 3",
        "annotations": {
            "extensions": ".gff, .gff3",
            "content": "genomic annotations",
        },
    },
    "GTF": {
        "description": "Gene Transfer Format",
        "annotations": {
            "extensions": ".gtf",
            "content": "gene annotations",
        },
    },
    "BED": {
        "description": "Browser Extensible Data format",
        "annotations": {
            "extensions": ".bed",
            "content": "genomic intervals",
        },
    },
    "BIGWIG": {
        "description": "BigWig format for continuous data",
        "annotations": {
            "extensions": ".bw, .bigwig",
            "content": "continuous genomic data",
        },
    },
    "BIGBED": {
        "description": "BigBed format for interval data",
        "annotations": {
            "extensions": ".bb, .bigbed",
            "content": "genomic intervals (indexed)",
        },
    },
    "HDF5": {
        "description": "Hierarchical Data Format 5",
        "annotations": {
            "extensions": ".h5, .hdf5",
            "content": "multi-dimensional arrays",
        },
    },
    "SFF": {
        "description": "Standard Flowgram Format (454)",
        "meaning": "EDAM:format_3284",
        "annotations": {
            "extensions": ".sff",
            "content": "454 sequencing data",
            "status": "legacy",
        },
    },
    "FAST5": {
        "description": "Fast5 format (Oxford Nanopore)",
        "annotations": {
            "extensions": ".fast5",
            "content": "nanopore raw signal data",
        },
    },
    "POD5": {
        "description": "POD5 format (Oxford Nanopore, newer)",
        "annotations": {
            "extensions": ".pod5",
            "content": "nanopore raw signal data (compressed)",
        },
    },
}
263
+
264
class DataProcessingLevel(RichEnum):
    """
    Stages of processing that raw sequencing data can have gone through.

    Each member's value is identical to its name.
    """

    RAW = "RAW"
    QUALITY_FILTERED = "QUALITY_FILTERED"
    TRIMMED = "TRIMMED"
    ALIGNED = "ALIGNED"
    DEDUPLICATED = "DEDUPLICATED"
    RECALIBRATED = "RECALIBRATED"
    VARIANT_CALLED = "VARIANT_CALLED"
    NORMALIZED = "NORMALIZED"
    ASSEMBLED = "ASSEMBLED"
    ANNOTATED = "ANNOTATED"


# Per-member metadata, keyed by member name and attached once the class exists.
# Every entry here holds a human-readable description only.
DataProcessingLevel._metadata = {
    "RAW": {"description": "Raw unprocessed sequencing reads"},
    "QUALITY_FILTERED": {"description": "Quality filtered reads"},
    "TRIMMED": {"description": "Adapter/quality trimmed reads"},
    "ALIGNED": {"description": "Aligned to reference genome"},
    "DEDUPLICATED": {"description": "PCR duplicates removed"},
    "RECALIBRATED": {"description": "Base quality score recalibrated"},
    "VARIANT_CALLED": {"description": "Variants called from alignments"},
    "NORMALIZED": {"description": "Expression normalized (RNA-seq)"},
    "ASSEMBLED": {"description": "De novo assembled sequences"},
    "ANNOTATED": {"description": "Functionally annotated sequences"},
}
293
+
294
# Public API of this module: the seven sequencing value-set enums, listed in
# the order they are defined above.
__all__ = [
    "SequencingPlatform",
    "SequencingChemistry",
    "LibraryPreparation",
    "SequencingApplication",
    "ReadType",
    "SequenceFileFormat",
    "DataProcessingLevel",
]
@@ -0,0 +1,320 @@
1
+ """
2
+ Structural Biology Value Sets
3
+
4
+ Value sets for structural biology techniques, including cryo-EM, X-ray crystallography, SAXS/SANS, mass spectrometry, and related sample preparation and data processing methods.
5
+
6
+
7
+ Generated from: bio/structural_biology.yaml
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Dict, Any, Optional
13
+ from valuesets.generators.rich_enum import RichEnum
14
+
15
class SampleType(RichEnum):
    """Types of biological samples used in structural biology"""

    # Each member's value is its own name.
    PROTEIN = "PROTEIN"
    NUCLEIC_ACID = "NUCLEIC_ACID"
    PROTEIN_COMPLEX = "PROTEIN_COMPLEX"
    MEMBRANE_PROTEIN = "MEMBRANE_PROTEIN"
    VIRUS = "VIRUS"
    ORGANELLE = "ORGANELLE"
    CELL = "CELL"
    TISSUE = "TISSUE"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Only a 'description' is recorded
# for each sample type.
SampleType._metadata = {
    member: {"description": text}
    for member, text in (
        ("PROTEIN", "Purified protein sample"),
        ("NUCLEIC_ACID", "Nucleic acid sample (DNA or RNA)"),
        ("PROTEIN_COMPLEX", "Protein-protein or protein-nucleic acid complex"),
        ("MEMBRANE_PROTEIN", "Membrane-associated protein sample"),
        ("VIRUS", "Viral particle or capsid"),
        ("ORGANELLE", "Cellular organelle (mitochondria, chloroplast, etc.)"),
        ("CELL", "Whole cell sample"),
        ("TISSUE", "Tissue sample"),
    )
}
40
+
41
class StructuralBiologyTechnique(RichEnum):
    """Structural biology experimental techniques"""

    # Each member's value is its own name.
    CRYO_EM = "CRYO_EM"
    CRYO_ET = "CRYO_ET"
    X_RAY_CRYSTALLOGRAPHY = "X_RAY_CRYSTALLOGRAPHY"
    NEUTRON_CRYSTALLOGRAPHY = "NEUTRON_CRYSTALLOGRAPHY"
    SAXS = "SAXS"
    SANS = "SANS"
    WAXS = "WAXS"
    NMR = "NMR"
    MASS_SPECTROMETRY = "MASS_SPECTROMETRY"
    NEGATIVE_STAIN_EM = "NEGATIVE_STAIN_EM"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Records hold a 'description' and,
# where present, a 'meaning' (CHMO CURIE) and free-text 'annotations'.
StructuralBiologyTechnique._metadata = {
    "CRYO_EM": {
        "description": "Cryo-electron microscopy",
        "meaning": "CHMO:0002413",
        "annotations": {
            "resolution_range": "2-30 Å typical",
            "aliases": "cryoEM, electron cryo-microscopy",
        },
    },
    "CRYO_ET": {
        "description": "Cryo-electron tomography",
        "annotations": {
            "resolution_range": "20-100 Å typical",
            "aliases": "cryoET, electron cryo-tomography",
        },
    },
    "X_RAY_CRYSTALLOGRAPHY": {
        "description": "X-ray crystallography",
        "meaning": "CHMO:0000159",
        "annotations": {
            "resolution_range": "1-4 Å typical",
            "aliases": "XRC, macromolecular crystallography",
        },
    },
    "NEUTRON_CRYSTALLOGRAPHY": {
        "description": "Neutron crystallography",
        "annotations": {
            "advantages": "hydrogen positions, deuteration studies",
        },
    },
    "SAXS": {
        "description": "Small-angle X-ray scattering",
        "meaning": "CHMO:0000204",
        "annotations": {
            "information": "low-resolution structure, conformational changes",
        },
    },
    "SANS": {
        "description": "Small-angle neutron scattering",
        "annotations": {
            "advantages": "contrast variation with deuteration",
        },
    },
    "WAXS": {
        "description": "Wide-angle X-ray scattering",
    },
    "NMR": {
        "description": "Nuclear magnetic resonance spectroscopy",
        "meaning": "CHMO:0000591",
        "annotations": {
            "information": "solution structure, dynamics",
        },
    },
    "MASS_SPECTROMETRY": {
        "description": "Mass spectrometry",
        "meaning": "CHMO:0000470",
        "annotations": {
            "applications": "native MS, crosslinking, HDX",
        },
    },
    "NEGATIVE_STAIN_EM": {
        "description": "Negative stain electron microscopy",
        "annotations": {
            "resolution_range": "15-30 Å typical",
        },
    },
}
70
+
71
class CryoEMPreparationType(RichEnum):
    """Types of cryo-EM sample preparation"""

    # Each member's value is its own name.
    VITREOUS_ICE = "VITREOUS_ICE"
    CRYO_SECTIONING = "CRYO_SECTIONING"
    FREEZE_SUBSTITUTION = "FREEZE_SUBSTITUTION"
    HIGH_PRESSURE_FREEZING = "HIGH_PRESSURE_FREEZING"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Only a 'description' is recorded.
CryoEMPreparationType._metadata = {
    member: {"description": text}
    for member, text in (
        ("VITREOUS_ICE", "Sample embedded in vitreous ice"),
        ("CRYO_SECTIONING", "Cryo-sectioned sample"),
        ("FREEZE_SUBSTITUTION", "Freeze-substituted sample"),
        ("HIGH_PRESSURE_FREEZING", "High-pressure frozen sample"),
    )
}
88
+
89
class CryoEMGridType(RichEnum):
    """Types of electron microscopy grids"""

    # Each member's value is its own name.
    C_FLAT = "C_FLAT"
    QUANTIFOIL = "QUANTIFOIL"
    LACEY_CARBON = "LACEY_CARBON"
    ULTRATHIN_CARBON = "ULTRATHIN_CARBON"
    GOLD_GRID = "GOLD_GRID"
    GRAPHENE_OXIDE = "GRAPHENE_OXIDE"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with free-text 'annotations'.
CryoEMGridType._metadata = {
    "C_FLAT": {
        "description": "C-flat holey carbon grid",
        "annotations": {
            "hole_sizes": "1.2/1.3, 2/1, 2/2 μm common",
            "manufacturer": "Protochips",
        },
    },
    "QUANTIFOIL": {
        "description": "Quantifoil holey carbon grid",
        "annotations": {
            "hole_sizes": "1.2/1.3, 2/1, 2/2 μm common",
            "manufacturer": "Quantifoil",
        },
    },
    "LACEY_CARBON": {
        "description": "Lacey carbon support film",
        "annotations": {
            "structure": "irregular holes, thin carbon film",
        },
    },
    "ULTRATHIN_CARBON": {
        "description": "Ultrathin carbon film on holey support",
        "annotations": {
            "thickness": "3-5 nm typical",
        },
    },
    "GOLD_GRID": {
        "description": "Pure gold grid",
        "annotations": {
            "advantages": "inert, high-resolution imaging",
        },
    },
    "GRAPHENE_OXIDE": {
        "description": "Graphene oxide support",
        "annotations": {
            "advantages": "atomically thin, good contrast",
        },
    },
}
110
+
111
class VitrificationMethod(RichEnum):
    """Methods for sample vitrification"""

    # Each member's value is its own name.
    PLUNGE_FREEZING = "PLUNGE_FREEZING"
    HIGH_PRESSURE_FREEZING = "HIGH_PRESSURE_FREEZING"
    SLAM_FREEZING = "SLAM_FREEZING"
    SPRAY_FREEZING = "SPRAY_FREEZING"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with free-text 'annotations'.
VitrificationMethod._metadata = {
    "PLUNGE_FREEZING": {
        "description": "Plunge freezing in liquid ethane",
        "annotations": {
            "temperature": "-180°C ethane",
            "equipment": "Vitrobot, Leica GP",
        },
    },
    "HIGH_PRESSURE_FREEZING": {
        "description": "High pressure freezing",
        "annotations": {
            "pressure": "2100 bar typical",
            "advantages": "thick samples, no ice crystals",
        },
    },
    "SLAM_FREEZING": {
        "description": "Slam freezing against metal block",
        "annotations": {
            "cooling_rate": "10,000 K/s",
        },
    },
    "SPRAY_FREEZING": {
        "description": "Spray freezing into liquid nitrogen",
        "annotations": {
            "applications": "large samples, tissues",
        },
    },
}
128
+
129
class CrystallizationMethod(RichEnum):
    """Methods for protein crystallization"""

    # Each member's value is its own name.
    VAPOR_DIFFUSION_HANGING = "VAPOR_DIFFUSION_HANGING"
    VAPOR_DIFFUSION_SITTING = "VAPOR_DIFFUSION_SITTING"
    MICROBATCH = "MICROBATCH"
    DIALYSIS = "DIALYSIS"
    FREE_INTERFACE_DIFFUSION = "FREE_INTERFACE_DIFFUSION"
    LCP = "LCP"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with free-text 'annotations'.
CrystallizationMethod._metadata = {
    "VAPOR_DIFFUSION_HANGING": {
        "description": "Vapor diffusion hanging drop method",
        "annotations": {
            "volume": "2-10 μL drops typical",
            "advantages": "visual monitoring, easy optimization",
        },
    },
    "VAPOR_DIFFUSION_SITTING": {
        "description": "Vapor diffusion sitting drop method",
        "annotations": {
            "advantages": "automated setup, stable drops",
        },
    },
    "MICROBATCH": {
        "description": "Microbatch under oil method",
        "annotations": {
            "oil_type": "paraffin, silicone oil",
            "advantages": "prevents evaporation",
        },
    },
    "DIALYSIS": {
        "description": "Dialysis crystallization",
        "annotations": {
            "applications": "large volume samples, gentle conditions",
        },
    },
    "FREE_INTERFACE_DIFFUSION": {
        "description": "Free interface diffusion",
        "annotations": {
            "setup": "capillary tubes, gel interface",
        },
    },
    "LCP": {
        "description": "Lipidic cubic phase crystallization",
        "annotations": {
            "applications": "membrane proteins",
            "lipid": "monoolein most common",
        },
    },
}
150
+
151
class XRaySource(RichEnum):
    """Types of X-ray sources"""

    # Each member's value is its own name.
    SYNCHROTRON = "SYNCHROTRON"
    ROTATING_ANODE = "ROTATING_ANODE"
    MICROFOCUS = "MICROFOCUS"
    METAL_JET = "METAL_JET"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with free-text 'annotations'.
XRaySource._metadata = {
    "SYNCHROTRON": {
        "description": "Synchrotron radiation source",
        "annotations": {
            "advantages": "high intensity, tunable wavelength",
            "brightness": "10^15-10^18 photons/s/mm²/mrad²",
        },
    },
    "ROTATING_ANODE": {
        "description": "Rotating anode generator",
        "annotations": {
            "power": "3-18 kW typical",
            "target": "copper, molybdenum common",
        },
    },
    "MICROFOCUS": {
        "description": "Microfocus sealed tube",
        "annotations": {
            "spot_size": "10-50 μm",
            "applications": "small crystals, in-house screening",
        },
    },
    "METAL_JET": {
        "description": "Liquid metal jet source",
        "annotations": {
            "advantages": "higher power density, longer lifetime",
            "metals": "gallium, indium",
        },
    },
}
168
+
169
class Detector(RichEnum):
    """Types of detectors for structural biology"""

    # Each member's value is its own name.
    DIRECT_ELECTRON = "DIRECT_ELECTRON"
    CCD = "CCD"
    CMOS = "CMOS"
    HYBRID_PIXEL = "HYBRID_PIXEL"
    PHOTOSTIMULABLE_PHOSPHOR = "PHOTOSTIMULABLE_PHOSPHOR"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with free-text 'annotations'.
Detector._metadata = {
    "DIRECT_ELECTRON": {
        "description": "Direct electron detector (DED)",
        "annotations": {
            "examples": "K2, K3, Falcon, DE-series",
            "advantages": "high DQE, fast readout",
        },
    },
    "CCD": {
        "description": "Charge-coupled device camera",
        "annotations": {
            "applications": "legacy EM, some crystallography",
        },
    },
    "CMOS": {
        "description": "Complementary metal-oxide semiconductor detector",
        "annotations": {
            "advantages": "fast readout, low noise",
        },
    },
    "HYBRID_PIXEL": {
        "description": "Hybrid pixel detector",
        "annotations": {
            "examples": "Pilatus, Eiger",
            "advantages": "photon counting, zero noise",
        },
    },
    "PHOTOSTIMULABLE_PHOSPHOR": {
        "description": "Photostimulable phosphor (image plate)",
        "annotations": {
            "applications": "legacy crystallography",
        },
    },
}
188
+
189
class WorkflowType(RichEnum):
    """Types of computational processing workflows"""

    # Each member's value is its own name.
    MOTION_CORRECTION = "MOTION_CORRECTION"
    CTF_ESTIMATION = "CTF_ESTIMATION"
    PARTICLE_PICKING = "PARTICLE_PICKING"
    CLASSIFICATION_2D = "CLASSIFICATION_2D"
    CLASSIFICATION_3D = "CLASSIFICATION_3D"
    REFINEMENT_3D = "REFINEMENT_3D"
    MODEL_BUILDING = "MODEL_BUILDING"
    MODEL_REFINEMENT = "MODEL_REFINEMENT"
    PHASING = "PHASING"
    DATA_INTEGRATION = "DATA_INTEGRATION"
    DATA_SCALING = "DATA_SCALING"
    SAXS_ANALYSIS = "SAXS_ANALYSIS"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with free-text 'annotations' (software, methods, purpose...).
WorkflowType._metadata = {
    "MOTION_CORRECTION": {
        "description": "Motion correction for cryo-EM movies",
        "annotations": {
            "software": "MotionCorr, Unblur, RELION",
        },
    },
    "CTF_ESTIMATION": {
        "description": "Contrast transfer function estimation",
        "annotations": {
            "software": "CTFFIND, Gctf, RELION",
        },
    },
    "PARTICLE_PICKING": {
        "description": "Particle picking from micrographs",
        "annotations": {
            "methods": "template matching, deep learning",
            "software": "RELION, cryoSPARC, Topaz",
        },
    },
    "CLASSIFICATION_2D": {
        "description": "2D classification of particles",
        "annotations": {
            "purpose": "sorting, cleaning particle dataset",
        },
    },
    "CLASSIFICATION_3D": {
        "description": "3D classification of particles",
        "annotations": {
            "purpose": "conformational sorting, resolution improvement",
        },
    },
    "REFINEMENT_3D": {
        "description": "3D refinement of particle orientations",
        "annotations": {
            "algorithms": "expectation maximization, gradient descent",
        },
    },
    "MODEL_BUILDING": {
        "description": "Atomic model building into density",
        "annotations": {
            "software": "Coot, ChimeraX, Isolde",
        },
    },
    "MODEL_REFINEMENT": {
        "description": "Atomic model refinement",
        "annotations": {
            "software": "PHENIX, REFMAC, Buster",
        },
    },
    "PHASING": {
        "description": "Phase determination for crystallography",
        "annotations": {
            "methods": "SAD, MAD, MR, MIR",
        },
    },
    "DATA_INTEGRATION": {
        "description": "Integration of diffraction data",
        "annotations": {
            "software": "XDS, DIALS, HKL",
        },
    },
    "DATA_SCALING": {
        "description": "Scaling and merging of diffraction data",
        "annotations": {
            "software": "SCALA, AIMLESS, XSCALE",
        },
    },
    "SAXS_ANALYSIS": {
        "description": "SAXS data analysis and modeling",
        "annotations": {
            "software": "PRIMUS, CRYSOL, FoXS",
        },
    },
}
222
+
223
class FileFormat(RichEnum):
    """File formats used in structural biology"""

    # Each member's value is its own name.
    MRC = "MRC"
    TIFF = "TIFF"
    HDF5 = "HDF5"
    STAR = "STAR"
    PDB = "PDB"
    MMCIF = "MMCIF"
    MTZ = "MTZ"
    CBF = "CBF"
    DM3 = "DM3"
    SER = "SER"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Every record has the same shape
# (description + annotations{extension, applications}), so the records are
# built from (name, description, extension, applications) rows.
# NOTE(review): DM3/DM4 is, as far as I know, Gatan's DigitalMicrograph
# format; the 'FEI/Thermo Fisher' attribution below is reproduced verbatim
# and should be confirmed against the upstream YAML.
FileFormat._metadata = {
    member: {
        "description": summary,
        "annotations": {"extension": suffixes, "applications": uses},
    }
    for member, summary, suffixes, uses in (
        ("MRC", "MRC format for EM density maps",
         ".mrc, .map", "EM volumes, tomograms"),
        ("TIFF", "Tagged Image File Format",
         ".tif, .tiff", "micrographs, general imaging"),
        ("HDF5", "Hierarchical Data Format 5",
         ".h5, .hdf5", "large datasets, metadata storage"),
        ("STAR", "Self-defining Text Archival and Retrieval format",
         ".star", "RELION metadata, particle parameters"),
        ("PDB", "Protein Data Bank coordinate format",
         ".pdb", "atomic coordinates, legacy format"),
        ("MMCIF", "Macromolecular Crystallographic Information File",
         ".cif", "atomic coordinates, modern PDB format"),
        ("MTZ", "MTZ reflection data format",
         ".mtz", "crystallographic reflections, phases"),
        ("CBF", "Crystallographic Binary Format",
         ".cbf", "detector images, diffraction data"),
        ("DM3", "Digital Micrograph format",
         ".dm3, .dm4", "FEI/Thermo Fisher EM data"),
        ("SER", "FEI series format",
         ".ser", "FEI movie stacks"),
    )
}
252
+
253
class DataType(RichEnum):
    """Types of structural biology data"""

    # Each member's value is its own name.
    MICROGRAPH = "MICROGRAPH"
    MOVIE = "MOVIE"
    DIFFRACTION = "DIFFRACTION"
    SCATTERING = "SCATTERING"
    PARTICLES = "PARTICLES"
    VOLUME = "VOLUME"
    TOMOGRAM = "TOMOGRAM"
    MODEL = "MODEL"
    METADATA = "METADATA"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Each record pairs a
# 'description' with a single free-text annotation.
DataType._metadata = {
    "MICROGRAPH": {
        "description": "Electron micrograph image",
        "annotations": {
            "typical_size": "4k x 4k pixels",
        },
    },
    "MOVIE": {
        "description": "Movie stack of frames",
        "annotations": {
            "applications": "motion correction, dose fractionation",
        },
    },
    "DIFFRACTION": {
        "description": "X-ray diffraction pattern",
        "annotations": {
            "information": "structure factors, crystal lattice",
        },
    },
    "SCATTERING": {
        "description": "Small-angle scattering data",
        "annotations": {
            "information": "I(q) vs scattering vector",
        },
    },
    "PARTICLES": {
        "description": "Particle stack for single particle analysis",
        "annotations": {
            "format": "boxed particles, aligned",
        },
    },
    "VOLUME": {
        "description": "3D electron density volume",
        "annotations": {
            "applications": "cryo-EM maps, crystallographic maps",
        },
    },
    "TOMOGRAM": {
        "description": "3D tomographic reconstruction",
        "annotations": {
            "resolution": "5-50 Å typical",
        },
    },
    "MODEL": {
        "description": "Atomic coordinate model",
        "annotations": {
            "formats": "PDB, mmCIF",
        },
    },
    "METADATA": {
        "description": "Associated metadata file",
        "annotations": {
            "formats": "STAR, XML, JSON",
        },
    },
}
280
+
281
class ProcessingStatus(RichEnum):
    """Status of data processing workflows"""

    # Each member's value is its own name.
    RAW = "RAW"
    PREPROCESSING = "PREPROCESSING"
    PROCESSING = "PROCESSING"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"
    QUEUED = "QUEUED"
    PAUSED = "PAUSED"
    CANCELLED = "CANCELLED"


# Per-member metadata keyed by member name; attached once the class object
# exists rather than inside the class body. Only a 'description' is recorded
# for each status.
ProcessingStatus._metadata = {
    member: {"description": text}
    for member, text in (
        ("RAW", "Raw unprocessed data"),
        ("PREPROCESSING", "Initial preprocessing in progress"),
        ("PROCESSING", "Main processing workflow running"),
        ("COMPLETED", "Processing completed successfully"),
        ("FAILED", "Processing failed with errors"),
        ("QUEUED", "Queued for processing"),
        ("PAUSED", "Processing paused by user"),
        ("CANCELLED", "Processing cancelled by user"),
    )
}
306
+
307
# Names exported by `from <module> import *`.
__all__ = [
    "SampleType", "StructuralBiologyTechnique", "CryoEMPreparationType",
    "CryoEMGridType", "VitrificationMethod", "CrystallizationMethod",
    "XRaySource", "Detector", "WorkflowType",
    "FileFormat", "DataType", "ProcessingStatus",
]