valuesets 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valuesets might be problematic. Click here for more details.
- valuesets/__init__.py +7 -0
- valuesets/_version.py +8 -0
- valuesets/datamodel/valuesets.py +13796 -0
- valuesets/datamodel/valuesets_dataclass.py +24503 -0
- valuesets/datamodel/valuesets_pydantic.py +13796 -0
- valuesets/enums/__init__.py +590 -0
- valuesets/enums/academic/__init__.py +1 -0
- valuesets/enums/academic/research.py +559 -0
- valuesets/enums/analytical_chemistry/__init__.py +1 -0
- valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
- valuesets/enums/bio/__init__.py +1 -0
- valuesets/enums/bio/biological_colors.py +238 -0
- valuesets/enums/bio/cell_cycle.py +180 -0
- valuesets/enums/bio/currency_chemicals.py +52 -0
- valuesets/enums/bio/developmental_stages.py +103 -0
- valuesets/enums/bio/genome_features.py +182 -0
- valuesets/enums/bio/genomics.py +91 -0
- valuesets/enums/bio/go_aspect.py +32 -0
- valuesets/enums/bio/go_causality.py +58 -0
- valuesets/enums/bio/go_evidence.py +129 -0
- valuesets/enums/bio/human_developmental_stages.py +62 -0
- valuesets/enums/bio/insdc_geographic_locations.py +591 -0
- valuesets/enums/bio/insdc_missing_values.py +49 -0
- valuesets/enums/bio/lipid_categories.py +67 -0
- valuesets/enums/bio/mouse_developmental_stages.py +62 -0
- valuesets/enums/bio/plant_biology.py +86 -0
- valuesets/enums/bio/plant_developmental_stages.py +54 -0
- valuesets/enums/bio/plant_sex.py +81 -0
- valuesets/enums/bio/protein_evidence.py +61 -0
- valuesets/enums/bio/proteomics_standards.py +123 -0
- valuesets/enums/bio/psi_mi.py +306 -0
- valuesets/enums/bio/relationship_to_oxygen.py +37 -0
- valuesets/enums/bio/sequence_alphabets.py +449 -0
- valuesets/enums/bio/sequence_chemistry.py +357 -0
- valuesets/enums/bio/sequencing_platforms.py +302 -0
- valuesets/enums/bio/structural_biology.py +320 -0
- valuesets/enums/bio/taxonomy.py +238 -0
- valuesets/enums/bio/trophic_levels.py +85 -0
- valuesets/enums/bio/uniprot_species.py +344 -0
- valuesets/enums/bio/viral_genome_types.py +47 -0
- valuesets/enums/bioprocessing/__init__.py +1 -0
- valuesets/enums/bioprocessing/scale_up.py +249 -0
- valuesets/enums/business/__init__.py +1 -0
- valuesets/enums/business/human_resources.py +275 -0
- valuesets/enums/business/industry_classifications.py +181 -0
- valuesets/enums/business/management_operations.py +228 -0
- valuesets/enums/business/organizational_structures.py +236 -0
- valuesets/enums/business/quality_management.py +181 -0
- valuesets/enums/business/supply_chain.py +232 -0
- valuesets/enums/chemistry/__init__.py +1 -0
- valuesets/enums/chemistry/chemical_entities.py +315 -0
- valuesets/enums/chemistry/reaction_directionality.py +65 -0
- valuesets/enums/chemistry/reactions.py +256 -0
- valuesets/enums/clinical/__init__.py +1 -0
- valuesets/enums/clinical/nih_demographics.py +177 -0
- valuesets/enums/clinical/phenopackets.py +254 -0
- valuesets/enums/common_value_sets.py +8791 -0
- valuesets/enums/computing/__init__.py +1 -0
- valuesets/enums/computing/file_formats.py +294 -0
- valuesets/enums/computing/maturity_levels.py +196 -0
- valuesets/enums/computing/mime_types.py +227 -0
- valuesets/enums/confidence_levels.py +168 -0
- valuesets/enums/contributor.py +30 -0
- valuesets/enums/core.py +42 -0
- valuesets/enums/data/__init__.py +1 -0
- valuesets/enums/data/data_absent_reason.py +53 -0
- valuesets/enums/data_science/__init__.py +1 -0
- valuesets/enums/data_science/binary_classification.py +87 -0
- valuesets/enums/data_science/emotion_classification.py +66 -0
- valuesets/enums/data_science/priority_severity.py +73 -0
- valuesets/enums/data_science/quality_control.py +46 -0
- valuesets/enums/data_science/sentiment_analysis.py +50 -0
- valuesets/enums/data_science/text_classification.py +97 -0
- valuesets/enums/demographics.py +206 -0
- valuesets/enums/ecological_interactions.py +151 -0
- valuesets/enums/energy/__init__.py +1 -0
- valuesets/enums/energy/energy.py +343 -0
- valuesets/enums/energy/fossil_fuels.py +29 -0
- valuesets/enums/energy/nuclear/__init__.py +1 -0
- valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
- valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
- valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
- valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
- valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
- valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
- valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
- valuesets/enums/energy/nuclear/reactor_types.py +163 -0
- valuesets/enums/environmental_health/__init__.py +1 -0
- valuesets/enums/environmental_health/exposures.py +265 -0
- valuesets/enums/geography/__init__.py +1 -0
- valuesets/enums/geography/geographic_codes.py +741 -0
- valuesets/enums/health/__init__.py +12 -0
- valuesets/enums/health/vaccination.py +98 -0
- valuesets/enums/health.py +36 -0
- valuesets/enums/health_base.py +36 -0
- valuesets/enums/healthcare.py +45 -0
- valuesets/enums/industry/__init__.py +1 -0
- valuesets/enums/industry/extractive_industry.py +94 -0
- valuesets/enums/industry/mining.py +388 -0
- valuesets/enums/industry/safety_colors.py +201 -0
- valuesets/enums/investigation.py +27 -0
- valuesets/enums/materials_science/__init__.py +1 -0
- valuesets/enums/materials_science/characterization_methods.py +112 -0
- valuesets/enums/materials_science/crystal_structures.py +76 -0
- valuesets/enums/materials_science/material_properties.py +119 -0
- valuesets/enums/materials_science/material_types.py +104 -0
- valuesets/enums/materials_science/pigments_dyes.py +198 -0
- valuesets/enums/materials_science/synthesis_methods.py +109 -0
- valuesets/enums/medical/__init__.py +1 -0
- valuesets/enums/medical/clinical.py +277 -0
- valuesets/enums/medical/neuroimaging.py +119 -0
- valuesets/enums/mining_processing.py +302 -0
- valuesets/enums/physics/__init__.py +1 -0
- valuesets/enums/physics/states_of_matter.py +46 -0
- valuesets/enums/social/__init__.py +1 -0
- valuesets/enums/social/person_status.py +29 -0
- valuesets/enums/spatial/__init__.py +1 -0
- valuesets/enums/spatial/spatial_qualifiers.py +246 -0
- valuesets/enums/statistics/__init__.py +5 -0
- valuesets/enums/statistics/prediction_outcomes.py +31 -0
- valuesets/enums/statistics.py +31 -0
- valuesets/enums/time/__init__.py +1 -0
- valuesets/enums/time/temporal.py +254 -0
- valuesets/enums/units/__init__.py +1 -0
- valuesets/enums/units/measurements.py +310 -0
- valuesets/enums/visual/__init__.py +1 -0
- valuesets/enums/visual/colors.py +376 -0
- valuesets/generators/__init__.py +19 -0
- valuesets/generators/auto_slot_injector.py +280 -0
- valuesets/generators/enhanced_pydantic_generator.py +100 -0
- valuesets/generators/enum_slot_generator.py +201 -0
- valuesets/generators/modular_rich_generator.py +353 -0
- valuesets/generators/prefix_standardizer.py +198 -0
- valuesets/generators/rich_enum.py +127 -0
- valuesets/generators/rich_pydantic_generator.py +310 -0
- valuesets/generators/smart_slot_syncer.py +428 -0
- valuesets/generators/sssom_generator.py +394 -0
- valuesets/merged/merged_hierarchy.yaml +21649 -0
- valuesets/schema/README.md +3 -0
- valuesets/schema/academic/research.yaml +911 -0
- valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
- valuesets/schema/bio/bio_entities.yaml +364 -0
- valuesets/schema/bio/biological_colors.yaml +434 -0
- valuesets/schema/bio/cell_cycle.yaml +309 -0
- valuesets/schema/bio/currency_chemicals.yaml +70 -0
- valuesets/schema/bio/developmental_stages.yaml +226 -0
- valuesets/schema/bio/genome_features.yaml +342 -0
- valuesets/schema/bio/genomics.yaml +101 -0
- valuesets/schema/bio/go_aspect.yaml +39 -0
- valuesets/schema/bio/go_causality.yaml +119 -0
- valuesets/schema/bio/go_evidence.yaml +215 -0
- valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
- valuesets/schema/bio/insdc_missing_values.yaml +85 -0
- valuesets/schema/bio/lipid_categories.yaml +72 -0
- valuesets/schema/bio/plant_biology.yaml +125 -0
- valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
- valuesets/schema/bio/plant_sex.yaml +108 -0
- valuesets/schema/bio/protein_evidence.yaml +63 -0
- valuesets/schema/bio/proteomics_standards.yaml +116 -0
- valuesets/schema/bio/psi_mi.yaml +400 -0
- valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
- valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
- valuesets/schema/bio/sequence_chemistry.yaml +477 -0
- valuesets/schema/bio/sequencing_platforms.yaml +515 -0
- valuesets/schema/bio/structural_biology.yaml +428 -0
- valuesets/schema/bio/taxonomy.yaml +453 -0
- valuesets/schema/bio/trophic_levels.yaml +118 -0
- valuesets/schema/bio/uniprot_species.yaml +1209 -0
- valuesets/schema/bio/viral_genome_types.yaml +99 -0
- valuesets/schema/bioprocessing/scale_up.yaml +458 -0
- valuesets/schema/business/human_resources.yaml +752 -0
- valuesets/schema/business/industry_classifications.yaml +448 -0
- valuesets/schema/business/management_operations.yaml +602 -0
- valuesets/schema/business/organizational_structures.yaml +645 -0
- valuesets/schema/business/quality_management.yaml +502 -0
- valuesets/schema/business/supply_chain.yaml +688 -0
- valuesets/schema/chemistry/chemical_entities.yaml +639 -0
- valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
- valuesets/schema/chemistry/reactions.yaml +442 -0
- valuesets/schema/clinical/nih_demographics.yaml +285 -0
- valuesets/schema/clinical/phenopackets.yaml +429 -0
- valuesets/schema/computing/file_formats.yaml +631 -0
- valuesets/schema/computing/maturity_levels.yaml +229 -0
- valuesets/schema/computing/mime_types.yaml +266 -0
- valuesets/schema/confidence_levels.yaml +206 -0
- valuesets/schema/contributor.yaml +30 -0
- valuesets/schema/core.yaml +55 -0
- valuesets/schema/data/data_absent_reason.yaml +82 -0
- valuesets/schema/data_science/binary_classification.yaml +125 -0
- valuesets/schema/data_science/emotion_classification.yaml +109 -0
- valuesets/schema/data_science/priority_severity.yaml +122 -0
- valuesets/schema/data_science/quality_control.yaml +68 -0
- valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
- valuesets/schema/data_science/text_classification.yaml +135 -0
- valuesets/schema/demographics.yaml +238 -0
- valuesets/schema/ecological_interactions.yaml +298 -0
- valuesets/schema/energy/energy.yaml +595 -0
- valuesets/schema/energy/fossil_fuels.yaml +28 -0
- valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
- valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
- valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
- valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
- valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
- valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
- valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
- valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
- valuesets/schema/environmental_health/exposures.yaml +355 -0
- valuesets/schema/generated_slots.yaml +1828 -0
- valuesets/schema/geography/geographic_codes.yaml +1018 -0
- valuesets/schema/health/vaccination.yaml +102 -0
- valuesets/schema/health.yaml +38 -0
- valuesets/schema/healthcare.yaml +53 -0
- valuesets/schema/industry/extractive_industry.yaml +89 -0
- valuesets/schema/industry/mining.yaml +888 -0
- valuesets/schema/industry/safety_colors.yaml +375 -0
- valuesets/schema/investigation.yaml +64 -0
- valuesets/schema/materials_science/characterization_methods.yaml +193 -0
- valuesets/schema/materials_science/crystal_structures.yaml +138 -0
- valuesets/schema/materials_science/material_properties.yaml +135 -0
- valuesets/schema/materials_science/material_types.yaml +151 -0
- valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
- valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
- valuesets/schema/medical/clinical.yaml +610 -0
- valuesets/schema/medical/neuroimaging.yaml +325 -0
- valuesets/schema/mining_processing.yaml +295 -0
- valuesets/schema/physics/states_of_matter.yaml +46 -0
- valuesets/schema/slot_mixins.yaml +143 -0
- valuesets/schema/social/person_status.yaml +28 -0
- valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
- valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
- valuesets/schema/statistics.yaml +34 -0
- valuesets/schema/time/temporal.yaml +435 -0
- valuesets/schema/types.yaml +15 -0
- valuesets/schema/units/measurements.yaml +675 -0
- valuesets/schema/valuesets.yaml +100 -0
- valuesets/schema/visual/colors.yaml +778 -0
- valuesets/utils/__init__.py +6 -0
- valuesets/utils/comparison.py +102 -0
- valuesets/utils/expand_dynamic_enums.py +414 -0
- valuesets/utils/mapping_utils.py +236 -0
- valuesets/validators/__init__.py +11 -0
- valuesets/validators/enum_evaluator.py +669 -0
- valuesets/validators/oak_config.yaml +70 -0
- valuesets/validators/validate_with_ols.py +241 -0
- valuesets-0.3.1.dist-info/METADATA +395 -0
- valuesets-0.3.1.dist-info/RECORD +248 -0
- valuesets-0.3.1.dist-info/WHEEL +4 -0
- valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
name: sequence_chemistry
|
|
2
|
+
title: Sequence Chemistry Value Sets
|
|
3
|
+
description: 'Value sets for nucleic acid and protein sequence chemistry, including
|
|
4
|
+
|
|
5
|
+
standard and extended alphabets, quality encodings, and molecular representations'
|
|
6
|
+
id: https://w3id.org/linkml/valuesets/bio/sequence_chemistry
|
|
7
|
+
imports:
|
|
8
|
+
- linkml:types
|
|
9
|
+
prefixes:
|
|
10
|
+
valuesets: https://w3id.org/valuesets/
|
|
11
|
+
IUPAC: http://purl.obolibrary.org/obo/IUPAC_
|
|
12
|
+
SO: http://purl.obolibrary.org/obo/SO_
|
|
13
|
+
CHEBI: http://purl.obolibrary.org/obo/CHEBI_
|
|
14
|
+
default_prefix: valuesets
|
|
15
|
+
slots:
|
|
16
|
+
iupac_nucleotide:
|
|
17
|
+
description: Complete IUPAC nucleotide codes including ambiguous bases for DNA/RNA sequences
|
|
18
|
+
range: IUPACNucleotideCode
|
|
19
|
+
standard_amino_acid:
|
|
20
|
+
description: The 20 standard proteinogenic amino acids with IUPAC single-letter codes
|
|
21
|
+
range: StandardAminoAcid
|
|
22
|
+
iupac_amino_acid:
|
|
23
|
+
description: Complete IUPAC amino acid codes including standard amino acids, rare amino acids, and ambiguity codes
|
|
24
|
+
range: IUPACAminoAcidCode
|
|
25
|
+
sequence_alphabet:
|
|
26
|
+
description: Types of sequence alphabets used in bioinformatics
|
|
27
|
+
range: SequenceAlphabet
|
|
28
|
+
sequence_quality_encoding:
|
|
29
|
+
description: Quality score encoding standards used in FASTQ files and sequencing data
|
|
30
|
+
range: SequenceQualityEncoding
|
|
31
|
+
genetic_code_table:
|
|
32
|
+
description: NCBI genetic code translation tables for different organisms
|
|
33
|
+
range: GeneticCodeTable
|
|
34
|
+
sequence_strand:
|
|
35
|
+
description: Strand orientation for nucleic acid sequences
|
|
36
|
+
range: SequenceStrand
|
|
37
|
+
sequence_topology:
|
|
38
|
+
description: Topological structure of nucleic acid molecules
|
|
39
|
+
range: SequenceTopology
|
|
40
|
+
sequence_modality:
|
|
41
|
+
description: Types of sequence data based on experimental method
|
|
42
|
+
range: SequenceModality
|
|
43
|
+
enums:
|
|
44
|
+
IUPACNucleotideCode:
|
|
45
|
+
title: IUPAC Nucleotide Ambiguity Codes
|
|
46
|
+
description: 'Complete IUPAC nucleotide codes including ambiguous bases for DNA/RNA sequences.
|
|
47
|
+
|
|
48
|
+
Used in FASTA and other sequence formats to represent uncertain nucleotides.'
|
|
49
|
+
permissible_values:
|
|
50
|
+
A:
|
|
51
|
+
title: A
|
|
52
|
+
description: Adenine
|
|
53
|
+
T:
|
|
54
|
+
title: T
|
|
55
|
+
description: Thymine (DNA)
|
|
56
|
+
U:
|
|
57
|
+
title: U
|
|
58
|
+
description: Uracil (RNA)
|
|
59
|
+
G:
|
|
60
|
+
title: G
|
|
61
|
+
description: Guanine
|
|
62
|
+
C:
|
|
63
|
+
title: C
|
|
64
|
+
description: Cytosine
|
|
65
|
+
R:
|
|
66
|
+
title: R
|
|
67
|
+
description: Purine (A or G)
|
|
68
|
+
Y:
|
|
69
|
+
title: Y
|
|
70
|
+
description: Pyrimidine (C or T/U)
|
|
71
|
+
S:
|
|
72
|
+
description: Strong interaction (G or C)
|
|
73
|
+
W:
|
|
74
|
+
description: Weak interaction (A or T/U)
|
|
75
|
+
K:
|
|
76
|
+
description: Keto (G or T/U)
|
|
77
|
+
M:
|
|
78
|
+
description: Amino (A or C)
|
|
79
|
+
B:
|
|
80
|
+
description: Not A (C or G or T/U)
|
|
81
|
+
D:
|
|
82
|
+
description: Not C (A or G or T/U)
|
|
83
|
+
H:
|
|
84
|
+
description: Not G (A or C or T/U)
|
|
85
|
+
V:
|
|
86
|
+
description: Not T/U (A or C or G)
|
|
87
|
+
N:
|
|
88
|
+
description: Any nucleotide (A or C or G or T/U)
|
|
89
|
+
GAP:
|
|
90
|
+
title: '-'
|
|
91
|
+
description: Gap or deletion in alignment
|
|
92
|
+
StandardAminoAcid:
|
|
93
|
+
title: Standard Amino Acids
|
|
94
|
+
description: The 20 standard proteinogenic amino acids with IUPAC single-letter codes
|
|
95
|
+
permissible_values:
|
|
96
|
+
A:
|
|
97
|
+
title: A
|
|
98
|
+
description: Alanine
|
|
99
|
+
R:
|
|
100
|
+
title: R
|
|
101
|
+
description: Arginine
|
|
102
|
+
N:
|
|
103
|
+
title: N
|
|
104
|
+
description: Asparagine
|
|
105
|
+
D:
|
|
106
|
+
title: D
|
|
107
|
+
description: Aspartic acid
|
|
108
|
+
C:
|
|
109
|
+
title: C
|
|
110
|
+
description: Cysteine
|
|
111
|
+
E:
|
|
112
|
+
title: E
|
|
113
|
+
description: Glutamic acid
|
|
114
|
+
Q:
|
|
115
|
+
title: Q
|
|
116
|
+
description: Glutamine
|
|
117
|
+
G:
|
|
118
|
+
title: G
|
|
119
|
+
description: Glycine
|
|
120
|
+
H:
|
|
121
|
+
title: H
|
|
122
|
+
description: Histidine
|
|
123
|
+
I:
|
|
124
|
+
title: I
|
|
125
|
+
description: Isoleucine
|
|
126
|
+
L:
|
|
127
|
+
title: L
|
|
128
|
+
description: Leucine
|
|
129
|
+
K:
|
|
130
|
+
title: K
|
|
131
|
+
description: Lysine
|
|
132
|
+
M:
|
|
133
|
+
title: M
|
|
134
|
+
description: Methionine
|
|
135
|
+
F:
|
|
136
|
+
title: F
|
|
137
|
+
description: Phenylalanine
|
|
138
|
+
P:
|
|
139
|
+
title: P
|
|
140
|
+
description: Proline
|
|
141
|
+
S:
|
|
142
|
+
title: S
|
|
143
|
+
description: Serine
|
|
144
|
+
T:
|
|
145
|
+
title: T
|
|
146
|
+
description: Threonine
|
|
147
|
+
W:
|
|
148
|
+
title: W
|
|
149
|
+
description: Tryptophan
|
|
150
|
+
Y:
|
|
151
|
+
title: Y
|
|
152
|
+
description: Tyrosine
|
|
153
|
+
V:
|
|
154
|
+
title: V
|
|
155
|
+
description: Valine
|
|
156
|
+
IUPACAminoAcidCode:
|
|
157
|
+
title: IUPAC Amino Acid Codes
|
|
158
|
+
description: 'Complete IUPAC amino acid codes including standard amino acids,
|
|
159
|
+
|
|
160
|
+
rare amino acids, and ambiguity codes'
|
|
161
|
+
permissible_values:
|
|
162
|
+
A:
|
|
163
|
+
title: A
|
|
164
|
+
description: Alanine
|
|
165
|
+
R:
|
|
166
|
+
title: R
|
|
167
|
+
description: Arginine
|
|
168
|
+
N:
|
|
169
|
+
title: N
|
|
170
|
+
description: Asparagine
|
|
171
|
+
D:
|
|
172
|
+
title: D
|
|
173
|
+
description: Aspartic acid
|
|
174
|
+
C:
|
|
175
|
+
title: C
|
|
176
|
+
description: Cysteine
|
|
177
|
+
E:
|
|
178
|
+
title: E
|
|
179
|
+
description: Glutamic acid
|
|
180
|
+
Q:
|
|
181
|
+
title: Q
|
|
182
|
+
description: Glutamine
|
|
183
|
+
G:
|
|
184
|
+
title: G
|
|
185
|
+
description: Glycine
|
|
186
|
+
H:
|
|
187
|
+
title: H
|
|
188
|
+
description: Histidine
|
|
189
|
+
I:
|
|
190
|
+
title: I
|
|
191
|
+
description: Isoleucine
|
|
192
|
+
L:
|
|
193
|
+
title: L
|
|
194
|
+
description: Leucine
|
|
195
|
+
K:
|
|
196
|
+
title: K
|
|
197
|
+
description: Lysine
|
|
198
|
+
M:
|
|
199
|
+
title: M
|
|
200
|
+
description: Methionine
|
|
201
|
+
F:
|
|
202
|
+
title: F
|
|
203
|
+
description: Phenylalanine
|
|
204
|
+
P:
|
|
205
|
+
title: P
|
|
206
|
+
description: Proline
|
|
207
|
+
S:
|
|
208
|
+
title: S
|
|
209
|
+
description: Serine
|
|
210
|
+
T:
|
|
211
|
+
title: T
|
|
212
|
+
description: Threonine
|
|
213
|
+
W:
|
|
214
|
+
title: W
|
|
215
|
+
description: Tryptophan
|
|
216
|
+
Y:
|
|
217
|
+
title: Y
|
|
218
|
+
description: Tyrosine
|
|
219
|
+
V:
|
|
220
|
+
title: V
|
|
221
|
+
description: Valine
|
|
222
|
+
U:
|
|
223
|
+
title: U
|
|
224
|
+
description: Selenocysteine (21st amino acid)
|
|
225
|
+
aliases:
|
|
226
|
+
- Sec
|
|
227
|
+
O:
|
|
228
|
+
title: O
|
|
229
|
+
description: Pyrrolysine (22nd amino acid)
|
|
230
|
+
aliases:
|
|
231
|
+
- Pyl
|
|
232
|
+
B:
|
|
233
|
+
description: Asparagine or Aspartic acid (N or D)
|
|
234
|
+
Z:
|
|
235
|
+
description: Glutamine or Glutamic acid (Q or E)
|
|
236
|
+
J:
|
|
237
|
+
description: Leucine or Isoleucine (L or I)
|
|
238
|
+
X:
|
|
239
|
+
description: Any amino acid
|
|
240
|
+
STOP:
|
|
241
|
+
title: '*'
|
|
242
|
+
description: Translation stop codon
|
|
243
|
+
GAP:
|
|
244
|
+
title: '-'
|
|
245
|
+
description: Gap or deletion in alignment
|
|
246
|
+
SequenceAlphabet:
|
|
247
|
+
title: Sequence Alphabet Types
|
|
248
|
+
description: Types of sequence alphabets used in bioinformatics
|
|
249
|
+
permissible_values:
|
|
250
|
+
DNA:
|
|
251
|
+
description: Deoxyribonucleic acid alphabet (A, T, G, C)
|
|
252
|
+
RNA:
|
|
253
|
+
description: Ribonucleic acid alphabet (A, U, G, C)
|
|
254
|
+
PROTEIN:
|
|
255
|
+
description: Protein/amino acid alphabet (20 standard AAs)
|
|
256
|
+
IUPAC_DNA:
|
|
257
|
+
description: Extended DNA with IUPAC ambiguity codes
|
|
258
|
+
IUPAC_RNA:
|
|
259
|
+
description: Extended RNA with IUPAC ambiguity codes
|
|
260
|
+
IUPAC_PROTEIN:
|
|
261
|
+
description: Extended protein with ambiguity codes and rare AAs
|
|
262
|
+
RESTRICTED_DNA:
|
|
263
|
+
description: Unambiguous DNA bases only (A, T, G, C)
|
|
264
|
+
RESTRICTED_RNA:
|
|
265
|
+
description: Unambiguous RNA bases only (A, U, G, C)
|
|
266
|
+
BINARY:
|
|
267
|
+
description: Binary encoding of sequences
|
|
268
|
+
SequenceQualityEncoding:
|
|
269
|
+
title: Sequence Quality Score Encodings
|
|
270
|
+
description: 'Quality score encoding standards used in FASTQ files and sequencing data.
|
|
271
|
+
|
|
272
|
+
Different platforms and software versions use different ASCII offsets.'
|
|
273
|
+
permissible_values:
|
|
274
|
+
SANGER:
|
|
275
|
+
description: Sanger/Phred+33 (PHRED scores, ASCII offset 33)
|
|
276
|
+
annotations:
|
|
277
|
+
ascii_offset: 33
|
|
278
|
+
score_range: 0-93
|
|
279
|
+
platforms: NCBI SRA, Illumina 1.8+
|
|
280
|
+
SOLEXA:
|
|
281
|
+
description: Solexa+64 (Solexa scores, ASCII offset 64)
|
|
282
|
+
annotations:
|
|
283
|
+
ascii_offset: 64
|
|
284
|
+
score_range: -5-62
|
|
285
|
+
platforms: Early Solexa/Illumina
|
|
286
|
+
ILLUMINA_1_3:
|
|
287
|
+
description: Illumina 1.3+ (PHRED+64, ASCII offset 64)
|
|
288
|
+
annotations:
|
|
289
|
+
ascii_offset: 64
|
|
290
|
+
score_range: 0-62
|
|
291
|
+
platforms: Illumina 1.3-1.7
|
|
292
|
+
ILLUMINA_1_5:
|
|
293
|
+
description: Illumina 1.5+ (PHRED+64, special handling for 0-2)
|
|
294
|
+
annotations:
|
|
295
|
+
ascii_offset: 64
|
|
296
|
+
score_range: 3-62
|
|
297
|
+
platforms: Illumina 1.5-1.7
|
|
298
|
+
ILLUMINA_1_8:
|
|
299
|
+
description: Illumina 1.8+ (PHRED+33, modern standard)
|
|
300
|
+
annotations:
|
|
301
|
+
ascii_offset: 33
|
|
302
|
+
score_range: 0-41
|
|
303
|
+
platforms: Illumina 1.8+, modern sequencers
|
|
304
|
+
GeneticCodeTable:
|
|
305
|
+
title: NCBI Genetic Code Translation Tables
|
|
306
|
+
description: 'NCBI genetic code translation tables for different organisms.
|
|
307
|
+
|
|
308
|
+
Table 1 is the universal genetic code used by most organisms.'
|
|
309
|
+
permissible_values:
|
|
310
|
+
TABLE_1:
|
|
311
|
+
description: Standard genetic code (universal)
|
|
312
|
+
annotations:
|
|
313
|
+
ncbi_id: 1
|
|
314
|
+
name: Standard
|
|
315
|
+
TABLE_2:
|
|
316
|
+
description: Vertebrate mitochondrial code
|
|
317
|
+
annotations:
|
|
318
|
+
ncbi_id: 2
|
|
319
|
+
name: Vertebrate Mitochondrial
|
|
320
|
+
TABLE_3:
|
|
321
|
+
description: Yeast mitochondrial code
|
|
322
|
+
annotations:
|
|
323
|
+
ncbi_id: 3
|
|
324
|
+
name: Yeast Mitochondrial
|
|
325
|
+
TABLE_4:
|
|
326
|
+
description: Mold, protozoan, coelenterate mitochondrial
|
|
327
|
+
annotations:
|
|
328
|
+
ncbi_id: 4
|
|
329
|
+
name: Mold Mitochondrial
|
|
330
|
+
TABLE_5:
|
|
331
|
+
description: Invertebrate mitochondrial code
|
|
332
|
+
annotations:
|
|
333
|
+
ncbi_id: 5
|
|
334
|
+
name: Invertebrate Mitochondrial
|
|
335
|
+
TABLE_6:
|
|
336
|
+
description: Ciliate, dasycladacean, hexamita nuclear code
|
|
337
|
+
annotations:
|
|
338
|
+
ncbi_id: 6
|
|
339
|
+
name: Ciliate Nuclear
|
|
340
|
+
TABLE_9:
|
|
341
|
+
description: Echinoderm and flatworm mitochondrial code
|
|
342
|
+
annotations:
|
|
343
|
+
ncbi_id: 9
|
|
344
|
+
name: Echinoderm Mitochondrial
|
|
345
|
+
TABLE_10:
|
|
346
|
+
description: Euplotid nuclear code
|
|
347
|
+
annotations:
|
|
348
|
+
ncbi_id: 10
|
|
349
|
+
name: Euplotid Nuclear
|
|
350
|
+
TABLE_11:
|
|
351
|
+
description: Bacterial, archaeal and plant plastid code
|
|
352
|
+
annotations:
|
|
353
|
+
ncbi_id: 11
|
|
354
|
+
name: Bacterial
|
|
355
|
+
TABLE_12:
|
|
356
|
+
description: Alternative yeast nuclear code
|
|
357
|
+
annotations:
|
|
358
|
+
ncbi_id: 12
|
|
359
|
+
name: Alternative Yeast Nuclear
|
|
360
|
+
TABLE_13:
|
|
361
|
+
description: Ascidian mitochondrial code
|
|
362
|
+
annotations:
|
|
363
|
+
ncbi_id: 13
|
|
364
|
+
name: Ascidian Mitochondrial
|
|
365
|
+
TABLE_14:
|
|
366
|
+
description: Alternative flatworm mitochondrial code
|
|
367
|
+
annotations:
|
|
368
|
+
ncbi_id: 14
|
|
369
|
+
name: Alternative Flatworm Mitochondrial
|
|
370
|
+
TABLE_16:
|
|
371
|
+
description: Chlorophycean mitochondrial code
|
|
372
|
+
annotations:
|
|
373
|
+
ncbi_id: 16
|
|
374
|
+
name: Chlorophycean Mitochondrial
|
|
375
|
+
TABLE_21:
|
|
376
|
+
description: Trematode mitochondrial code
|
|
377
|
+
annotations:
|
|
378
|
+
ncbi_id: 21
|
|
379
|
+
name: Trematode Mitochondrial
|
|
380
|
+
TABLE_22:
|
|
381
|
+
description: Scenedesmus obliquus mitochondrial code
|
|
382
|
+
annotations:
|
|
383
|
+
ncbi_id: 22
|
|
384
|
+
name: Scenedesmus Mitochondrial
|
|
385
|
+
TABLE_23:
|
|
386
|
+
description: Thraustochytrium mitochondrial code
|
|
387
|
+
annotations:
|
|
388
|
+
ncbi_id: 23
|
|
389
|
+
name: Thraustochytrium Mitochondrial
|
|
390
|
+
TABLE_24:
|
|
391
|
+
description: Rhabdopleuridae mitochondrial code
|
|
392
|
+
annotations:
|
|
393
|
+
ncbi_id: 24
|
|
394
|
+
name: Rhabdopleuridae Mitochondrial
|
|
395
|
+
TABLE_25:
|
|
396
|
+
description: Candidate division SR1 and gracilibacteria code
|
|
397
|
+
annotations:
|
|
398
|
+
ncbi_id: 25
|
|
399
|
+
name: Candidate Division SR1
|
|
400
|
+
TABLE_26:
|
|
401
|
+
description: Pachysolen tannophilus nuclear code
|
|
402
|
+
annotations:
|
|
403
|
+
ncbi_id: 26
|
|
404
|
+
name: Pachysolen Nuclear
|
|
405
|
+
TABLE_27:
|
|
406
|
+
description: Karyorelict nuclear code
|
|
407
|
+
annotations:
|
|
408
|
+
ncbi_id: 27
|
|
409
|
+
name: Karyorelict Nuclear
|
|
410
|
+
TABLE_28:
|
|
411
|
+
description: Condylostoma nuclear code
|
|
412
|
+
annotations:
|
|
413
|
+
ncbi_id: 28
|
|
414
|
+
name: Condylostoma Nuclear
|
|
415
|
+
TABLE_29:
|
|
416
|
+
description: Mesodinium nuclear code
|
|
417
|
+
annotations:
|
|
418
|
+
ncbi_id: 29
|
|
419
|
+
name: Mesodinium Nuclear
|
|
420
|
+
TABLE_30:
|
|
421
|
+
description: Peritrich nuclear code
|
|
422
|
+
annotations:
|
|
423
|
+
ncbi_id: 30
|
|
424
|
+
name: Peritrich Nuclear
|
|
425
|
+
TABLE_31:
|
|
426
|
+
description: Blastocrithidia nuclear code
|
|
427
|
+
annotations:
|
|
428
|
+
ncbi_id: 31
|
|
429
|
+
name: Blastocrithidia Nuclear
|
|
430
|
+
SequenceStrand:
|
|
431
|
+
title: DNA/RNA Strand Orientation
|
|
432
|
+
description: Strand orientation for nucleic acid sequences
|
|
433
|
+
permissible_values:
|
|
434
|
+
PLUS:
|
|
435
|
+
title: PLUS
|
|
436
|
+
description: Plus/forward/sense strand (5' to 3')
|
|
437
|
+
MINUS:
|
|
438
|
+
title: MINUS
|
|
439
|
+
description: Minus/reverse/antisense strand (3' to 5')
|
|
440
|
+
BOTH:
|
|
441
|
+
description: Both strands
|
|
442
|
+
UNKNOWN:
|
|
443
|
+
description: Strand not specified or unknown
|
|
444
|
+
SequenceTopology:
|
|
445
|
+
title: Sequence Topology
|
|
446
|
+
description: Topological structure of nucleic acid molecules
|
|
447
|
+
permissible_values:
|
|
448
|
+
LINEAR:
|
|
449
|
+
description: Linear sequence molecule
|
|
450
|
+
meaning: SO:0000987
|
|
451
|
+
CIRCULAR:
|
|
452
|
+
description: Circular sequence molecule
|
|
453
|
+
meaning: SO:0000988
|
|
454
|
+
BRANCHED:
|
|
455
|
+
description: Branched sequence structure
|
|
456
|
+
UNKNOWN:
|
|
457
|
+
description: Topology not specified
|
|
458
|
+
SequenceModality:
|
|
459
|
+
title: Sequence Data Modality
|
|
460
|
+
description: Types of sequence data based on experimental method
|
|
461
|
+
permissible_values:
|
|
462
|
+
SINGLE_CELL:
|
|
463
|
+
description: Single-cell sequencing data
|
|
464
|
+
BULK:
|
|
465
|
+
description: Bulk/population sequencing data
|
|
466
|
+
SPATIAL:
|
|
467
|
+
description: Spatially-resolved sequencing
|
|
468
|
+
LONG_READ:
|
|
469
|
+
description: Long-read sequencing (PacBio, Oxford Nanopore)
|
|
470
|
+
SHORT_READ:
|
|
471
|
+
description: Short-read sequencing (Illumina)
|
|
472
|
+
PAIRED_END:
|
|
473
|
+
description: Paired-end sequencing reads
|
|
474
|
+
SINGLE_END:
|
|
475
|
+
description: Single-end sequencing reads
|
|
476
|
+
MATE_PAIR:
|
|
477
|
+
description: Mate-pair sequencing libraries
|