supremo-lite 0.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025, Gladstone Institutes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: supremo_lite
3
+ Version: 0.5.4
4
+ Summary: A lightweight memory first, model agnostic version of SuPreMo
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Natalie Gill
8
+ Requires-Python: >=3.9,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Provides-Extra: fast
18
+ Requires-Dist: brisket (>=0.1.2) ; extra == "fast"
19
+ Requires-Dist: pandas (>=1.5.0)
20
+ Requires-Dist: pyfaidx (>=0.7.0)
21
+ Description-Content-Type: text/markdown
22
+
23
+ # supremo_lite
24
+
25
+ A lightweight memory-first, model-agnostic version of [SuPreMo](https://github.com/ketringjoni/SuPreMo).
26
+
27
+ ## Key Features
28
+
29
+ - 🧬 **Personalized Genome Generation**: Apply variants from VCF files to reference genomes
30
+ - 🎯 **Variant-Centered Sequences**: Generate sequence windows around variants
31
+ - ✂️ **PAM Site Analysis**: Identify variants that disrupt CRISPR PAM sites
32
+ - 🧪 **Saturation Mutagenesis**: Systematic single-nucleotide mutations at every position for predictive modeling
33
+ - 🔧 **Memory Efficient**: Chunked processing for large VCF files
34
+ - 🗺️ **Chromosome Matching**: Optional handling of chromosome naming differences (chr1 ↔ 1, chrM ↔ MT) via `auto_map_chromosomes=True`
35
+ - ⚡ **PyTorch Integration**: Automatic tensor support when PyTorch is available
36
+
37
+ ## Installation
38
+
39
+ ### Install from GitHub (Recommended)
40
+
41
+ For the latest features and bug fixes:
42
+
43
+ ```bash
44
+ # Install the latest release from PyPI
45
+ pip install supremo_lite
46
+
47
+ # Or install a specific version/tag
48
+ pip install git+https://github.com/gladstone-institutes/supremo_lite.git@v0.5.0
49
+
50
+ # Or install from a specific branch
51
+ pip install git+https://github.com/gladstone-institutes/supremo_lite.git@main
52
+ ```
53
+
54
+ ### Dependencies
55
+
56
+ Required dependencies will be installed automatically:
57
+ - `pandas` - For VCF data handling
58
+ - `numpy` - For numerical operations
59
+ - `pyfaidx` - For FASTA file reading
60
+
61
+ Optional dependencies:
62
+ - `torch` - For PyTorch tensor support (automatically detected)
63
+ - [brisket](https://github.com/gladstone-institutes/brisket) - Cython-powered, faster one-hot encoding for DNA sequences (automatically detected)
64
+
65
+ ## Quick Start
66
+
67
+ ```python
68
+ import supremo_lite as sl
69
+ from pyfaidx import Fasta
70
+
71
+ # Load reference genome and variants
72
+ reference = Fasta('hg38.fa')
73
+ variants = sl.read_vcf('variants.vcf')
74
+ ```
75
+
76
+ ### DNA Sequence Encoding
77
+
78
+ supremo_lite uses **one-hot encoding** by default:
79
+ - `A` = `[1,0,0,0]`, `C` = `[0,1,0,0]`, `G` = `[0,0,1,0]`, `T` = `[0,0,0,1]`
80
+ - Ambiguous bases = `[0,0,0,0]`
81
+ - Returns PyTorch tensors when available, otherwise NumPy arrays
82
+
83
+ ### Personalized Genome Generation
84
+
85
+ ```python
86
+ # Apply variants to create personalized genome
87
+ personal_genome = sl.get_personal_genome(
88
+ reference_fn=reference,
89
+ variants_fn=variants,
90
+ encode=True, # One-hot encoded (or False for strings)
91
+ chunk_size=10000, # Process 10k variants at a time
92
+ verbose=True # Show progress
93
+ )
94
+
95
+ # If your VCF uses 'chr1' and reference uses '1', enable chromosome mapping
96
+ personal_genome = sl.get_personal_genome(
97
+ reference_fn=reference,
98
+ variants_fn=variants,
99
+ auto_map_chromosomes=True # Handle chromosome name differences
100
+ )
101
+ ```
102
+
103
+ **📖 [Full Guide: Personalized Genomes](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/personalization.md) | [Tutorial Notebook](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/02_personalized_genomes.ipynb)**
104
+
105
+ ### Variant-Centered Sequences
106
+
107
+ ```python
108
+ # Generate reference and alternate sequences around variants
109
+ # Note: get_alt_ref_sequences is a generator that yields chunks
110
+ results = list(sl.get_alt_ref_sequences(
111
+ reference_fn=reference,
112
+ variants_fn=variants,
113
+ seq_len=1000,
114
+ encode=True
115
+ ))
116
+ # Unpack from the first chunk
117
+ alt_seqs, ref_seqs, metadata = results[0]
118
+ # Returns: (n_variants, seq_len, 4) shaped arrays
119
+ ```
120
+
121
+ **📖 [Full Guide: Variant-Centered Sequences](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/variant_centered_sequences.md) | [Getting Started Notebook](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/01_getting_started.ipynb)**
122
+
123
+ ### Prediction Alignment
124
+
125
+ ```python
126
+ # Align model predictions accounting for variant coordinate changes
127
+ from supremo_lite.mock_models import TestModel
128
+
129
+ model = TestModel(n_targets=2, bin_size=8, crop_length=10)
130
+ ref_preds = model(ref_seqs)
131
+ alt_preds = model(alt_seqs)
132
+
133
+ ref_aligned, alt_aligned = sl.align_predictions_by_coordinate(
134
+ ref_pred=ref_preds[0],
135
+ alt_pred=alt_preds[0],
136
+ metadata=metadata[0],
137
+ prediction_type="1D",
138
+ bin_size=8,
139
+ crop_length=10
140
+ )
141
+ ```
142
+
143
+ **📖 [Full Guide: Prediction Alignment](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/prediction_alignment.md) | [Tutorial with Visualizations](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/03_prediction_alignment.ipynb)**
144
+
145
+ ### Saturation Mutagenesis
146
+
147
+ ```python
148
+ # Mutate every position in a region
149
+ ref_seq, alt_seqs, metadata = sl.get_sm_sequences(
150
+ chrom='chr1',
151
+ start=1000,
152
+ end=1100, # 100 bp → 300 mutations (3 per position)
153
+ reference_fasta=reference
154
+ )
155
+ ```
156
+
157
+ **📖 [Full Guide: Mutagenesis](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/mutagenesis.md)**
158
+
159
+ ## Documentation
160
+
161
+ ### 📚 User Guides
162
+ Detailed documentation for each major feature:
163
+ - **[Personalized Genomes](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/personalization.md)** - Apply variants to genomes
164
+ - **[Variant-Centered Sequences](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/variant_centered_sequences.md)** - Extract sequence windows around variants
165
+ - **[Prediction Alignment](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/prediction_alignment.md)** - Align model predictions for variant effect analysis
166
+ - **[Saturation Mutagenesis](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/mutagenesis.md)** - In-silico mutagenesis workflows
167
+ - **[Variant Classification](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/_static/images/variant_classification.png)** - Flow chart showing automatic variant classification logic
168
+
169
+ ### 📓 Interactive Tutorials
170
+ Hands-on Jupyter notebooks with visualizations:
171
+ - **[Getting Started](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/01_getting_started.ipynb)** - Installation and basic concepts
172
+ - **[Personalized Genomes](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/02_personalized_genomes.ipynb)** - Genome personalization workflows
173
+ - **[Prediction Alignment](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/03_prediction_alignment.ipynb)** - Complete prediction workflow with visualizations ⭐
174
+
175
+ ### 🔍 API Reference
176
+ **Core Functions:**
177
+ - `get_personal_genome()` - Generate personalized genomes
178
+ - `get_alt_ref_sequences()` - Generate variant-centered sequences
179
+ - `align_predictions_by_coordinate()` - Align model predictions
180
+ - `get_sm_sequences()` - Saturation mutagenesis
181
+ - `read_vcf()` - Read VCF files
182
+
183
+ For complete API documentation with all parameters, see the [docs/](https://github.com/gladstone-institutes/supremo_lite/tree/main/docs) directory.
184
+
185
+ ## Issues and Support
186
+
187
+ We welcome feedback, bug reports, and feature requests! If you encounter any issues or have suggestions for improvements, please:
188
+
189
+ 1. **Check existing issues** first to see if your problem has already been reported
190
+ 2. **File a new issue** on our [GitHub Issues page](https://github.com/gladstone-institutes/supremo_lite/issues)
191
+ 3. **Provide detailed information** including:
192
+ - Python version and operating system
193
+ - Package version (`supremo_lite.__version__`)
194
+ - Complete error messages and stack traces
195
+ - Minimal reproducible example
196
+ - Expected vs. actual behavior
197
+
198
+ ### Common Issues to Report
199
+
200
+ - **Performance problems** with large genomes or variant files
201
+ - **Unexpected behavior** with edge cases
202
+ - **Documentation gaps** or unclear examples
203
+ - **Feature requests** for new functionality
204
+
205
+
206
+ ## Contributing
207
+
208
+ Interested in contributing? Check out the contributing guidelines. Please note that this project is released with a Code of Conduct. By contributing to this project, you agree to abide by its terms.
209
+
210
+ ## License
211
+
212
+ `supremo_lite` was created by Natalie Gill and Sean Whalen, based on Sequence Mutator for Predictive Models ([SuPreMo](https://github.com/ketringjoni/SuPreMo)) by Katie Gjoni. It is licensed under the terms of the MIT license.
213
+
214
+ ## Credits
215
+
216
+ `supremo_lite` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
@@ -0,0 +1,194 @@
1
+ # supremo_lite
2
+
3
+ A lightweight memory-first, model-agnostic version of [SuPreMo](https://github.com/ketringjoni/SuPreMo).
4
+
5
+ ## Key Features
6
+
7
+ - 🧬 **Personalized Genome Generation**: Apply variants from VCF files to reference genomes
8
+ - 🎯 **Variant-Centered Sequences**: Generate sequence windows around variants
9
+ - ✂️ **PAM Site Analysis**: Identify variants that disrupt CRISPR PAM sites
10
+ - 🧪 **Saturation Mutagenesis**: Systematic single-nucleotide mutations at every position for predictive modeling
11
+ - 🔧 **Memory Efficient**: Chunked processing for large VCF files
12
+ - 🗺️ **Chromosome Matching**: Optional handling of chromosome naming differences (chr1 ↔ 1, chrM ↔ MT) via `auto_map_chromosomes=True`
13
+ - ⚡ **PyTorch Integration**: Automatic tensor support when PyTorch is available
14
+
15
+ ## Installation
16
+
17
+ ### Install from GitHub (Recommended)
18
+
19
+ For the latest features and bug fixes:
20
+
21
+ ```bash
22
+ # Install the latest release from PyPI
23
+ pip install supremo_lite
24
+
25
+ # Or install a specific version/tag
26
+ pip install git+https://github.com/gladstone-institutes/supremo_lite.git@v0.5.0
27
+
28
+ # Or install from a specific branch
29
+ pip install git+https://github.com/gladstone-institutes/supremo_lite.git@main
30
+ ```
31
+
32
+ ### Dependencies
33
+
34
+ Required dependencies will be installed automatically:
35
+ - `pandas` - For VCF data handling
36
+ - `numpy` - For numerical operations
37
+ - `pyfaidx` - For FASTA file reading
38
+
39
+ Optional dependencies:
40
+ - `torch` - For PyTorch tensor support (automatically detected)
41
+ - [brisket](https://github.com/gladstone-institutes/brisket) - Cython-powered, faster one-hot encoding for DNA sequences (automatically detected)
42
+
43
+ ## Quick Start
44
+
45
+ ```python
46
+ import supremo_lite as sl
47
+ from pyfaidx import Fasta
48
+
49
+ # Load reference genome and variants
50
+ reference = Fasta('hg38.fa')
51
+ variants = sl.read_vcf('variants.vcf')
52
+ ```
53
+
54
+ ### DNA Sequence Encoding
55
+
56
+ supremo_lite uses **one-hot encoding** by default:
57
+ - `A` = `[1,0,0,0]`, `C` = `[0,1,0,0]`, `G` = `[0,0,1,0]`, `T` = `[0,0,0,1]`
58
+ - Ambiguous bases = `[0,0,0,0]`
59
+ - Returns PyTorch tensors when available, otherwise NumPy arrays
60
+
61
+ ### Personalized Genome Generation
62
+
63
+ ```python
64
+ # Apply variants to create personalized genome
65
+ personal_genome = sl.get_personal_genome(
66
+ reference_fn=reference,
67
+ variants_fn=variants,
68
+ encode=True, # One-hot encoded (or False for strings)
69
+ chunk_size=10000, # Process 10k variants at a time
70
+ verbose=True # Show progress
71
+ )
72
+
73
+ # If your VCF uses 'chr1' and reference uses '1', enable chromosome mapping
74
+ personal_genome = sl.get_personal_genome(
75
+ reference_fn=reference,
76
+ variants_fn=variants,
77
+ auto_map_chromosomes=True # Handle chromosome name differences
78
+ )
79
+ ```
80
+
81
+ **📖 [Full Guide: Personalized Genomes](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/personalization.md) | [Tutorial Notebook](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/02_personalized_genomes.ipynb)**
82
+
83
+ ### Variant-Centered Sequences
84
+
85
+ ```python
86
+ # Generate reference and alternate sequences around variants
87
+ # Note: get_alt_ref_sequences is a generator that yields chunks
88
+ results = list(sl.get_alt_ref_sequences(
89
+ reference_fn=reference,
90
+ variants_fn=variants,
91
+ seq_len=1000,
92
+ encode=True
93
+ ))
94
+ # Unpack from the first chunk
95
+ alt_seqs, ref_seqs, metadata = results[0]
96
+ # Returns: (n_variants, seq_len, 4) shaped arrays
97
+ ```
98
+
99
+ **📖 [Full Guide: Variant-Centered Sequences](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/variant_centered_sequences.md) | [Getting Started Notebook](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/01_getting_started.ipynb)**
100
+
101
+ ### Prediction Alignment
102
+
103
+ ```python
104
+ # Align model predictions accounting for variant coordinate changes
105
+ from supremo_lite.mock_models import TestModel
106
+
107
+ model = TestModel(n_targets=2, bin_size=8, crop_length=10)
108
+ ref_preds = model(ref_seqs)
109
+ alt_preds = model(alt_seqs)
110
+
111
+ ref_aligned, alt_aligned = sl.align_predictions_by_coordinate(
112
+ ref_pred=ref_preds[0],
113
+ alt_pred=alt_preds[0],
114
+ metadata=metadata[0],
115
+ prediction_type="1D",
116
+ bin_size=8,
117
+ crop_length=10
118
+ )
119
+ ```
120
+
121
+ **📖 [Full Guide: Prediction Alignment](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/prediction_alignment.md) | [Tutorial with Visualizations](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/03_prediction_alignment.ipynb)**
122
+
123
+ ### Saturation Mutagenesis
124
+
125
+ ```python
126
+ # Mutate every position in a region
127
+ ref_seq, alt_seqs, metadata = sl.get_sm_sequences(
128
+ chrom='chr1',
129
+ start=1000,
130
+ end=1100, # 100 bp → 300 mutations (3 per position)
131
+ reference_fasta=reference
132
+ )
133
+ ```
134
+
135
+ **📖 [Full Guide: Mutagenesis](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/mutagenesis.md)**
136
+
137
+ ## Documentation
138
+
139
+ ### 📚 User Guides
140
+ Detailed documentation for each major feature:
141
+ - **[Personalized Genomes](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/personalization.md)** - Apply variants to genomes
142
+ - **[Variant-Centered Sequences](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/variant_centered_sequences.md)** - Extract sequence windows around variants
143
+ - **[Prediction Alignment](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/prediction_alignment.md)** - Align model predictions for variant effect analysis
144
+ - **[Saturation Mutagenesis](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/user_guide/mutagenesis.md)** - In-silico mutagenesis workflows
145
+ - **[Variant Classification](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/_static/images/variant_classification.png)** - Flow chart showing automatic variant classification logic
146
+
147
+ ### 📓 Interactive Tutorials
148
+ Hands-on Jupyter notebooks with visualizations:
149
+ - **[Getting Started](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/01_getting_started.ipynb)** - Installation and basic concepts
150
+ - **[Personalized Genomes](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/02_personalized_genomes.ipynb)** - Genome personalization workflows
151
+ - **[Prediction Alignment](https://github.com/gladstone-institutes/supremo_lite/blob/main/docs/notebooks/03_prediction_alignment.ipynb)** - Complete prediction workflow with visualizations ⭐
152
+
153
+ ### 🔍 API Reference
154
+ **Core Functions:**
155
+ - `get_personal_genome()` - Generate personalized genomes
156
+ - `get_alt_ref_sequences()` - Generate variant-centered sequences
157
+ - `align_predictions_by_coordinate()` - Align model predictions
158
+ - `get_sm_sequences()` - Saturation mutagenesis
159
+ - `read_vcf()` - Read VCF files
160
+
161
+ For complete API documentation with all parameters, see the [docs/](https://github.com/gladstone-institutes/supremo_lite/tree/main/docs) directory.
162
+
163
+ ## Issues and Support
164
+
165
+ We welcome feedback, bug reports, and feature requests! If you encounter any issues or have suggestions for improvements, please:
166
+
167
+ 1. **Check existing issues** first to see if your problem has already been reported
168
+ 2. **File a new issue** on our [GitHub Issues page](https://github.com/gladstone-institutes/supremo_lite/issues)
169
+ 3. **Provide detailed information** including:
170
+ - Python version and operating system
171
+ - Package version (`supremo_lite.__version__`)
172
+ - Complete error messages and stack traces
173
+ - Minimal reproducible example
174
+ - Expected vs. actual behavior
175
+
176
+ ### Common Issues to Report
177
+
178
+ - **Performance problems** with large genomes or variant files
179
+ - **Unexpected behavior** with edge cases
180
+ - **Documentation gaps** or unclear examples
181
+ - **Feature requests** for new functionality
182
+
183
+
184
+ ## Contributing
185
+
186
+ Interested in contributing? Check out the contributing guidelines. Please note that this project is released with a Code of Conduct. By contributing to this project, you agree to abide by its terms.
187
+
188
+ ## License
189
+
190
+ `supremo_lite` was created by Natalie Gill and Sean Whalen, based on Sequence Mutator for Predictive Models ([SuPreMo](https://github.com/ketringjoni/SuPreMo)) by Katie Gjoni. It is licensed under the terms of the MIT license.
191
+
192
+ ## Credits
193
+
194
+ `supremo_lite` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter).
@@ -0,0 +1,37 @@
1
+ [tool.poetry]
2
+ name = "supremo_lite"
3
+ version = "0.5.4"
4
+ description = "A lightweight memory first, model agnostic version of SuPreMo"
5
+ authors = ["Natalie Gill", "Sean Whalen"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.9"
11
+ pandas = ">=1.5.0"
12
+ pyfaidx = ">=0.7.0"
13
+ torch = {version = ">=1.13.0", optional = true}
14
+ brisket = {version = ">=0.1.2", optional = true}
15
+
16
+ [tool.poetry.extras]
17
+ fast = ["brisket"]
18
+
19
+ [tool.poetry.group.dev.dependencies]
20
+ pytest = ">=6.0.0"
21
+ pytest-cov = ">=3.0.0"
22
+ notebook = ">=6.0.0"
23
+ black = ">=20.0.0"
24
+ ipython = ">=7.0.0"
25
+ tokenize-rt = ">=3.2.0"
26
+ matplotlib = "3.9"
27
+ brisket = "^0.1.2"
28
+ torch = ">=1.13.0"
29
+ seaborn = "^0.13.2"
30
+ sphinx = "<8.0"
31
+ myst-nb = "^1.3.0"
32
+ sphinx-autoapi = "^3.6.1"
33
+ sphinx-rtd-theme = "^3.0.2"
34
+
35
+ [build-system]
36
+ requires = ["poetry-core>=1.0.0"]
37
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,59 @@
1
+ """
2
+ supremo_lite: A module for generating personalized genome sequences from a reference
3
+ fasta and a variants file, or sequences for in-silico mutagenesis.
4
+
5
+ This package provides functionality for:
6
+ - Sequence encoding and transformation
7
+ - Variant reading and application
8
+ - In-silico mutagenesis
9
+ """
10
+
11
+ # Import core components
12
+ from .core import TORCH_AVAILABLE, BRISKET_AVAILABLE, nt_to_1h, nts
13
+
14
+ # Import sequence transformation utilities
15
+ from .sequence_utils import encode_seq, decode_seq, rc, rc_str
16
+
17
+ # Import variant reading utilities
18
+ from .variant_utils import (
19
+ read_vcf,
20
+ read_vcf_chunked,
21
+ get_vcf_chromosomes,
22
+ read_vcf_chromosome,
23
+ classify_variant_type,
24
+ parse_vcf_info,
25
+ )
26
+
27
+ # Import chromosome matching utilities
28
+ from .chromosome_utils import (
29
+ normalize_chromosome_name,
30
+ create_chromosome_mapping,
31
+ match_chromosomes_with_report,
32
+ ChromosomeMismatchError,
33
+ )
34
+
35
+ # Import personalize functions
36
+ from .personalize import (
37
+ get_personal_genome,
38
+ get_alt_sequences,
39
+ get_ref_sequences,
40
+ get_pam_disrupting_alt_sequences,
41
+ get_alt_ref_sequences,
42
+ )
43
+
44
+ # Import mutagenesis functions
45
+ from .mutagenesis import get_sm_sequences, get_sm_subsequences
46
+
47
+ # Import prediction alignment functions
48
+ from .prediction_alignment import align_predictions_by_coordinate
49
+
50
+ # Mock models are available in a separate submodule
51
+ # Import with: from supremo_lite.mock_models import TestModel, TestModel2D
52
+ # This allows users who don't have PyTorch to still use the main package
53
+
54
+ # Version
55
+ __version__ = "0.5.4"
56
+ # Package metadata
57
+ __description__ = (
58
+ "A module for generating personalized genome sequences and in-silico mutagenesis"
59
+ )