trace-crispr 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trace_crispr-0.1.0/LICENSE +21 -0
- trace_crispr-0.1.0/PKG-INFO +204 -0
- trace_crispr-0.1.0/README.md +166 -0
- trace_crispr-0.1.0/pyproject.toml +73 -0
- trace_crispr-0.1.0/setup.cfg +4 -0
- trace_crispr-0.1.0/tests/__init__.py +1 -0
- trace_crispr-0.1.0/tests/test_config.py +129 -0
- trace_crispr-0.1.0/tests/test_core.py +257 -0
- trace_crispr-0.1.0/tests/test_utils.py +131 -0
- trace_crispr-0.1.0/trace_crispr/__init__.py +26 -0
- trace_crispr-0.1.0/trace_crispr/__main__.py +6 -0
- trace_crispr-0.1.0/trace_crispr/cli.py +313 -0
- trace_crispr-0.1.0/trace_crispr/config.py +389 -0
- trace_crispr-0.1.0/trace_crispr/core/__init__.py +68 -0
- trace_crispr-0.1.0/trace_crispr/core/cigar.py +308 -0
- trace_crispr-0.1.0/trace_crispr/core/classification.py +366 -0
- trace_crispr-0.1.0/trace_crispr/core/kmer.py +280 -0
- trace_crispr-0.1.0/trace_crispr/core/scoring.py +257 -0
- trace_crispr-0.1.0/trace_crispr/integrations/__init__.py +34 -0
- trace_crispr-0.1.0/trace_crispr/integrations/aligners.py +378 -0
- trace_crispr-0.1.0/trace_crispr/integrations/crispresso.py +316 -0
- trace_crispr-0.1.0/trace_crispr/io/__init__.py +28 -0
- trace_crispr-0.1.0/trace_crispr/io/output.py +220 -0
- trace_crispr-0.1.0/trace_crispr/io/sample_key.py +120 -0
- trace_crispr-0.1.0/trace_crispr/pipeline.py +425 -0
- trace_crispr-0.1.0/trace_crispr/preprocessing/__init__.py +52 -0
- trace_crispr-0.1.0/trace_crispr/preprocessing/contamination.py +211 -0
- trace_crispr-0.1.0/trace_crispr/preprocessing/detection.py +274 -0
- trace_crispr-0.1.0/trace_crispr/preprocessing/trimming.py +204 -0
- trace_crispr-0.1.0/trace_crispr/qc/__init__.py +0 -0
- trace_crispr-0.1.0/trace_crispr/utils/__init__.py +35 -0
- trace_crispr-0.1.0/trace_crispr/utils/sequence.py +255 -0
- trace_crispr-0.1.0/trace_crispr.egg-info/PKG-INFO +204 -0
- trace_crispr-0.1.0/trace_crispr.egg-info/SOURCES.txt +36 -0
- trace_crispr-0.1.0/trace_crispr.egg-info/dependency_links.txt +1 -0
- trace_crispr-0.1.0/trace_crispr.egg-info/entry_points.txt +2 -0
- trace_crispr-0.1.0/trace_crispr.egg-info/requires.txt +18 -0
- trace_crispr-0.1.0/trace_crispr.egg-info/top_level.txt +5 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kevin R. Roy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: trace-crispr
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TRACE: Triple-aligner Read Analysis for CRISPR Editing
|
|
5
|
+
Author-email: "Kevin R. Roy" <kevinroy@stanford.edu>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/k-roy/trace
|
|
8
|
+
Project-URL: Documentation, https://trace-crispr.readthedocs.io
|
|
9
|
+
Project-URL: Repository, https://github.com/k-roy/trace
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: click>=8.0
|
|
22
|
+
Requires-Dist: pysam>=0.20
|
|
23
|
+
Requires-Dist: pandas>=1.5
|
|
24
|
+
Requires-Dist: numpy>=1.20
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Requires-Dist: rapidfuzz>=3.0
|
|
27
|
+
Requires-Dist: tqdm>=4.60
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
31
|
+
Requires-Dist: black; extra == "dev"
|
|
32
|
+
Requires-Dist: ruff; extra == "dev"
|
|
33
|
+
Requires-Dist: mypy; extra == "dev"
|
|
34
|
+
Provides-Extra: visualization
|
|
35
|
+
Requires-Dist: matplotlib>=3.5; extra == "visualization"
|
|
36
|
+
Requires-Dist: seaborn>=0.12; extra == "visualization"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# TRACE
|
|
40
|
+
|
|
41
|
+
**T**riple-aligner **R**ead **A**nalysis for **C**RISPR **E**diting
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- **Triple-aligner consensus**: Uses BWA-MEM, BBMap, and minimap2 for robust alignment
|
|
46
|
+
- **Automatic inference**: Detects PAM, cleavage site, homology arms, and edits from sequences
|
|
47
|
+
- **K-mer classification**: Fast pre-alignment HDR/WT detection using 12-mers
|
|
48
|
+
- **Multi-nuclease support**: Cas9 and Cas12a (Cpf1) with correct cleavage geometry
|
|
49
|
+
- **Auto-detection**: Library type (TruSeq/Tn5), read merging need, CRISPResso mode
|
|
50
|
+
- **CRISPResso2 integration**: Validation with standard CRISPR analysis tool
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
### pip (Python package only)
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install trace-crispr
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### conda (includes external aligners)
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
conda install -c bioconda -c conda-forge trace-crispr
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Development installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
git clone https://github.com/k-roy/trace.git
|
|
70
|
+
cd trace
|
|
71
|
+
pip install -e ".[dev]"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
### Minimal run (3 required inputs)
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
trace run \
|
|
80
|
+
--reference amplicon.fasta \
|
|
81
|
+
--hdr-template hdr_template.fasta \
|
|
82
|
+
--guide GCTGAAGCACTGCACGCCGT \
|
|
83
|
+
--r1 sample_R1.fastq.gz \
|
|
84
|
+
--r2 sample_R2.fastq.gz \
|
|
85
|
+
--output results/
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Check locus configuration without running
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
trace info \
|
|
92
|
+
--reference amplicon.fasta \
|
|
93
|
+
--hdr-template hdr_template.fasta \
|
|
94
|
+
--guide GCTGAAGCACTGCACGCCGT
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
This will print:
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
=== TRACE Analysis Configuration ===
|
|
101
|
+
|
|
102
|
+
Reference sequence: 500 bp
|
|
103
|
+
HDR template: 500 bp
|
|
104
|
+
|
|
105
|
+
Donor template analysis:
|
|
106
|
+
- Left homology arm: positions 1-245 on reference (245 bp)
|
|
107
|
+
- Right homology arm: positions 255-500 on reference (246 bp)
|
|
108
|
+
- Donor edits detected at positions: 246, 247 on reference
|
|
109
|
+
* Position 246: C → G (PAM-silencing mutation)
|
|
110
|
+
* Position 247: C → T (chromophore Y66H mutation)
|
|
111
|
+
|
|
112
|
+
Guide analysis:
|
|
113
|
+
- Guide sequence: GCTGAAGCACTGCACGCCGT
|
|
114
|
+
- Guide targets: positions 248-267 on reference (- strand)
|
|
115
|
+
- PAM: GGG at positions 245-247 on reference
|
|
116
|
+
- Cleavage site: position 248 on reference
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Multiple samples
|
|
120
|
+
|
|
121
|
+
Create a sample key TSV:
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
sample_id r1_path r2_path condition
|
|
125
|
+
sample_1 /path/to/S1_R1.fastq.gz /path/to/S1_R2.fastq.gz treatment
|
|
126
|
+
sample_2 /path/to/S2_R1.fastq.gz /path/to/S2_R2.fastq.gz control
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Then run:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
trace run \
|
|
133
|
+
--reference amplicon.fasta \
|
|
134
|
+
--hdr-template hdr_template.fasta \
|
|
135
|
+
--guide GCTGAAGCACTGCACGCCGT \
|
|
136
|
+
--sample-key samples.tsv \
|
|
137
|
+
--output results/ \
|
|
138
|
+
--threads 16
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Using Cas12a
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
trace run \
|
|
145
|
+
--reference amplicon.fasta \
|
|
146
|
+
--hdr-template hdr_template.fasta \
|
|
147
|
+
--guide GCTGAAGCACTGCACGCCGTAA \
|
|
148
|
+
--nuclease cas12a \
|
|
149
|
+
--sample-key samples.tsv \
|
|
150
|
+
--output results/
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Nuclease Support
|
|
154
|
+
|
|
155
|
+
### Cas9 (SpCas9)
|
|
156
|
+
- PAM: NGG (3' of protospacer)
|
|
157
|
+
- Cleavage: 3 bp upstream of PAM (blunt ends)
|
|
158
|
+
|
|
159
|
+
### Cas12a (LbCpf1)
|
|
160
|
+
- PAM: TTTN (5' of protospacer)
|
|
161
|
+
- Cleavage: 18-19 bp downstream of the PAM on the non-target strand, 23 bp on the target strand
|
|
162
|
+
- Creates 4-5 nt 5' overhang (staggered cut)
|
|
163
|
+
|
|
164
|
+
## Output
|
|
165
|
+
|
|
166
|
+
The main output is a TSV file with per-sample editing outcomes:
|
|
167
|
+
|
|
168
|
+
| Column | Description |
|
|
169
|
+
|--------|-------------|
|
|
170
|
+
| sample | Sample ID |
|
|
171
|
+
| classifiable_reads | Total classifiable reads |
|
|
172
|
+
| duplicate_rate | PCR duplicate rate (Tn5) |
|
|
173
|
+
| Dedup_WT_% | Wild-type % (deduplicated) |
|
|
174
|
+
| Dedup_HDR_% | HDR % (deduplicated) |
|
|
175
|
+
| Dedup_NHEJ_% | NHEJ % (deduplicated) |
|
|
176
|
+
| Dedup_LgDel_% | Large deletion % |
|
|
177
|
+
| kmer_hdr_rate | K-mer method HDR rate |
|
|
178
|
+
| crispresso_hdr_rate | CRISPResso2 HDR rate |
|
|
179
|
+
|
|
180
|
+
## Dependencies
|
|
181
|
+
|
|
182
|
+
### Python
|
|
183
|
+
- click>=8.0
|
|
184
|
+
- pysam>=0.20
|
|
185
|
+
- pandas>=1.5
|
|
186
|
+
- numpy>=1.20
|
|
187
|
+
- pyyaml>=6.0
|
|
188
|
+
- rapidfuzz>=3.0
|
|
189
|
+
- tqdm>=4.60
|
|
190
|
+
|
|
191
|
+
### External tools (via conda)
|
|
192
|
+
- bwa>=0.7
|
|
193
|
+
- bbmap>=39
|
|
194
|
+
- minimap2>=2.24
|
|
195
|
+
- samtools>=1.16
|
|
196
|
+
- crispresso2 (optional, but enabled by default)
|
|
197
|
+
|
|
198
|
+
## Author
|
|
199
|
+
|
|
200
|
+
Kevin R. Roy
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# TRACE
|
|
2
|
+
|
|
3
|
+
**T**riple-aligner **R**ead **A**nalysis for **C**RISPR **E**diting
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Triple-aligner consensus**: Uses BWA-MEM, BBMap, and minimap2 for robust alignment
|
|
8
|
+
- **Automatic inference**: Detects PAM, cleavage site, homology arms, and edits from sequences
|
|
9
|
+
- **K-mer classification**: Fast pre-alignment HDR/WT detection using 12-mers
|
|
10
|
+
- **Multi-nuclease support**: Cas9 and Cas12a (Cpf1) with correct cleavage geometry
|
|
11
|
+
- **Auto-detection**: Library type (TruSeq/Tn5), read merging need, CRISPResso mode
|
|
12
|
+
- **CRISPResso2 integration**: Validation with standard CRISPR analysis tool
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
### pip (Python package only)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install trace-crispr
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### conda (includes external aligners)
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
conda install -c bioconda -c conda-forge trace-crispr
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Development installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
git clone https://github.com/k-roy/trace.git
|
|
32
|
+
cd trace
|
|
33
|
+
pip install -e ".[dev]"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
### Minimal run (3 required inputs)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
trace run \
|
|
42
|
+
--reference amplicon.fasta \
|
|
43
|
+
--hdr-template hdr_template.fasta \
|
|
44
|
+
--guide GCTGAAGCACTGCACGCCGT \
|
|
45
|
+
--r1 sample_R1.fastq.gz \
|
|
46
|
+
--r2 sample_R2.fastq.gz \
|
|
47
|
+
--output results/
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Check locus configuration without running
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
trace info \
|
|
54
|
+
--reference amplicon.fasta \
|
|
55
|
+
--hdr-template hdr_template.fasta \
|
|
56
|
+
--guide GCTGAAGCACTGCACGCCGT
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
This will print:
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
=== TRACE Analysis Configuration ===
|
|
63
|
+
|
|
64
|
+
Reference sequence: 500 bp
|
|
65
|
+
HDR template: 500 bp
|
|
66
|
+
|
|
67
|
+
Donor template analysis:
|
|
68
|
+
- Left homology arm: positions 1-245 on reference (245 bp)
|
|
69
|
+
- Right homology arm: positions 255-500 on reference (246 bp)
|
|
70
|
+
- Donor edits detected at positions: 246, 247 on reference
|
|
71
|
+
* Position 246: C → G (PAM-silencing mutation)
|
|
72
|
+
* Position 247: C → T (chromophore Y66H mutation)
|
|
73
|
+
|
|
74
|
+
Guide analysis:
|
|
75
|
+
- Guide sequence: GCTGAAGCACTGCACGCCGT
|
|
76
|
+
- Guide targets: positions 248-267 on reference (- strand)
|
|
77
|
+
- PAM: GGG at positions 245-247 on reference
|
|
78
|
+
- Cleavage site: position 248 on reference
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Multiple samples
|
|
82
|
+
|
|
83
|
+
Create a sample key TSV:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
sample_id r1_path r2_path condition
|
|
87
|
+
sample_1 /path/to/S1_R1.fastq.gz /path/to/S1_R2.fastq.gz treatment
|
|
88
|
+
sample_2 /path/to/S2_R1.fastq.gz /path/to/S2_R2.fastq.gz control
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Then run:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
trace run \
|
|
95
|
+
--reference amplicon.fasta \
|
|
96
|
+
--hdr-template hdr_template.fasta \
|
|
97
|
+
--guide GCTGAAGCACTGCACGCCGT \
|
|
98
|
+
--sample-key samples.tsv \
|
|
99
|
+
--output results/ \
|
|
100
|
+
--threads 16
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Using Cas12a
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
trace run \
|
|
107
|
+
--reference amplicon.fasta \
|
|
108
|
+
--hdr-template hdr_template.fasta \
|
|
109
|
+
--guide GCTGAAGCACTGCACGCCGTAA \
|
|
110
|
+
--nuclease cas12a \
|
|
111
|
+
--sample-key samples.tsv \
|
|
112
|
+
--output results/
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Nuclease Support
|
|
116
|
+
|
|
117
|
+
### Cas9 (SpCas9)
|
|
118
|
+
- PAM: NGG (3' of protospacer)
|
|
119
|
+
- Cleavage: 3 bp upstream of PAM (blunt ends)
|
|
120
|
+
|
|
121
|
+
### Cas12a (LbCpf1)
|
|
122
|
+
- PAM: TTTN (5' of protospacer)
|
|
123
|
+
- Cleavage: 18-19 bp downstream of the PAM on the non-target strand, 23 bp on the target strand
|
|
124
|
+
- Creates 4-5 nt 5' overhang (staggered cut)
|
|
125
|
+
|
|
126
|
+
## Output
|
|
127
|
+
|
|
128
|
+
The main output is a TSV file with per-sample editing outcomes:
|
|
129
|
+
|
|
130
|
+
| Column | Description |
|
|
131
|
+
|--------|-------------|
|
|
132
|
+
| sample | Sample ID |
|
|
133
|
+
| classifiable_reads | Total classifiable reads |
|
|
134
|
+
| duplicate_rate | PCR duplicate rate (Tn5) |
|
|
135
|
+
| Dedup_WT_% | Wild-type % (deduplicated) |
|
|
136
|
+
| Dedup_HDR_% | HDR % (deduplicated) |
|
|
137
|
+
| Dedup_NHEJ_% | NHEJ % (deduplicated) |
|
|
138
|
+
| Dedup_LgDel_% | Large deletion % |
|
|
139
|
+
| kmer_hdr_rate | K-mer method HDR rate |
|
|
140
|
+
| crispresso_hdr_rate | CRISPResso2 HDR rate |
|
|
141
|
+
|
|
142
|
+
## Dependencies
|
|
143
|
+
|
|
144
|
+
### Python
|
|
145
|
+
- click>=8.0
|
|
146
|
+
- pysam>=0.20
|
|
147
|
+
- pandas>=1.5
|
|
148
|
+
- numpy>=1.20
|
|
149
|
+
- pyyaml>=6.0
|
|
150
|
+
- rapidfuzz>=3.0
|
|
151
|
+
- tqdm>=4.60
|
|
152
|
+
|
|
153
|
+
### External tools (via conda)
|
|
154
|
+
- bwa>=0.7
|
|
155
|
+
- bbmap>=39
|
|
156
|
+
- minimap2>=2.24
|
|
157
|
+
- samtools>=1.16
|
|
158
|
+
- crispresso2 (optional, but enabled by default)
|
|
159
|
+
|
|
160
|
+
## Author
|
|
161
|
+
|
|
162
|
+
Kevin R. Roy
|
|
163
|
+
|
|
164
|
+
## License
|
|
165
|
+
|
|
166
|
+
MIT
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "trace-crispr"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "TRACE: Triple-aligner Read Analysis for CRISPR Editing"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [{name = "Kevin R. Roy", email = "kevinroy@stanford.edu"}]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Science/Research",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.9",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
21
|
+
]
|
|
22
|
+
requires-python = ">=3.9"
|
|
23
|
+
dependencies = [
|
|
24
|
+
"click>=8.0",
|
|
25
|
+
"pysam>=0.20",
|
|
26
|
+
"pandas>=1.5",
|
|
27
|
+
"numpy>=1.20",
|
|
28
|
+
"pyyaml>=6.0",
|
|
29
|
+
"rapidfuzz>=3.0",
|
|
30
|
+
"tqdm>=4.60",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
dev = [
|
|
35
|
+
"pytest>=7.0",
|
|
36
|
+
"pytest-cov",
|
|
37
|
+
"black",
|
|
38
|
+
"ruff",
|
|
39
|
+
"mypy",
|
|
40
|
+
]
|
|
41
|
+
visualization = [
|
|
42
|
+
"matplotlib>=3.5",
|
|
43
|
+
"seaborn>=0.12",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[project.scripts]
|
|
47
|
+
trace = "trace_crispr.cli:cli"
|
|
48
|
+
|
|
49
|
+
[project.urls]
|
|
50
|
+
Homepage = "https://github.com/k-roy/trace"
|
|
51
|
+
Documentation = "https://trace-crispr.readthedocs.io"
|
|
52
|
+
Repository = "https://github.com/k-roy/trace"
|
|
53
|
+
|
|
54
|
+
[tool.setuptools.packages.find]
|
|
55
|
+
where = ["."]
|
|
56
|
+
|
|
57
|
+
[tool.setuptools.package-data]
|
|
58
|
+
trace_crispr = ["templates/*"]
|
|
59
|
+
|
|
60
|
+
[tool.black]
|
|
61
|
+
line-length = 100
|
|
62
|
+
target-version = ['py39', 'py310', 'py311']
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
|
66
|
+
select = ["E", "F", "I", "N", "W"]
|
|
67
|
+
ignore = ["E501"]
|
|
68
|
+
|
|
69
|
+
[tool.mypy]
|
|
70
|
+
python_version = "3.9"
|
|
71
|
+
warn_return_any = true
|
|
72
|
+
warn_unused_configs = true
|
|
73
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Tests for the trace_crispr package."""
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Tests for the trace_crispr.config module."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from trace_crispr.config import LocusConfig, NucleaseType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# BFP/GFP test sequences
|
|
8
|
+
BFP_REFERENCE = (
|
|
9
|
+
"TGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACC"
|
|
10
|
+
"CACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCC"
|
|
11
|
+
"CGAAGGCTACGTCCAGGAGCGCACCAT"
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
GFP_HDR_TEMPLATE = (
|
|
15
|
+
"TGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACG"
|
|
16
|
+
"TACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCC"
|
|
17
|
+
"CGAAGGCTACGTCCAGGAGCGCACCAT"
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
GUIDE_SEQUENCE = "GCTGAAGCACTGCACGCCGT"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestLocusConfig:
|
|
24
|
+
"""Test LocusConfig class."""
|
|
25
|
+
|
|
26
|
+
def test_basic_initialization(self):
|
|
27
|
+
"""Test basic LocusConfig creation."""
|
|
28
|
+
locus = LocusConfig(
|
|
29
|
+
name="test",
|
|
30
|
+
reference=BFP_REFERENCE,
|
|
31
|
+
hdr_template=GFP_HDR_TEMPLATE,
|
|
32
|
+
guide=GUIDE_SEQUENCE,
|
|
33
|
+
nuclease=NucleaseType.CAS9,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
assert locus.name == "test"
|
|
37
|
+
assert locus.reference == BFP_REFERENCE
|
|
38
|
+
assert locus.hdr_template == GFP_HDR_TEMPLATE
|
|
39
|
+
assert locus.guide == GUIDE_SEQUENCE
|
|
40
|
+
assert locus.nuclease == NucleaseType.CAS9
|
|
41
|
+
|
|
42
|
+
def test_analyze_detects_edits(self):
|
|
43
|
+
"""Test that analyze() detects HDR edits."""
|
|
44
|
+
locus = LocusConfig(
|
|
45
|
+
name="test",
|
|
46
|
+
reference=BFP_REFERENCE,
|
|
47
|
+
hdr_template=GFP_HDR_TEMPLATE,
|
|
48
|
+
guide=GUIDE_SEQUENCE,
|
|
49
|
+
nuclease=NucleaseType.CAS9,
|
|
50
|
+
).analyze()
|
|
51
|
+
|
|
52
|
+
assert locus.edits is not None
|
|
53
|
+
assert len(locus.edits) == 2
|
|
54
|
+
|
|
55
|
+
# Check edit positions (0-indexed: 70, 71)
|
|
56
|
+
edit_positions = [e.position for e in locus.edits]
|
|
57
|
+
assert 70 in edit_positions
|
|
58
|
+
assert 71 in edit_positions
|
|
59
|
+
|
|
60
|
+
def test_analyze_detects_homology_arms(self):
|
|
61
|
+
"""Test that analyze() detects homology arms."""
|
|
62
|
+
locus = LocusConfig(
|
|
63
|
+
name="test",
|
|
64
|
+
reference=BFP_REFERENCE,
|
|
65
|
+
hdr_template=GFP_HDR_TEMPLATE,
|
|
66
|
+
guide=GUIDE_SEQUENCE,
|
|
67
|
+
nuclease=NucleaseType.CAS9,
|
|
68
|
+
).analyze()
|
|
69
|
+
|
|
70
|
+
assert locus.homology_arms is not None
|
|
71
|
+
assert locus.homology_arms.left_start == 0
|
|
72
|
+
assert locus.homology_arms.left_end == 70 # First edit at position 70
|
|
73
|
+
|
|
74
|
+
def test_analyze_finds_guide_on_minus_strand(self):
|
|
75
|
+
"""Test that analyze() finds the guide on the minus strand."""
|
|
76
|
+
locus = LocusConfig(
|
|
77
|
+
name="test",
|
|
78
|
+
reference=BFP_REFERENCE,
|
|
79
|
+
hdr_template=GFP_HDR_TEMPLATE,
|
|
80
|
+
guide=GUIDE_SEQUENCE,
|
|
81
|
+
nuclease=NucleaseType.CAS9,
|
|
82
|
+
).analyze()
|
|
83
|
+
|
|
84
|
+
assert locus.guide_info is not None
|
|
85
|
+
assert locus.guide_info.strand == '-'
|
|
86
|
+
# Guide is on minus strand, so PAM is upstream
|
|
87
|
+
|
|
88
|
+
def test_analyze_calculates_cleavage_site(self):
|
|
89
|
+
"""Test that analyze() calculates cleavage site for Cas9."""
|
|
90
|
+
locus = LocusConfig(
|
|
91
|
+
name="test",
|
|
92
|
+
reference=BFP_REFERENCE,
|
|
93
|
+
hdr_template=GFP_HDR_TEMPLATE,
|
|
94
|
+
guide=GUIDE_SEQUENCE,
|
|
95
|
+
nuclease=NucleaseType.CAS9,
|
|
96
|
+
).analyze()
|
|
97
|
+
|
|
98
|
+
assert locus.guide_info is not None
|
|
99
|
+
# Cleavage site should be within reasonable distance of guide
|
|
100
|
+
assert locus.guide_info.cleavage_site > 0
|
|
101
|
+
|
|
102
|
+
def test_guide_not_found_raises_error(self):
|
|
103
|
+
"""Test that analyze() raises error when guide not found."""
|
|
104
|
+
locus = LocusConfig(
|
|
105
|
+
name="test",
|
|
106
|
+
reference=BFP_REFERENCE,
|
|
107
|
+
hdr_template=GFP_HDR_TEMPLATE,
|
|
108
|
+
guide="NNNNNNNNNNNNNNNNNNNN", # Non-existent guide
|
|
109
|
+
nuclease=NucleaseType.CAS9,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
with pytest.raises(ValueError, match="not found"):
|
|
113
|
+
locus.analyze()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class TestNucleaseType:
|
|
117
|
+
"""Test NucleaseType enum."""
|
|
118
|
+
|
|
119
|
+
def test_cas9_value(self):
|
|
120
|
+
"""Test Cas9 enum value."""
|
|
121
|
+
assert NucleaseType.CAS9.value == "cas9"
|
|
122
|
+
|
|
123
|
+
def test_cas12a_value(self):
|
|
124
|
+
"""Test Cas12a enum value."""
|
|
125
|
+
assert NucleaseType.CAS12A.value == "cas12a"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
if __name__ == "__main__":
|
|
129
|
+
pytest.main([__file__, "-v"])
|