trace-crispr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. trace_crispr-0.1.0/LICENSE +21 -0
  2. trace_crispr-0.1.0/PKG-INFO +204 -0
  3. trace_crispr-0.1.0/README.md +166 -0
  4. trace_crispr-0.1.0/pyproject.toml +73 -0
  5. trace_crispr-0.1.0/setup.cfg +4 -0
  6. trace_crispr-0.1.0/tests/__init__.py +1 -0
  7. trace_crispr-0.1.0/tests/test_config.py +129 -0
  8. trace_crispr-0.1.0/tests/test_core.py +257 -0
  9. trace_crispr-0.1.0/tests/test_utils.py +131 -0
  10. trace_crispr-0.1.0/trace_crispr/__init__.py +26 -0
  11. trace_crispr-0.1.0/trace_crispr/__main__.py +6 -0
  12. trace_crispr-0.1.0/trace_crispr/cli.py +313 -0
  13. trace_crispr-0.1.0/trace_crispr/config.py +389 -0
  14. trace_crispr-0.1.0/trace_crispr/core/__init__.py +68 -0
  15. trace_crispr-0.1.0/trace_crispr/core/cigar.py +308 -0
  16. trace_crispr-0.1.0/trace_crispr/core/classification.py +366 -0
  17. trace_crispr-0.1.0/trace_crispr/core/kmer.py +280 -0
  18. trace_crispr-0.1.0/trace_crispr/core/scoring.py +257 -0
  19. trace_crispr-0.1.0/trace_crispr/integrations/__init__.py +34 -0
  20. trace_crispr-0.1.0/trace_crispr/integrations/aligners.py +378 -0
  21. trace_crispr-0.1.0/trace_crispr/integrations/crispresso.py +316 -0
  22. trace_crispr-0.1.0/trace_crispr/io/__init__.py +28 -0
  23. trace_crispr-0.1.0/trace_crispr/io/output.py +220 -0
  24. trace_crispr-0.1.0/trace_crispr/io/sample_key.py +120 -0
  25. trace_crispr-0.1.0/trace_crispr/pipeline.py +425 -0
  26. trace_crispr-0.1.0/trace_crispr/preprocessing/__init__.py +52 -0
  27. trace_crispr-0.1.0/trace_crispr/preprocessing/contamination.py +211 -0
  28. trace_crispr-0.1.0/trace_crispr/preprocessing/detection.py +274 -0
  29. trace_crispr-0.1.0/trace_crispr/preprocessing/trimming.py +204 -0
  30. trace_crispr-0.1.0/trace_crispr/qc/__init__.py +0 -0
  31. trace_crispr-0.1.0/trace_crispr/utils/__init__.py +35 -0
  32. trace_crispr-0.1.0/trace_crispr/utils/sequence.py +255 -0
  33. trace_crispr-0.1.0/trace_crispr.egg-info/PKG-INFO +204 -0
  34. trace_crispr-0.1.0/trace_crispr.egg-info/SOURCES.txt +36 -0
  35. trace_crispr-0.1.0/trace_crispr.egg-info/dependency_links.txt +1 -0
  36. trace_crispr-0.1.0/trace_crispr.egg-info/entry_points.txt +2 -0
  37. trace_crispr-0.1.0/trace_crispr.egg-info/requires.txt +18 -0
  38. trace_crispr-0.1.0/trace_crispr.egg-info/top_level.txt +5 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kevin R. Roy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: trace-crispr
3
+ Version: 0.1.0
4
+ Summary: TRACE: Triple-aligner Read Analysis for CRISPR Editing
5
+ Author-email: "Kevin R. Roy" <kevinroy@stanford.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/k-roy/trace
8
+ Project-URL: Documentation, https://trace-crispr.readthedocs.io
9
+ Project-URL: Repository, https://github.com/k-roy/trace
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: click>=8.0
22
+ Requires-Dist: pysam>=0.20
23
+ Requires-Dist: pandas>=1.5
24
+ Requires-Dist: numpy>=1.20
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: rapidfuzz>=3.0
27
+ Requires-Dist: tqdm>=4.60
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Requires-Dist: pytest-cov; extra == "dev"
31
+ Requires-Dist: black; extra == "dev"
32
+ Requires-Dist: ruff; extra == "dev"
33
+ Requires-Dist: mypy; extra == "dev"
34
+ Provides-Extra: visualization
35
+ Requires-Dist: matplotlib>=3.5; extra == "visualization"
36
+ Requires-Dist: seaborn>=0.12; extra == "visualization"
37
+ Dynamic: license-file
38
+
39
+ # TRACE
40
+
41
+ **T**riple-aligner **R**ead **A**nalysis for **C**RISPR **E**diting
42
+
43
+ ## Features
44
+
45
+ - **Triple-aligner consensus**: Uses BWA-MEM, BBMap, and minimap2 for robust alignment
46
+ - **Automatic inference**: Detects PAM, cleavage site, homology arms, and edits from sequences
47
+ - **K-mer classification**: Fast pre-alignment HDR/WT detection using 12-mers
48
+ - **Multi-nuclease support**: Cas9 and Cas12a (Cpf1) with correct cleavage geometry
49
+ - **Auto-detection**: Library type (TruSeq/Tn5), read merging need, CRISPResso mode
50
+ - **CRISPResso2 integration**: Validation with standard CRISPR analysis tool
51
+
52
+ ## Installation
53
+
54
+ ### pip (Python package only)
55
+
56
+ ```bash
57
+ pip install trace-crispr
58
+ ```
59
+
60
+ ### conda (includes external aligners)
61
+
62
+ ```bash
63
+ conda install -c bioconda -c conda-forge trace-crispr
64
+ ```
65
+
66
+ ### Development installation
67
+
68
+ ```bash
69
+ git clone https://github.com/k-roy/trace.git
70
+ cd trace
71
+ pip install -e ".[dev]"
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ### Minimal run (3 required inputs)
77
+
78
+ ```bash
79
+ trace run \
80
+ --reference amplicon.fasta \
81
+ --hdr-template hdr_template.fasta \
82
+ --guide GCTGAAGCACTGCACGCCGT \
83
+ --r1 sample_R1.fastq.gz \
84
+ --r2 sample_R2.fastq.gz \
85
+ --output results/
86
+ ```
87
+
88
+ ### Check locus configuration without running
89
+
90
+ ```bash
91
+ trace info \
92
+ --reference amplicon.fasta \
93
+ --hdr-template hdr_template.fasta \
94
+ --guide GCTGAAGCACTGCACGCCGT
95
+ ```
96
+
97
+ This will print:
98
+
99
+ ```
100
+ === TRACE Analysis Configuration ===
101
+
102
+ Reference sequence: 500 bp
103
+ HDR template: 500 bp
104
+
105
+ Donor template analysis:
106
+ - Left homology arm: positions 1-245 on reference (245 bp)
107
+ - Right homology arm: positions 255-500 on reference (245 bp)
108
+ - Donor edits detected at positions: 246, 247 on reference
109
+ * Position 246: C → G (PAM-silencing mutation)
110
+ * Position 247: C → T (chromophore Y66H mutation)
111
+
112
+ Guide analysis:
113
+ - Guide sequence: GCTGAAGCACTGCACGCCGT
114
+ - Guide targets: positions 248-267 on reference (- strand)
115
+ - PAM: GGG at positions 245-247 on reference
116
+ - Cleavage site: position 248 on reference
117
+ ```
118
+
119
+ ### Multiple samples
120
+
121
+ Create a sample key TSV:
122
+
123
+ ```
124
+ sample_id r1_path r2_path condition
125
+ sample_1 /path/to/S1_R1.fastq.gz /path/to/S1_R2.fastq.gz treatment
126
+ sample_2 /path/to/S2_R1.fastq.gz /path/to/S2_R2.fastq.gz control
127
+ ```
128
+
129
+ Then run:
130
+
131
+ ```bash
132
+ trace run \
133
+ --reference amplicon.fasta \
134
+ --hdr-template hdr_template.fasta \
135
+ --guide GCTGAAGCACTGCACGCCGT \
136
+ --sample-key samples.tsv \
137
+ --output results/ \
138
+ --threads 16
139
+ ```
140
+
141
+ ### Using Cas12a
142
+
143
+ ```bash
144
+ trace run \
145
+ --reference amplicon.fasta \
146
+ --hdr-template hdr_template.fasta \
147
+ --guide GCTGAAGCACTGCACGCCGTAA \
148
+ --nuclease cas12a \
149
+ --sample-key samples.tsv \
150
+ --output results/
151
+ ```
152
+
153
+ ## Nuclease Support
154
+
155
+ ### Cas9 (SpCas9)
156
+ - PAM: NGG (3' of protospacer)
157
+ - Cleavage: 3 bp upstream of PAM (blunt ends)
158
+
159
+ ### Cas12a (LbCpf1)
160
+ - PAM: TTTN (5' of protospacer)
161
+ - Cleavage: 18-19 bp downstream of PAM on non-target strand, 23 bp on target strand
162
+ - Creates 4-5 nt 5' overhang (staggered cut)
163
+
164
+ ## Output
165
+
166
+ The main output is a TSV file with per-sample editing outcomes:
167
+
168
+ | Column | Description |
169
+ |--------|-------------|
170
+ | sample | Sample ID |
171
+ | classifiable_reads | Total classifiable reads |
172
+ | duplicate_rate | PCR duplicate rate (Tn5) |
173
+ | Dedup_WT_% | Wild-type % (deduplicated) |
174
+ | Dedup_HDR_% | HDR % (deduplicated) |
175
+ | Dedup_NHEJ_% | NHEJ % (deduplicated) |
176
+ | Dedup_LgDel_% | Large deletion % |
177
+ | kmer_hdr_rate | K-mer method HDR rate |
178
+ | crispresso_hdr_rate | CRISPResso2 HDR rate |
179
+
180
+ ## Dependencies
181
+
182
+ ### Python
183
+ - click>=8.0
184
+ - pysam>=0.20
185
+ - pandas>=1.5
186
+ - numpy>=1.20
187
+ - pyyaml>=6.0
188
+ - rapidfuzz>=3.0
189
+ - tqdm>=4.60
190
+
191
+ ### External tools (via conda)
192
+ - bwa>=0.7
193
+ - bbmap>=39
194
+ - minimap2>=2.24
195
+ - samtools>=1.16
196
+ - crispresso2 (optional, but enabled by default)
197
+
198
+ ## Author
199
+
200
+ Kevin R. Roy
201
+
202
+ ## License
203
+
204
+ MIT
@@ -0,0 +1,166 @@
1
+ # TRACE
2
+
3
+ **T**riple-aligner **R**ead **A**nalysis for **C**RISPR **E**diting
4
+
5
+ ## Features
6
+
7
+ - **Triple-aligner consensus**: Uses BWA-MEM, BBMap, and minimap2 for robust alignment
8
+ - **Automatic inference**: Detects PAM, cleavage site, homology arms, and edits from sequences
9
+ - **K-mer classification**: Fast pre-alignment HDR/WT detection using 12-mers
10
+ - **Multi-nuclease support**: Cas9 and Cas12a (Cpf1) with correct cleavage geometry
11
+ - **Auto-detection**: Library type (TruSeq/Tn5), read merging need, CRISPResso mode
12
+ - **CRISPResso2 integration**: Validation with standard CRISPR analysis tool
13
+
14
+ ## Installation
15
+
16
+ ### pip (Python package only)
17
+
18
+ ```bash
19
+ pip install trace-crispr
20
+ ```
21
+
22
+ ### conda (includes external aligners)
23
+
24
+ ```bash
25
+ conda install -c bioconda -c conda-forge trace-crispr
26
+ ```
27
+
28
+ ### Development installation
29
+
30
+ ```bash
31
+ git clone https://github.com/k-roy/trace.git
32
+ cd trace
33
+ pip install -e ".[dev]"
34
+ ```
35
+
36
+ ## Quick Start
37
+
38
+ ### Minimal run (3 required inputs)
39
+
40
+ ```bash
41
+ trace run \
42
+ --reference amplicon.fasta \
43
+ --hdr-template hdr_template.fasta \
44
+ --guide GCTGAAGCACTGCACGCCGT \
45
+ --r1 sample_R1.fastq.gz \
46
+ --r2 sample_R2.fastq.gz \
47
+ --output results/
48
+ ```
49
+
50
+ ### Check locus configuration without running
51
+
52
+ ```bash
53
+ trace info \
54
+ --reference amplicon.fasta \
55
+ --hdr-template hdr_template.fasta \
56
+ --guide GCTGAAGCACTGCACGCCGT
57
+ ```
58
+
59
+ This will print:
60
+
61
+ ```
62
+ === TRACE Analysis Configuration ===
63
+
64
+ Reference sequence: 500 bp
65
+ HDR template: 500 bp
66
+
67
+ Donor template analysis:
68
+ - Left homology arm: positions 1-245 on reference (245 bp)
69
+ - Right homology arm: positions 255-500 on reference (245 bp)
70
+ - Donor edits detected at positions: 246, 247 on reference
71
+ * Position 246: C → G (PAM-silencing mutation)
72
+ * Position 247: C → T (chromophore Y66H mutation)
73
+
74
+ Guide analysis:
75
+ - Guide sequence: GCTGAAGCACTGCACGCCGT
76
+ - Guide targets: positions 248-267 on reference (- strand)
77
+ - PAM: GGG at positions 245-247 on reference
78
+ - Cleavage site: position 248 on reference
79
+ ```
80
+
81
+ ### Multiple samples
82
+
83
+ Create a sample key TSV:
84
+
85
+ ```
86
+ sample_id r1_path r2_path condition
87
+ sample_1 /path/to/S1_R1.fastq.gz /path/to/S1_R2.fastq.gz treatment
88
+ sample_2 /path/to/S2_R1.fastq.gz /path/to/S2_R2.fastq.gz control
89
+ ```
90
+
91
+ Then run:
92
+
93
+ ```bash
94
+ trace run \
95
+ --reference amplicon.fasta \
96
+ --hdr-template hdr_template.fasta \
97
+ --guide GCTGAAGCACTGCACGCCGT \
98
+ --sample-key samples.tsv \
99
+ --output results/ \
100
+ --threads 16
101
+ ```
102
+
103
+ ### Using Cas12a
104
+
105
+ ```bash
106
+ trace run \
107
+ --reference amplicon.fasta \
108
+ --hdr-template hdr_template.fasta \
109
+ --guide GCTGAAGCACTGCACGCCGTAA \
110
+ --nuclease cas12a \
111
+ --sample-key samples.tsv \
112
+ --output results/
113
+ ```
114
+
115
+ ## Nuclease Support
116
+
117
+ ### Cas9 (SpCas9)
118
+ - PAM: NGG (3' of protospacer)
119
+ - Cleavage: 3 bp upstream of PAM (blunt ends)
120
+
121
+ ### Cas12a (LbCpf1)
122
+ - PAM: TTTN (5' of protospacer)
123
+ - Cleavage: 18-19 bp downstream of PAM on non-target strand, 23 bp on target strand
124
+ - Creates 4-5 nt 5' overhang (staggered cut)
125
+
126
+ ## Output
127
+
128
+ The main output is a TSV file with per-sample editing outcomes:
129
+
130
+ | Column | Description |
131
+ |--------|-------------|
132
+ | sample | Sample ID |
133
+ | classifiable_reads | Total classifiable reads |
134
+ | duplicate_rate | PCR duplicate rate (Tn5) |
135
+ | Dedup_WT_% | Wild-type % (deduplicated) |
136
+ | Dedup_HDR_% | HDR % (deduplicated) |
137
+ | Dedup_NHEJ_% | NHEJ % (deduplicated) |
138
+ | Dedup_LgDel_% | Large deletion % |
139
+ | kmer_hdr_rate | K-mer method HDR rate |
140
+ | crispresso_hdr_rate | CRISPResso2 HDR rate |
141
+
142
+ ## Dependencies
143
+
144
+ ### Python
145
+ - click>=8.0
146
+ - pysam>=0.20
147
+ - pandas>=1.5
148
+ - numpy>=1.20
149
+ - pyyaml>=6.0
150
+ - rapidfuzz>=3.0
151
+ - tqdm>=4.60
152
+
153
+ ### External tools (via conda)
154
+ - bwa>=0.7
155
+ - bbmap>=39
156
+ - minimap2>=2.24
157
+ - samtools>=1.16
158
+ - crispresso2 (optional, but enabled by default)
159
+
160
+ ## Author
161
+
162
+ Kevin R. Roy
163
+
164
+ ## License
165
+
166
+ MIT
@@ -0,0 +1,73 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "trace-crispr"
7
+ version = "0.1.0"
8
+ description = "TRACE: Triple-aligner Read Analysis for CRISPR Editing"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ authors = [{name = "Kevin R. Roy", email = "kevinroy@stanford.edu"}]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Intended Audience :: Science/Research",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.9",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
21
+ ]
22
+ requires-python = ">=3.9"
23
+ dependencies = [
24
+ "click>=8.0",
25
+ "pysam>=0.20",
26
+ "pandas>=1.5",
27
+ "numpy>=1.20",
28
+ "pyyaml>=6.0",
29
+ "rapidfuzz>=3.0",
30
+ "tqdm>=4.60",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [
35
+ "pytest>=7.0",
36
+ "pytest-cov",
37
+ "black",
38
+ "ruff",
39
+ "mypy",
40
+ ]
41
+ visualization = [
42
+ "matplotlib>=3.5",
43
+ "seaborn>=0.12",
44
+ ]
45
+
46
+ [project.scripts]
47
+ trace = "trace_crispr.cli:cli"
48
+
49
+ [project.urls]
50
+ Homepage = "https://github.com/k-roy/trace"
51
+ Documentation = "https://trace-crispr.readthedocs.io"
52
+ Repository = "https://github.com/k-roy/trace"
53
+
54
+ [tool.setuptools.packages.find]
55
+ where = ["."]
+ # Restrict discovery to the library so the top-level "tests" package is not installed.
+ include = ["trace_crispr*"]
56
+
57
+ [tool.setuptools.package-data]
58
+ trace_crispr = ["templates/*"]
59
+
60
+ [tool.black]
61
+ line-length = 100
62
+ target-version = ['py39', 'py310', 'py311']
63
+
64
+ [tool.ruff]
65
+ line-length = 100
66
+ select = ["E", "F", "I", "N", "W"]
67
+ ignore = ["E501"]
68
+
69
+ [tool.mypy]
70
+ python_version = "3.9"
71
+ warn_return_any = true
72
+ warn_unused_configs = true
73
+ ignore_missing_imports = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ """Tests for CRISPRo."""
@@ -0,0 +1,129 @@
1
+ """Tests for trace.config module."""
2
+
3
+ import pytest
4
+ from trace_crispr.config import LocusConfig, NucleaseType
5
+
6
+
7
+ # BFP reference / GFP HDR template test sequences; they differ only at 0-based positions 70 (C->G) and 71 (C->T)
8
+ BFP_REFERENCE = (
9
+ "TGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACC"
10
+ "CACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCC"
11
+ "CGAAGGCTACGTCCAGGAGCGCACCAT"
12
+ )
13
+
14
+ GFP_HDR_TEMPLATE = (
15
+ "TGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACG"
16
+ "TACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCC"
17
+ "CGAAGGCTACGTCCAGGAGCGCACCAT"
18
+ )
19
+
20
+ GUIDE_SEQUENCE = "GCTGAAGCACTGCACGCCGT"
21
+
22
+
23
+ class TestLocusConfig:
24
+ """Tests for LocusConfig construction and analyze() on the BFP/GFP test locus."""
25
+
26
+ def test_basic_initialization(self):
27
+ """Constructor arguments are stored unchanged on the instance."""
28
+ locus = LocusConfig(
29
+ name="test",
30
+ reference=BFP_REFERENCE,
31
+ hdr_template=GFP_HDR_TEMPLATE,
32
+ guide=GUIDE_SEQUENCE,
33
+ nuclease=NucleaseType.CAS9,
34
+ )
35
+
36
+ assert locus.name == "test"
37
+ assert locus.reference == BFP_REFERENCE
38
+ assert locus.hdr_template == GFP_HDR_TEMPLATE
39
+ assert locus.guide == GUIDE_SEQUENCE
40
+ assert locus.nuclease == NucleaseType.CAS9
41
+
42
+ def test_analyze_detects_edits(self):
43
+ """analyze() reports the two reference/template mismatches as edits."""
44
+ locus = LocusConfig(
45
+ name="test",
46
+ reference=BFP_REFERENCE,
47
+ hdr_template=GFP_HDR_TEMPLATE,
48
+ guide=GUIDE_SEQUENCE,
49
+ nuclease=NucleaseType.CAS9,
50
+ ).analyze()
51
+
52
+ assert locus.edits is not None
53
+ assert len(locus.edits) == 2
54
+
55
+ # Edit positions are checked as 0-indexed coordinates (70 and 71)
56
+ edit_positions = [e.position for e in locus.edits]
57
+ assert 70 in edit_positions
58
+ assert 71 in edit_positions
59
+
60
+ def test_analyze_detects_homology_arms(self):
61
+ """analyze() places the left homology arm from 0 up to the first edit."""
62
+ locus = LocusConfig(
63
+ name="test",
64
+ reference=BFP_REFERENCE,
65
+ hdr_template=GFP_HDR_TEMPLATE,
66
+ guide=GUIDE_SEQUENCE,
67
+ nuclease=NucleaseType.CAS9,
68
+ ).analyze()
69
+
70
+ assert locus.homology_arms is not None
71
+ assert locus.homology_arms.left_start == 0
72
+ assert locus.homology_arms.left_end == 70 # left_end equals the first edit position (70)
73
+
74
+ def test_analyze_finds_guide_on_minus_strand(self):
75
+ """analyze() reports the guide on the '-' strand of the reference."""
76
+ locus = LocusConfig(
77
+ name="test",
78
+ reference=BFP_REFERENCE,
79
+ hdr_template=GFP_HDR_TEMPLATE,
80
+ guide=GUIDE_SEQUENCE,
81
+ nuclease=NucleaseType.CAS9,
82
+ ).analyze()
83
+
84
+ assert locus.guide_info is not None
85
+ assert locus.guide_info.strand == '-'
86
+ # NOTE(review): PAM placement (upstream on the reference for a '-' strand guide) is not asserted here
87
+
88
+ def test_analyze_calculates_cleavage_site(self):
89
+ """analyze() populates a positive cleavage_site for Cas9."""
90
+ locus = LocusConfig(
91
+ name="test",
92
+ reference=BFP_REFERENCE,
93
+ hdr_template=GFP_HDR_TEMPLATE,
94
+ guide=GUIDE_SEQUENCE,
95
+ nuclease=NucleaseType.CAS9,
96
+ ).analyze()
97
+
98
+ assert locus.guide_info is not None
99
+ # NOTE(review): only positivity is asserted; proximity to the guide is not actually checked
100
+ assert locus.guide_info.cleavage_site > 0
101
+
102
+ def test_guide_not_found_raises_error(self):
103
+ """analyze() raises ValueError matching "not found" for an unmatched guide."""
104
+ locus = LocusConfig(
105
+ name="test",
106
+ reference=BFP_REFERENCE,
107
+ hdr_template=GFP_HDR_TEMPLATE,
108
+ guide="NNNNNNNNNNNNNNNNNNNN", # All-N guide; presumably never matches - confirm N is not treated as a wildcard
109
+ nuclease=NucleaseType.CAS9,
110
+ )
111
+
112
+ with pytest.raises(ValueError, match="not found"):
113
+ locus.analyze()
114
+
115
+
116
+ class TestNucleaseType:
117
+ """Tests for the string values of the NucleaseType enum."""
118
+
119
+ def test_cas9_value(self):
120
+ """NucleaseType.CAS9 carries the value "cas9"."""
121
+ assert NucleaseType.CAS9.value == "cas9"
122
+
123
+ def test_cas12a_value(self):
124
+ """NucleaseType.CAS12A carries the value "cas12a"."""
125
+ assert NucleaseType.CAS12A.value == "cas12a"
126
+
127
+
128
+ # Allow running this test module directly; delegates to pytest in verbose mode.
+ if __name__ == "__main__":
129
+ pytest.main([__file__, "-v"])