synth-cryo-em 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ Metadata-Version: 2.4
2
+ Name: synth-cryo-em
3
+ Version: 0.1.0
4
+ Summary: Synthetic Cryo-EM map generator from PDB models
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: numpy
8
+ Requires-Dist: mrcfile
9
+ Requires-Dist: gemmi
10
+ Requires-Dist: scipy
11
+ Requires-Dist: click
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest; extra == "test"
14
+ Provides-Extra: docs
15
+ Requires-Dist: mkdocs; extra == "docs"
16
+ Requires-Dist: mkdocs-material; extra == "docs"
17
+ Requires-Dist: mkdocstrings[python]; extra == "docs"
18
+
19
+ # synth-cryo-em
20
+
21
+ [![Tests](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml/badge.svg)](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml)
22
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://elkins.github.io/synth-cryo-em/)
23
+
24
+ A lightweight Pythonic utility to convert atomic models (PDB/CIF) into synthetic 3D Cryo-EM maps with realistic noise, CTF effects, and varying resolutions.
25
+
26
+ ## 🌟 Features
27
+ - **Voxelize** atomic models with accurate resolution simulation.
28
+ - **Simulate Physics:** Apply Contrast Transfer Functions (CTF) and envelope functions.
29
+ - **Noise Modeling:** Add adjustable Gaussian noise to simulate low-SNR experimental data.
30
+ - **Standard Format:** Export results to MRC files compatible with RELION, ChimeraX, and other tools.
31
+
32
+ ## 🚀 Quick Start
33
+
34
+ ### Installation
35
+ ```bash
36
+ pip install synth-cryo-em
37
+ ```
38
+
39
+ ### Basic Generation
40
+ ```bash
41
+ synth-cryo-em structure.pdb output.mrc --resolution 4.0
42
+ ```
43
+
44
+ ### Realistic Simulation
45
+ ```bash
46
+ synth-cryo-em structure.pdb output.mrc --resolution 3.5 --apply-physics --snr 5
47
+ ```
48
+
49
+ ## 📚 Documentation
50
+ For detailed guides and API reference, visit the [Documentation Site](https://elkins.github.io/synth-cryo-em/).
51
+
52
+ ## 🛠️ Development
53
+ To install for development and documentation:
54
+ ```bash
55
+ pip install -e ".[test,docs]"
56
+ ```
57
+
58
+ Run tests:
59
+ ```bash
60
+ pytest tests/
61
+ ```
62
+
63
+ Build docs locally:
64
+ ```bash
65
+ mkdocs serve
66
+ ```
@@ -0,0 +1,48 @@
1
+ # synth-cryo-em
2
+
3
+ [![Tests](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml/badge.svg)](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml)
4
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://elkins.github.io/synth-cryo-em/)
5
+
6
+ A lightweight Pythonic utility to convert atomic models (PDB/CIF) into synthetic 3D Cryo-EM maps with realistic noise, CTF effects, and varying resolutions.
7
+
8
+ ## 🌟 Features
9
+ - **Voxelize** atomic models with accurate resolution simulation.
10
+ - **Simulate Physics:** Apply Contrast Transfer Functions (CTF) and envelope functions.
11
+ - **Noise Modeling:** Add adjustable Gaussian noise to simulate low-SNR experimental data.
12
+ - **Standard Format:** Export results to MRC files compatible with RELION, ChimeraX, and other tools.
13
+
14
+ ## 🚀 Quick Start
15
+
16
+ ### Installation
17
+ ```bash
18
+ pip install synth-cryo-em
19
+ ```
20
+
21
+ ### Basic Generation
22
+ ```bash
23
+ synth-cryo-em structure.pdb output.mrc --resolution 4.0
24
+ ```
25
+
26
+ ### Realistic Simulation
27
+ ```bash
28
+ synth-cryo-em structure.pdb output.mrc --resolution 3.5 --apply-physics --snr 5
29
+ ```
30
+
31
+ ## 📚 Documentation
32
+ For detailed guides and API reference, visit the [Documentation Site](https://elkins.github.io/synth-cryo-em/).
33
+
34
+ ## 🛠️ Development
35
+ To install for development and documentation:
36
+ ```bash
37
+ pip install -e ".[test,docs]"
38
+ ```
39
+
40
+ Run tests:
41
+ ```bash
42
+ pytest tests/
43
+ ```
44
+
45
+ Build docs locally:
46
+ ```bash
47
+ mkdocs serve
48
+ ```
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "synth-cryo-em"
7
+ version = "0.1.0"
8
+ description = "Synthetic Cryo-EM map generator from PDB models"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ dependencies = [
12
+ "numpy",
13
+ "mrcfile",
14
+ "gemmi",
15
+ "scipy",
16
+ "click",
17
+ ]
18
+
19
+ [project.optional-dependencies]
20
+ test = ["pytest"]
21
+ docs = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"]
22
+
23
+ [project.scripts]
24
+ synth-cryo-em = "synth_cryo_em.cli:main"
25
+ synth-cryo-em-validate = "synth_cryo_em.validate:main"
26
+
27
+ [tool.setuptools.packages.find]
28
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,43 @@
1
+ import click
2
+ from .core import generate_density_map, add_gaussian_noise, save_mrc, apply_ctf
3
+ import numpy as np
4
+
5
@click.command()
@click.argument('input_path', type=click.Path(exists=True))
@click.argument('output_path', type=click.Path())
@click.option('--resolution', '-r', default=4.0, help='Resolution in Angstroms')
@click.option('--spacing', '-s', default=None, type=float, help='Grid spacing in Angstroms')
@click.option('--snr', default=None, type=float, help='Signal-to-noise ratio')
@click.option('--defocus', default=2.0, help='Defocus in micrometers')
@click.option('--voltage', default=300.0, help='Acceleration voltage in kV')
@click.option('--cs', default=2.7, help='Spherical aberration in mm')
@click.option('--bfactor', default=0.0, help='Envelope B-factor')
@click.option('--bfactors/--no-bfactors', default=False, help='Use atomic B-factors for local resolution')
@click.option('--apply-physics/--no-physics', default=False, help='Apply CTF effects')
def main(input_path, output_path, resolution, spacing, snr, defocus, voltage, cs, bfactor, bfactors, apply_physics):
    """
    Generate a synthetic Cryo-EM map from an atomic model (PDB, mmCIF, or BCIF).
    """
    # NOTE: the docstring above is click's --help text, so it is kept short.
    # Pipeline: model -> density grid -> optional CTF -> optional noise -> MRC.

    # Reject values that would otherwise surface as divisions by zero or NaNs
    # deep inside the numerical code.
    if resolution <= 0:
        raise click.BadParameter("must be positive", param_hint="--resolution")
    if spacing is not None and spacing <= 0:
        raise click.BadParameter("must be positive", param_hint="--spacing")
    if snr is not None and snr <= 0:
        raise click.BadParameter("must be positive", param_hint="--snr")

    click.echo(f"Generating map for {input_path} at {resolution}A resolution...")

    grid, origin = generate_density_map(input_path, resolution, grid_spacing=spacing, use_bfactors=bfactors)

    # gemmi exposes the grid as an array indexed [x, y, z], whereas apply_ctf
    # and the MRC writer both treat axis 0 as z (MRC layout). Reorder the axes
    # once here so the saved map lines up with the model, the voxel sizes and
    # the origin written to the header.
    data = np.ascontiguousarray(np.array(grid, copy=True).transpose(2, 1, 0))

    # Voxel size is from the unit cell, in (x, y, z) order. gemmi may round the
    # grid dimensions, so the effective spacing can differ from the request.
    uc = grid.unit_cell
    vox_size = (uc.a / grid.nu, uc.b / grid.nv, uc.c / grid.nw)

    if apply_physics:
        click.echo(f"Applying CTF (defocus={defocus}um, voltage={voltage}kV, B-factor={bfactor})...")
        data = apply_ctf(data, vox_size, defoc=defocus, cs=cs, voltage=voltage, b_factor=bfactor)

    if snr is not None:
        click.echo(f"Adding Gaussian noise (SNR={snr})...")
        data = add_gaussian_noise(data, snr)

    save_mrc(data, output_path, origin=origin, spacing=vox_size)
    click.echo(f"Saved synthetic map to {output_path}")
41
+
42
+ if __name__ == '__main__':
43
+ main()
@@ -0,0 +1,230 @@
1
+ import gemmi
2
+ import numpy as np
3
+ import mrcfile
4
+ from scipy.ndimage import gaussian_filter
5
+
6
def _iter_atoms(structure):
    """Yield every atom of every model/chain/residue in a gemmi structure."""
    for model in structure:
        for chain in model:
            for residue in chain:
                yield from residue


def generate_density_map(input_path, resolution, grid_spacing=None, use_bfactors=False, margin=None):
    """
    Generate a density map from an atomic model file (PDB, mmCIF, BCIF) using gemmi.

    The model is placed in an orthogonal box that encloses all atoms plus a
    margin, and the density of the first model is calculated on a grid in
    that box.

    input_path: path to a structure file readable by gemmi.read_structure
    resolution: target resolution in Angstroms (must be positive)
    grid_spacing: requested grid spacing in Angstroms; defaults to
        resolution / 3. gemmi may round the grid size, so derive the actual
        voxel size from the returned grid.
    use_bfactors: if True, keep the atomic B-factors from the file; if False,
        zero them and apply one global blur derived from the resolution.
    margin: padding in Angstroms added on every side; defaults to
        2 * resolution.

    Returns (grid, origin): the gemmi grid and a length-3 NumPy array with the
    model-space coordinates of the box corner that maps to grid index (0, 0, 0).

    Raises ValueError if resolution or grid_spacing is not positive, or if
    the structure contains no atoms.
    """
    if resolution <= 0:
        raise ValueError(f"resolution must be positive, got {resolution!r}")
    # If grid_spacing is not provided, use a rule of thumb (resolution / 3)
    if grid_spacing is None:
        grid_spacing = resolution / 3.0
    elif grid_spacing <= 0:
        raise ValueError(f"grid_spacing must be positive, got {grid_spacing!r}")
    if margin is None:
        margin = resolution * 2.0

    st = gemmi.read_structure(input_path)

    # The bounding box covers the atoms of all models, although only the
    # first model is rendered below.
    positions = np.array([atom.pos.tolist() for atom in _iter_atoms(st)])
    if positions.size == 0:
        raise ValueError("No atoms found in structure")

    min_pos = positions.min(axis=0) - margin
    size = positions.max(axis=0) + margin - min_pos

    st_shifted = st.clone()
    st_shifted.cell = gemmi.UnitCell(size[0], size[1], size[2], 90, 90, 90)
    # The box is an artificial cell, so the crystal symmetry of the input no
    # longer applies. Without this, a model deposited in e.g. P 21 21 21 would
    # have spurious symmetry copies summed into the map when the calculator
    # symmetrizes the grid.
    st_shifted.spacegroup_hm = 'P 1'

    # Single pass: move atoms into the box and, when atomic B-factors are not
    # wanted, zero them so that only the global blur below shapes the density.
    for atom in _iter_atoms(st_shifted):
        atom.pos.x -= min_pos[0]
        atom.pos.y -= min_pos[1]
        atom.pos.z -= min_pos[2]
        if not use_bfactors:
            atom.b_iso = 0.0

    # Map the atoms to the grid using DensityCalculatorE
    calc = gemmi.DensityCalculatorE()
    calc.d_min = resolution
    # spacing = d_min / (2 * rate) => rate = d_min / (2 * spacing)
    calc.rate = resolution / (2.0 * grid_spacing)
    if not use_bfactors:
        # Global blur standing in for the target resolution; B = 8 * res^2 is
        # the heuristic this package uses for synthetic maps.
        calc.blur = 8.0 * resolution**2

    calc.set_grid_cell_and_spacegroup(st_shifted)
    calc.initialize_grid()
    # A non-empty atom list guarantees at least one model exists.
    calc.put_model_density_on_grid(st_shifted[0])

    return calc.grid, min_pos
83
+
84
def add_gaussian_noise(data, snr, rng=None):
    """
    Add zero-mean Gaussian noise to the data based on desired SNR.

    SNR is defined as mean(data**2) / noise_variance, so the noise standard
    deviation is sqrt(mean(data**2) / snr).

    data: real-valued array of any shape
    snr: desired signal-to-noise ratio (power ratio), must be > 0
    rng: optional seed or numpy.random.Generator for reproducible noise; when
        omitted the global NumPy random state is used (original behaviour).

    Returns a new float64 array; the input is not modified.
    Raises ValueError if snr is not a positive number.
    """
    # "not snr > 0" also rejects NaN, which would otherwise poison the map.
    if not snr > 0:
        raise ValueError(f"snr must be positive, got {snr!r}")

    signal = np.asarray(data, dtype=np.float64)
    sigma = np.sqrt(np.mean(signal**2) / snr)

    if rng is None:
        noise = np.random.normal(0.0, sigma, signal.shape)
    else:
        noise = np.random.default_rng(rng).normal(0.0, sigma, signal.shape)
    return signal + noise
92
+
93
def apply_ctf(data, voxel_size, defoc=2.0, cs=2.7, voltage=300, amplitude_contrast=0.1, b_factor=0.0):
    """
    Apply a simple Contrast Transfer Function (CTF) to the 3D data.

    The CTF depends only on the magnitude of the spatial frequency and is
    multiplied onto the 3D Fourier transform of the volume.

    data: 3D array indexed (z, y, x)
    voxel_size: (x, y, z) voxel edge lengths in Angstroms, or a single number
        for cubic voxels
    defoc: defocus in micrometers
    cs: spherical aberration in mm
    voltage: acceleration voltage in kV
    amplitude_contrast: amplitude contrast fraction
    b_factor: envelope function B-factor; the envelope is applied only when
        this is > 0

    Returns a real array with the same shape as data.
    Raises ValueError for non-3D data, or non-positive voltage / voxel size.
    """
    volume = np.asarray(data)
    if volume.ndim != 3:
        raise ValueError(f"data must be a 3D array, got {volume.ndim} dimension(s)")
    if voltage <= 0:
        raise ValueError(f"voltage must be positive, got {voltage!r}")
    # Accept either a scalar or an (x, y, z) triple.
    step_x, step_y, step_z = np.broadcast_to(np.asarray(voxel_size, dtype=float), (3,))
    if min(step_x, step_y, step_z) <= 0:
        raise ValueError(f"voxel_size must be positive, got {voxel_size!r}")

    # Constants
    wl = 12.26 / np.sqrt(voltage * 1000 + 0.9784 * voltage**2)  # wavelength in Angstroms
    cs_a = cs * 1e7  # cs in Angstroms
    defoc_a = defoc * 10000  # defocus in Angstroms

    # Squared spatial frequency on the FFT grid. Broadcasting the three 1D
    # axes avoids materialising three full-size coordinate volumes.
    nz, ny, nx = volume.shape
    kz = np.fft.fftfreq(nz, d=step_z)[:, None, None]
    ky = np.fft.fftfreq(ny, d=step_y)[None, :, None]
    kx = np.fft.fftfreq(nx, d=step_x)[None, None, :]
    k2 = kz**2 + ky**2 + kx**2

    # Phase shift
    chi = np.pi * wl * k2 * (defoc_a - 0.5 * wl**2 * k2 * cs_a)

    # CTF
    ctf = -(np.sqrt(1 - amplitude_contrast**2) * np.sin(chi) + amplitude_contrast * np.cos(chi))

    # Envelope function
    if b_factor > 0:
        ctf *= np.exp(-b_factor * k2 / 4.0)

    # Apply in Fourier domain
    return np.real(np.fft.ifftn(np.fft.fftn(volume) * ctf))
130
+
131
def compute_fsc(data1, data2, voxel_size):
    """
    Compute the Fourier Shell Correlation (FSC) between two 3D maps.

    Fourier coefficients are grouped into min(nx, ny, nz) // 2 equal-width
    shells spanning 0 up to the largest frequency magnitude on the FFT grid
    (the corner of the Fourier cube, i.e. beyond the axis Nyquist). For each
    shell FSC = Re(sum F1 * conj(F2)) / sqrt(sum |F1|^2 * sum |F2|^2).

    data1, data2: 3D arrays of identical shape, indexed (z, y, x)
    voxel_size: (x, y, z) voxel edge lengths, or a single number for cubic
        voxels; frequencies are returned in the reciprocal unit

    Returns (freqs, fsc): shell-centre frequencies and correlation values.
    Shells with zero power in either map are omitted, so both arrays may be
    shorter than the number of shells.

    Raises ValueError if the shapes differ or the maps are not 3D.
    """
    map1 = np.asarray(data1)
    map2 = np.asarray(data2)
    if map1.shape != map2.shape:
        raise ValueError(f"maps must have the same shape, got {map1.shape} and {map2.shape}")
    if map1.ndim != 3:
        raise ValueError(f"maps must be 3D, got {map1.ndim} dimension(s)")
    step_x, step_y, step_z = np.broadcast_to(np.asarray(voxel_size, dtype=float), (3,))

    nz, ny, nx = map1.shape
    n_bins = min(nx, ny, nz) // 2
    if n_bins == 0:
        # Too small to form even one shell.
        return np.array([]), np.array([])

    # Fourier transforms
    f1 = np.fft.fftn(map1)
    f2 = np.fft.fftn(map2)

    # Only the real part of the cross term contributes to the FSC.
    cross = np.real(f1 * np.conj(f2)).ravel()
    p1 = (f1.real**2 + f1.imag**2).ravel()
    p2 = (f2.real**2 + f2.imag**2).ravel()

    # Frequency magnitude of every Fourier voxel
    kz = np.fft.fftfreq(nz, d=step_z)[:, None, None]
    ky = np.fft.fftfreq(ny, d=step_y)[None, :, None]
    kx = np.fft.fftfreq(nx, d=step_x)[None, None, :]
    k = np.sqrt(kz**2 + ky**2 + kx**2).ravel()

    # Assign each voxel to a shell in one pass. digitize() puts k == k.max()
    # one past the last shell; clamp it so the top sample is kept rather than
    # silently dropped.
    edges = np.linspace(0.0, k.max(), n_bins + 1)
    shell = np.minimum(np.digitize(k, edges) - 1, n_bins - 1)

    sum_cross = np.bincount(shell, weights=cross, minlength=n_bins)
    sum_p1 = np.bincount(shell, weights=p1, minlength=n_bins)
    sum_p2 = np.bincount(shell, weights=p2, minlength=n_bins)

    den = np.sqrt(sum_p1 * sum_p2)
    valid = den > 0  # also excludes shells that received no voxels
    centers = 0.5 * (edges[:-1] + edges[1:])
    return centers[valid], sum_cross[valid] / den[valid]
198
+
199
def compute_ccc(data1, data2):
    """
    Compute the Cross-Correlation Coefficient (CCC) between two 3D maps.

    This is the Pearson correlation of the voxel values: both maps are
    mean-centred and the normalised dot product is returned.

    data1, data2: arrays of identical shape

    Returns a float in [-1, 1], or 0.0 when either map is constant (the
    correlation is undefined in that case).
    Raises ValueError if the shapes differ.
    """
    map1 = np.asarray(data1)
    map2 = np.asarray(data2)
    if map1.shape != map2.shape:
        raise ValueError(f"maps must have the same shape, got {map1.shape} and {map2.shape}")

    # Work in float64 so float32 maps do not lose precision in the sums.
    d1 = map1.astype(np.float64).ravel()
    d2 = map2.astype(np.float64).ravel()
    d1 = d1 - d1.mean()
    d2 = d2 - d2.mean()

    den = np.sqrt(np.sum(d1**2) * np.sum(d2**2))
    if den == 0:
        return 0.0
    return float(np.sum(d1 * d2) / den)
218
+
219
def save_mrc(data, output_path, origin=(0,0,0), spacing=(1,1,1)):
    """
    Write a volume to an MRC file, replacing any existing file at output_path.

    data: array to store; it is converted to float32 before writing
    output_path: destination file path
    origin: (x, y, z) map origin stored in the header
    spacing: voxel size assigned to the file's voxel_size
    """
    with mrcfile.new(output_path, overwrite=True) as out:
        out.set_data(data.astype(np.float32))
        out.voxel_size = spacing
        # The header stores the origin as separate x, y, z fields.
        header_origin = out.header.origin
        header_origin.x = origin[0]
        header_origin.y = origin[1]
        header_origin.z = origin[2]
        out.update_header_from_data()
@@ -0,0 +1,50 @@
1
+ import click
2
+ import mrcfile
3
+ import numpy as np
4
+ from .core import compute_fsc, compute_ccc
5
+
6
@click.command()
@click.argument('map1_path', type=click.Path(exists=True))
@click.argument('map2_path', type=click.Path(exists=True))
@click.option('--output', '-o', help='Path to save FSC data (CSV)')
def main(map1_path, map2_path, output):
    """
    Compare two Cryo-EM maps using Fourier Shell Correlation (FSC) and CCC.
    """
    # NOTE: the docstring above is click's --help text, so it is kept short.
    click.echo(f"Comparing {map1_path} and {map2_path}...")

    with mrcfile.open(map1_path) as m1, mrcfile.open(map2_path) as m2:
        # Copy so the arrays stay valid after the files are closed.
        d1 = np.array(m1.data)
        d2 = np.array(m2.data)
        v1 = m1.voxel_size

    # ClickException prints "Error: <message>" to stderr and exits with a
    # non-zero status, so scripts can detect the failure.
    if d1.shape != d2.shape:
        raise click.ClickException("Maps have different shapes. Resampling not yet supported.")
    if d1.ndim != 3:
        raise click.ClickException("Only 3D maps are supported.")

    # The voxel size of the first map defines the frequency axis.
    voxel_size = (v1.x, v1.y, v1.z)
    freqs, fsc = compute_fsc(d1, d2, voxel_size)
    ccc = compute_ccc(d1, d2)

    click.echo(f"\nOverall Cross-Correlation Coefficient (CCC): {ccc:.4f}\n")

    # Print roughly ten evenly spaced shells. The step is clamped to 1 because
    # small maps yield fewer than ten shells and range() rejects a zero step.
    click.echo(f"{'Resolution (A)':<15} | {'FSC':<10}")
    click.echo("-" * 30)
    step = max(1, len(freqs) // 10)
    for i in range(0, len(freqs), step):
        res = 1.0 / freqs[i] if freqs[i] > 0 else float('inf')
        click.echo(f"{res:<15.2f} | {fsc[i]:<10.4f}")

    # Report the first shell at which the curve falls below each threshold.
    for val in [0.5, 0.143]:
        cross_idx = np.where(fsc < val)[0]
        if len(cross_idx) > 0:
            res = 1.0 / freqs[cross_idx[0]]
            click.echo(f"\nFSC={val} crossing at {res:.2f} Angstroms")

    if output:
        np.savetxt(output, np.column_stack((freqs, fsc)), delimiter=',', header='frequency,fsc')
        click.echo(f"\nFSC data saved to {output}")
48
+
49
+ if __name__ == '__main__':
50
+ main()
@@ -0,0 +1,66 @@
1
+ Metadata-Version: 2.4
2
+ Name: synth-cryo-em
3
+ Version: 0.1.0
4
+ Summary: Synthetic Cryo-EM map generator from PDB models
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: numpy
8
+ Requires-Dist: mrcfile
9
+ Requires-Dist: gemmi
10
+ Requires-Dist: scipy
11
+ Requires-Dist: click
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest; extra == "test"
14
+ Provides-Extra: docs
15
+ Requires-Dist: mkdocs; extra == "docs"
16
+ Requires-Dist: mkdocs-material; extra == "docs"
17
+ Requires-Dist: mkdocstrings[python]; extra == "docs"
18
+
19
+ # synth-cryo-em
20
+
21
+ [![Tests](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml/badge.svg)](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml)
22
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://elkins.github.io/synth-cryo-em/)
23
+
24
+ A lightweight Pythonic utility to convert atomic models (PDB/CIF) into synthetic 3D Cryo-EM maps with realistic noise, CTF effects, and varying resolutions.
25
+
26
+ ## 🌟 Features
27
+ - **Voxelize** atomic models with accurate resolution simulation.
28
+ - **Simulate Physics:** Apply Contrast Transfer Functions (CTF) and envelope functions.
29
+ - **Noise Modeling:** Add adjustable Gaussian noise to simulate low-SNR experimental data.
30
+ - **Standard Format:** Export results to MRC files compatible with RELION, ChimeraX, and other tools.
31
+
32
+ ## 🚀 Quick Start
33
+
34
+ ### Installation
35
+ ```bash
36
+ pip install synth-cryo-em
37
+ ```
38
+
39
+ ### Basic Generation
40
+ ```bash
41
+ synth-cryo-em structure.pdb output.mrc --resolution 4.0
42
+ ```
43
+
44
+ ### Realistic Simulation
45
+ ```bash
46
+ synth-cryo-em structure.pdb output.mrc --resolution 3.5 --apply-physics --snr 5
47
+ ```
48
+
49
+ ## 📚 Documentation
50
+ For detailed guides and API reference, visit the [Documentation Site](https://elkins.github.io/synth-cryo-em/).
51
+
52
+ ## 🛠️ Development
53
+ To install for development and documentation:
54
+ ```bash
55
+ pip install -e ".[test,docs]"
56
+ ```
57
+
58
+ Run tests:
59
+ ```bash
60
+ pytest tests/
61
+ ```
62
+
63
+ Build docs locally:
64
+ ```bash
65
+ mkdocs serve
66
+ ```
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/synth_cryo_em/cli.py
4
+ src/synth_cryo_em/core.py
5
+ src/synth_cryo_em/validate.py
6
+ src/synth_cryo_em.egg-info/PKG-INFO
7
+ src/synth_cryo_em.egg-info/SOURCES.txt
8
+ src/synth_cryo_em.egg-info/dependency_links.txt
9
+ src/synth_cryo_em.egg-info/entry_points.txt
10
+ src/synth_cryo_em.egg-info/requires.txt
11
+ src/synth_cryo_em.egg-info/top_level.txt
12
+ tests/test_core.py
13
+ tests/test_empirical.py
14
+ tests/test_real_data.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ synth-cryo-em = synth_cryo_em.cli:main
3
+ synth-cryo-em-validate = synth_cryo_em.validate:main
@@ -0,0 +1,13 @@
1
+ numpy
2
+ mrcfile
3
+ gemmi
4
+ scipy
5
+ click
6
+
7
+ [docs]
8
+ mkdocs
9
+ mkdocs-material
10
+ mkdocstrings[python]
11
+
12
+ [test]
13
+ pytest
@@ -0,0 +1 @@
1
+ synth_cryo_em
@@ -0,0 +1,113 @@
1
+ import unittest
2
+ import os
3
+ import numpy as np
4
+ from synth_cryo_em.core import generate_density_map, apply_ctf, add_gaussian_noise
5
+
6
class TestSynthCryoEM(unittest.TestCase):
    """Unit tests for density generation, CTF application, noise and MRC export.

    Every fixture is written into a per-test temporary directory so runs do
    not leave files in, or collide over, the current working directory.
    """

    @staticmethod
    def _atom_record(serial, name, x, y, z, element):
        """Format one fixed-column PDB ATOM record for residue ALA A 1."""
        return (
            f"ATOM  {serial:5d}  {name:<3} ALA A   1    "
            f"{x:8.3f}{y:8.3f}{z:8.3f}  1.00 20.00          {element:>2}\n"
        )

    @staticmethod
    def _voxel_size(grid):
        """Return the (x, y, z) voxel size implied by the grid's unit cell."""
        uc = grid.unit_cell
        return (uc.a / grid.nu, uc.b / grid.nv, uc.c / grid.nw)

    def setUp(self):
        import tempfile

        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmpdir = tmp.name

        # PDB is a fixed-column format, so the records are built by a
        # formatter rather than typed by hand.
        self.pdb_content = (
            self._atom_record(1, "N", 11.104, 6.132, 11.469, "N")
            + self._atom_record(2, "CA", 12.000, 12.000, 12.000, "C")
            + self._atom_record(3, "C", 13.104, 18.132, 13.469, "C")
            + "TER\nEND\n"
        )
        self.test_pdb = os.path.join(self.tmpdir, "test_temp.pdb")
        with open(self.test_pdb, "w") as f:
            f.write(self.pdb_content)

    def test_generate_density(self):
        grid, origin = generate_density_map(self.test_pdb, resolution=4.0)
        data = np.array(grid, copy=False)
        self.assertGreater(np.sum(data), 0)
        self.assertEqual(len(data.shape), 3)

    def test_apply_ctf(self):
        grid, origin = generate_density_map(self.test_pdb, resolution=4.0)
        data = np.array(grid, copy=True)
        data_ctf = apply_ctf(data, self._voxel_size(grid), defoc=1.0)
        self.assertEqual(data_ctf.shape, data.shape)
        # CTF should change the values
        self.assertFalse(np.allclose(data, data_ctf))

    def test_add_noise(self):
        data = np.ones((10, 10, 10))
        noisy = add_gaussian_noise(data, snr=10)
        self.assertEqual(noisy.shape, data.shape)
        # Compare voxel-wise: a mean-only check could pass for unchanged data
        # plus rounding, and says nothing about individual voxels.
        self.assertFalse(np.array_equal(noisy, data))

    def test_apply_ctf_with_bfactor(self):
        grid, origin = generate_density_map(self.test_pdb, resolution=4.0)
        data = np.array(grid, copy=True)
        data_ctf = apply_ctf(data, self._voxel_size(grid), defoc=1.0, b_factor=100.0)
        self.assertEqual(data_ctf.shape, data.shape)
        self.assertFalse(np.allclose(data, data_ctf))

    def test_generate_density_no_atoms(self):
        empty_pdb = os.path.join(self.tmpdir, "empty.pdb")
        with open(empty_pdb, "w") as f:
            f.write("END\n")
        with self.assertRaises(ValueError):
            generate_density_map(empty_pdb, resolution=4.0)

    def test_generate_with_bfactors(self):
        grid, origin = generate_density_map(self.test_pdb, resolution=4.0, use_bfactors=True)
        data = np.array(grid, copy=False)
        self.assertGreater(np.sum(data), 0)

    def test_mmcif_support(self):
        import gemmi

        # Build a one-atom structure in memory and round-trip it through mmCIF.
        atom = gemmi.Atom()
        atom.name = "CA"
        atom.element = gemmi.Element("C")
        atom.pos = gemmi.Position(10, 10, 10)
        res = gemmi.Residue()
        res.name = "ALA"
        res.seqid = gemmi.SeqId(1, ' ')
        res.add_atom(atom)
        chain = gemmi.Chain("A")
        chain.add_residue(res)
        model = gemmi.Model("1")
        model.add_chain(chain)
        st = gemmi.Structure()
        st.add_model(model)

        cif_path = os.path.join(self.tmpdir, "test_mmcif.cif")
        st.make_mmcif_document().write_file(cif_path)

        grid, origin = generate_density_map(cif_path, resolution=4.0)
        data = np.array(grid, copy=False)
        self.assertGreater(np.sum(data), 0)

    def test_save_mrc(self):
        from synth_cryo_em.core import save_mrc
        import mrcfile

        data = np.zeros((10, 10, 10), dtype=np.float32)
        test_mrc = os.path.join(self.tmpdir, "test_output.mrc")
        save_mrc(data, test_mrc, origin=(1, 2, 3), spacing=(1.1, 1.1, 1.1))
        self.assertTrue(os.path.exists(test_mrc))
        with mrcfile.open(test_mrc) as mrc:
            self.assertEqual(mrc.data.shape, (10, 10, 10))
            self.assertAlmostEqual(mrc.voxel_size.x, 1.1, places=5)
            self.assertAlmostEqual(mrc.header.origin.x, 1.0)
112
+ if __name__ == '__main__':
113
+ unittest.main()
@@ -0,0 +1,96 @@
1
+ import unittest
2
+ import os
3
+ import numpy as np
4
+ import mrcfile
5
+ import gemmi
6
+ import urllib.request
7
+ from synth_cryo_em.core import generate_density_map, compute_ccc, compute_fsc
8
+
9
class TestEmpiricalValidation(unittest.TestCase):
    """
    Functional tests comparing synthetic results with empirical expectations.
    Uses small real-world structures to validate correlation.

    The structure is downloaded on demand; the whole class is skipped when it
    cannot be obtained. A copy that already exists in the working directory is
    reused and is never deleted by these tests.
    """

    @classmethod
    def setUpClass(cls):
        # We use a very small, well-known protein: Crambin (PDB: 1CRN)
        # It's small (46 residues) and stable.
        cls.pdb_id = "1crn"
        cls.pdb_path = f"{cls.pdb_id}.pdb"
        # Only files fetched by this run are removed in tearDownClass.
        cls._downloaded = False

        if os.path.exists(cls.pdb_path):
            return

        url = f"https://files.rcsb.org/download/{cls.pdb_id}.pdb"
        try:
            urllib.request.urlretrieve(url, cls.pdb_path)
        except Exception as e:
            # tearDownClass does not run when setUpClass skips, so remove a
            # partially written file here.
            if os.path.exists(cls.pdb_path):
                os.remove(cls.pdb_path)
            raise unittest.SkipTest(
                f"Skipping empirical test: Could not download {url}. Error: {e}"
            )
        cls._downloaded = True

    @classmethod
    def tearDownClass(cls):
        if cls._downloaded and os.path.exists(cls.pdb_path):
            os.remove(cls.pdb_path)

    def test_crambin_reconstruction_consistency(self):
        """
        Validate that generating a map twice with identical parameters yields
        perfectly correlated results.
        This serves as a baseline 'internal' empirical check.
        """
        res = 3.0
        # Use fixed parameters for consistency
        spacing = 1.0
        margin = 10.0
        grid, _ = generate_density_map(self.pdb_path, resolution=res, grid_spacing=spacing, margin=margin)
        data = np.array(grid, copy=True)

        # Now generate a 'reference' map using the same parameters
        grid_ref, _ = generate_density_map(self.pdb_path, resolution=res, grid_spacing=spacing, margin=margin)
        data_ref = np.array(grid_ref, copy=True)

        ccc = compute_ccc(data, data_ref)

        # Identical parameters should yield identical results
        self.assertAlmostEqual(ccc, 1.0, places=5)

    def test_resolution_cutoffs(self):
        """
        Validate that the FSC correctly reflects the simulated resolution.
        If we simulate at 8A, the FSC against a higher resolution (2A) version
        should show a significant drop.
        """
        res_low = 8.0
        res_high = 2.0
        # Use a fixed grid spacing and margin for both to ensure same shape
        spacing = 1.0
        margin = 15.0

        grid_low, _ = generate_density_map(self.pdb_path, resolution=res_low, grid_spacing=spacing, margin=margin)
        grid_high, _ = generate_density_map(self.pdb_path, resolution=res_high, grid_spacing=spacing, margin=margin)

        data_low = np.array(grid_low, copy=True)
        data_high = np.array(grid_high, copy=True)
        # Fail with a clear message here rather than inside compute_fsc.
        self.assertEqual(data_low.shape, data_high.shape)

        uc = grid_low.unit_cell
        voxel_size = (uc.a / grid_low.nu, uc.b / grid_low.nv, uc.c / grid_low.nw)

        freqs, fsc = compute_fsc(data_low, data_high, voxel_size)

        # The FSC should be lower at higher frequencies
        # Check that FSC at high frequency (near Nyquist) is much lower than at low frequency
        self.assertGreater(fsc[1], fsc[-1], "FSC should decrease with frequency")

        # Check for a drop below 0.5 at some point
        low_fsc_indices = np.where(fsc < 0.5)[0]
        self.assertGreater(len(low_fsc_indices), 0, "FSC should drop below 0.5 for different resolutions")
94
+
95
+ if __name__ == '__main__':
96
+ unittest.main()
@@ -0,0 +1,90 @@
1
+ import unittest
2
+ import os
3
+ import numpy as np
4
+ import mrcfile
5
+ import gemmi
6
+ import urllib.request
7
+ import gzip
8
+ import shutil
9
+ from synth_cryo_em.core import generate_density_map, compute_ccc, compute_fsc
10
+
11
class TestRealDataValidation(unittest.TestCase):
    """
    Validation against real empirical data from PDB and EMDB.

    Both files are downloaded on demand; the whole class is skipped when
    either cannot be obtained. Files that already exist in the working
    directory are reused and are never deleted by these tests.
    """

    @classmethod
    def setUpClass(cls):
        cls.pdb_id = "6k7v"
        cls.emdb_id = "9943"
        cls.pdb_path = f"{cls.pdb_id}.pdb"
        cls.mrc_gz_path = f"emd_{cls.emdb_id}.map.gz"
        cls.mrc_path = f"emd_{cls.emdb_id}.map"
        # Paths created by this run. A path is registered before it is
        # written so that partial files are also cleaned up.
        cls._created = []

        try:
            # Download PDB first: if it fails there is no point in fetching
            # the much larger map.
            if not os.path.exists(cls.pdb_path):
                url = f"https://files.rcsb.org/download/{cls.pdb_id}.pdb"
                cls._created.append(cls.pdb_path)
                urllib.request.urlretrieve(url, cls.pdb_path)

            # Download and decompress EMDB map
            if not os.path.exists(cls.mrc_path):
                if not os.path.exists(cls.mrc_gz_path):
                    url = (
                        "https://ftp.ebi.ac.uk/pub/databases/emdb/structures/"
                        f"EMD-{cls.emdb_id}/map/emd_{cls.emdb_id}.map.gz"
                    )
                    cls._created.append(cls.mrc_gz_path)
                    urllib.request.urlretrieve(url, cls.mrc_gz_path)
                cls._created.append(cls.mrc_path)
                with gzip.open(cls.mrc_gz_path, 'rb') as f_in:
                    with open(cls.mrc_path, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
        except Exception as e:
            # tearDownClass does not run when setUpClass skips.
            cls._remove_created()
            raise unittest.SkipTest(f"Data not available: {e}")

    @classmethod
    def _remove_created(cls):
        """Delete the files this run created, leaving pre-existing ones alone."""
        for p in cls._created:
            if os.path.exists(p):
                os.remove(p)
        cls._created = []

    @classmethod
    def tearDownClass(cls):
        cls._remove_created()

    def test_emd_9943_correlation(self):
        """
        Generate a synthetic map from 6K7V on the voxel spacing of EMD-9943.

        This is a smoke test: the maps are not aligned or resampled, so only
        basic properties of the generated volume are asserted.
        """
        # Load experimental map to get its parameters
        with mrcfile.open(self.mrc_path) as mrc:
            exp_shape = mrc.data.shape
            spacing = float(mrc.voxel_size.x)  # Assuming cubic voxels

        # The experimental map has a specific resolution (3.7 A)
        res = 3.7

        # We'll use our generator with the same spacing. The generator adds
        # its own margins, so the shapes are not expected to match.
        grid, gen_origin = generate_density_map(self.pdb_path, resolution=res, grid_spacing=spacing)
        gen_data = np.array(grid, copy=True)

        self.assertGreater(np.sum(gen_data), 0)
        self.assertEqual(len(gen_data.shape), 3)

        # In a real functional test, we would resample/align them.
        # Here we at least validate the tool can process real PDBs.
        print(f"Generated map shape: {gen_data.shape}, Experimental: {exp_shape}")
88
+
89
+ if __name__ == '__main__':
90
+ unittest.main()