synth-cryo-em 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_cryo_em-0.1.0/PKG-INFO +66 -0
- synth_cryo_em-0.1.0/README.md +48 -0
- synth_cryo_em-0.1.0/pyproject.toml +28 -0
- synth_cryo_em-0.1.0/setup.cfg +4 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em/cli.py +43 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em/core.py +230 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em/validate.py +50 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em.egg-info/PKG-INFO +66 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em.egg-info/SOURCES.txt +14 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em.egg-info/dependency_links.txt +1 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em.egg-info/entry_points.txt +3 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em.egg-info/requires.txt +13 -0
- synth_cryo_em-0.1.0/src/synth_cryo_em.egg-info/top_level.txt +1 -0
- synth_cryo_em-0.1.0/tests/test_core.py +113 -0
- synth_cryo_em-0.1.0/tests/test_empirical.py +96 -0
- synth_cryo_em-0.1.0/tests/test_real_data.py +90 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synth-cryo-em
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Synthetic Cryo-EM map generator from PDB models
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy
|
|
8
|
+
Requires-Dist: mrcfile
|
|
9
|
+
Requires-Dist: gemmi
|
|
10
|
+
Requires-Dist: scipy
|
|
11
|
+
Requires-Dist: click
|
|
12
|
+
Provides-Extra: test
|
|
13
|
+
Requires-Dist: pytest; extra == "test"
|
|
14
|
+
Provides-Extra: docs
|
|
15
|
+
Requires-Dist: mkdocs; extra == "docs"
|
|
16
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
17
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
18
|
+
|
|
19
|
+
# synth-cryo-em
|
|
20
|
+
|
|
21
|
+
[](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml)
|
|
22
|
+
[](https://elkins.github.io/synth-cryo-em/)
|
|
23
|
+
|
|
24
|
+
A lightweight Pythonic utility to convert atomic models (PDB/CIF) into synthetic 3D Cryo-EM maps with realistic noise, CTF effects, and varying resolutions.
|
|
25
|
+
|
|
26
|
+
## 🌟 Features
|
|
27
|
+
- **Voxelize** atomic models with accurate resolution simulation.
|
|
28
|
+
- **Simulate Physics:** Apply Contrast Transfer Functions (CTF) and envelope functions.
|
|
29
|
+
- **Noise Modeling:** Add adjustable Gaussian noise to simulate low-SNR experimental data.
|
|
30
|
+
- **Standard Format:** Export results to MRC files compatible with RELION, ChimeraX, and other tools.
|
|
31
|
+
|
|
32
|
+
## 🚀 Quick Start
|
|
33
|
+
|
|
34
|
+
### Installation
|
|
35
|
+
```bash
|
|
36
|
+
pip install synth-cryo-em
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Basic Generation
|
|
40
|
+
```bash
|
|
41
|
+
synth-cryo-em structure.pdb output.mrc --resolution 4.0
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Realistic Simulation
|
|
45
|
+
```bash
|
|
46
|
+
synth-cryo-em structure.pdb output.mrc --resolution 3.5 --apply-physics --snr 5
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## 📚 Documentation
|
|
50
|
+
For detailed guides and API reference, visit the [Documentation Site](https://elkins.github.io/synth-cryo-em/).
|
|
51
|
+
|
|
52
|
+
## 🛠️ Development
|
|
53
|
+
To install for development and documentation:
|
|
54
|
+
```bash
|
|
55
|
+
pip install -e ".[test,docs]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Run tests:
|
|
59
|
+
```bash
|
|
60
|
+
pytest tests/
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Build docs locally:
|
|
64
|
+
```bash
|
|
65
|
+
mkdocs serve
|
|
66
|
+
```
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# synth-cryo-em
|
|
2
|
+
|
|
3
|
+
[![Tests](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml/badge.svg)](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml)
|
|
4
|
+
[Documentation](https://elkins.github.io/synth-cryo-em/)
|
|
5
|
+
|
|
6
|
+
A lightweight Pythonic utility to convert atomic models (PDB/CIF) into synthetic 3D Cryo-EM maps with realistic noise, CTF effects, and varying resolutions.
|
|
7
|
+
|
|
8
|
+
## 🌟 Features
|
|
9
|
+
- **Voxelize** atomic models with accurate resolution simulation.
|
|
10
|
+
- **Simulate Physics:** Apply Contrast Transfer Functions (CTF) and envelope functions.
|
|
11
|
+
- **Noise Modeling:** Add adjustable Gaussian noise to simulate low-SNR experimental data.
|
|
12
|
+
- **Standard Format:** Export results to MRC files compatible with RELION, ChimeraX, and other tools.
|
|
13
|
+
|
|
14
|
+
## 🚀 Quick Start
|
|
15
|
+
|
|
16
|
+
### Installation
|
|
17
|
+
```bash
|
|
18
|
+
pip install synth-cryo-em
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Basic Generation
|
|
22
|
+
```bash
|
|
23
|
+
synth-cryo-em structure.pdb output.mrc --resolution 4.0
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Realistic Simulation
|
|
27
|
+
```bash
|
|
28
|
+
synth-cryo-em structure.pdb output.mrc --resolution 3.5 --apply-physics --snr 5
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## 📚 Documentation
|
|
32
|
+
For detailed guides and API reference, visit the [Documentation Site](https://elkins.github.io/synth-cryo-em/).
|
|
33
|
+
|
|
34
|
+
## 🛠️ Development
|
|
35
|
+
To install for development and documentation:
|
|
36
|
+
```bash
|
|
37
|
+
pip install -e ".[test,docs]"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Run tests:
|
|
41
|
+
```bash
|
|
42
|
+
pytest tests/
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Build docs locally:
|
|
46
|
+
```bash
|
|
47
|
+
mkdocs serve
|
|
48
|
+
```
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "synth-cryo-em"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Synthetic Cryo-EM map generator from PDB models"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"numpy",
|
|
13
|
+
"mrcfile",
|
|
14
|
+
"gemmi",
|
|
15
|
+
"scipy",
|
|
16
|
+
"click",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.optional-dependencies]
|
|
20
|
+
test = ["pytest"]
|
|
21
|
+
docs = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
synth-cryo-em = "synth_cryo_em.cli:main"
|
|
25
|
+
synth-cryo-em-validate = "synth_cryo_em.validate:main"
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
where = ["src"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from .core import generate_density_map, add_gaussian_noise, save_mrc, apply_ctf
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
@click.command()
@click.argument('input_path', type=click.Path(exists=True))
@click.argument('output_path', type=click.Path())
@click.option('--resolution', '-r', default=4.0, help='Resolution in Angstroms')
@click.option('--spacing', '-s', default=None, type=float, help='Grid spacing in Angstroms')
@click.option('--snr', default=None, type=float, help='Signal-to-noise ratio')
@click.option('--defocus', default=2.0, help='Defocus in micrometers')
@click.option('--voltage', default=300.0, help='Acceleration voltage in kV')
@click.option('--cs', default=2.7, help='Spherical aberration in mm')
@click.option('--bfactor', default=0.0, help='Envelope B-factor')
@click.option('--bfactors/--no-bfactors', default=False, help='Use atomic B-factors for local resolution')
@click.option('--apply-physics/--no-physics', default=False, help='Apply CTF effects')
def main(input_path, output_path, resolution, spacing, snr, defocus, voltage, cs, bfactor, bfactors, apply_physics):
    """
    Generate a synthetic Cryo-EM map from an atomic model (PDB, mmCIF, or BCIF).
    """
    # NOTE: the docstring above is the text click prints for --help.

    # Reject values that would otherwise surface as a ZeroDivisionError or as
    # a map full of NaN/inf further down the pipeline.
    if resolution <= 0:
        raise click.BadParameter("must be positive", param_hint="--resolution")
    if spacing is not None and spacing <= 0:
        raise click.BadParameter("must be positive", param_hint="--spacing")
    if snr is not None and snr <= 0:
        raise click.BadParameter("must be positive", param_hint="--snr")

    click.echo(f"Generating map for {input_path} at {resolution}A resolution...")

    grid, origin = generate_density_map(input_path, resolution, grid_spacing=spacing, use_bfactors=bfactors)

    # gemmi exposes the grid as an array indexed [x, y, z], whereas apply_ctf
    # unpacks its input as (nz, ny, nx) and mrcfile stores volumes as
    # (z, y, x). Reorder the axes once so the CTF frequencies and the written
    # MRC header describe the same axes as the atomic model.
    data = np.array(grid, copy=True).transpose(2, 1, 0)

    # Voxel size (x, y, z) is derived from the unit cell and grid dimensions.
    uc = grid.unit_cell
    vox_size = (uc.a / grid.nu, uc.b / grid.nv, uc.c / grid.nw)

    if apply_physics:
        click.echo(f"Applying CTF (defocus={defocus}um, voltage={voltage}kV, B-factor={bfactor})...")
        data = apply_ctf(data, vox_size, defoc=defocus, cs=cs, voltage=voltage, b_factor=bfactor)

    if snr is not None:
        click.echo(f"Adding Gaussian noise (SNR={snr})...")
        data = add_gaussian_noise(data, snr)

    save_mrc(data, output_path, origin=origin, spacing=vox_size)
    click.echo(f"Saved synthetic map to {output_path}")
|
|
41
|
+
|
|
42
|
+
if __name__ == '__main__':
|
|
43
|
+
main()
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import gemmi
|
|
2
|
+
import numpy as np
|
|
3
|
+
import mrcfile
|
|
4
|
+
from scipy.ndimage import gaussian_filter
|
|
5
|
+
|
|
6
|
+
def generate_density_map(input_path, resolution, grid_spacing=None, use_bfactors=False, margin=None):
    """
    Generate a density map from an atomic model file (PDB, mmCIF, BCIF) using gemmi.

    The structure is placed in an orthogonal box spanning the bounding box of
    all atoms plus ``margin`` on every side, and the first model is rendered
    with ``gemmi.DensityCalculatorE``.

    Args:
        input_path: Path passed to ``gemmi.read_structure``.
        resolution: Target resolution in Angstroms; must be positive.
        grid_spacing: Requested voxel size in Angstroms. Defaults to
            ``resolution / 3``.
        use_bfactors: If True, keep the atomic B-factors from the file. If
            False, every B-factor is zeroed and a global blur of
            ``8 * resolution**2`` is applied instead.
        margin: Padding in Angstroms added around the atoms. Defaults to
            ``2 * resolution``.

    Returns:
        ``(grid, min_pos)`` where ``grid`` is the calculator's grid and
        ``min_pos`` is the (x, y, z) offset that was subtracted from every
        atom, i.e. the position of the box corner in the input frame.

    Raises:
        ValueError: If ``resolution`` or ``grid_spacing`` is not positive, if
            the structure has no atoms, or if the padded box is empty.
    """
    if not resolution > 0:
        raise ValueError(f"resolution must be positive, got {resolution!r}")

    # Rule of thumb when no spacing is requested: resolution / 3.
    if grid_spacing is None:
        grid_spacing = resolution / 3.0
    if not grid_spacing > 0:
        raise ValueError(f"grid_spacing must be positive, got {grid_spacing!r}")

    st = gemmi.read_structure(input_path)

    positions = [atom.pos.tolist() for atom in _iter_atoms(st)]
    if not positions:
        raise ValueError("No atoms found in structure")
    positions = np.array(positions)

    if margin is None:
        margin = resolution * 2.0

    min_pos = positions.min(axis=0) - margin
    max_pos = positions.max(axis=0) + margin
    size = max_pos - min_pos
    if np.any(size <= 0):
        # Only reachable with a negative margin that swallows the whole model.
        raise ValueError(f"margin {margin!r} leaves an empty box around the structure")

    # Work on a copy so the box corner can be moved to the origin and the
    # B-factors overwritten without touching the structure that was read.
    st_shifted = st.clone()
    st_shifted.cell = gemmi.UnitCell(size[0], size[1], size[2], 90, 90, 90)

    for atom in _iter_atoms(st_shifted):
        atom.pos.x -= min_pos[0]
        atom.pos.y -= min_pos[1]
        atom.pos.z -= min_pos[2]
        if not use_bfactors:
            atom.b_iso = 0.0

    calc = gemmi.DensityCalculatorE()
    calc.d_min = resolution
    # spacing = d_min / (2 * rate)  =>  rate = d_min / (2 * spacing)
    calc.rate = resolution / (2.0 * grid_spacing)

    if not use_bfactors:
        # Heuristic global blur standing in for the target resolution.
        calc.blur = 8.0 * resolution**2

    calc.set_grid_cell_and_spacegroup(st_shifted)
    # A single initialisation is enough; it is kept explicit so calc.grid is
    # allocated before the model density is placed on it.
    calc.initialize_grid()
    if len(st_shifted) > 0:
        # Only the first model contributes density.
        calc.put_model_density_on_grid(st_shifted[0])

    return calc.grid, min_pos


def _iter_atoms(structure):
    """Yield every atom of every model, chain and residue in ``structure``."""
    for model in structure:
        for chain in model:
            for residue in chain:
                yield from residue
|
|
83
|
+
|
|
84
|
+
def add_gaussian_noise(data, snr, rng=None):
    """
    Add zero-mean Gaussian noise to the data based on the desired SNR.

    The noise variance is ``mean(data**2) / snr``, i.e. ``snr`` is a power
    ratio between the signal and the added noise.

    Args:
        data: Array-like map values.
        snr: Signal-to-noise power ratio; must be positive.
        rng: Optional object with a ``normal(loc, scale, size)`` method, such
            as ``numpy.random.default_rng(seed)``, for reproducible output.
            When omitted, the global ``numpy.random`` state is used.

    Returns:
        A new float64 array of the same shape as ``data``.

    Raises:
        ValueError: If ``snr`` is not a positive number.
    """
    # "not snr > 0" also rejects NaN, which would otherwise poison the map.
    if not snr > 0:
        raise ValueError(f"snr must be positive, got {snr!r}")

    # Promote first so integer maps do not overflow when squared.
    values = np.asarray(data, dtype=np.float64)
    noise_power = np.mean(values**2) / snr

    sampler = np.random if rng is None else rng
    noise = sampler.normal(0.0, np.sqrt(noise_power), values.shape)
    return values + noise
|
|
92
|
+
|
|
93
|
+
def apply_ctf(data, voxel_size, defoc=2.0, cs=2.7, voltage=300, amplitude_contrast=0.1, b_factor=0.0):
    """
    Multiply the 3D Fourier transform of ``data`` by a simple radial CTF.

    Args:
        data: 3D array; its axes are unpacked as (z, y, x).
        voxel_size: Sequence indexed as (x, y, z), so ``voxel_size[2]`` is
            used for the first array axis and ``voxel_size[0]`` for the last.
        defoc: Defocus in micrometers.
        cs: Spherical aberration in mm.
        voltage: Acceleration voltage in kV.
        amplitude_contrast: Amplitude-contrast fraction.
        b_factor: Envelope B-factor; the envelope is applied only when > 0.

    Returns:
        The real part of the inverse transform, same shape as ``data``.
    """
    # Everything below works in Angstroms.
    wavelength = 12.26 / np.sqrt(voltage * 1000 + 0.9784 * voltage**2)
    cs_angstrom = cs * 1e7
    defocus_angstrom = defoc * 10000

    depth, height, width = data.shape
    freq_z = np.fft.fftfreq(depth, d=voxel_size[2])
    freq_y = np.fft.fftfreq(height, d=voxel_size[1])
    freq_x = np.fft.fftfreq(width, d=voxel_size[0])

    # Squared spatial frequency on the full grid, built by broadcasting the
    # three 1D axes against each other.
    radial_sq = (
        freq_z[:, None, None]**2
        + freq_y[None, :, None]**2
        + freq_x[None, None, :]**2
    )

    # Phase shift from defocus and spherical aberration.
    aberration = 0.5 * wavelength**2 * radial_sq * cs_angstrom
    phase = np.pi * wavelength * radial_sq * (defocus_angstrom - aberration)

    phase_weight = np.sqrt(1 - amplitude_contrast**2)
    transfer = -(phase_weight * np.sin(phase) + amplitude_contrast * np.cos(phase))

    # Optional Gaussian envelope damping the high frequencies.
    if b_factor > 0:
        transfer *= np.exp(-b_factor * radial_sq / 4.0)

    filtered = np.fft.fftn(data) * transfer
    return np.real(np.fft.ifftn(filtered))
|
|
130
|
+
|
|
131
|
+
def compute_fsc(data1, data2, voxel_size):
    """
    Compute the Fourier Shell Correlation (FSC) between two 3D maps.

    Fourier coefficients are grouped into ``min(shape) // 2`` equal-width
    shells between zero and the largest frequency magnitude on the grid. Each
    shell is half-open ``[low, high)``, so the coefficient(s) at the very
    largest magnitude are not assigned to any shell.

    Args:
        data1: First 3D map; axes are unpacked as (z, y, x).
        data2: Second 3D map, same shape as ``data1``.
        voxel_size: Sequence indexed as (x, y, z).

    Returns:
        ``(freqs, fsc)``: shell-centre frequencies and the correlation in
        each shell. Shells with zero power in either map are omitted.

    Raises:
        ValueError: If the maps are not 3D or their shapes differ.
    """
    data1 = np.asarray(data1)
    data2 = np.asarray(data2)
    # Explicit raise rather than assert: asserts vanish under "python -O".
    if data1.shape != data2.shape:
        raise ValueError(f"maps have different shapes: {data1.shape} vs {data2.shape}")
    if data1.ndim != 3:
        raise ValueError(f"expected 3D maps, got {data1.ndim}D")

    f1 = np.fft.fftn(data1)
    f2 = np.fft.fftn(data2)

    # Only the real part of the cross spectrum enters the FSC numerator.
    cross = (f1 * np.conj(f2)).real.ravel()
    power1 = (f1.real**2 + f1.imag**2).ravel()
    power2 = (f2.real**2 + f2.imag**2).ravel()

    nz, ny, nx = data1.shape
    kz = np.fft.fftfreq(nz, d=voxel_size[2])
    ky = np.fft.fftfreq(ny, d=voxel_size[1])
    kx = np.fft.fftfreq(nx, d=voxel_size[0])
    k = np.sqrt(
        kz[:, None, None]**2 + ky[None, :, None]**2 + kx[None, None, :]**2
    ).ravel()

    n_bins = min(nx, ny, nz) // 2
    edges = np.linspace(0, k.max(), n_bins + 1)

    # digitize maps edges[i] <= k < edges[i + 1] to i + 1; values equal to the
    # last edge land in the overflow slot n_bins, which is dropped below.
    shell = np.digitize(k, edges) - 1
    sum_cross = np.bincount(shell, weights=cross, minlength=n_bins + 1)[:n_bins]
    sum_p1 = np.bincount(shell, weights=power1, minlength=n_bins + 1)[:n_bins]
    sum_p2 = np.bincount(shell, weights=power2, minlength=n_bins + 1)[:n_bins]

    den = np.sqrt(sum_p1 * sum_p2)
    valid = den > 0  # also drops empty shells
    centers = (edges[:-1] + edges[1:]) / 2.0
    return centers[valid], sum_cross[valid] / den[valid]
|
|
198
|
+
|
|
199
|
+
def compute_ccc(data1, data2):
    """
    Compute the Cross-Correlation Coefficient (CCC) between two 3D maps.

    Both maps are mean-subtracted and the normalised dot product of the
    flattened values is returned.

    Args:
        data1: First map (array-like).
        data2: Second map, same shape as ``data1``.

    Returns:
        The correlation as a Python float, or ``0.0`` when either map has
        zero variance.

    Raises:
        ValueError: If the two maps have different shapes.
    """
    # Accumulate in float64 regardless of the stored dtype (MRC data is
    # usually float32) so large maps do not lose precision in the sums.
    a = np.asarray(data1, dtype=np.float64)
    b = np.asarray(data2, dtype=np.float64)
    # Explicit raise rather than assert: asserts vanish under "python -O".
    if a.shape != b.shape:
        raise ValueError(f"maps have different shapes: {a.shape} vs {b.shape}")

    a = a.ravel() - a.mean()
    b = b.ravel() - b.mean()

    den = np.sqrt(np.sum(a**2) * np.sum(b**2))
    if den == 0:
        return 0.0
    return float(np.sum(a * b) / den)
|
|
218
|
+
|
|
219
|
+
def save_mrc(data, output_path, origin=(0,0,0), spacing=(1,1,1)):
    """
    Save numpy array to MRC file.

    Args:
        data: Array with an ``astype`` method; it is written as float32.
        output_path: Destination path. An existing file is overwritten.
        origin: Indexable of three values written to the header origin as
            (x, y, z).
        spacing: Value assigned to the file's ``voxel_size``.
    """
    # overwrite=True: replace any existing file at output_path.
    with mrcfile.new(output_path, overwrite=True) as mrc:
        mrc.set_data(data.astype(np.float32))
        # NOTE(review): assumes data is already ordered (z, y, x) as mrcfile
        # expects, while spacing and origin are given as (x, y, z) - confirm
        # against callers.
        mrc.voxel_size = spacing
        # mrcfile uses x, y, z for origin
        mrc.header.origin.x = origin[0]
        mrc.header.origin.y = origin[1]
        mrc.header.origin.z = origin[2]
        mrc.update_header_from_data()
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import click
|
|
2
|
+
import mrcfile
|
|
3
|
+
import numpy as np
|
|
4
|
+
from .core import compute_fsc, compute_ccc
|
|
5
|
+
|
|
6
|
+
@click.command()
@click.argument('map1_path', type=click.Path(exists=True))
@click.argument('map2_path', type=click.Path(exists=True))
@click.option('--output', '-o', help='Path to save FSC data (CSV)')
def main(map1_path, map2_path, output):
    """
    Compare two Cryo-EM maps using Fourier Shell Correlation (FSC) and CCC.
    """
    # NOTE: the docstring above is the text click prints for --help.
    click.echo(f"Comparing {map1_path} and {map2_path}...")

    with mrcfile.open(map1_path) as m1, mrcfile.open(map2_path) as m2:
        d1 = m1.data
        d2 = m2.data
        # Frequencies are scaled with the first map's voxel size only.
        v1 = m1.voxel_size

    if d1.shape != d2.shape:
        # ClickException prints "Error: <message>" to stderr and exits with
        # status 1, so scripts can detect the failure.
        raise click.ClickException("Maps have different shapes. Resampling not yet supported.")

    voxel_size = (v1.x, v1.y, v1.z)
    freqs, fsc = compute_fsc(d1, d2, voxel_size)
    ccc = compute_ccc(d1, d2)

    click.echo(f"\nOverall Cross-Correlation Coefficient (CCC): {ccc:.4f}\n")

    # Print roughly ten evenly spaced shells.
    click.echo(f"{'Resolution (A)':<15} | {'FSC':<10}")
    click.echo("-" * 30)
    # Fewer than 10 shells would give a step of 0, which range() rejects.
    step = max(1, len(freqs) // 10)
    for i in range(0, len(freqs), step):
        res = 1.0 / freqs[i] if freqs[i] > 0 else float('inf')
        click.echo(f"{res:<15.2f} | {fsc[i]:<10.4f}")

    # Report the first shell whose FSC falls below each threshold.
    for val in [0.5, 0.143]:
        cross_idx = np.where(fsc < val)[0]
        if len(cross_idx) > 0:
            res = 1.0 / freqs[cross_idx[0]]
            click.echo(f"\nFSC={val} crossing at {res:.2f} Angstroms")

    if output:
        np.savetxt(output, np.column_stack((freqs, fsc)), delimiter=',', header='frequency,fsc')
        click.echo(f"\nFSC data saved to {output}")
|
|
48
|
+
|
|
49
|
+
if __name__ == '__main__':
|
|
50
|
+
main()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synth-cryo-em
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Synthetic Cryo-EM map generator from PDB models
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy
|
|
8
|
+
Requires-Dist: mrcfile
|
|
9
|
+
Requires-Dist: gemmi
|
|
10
|
+
Requires-Dist: scipy
|
|
11
|
+
Requires-Dist: click
|
|
12
|
+
Provides-Extra: test
|
|
13
|
+
Requires-Dist: pytest; extra == "test"
|
|
14
|
+
Provides-Extra: docs
|
|
15
|
+
Requires-Dist: mkdocs; extra == "docs"
|
|
16
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
17
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
18
|
+
|
|
19
|
+
# synth-cryo-em
|
|
20
|
+
|
|
21
|
+
[](https://github.com/elkins/synth-cryo-em/actions/workflows/test.yml)
|
|
22
|
+
[](https://elkins.github.io/synth-cryo-em/)
|
|
23
|
+
|
|
24
|
+
A lightweight Pythonic utility to convert atomic models (PDB/CIF) into synthetic 3D Cryo-EM maps with realistic noise, CTF effects, and varying resolutions.
|
|
25
|
+
|
|
26
|
+
## 🌟 Features
|
|
27
|
+
- **Voxelize** atomic models with accurate resolution simulation.
|
|
28
|
+
- **Simulate Physics:** Apply Contrast Transfer Functions (CTF) and envelope functions.
|
|
29
|
+
- **Noise Modeling:** Add adjustable Gaussian noise to simulate low-SNR experimental data.
|
|
30
|
+
- **Standard Format:** Export results to MRC files compatible with RELION, ChimeraX, and other tools.
|
|
31
|
+
|
|
32
|
+
## 🚀 Quick Start
|
|
33
|
+
|
|
34
|
+
### Installation
|
|
35
|
+
```bash
|
|
36
|
+
pip install synth-cryo-em
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Basic Generation
|
|
40
|
+
```bash
|
|
41
|
+
synth-cryo-em structure.pdb output.mrc --resolution 4.0
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Realistic Simulation
|
|
45
|
+
```bash
|
|
46
|
+
synth-cryo-em structure.pdb output.mrc --resolution 3.5 --apply-physics --snr 5
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## 📚 Documentation
|
|
50
|
+
For detailed guides and API reference, visit the [Documentation Site](https://elkins.github.io/synth-cryo-em/).
|
|
51
|
+
|
|
52
|
+
## 🛠️ Development
|
|
53
|
+
To install for development and documentation:
|
|
54
|
+
```bash
|
|
55
|
+
pip install -e ".[test,docs]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Run tests:
|
|
59
|
+
```bash
|
|
60
|
+
pytest tests/
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Build docs locally:
|
|
64
|
+
```bash
|
|
65
|
+
mkdocs serve
|
|
66
|
+
```
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/synth_cryo_em/cli.py
|
|
4
|
+
src/synth_cryo_em/core.py
|
|
5
|
+
src/synth_cryo_em/validate.py
|
|
6
|
+
src/synth_cryo_em.egg-info/PKG-INFO
|
|
7
|
+
src/synth_cryo_em.egg-info/SOURCES.txt
|
|
8
|
+
src/synth_cryo_em.egg-info/dependency_links.txt
|
|
9
|
+
src/synth_cryo_em.egg-info/entry_points.txt
|
|
10
|
+
src/synth_cryo_em.egg-info/requires.txt
|
|
11
|
+
src/synth_cryo_em.egg-info/top_level.txt
|
|
12
|
+
tests/test_core.py
|
|
13
|
+
tests/test_empirical.py
|
|
14
|
+
tests/test_real_data.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
synth_cryo_em
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import os
|
|
3
|
+
import numpy as np
|
|
4
|
+
from synth_cryo_em.core import generate_density_map, apply_ctf, add_gaussian_noise
|
|
5
|
+
|
|
6
|
+
class TestSynthCryoEM(unittest.TestCase):
    """Unit tests for map generation, CTF, noise and MRC output."""

    def setUp(self):
        import tempfile

        # Every fixture lives in a per-test temporary directory so tests do
        # not collide with each other or leave files in the working directory.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmp_dir = tmp.name

        # PDB ATOM records are fixed-width; keep the column alignment intact.
        self.pdb_content = (
            "ATOM      1  N   ALA A   1      11.104   6.132  11.469  1.00 20.00           N\n"
            "ATOM      2  CA  ALA A   1      12.000  12.000  12.000  1.00 20.00           C\n"
            "ATOM      3  C   ALA A   1      13.104  18.132  13.469  1.00 20.00           C\n"
            "TER\n"
            "END\n"
        )
        self.test_pdb = os.path.join(self.tmp_dir, "test_temp.pdb")
        with open(self.test_pdb, "w") as f:
            f.write(self.pdb_content)

    def _generate(self, **kwargs):
        """Return (data copy, voxel size) for the fixture model at 4 A."""
        grid, _ = generate_density_map(self.test_pdb, resolution=4.0, **kwargs)
        uc = grid.unit_cell
        vox_size = (uc.a / grid.nu, uc.b / grid.nv, uc.c / grid.nw)
        return np.array(grid, copy=True), vox_size

    def test_generate_density(self):
        data, _ = self._generate()
        self.assertGreater(np.sum(data), 0)
        self.assertEqual(len(data.shape), 3)

    def test_apply_ctf(self):
        data, vox_size = self._generate()
        data_ctf = apply_ctf(data, vox_size, defoc=1.0)
        self.assertEqual(data_ctf.shape, data.shape)
        # CTF should change the values
        self.assertFalse(np.allclose(data, data_ctf))

    def test_add_noise(self):
        data = np.ones((10, 10, 10))
        noisy = add_gaussian_noise(data, snr=10)
        self.assertEqual(noisy.shape, data.shape)
        self.assertNotEqual(np.mean(noisy), 1.0)

    def test_apply_ctf_with_bfactor(self):
        data, vox_size = self._generate()
        data_ctf = apply_ctf(data, vox_size, defoc=1.0, b_factor=100.0)
        self.assertEqual(data_ctf.shape, data.shape)
        self.assertFalse(np.allclose(data, data_ctf))

    def test_generate_density_no_atoms(self):
        empty_pdb = os.path.join(self.tmp_dir, "empty.pdb")
        with open(empty_pdb, "w") as f:
            f.write("END\n")
        with self.assertRaises(ValueError):
            generate_density_map(empty_pdb, resolution=4.0)

    def test_generate_with_bfactors(self):
        data, _ = self._generate(use_bfactors=True)
        self.assertGreater(np.sum(data), 0)

    def test_mmcif_support(self):
        import gemmi

        # Build a one-atom structure and round-trip it through an mmCIF file.
        cif_path = os.path.join(self.tmp_dir, "test_mmcif.cif")
        st = gemmi.Structure()
        model = gemmi.Model("1")
        chain = gemmi.Chain("A")
        res = gemmi.Residue()
        res.name = "ALA"
        res.seqid = gemmi.SeqId(1, ' ')
        atom = gemmi.Atom()
        atom.name = "CA"
        atom.element = gemmi.Element("C")
        atom.pos = gemmi.Position(10, 10, 10)
        res.add_atom(atom)
        chain.add_residue(res)
        model.add_chain(chain)
        st.add_model(model)
        st.make_mmcif_document().write_file(cif_path)

        grid, _ = generate_density_map(cif_path, resolution=4.0)
        data = np.array(grid, copy=True)
        self.assertGreater(np.sum(data), 0)

    def test_save_mrc(self):
        from synth_cryo_em.core import save_mrc
        import mrcfile

        data = np.zeros((10, 10, 10), dtype=np.float32)
        test_mrc = os.path.join(self.tmp_dir, "test_output.mrc")
        save_mrc(data, test_mrc, origin=(1, 2, 3), spacing=(1.1, 1.1, 1.1))
        self.assertTrue(os.path.exists(test_mrc))
        with mrcfile.open(test_mrc) as mrc:
            self.assertEqual(mrc.data.shape, (10, 10, 10))
            self.assertAlmostEqual(mrc.voxel_size.x, 1.1, places=5)
            self.assertAlmostEqual(mrc.header.origin.x, 1.0)
|
|
111
|
+
|
|
112
|
+
if __name__ == '__main__':
|
|
113
|
+
unittest.main()
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import os
|
|
3
|
+
import numpy as np
|
|
4
|
+
import mrcfile
|
|
5
|
+
import gemmi
|
|
6
|
+
import urllib.request
|
|
7
|
+
from synth_cryo_em.core import generate_density_map, compute_ccc, compute_fsc
|
|
8
|
+
|
|
9
|
+
class TestEmpiricalValidation(unittest.TestCase):
    """
    Functional tests comparing synthetic results with empirical expectations.
    Uses small real-world structures to validate correlation.
    """

    @classmethod
    def setUpClass(cls):
        # We use a very small, well-known protein: Crambin (PDB: 1CRN)
        # It's small (46 residues) and stable.
        cls.pdb_id = "1crn"
        cls.pdb_path = f"{cls.pdb_id}.pdb"
        # Only files this class downloaded are removed afterwards; a copy the
        # developer already had in the working directory is left alone.
        cls._downloaded = False

        if not os.path.exists(cls.pdb_path):
            url = f"https://files.rcsb.org/download/{cls.pdb_id}.pdb"
            try:
                urllib.request.urlretrieve(url, cls.pdb_path)
                cls._downloaded = True
            except Exception as e:
                # Best effort: without network access the tests are skipped.
                print(f"Skipping empirical test: Could not download {url}. Error: {e}")
                # Do not leave a partial download for the next run to trust.
                if os.path.exists(cls.pdb_path):
                    os.remove(cls.pdb_path)
                cls.pdb_path = None

    @classmethod
    def tearDownClass(cls):
        if cls._downloaded and cls.pdb_path and os.path.exists(cls.pdb_path):
            os.remove(cls.pdb_path)

    def test_crambin_reconstruction_consistency(self):
        """
        Validate that generating a map twice with identical parameters
        yields a cross-correlation of 1, i.e. generation is deterministic.
        """
        if not self.pdb_path:
            self.skipTest("PDB file not available")

        # Use fixed parameters for consistency
        params = dict(resolution=3.0, grid_spacing=1.0, margin=10.0)

        grid, _ = generate_density_map(self.pdb_path, **params)
        data = np.array(grid, copy=True)

        grid_ref, _ = generate_density_map(self.pdb_path, **params)
        data_ref = np.array(grid_ref, copy=True)

        ccc = compute_ccc(data, data_ref)

        # Identical parameters should yield identical results
        self.assertAlmostEqual(ccc, 1.0, places=5)

    def test_resolution_cutoffs(self):
        """
        Validate that the FSC correctly reflects the simulated resolution.
        The FSC between an 8 A map and a 2 A map of the same model should
        show a significant drop at high frequency.
        """
        if not self.pdb_path:
            self.skipTest("PDB file not available")

        res_low = 8.0
        res_high = 2.0
        # Use a fixed grid spacing and margin for both to ensure same shape
        spacing = 1.0
        margin = 15.0

        grid_low, _ = generate_density_map(self.pdb_path, resolution=res_low, grid_spacing=spacing, margin=margin)
        grid_high, _ = generate_density_map(self.pdb_path, resolution=res_high, grid_spacing=spacing, margin=margin)

        data_low = np.array(grid_low, copy=True)
        data_high = np.array(grid_high, copy=True)

        uc = grid_low.unit_cell
        voxel_size = (uc.a / grid_low.nu, uc.b / grid_low.nv, uc.c / grid_low.nw)

        freqs, fsc = compute_fsc(data_low, data_high, voxel_size)

        # The FSC in the last shell should be lower than in a low-frequency shell.
        self.assertGreater(fsc[1], fsc[-1], "FSC should decrease with frequency")

        # Check for a drop below 0.5 at some point
        low_fsc_indices = np.where(fsc < 0.5)[0]
        self.assertGreater(len(low_fsc_indices), 0, "FSC should drop below 0.5 for different resolutions")
|
|
94
|
+
|
|
95
|
+
if __name__ == '__main__':
|
|
96
|
+
unittest.main()
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import os
|
|
3
|
+
import numpy as np
|
|
4
|
+
import mrcfile
|
|
5
|
+
import gemmi
|
|
6
|
+
import urllib.request
|
|
7
|
+
import gzip
|
|
8
|
+
import shutil
|
|
9
|
+
from synth_cryo_em.core import generate_density_map, compute_ccc, compute_fsc
|
|
10
|
+
|
|
11
|
+
class TestRealDataValidation(unittest.TestCase):
    """
    Validation against real empirical data from PDB and EMDB.
    """

    @classmethod
    def setUpClass(cls):
        cls.pdb_id = "6k7v"
        cls.emdb_id = "9943"
        cls.pdb_path = f"{cls.pdb_id}.pdb"
        cls.mrc_gz_path = f"emd_{cls.emdb_id}.map.gz"
        cls.mrc_path = f"emd_{cls.emdb_id}.map"
        # Paths created by this class; only these are removed on teardown so
        # files the developer already had in place are left untouched.
        cls._created = []

        # Download PDB
        if not os.path.exists(cls.pdb_path):
            url = f"https://files.rcsb.org/download/{cls.pdb_id}.pdb"
            cls._created.append(cls.pdb_path)
            try:
                urllib.request.urlretrieve(url, cls.pdb_path)
            except Exception as e:
                # Best effort: without network access the test is skipped.
                print(f"Failed to download PDB: {e}")
                cls.pdb_path = None

        # Download EMDB map
        if not os.path.exists(cls.mrc_path):
            url = f"https://ftp.ebi.ac.uk/pub/databases/emdb/structures/EMD-{cls.emdb_id}/map/emd_{cls.emdb_id}.map.gz"
            # Registered before the attempt so a truncated archive or a
            # half-extracted map is still cleaned up and never mistaken for
            # valid data by a later run.
            cls._created.extend([cls.mrc_gz_path, cls.mrc_path])
            try:
                urllib.request.urlretrieve(url, cls.mrc_gz_path)
                with gzip.open(cls.mrc_gz_path, 'rb') as f_in:
                    with open(cls.mrc_path, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
            except Exception as e:
                print(f"Failed to download/extract EMDB map: {e}")
                cls.mrc_path = None

    @classmethod
    def tearDownClass(cls):
        for p in cls._created:
            if os.path.exists(p):
                os.remove(p)

    def test_emd_9943_correlation(self):
        """
        Generate a synthetic map from 6K7V on the voxel size of EMD-9943.

        This is a smoke test: the two maps are not aligned or resampled, so
        no correlation between them is asserted.
        """
        if not self.pdb_path or not self.mrc_path:
            self.skipTest("Data not available")

        # Load experimental map to get its parameters
        with mrcfile.open(self.mrc_path) as mrc:
            exp_data = mrc.data.copy()
            spacing = float(mrc.voxel_size.x)  # Assuming cubic voxels

        # The experimental map has a specific resolution (3.7 A)
        res = 3.7

        # Matching the experimental origin and grid size exactly would need
        # resampling/alignment; for now only the spacing is matched.
        grid, _ = generate_density_map(self.pdb_path, resolution=res, grid_spacing=spacing)
        gen_data = np.array(grid, copy=True)

        self.assertGreater(np.sum(gen_data), 0)
        self.assertEqual(len(gen_data.shape), 3)

        # Here we at least validate the tool can process real PDBs.
        print(f"Generated map shape: {gen_data.shape}, Experimental: {exp_data.shape}")
|
|
88
|
+
|
|
89
|
+
if __name__ == '__main__':
|
|
90
|
+
unittest.main()
|