Skip to content

Commit 561ffa7

Browse files
committed
test(sfc): add atomarray_to_gemmi tests using 6b8x
1 parent faa6b72 commit 561ffa7

2 files changed

Lines changed: 124 additions & 1 deletion

File tree

src/sampleworks/eval/generate_synthetic_sf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import torch
2020
from atomworks.io.transforms.atom_array import remove_waters
2121
from biotite.structure import AtomArray
22-
from joblib import delayed, Parallel
2322
from loguru import logger
2423
from sampleworks.utils.atom_array_utils import (
2524
assign_occupancies,
@@ -447,6 +446,8 @@ def process_batch(
447446
simulate_solvent_and_scale
448447
If True, compute bulk solvent and scale factors in addition to F_protein.
449448
"""
449+
from joblib import delayed, Parallel
450+
450451
rows = load_batch_csv(csv_path)
451452
logger.info(f"Processing {len(rows)} structures from {csv_path} using {n_jobs} jobs")
452453

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""Tests for atomarray_to_gemmi in generate_synthetic_sf, using real 6b8x structure."""
2+
3+
from pathlib import Path
4+
5+
import numpy as np
6+
import pytest
7+
import torch
8+
9+
10+
pytest.importorskip("SFC_Torch", reason="SFC_Torch not available; run in analysis-dev-sfc env")
11+
12+
import gemmi
13+
from atomworks.io.transforms.atom_array import remove_waters
14+
from sampleworks.eval.generate_synthetic_sf import atomarray_to_gemmi
15+
from sampleworks.utils.atom_array_utils import (
16+
assign_occupancies,
17+
detect_altlocs,
18+
keep_amino_acids,
19+
keep_polymer,
20+
load_structure_with_altlocs,
21+
remove_hydrogens,
22+
)
23+
from SFC_Torch import SFcalculator
24+
from SFC_Torch.io import PDBParser
25+
from SFC_Torch.utils import assert_numpy
26+
27+
28+
DMIN = 2.0
29+
30+
31+
@pytest.fixture(scope="module")
32+
def stripped_gemmi(resources_dir: Path) -> gemmi.Structure:
33+
s = gemmi.read_structure(str(resources_dir / "6b8x" / "6b8x_final.pdb"))
34+
s.remove_hydrogens()
35+
s.remove_ligands_and_waters()
36+
return s
37+
38+
39+
@pytest.fixture(scope="module")
40+
def stripped_atom_array(resources_dir: Path):
41+
arr = load_structure_with_altlocs(resources_dir / "6b8x" / "6b8x_final.pdb")
42+
arr = remove_hydrogens(arr)
43+
arr = remove_waters(arr)
44+
return keep_polymer(keep_amino_acids(arr))
45+
46+
47+
@pytest.fixture(scope="module")
48+
def gemmi_structure_from_atomarray(
49+
stripped_atom_array, stripped_gemmi: gemmi.Structure
50+
) -> gemmi.Structure:
51+
return atomarray_to_gemmi(
52+
stripped_atom_array, stripped_gemmi.cell, stripped_gemmi.spacegroup_hm
53+
)
54+
55+
56+
def _compute_fprotein(gemmi_structure: gemmi.Structure, device: torch.device) -> np.ndarray:
57+
sfc = SFcalculator(
58+
PDBParser(gemmi_structure),
59+
mtzdata=None,
60+
dmin=DMIN,
61+
mode="xray",
62+
anomalous=False,
63+
set_experiment=False,
64+
device=device,
65+
)
66+
sfc.calc_fprotein()
67+
return assert_numpy(sfc.Fprotein_asu)
68+
69+
70+
class TestAtomArrayToGemmi:
71+
def test_cell_matches_pdb(self, gemmi_structure_from_atomarray, stripped_gemmi):
72+
result = gemmi_structure_from_atomarray.cell
73+
expected = stripped_gemmi.cell
74+
assert result.a == pytest.approx(expected.a)
75+
assert result.b == pytest.approx(expected.b)
76+
assert result.c == pytest.approx(expected.c)
77+
assert result.alpha == pytest.approx(expected.alpha)
78+
assert result.beta == pytest.approx(expected.beta)
79+
assert result.gamma == pytest.approx(expected.gamma)
80+
81+
def test_space_group_matches_pdb(self, gemmi_structure_from_atomarray, stripped_gemmi):
82+
assert gemmi_structure_from_atomarray.spacegroup_hm == stripped_gemmi.spacegroup_hm
83+
84+
def test_atom_count_matches_pdb(self, gemmi_structure_from_atomarray, stripped_gemmi):
85+
assert (
86+
gemmi_structure_from_atomarray[0].count_atom_sites()
87+
== stripped_gemmi[0].count_atom_sites()
88+
)
89+
90+
def test_occupancy_change_is_applied(self, stripped_atom_array, stripped_gemmi):
91+
occ_values = [0.2, 0.8, 0.0]
92+
altloc_info = detect_altlocs(stripped_atom_array)
93+
arr = assign_occupancies(stripped_atom_array, altloc_info, "custom", occ_values)
94+
parser = PDBParser(
95+
atomarray_to_gemmi(arr, stripped_gemmi.cell, stripped_gemmi.spacegroup_hm)
96+
)
97+
for altloc, expected in zip(altloc_info.altloc_ids, occ_values):
98+
assert np.allclose(parser.atom_occ[altloc_info.atom_masks[altloc]], expected)
99+
100+
def test_fprotein_matches_direct_gemmi(
101+
self, gemmi_structure_from_atomarray, stripped_gemmi, device
102+
):
103+
f_atomarray = _compute_fprotein(gemmi_structure_from_atomarray, device)
104+
f_direct = _compute_fprotein(stripped_gemmi, device)
105+
np.testing.assert_allclose(np.abs(f_atomarray), np.abs(f_direct), atol=1e-3)
106+
107+
def test_fprotein_changes_with_occupancy(self, stripped_atom_array, stripped_gemmi, device):
108+
altloc_info = detect_altlocs(stripped_atom_array)
109+
110+
arr_default = assign_occupancies(stripped_atom_array, altloc_info, "default")
111+
f_default = _compute_fprotein(
112+
atomarray_to_gemmi(arr_default, stripped_gemmi.cell, stripped_gemmi.spacegroup_hm),
113+
device,
114+
)
115+
116+
arr_custom = assign_occupancies(stripped_atom_array, altloc_info, "custom", [0.2, 0.8, 0.0])
117+
f_custom = _compute_fprotein(
118+
atomarray_to_gemmi(arr_custom, stripped_gemmi.cell, stripped_gemmi.spacegroup_hm),
119+
device,
120+
)
121+
122+
assert not np.allclose(np.abs(f_default), np.abs(f_custom), atol=1e-3)

0 commit comments

Comments
 (0)