Skip to content

Commit 750cada

Browse files
authored
Merge pull request #290 from OpenBioSim/fix_287
Fix issue #287
2 parents d44541e + 7ac6ed2 commit 750cada

File tree

5 files changed

+52
-5
lines changed

5 files changed

+52
-5
lines changed

corelib/src/libs/SireIO/sdf.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -235,11 +235,11 @@ namespace SireIO
235235
case 0:
236236
return 0;
237237
case 1:
238-
return 1;
238+
return 3;
239239
case 2:
240240
return 2;
241241
case 3:
242-
return 3;
242+
return 1;
243243
case 4:
244244
return 0;
245245
case 5:

doc/source/changelog.rst

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ organisation on `GitHub <https://github.com/openbiosim/sire>`__.
1717
------------------------------------------------------------------------------------------
1818

1919
* Please add an item to this CHANGELOG for any new features or bug fixes when creating a PR.
20+
* Allow user to force fresh inference of stereochemistry when converting to RDKit format.
21+
* Fix setting of positive formal charge when reading SDF files.
2022

2123
`2024.4.0 <https://github.com/openbiosim/sire/compare/2024.3.1...2024.4.0>`__ - Feb 2025
2224
----------------------------------------------------------------------------------------

tests/convert/test_rdkit.py

-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ def test_rdkit_returns_null():
118118
"rdkit" not in sr.convert.supported_formats(),
119119
reason="rdkit support is not available",
120120
)
121-
@pytest.mark.xfail(reason="SMILES now mismatches since SDF stereochemistry is preserved")
122121
def test_rdkit_infer_bonds(ejm55_sdf, ejm55_gro):
123122
sdf = ejm55_sdf[0].molecule()
124123
gro = ejm55_gro["not (protein or water)"].molecule()

tests/io/test_sdf.py

+35
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,38 @@ def test_cis_or_trans():
66
Make sure that we can read an SDF file containing a cis or trans double bond.
77
"""
88
mols = sr.load_test_files("cis_trans_double_bond.sdf")
9+
10+
11+
def test_charge():
12+
"""
13+
Make sure that the SDF formal charge field is parsed correctly.
14+
"""
15+
mol = sr.load_test_files("charge.sdf")[0]
16+
17+
from math import isclose
18+
19+
import tempfile
20+
21+
# SDF format has a weird mapping for formal charge. Here the keys are
22+
# the SDF value, and the values are the formal charge.
23+
mapping = {
24+
7: -3,
25+
6: -2,
26+
5: -1,
27+
0: 0,
28+
3: 1,
29+
2: 2,
30+
1: 3,
31+
}
32+
33+
# Make sure the charges are parsed correctly.
34+
for c0, c1 in zip(mol.property("formal_charge").to_list(), mapping.values()):
35+
assert isclose(c0.value(), c1)
36+
37+
# Write back to file.
38+
with tempfile.NamedTemporaryFile(suffix=".sdf") as f:
39+
sr.save(mol, f.name)
40+
41+
# Read back in and check that the charges are still correct.
42+
for c0, c1 in zip(mol.property("formal_charge").to_list(), mapping.values()):
43+
assert isclose(c0.value(), c1)

wrapper/Convert/SireRDKit/sire_rdkit.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,12 @@ namespace SireRDKit
606606
SireMaths::Vector *coords_data = coords.data();
607607
bool has_coords = false;
608608

609+
bool force_stereo_inference = false;
610+
if (map.specified("force_stereo_inference"))
611+
{
612+
force_stereo_inference = map["force_stereo_inference"].value().asABoolean();
613+
}
614+
609615
for (int i = 0; i < atoms.count(); ++i)
610616
{
611617
const auto atom = atoms(i);
@@ -640,6 +646,9 @@ namespace SireRDKit
640646

641647
a->setAtomicNum(element.nProtons());
642648

649+
// don't automatically add hydrogens
650+
a->setNoImplicit(true);
651+
643652
elements.append(element);
644653

645654
try
@@ -776,9 +785,11 @@ namespace SireRDKit
776785
molecule.commitBatchEdit();
777786
molecule.updatePropertyCache(false);
778787

779-
if (atoms.count() > 1 and (not has_bond_info))
788+
if (atoms.count() > 1 and (not has_bond_info or force_stereo_inference))
789+
{
780790
// we need to infer the bond information
781791
infer_bond_info(molecule);
792+
}
782793

783794
// try each sanitisation step in turn, skipping failed
784795
try
@@ -855,7 +866,7 @@ namespace SireRDKit
855866

856867
// try assigning stereochemistry from 3D coordinates as it is the most
857868
// reliable way to do it
858-
if (has_coords)
869+
if (has_coords and not force_stereo_inference)
859870
{
860871
try
861872
{

0 commit comments

Comments
 (0)