Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP, ENH: add TPR position and velocity read support #4873

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
ENH: add TPR position read support
* Add support for reading positions from
GMX `.tpr` files at `tpx` version `134`.
tylerjereddy committed Dec 30, 2024
commit 4af047d86ed13ac8a42987934e1892d31ad8f066
73 changes: 73 additions & 0 deletions package/MDAnalysis/coordinates/TPR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

header needs expanding to full version


from . import base
from ..lib import util
from .timestep import Timestep
import MDAnalysis.topology.tpr.utils as tpr_utils
import MDAnalysis.topology.tpr.setting as S


import numpy as np


class TPRReader(base.SingleFrameReaderBase):
# TODO: reduce duplication with `TPRparser`;
# we could share some state for the position
# in the binary file to avoid re-reading topology
# or perhaps combine the topology and coordinate reading
# with some inheritance shenanigans?
format = "TPR"
units = {"length": "nm"}
_Timestep = Timestep

def _read_first_frame(self):
# Read header/move over topology
# TODO: reduce duplication with TPRparser perhaps...
with util.openany(self.filename, mode='rb') as infile:
tprf = infile.read()
data = tpr_utils.TPXUnpacker(tprf)
try:
th = tpr_utils.read_tpxheader(data) # tpxheader
except (EOFError, ValueError):
msg = f"{self.filename}: Invalid tpr file or cannot be recognized"
logger.critical(msg)
raise IOError(msg)

self.ts = ts = self._Timestep(th.natoms, **self._ts_kwargs)
self.n_atoms = th.natoms

# Starting with gromacs 2020 (tpx version 119), the body of the file
# is encoded differently. We change the unpacker accordingly.
if th.fver >= S.tpxv_AddSizeField and th.fgen >= 27:
actual_body_size = len(data.get_buffer()) - data.get_position()
if actual_body_size == 4 * th.sizeOfTprBody:
# See issue #2428.
msg = (
"TPR files produced with beta versions of gromacs 2020 "
"are not supported."
)
logger.critical(msg)
raise IOError(msg)
data = tpr_utils.TPXUnpacker2020.from_unpacker(data)

state_ngtc = th.ngtc # done init_state() in src/gmxlib/tpxio.c
if th.bBox:
tpr_utils.extract_box_info(data, th.fver)

if state_ngtc > 0:
if th.fver < 69: # redundancy due to different versions
tpr_utils.ndo_real(data, state_ngtc)
tpr_utils.ndo_real(data, state_ngtc) # relevant to Berendsen tcoupl_lambda

if th.bTop:
tpr_top = tpr_utils.do_mtop(data, th.fver,
tpr_resid_from_one=True)
else:
msg = f"{self.filename}: No topology found in tpr file"
logger.critical(msg)
raise IOError(msg)

if th.bX:
self.ts._pos = np.asarray(tpr_utils.ndo_rvec(data, th.natoms),
dtype=np.float32)
1 change: 1 addition & 0 deletions package/MDAnalysis/coordinates/__init__.py
Original file line number Diff line number Diff line change
@@ -776,6 +776,7 @@ class can choose an appropriate reader automatically.
from . import PDB
from . import PDBQT
from . import PQR
from . import TPR
from . import TRC
from . import TRJ
from . import TRR
42 changes: 42 additions & 0 deletions package/MDAnalysis/topology/tpr/utils.py
Original file line number Diff line number Diff line change
@@ -423,6 +423,48 @@ def do_mtop(data, fver, tpr_resid_from_one=False):
elements = Elements(np.array(elements, dtype=object))
top.add_TopologyAttr(elements)

# NOTE: the tpr striding code below serves the
# purpose of placing us in a suitable "seek" position
# in the binary file such that we are well placed
# for reading coords and velocities if they are present
# the order of operations is based on an analysis of
# the C++ code in do_mtop() function in the GROMACS
# source at:
# src/gromacs/fileio/tpxio.cpp
# TODO: expand tpx version support for striding to
# the coordinates
if fver == 134:
# TODO: the following value is important, and not sure
# how to access programmatically yet...
# from GMX source code:
# api/legacy/include/gromacs/topology/topology_enums.h
# worst case scenario we hard code it based on
# tpx/GMX version?
SimulationAtomGroupType_size = 10
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On latest GMX main branch, this is the data structure in question:

enum class SimulationAtomGroupType : int
{
    TemperatureCoupling,
    EnergyOutput,
    Acceleration,
    Freeze,
    User1,
    User2,
    MassCenterVelocityRemoval,
    CompressedPositionOutput,
    OrientationRestraintsFit,
    QuantumMechanics,
    Count
};

n_atoms = data.unpack_int()
interm = data.unpack_uchar()
ngrid = data.unpack_int()
grid_spacing = data.unpack_int()
n_elements = grid_spacing ** 2
for i in range(ngrid):
for j in range(n_elements):
ndo_real(data, 4)
for i in range(SimulationAtomGroupType_size):
group_size = data.unpack_int()
ndo_int(data, group_size)
n_group_names = data.unpack_int()
for i in range(n_group_names):
data.unpack_int()
for i in range(SimulationAtomGroupType_size):
n_grp_numbers = data.unpack_int()
if n_grp_numbers != 0:
for i in range(n_grp_numbers):
data.unpack_uchar()
im_excl_grp_size = data.unpack_int()
ndo_int(data, im_excl_grp_size)
# TODO: why is this needed?
data.unpack_int()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above can probably be cleaned up a bit.. probably has some unused vars, etc.

It was produced by careful printf-ing the GMX source as you might expect. Expanding to support other tpx versions may not be too bad, although this was fairly time consuming to draft.


return top


34 changes: 34 additions & 0 deletions testsuite/MDAnalysisTests/coordinates/test_tpr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from MDAnalysisTests.datafiles import (TPR2024_4_bonded,
TPR_EXTRA_2024_4,
TPR2024_4)
import MDAnalysis as mda


import pytest
from numpy.testing import assert_allclose, assert_equal


@pytest.mark.parametrize("tpr_file, exp_first_atom, exp_last_atom, exp_shape", [
(TPR2024_4_bonded,
[4.446, 4.659, 2.384],
[4.446, 4.659, 2.384],
(14, 3),
),
# same coordinates, different shape
(TPR_EXTRA_2024_4,
[4.446, 4.659, 2.384],
[4.446, 4.659, 2.384],
(18, 3),
),
# different coordinates and different shape
(TPR2024_4,
[3.25000e-01, 1.00400e+00, 1.03800e+00],
[-2.56000e-01, 1.37300e+00, 3.59800e+00],
(2263, 3),
),
])
def test_basic_read_tpr(tpr_file, exp_first_atom, exp_last_atom, exp_shape):
u = mda.Universe(tpr_file)
assert_allclose(u.atoms.positions[0, ...], exp_first_atom)
assert_allclose(u.atoms.positions[-1, ...], exp_last_atom)
assert_equal(u.atoms.positions.shape, exp_shape)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have specific conventions for coordinate reader testing, but for now this is where I've started. Should be easy to expand to include velocities as well.

Cases with only positions and no velocities (etc.) may also be sensible to add, on top of older tpx files...