Skip to content

Commit

Permalink
Issue 4039 large dcd (#4048)
Browse files Browse the repository at this point in the history
Fixes #4039
* Fixes DCD seeking for large (2Gb+) files.
  • Loading branch information
richardjgowers authored Mar 29, 2023
1 parent 94afa34 commit 628e0f7
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 4 deletions.
9 changes: 9 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ Changes
Deprecations


03/29/23 richardjgowers

* 2.4.3

Fixes
* Fixed DCD reading for large (>2Gb) files (Issue #4039). This was broken
for versions 2.4.0, 2.4.1 and 2.4.2


01/04/23 IAlibay

* 2.4.2
Expand Down
6 changes: 3 additions & 3 deletions package/MDAnalysis/lib/formats/libdcd.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ cdef class DCDFile:
# The current DCD frame
cdef int current_frame
# size of the first DCD frame
cdef readonly int _firstframesize
cdef readonly fio_size_t _firstframesize
# Size of a DCD frame
cdef readonly int _framesize
cdef readonly fio_size_t _framesize
# Size of the DCD header
cdef readonly int _header_size
cdef readonly fio_size_t _header_size
# Is the file open?
cdef int is_open
# Have we reached the end of the file
Expand Down
4 changes: 3 additions & 1 deletion package/MDAnalysis/lib/formats/libdcd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,9 @@ cdef class DCDFile:
if frame == 0:
offset = self._header_size
else:
offset = self._header_size + self._firstframesize + self._framesize * (frame - 1)
offset = self._header_size
offset += self._firstframesize
offset += self._framesize * (frame - 1)

cdef int ok = fio_fseek(self.fp, offset, _whence_vals['FIO_SEEK_SET'])
if ok != 0:
Expand Down
36 changes: 36 additions & 0 deletions testsuite/MDAnalysisTests/coordinates/test_dcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
import os
from pathlib import Path
import numpy as np

Expand Down Expand Up @@ -436,3 +437,38 @@ def test_pathlib():
# we really only care that pathlib
# object handling worked
assert u.atoms.n_atoms == 3341


@pytest.fixture
def large_dcdfile(tmpdir):
# creates a >2Gb DCD file
fsize = 3.8 # mb
nreps_reqs = int(2100 // fsize) # times to duplicate traj to hit 2.1Gb

newf = str(tmpdir / "jabba.dcd")

u = mda.Universe(PSF, DCD)

with mda.Writer(newf, n_atoms=len(u.atoms)) as w:
for _ in range(nreps_reqs):
for ts in u.trajectory:
w.write(u.atoms)

yield newf, nreps_reqs


@pytest.mark.skipif(
not os.environ.get("LARGEDCD", False), reason="Skipping large file test"
)
def test_large_dcdfile(large_dcdfile):
DCD_large, nreps = large_dcdfile

u_small = mda.Universe(PSF, DCD)
u = mda.Universe(PSF, DCD_large)

assert len(u.trajectory) == len(u_small.trajectory) * nreps

u_small.trajectory[-1]
u.trajectory[-1]

assert_array_almost_equal(u.atoms.positions, u_small.atoms.positions)

0 comments on commit 628e0f7

Please sign in to comment.