MDAnalysis · hp115 · Sep 29, 2023 · Sep 29, 2023 · Sep 29, 2023 · Sep 29, 2023
diff --git a/package/MDAnalysis/coordinates/CIF.py b/package/MDAnalysis/coordinates/CIF.py
@@ -0,0 +1,89 @@
+
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+# doi: 10.25080/majora-629e541a-00e
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+
+"""
+PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx`
+=======================================================================
+
+Reads coordinates from a PDBx_ (mmcif) format file.  Will populate the Universe positions from the
+``_atom_site.Cartn_x`` field in the PDBx file.  Will populate the unitcell dimensions from the ``_cell`` section.
+
+
+.. _PDBx:
+   https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format
+"""
+import gemmi
+import numpy as np
+
+from . import base
+
+
+
+
+class PDBxReader(base.SingleFrameReaderBase):
+    format = ['cif', 'pdbx']
+    units = {'time': None, 'length': 'Angstrom'}
+
+    def _read_first_frame(self):
+        doc = gemmi.cif.read(self.filename)
+
+        block = doc.sole_block()
+
+        coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z'])
+        self.natoms = len(coords)
+
+        xyz = np.zeros((self.natoms, 3), dtype=np.float32)
+
+        for i, (x, y, z) in enumerate(coords):
+            xyz[i, :] = x, y, z
+
+        ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs)
+        ts.frame = 0
+
+        box = block.find('_cell.', ['length_a', 'length_b', 'length_c',
+                                    'angle_alpha', 'angle_beta', 'angle_gamma'])
+        if box:
+            unitcell = np.zeros(6, dtype=np.float64)
+            unitcell[:] = box[0]
+
+            ts.dimensions = unitcell
+
+        if self.convert_units:
+            # in-place !
+            self.convert_pos_from_native(self.ts._pos)
+            if self.ts.dimensions is not None:
+                self.convert_pos_from_native(self.ts.dimensions[:3])
+
+
+
+    @staticmethod
+    def parse_n_atoms(filename, **kwargs):
+        doc = gemmi.cif.read(self.filename)
+        block = doc.sole_block()
+        coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z'])
+        natoms = len(coords)
+        del doc
+        return n_atoms
+
+
diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py
@@ -0,0 +1,101 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+# doi: 10.25080/majora-629e541a-00e
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+
+"""
+PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx`
+=======================================================================
+
+Reads coordinates from a PDBx_ (mmcif) format file.  Will populate the Universe positions from the
+``_atom_site.Cartn_x`` field in the PDBx file.  Will populate the unitcell dimensions from the ``_cell`` section.
+
+
+.. _PDBx:
+   https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format
+"""
+import gemmi
+import numpy as np
+from gemmi import FractionalBox
+
+from . import base
+
+
+class PDBxReader(base.SingleFrameReaderBase):
+    format = ['cif', 'pdbx']
+    units = {'time': None, 'length': 'Angstrom'}
+
+
+    def __init__(self, filename, convert_units=True, **kwargs):
+        super().__init__(filename, convert_units=convert_units, **kwargs)
+
+    def _read_first_frame(self):
+        doc = gemmi.cif.read(self.filename)
+
+        block = doc.sole_block()
+
+        # PDBx/mmCIF with _cell. sections
+        box = block.find('_cell.', ['length_a', 'length_b', 'length_c',
+                                    'angle_alpha', 'angle_beta', 'angle_gamma'])
+        # CIF file with _cell_ sections
+        if not box:
+            box = block.find('_cell_', ['length_a', 'length_b', 'length_c',
+                                    'angle_alpha', 'angle_beta', 'angle_gamma'])
+
+        if box:
+            unitcell = np.zeros(6, dtype=np.float64)
+            unitcell[:] = box[0]
+
+        # PDBx/mmCIF with _cell. sections
+        coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z'])
+        fractional = lambda xyz: xyz
+
+        # CIF file with _cell_ sections
+        if not coords:
+            coords = block.find('_atom_site_', ['fract_x', 'fract_y', 'fract_z'])
+            fractional = lambda xyz: np.multiply(xyz, unitcell[:3])
+
+        if not coords:
+            raise ValueError("No coordinates found in the file")
+
+        self.n_atoms = len(coords)
+
+        xyz = np.zeros((self.n_atoms, 3), dtype=np.float32)
+
+        for i, (x, y, z) in enumerate(coords):
+            xyz[i, :] = x, y, z
+
+        # for CIF: multiply fractional coordinates by unitcell lengths
+        xyz = fractional(xyz)
+
+        ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs)
+        ts.frame = 0
+        ts.dimensions = unitcell
+
+        if self.convert_units:
+            # in-place !
+            self.convert_pos_from_native(self.ts._pos)
+            if self.ts.dimensions is not None:
+                self.convert_pos_from_native(self.ts.dimensions[:3])
+
+        return ts
+
+
diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py
@@ -791,3 +791,4 @@ class can choose an appropriate reader automatically.
 from . import NAMDBIN
 from . import FHIAIMS
 from . import TNG
+from . import PDBx
diff --git a/package/MDAnalysis/topology/PDBxParser.py b/package/MDAnalysis/topology/PDBxParser.py
@@ -0,0 +1,124 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+# doi: 10.25080/majora-629e541a-00e
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+"""
+PDBx topology parser
+====================
+
+
+See Also
+--------
+:class:`MDAnalysis.coordinates.PDBx`
+
+"""
+import gemmi
+import numpy as np
+
+from .base import TopologyReaderBase, change_squash
+from ..core.topology import Topology
+from ..core.topologyattrs import (
+    Atomnames,
+    Atomids,
+    AltLocs,
+    Elements,
+    ICodes,
+    RecordTypes,
+    Resids,
+    Resnames,
+    Segids,
+)
+
+
+class PDBxParser(TopologyReaderBase):
+    """Read a Topology from a PDBx file
+
+    Creates the following attributes from these "_atom_site" PDBx loop entries
+    - "group_PDB" RecordType
+    - "id" AtomId
+    - "label_alt_id" AltLoc
+    - "label_type_symbol" Element
+    - "label_atom_id" AtomName
+    - "auth_seq_id" Resid
+    - "auth_comp_id" Resname
+    - "pdbx_PDB_ins_code" ICode
+    - "auth_asym_id" ChainID
+    """
+    format = ['PDBx', 'cif']
+
+    def parse(self, **kwargs) -> Topology:
+        doc = gemmi.cif.read(self.filename)
+        block = doc.sole_block()
+
+        attrs = []
+
+        def objarr(x):
+            return np.array(x, dtype=object)
+
+        # hierarchy correspondence:
+        # seq_id -> residues
+        # entity_id -> chains
+        if recordtypes := block.find('_atom_site.', ['group_PDB']):
+            attrs.append(RecordTypes(recordtypes))
+        ids = block.find_loop('_atom_site.id')
+        n_atoms = len(ids)
+        attrs.append(Atomids(ids))
+        if altlocs := block.find_loop('_atom_site.label_alt_id'):
+            altlocs = np.array(altlocs, dtype=object)
+            altlocs[altlocs == '.'] = ''
+            attrs.append(AltLocs(altlocs))
+        if elements_loop := block.find_loop('_atom_site.type_symbol'):
+            attrs.append(Elements(objarr(elements_loop)))
+        if names_loop := block.find_loop('_atom_site.label_atom_id'):
+            attrs.append(Atomnames(objarr(names_loop)))
+
+        # sort out residues/segments
+        # label_seq_id seems to not cover entire model unlike author versions
+        resids = np.array(block.find_loop('_entity_poly_seq.num'))
+        resnames = np.array(block.find_loop('_entity_poly_seq.mon_id'))
+        icodes = np.array(block.find_loop('_atom_site.pdbx_PDB_ins_code'))
+        chainids = np.array(block.find_loop('_atom_site.auth_asym_id'))
+
+        try:
+            residx, (resids, icodes, resnames, chainids) = change_squash(
+            (resids, icodes), (resids, icodes, resnames, chainids)
+        )
+            segidx, (chainids,) = change_squash((chainids,), (chainids,))
+        except IndexError:
+            ...
+        attrs.extend((
+            Resids(resids),
+            Resnames(objarr(resnames)),
+            ICodes(objarr(icodes)),
+            Segids(chainids),
+        ))
+
+        n_residues = len(resids)
+        n_segments = len(chainids)
+
+        return Topology(
+            n_atoms=n_atoms,
+            n_res=n_residues,
+            n_seg=n_segments,
+            attrs=attrs,
+            atom_resindex=residx,
+            residue_segindex=segidx,
+        )
diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py
@@ -333,3 +333,4 @@
 from . import MinimalParser
 from . import ITPParser
 from . import FHIAIMSParser
+from . import PDBxParser
diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst b/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst
@@ -0,0 +1,2 @@
+.. automodule:: MDAnalysis.coordinates.PDBx
+   :members:
diff --git a/testsuite/MDAnalysisTests/coordinates/test_cif.py b/testsuite/MDAnalysisTests/coordinates/test_cif.py
@@ -0,0 +1,8 @@
+import pytest 
+from MDAnalysisTests.datafiles import PDBX, CIF, MMCIF
+
+
+
+def test_pdbx()
+
+def test_
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		.. automodule:: MDAnalysis.coordinates.PDBx
		:members: