Source code for parmed.formats.pqr

"""
This module contains classes for reading and writing PQR files
"""
from __future__ import absolute_import, print_function, division

from contextlib import closing
import numpy as np
from parmed.exceptions import PDBError, PDBWarning
from parmed.formats.registry import FileFormatType
from parmed.formats.pdb import _standardize_resname, PDBFile, _is_hetatm
from parmed.periodic_table import AtomicNum, Mass, Element, element_by_name
from parmed.structure import Structure
from parmed.topologyobjects import Atom, ExtraPoint
from parmed.utils.io import genopen
from parmed.utils.six import string_types, add_metaclass
from parmed.utils.six.moves import range
import warnings

[docs]@add_metaclass(FileFormatType)
class PQRFile(object):
    """ Standard PDB file format parser and writer """
    #===================================================

[docs]    @staticmethod
    def id_format(filename):
        """ Identifies the file type as a PDB file

        Parameters
        ----------
        filename : str
            Name of the file to check format for

        Returns
        -------
        is_fmt : bool
            True if it is a PQR file
        """
        with closing(genopen(filename, 'r')) as f:
            for line in f:
                words = line.split()
                if not words:
                    continue
                elif words[0] in ('CRYST1', 'END', 'END', 'HEADER', 'NUMMDL',
                        'MASTER', 'AUTHOR', 'CAVEAT', 'COMPND', 'EXPDTA',
                        'MDLTYP', 'KEYWDS', 'OBSLTE', 'SOURCE', 'SPLIT',
                        'SPRSDE', 'TITLE ', 'ANISOU', 'CISPEP', 'CONECT',
                        'DBREF ', 'HELIX ', 'HET', 'LINK', 'MODRES',
                        'REVDAT', 'SEQADV', 'SHEET', 'SSBOND', 'FORMUL',
                        'HETNAM', 'HETSYN', 'SEQRES', 'SITE', 'ENDMDL', 'MODEL',
                        'JRNL', 'REMARK', 'TER', 'USER'):
                    continue
                elif line[:5] in ('ORIGX', 'SCALE', 'MTRIX'):
                    if line[5] not in '123':
                        return False
                elif words[0] in ('ATOM', 'HETATM'):
                    # Format is:
                    # rec atnum atname resname [chain] resnum x y z chg radius
                    # Where the chain ID is optional. rec must be ATOM or HETATM
                    if len(words) < 10:
                        return False
                    elif PDBFile.id_format(filename):
                        return False # It is a PDB file

                    if len(words) == 10:
                        offset = 0
                    elif len(words) >= 11:
                        offset = 1
                        try:
                            float(words[10])
                        except ValueError:
                            offset = 0
                    if not words[1].isdigit(): return False
                    if words[2].isdigit(): return False
                    if words[3].isdigit(): return False
                    if not words[4+offset].isdigit(): return False
                    try:
                        float(words[5+offset])
                        float(words[6+offset])
                        float(words[7+offset])
                        float(words[8+offset])
                        float(words[9+offset])
                    except ValueError:
                        return False
                    return True
                else:
                    return False
            return False

    #===================================================

[docs]    @staticmethod
    def parse(filename, skip_bonds=True):
        """ Read a PQR file and return a populated `Structure` class

        Parameters
        ----------
        filename : str or file-like
            Name of the PQR file to read, or a file-like object that can iterate
            over the lines of a PQR. Compressed file names can be specified and
            are determined by file-name extension (e.g., file.pqr.gz,
            file.pqr.bz2)
        skip_bonds : bool, optional
            If True, skip trying to assign bonds. This can save substantial time
            when parsing large files with non-standard residue names. However,
            no bonds are assigned. This is OK if, for instance, the PQR file is
            being parsed simply for its coordinates. Default is False.

        Returns
        -------
        structure : :class:`Structure`
            The Structure object initialized with all of the information from
            the PDB file.  No bonds or other topological features are added by
            default.
        """
        if isinstance(filename, string_types):
            own_handle = True
            fileobj = genopen(filename, 'r')
        else:
            own_handle = False
            fileobj = filename

        struct = Structure()
        # Add metadata fields
        modelno = 1 # For PDB files with multiple MODELs
        atomno = 0
        coordinates = []
        all_coordinates = []

        # Support hexadecimal numbering like that printed by VMD
        try:
            for line in fileobj:
                words = line.split()
                if words[0] in ('ATOM', 'HETATM'):
                    atomno += 1
                    if len(words) == 10:
                        _, num, nam, res, resn, x, y, z, chg, rad = words
                        chn = ''
                    elif len(words) >= 11:
                        _, num, nam, res, chn, resn, x, y, z, chg, rad = (
                                words[i] for i in range(11))
                        # If the radius is not a float (but rather a letter,
                        # like the element or something), then the chain might
                        # be missing. In this case, shift all tokens "back" one
                        # and empty the chn string
                        try:
                            float(rad)
                        except ValueError:
                            resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                    else:
                        raise ValueError('Illegal PQR record format: expected '
                                         '10 or 11 tokens on the atom line')
                    x, y, z = float(x), float(y), float(z)
                    chg, rad = float(chg), float(rad)
                    resn, num = int(resn), int(num)
                    elem = element_by_name(nam) # Yuck
                    atomic_number = AtomicNum[elem]
                    mass = Mass[elem]
                    if nam in ('EP', 'LP'): # lone pair
                        atom = ExtraPoint(atomic_number=atomic_number, name=nam,
                                          charge=chg, mass=mass, number=num,
                                          solvent_radius=rad)
                    else:
                        atom = Atom(atomic_number=atomic_number, name=nam,
                                    charge=chg, mass=mass, number=num,
                                    solvent_radius=rad)
                    atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
                    if modelno == 1:
                        struct.add_atom(atom, res, resn, chn)
                    else:
                        try:
                            orig_atom = struct.atoms[atomno-1]
                        except IndexError:
                            raise PDBError('Extra atom in MODEL %d' % modelno)
                        if (orig_atom.residue.name != res.strip()
                                or orig_atom.name != nam.strip()):
                            raise PDBError('Atom %d differs in MODEL %d [%s %s '
                                           'vs. %s %s]' % (atomno, modelno,
                                           orig_atom.residue.name,
                                           orig_atom.name, res, nam))
                    coordinates.extend([atom.xx, atom.xy, atom.xz])
                elif words[0] == 'ENDMDL':
                    # End the current model
                    if len(struct.atoms) == 0:
                        raise PDBError('MODEL ended before any atoms read in')
                    modelno += 1
                    if len(struct.atoms)*3 != len(coordinates):
                        raise PDBError(
                                'Inconsistent atom numbers in some PDB models')
                    all_coordinates.append(coordinates)
                    atomno = 0
                    coordinates = []
                elif words[0] == 'MODEL':
                    if modelno == 1 and len(struct.atoms) == 0: continue
                    if len(coordinates) > 0:
                        if len(struct.atoms)*3 != len(coordinates):
                            raise PDBError('Inconsistent atom numbers in '
                                           'some PDB models')
                        warnings.warn('MODEL not explicitly ended', PDBWarning)
                        all_coordinates.append(coordinates)
                        coordinates = []
                    modelno += 1
                    atomno = 0
                elif words[0] == 'CRYST1':
                    a, b, c = (float(w) for w in words[1:4])
                    try:
                        A, B, C = (float(w) for w in words[4:7])
                    except ValueError:
                        A = B = C = 90.0
                    struct.box = [a, b, c, A, B, C]
        finally:
            if own_handle: fileobj.close()

        struct.unchange()
        if not skip_bonds:
            struct.assign_bonds()
        if coordinates:
            if len(coordinates) != 3*len(struct.atoms):
                raise PDBError('bad number of atoms in some PQR models')
            all_coordinates.append(coordinates)
        struct._coordinates = np.array(all_coordinates).reshape(
                        (-1, len(struct.atoms), 3))
        return struct

    #===================================================

[docs]    @staticmethod
    def write(struct, dest, renumber=True, coordinates=None,
              standard_resnames=False):
        """ Write a PDB file from a Structure instance

        Parameters
        ----------
        struct : :class:`Structure`
            The structure from which to write the PDB file
        dest : str or file-like
            Either a file name or a file-like object containing a `write`
            method to which to write the PDB file. If it is a filename that
            ends with .gz or .bz2, a compressed version will be written using
            either gzip or bzip2, respectively.
        renumber : bool, optional
            If True, renumber the atoms and residues sequentially as they are
            stored in the structure.  If False, use the original numbering if
            it was assigned previously. Default is True
        coordinates : array-like of float, optional
            If provided, these coordinates will be written to the PDB file
            instead of the coordinates stored in the structure. These
            coordinates should line up with the atom order in the structure
            (not necessarily the order of the "original" PDB file if they
            differ)
        standard_resnames : bool, optional
            If True, common aliases for various amino and nucleic acid residues
            will be converted into the PDB-standard values. Default is False
        """
        own_handle = False
        if not hasattr(dest, 'write'):
            dest = genopen(dest, 'w')
            own_handle = True
        atomrec = ('ATOM  %5d %-3s  %-3s %1s %3d    %7.3f %7.3f %7.3f %8.4f '
                   '%8.4f\n')
        hetatomrec = atomrec.replace('ATOM  ', 'HETATM')
        if struct.box is not None:
            dest.write('CRYST1 %8.3f %8.3f %8.3f %6.2f %6.2f %6.2f\n' % (
                    struct.box[0], struct.box[1], struct.box[2], struct.box[3],
                    struct.box[4], struct.box[5]))
        if coordinates is not None:
            coords = np.array(coordinates, copy=False, subok=True)
            try:
                coords = coords.reshape((-1, len(struct.atoms), 3))
            except ValueError:
                raise TypeError("Coordinates has unexpected shape")
        else:
            coords = struct.get_coordinates('all')
        # Create a function to process each atom and return which one we want
        # to print, based on our alternate location choice
        if standard_resnames:
            standardize = lambda x: _standardize_resname(x)
        else:
            standardize = lambda x: (x, _is_hetatm(x))
        last_number = 0
        last_rnumber = 0
        for model, coord in enumerate(coords):
            if coords.shape[0] > 1:
                dest.write('MODEL      %5d\n' % (model+1))
            for res in struct.residues:
                if renumber:
                    atoms = res.atoms
                else:
                    atoms = sorted(res.atoms, key=lambda atom: atom.number)
                for atom in atoms:
                    # Figure out the serial numbers we want to print
                    if renumber:
                        anum = (atom.idx + 1)
                        rnum = (res.idx + 1)
                    else:
                        anum = (atom.number or last_number + 1)
                        rnum = (atom.residue.number or last_rnumber + 1)
                    last_number = anum
                    last_rnumber = rnum
                    # Do any necessary name munging to respect the PDB spec
                    if (len(atom.name) < 4 and
                            len(Element[atom.atomic_number]) != 2):
                        aname = ' %-3s' % atom.name
                    else:
                        aname = atom.name
                    xyz = coord[atom.idx]
                    resname, hetatm = standardize(res.name)
                    if hetatm:
                        rec = hetatomrec
                    else:
                        rec = atomrec
                    dest.write(rec % (anum, aname, resname, res.chain, rnum,
                                      xyz[0], xyz[1], xyz[2], atom.charge,
                                      atom.solvent_radius))
            if coords.shape[0] > 1:
                dest.write('ENDMDL\n')

        dest.write("%-80s\n" % "END")
        if own_handle:
            dest.close()