Source code for parmed.amber.offlib

"""
Tool for parsing and writing OFF library files to and from dictionaries of
ResidueTemplate objects
"""
from __future__ import print_function

from collections import OrderedDict
from contextlib import closing
import numpy as np
from ..topologyobjects import Atom
from ..constants import RAD_TO_DEG
from ..exceptions import AmberWarning
from ..formats.registry import FileFormatType
from ..modeller.residue import ResidueTemplate, ResidueTemplateContainer
from ..modeller.residue import PROTEIN, NUCLEIC, SOLVENT, UNKNOWN
from .. import periodic_table as pt
from ..utils.io import genopen
from ..utils.six import string_types, add_metaclass
from ..utils.six.moves import range
import re
import warnings

[docs]@add_metaclass(FileFormatType)
class AmberOFFLibrary(object):
    """
    Class containing static methods responsible for parsing and writing OFF
    libraries
    """
    #===================================================

    # Useful regexes
    _headerre = re.compile(r'!!index *array *str')
    _resre = re.compile(r'\s*"(\S*?)"\s*$')
    _sec1re = re.compile(r'!entry\.(\S*?)\.unit\.atoms *table *str *name *str'
                         r' *type *int *typex *int *resx *int *flags *int'
                         r' *seq *int *elmnt *dbl *chg')
    _sec2re = re.compile(r'!entry\.(\S*?)\.unit\.atomspertinfo *table *str'
                         r' *pname *str *ptype *int *ptypex *int *pelmnt'
                         r' *dbl *pchg')
    _sec3re = re.compile(r'!entry\.(\S*?)\.unit\.boundbox *array *dbl')
    _sec4re = re.compile(r'!entry\.(\S*?)\.unit\.childsequence *single *int')
    _sec5re = re.compile(r'!entry\.(\S*?)\.unit\.connect *array *int')
    _sec6re = re.compile(r'!entry\.(\S*?)\.unit\.connectivity *table *int'
                         r' *atom1x *int *atom2x *int *flags')
    _sec7re = re.compile(r'!entry\.(\S*?)\.unit\.hierarchy *table *str'
                         r' *abovetype *int *abovex *str *belowtype *int'
                         r' *belowx')
    _sec8re = re.compile(r'!entry\.(\S*?)\.unit\.name *single *str')
    _sec9re = re.compile(r'!entry\.(\S*?)\.unit\.positions *table *dbl *x'
                         r' *dbl *y *dbl *z')
    _sec10re = re.compile(r'!entry\.(\S*?)\.unit\.residueconnect *table'
                          r' *int *c1x *int *c2x *int *c3x *int *c4x *int'
                          r' *c5x *int *c6x')
    _sec11re = re.compile(r'!entry\.(\S*?)\.unit\.residues *table *str *name'
                          r' *int *seq *int *childseq *int *startatomx *str'
                          r' *restype *int *imagingx')
    _sec12re = re.compile(r'!entry\.(\S*?)\.unit\.residuesPdbSequenceNumber'
                          r' *array *int')
    _sec13re = re.compile(r'!entry\.(\S*?)\.unit\.solventcap *array *dbl')
    _sec14re = re.compile(r'!entry\.(\S*?)\.unit\.velocities *table *dbl *x'
                          r' *dbl *y *dbl *z')

    #===================================================

[docs]    @staticmethod
    def id_format(filename):
        """ Sees if an open file is an OFF library file.

        Parameters
        ----------
        filename : str
            The name of the file to see if it is an OFF file format

        Returns
        -------
        is_fmt : bool
            True if it is recognized as OFF, False otherwise
        """
        with closing(genopen(filename, 'r')) as f:
            if AmberOFFLibrary._headerre.match(f.readline()):
                return True
            return False

    #===================================================

[docs]    @staticmethod
    def parse(filename):
        """ Parses an Amber OFF library

        Parameters
        ----------
        filename : str or file-like iterable
            The file name or file object to parse. If it is an iterable, it will
            be exhausted

        Returns
        -------
        residues : OrderedDict {str : :class:`ResidueTemplate`}
            Dictionary pairing residue names with their :class:`ResidueTemplate`
            objects

        Raises
        ------
        ValueError if the first line does not match the file format. This line
        will be consumed

        IOError if filename is the name of a file that does not exist

        RuntimeError if EOF is reached prematurely or other formatting issues
        found
        """
        if isinstance(filename, string_types):
            fileobj = genopen(filename, 'r')
            own_handle = True
        else:
            fileobj = filename
            own_handle = False
        # Now parse the library file
        line = fileobj.readline()
        if not AmberOFFLibrary._headerre.match(line):
            raise ValueError('Unrecognized OFF file format')
        # Build the return value
        residues = OrderedDict()
        # Pull a list of all the residues we expect to find
        line = fileobj.readline()
        rematch = AmberOFFLibrary._resre.match(line)
        while rematch and line:
            name = rematch.groups()[0]
            residues[name] = None
            line = fileobj.readline()
            rematch = AmberOFFLibrary._resre.match(line)
        if not line:
            raise RuntimeError('Unexpected EOF in Amber OFF library')
        # Now make sure we have the next expected line
        while line:
            if not line.strip():
                line = fileobj.readline()
                continue
            rematch = AmberOFFLibrary._sec1re.match(line)
            if not rematch:
                raise RuntimeError('Expected atoms table not found')
            name = rematch.groups()[0]
            residues[name] = AmberOFFLibrary._parse_residue(fileobj, name)
            line = fileobj.readline()

        if own_handle: fileobj.close()

        return residues

    #===================================================

    @staticmethod
    def _parse_residue(fileobj, name):
        """
        Parses the residue information out of the OFF file assuming the file
        is pointed at the first line of an atoms table section of the OFF file

        Parameters
        ----------
        fileobj : file-like
            Assumed to be open for read, this file is parsed until the *next*
            atom table is read
        name : str
            The name of the residue being processed right now
        """
        container = ResidueTemplateContainer(name)
        nres = 1
        templ = ResidueTemplate(name)
        line = fileobj.readline()
        while line[0] != '!':
            nam, typ, typx, resx, flags, seq, elmnt, chg = line.split()
            nam = _strip_enveloping_quotes(nam)
            typ = _strip_enveloping_quotes(typ)
            typx = int(typx)
            resx = int(resx)
            flags = int(flags)
            seq = int(seq)
            elmnt = int(elmnt)
            chg = float(chg)
            atom = Atom(atomic_number=elmnt, type=typ, name=nam, charge=chg)
            if resx == nres + 1:
                container.append(templ)
                nres += 1
                templ = ResidueTemplate(name)
            templ.add_atom(atom)
            line = fileobj.readline()
            # Skip blank lines
            while line and not line.strip():
                line = fileobj.readline()
        container.append(templ)
        if nres > 1:
            start_atoms = []
            runsum = 0
            for res in container:
                start_atoms.append(runsum)
                runsum += len(res)
        # Make sure we get the next section
        rematch = AmberOFFLibrary._sec2re.match(line)
        if not rematch:
            raise RuntimeError('Expected pertinfo table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        line = fileobj.readline()
        while line[0] != '!':
            if not line:
                raise RuntimeError('Unexpected EOF in Amber OFF library')
            # Not used, just skip
            # TODO sanity check
            line = fileobj.readline()
        rematch = AmberOFFLibrary._sec3re.match(line)
        if not rematch:
            raise RuntimeError('Expected boundbox table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        # Only 5 lines
        try:
            hasbox = float(fileobj.readline().strip())
            angle = float(fileobj.readline().strip())
            a = float(fileobj.readline().strip())
            b = float(fileobj.readline().strip())
            c = float(fileobj.readline().strip())
        except ValueError:
            raise RuntimeError('Error processing boundbox table entries')
        else:
            if hasbox > 0:
                if angle < 3.15:
                    # No box is this acute -- must be in radians
                    angle *= RAD_TO_DEG
                container.box = [a, b, c, angle, angle, angle]
        # Get the child sequence entry
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec4re.match(line)
        if not rematch:
            raise RuntimeError('Expected childsequence table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        n = int(fileobj.readline().strip())
        if nres + 1 != n:
            warnings.warn('Unexpected childsequence (%d); expected %d for '
                          'residue %s' % (n, nres+1, name), AmberWarning)
        elif not isinstance(templ, ResidueTemplate) and n != len(templ) + 1:
            raise RuntimeError('child sequence must be 1 greater than the '
                               'number of residues in the unit')
        # Get the CONNECT array to set head and tail
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec5re.match(line)
        if not rematch:
            raise RuntimeError('Expected connect array not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        try:
            head = int(fileobj.readline().strip())
            tail = int(fileobj.readline().strip())
        except ValueError:
            raise RuntimeError('Error processing connect table entries')
        if head > 0 and nres == 1:
            templ.head = templ[head-1]
        elif head > 0 and nres > 1:
            if head < sum((len(r) for r in container)):
                raise RuntimeError('HEAD on multi-residue unit not supported')
        if tail > 0 and nres == 1:
            templ.tail = templ[tail-1]
        elif tail > 0 and nres > 1:
            if tail < sum((len(r) for r in container)):
                warnings.warn('TAIL on multi-residue unit not supported (%s). '
                              'Ignored...' % name, AmberWarning)
        # Get the connectivity array to set bonds
        line = fileobj.readline()
        if len(templ.atoms) > 1:
            rematch = AmberOFFLibrary._sec6re.match(line)
            if not rematch:
                raise RuntimeError('Expected connectivity table not found')
            elif rematch.groups()[0] != name:
                raise RuntimeError('Found residue %s while processing residue %s' %
                                   (rematch.groups()[0], name))
            line = fileobj.readline()
            while line[0] != '!':
                i, j, flag = line.split()
                line = fileobj.readline()
                if nres > 1:
                    # Find which residue we belong in
                    i = int(i) - 1
                    j = int(j) - 1
                    for ii, idx in enumerate(start_atoms):
                        if idx > i:
                            ii -= 1
                            break
                    start_idx = start_atoms[ii]
                    container[ii].add_bond(i-start_idx, j-start_idx)
                else:
                    templ.add_bond(int(i)-1, int(j)-1)
        # Get the hierarchy table
        rematch = AmberOFFLibrary._sec7re.match(line)
        if not rematch:
            raise RuntimeError('Expected hierarchy table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        line = fileobj.readline()
        while line[0] != '!':
            # Skip this section... not used
            # TODO turn this into a sanity check
            line = fileobj.readline()
        # Get the unit name
        rematch = AmberOFFLibrary._sec8re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit name string not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        fileobj.readline() # Skip this... not used
        line = fileobj.readline()
        # Get the atomic positions
        rematch = AmberOFFLibrary._sec9re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit positions table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for res in container:
            for atom in res:
                x, y, z = fileobj.readline().split()
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
        line = fileobj.readline()
        # Get the residueconnect table
        rematch = AmberOFFLibrary._sec10re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit residueconnect table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for i in range(nres):
            c1,c2,c3,c4,c5,c6 = (int(x) for x in fileobj.readline().split())
            if (c1 > 0 and templ.head is not None and
                    templ.head is not templ[c1-1]):
                raise RuntimeError('HEAD atom is not connect0')
            if (c2 > 0 and templ.tail is not None and
                    templ.tail is not templ[c2-1]):
                raise RuntimeError('TAIL atom is not connect1')
            for i in (c3, c4, c5, c6):
                if i == 0: continue
                templ.connections.append(templ[i-1])
        # Get the residues table
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec11re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit residues table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for i in range(nres):
            resname, id, next, start, typ, img = fileobj.readline().split()
            resname = _strip_enveloping_quotes(resname)
            id = int(id)
            start = int(start)
            next = int(next)
            typ = _strip_enveloping_quotes(typ)
            img = int(img)
            if next - start != len(container[i]):
                warnings.warn('residue table predicted %d, not %d atoms for '
                              'residue %s' % (next-start, len(container[i]),
                              name), AmberWarning)
            if typ == 'p':
                container[i].type = PROTEIN
            elif typ == 'n':
                container[i].type = NUCLEIC
            elif typ == 'w':
                container[i].type = SOLVENT
            elif typ != '?':
                warnings.warn('Unknown residue type "%s"' % typ, AmberWarning)
            if nres > 1:
                container[i].name = resname
        # Get the residues sequence table
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec12re.match(line)
        if not rematch:
            raise RuntimeError('Expected residue sequence number not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for i in range(nres):
            #TODO sanity check
            fileobj.readline()
        line = fileobj.readline()
        # Get the solventcap array
        rematch = AmberOFFLibrary._sec13re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit solventcap array not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        # Ignore the solvent cap
        fileobj.readline()
        fileobj.readline()
        fileobj.readline()
        fileobj.readline()
        fileobj.readline()
        # Velocities
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec14re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit solventcap array not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for res in container:
            for atom in res:
                vx, vy, vz = (float(x) for x in fileobj.readline().split())
                atom.vx, atom.vy, atom.vz = vx, vy, vz

        if nres > 1:
            return container
        return templ

    #===================================================

[docs]    @staticmethod
    def write(lib, dest):
        """ Writes a dictionary of ResidueTemplate units to a file in OFF format

        Parameters
        ----------
        lib : dict {str : :class:`ResidueTemplate`}
            Items can be either :class:`ResidueTemplate` or
            :class:`ResidueTemplateContainer` instances
        dest : str or file-like
            Either a file name or a file-like object to write the file to
        """
        own_handle = False
        if not hasattr(dest, 'write'):
            dest = genopen(dest, 'w')
            own_handle = True
        # Write the residues in alphabetical order
        names = sorted(lib.keys())
        dest.write('!!index array str\n')
        for name in names:
            dest.write(' "%s"\n' % name)
        for name in names:
            AmberOFFLibrary._write_residue(dest, lib[name])

        if own_handle: dest.close()

    #===================================================

    @staticmethod
    def _write_residue(dest, res):
        """ Writes a residue to an open file handle

        Parameters
        ----------
        dest : file-like
            File object to write the residue information to
        res : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
            The residue template (or template container) to write to the file
        """
        if isinstance(res, ResidueTemplate):
            # Put it into a template container with the same name
            tmp = ResidueTemplateContainer(res.name)
            tmp.append(res)
            res = tmp
        dest.write('!entry.%s.unit.atoms table  str name  str type  int typex  '
                   'int resx  int flags  int seq  int elmnt  dbl chg\n' %
                   res.name)
        for i, r in enumerate(res):
            for atom in r:
                dest.write(' "%s" "%s" 0 %d 131072 %d %d %.6f\n' % (atom.name,
                           atom.type, i+1, atom.idx+1, atom.atomic_number,
                           atom.charge))
        dest.write('!entry.%s.unit.atomspertinfo table  str pname  str ptype  '
                   'int ptypex  int pelmnt  dbl pchg\n' % res.name)
        for r in res:
            for atom in r:
                dest.write(' "%s" "%s" 0 -1 0.0\n' % (atom.name, atom.type))
        dest.write('!entry.%s.unit.boundbox array dbl\n' % res.name)
        if res.box is None:
            dest.write((' -1.000000\n' + ' 0.0\n' * 4))
        else:
            dest.write(' 1.000000\n')
            if res.box[3] == res.box[4] == res.box[5]:
                dest.write(' %f\n' % res.box[3])
            else:
                raise RuntimeError('Cannot write boxes with different angles')
            dest.write(' %f\n' % res.box[0])
            dest.write(' %f\n' % res.box[1])
            dest.write(' %f\n' % res.box[2])
        dest.write('!entry.%s.unit.childsequence single int\n %d\n' %
                   (res.name, len(res)+1))
        dest.write('!entry.%s.unit.connect array int\n' % res.name)
        if len(res) > 1:
            dest.write(' 0\n 0\n')
        else:
            if res[0].head is not None:
                dest.write(' %d\n' % (res[0].head.idx + 1))
            else:
                dest.write(' 0\n')
            if res[0].tail is not None:
                dest.write(' %d\n' % (res[0].tail.idx + 1))
            else:
                dest.write(' 0\n')
        if any(len(r) > 1 for r in res):
            dest.write('!entry.%s.unit.connectivity table  int atom1x  '
                       'int atom2x  int flags\n' % res.name)
            base = 1
            for r in res:
                for bond in r.bonds:
                    dest.write(' %d %d 1\n' % (bond.atom1.idx+base,
                                               bond.atom2.idx+base))
                base += len(r)
        dest.write('!entry.%s.unit.hierarchy table  str abovetype  int '
                   'abovex  str belowtype  int belowx\n' % res.name)
        c = 1
        for i, r in enumerate(res):
            dest.write(' "U" 0 "R" %d\n' % (i+1))
            for atom in r:
                dest.write(' "R" %d "A" %d\n' % (i+1, c))
                c += 1
        dest.write('!entry.%s.unit.name single str\n' % res.name)
        dest.write(' "%s"\n' % res.name)
        dest.write('!entry.%s.unit.positions table  dbl x  dbl y  dbl z\n' %
                   res.name)
        for r in res:
            for atom in r:
                dest.write(' %.6g %.6g %.6g\n' % (atom.xx, atom.xy, atom.xz))
        dest.write('!entry.%s.unit.residueconnect table  int c1x  int c2x  '
                   'int c3x  int c4x  int c5x  int c6x\n' % res.name)
        c = 1
        for r in res:
            # Make the CONECT1 and 0 default to first and last atom so that the
            # TREE gets set correctly by tleap. Not used for anything else...
            conn = [c, c+len(r)-1, 0, 0, 0, 0]
            if r.head is not None: conn[0] = r.head.idx + 1
            if r.tail is not None: conn[1] = r.tail.idx + 1
            for i, at in enumerate(r.connections[:4]):
                conn[i+2] = at.idx + 1
            dest.write(' %d %d %d %d %d %d\n' % tuple(conn))
            c += len(r)
        dest.write('!entry.%s.unit.residues table  str name  int seq  int '
                   'childseq  int startatomx  str restype  int imagingx\n' %
                   res.name)
        c = 1
        for i, r in enumerate(res):
            if r.type is PROTEIN:
                typ = 'p'
            elif r.type is NUCLEIC:
                typ = 'n'
            elif r.type is SOLVENT:
                typ='w'
            elif r.type is UNKNOWN:
                typ='?'
            else:
                warnings.warn('Unrecognized residue type %r' % r.type,
                              AmberWarning)
                typ = '?'
            dest.write(' "%s" %d %d %d "%s" %d\n' % (r.name, i+1, 1+len(r), c,
                       typ, _imaging_atom(r)+c))
            c += len(r)
        dest.write('!entry.%s.unit.residuesPdbSequenceNumber array int\n' %
                   res.name)
        for i, r in enumerate(res):
            if len(res) == 1:
                dest.write(' 0\n')
            else:
                dest.write(' %d\n' % (i+1))
        dest.write('!entry.%s.unit.solventcap array dbl\n' % res.name)
        dest.write(' -1.000000\n' + ' 0.0\n' * 4)
        dest.write('!entry.%s.unit.velocities table  dbl x  dbl y  dbl z\n' %
                   res.name)
        for r in res:
            for atom in r:
                try:
                    s = ' %g %g %g\n' % (atom.vx, atom.vy, atom.vz)
                except AttributeError:
                    dest.write(' 0.0 0.0 0.0\n')
                else:
                    dest.write(s)

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Helper routines
def _strip_enveloping_quotes(inp):
    """ Strips the quotation marks enveloping a string """
    if inp[0] == inp[-1] == '"' or inp[0] == inp[-1] == "'":
        return inp[1:-1]
    return inp

def _imaging_atom(res):
    """
    Determines the imaging atom for the residue. If all atoms are hydrogen
    except 1, it is the heavy atom. Otherwise, it is the atom *closest* to the
    COM of the residue
    """
    from parmed.geometry import center_of_mass
    #TODO implement the docstring
    found_heavy = False
    heavy_idx = -1
    for i, atom in enumerate(res):
        if atom.atomic_number > 1:
            heavy_idx = i
            if found_heavy: break
            found_heavy = True
    else:
        if heavy_idx != -1:
            return heavy_idx
        return 0 # No heavy atoms?? No imaging atom, then.
    coords = res.coordinates.reshape((len(res), 3))
    masses = np.zeros(len(res))
    for i, atom in enumerate(res):
        if atom.mass == 0:
            masses[i] = pt.Mass[pt.Element[atom.atomic_number]]
        else:
            masses[i] = atom.mass
    com = center_of_mass(coords, masses)
    diff = coords - com
    return np.argmin((diff * diff).sum(axis=1))