Source code for parmed.charmm.psf

"""
Provides a Python class for parsing a PSF file and setting up a system
structure for it

Author: Jason M. Swails
Contributors:
Date: April 20, 2014
"""
from __future__ import division

from copy import copy as _copy
from ..topologyobjects import (Bond, Angle, Dihedral, Improper, AcceptorDonor, Group, Cmap,
                               UreyBradley, NoUreyBradley, Atom, DihedralType, ImproperType,
                               UnassignedAtomType)
from ..exceptions import CharmmError, CharmmWarning, ParameterError
from ..structure import needs_openmm, Structure
from ..utils.io import genopen
from ..utils.six import wraps
from ..utils.six.moves import zip, range
from ..utils.six import string_types
import re
import warnings

def _catchindexerror(func):
    """
    Protects a function from raising an index error, and replace that exception
    with a CharmmError instead
    """
    @wraps(func)
    def newfunc(*args, **kwargs):
        """ Catch the index error """
        try:
            return func(*args, **kwargs)
        except IndexError as e:
            raise CharmmError('Array is too short: %s' % e)

    return newfunc

class _FileEOF(Exception):
    """ For control flow """

class _ZeroDict(dict):
    """
    Contains a dict that returns dummy (zero) arguments when a key is not
    present rather than raising a KeyError.  The return value for non-existent
    items is (0, []). It also special-case sections that have multiple pointers
    to avoid index errors if those are not present in the PSF file
    """
    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            if key.startswith('NGRP'):
                for k in self:
                    if k.startswith('NGRP'):
                        return dict.__getitem__(self, k)
                return [0, 0], []
            elif key.startswith('NUMLP'):
                for k in self:
                    if k.startswith('NUMLP'):
                        return dict.__getitem__(self, k)
                return [0, 0], []
            return 0, []

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

_resre = re.compile(r'(-?\d+)([a-zA-Z]*)')

[docs]class CharmmPsfFile(Structure): """ A chemical :class:`Structure` instantiated from CHARMM files. Parameters ---------- psf_name : str, optional Name of the PSF file (it must exist) Raises ------ IOError : If file ``psf_name`` does not exist CharmmPsfError : If any parsing errors are encountered """ @staticmethod def _convert(string, type, message): """ Converts a string to a specific type, making sure to raise CharmmError with the given message in the event of a failure. Parameters ---------- string : str Input string to process type : type Type of data to convert to (e.g., ``int``) message : str Error message to put in exception if failed """ try: return type(string) except ValueError: raise CharmmError('Could not convert %s [%s]' % (message,string)) #=================================================== @classmethod def _parse_psf_title_line(cls, line): words = line[:line.index('!')].split() title = line[line.index('!')+1:].strip().upper() # Strip out description if ':' in title: title = title[:title.index(':')] if len(words) == 1: pointers = cls._convert(words[0], int, 'pointer') else: pointers = tuple([cls._convert(w, int, 'pointer') for w in words]) return title, pointers #=================================================== @classmethod def _parse_psf_section(cls, psf): """ This method parses a section of the PSF file Parameters ---------- psf : file Open file that is pointing to the first line of the section that is to be parsed Returns ------- title : str The label of the PSF section we are parsing pointers : (int/tuple of ints) If one pointer is set, pointers is simply the integer that is value of that pointer. Otherwise it is a tuple with every pointer value defined in the first line data : list A list of all data in the parsed section converted to integers """ line = psf.readline() while not line.strip(): if not line: raise _FileEOF('Unexpected EOF in PSF file') else: line = psf.readline() if '!' in line: title, pointers = cls._parse_psf_title_line(line) else: raise CharmmError('Could not determine section title') # pragma: no cover line = psf.readline().strip() if not line and title.startswith('NNB'): # This will correctly handle the NNB section (which has a spurious # blank line) as well as any sections that have 0 members. line = psf.readline().strip() # If this line has a title in it, then it's one of those weird PSF files that # has basically no NNB section. Just skip over NNB and take the next section # instead if '!' in line: title, pointers = cls._parse_psf_title_line(line) line = psf.readline().strip() data = [] if title == 'NATOM' or title == 'NTITLE': # Store these two sections as strings (ATOM section we will parse # later). The rest of the sections are integer pointers while line: data.append(line) line = psf.readline().strip() else: while line: words = line.split() data.extend([cls._convert(w, int, 'PSF data') for w in words]) line = psf.readline().strip() return title, pointers, data #===================================================
[docs] @_catchindexerror def __init__(self, psf_name=None): """ Opens and parses a PSF file, then instantiates a CharmmPsfFile instance from the data. """ global _resre Structure.__init__(self) # Bail out if we don't have a filename if psf_name is None: return # Open the PSF and read the first line. It must start with "PSF" if isinstance(psf_name, string_types): fileobj = genopen(psf_name, 'r') own_handle = True else: fileobj = psf_name own_handle = False try: self.name = psf_name if isinstance(psf_name, string_types) else '' line = fileobj.readline() if not line.startswith('PSF'): raise CharmmError('Unrecognized PSF file. First line is %s' % line.strip()) # Store the flags psf_flags = line.split()[1:] # Now get all of the sections and store them in a dict fileobj.readline() # Now get all of the sections psfsections = _ZeroDict() while True: try: sec, ptr, data = CharmmPsfFile._parse_psf_section(fileobj) except _FileEOF: break psfsections[sec] = (ptr, data) # store the title self.title = psfsections['NTITLE'][1] # Next is the number of atoms natom = self._convert(psfsections['NATOM'][0], int, 'natom') # Parse all of the atoms for i in range(natom): words = psfsections['NATOM'][1][i].split() atid = int(words[0]) if atid != i + 1: raise CharmmError('Nonsequential atoms detected!') segid = words[1] rematch = _resre.match(words[2]) if not rematch: raise CharmmError('Could not interpret residue number %s' % # pragma: no cover words[2]) resid, inscode = rematch.groups() resid = self._convert(resid, int, 'residue number') resname = words[3] name = words[4] attype = words[5] # Try to convert the atom type to an integer a la CHARMM try: attype = int(attype) except ValueError: pass charge = self._convert(words[6], float, 'partial charge') mass = self._convert(words[7], float, 'atomic mass') props = words[8:] atom = Atom(name=name, type=attype, charge=charge, mass=mass) atom.props = props self.add_atom(atom, resname, resid, chain=segid, inscode=inscode, segid=segid) # Now get the number of bonds nbond = self._convert(psfsections['NBOND'][0], int, 'number of bonds') if len(psfsections['NBOND'][1]) != nbond * 2: raise CharmmError('Got %d indexes for %d bonds' % # pragma: no cover (len(psfsections['NBOND'][1]), nbond)) it = iter(psfsections['NBOND'][1]) for i, j in zip(it, it): self.bonds.append(Bond(self.atoms[i-1], self.atoms[j-1])) # Now get the number of angles and the angle list ntheta = self._convert(psfsections['NTHETA'][0], int, 'number of angles') if len(psfsections['NTHETA'][1]) != ntheta * 3: raise CharmmError('Got %d indexes for %d angles' % # pragma: no cover (len(psfsections['NTHETA'][1]), ntheta)) it = iter(psfsections['NTHETA'][1]) for i, j, k in zip(it, it, it): self.angles.append( Angle(self.atoms[i-1], self.atoms[j-1], self.atoms[k-1]) ) self.angles[-1].funct = 5 # urey-bradley # Now get the number of torsions and the torsion list nphi = self._convert(psfsections['NPHI'][0], int, 'number of torsions') if len(psfsections['NPHI'][1]) != nphi * 4: raise CharmmError('Got %d indexes for %d torsions' % # pragma: no cover (len(psfsections['NPHI']), nphi)) it = iter(psfsections['NPHI'][1]) for i, j, k, l in zip(it, it, it, it): self.dihedrals.append( Dihedral(self.atoms[i-1], self.atoms[j-1], self.atoms[k-1], self.atoms[l-1]) ) self.dihedrals.split = False # Now get the number of improper torsions nimphi = self._convert(psfsections['NIMPHI'][0], int, 'number of impropers') if len(psfsections['NIMPHI'][1]) != nimphi * 4: raise CharmmError('Got %d indexes for %d impropers' % # pragma: no cover (len(psfsections['NIMPHI'][1]), nimphi)) it = iter(psfsections['NIMPHI'][1]) for i, j, k, l in zip(it, it, it, it): self.impropers.append( Improper(self.atoms[i-1], self.atoms[j-1], self.atoms[k-1], self.atoms[l-1]) ) # Now handle the donors (what is this used for??) ndon = self._convert(psfsections['NDON'][0], int, 'number of donors') if len(psfsections['NDON'][1]) != ndon * 2: raise CharmmError('Got %d indexes for %d donors' % # pragma: no cover (len(psfsections['NDON'][1]), ndon)) it = iter(psfsections['NDON'][1]) for i, j in zip(it, it): self.donors.append( AcceptorDonor(self.atoms[i-1], self.atoms[j-1]) ) # Now handle the acceptors (what is this used for??) nacc = self._convert(psfsections['NACC'][0], int, 'number of acceptors') if len(psfsections['NACC'][1]) != nacc * 2: raise CharmmError('Got %d indexes for %d acceptors' % # pragma: no cover (len(psfsections['NACC'][1]), nacc)) it = iter(psfsections['NACC'][1]) for i, j in zip(it, it): self.acceptors.append( AcceptorDonor(self.atoms[i-1], self.atoms[j-1]) ) # Now get the group sections try: ngrp, nst2 = psfsections['NGRP NST2'][0] except ValueError: # pragma: no cover raise CharmmError('Could not unpack GROUP pointers') # pragma: no cover tmp = psfsections['NGRP NST2'][1] self.groups.nst2 = nst2 # Now handle the groups if len(psfsections['NGRP NST2'][1]) != ngrp * 3: raise CharmmError('Got %d indexes for %d groups' % # pragma: no cover (len(tmp), ngrp)) it = iter(psfsections['NGRP NST2'][1]) for i, j, k in zip(it, it, it): self.groups.append(Group(self.atoms[i], j, k)) # Assign all of the atoms to molecules recursively tmp = psfsections['MOLNT'][1] set_molecules(self.atoms) molecule_list = [a.marked for a in self.atoms] if len(tmp) == len(self.atoms): if molecule_list != tmp: warnings.warn('Detected PSF molecule section that is WRONG. ' 'Resetting molecularity.', CharmmWarning) # We have a CHARMM PSF file; now do NUMLP/NUMLPH sections numlp, numlph = psfsections['NUMLP NUMLPH'][0] if numlp != 0 or numlph != 0: raise NotImplementedError('Cannot currently handle PSFs with ' 'lone pairs defined in the NUMLP/' 'NUMLPH section.') # Now do the CMAPs ncrterm = self._convert(psfsections['NCRTERM'][0], int, 'Number of cross-terms') if len(psfsections['NCRTERM'][1]) != ncrterm * 8: raise CharmmError('Got %d CMAP indexes for %d cmap terms' % # pragma: no cover (len(psfsections['NCRTERM']), ncrterm)) it = iter(psfsections['NCRTERM'][1]) for i, j, k, l, m, n, o, p in zip(it, it, it, it, it, it, it, it): self.cmaps.append( Cmap.extended(self.atoms[i-1], self.atoms[j-1], self.atoms[k-1], self.atoms[l-1], self.atoms[m-1], self.atoms[n-1], self.atoms[o-1], self.atoms[p-1]) ) self.unchange() self.flags = psf_flags finally: if own_handle: fileobj.close()
#===================================================
[docs] @classmethod def from_structure(cls, struct, copy=False): """ Instantiates a CharmmPsfFile from an input Structure instance. This method makes sure all atom types have uppercase-only names Parameters ---------- struct : :class:`parmed.structure.Structure` The input structure to convert to a CharmmPsfFile instance copy : bool, optional If True, a copy of all items are made. Otherwise, the resulting CharmmPsfFile is a shallow copy Returns ------- psf : :class:`CharmmPsfFile` CHARMM PSF file Raises ------ ValueError if the functional form is not recognized or cannot be implemented through the PSF and parameter/stream files Notes ----- If copy is False, the original object may have its atom type names changed if any of them have lower-case letters """ from parmed.charmm.parameters import _typeconv as typeconv if (struct.rb_torsions or struct.trigonal_angles or struct.out_of_plane_bends or struct.pi_torsions or struct.stretch_bends or struct.torsion_torsions or struct.chiral_frames or struct.multipole_frames or struct.nrexcl != 3): raise ValueError('Unsupported functional form for CHARMM PSF') if copy: struct = _copy(struct) psf = cls() psf.atoms = struct.atoms psf.residues = struct.residues psf.bonds = struct.bonds psf.angles = struct.angles psf.urey_bradleys = struct.urey_bradleys psf.dihedrals = struct.dihedrals psf.impropers = struct.impropers psf.acceptors = struct.acceptors psf.donors = struct.donors psf.groups = struct.groups psf.cmaps = struct.cmaps psf.bond_types = struct.bond_types psf.angle_types = struct.angle_types psf.urey_bradley_types = struct.urey_bradley_types psf.dihedral_types = struct.dihedral_types psf.improper_types = struct.improper_types psf.cmap_types = struct.cmap_types for atom in psf.atoms: atom.type = typeconv(atom.type) if atom.atom_type is not UnassignedAtomType: atom.atom_type.name = typeconv(atom.atom_type.name) # If no groups are defined, make each residue its own group if not psf.groups: for residue in psf.residues: chg = sum(a.charge for a in residue) if chg < 1e-4: psf.groups.append(Group(residue[0], 1, 0)) else: psf.groups.append(Group(residue[0], 2, 0)) psf.groups.nst2 = 0 return psf
#=================================================== def __str__(self): return self.name #===================================================
[docs] @needs_openmm def createSystem(self, params=None, *args, **kwargs): """ Creates an OpenMM System object from the CHARMM PSF file. This is a shortcut for calling `load_parameters` followed by Structure.createSystem. If params is not None, `load_parameters` will be called on that parameter set, and Structure.createSystem will be called with the remaining args and kwargs Parameters ---------- params : CharmmParameterSet=None If not None, this parameter set will be loaded See Also -------- :meth:`parmed.structure.Structure.createSystem` In addition to `params`, this method also takes all arguments for :meth:`parmed.structure.Structure.createSystem` """ if params is not None: self.load_parameters(params) return super(CharmmPsfFile, self).createSystem(*args, **kwargs)
#===================================================
[docs] def load_parameters(self, parmset, copy_parameters=True): """ Loads parameters from a parameter set that was loaded via CHARMM RTF, PAR, and STR files. Parameters ---------- parmset : :class:`CharmmParameterSet` List of all parameters copy_parameters : bool, optional, default=True If False, parmset will not be copied. WARNING: ------- Not copying parmset will cause ParameterSet and Structure to share references to types. If you modify the original parameter set, the references in Structure list_types will be silently modified. However, if you change any reference in the parameter set, then that reference will no longer be shared with structure. Example where the reference in ParameterSet is changed. The following will NOT modify the parameters in the psf:: psf.load_parameters(parmset, copy_parameters=False) parmset.angle_types[('a1', 'a2', a3')] = AngleType(1, 2) The following WILL change the parameter in the psf because the reference has not been changed in ``ParameterSet``:: psf.load_parameters(parmset, copy_parameters=False) a = parmset.angle_types[('a1', 'a2', 'a3')] a.k = 10 a.theteq = 100 Extra care should be taken when trying this with dihedral_types. Since dihedral_type is a Fourier sequence, ParameterSet stores DihedralType for every term in DihedralTypeList. Therefore, the example below will STILL modify the type in the :class:`Structure` list_types:: parmset.dihedral_types[('a', 'b', 'c', 'd')][0] = DihedralType(1, 2, 3) This assigns a new instance of DihedralType to an existing DihedralTypeList that ParameterSet and Structure are tracking and the shared reference is NOT changed. Use with caution! Notes ----- - If any dihedral or improper parameters cannot be found, I will try inserting wildcards (at either end for dihedrals and as the two central atoms in impropers) and see if that matches. Wild-cards will apply ONLY if specific parameters cannot be found. - This method will expand the dihedrals attribute by adding a separate Dihedral object for each term for types that have a multi-term expansion Raises ------ ParameterError if any parameters cannot be found """ if copy_parameters: parmset = _copy(parmset) self.combining_rule = parmset.combining_rule # First load the atom types for atom in self.atoms: try: if isinstance(atom.type, int): atype = parmset.atom_types_int[atom.type] else: atype = parmset.atom_types_str[atom.type.upper()] except KeyError: raise ParameterError('Could not find atom type for %s' % atom.type) atom.atom_type = atype # Change to string type to look up the rest of the parameters. Use # upper-case since all parameter sets were read in as upper-case atom.type = str(atom.atom_type).upper() atom.atomic_number = atype.atomic_number # Next load all of the bonds for bond in self.bonds: # Construct the key key = (min(bond.atom1.type, bond.atom2.type), max(bond.atom1.type, bond.atom2.type)) try: bond.type = parmset.bond_types[key] except KeyError: raise ParameterError('Missing bond type for %r' % bond) bond.type.used = False # Build the bond_types list del self.bond_types[:] for bond in self.bonds: if bond.type.used: continue bond.type.used = True self.bond_types.append(bond.type) bond.type.list = self.bond_types # Next load all of the angles. If a Urey-Bradley term is defined for # this angle, also build the urey_bradley and urey_bradley_type lists del self.urey_bradleys[:] for ang in self.angles: # Construct the key key = (min(ang.atom1.type, ang.atom3.type), ang.atom2.type, max(ang.atom1.type, ang.atom3.type)) try: ang.type = parmset.angle_types[key] ang.type.used = False ubt = parmset.urey_bradley_types[key] if ubt is not NoUreyBradley: ub = UreyBradley(ang.atom1, ang.atom3, ubt) self.urey_bradleys.append(ub) ubt.used = False except KeyError: raise ParameterError('Missing angle type for %r' % ang) del self.urey_bradley_types[:] del self.angle_types[:] for ub in self.urey_bradleys: if ub.type.used: continue ub.type.used = True self.urey_bradley_types.append(ub.type) ub.type.list = self.urey_bradley_types for ang in self.angles: if ang.type.used: continue ang.type.used = True self.angle_types.append(ang.type) ang.type.list = self.angle_types # Next load all of the dihedrals. active_dih_list = set() for dih in self.dihedrals: # Store the atoms a1, a2, a3, a4 = dih.atom1, dih.atom2, dih.atom3, dih.atom4 key = (a1.type, a2.type, a3.type, a4.type) # First see if the exact dihedral is specified if not key in parmset.dihedral_types: # Check for wild-cards key = ('X', a2.type, a3.type, 'X') if not key in parmset.dihedral_types: raise ParameterError('No dihedral parameters found for ' '%r' % dih) dih.type = parmset.dihedral_types[key] dih.type.used = False pair = (dih.atom1.idx, dih.atom4.idx) # To determine exclusions if (dih.atom1 in dih.atom4.bond_partners or dih.atom1 in dih.atom4.angle_partners): dih.ignore_end = True elif pair in active_dih_list: dih.ignore_end = True else: active_dih_list.add(pair) active_dih_list.add((dih.atom4.idx, dih.atom1.idx)) del self.dihedral_types[:] for dihedral in self.dihedrals: if dihedral.type.used: continue dihedral.type.used = True self.dihedral_types.append(dihedral.type) dihedral.type.list = self.dihedral_types # Now do the impropers for imp in self.impropers: a1, a2, a3, a4 = imp.atom1.type, imp.atom2.type, imp.atom3.type, imp.atom4.type imp.type = parmset.match_improper_type(a1, a2, a3, a4) if imp.type is None: raise ParameterError('No improper type for %s, %s, %s, and %s', a1, a2, a3, a4) imp.type.used = False # prepare list of harmonic impropers present in system del self.improper_types[:] for improper in self.impropers: if improper.type.used: continue improper.type.used = True if isinstance(improper.type, ImproperType): self.improper_types.append(improper.type) improper.type.list = self.improper_types elif isinstance(improper.type, DihedralType): self.dihedral_types.append(improper.type) improper.type.list = self.dihedral_types else: assert False, 'Should not be here' # Look through the list of impropers -- if there are any periodic # impropers, move them over to the dihedrals list for i in reversed(range(len(self.impropers))): if isinstance(self.impropers[i].type, DihedralType): imp = self.impropers.pop(i) dih = Dihedral(imp.atom1, imp.atom2, imp.atom3, imp.atom4, improper=True, ignore_end=True, type=imp.type) imp.delete() self.dihedrals.append(dih) # Now do the cmaps. These will not have wild-cards for cmap in self.cmaps: key = (cmap.atom1.type, cmap.atom2.type, cmap.atom3.type, cmap.atom4.type, cmap.atom2.type, cmap.atom3.type, cmap.atom4.type, cmap.atom5.type) try: cmap.type = parmset.cmap_types[key] except KeyError: raise ParameterError('No CMAP parameters found for %r' % cmap) cmap.type.used = False del self.cmap_types[:] for cmap in self.cmaps: if cmap.type.used: continue cmap.type.used = True self.cmap_types.append(cmap.type) cmap.type.list = self.cmap_types
#===================================================
[docs] def clear_cmap(self): " Clear the cmap list to prevent any CMAP parameters from being used " del self.cmaps[:]
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[docs]def set_molecules(atoms): """ Correctly sets the molecularity of the system based on connectivity. """ from sys import setrecursionlimit, getrecursionlimit # Since we use a recursive function here, we make sure that the recursion # limit is large enough to handle the maximum possible recursion depth we'll # need (NATOM). We don't want to shrink it, though, since we use list # comprehensions in list constructors in some places that have an implicit # (shallow) recursion, therefore, reducing the recursion limit too much here # could raise a recursion depth exceeded exception during a _Type/Atom/XList # creation. Therefore, set the recursion limit to the greater of the current # limit or the number of atoms setrecursionlimit(max(len(atoms), getrecursionlimit())) # Unmark all atoms so we can track which molecule each goes into atoms.unmark() # The molecule "ownership" list owner = [] # The way I do this is via a recursive algorithm, in which # the "set_owner" method is called for each bonded partner an atom # has, which in turn calls set_owner for each of its partners and # so on until everything has been assigned. molecule_number = 1 # which molecule number we are on for i in range(len(atoms)): # If this atom has not yet been "owned", make it the next molecule # However, we only increment which molecule number we're on if # we actually assigned a new molecule (obviously) if not atoms[i].marked: tmp = [i] _set_owner(atoms, tmp, i, molecule_number) # Make sure the atom indexes are sorted tmp.sort() owner.append(tmp) molecule_number += 1 return owner
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ def _set_owner(atoms, owner_array, atm, mol_id): """ Recursively sets ownership of given atom and all bonded partners """ atoms[atm].marked = mol_id for partner in atoms[atm].bond_partners: if not partner.marked: owner_array.append(partner.idx) _set_owner(atoms, owner_array, partner.idx, mol_id) assert partner.marked == mol_id, 'Atom in multiple molecules!' # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++