"""
Provides a Python class for parsing a PSF file and setting up a system
structure for it
Author: Jason M. Swails
Contributors:
Date: April 20, 2014
"""
from __future__ import division
from copy import copy as _copy
from ..topologyobjects import (Bond, Angle, Dihedral, Improper, AcceptorDonor, Group, Cmap,
UreyBradley, NoUreyBradley, Atom, DihedralType, ImproperType,
UnassignedAtomType)
from ..exceptions import CharmmError, CharmmWarning, ParameterError
from ..structure import needs_openmm, Structure
from ..utils.io import genopen
from ..utils.six import wraps
from ..utils.six.moves import zip, range
from ..utils.six import string_types
import re
import warnings
def _catchindexerror(func):
"""
Protects a function from raising an index error, and replace that exception
with a CharmmError instead
"""
@wraps(func)
def newfunc(*args, **kwargs):
""" Catch the index error """
try:
return func(*args, **kwargs)
except IndexError as e:
raise CharmmError('Array is too short: %s' % e)
return newfunc
class _FileEOF(Exception):
""" For control flow """
class _ZeroDict(dict):
"""
Contains a dict that returns dummy (zero) arguments when a key is not
present rather than raising a KeyError. The return value for non-existent
items is (0, []). It also special-case sections that have multiple pointers
to avoid index errors if those are not present in the PSF file
"""
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
if key.startswith('NGRP'):
for k in self:
if k.startswith('NGRP'):
return dict.__getitem__(self, k)
return [0, 0], []
elif key.startswith('NUMLP'):
for k in self:
if k.startswith('NUMLP'):
return dict.__getitem__(self, k)
return [0, 0], []
return 0, []
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
_resre = re.compile(r'(-?\d+)([a-zA-Z]*)')
[docs]class CharmmPsfFile(Structure):
"""
A chemical :class:`Structure` instantiated from CHARMM files.
Parameters
----------
psf_name : str, optional
Name of the PSF file (it must exist)
Raises
------
IOError : If file ``psf_name`` does not exist
CharmmPsfError : If any parsing errors are encountered
"""
@staticmethod
def _convert(string, type, message):
"""
Converts a string to a specific type, making sure to raise
CharmmError with the given message in the event of a failure.
Parameters
----------
string : str
Input string to process
type : type
Type of data to convert to (e.g., ``int``)
message : str
Error message to put in exception if failed
"""
try:
return type(string)
except ValueError:
raise CharmmError('Could not convert %s [%s]' % (message,string))
#===================================================
@classmethod
def _parse_psf_title_line(cls, line):
words = line[:line.index('!')].split()
title = line[line.index('!')+1:].strip().upper()
# Strip out description
if ':' in title:
title = title[:title.index(':')]
if len(words) == 1:
pointers = cls._convert(words[0], int, 'pointer')
else:
pointers = tuple([cls._convert(w, int, 'pointer') for w in words])
return title, pointers
#===================================================
@classmethod
def _parse_psf_section(cls, psf):
"""
This method parses a section of the PSF file
Parameters
----------
psf : file
Open file that is pointing to the first line of the section that is
to be parsed
Returns
-------
title : str
The label of the PSF section we are parsing
pointers : (int/tuple of ints)
If one pointer is set, pointers is simply the integer that is value
of that pointer. Otherwise it is a tuple with every pointer value
defined in the first line
data : list
A list of all data in the parsed section converted to integers
"""
line = psf.readline()
while not line.strip():
if not line:
raise _FileEOF('Unexpected EOF in PSF file')
else:
line = psf.readline()
if '!' in line:
title, pointers = cls._parse_psf_title_line(line)
else:
raise CharmmError('Could not determine section title') # pragma: no cover
line = psf.readline().strip()
if not line and title.startswith('NNB'):
# This will correctly handle the NNB section (which has a spurious
# blank line) as well as any sections that have 0 members.
line = psf.readline().strip()
# If this line has a title in it, then it's one of those weird PSF files that
# has basically no NNB section. Just skip over NNB and take the next section
# instead
if '!' in line:
title, pointers = cls._parse_psf_title_line(line)
line = psf.readline().strip()
data = []
if title == 'NATOM' or title == 'NTITLE':
# Store these two sections as strings (ATOM section we will parse
# later). The rest of the sections are integer pointers
while line:
data.append(line)
line = psf.readline().strip()
else:
while line:
words = line.split()
data.extend([cls._convert(w, int, 'PSF data') for w in words])
line = psf.readline().strip()
return title, pointers, data
#===================================================
[docs] @_catchindexerror
def __init__(self, psf_name=None):
"""
Opens and parses a PSF file, then instantiates a CharmmPsfFile
instance from the data.
"""
global _resre
Structure.__init__(self)
# Bail out if we don't have a filename
if psf_name is None:
return
# Open the PSF and read the first line. It must start with "PSF"
if isinstance(psf_name, string_types):
fileobj = genopen(psf_name, 'r')
own_handle = True
else:
fileobj = psf_name
own_handle = False
try:
self.name = psf_name if isinstance(psf_name, string_types) else ''
line = fileobj.readline()
if not line.startswith('PSF'):
raise CharmmError('Unrecognized PSF file. First line is %s' % line.strip())
# Store the flags
psf_flags = line.split()[1:]
# Now get all of the sections and store them in a dict
fileobj.readline()
# Now get all of the sections
psfsections = _ZeroDict()
while True:
try:
sec, ptr, data = CharmmPsfFile._parse_psf_section(fileobj)
except _FileEOF:
break
psfsections[sec] = (ptr, data)
# store the title
self.title = psfsections['NTITLE'][1]
# Next is the number of atoms
natom = self._convert(psfsections['NATOM'][0], int, 'natom')
# Parse all of the atoms
for i in range(natom):
words = psfsections['NATOM'][1][i].split()
atid = int(words[0])
if atid != i + 1:
raise CharmmError('Nonsequential atoms detected!')
segid = words[1]
rematch = _resre.match(words[2])
if not rematch:
raise CharmmError('Could not interpret residue number %s' % # pragma: no cover
words[2])
resid, inscode = rematch.groups()
resid = self._convert(resid, int, 'residue number')
resname = words[3]
name = words[4]
attype = words[5]
# Try to convert the atom type to an integer a la CHARMM
try:
attype = int(attype)
except ValueError:
pass
charge = self._convert(words[6], float, 'partial charge')
mass = self._convert(words[7], float, 'atomic mass')
props = words[8:]
atom = Atom(name=name, type=attype, charge=charge, mass=mass)
atom.props = props
self.add_atom(atom, resname, resid, chain=segid,
inscode=inscode, segid=segid)
# Now get the number of bonds
nbond = self._convert(psfsections['NBOND'][0], int, 'number of bonds')
if len(psfsections['NBOND'][1]) != nbond * 2:
raise CharmmError('Got %d indexes for %d bonds' % # pragma: no cover
(len(psfsections['NBOND'][1]), nbond))
it = iter(psfsections['NBOND'][1])
for i, j in zip(it, it):
self.bonds.append(Bond(self.atoms[i-1], self.atoms[j-1]))
# Now get the number of angles and the angle list
ntheta = self._convert(psfsections['NTHETA'][0], int, 'number of angles')
if len(psfsections['NTHETA'][1]) != ntheta * 3:
raise CharmmError('Got %d indexes for %d angles' % # pragma: no cover
(len(psfsections['NTHETA'][1]), ntheta))
it = iter(psfsections['NTHETA'][1])
for i, j, k in zip(it, it, it):
self.angles.append(
Angle(self.atoms[i-1], self.atoms[j-1], self.atoms[k-1])
)
self.angles[-1].funct = 5 # urey-bradley
# Now get the number of torsions and the torsion list
nphi = self._convert(psfsections['NPHI'][0], int, 'number of torsions')
if len(psfsections['NPHI'][1]) != nphi * 4:
raise CharmmError('Got %d indexes for %d torsions' % # pragma: no cover
(len(psfsections['NPHI']), nphi))
it = iter(psfsections['NPHI'][1])
for i, j, k, l in zip(it, it, it, it):
self.dihedrals.append(
Dihedral(self.atoms[i-1], self.atoms[j-1],
self.atoms[k-1], self.atoms[l-1])
)
self.dihedrals.split = False
# Now get the number of improper torsions
nimphi = self._convert(psfsections['NIMPHI'][0], int, 'number of impropers')
if len(psfsections['NIMPHI'][1]) != nimphi * 4:
raise CharmmError('Got %d indexes for %d impropers' % # pragma: no cover
(len(psfsections['NIMPHI'][1]), nimphi))
it = iter(psfsections['NIMPHI'][1])
for i, j, k, l in zip(it, it, it, it):
self.impropers.append(
Improper(self.atoms[i-1], self.atoms[j-1],
self.atoms[k-1], self.atoms[l-1])
)
# Now handle the donors (what is this used for??)
ndon = self._convert(psfsections['NDON'][0], int, 'number of donors')
if len(psfsections['NDON'][1]) != ndon * 2:
raise CharmmError('Got %d indexes for %d donors' % # pragma: no cover
(len(psfsections['NDON'][1]), ndon))
it = iter(psfsections['NDON'][1])
for i, j in zip(it, it):
self.donors.append(
AcceptorDonor(self.atoms[i-1], self.atoms[j-1])
)
# Now handle the acceptors (what is this used for??)
nacc = self._convert(psfsections['NACC'][0], int, 'number of acceptors')
if len(psfsections['NACC'][1]) != nacc * 2:
raise CharmmError('Got %d indexes for %d acceptors' % # pragma: no cover
(len(psfsections['NACC'][1]), nacc))
it = iter(psfsections['NACC'][1])
for i, j in zip(it, it):
self.acceptors.append(
AcceptorDonor(self.atoms[i-1], self.atoms[j-1])
)
# Now get the group sections
try:
ngrp, nst2 = psfsections['NGRP NST2'][0]
except ValueError: # pragma: no cover
raise CharmmError('Could not unpack GROUP pointers') # pragma: no cover
tmp = psfsections['NGRP NST2'][1]
self.groups.nst2 = nst2
# Now handle the groups
if len(psfsections['NGRP NST2'][1]) != ngrp * 3:
raise CharmmError('Got %d indexes for %d groups' % # pragma: no cover
(len(tmp), ngrp))
it = iter(psfsections['NGRP NST2'][1])
for i, j, k in zip(it, it, it):
self.groups.append(Group(self.atoms[i], j, k))
# Assign all of the atoms to molecules recursively
tmp = psfsections['MOLNT'][1]
set_molecules(self.atoms)
molecule_list = [a.marked for a in self.atoms]
if len(tmp) == len(self.atoms):
if molecule_list != tmp:
warnings.warn('Detected PSF molecule section that is WRONG. '
'Resetting molecularity.', CharmmWarning)
# We have a CHARMM PSF file; now do NUMLP/NUMLPH sections
numlp, numlph = psfsections['NUMLP NUMLPH'][0]
if numlp != 0 or numlph != 0:
raise NotImplementedError('Cannot currently handle PSFs with '
'lone pairs defined in the NUMLP/'
'NUMLPH section.')
# Now do the CMAPs
ncrterm = self._convert(psfsections['NCRTERM'][0], int, 'Number of cross-terms')
if len(psfsections['NCRTERM'][1]) != ncrterm * 8:
raise CharmmError('Got %d CMAP indexes for %d cmap terms' % # pragma: no cover
(len(psfsections['NCRTERM']), ncrterm))
it = iter(psfsections['NCRTERM'][1])
for i, j, k, l, m, n, o, p in zip(it, it, it, it, it, it, it, it):
self.cmaps.append(
Cmap.extended(self.atoms[i-1], self.atoms[j-1],
self.atoms[k-1], self.atoms[l-1],
self.atoms[m-1], self.atoms[n-1],
self.atoms[o-1], self.atoms[p-1])
)
self.unchange()
self.flags = psf_flags
finally:
if own_handle:
fileobj.close()
#===================================================
[docs] @classmethod
def from_structure(cls, struct, copy=False):
"""
Instantiates a CharmmPsfFile from an input Structure instance. This
method makes sure all atom types have uppercase-only names
Parameters
----------
struct : :class:`parmed.structure.Structure`
The input structure to convert to a CharmmPsfFile instance
copy : bool, optional
If True, a copy of all items are made. Otherwise, the resulting
CharmmPsfFile is a shallow copy
Returns
-------
psf : :class:`CharmmPsfFile`
CHARMM PSF file
Raises
------
ValueError if the functional form is not recognized or cannot be
implemented through the PSF and parameter/stream files
Notes
-----
If copy is False, the original object may have its atom type names
changed if any of them have lower-case letters
"""
from parmed.charmm.parameters import _typeconv as typeconv
if (struct.rb_torsions or struct.trigonal_angles or
struct.out_of_plane_bends or struct.pi_torsions or
struct.stretch_bends or struct.torsion_torsions or
struct.chiral_frames or struct.multipole_frames or
struct.nrexcl != 3):
raise ValueError('Unsupported functional form for CHARMM PSF')
if copy:
struct = _copy(struct)
psf = cls()
psf.atoms = struct.atoms
psf.residues = struct.residues
psf.bonds = struct.bonds
psf.angles = struct.angles
psf.urey_bradleys = struct.urey_bradleys
psf.dihedrals = struct.dihedrals
psf.impropers = struct.impropers
psf.acceptors = struct.acceptors
psf.donors = struct.donors
psf.groups = struct.groups
psf.cmaps = struct.cmaps
psf.bond_types = struct.bond_types
psf.angle_types = struct.angle_types
psf.urey_bradley_types = struct.urey_bradley_types
psf.dihedral_types = struct.dihedral_types
psf.improper_types = struct.improper_types
psf.cmap_types = struct.cmap_types
for atom in psf.atoms:
atom.type = typeconv(atom.type)
if atom.atom_type is not UnassignedAtomType:
atom.atom_type.name = typeconv(atom.atom_type.name)
# If no groups are defined, make each residue its own group
if not psf.groups:
for residue in psf.residues:
chg = sum(a.charge for a in residue)
if chg < 1e-4:
psf.groups.append(Group(residue[0], 1, 0))
else:
psf.groups.append(Group(residue[0], 2, 0))
psf.groups.nst2 = 0
return psf
#===================================================
def __str__(self):
return self.name
#===================================================
[docs] @needs_openmm
def createSystem(self, params=None, *args, **kwargs):
"""
Creates an OpenMM System object from the CHARMM PSF file. This is a
shortcut for calling `load_parameters` followed by
Structure.createSystem. If params is not None, `load_parameters` will be
called on that parameter set, and Structure.createSystem will be called
with the remaining args and kwargs
Parameters
----------
params : CharmmParameterSet=None
If not None, this parameter set will be loaded
See Also
--------
:meth:`parmed.structure.Structure.createSystem`
In addition to `params`, this method also takes all arguments for
:meth:`parmed.structure.Structure.createSystem`
"""
if params is not None: self.load_parameters(params)
return super(CharmmPsfFile, self).createSystem(*args, **kwargs)
#===================================================
[docs] def load_parameters(self, parmset, copy_parameters=True):
"""
Loads parameters from a parameter set that was loaded via CHARMM RTF,
PAR, and STR files.
Parameters
----------
parmset : :class:`CharmmParameterSet`
List of all parameters
copy_parameters : bool, optional, default=True
If False, parmset will not be copied.
WARNING:
-------
Not copying parmset will cause ParameterSet and Structure to share
references to types. If you modify the original parameter set, the
references in Structure list_types will be silently modified.
However, if you change any reference in the parameter set, then that
reference will no longer be shared with structure.
Example where the reference in ParameterSet is changed. The
following will NOT modify the parameters in the psf::
psf.load_parameters(parmset, copy_parameters=False)
parmset.angle_types[('a1', 'a2', a3')] = AngleType(1, 2)
The following WILL change the parameter in the psf because the
reference has not been changed in ``ParameterSet``::
psf.load_parameters(parmset, copy_parameters=False)
a = parmset.angle_types[('a1', 'a2', 'a3')]
a.k = 10
a.theteq = 100
Extra care should be taken when trying this with dihedral_types.
Since dihedral_type is a Fourier sequence, ParameterSet stores
DihedralType for every term in DihedralTypeList. Therefore, the
example below will STILL modify the type in the :class:`Structure`
list_types::
parmset.dihedral_types[('a', 'b', 'c', 'd')][0] = DihedralType(1, 2, 3)
This assigns a new instance of DihedralType to an existing
DihedralTypeList that ParameterSet and Structure are tracking and
the shared reference is NOT changed.
Use with caution!
Notes
-----
- If any dihedral or improper parameters cannot be found, I will try
inserting wildcards (at either end for dihedrals and as the two
central atoms in impropers) and see if that matches. Wild-cards will
apply ONLY if specific parameters cannot be found.
- This method will expand the dihedrals attribute by adding a separate
Dihedral object for each term for types that have a multi-term
expansion
Raises
------
ParameterError if any parameters cannot be found
"""
if copy_parameters:
parmset = _copy(parmset)
self.combining_rule = parmset.combining_rule
# First load the atom types
for atom in self.atoms:
try:
if isinstance(atom.type, int):
atype = parmset.atom_types_int[atom.type]
else:
atype = parmset.atom_types_str[atom.type.upper()]
except KeyError:
raise ParameterError('Could not find atom type for %s' % atom.type)
atom.atom_type = atype
# Change to string type to look up the rest of the parameters. Use
# upper-case since all parameter sets were read in as upper-case
atom.type = str(atom.atom_type).upper()
atom.atomic_number = atype.atomic_number
# Next load all of the bonds
for bond in self.bonds:
# Construct the key
key = (min(bond.atom1.type, bond.atom2.type),
max(bond.atom1.type, bond.atom2.type))
try:
bond.type = parmset.bond_types[key]
except KeyError:
raise ParameterError('Missing bond type for %r' % bond)
bond.type.used = False
# Build the bond_types list
del self.bond_types[:]
for bond in self.bonds:
if bond.type.used: continue
bond.type.used = True
self.bond_types.append(bond.type)
bond.type.list = self.bond_types
# Next load all of the angles. If a Urey-Bradley term is defined for
# this angle, also build the urey_bradley and urey_bradley_type lists
del self.urey_bradleys[:]
for ang in self.angles:
# Construct the key
key = (min(ang.atom1.type, ang.atom3.type), ang.atom2.type,
max(ang.atom1.type, ang.atom3.type))
try:
ang.type = parmset.angle_types[key]
ang.type.used = False
ubt = parmset.urey_bradley_types[key]
if ubt is not NoUreyBradley:
ub = UreyBradley(ang.atom1, ang.atom3, ubt)
self.urey_bradleys.append(ub)
ubt.used = False
except KeyError:
raise ParameterError('Missing angle type for %r' % ang)
del self.urey_bradley_types[:]
del self.angle_types[:]
for ub in self.urey_bradleys:
if ub.type.used: continue
ub.type.used = True
self.urey_bradley_types.append(ub.type)
ub.type.list = self.urey_bradley_types
for ang in self.angles:
if ang.type.used: continue
ang.type.used = True
self.angle_types.append(ang.type)
ang.type.list = self.angle_types
# Next load all of the dihedrals.
active_dih_list = set()
for dih in self.dihedrals:
# Store the atoms
a1, a2, a3, a4 = dih.atom1, dih.atom2, dih.atom3, dih.atom4
key = (a1.type, a2.type, a3.type, a4.type)
# First see if the exact dihedral is specified
if not key in parmset.dihedral_types:
# Check for wild-cards
key = ('X', a2.type, a3.type, 'X')
if not key in parmset.dihedral_types:
raise ParameterError('No dihedral parameters found for '
'%r' % dih)
dih.type = parmset.dihedral_types[key]
dih.type.used = False
pair = (dih.atom1.idx, dih.atom4.idx) # To determine exclusions
if (dih.atom1 in dih.atom4.bond_partners or
dih.atom1 in dih.atom4.angle_partners):
dih.ignore_end = True
elif pair in active_dih_list:
dih.ignore_end = True
else:
active_dih_list.add(pair)
active_dih_list.add((dih.atom4.idx, dih.atom1.idx))
del self.dihedral_types[:]
for dihedral in self.dihedrals:
if dihedral.type.used: continue
dihedral.type.used = True
self.dihedral_types.append(dihedral.type)
dihedral.type.list = self.dihedral_types
# Now do the impropers
for imp in self.impropers:
a1, a2, a3, a4 = imp.atom1.type, imp.atom2.type, imp.atom3.type, imp.atom4.type
imp.type = parmset.match_improper_type(a1, a2, a3, a4)
if imp.type is None:
raise ParameterError('No improper type for %s, %s, %s, and %s', a1, a2, a3, a4)
imp.type.used = False
# prepare list of harmonic impropers present in system
del self.improper_types[:]
for improper in self.impropers:
if improper.type.used: continue
improper.type.used = True
if isinstance(improper.type, ImproperType):
self.improper_types.append(improper.type)
improper.type.list = self.improper_types
elif isinstance(improper.type, DihedralType):
self.dihedral_types.append(improper.type)
improper.type.list = self.dihedral_types
else:
assert False, 'Should not be here'
# Look through the list of impropers -- if there are any periodic
# impropers, move them over to the dihedrals list
for i in reversed(range(len(self.impropers))):
if isinstance(self.impropers[i].type, DihedralType):
imp = self.impropers.pop(i)
dih = Dihedral(imp.atom1, imp.atom2, imp.atom3, imp.atom4,
improper=True, ignore_end=True, type=imp.type)
imp.delete()
self.dihedrals.append(dih)
# Now do the cmaps. These will not have wild-cards
for cmap in self.cmaps:
key = (cmap.atom1.type, cmap.atom2.type, cmap.atom3.type,
cmap.atom4.type, cmap.atom2.type, cmap.atom3.type,
cmap.atom4.type, cmap.atom5.type)
try:
cmap.type = parmset.cmap_types[key]
except KeyError:
raise ParameterError('No CMAP parameters found for %r' % cmap)
cmap.type.used = False
del self.cmap_types[:]
for cmap in self.cmaps:
if cmap.type.used: continue
cmap.type.used = True
self.cmap_types.append(cmap.type)
cmap.type.list = self.cmap_types
#===================================================
[docs] def clear_cmap(self):
" Clear the cmap list to prevent any CMAP parameters from being used "
del self.cmaps[:]
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[docs]def set_molecules(atoms):
"""
Correctly sets the molecularity of the system based on connectivity.
"""
from sys import setrecursionlimit, getrecursionlimit
# Since we use a recursive function here, we make sure that the recursion
# limit is large enough to handle the maximum possible recursion depth we'll
# need (NATOM). We don't want to shrink it, though, since we use list
# comprehensions in list constructors in some places that have an implicit
# (shallow) recursion, therefore, reducing the recursion limit too much here
# could raise a recursion depth exceeded exception during a _Type/Atom/XList
# creation. Therefore, set the recursion limit to the greater of the current
# limit or the number of atoms
setrecursionlimit(max(len(atoms), getrecursionlimit()))
# Unmark all atoms so we can track which molecule each goes into
atoms.unmark()
# The molecule "ownership" list
owner = []
# The way I do this is via a recursive algorithm, in which
# the "set_owner" method is called for each bonded partner an atom
# has, which in turn calls set_owner for each of its partners and
# so on until everything has been assigned.
molecule_number = 1 # which molecule number we are on
for i in range(len(atoms)):
# If this atom has not yet been "owned", make it the next molecule
# However, we only increment which molecule number we're on if
# we actually assigned a new molecule (obviously)
if not atoms[i].marked:
tmp = [i]
_set_owner(atoms, tmp, i, molecule_number)
# Make sure the atom indexes are sorted
tmp.sort()
owner.append(tmp)
molecule_number += 1
return owner
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def _set_owner(atoms, owner_array, atm, mol_id):
""" Recursively sets ownership of given atom and all bonded partners """
atoms[atm].marked = mol_id
for partner in atoms[atm].bond_partners:
if not partner.marked:
owner_array.append(partner.idx)
_set_owner(atoms, owner_array, partner.idx, mol_id)
assert partner.marked == mol_id, 'Atom in multiple molecules!'
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++