""" A collection of utilities for use with Pandas objects """
from __future__ import absolute_import, division, print_function
import numpy as np
try:
import pandas as pd
except ImportError:
pd = None
from parmed.exceptions import ParameterWarning
from parmed.utils.six.moves import zip
from parmed.utils.six import iteritems
import warnings
# Utility function for generating a DataFrame based on a collection of items.
# The passed object must have an `atoms` attribute
def create_dataframe(obj):
    """ Creates a pandas.DataFrame with one row per atom of ``obj.atoms``

    Parameters
    ----------
    obj : object
        Any object with an ``atoms`` attribute that can be iterated over
        repeatedly. Every atom must provide the "always present" attributes
        listed below as well as a ``residue`` attribute exposing ``name``,
        ``idx``, ``number``, ``chain`` and ``segid``.

    Returns
    -------
    dataframe : :class:`pandas.DataFrame`
        Rows follow the order of ``obj.atoms``

    Raises
    ------
    ImportError
        If pandas could not be imported

    Notes
    -----
    The DataFrame will be over all atoms. The columns will be the attributes
    of the atom (as well as its containing residue). Some columns will
    *always* exist. Others will only exist if those attributes have been set
    on the Atom instances (see the :class:`Atom` docs for possible
    attributes and their meaning). The columns that will always be present
    are:

        - number : int
        - name : str
        - type : str
        - atomic_number : int
        - charge : float
        - mass : float
        - nb_idx : int
        - solvent_radius : float
        - screen : float
        - occupancy : float
        - bfactor : float
        - altloc : str
        - tree : str
        - join : int
        - irotat : int
        - rmin : float
        - epsilon : float
        - rmin_14 : float
        - epsilon_14 : float
        - resname : str (name of the containing residue)
        - resid : int (Sequential index of the containing residue)
        - resnum : int (original residue number in the input structure)
        - chain : str (chain ID that the containing residue belongs to)
        - segid : str (segment ID of the containing residue)

    The following columns are only added when *every* atom defines the
    corresponding attribute(s); a group (coordinates, velocities,
    multipoles) is added or skipped as a whole:

        - xx : float (x-coordinate position)
        - xy : float (y-coordinate position)
        - xz : float (z-coordinate position)
        - vx : float (x-coordinate velocity)
        - vy : float (y-coordinate velocity)
        - vz : float (z-coordinate velocity)
        - type_idx : int (integer type index for AMOEBA)
        - class_idx : int (integer class type index for AMOEBA)
        - multipole_111 : float (Monopole)
        - multipole_211 : float (1,1 Dipole component)
        - multipole_212 : float (1,2 Dipole component)
        - multipole_222 : float (2,2 Dipole component)
        - multipole_411 : float (1,1 Quadrupole component)
        - multipole_412 : float (1,2 Quadrupole component)
        - multipole_422 : float (2,2 Quadrupole component)
        - multipole_413 : float (1,3 Quadrupole component)
        - multipole_423 : float (2,3 Quadrupole component)
        - multipole_433 : float (3,3 Quadrupole component)
        - polarizability : float (dipole polarizability)
        - vdw_parent : int (index of the vdW parent atom of this atom)

    The ten multipole columns are filled positionally, in the order listed,
    from each atom's ``multipoles`` sequence.

    The anisotropic b-factor columns are added when at least one atom has a
    non-None ``anisou``; atoms without one get missing values in that row:

        - U11 : float (U[1][1] of anisotropic b-factor tensor)
        - U22 : float (U[2][2] of anisotropic b-factor tensor)
        - U33 : float (U[3][3] of anisotropic b-factor tensor)
        - U12 : float (U[1][2] of anisotropic b-factor tensor)
        - U13 : float (U[1][3] of anisotropic b-factor tensor)
        - U23 : float (U[2][3] of anisotropic b-factor tensor)
    """
    if pd is None:
        raise ImportError('pandas is not available; cannot create a pandas '
                          'DataFrame from this Structure')

    # Column name == atom attribute name; tuple order is the column order
    atom_columns = (
        'number', 'name', 'type', 'atomic_number', 'charge', 'mass',
        'nb_idx', 'solvent_radius', 'screen', 'occupancy', 'bfactor',
        'altloc', 'tree', 'join', 'irotat', 'rmin', 'epsilon', 'rmin_14',
        'epsilon_14',
    )
    # (column name, attribute on atom.residue)
    residue_columns = (
        ('resname', 'name'), ('resid', 'idx'), ('resnum', 'number'),
        ('chain', 'chain'), ('segid', 'segid'),
    )

    atoms = obj.atoms
    ret = pd.DataFrame()
    for column in atom_columns:
        ret[column] = [getattr(atom, column) for atom in atoms]
    for column, attr in residue_columns:
        ret[column] = [getattr(atom.residue, attr) for atom in atoms]

    # Now for optional attributes. An AttributeError from any single atom
    # means the attribute is not uniformly available, so it is left out.

    # Coordinates, then velocities
    for names in (['xx', 'xy', 'xz'], ['vx', 'vy', 'vz']):
        try:
            vectors = pd.DataFrame(
                [[getattr(atom, name) for name in names] for atom in atoms],
                columns=names
            )
        except AttributeError:
            continue
        ret = ret.join(vectors)

    # AMOEBA LJ type, then AMOEBA class type
    for column in ('type_idx', 'class_idx'):
        try:
            ret[column] = [getattr(atom, column) for atom in atoms]
        except AttributeError:
            pass

    # AMOEBA multipoles
    try:
        multipoles = pd.DataFrame(
            [atom.multipoles for atom in atoms],
            columns=['multipole_111', 'multipole_211', 'multipole_212',
                     'multipole_222', 'multipole_411', 'multipole_412',
                     'multipole_422', 'multipole_413', 'multipole_423',
                     'multipole_433']
        )
    except AttributeError:
        pass
    else:
        ret = ret.join(multipoles)

    # AMOEBA polarizabilities
    try:
        ret['polarizability'] = [atom.polarizability for atom in atoms]
    except AttributeError:
        pass

    # AMOEBA vdw parent atom (stored as the parent's index)
    try:
        ret['vdw_parent'] = [atom.vdw_parent.idx for atom in atoms]
    except AttributeError:
        pass

    # Anisotropic b-factors. A missing ``anisou`` attribute is treated like
    # ``anisou = None`` so that it behaves like the other optional data.
    none6 = [None] * 6
    anisos = []
    has_aniso = False
    for atom in atoms:
        aniso = getattr(atom, 'anisou', None)
        if hasattr(aniso, 'tolist'):
            # e.g. a numpy array; convert to a plain list for the DataFrame
            aniso = aniso.tolist()
        if aniso is None:
            anisos.append(none6)
        else:
            has_aniso = True
            anisos.append(aniso)
    if has_aniso:
        ret = ret.join(
            pd.DataFrame(anisos,
                         columns=['U11', 'U22', 'U33', 'U12', 'U13', 'U23'])
        )
    return ret
def load_dataframe(obj, dataframe):
    """
    Loads a DataFrame into the current object, setting atomic properties based
    on the entries of the DataFrame. Supported atomic properties are:

        - number : int
        - name : str
        - type : str
        - atomic_number : int
        - charge : float
        - mass : float
        - nb_idx : int
        - solvent_radius : float
        - screen : float
        - occupancy : float
        - bfactor : float
        - altloc : str
        - tree : str
        - join : int
        - irotat : int
        - rmin : float
        - epsilon : float
        - rmin_14 : float
        - epsilon_14 : float
        - xx : float (x-coordinate position)
        - xy : float (y-coordinate position)
        - xz : float (z-coordinate position)
        - vx : float (x-coordinate velocity)
        - vy : float (y-coordinate velocity)
        - vz : float (z-coordinate velocity)
        - type_idx : int (integer type index for AMOEBA)
        - class_idx : int (integer class type index for AMOEBA)
        - multipole_111 : float (Monopole)
        - multipole_211 : float (1,1 Dipole component)
        - multipole_212 : float (1,2 Dipole component)
        - multipole_222 : float (2,2 Dipole component)
        - multipole_411 : float (1,1 Quadrupole component)
        - multipole_412 : float (1,2 Quadrupole component)
        - multipole_422 : float (2,2 Quadrupole component)
        - multipole_413 : float (1,3 Quadrupole component)
        - multipole_423 : float (2,3 Quadrupole component)
        - multipole_433 : float (3,3 Quadrupole component)
        - polarizability : float (dipole polarizability)
        - vdw_parent : int (index of the vdW parent atom of this atom)
        - segid : segment ID (similar to chain, but for CHARMM)
        - U11 : float (U[1][1] of anisotropic b-factor tensor)
        - U22 : float (U[2][2] of anisotropic b-factor tensor)
        - U33 : float (U[3][3] of anisotropic b-factor tensor)
        - U12 : float (U[1][2] of anisotropic b-factor tensor)
        - U13 : float (U[1][3] of anisotropic b-factor tensor)
        - U23 : float (U[2][3] of anisotropic b-factor tensor)

    ``multipole_213`` is accepted as an alternative name for the
    ``multipole_222`` column (both fill the same slot; if both are present
    the one iterated last wins).

    ``atom.multipoles`` is only assigned when all ten multipole columns are
    present, and ``atom.anisou`` only when all six ``U..`` columns are
    present; each atom then receives its own row as a numpy array.

    The resname, resid, resnum, chain, and segid columns are assigned, under
    exactly those names, as attributes of each atom's ``residue`` object.
    Any other column emits a ParameterWarning and is otherwise skipped.

    Parameters
    ----------
    obj : object
        Object whose ``atoms`` sequence is modified in place
    dataframe : :class:`pandas.DataFrame`
        Columns to load. Anything ``iteritems`` can walk as
        ``(column name, column data)`` pairs is processed the same way.

    Raises
    ------
    ValueError
        If a column's length differs from the number of atoms
    """
    # NOTE(review): an earlier revision of this docstring said the residue
    # columns were "ignored", while the code assigns them onto atom.residue
    # (as ``resname``/``resid``/``resnum``, not ``name``/``idx``/``number``).
    # That behaviour is preserved here -- confirm which one is intended.
    atoms = obj.atoms

    # Columns copied verbatim onto the identically named atom attribute
    direct_attrs = frozenset((
        'number', 'name', 'type', 'atomic_number', 'charge', 'mass',
        'nb_idx', 'solvent_radius', 'screen', 'occupancy', 'bfactor',
        'altloc', 'tree', 'join', 'irotat', 'rmin', 'epsilon', 'rmin_14',
        'epsilon_14', 'xx', 'xy', 'xz', 'vx', 'vy', 'vz', 'type_idx',
        'class_idx', 'polarizability',
    ))
    # Columns stored on atom.residue
    residue_attrs = frozenset(('resname', 'resid', 'resnum', 'chain', 'segid'))
    # Column name -> position inside atom.multipoles. Slot 3 answers to both
    # 'multipole_222' (the column name used when multipoles are exported to a
    # DataFrame) and 'multipole_213' (the only name this loader used to
    # recognise), so existing frames of either flavour load completely.
    multipole_slot = {
        'multipole_111': 0, 'multipole_211': 1, 'multipole_212': 2,
        'multipole_222': 3, 'multipole_213': 3, 'multipole_411': 4,
        'multipole_412': 5, 'multipole_422': 6, 'multipole_413': 7,
        'multipole_423': 8, 'multipole_433': 9,
    }
    # Column name -> position inside atom.anisou
    anisou_slot = {'U11': 0, 'U22': 1, 'U33': 2, 'U12': 3, 'U13': 4, 'U23': 5}

    def set_attribute(attr, data):
        """ Set the attribute list from the data """
        if len(data) != len(atoms):
            raise ValueError('Data does not match length of atoms list')
        for atom, x in zip(atoms, data):
            setattr(atom, attr, x)

    def set_residue_attr(attr, data):
        """ Set the attribute on every atom's residue from the data """
        if len(data) != len(atoms):
            raise ValueError('Data does not match length of atoms list')
        for atom, x in zip(atoms, data):
            setattr(atom.residue, attr, x)

    multipoles = [None] * 10
    anisous = [None] * 6
    for key, data in iteritems(dataframe):
        if key in direct_attrs:
            set_attribute(key, data)
        elif key == 'vdw_parent':
            if len(data) != len(atoms):
                raise ValueError('vdw_parent length not equal to natom')
            # The column holds atom indices; resolve them to Atom objects
            for atom, parent in zip(atoms, data):
                atom.vdw_parent = atoms[parent]
        elif key in multipole_slot:
            multipoles[multipole_slot[key]] = data
        elif key in anisou_slot:
            anisous[anisou_slot[key]] = data
        elif key in residue_attrs:
            set_residue_attr(key, data)
        else:
            warnings.warn('Atomic property %s not recognized' % (key,),
                          ParameterWarning)

    # Now combine the multipoles and anisous if they are all specified.
    # Identity checks are required: ``None in [...]`` compares with ``==``,
    # which is element-wise (and has no single truth value) for column data.
    if all(column is not None for column in anisous):
        set_attribute('anisou', np.vstack(anisous).T)
    if all(column is not None for column in multipoles):
        set_attribute('multipoles', np.vstack(multipoles).T)