# Source code for parmed.residue

"""
This module contains basic information and functionality related to individual
residues in typical biopolymers.
"""

# Names exported by ``from parmed.residue import *``.
# NOTE(review): ``T`` and ``WATER_NAMES`` are defined in this module but are
# not listed here -- confirm that leaving them out is intentional.
__all__ = [
    # Residue classes
    'AminoAcidResidue', 'RNAResidue', 'DNAResidue',
    # Amino acid residues
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'HYP',
    'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR',
    'VAL',
    # DNA residues
    'DA', 'DT', 'DG', 'DC',
    # RNA residues
    'A', 'U', 'G', 'C',
    # Residue-name collections
    'SOLVENT_NAMES', 'EXTRA_POINT_NAMES', 'CATION_NAMES', 'ANION_NAMES',
    'ALLION_NAMES',
]

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

class BiomolecularResidue(object):
    """
    Base class for different classes of biopolymer residues.

    This class is abstract: instantiating it directly raises
    ``NotImplementedError`` and :meth:`get` must be overridden. A subclass
    that does not redeclare the class-level registries below shares the ones
    defined here.
    """
    # Class-level registries; :meth:`has` reads ``all_residues``.
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __init__(self, *args, **kwargs):
        # Subclasses provide their own constructor; reaching this one means
        # the base class (or a subclass without __init__) was instantiated.
        raise NotImplementedError('BiomolecularResidue must be subclassed')

    @classmethod
    def get(cls, key):
        """
        Look up the residue registered under ``key``.

        Subclasses must override this and raise ``KeyError`` for unknown keys,
        since :meth:`has` relies on that exception.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError('BiomolecularResidue must be subclassed')

    def __str__(self):
        """ The string form of a residue is its ``name`` attribute """
        return self.name

    @classmethod
    def has(cls, thing):
        """
        Determines if a particular BiomolecularResidue or residue name is
        present in this classification of biomolecular residues

        Parameters
        ----------
        thing : str or :class:`BiomolecularResidue`
            A residue instance, or any key accepted by this class's ``get``

        Returns
        -------
        contains : bool
            If the residue or residue name *is* of this type, True. Otherwise,
            False.
        """
        if isinstance(thing, BiomolecularResidue):
            # Instances are matched against this class's own registry, so a
            # residue of a different classification reports False.
            return thing in cls.all_residues
        try:
            cls.get(thing)
        except KeyError:
            return False
        return True

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

class AminoAcidResidue(BiomolecularResidue):
    """
    An individual amino acid residue.

    Creating an instance registers it in the class-level lookup tables used by
    :meth:`get`, keyed (upper-cased) by name, abbreviation, symbol and aliases.

    Parameters
    ----------
    name : str
        The name of the residue
    abbr : str
        The 3-letter abbreviation of the amino acid residue
    symbol : str or None
        The 1-letter symbol of the amino acid. If None, the residue is not
        registered under any symbol
    aliases : list of str, optional
        A list of other abbreviations that *also* refer to this residue

    Raises
    ------
    ValueError
        If any alias is already registered as an abbreviation or alias
        (including this residue's own abbreviation). Nothing is registered in
        that case.
    """
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __init__(self, name, abbr, symbol, aliases=None):
        registry = type(self)
        name_key = name.upper()
        abbr_key = abbr.upper()
        symbol_key = None if symbol is None else symbol.upper()

        # Validate every alias *before* touching the registries so that a
        # collision cannot leave a half-registered residue behind.
        alias_keys = []
        if aliases is not None:
            for alias in aliases:
                alias = alias.upper()
                if (alias == abbr_key or alias in alias_keys
                        or alias in registry._all_residues_by_abbr):
                    raise ValueError('%s is already an abbreviation' % alias)
                alias_keys.append(alias)

        self.name = name
        self.abbr = abbr
        self.symbol = symbol
        registry._all_residues_by_name[name_key] = self
        registry._all_residues_by_abbr[abbr_key] = self
        if symbol_key is not None:
            registry._all_residues_by_symbol[symbol_key] = self
        registry.all_residues.append(self)
        for alias in alias_keys:
            registry._all_residues_by_abbr[alias] = self

    def __repr__(self):
        return '<Amino Acid Residue %s: %s [%s]>' % (self.name, self.abbr,
                self.symbol)

    @classmethod
    def get(cls, key, abbronly=False):
        """
        Gets the amino acid corresponding to either the residue name, 3-letter
        abbreviation or 1-letter symbol. It is case-insensitive.

        A 4-character key whose first letter is C or N is also tried as a
        terminal residue, i.e. its last three characters are looked up as an
        abbreviation.

        Parameters
        ----------
        key : str
            1-letter symbol, 3-letter abbreviation, or residue name
        abbronly : bool
            If True, only look for the 3-letter abbreviation (not the 1-letter
            symbol)

        Returns
        -------
        residue : :class:`AminoAcidResidue`
            The residue corresponding to the given key

        Raises
        ------
        KeyError if ``key`` is not a symbol, abbreviation, or case-insensitive
        name of an amino acid residue, or any of its abbreviations.
        """
        if len(key) == 1 and not abbronly:
            return cls._all_residues_by_symbol[key.upper()]
        if len(key) == 3:
            return cls._all_residues_by_abbr[key.upper()]
        # Handle C- and N-termini that may be prepended with C or N
        if len(key) == 4 and key[0].upper() in 'CN':
            terminal = cls._all_residues_by_abbr.get(key[1:].upper())
            if terminal is not None:
                return terminal
            # Not a terminal form; fall through so that a 4-letter residue
            # name starting with C or N can still be found by name.
        return cls._all_residues_by_name[key.upper()]

# Amino acid residues known to this module. Each constructor call registers
# the residue with AminoAcidResidue, so the order below is the order of
# AminoAcidResidue.all_residues, and an alias that is already registered
# raises ValueError at import time.
ALA = AminoAcidResidue(name='Alanine', abbr='ALA', symbol='A')
ARG = AminoAcidResidue(name='Arginine', abbr='ARG', symbol='R')
ASN = AminoAcidResidue(name='Asparagine', abbr='ASN', symbol='N')
ASP = AminoAcidResidue(name='Aspartate', abbr='ASP', symbol='D',
                       aliases=['ASH', 'AS4'])
CYS = AminoAcidResidue(name='Cysteine', abbr='CYS', symbol='C',
                       aliases=['CYM', 'CYX'])
GLU = AminoAcidResidue(name='Glutamate', abbr='GLU', symbol='E',
                       aliases=['GLH', 'GL4'])
GLN = AminoAcidResidue(name='Glutamine', abbr='GLN', symbol='Q')
GLY = AminoAcidResidue(name='Glycine', abbr='GLY', symbol='G')
HIS = AminoAcidResidue(name='Histidine', abbr='HIS', symbol='H',
                       aliases=['HIP', 'HIE', 'HID'])
# Hydroxyproline has no 1-letter symbol, so it is reachable only by name or
# abbreviation.
HYP = AminoAcidResidue(name='Hydroxyproline', abbr='HYP', symbol=None)
ILE = AminoAcidResidue(name='Isoleucine', abbr='ILE', symbol='I')
LEU = AminoAcidResidue(name='Leucine', abbr='LEU', symbol='L')
LYS = AminoAcidResidue(name='Lysine', abbr='LYS', symbol='K',
                       aliases=['LYN'])
MET = AminoAcidResidue(name='Methionine', abbr='MET', symbol='M')
PHE = AminoAcidResidue(name='Phenylalanine', abbr='PHE', symbol='F')
PRO = AminoAcidResidue(name='Proline', abbr='PRO', symbol='P')
SER = AminoAcidResidue(name='Serine', abbr='SER', symbol='S')
THR = AminoAcidResidue(name='Threonine', abbr='THR', symbol='T')
TRP = AminoAcidResidue(name='Tryptophan', abbr='TRP', symbol='W')
TYR = AminoAcidResidue(name='Tyrosine', abbr='TYR', symbol='Y')
VAL = AminoAcidResidue(name='Valine', abbr='VAL', symbol='V')

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

class DNAResidue(BiomolecularResidue):
    """ An individual DNA residue

    Creating an instance registers it in the class-level lookup tables used by
    :meth:`get`, keyed (upper-cased) by name, abbreviation and aliases.

    Parameters
    ----------
    name : str
        The name of the residue
    abbr : str
        The abbreviation of the nucleic acid residue
    aliases : list of str, optional
        A list of other abbreviations that *also* refer to this residue

    Raises
    ------
    ValueError
        If any alias is already registered as an abbreviation or alias
        (including this residue's own abbreviation). Nothing is registered in
        that case.
    """
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __init__(self, name, abbr, aliases=None):
        registry = type(self)
        name_key = name.upper()
        abbr_key = abbr.upper()

        # Validate every alias *before* touching the registries so that a
        # collision cannot leave a half-registered residue behind.
        alias_keys = []
        if aliases is not None:
            for alias in aliases:
                alias = alias.upper()
                if (alias == abbr_key or alias in alias_keys
                        or alias in registry._all_residues_by_abbr):
                    raise ValueError('%s is already an abbreviation' % alias)
                alias_keys.append(alias)

        self.name = name
        self.abbr = abbr
        registry._all_residues_by_name[name_key] = self
        registry._all_residues_by_abbr[abbr_key] = self
        registry.all_residues.append(self)
        for alias in alias_keys:
            registry._all_residues_by_abbr[alias] = self

    def __repr__(self):
        return '<DNA Residue %s: %s>' % (self.name, self.abbr)

    @classmethod
    def get(cls, key):
        """
        Gets the nucleic acid corresponding to either the residue name or
        abbreviation. It is case-insensitive.

        The key is tried, in order, as an exact abbreviation/alias, as an
        abbreviation followed by a trailing ``3`` or ``5``, and finally as a
        residue name.

        Parameters
        ----------
        key : str
            abbreviation or residue name

        Returns
        -------
        residue : :class:`DNAResidue`
            The residue corresponding to the given key

        Raises
        ------
        KeyError if ``key`` is not a recognized residue name or abbreviation for
        a DNA residue (this includes the empty string).
        """
        lookup = key.upper()
        by_abbr = cls._all_residues_by_abbr
        if lookup in by_abbr:
            return by_abbr[lookup]
        # Strip a trailing 3 or 5 and retry; the emptiness check keeps '' from
        # raising IndexError instead of the documented KeyError.
        if lookup and lookup[-1] in '35' and lookup[:-1] in by_abbr:
            return by_abbr[lookup[:-1]]
        return cls._all_residues_by_name[lookup]

class RNAResidue(DNAResidue):
    """ An individual RNA residue

    Construction and registration are inherited from :class:`DNAResidue`; the
    registries are redeclared here so RNA residues are kept separate from DNA
    residues.

    Parameters
    ----------
    name : str
        The name of the residue
    abbr : str
        The abbreviation of the nucleic acid residue
    aliases : list of str, optional
        A list of other abbreviations that *also* refer to this residue
    """
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __repr__(self):
        return '<RNA Residue %s: %s>' % (self.name, self.abbr)

    @classmethod
    def get(cls, key):
        """
        Gets the nucleic acid corresponding to either the residue name or
        abbreviation. It is case-insensitive.

        Parameters
        ----------
        key : str
            abbreviation or residue name

        Returns
        -------
        residue : :class:`RNAResidue`
            The residue corresponding to the given key

        Raises
        ------
        KeyError if ``key`` is not a recognized residue name or abbreviation for
        an RNA residue.
        """
        # The lookup rules are the same as for DNA; the parent classmethod
        # reads the tables through ``cls``, so it searches the RNA registries.
        return super(RNAResidue, cls).get(key)

# DNA residues. Each call registers the residue (and its aliases) with
# DNAResidue; an alias that is already registered raises ValueError.
DG = DNAResidue(name='Guanine', abbr='DG',
                aliases=['GUA', 'DG5', 'DG3', 'DGN'])
DC = DNAResidue(name='Cytosine', abbr='DC',
                aliases=['CYT', 'DC5', 'DC3', 'DCN', 'DCP'])
DA = DNAResidue(name='Adenine', abbr='DA',
                aliases=['ADE', 'DA5', 'DA3', 'DAN', 'DAP'])
DT = DNAResidue(name='Thymine', abbr='DT',
                aliases=['THY', 'DT5', 'DT3'])

# RNA residues. RNAResidue keeps registries separate from DNAResidue, which
# is why names and aliases such as 'GUA' can appear in both groups.
# NOTE(review): A has no 'RAN' alias although G, C, U and T list
# 'RGN'/'RCN'/'RUN'/'RTN' -- confirm whether that omission is intentional.
G = RNAResidue(name='Guanine', abbr='G',
               aliases=['GUA', 'G5', 'G3', 'GN',
                        'RG', 'RG3', 'RG5', 'RGN',
                        'GF2', 'M2G', 'YYG', '7MG', 'OMG', '2MG'])
C = RNAResidue(name='Cytosine', abbr='C',
               aliases=['CYT', 'CP', 'C5', 'C3', 'CN',
                        'RC', 'RC5', 'RC3', 'RCN',
                        'CFZ', '5MC', 'OMC'])
A = RNAResidue(name='Adenine', abbr='A',
               aliases=['ADE', 'AP', 'A5', 'A3', 'AN',
                        'RA', 'RA3', 'RA5',
                        'AF2', '1MA'])
U = RNAResidue(name='Uracil', abbr='U',
               aliases=['URA', 'U3', 'U5', 'UN',
                        'RU', 'RU3', 'RU5', 'RUN',
                        'UFT', '5MU', 'H2U', 'PSU'])
T = RNAResidue(name='Thymine', abbr='T',
               aliases=['THY', 'T3', 'T5', 'TN',
                        'RT', 'RT3', 'RT5', 'RTN'])

# Residue names treated as water.
WATER_NAMES = {
    'WAT', 'HOH',
    'TIP3', 'TIP4', 'TIP5',
    'SPC', 'SPCE',
}
# Solvent is every water name plus the generic 'SOL'.
SOLVENT_NAMES = WATER_NAMES.union({'SOL'})
# Residue names treated as extra points.
EXTRA_POINT_NAMES = {'EP', 'LP'}
# Residue names treated as cations; matching is case-sensitive, so both
# 'Na+' and 'NA+' are listed.
CATION_NAMES = {
    'Li+', 'LIT',
    'Na+', 'NA+', 'NA', 'SOD',
    'K+', 'K', 'POT',
    'Rb+', 'RUB',
    'Cs+',
    'Mg+', 'MG', 'MG2',
    'CAL',
    'ZN2',
    'CD2',
}
# Residue names treated as anions.
ANION_NAMES = {
    'F-',
    'Cl-', 'CL-', 'CL', 'CLA',
    'Br-', 'BR',
    'I-',
}
# Every ion name, regardless of charge sign.
ALLION_NAMES = CATION_NAMES.union(ANION_NAMES)