# Source code for parmed.residue

"""
This module contains basic information and functionality related to individual
residues in typical biopolymers.
"""

# Public names exported by ``from <module> import *``
__all__ = [
    # residue classes
    'AminoAcidResidue', 'RNAResidue', 'DNAResidue',
    # amino acid residue instances
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'HYP',
    'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR',
    'VAL',
    # DNA residue instances
    'DA', 'DT', 'DG', 'DC',
    # RNA residue instances
    'A', 'U', 'G', 'C',
    # collections of residue names
    'SOLVENT_NAMES', 'EXTRA_POINT_NAMES', 'CATION_NAMES', 'ANION_NAMES',
    'ALLION_NAMES',
]

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

class BiomolecularResidue(object):
    """
    Base class for different classes of biopolymer residues.

    This class is not meant to be instantiated directly. Subclasses provide
    their own registries (the class-level dictionaries and ``all_residues``
    list below), an ``__init__`` that fills them, and a ``get`` classmethod
    that looks a residue up by key and raises ``KeyError`` when it is unknown.
    """
    # Registries; subclasses define their own so that each residue family
    # keeps a separate set of entries
    _all_residues_by_name = {}
    _all_residues_by_abbr = {}
    _all_residues_by_symbol = {}
    all_residues = []

    def __init__(self, *args, **kwargs):
        # Instantiating the base class is always an error; subclasses
        # override __init__ without calling this one
        raise NotImplementedError('BiomolecularResidue must be subclassed')

    @classmethod
    def get(cls, key):
        """
        Look up a residue by key. Must be implemented by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in the base class
        """
        raise NotImplementedError('BiomolecularResidue must be subclassed')

    def __str__(self):
        # ``name`` is assigned by the subclass constructors
        return self.name

    @classmethod
    def has(cls, thing):
        """
        Determines if a particular BiomolecularResidue or residue name is
        present in this classification of biomolecular residues

        Parameters
        ----------
        thing : str or :class:`BiomolecularResidue`
            The residue instance, or a key understood by ``cls.get``

        Returns
        -------
        contains : bool
            If the residue or residue name *is* of this type, True. Otherwise,
            False.
        """
        if isinstance(thing, BiomolecularResidue):
            return thing in cls.all_residues
        # Anything else is treated as a lookup key; an unknown key is
        # signalled by ``get`` raising KeyError
        try:
            cls.get(thing)
        except KeyError:
            return False
        else:
            return True

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

class AminoAcidResidue(BiomolecularResidue):
    """
    An individual amino acid residue.

    Every instance registers itself in the class-level lookup tables when it
    is created, so that it can later be retrieved with :meth:`get`.

    Parameters
    ----------
    name : str
        The name of the residue
    abbr : str
        The 3-letter abbreviation of the amino acid residue
    symbol : str or None
        The 1-letter symbol of the amino acid. If None, the residue is not
        registered under any symbol
    aliases : list of str, optional
        A list of other abbreviations that *also* refer to this residue

    Raises
    ------
    ValueError
        If any alias is already registered as an abbreviation (this includes
        the residue's own ``abbr`` and an earlier entry of ``aliases``). In
        that case nothing is registered.
    """
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __init__(self, name, abbr, symbol, aliases=None):
        registry = type(self)
        name_key = name.upper()
        abbr_key = abbr.upper()
        alias_keys = [] if aliases is None else [a.upper() for a in aliases]

        # Validate every alias *before* touching the registries, so that a
        # conflict does not leave a half-registered residue behind
        claimed = {abbr_key}
        for alias in alias_keys:
            if alias in claimed or alias in registry._all_residues_by_abbr:
                raise ValueError('%s is already an abbreviation' % alias)
            claimed.add(alias)

        self.name = name
        self.abbr = abbr
        self.symbol = symbol

        registry._all_residues_by_name[name_key] = self
        registry._all_residues_by_abbr[abbr_key] = self
        if symbol is not None:
            registry._all_residues_by_symbol[symbol.upper()] = self
        registry.all_residues.append(self)
        for alias in alias_keys:
            registry._all_residues_by_abbr[alias] = self

    def __repr__(self):
        return '<Amino Acid Residue %s: %s [%s]>' % (self.name, self.abbr,
                                                     self.symbol)

    @classmethod
    def get(cls, key, abbronly=False):
        """
        Gets the amino acid corresponding to either the residue name, 3-letter
        abbreviation or 1-letter symbol. It is case-insensitive.

        Parameters
        ----------
        key : str
            1-letter symbol, 3-letter abbreviation, or residue name
        abbronly : bool
            If True, only look for the 3-letter abbreviation (not the 1-letter
            symbol)

        Returns
        -------
        residue : :class:`AminoAcidResidue`
            The residue corresponding to the given key

        Raises
        ------
        KeyError if ``key`` is not a symbol, abbreviation, or case-insensitive
        name of an amino acid residue, or any of its abbreviations.
        """
        upper = key.upper()
        size = len(key)
        if size == 1 and not abbronly:
            return cls._all_residues_by_symbol[upper]
        if size == 3:
            return cls._all_residues_by_abbr[upper]
        # Handle C- and N-termini that may be prepended with C or N
        if size == 4 and key[0].upper() in 'CN':
            return cls._all_residues_by_abbr[key[1:].upper()]
        # Everything else is treated as a full residue name
        return cls._all_residues_by_name[upper]
# Amino acid residues. ``aliases`` lists alternative abbreviations that are
# registered for the same residue; HYP has no 1-letter symbol.
ALA = AminoAcidResidue('Alanine', 'ALA', 'A')
ARG = AminoAcidResidue('Arginine', 'ARG', 'R')
ASN = AminoAcidResidue('Asparagine', 'ASN', 'N')
ASP = AminoAcidResidue('Aspartate', 'ASP', 'D', aliases=['ASH', 'AS4'])
CYS = AminoAcidResidue('Cysteine', 'CYS', 'C', aliases=['CYM', 'CYX'])
GLU = AminoAcidResidue('Glutamate', 'GLU', 'E', aliases=['GLH', 'GL4'])
GLN = AminoAcidResidue('Glutamine', 'GLN', 'Q')
GLY = AminoAcidResidue('Glycine', 'GLY', 'G')
HIS = AminoAcidResidue('Histidine', 'HIS', 'H',
                       aliases=['HIP', 'HIE', 'HID'])
HYP = AminoAcidResidue('Hydroxyproline', 'HYP', symbol=None)
ILE = AminoAcidResidue('Isoleucine', 'ILE', 'I')
LEU = AminoAcidResidue('Leucine', 'LEU', 'L')
LYS = AminoAcidResidue('Lysine', 'LYS', 'K', aliases=['LYN'])
MET = AminoAcidResidue('Methionine', 'MET', 'M')
PHE = AminoAcidResidue('Phenylalanine', 'PHE', 'F')
PRO = AminoAcidResidue('Proline', 'PRO', 'P')
SER = AminoAcidResidue('Serine', 'SER', 'S')
THR = AminoAcidResidue('Threonine', 'THR', 'T')
TRP = AminoAcidResidue('Tryptophan', 'TRP', 'W')
TYR = AminoAcidResidue('Tyrosine', 'TYR', 'Y')
VAL = AminoAcidResidue('Valine', 'VAL', 'V')

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class DNAResidue(BiomolecularResidue):
    """
    An individual DNA residue

    Every instance registers itself in the class-level lookup tables when it
    is created, so that it can later be retrieved with :meth:`get`.

    Parameters
    ----------
    name : str
        The name of the residue
    abbr : str
        The abbreviation of the nucleic acid residue
    aliases : list of str, optional
        A list of other abbreviations that *also* refer to this residue

    Raises
    ------
    ValueError
        If any alias is already registered as an abbreviation (this includes
        the residue's own ``abbr`` and an earlier entry of ``aliases``). In
        that case nothing is registered.
    """
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __init__(self, name, abbr, aliases=None):
        registry = type(self)
        name_key = name.upper()
        abbr_key = abbr.upper()
        alias_keys = [] if aliases is None else [a.upper() for a in aliases]

        # Validate every alias *before* touching the registries, so that a
        # conflict does not leave a half-registered residue behind
        claimed = {abbr_key}
        for alias in alias_keys:
            if alias in claimed or alias in registry._all_residues_by_abbr:
                raise ValueError('%s is already an abbreviation' % alias)
            claimed.add(alias)

        self.name = name
        self.abbr = abbr

        registry._all_residues_by_name[name_key] = self
        registry._all_residues_by_abbr[abbr_key] = self
        registry.all_residues.append(self)
        for alias in alias_keys:
            registry._all_residues_by_abbr[alias] = self

    def __repr__(self):
        return '<DNA Residue %s: %s>' % (self.name, self.abbr)

    @classmethod
    def get(cls, key):
        """
        Gets the nucleic acid corresponding to either the residue name or
        abbreviation. It is case-insensitive.

        A trailing ``3`` or ``5`` on the key is stripped before the
        abbreviation lookup; if the stripped form is unknown, the key is tried
        unchanged as an abbreviation and finally as a residue name.

        Parameters
        ----------
        key : str
            abbreviation or residue name

        Returns
        -------
        residue : :class:`DNAResidue`
            The residue corresponding to the given key

        Raises
        ------
        KeyError if ``key`` is not a recognized residue name or abbreviation for
        an DNA residue.
        """
        # An empty key can never match; report it as KeyError (as documented)
        # rather than letting ``key[-1]`` raise IndexError. ``len`` is used so
        # that non-sequence keys still fail with TypeError as before.
        if len(key) == 0:
            raise KeyError(key)
        upper = key.upper()
        by_abbr = cls._all_residues_by_abbr
        if key[-1] in '35':
            stem = key[:-1].upper()
            if stem in by_abbr:
                return by_abbr[stem]
            # Fall through: the abbreviation itself may end in 3 or 5
        if upper in by_abbr:
            return by_abbr[upper]
        return cls._all_residues_by_name[upper]
class RNAResidue(DNAResidue):
    """
    An individual RNA residue

    Construction is inherited from :class:`DNAResidue`; this class keeps its
    own registries so that RNA and DNA residues are looked up separately.

    Parameters
    ----------
    name : str
        The name of the residue
    abbr : str
        The abbreviation of the nucleic acid residue
    aliases : list of str, optional
        A list of other abbreviations that *also* refer to this residue
    """
    _all_residues_by_name = dict()
    _all_residues_by_abbr = dict()
    _all_residues_by_symbol = dict()
    all_residues = []

    def __repr__(self):
        return '<RNA Residue %s: %s>' % (self.name, self.abbr)

    @classmethod
    def get(cls, key):
        """
        Gets the nucleic acid corresponding to either the residue name or
        abbreviation. It is case-insensitive.

        A trailing ``3`` or ``5`` on the key is stripped before the
        abbreviation lookup; if the stripped form is unknown, the key is tried
        unchanged as an abbreviation and finally as a residue name.

        Parameters
        ----------
        key : str
            abbreviation or residue name

        Returns
        -------
        residue : :class:`RNAResidue`
            The residue corresponding to the given key

        Raises
        ------
        KeyError if ``key`` is not a recognized residue name or abbreviation for
        an RNA residue.
        """
        # An empty key can never match; report it as KeyError (as documented)
        # rather than letting ``key[-1]`` raise IndexError. ``len`` is used so
        # that non-sequence keys still fail with TypeError as before.
        if len(key) == 0:
            raise KeyError(key)
        upper = key.upper()
        by_abbr = cls._all_residues_by_abbr
        if key[-1] in '35':
            stem = key[:-1].upper()
            if stem in by_abbr:
                return by_abbr[stem]
            # Fall through: the abbreviation itself may end in 3 or 5
        if upper in by_abbr:
            return by_abbr[upper]
        return cls._all_residues_by_name[upper]
# DNA residues; ``aliases`` are additional abbreviations for the same residue
DG = DNAResidue('Guanine', 'DG', aliases=['GUA', 'DG5', 'DG3', 'DGN'])
DC = DNAResidue('Cytosine', 'DC',
                aliases=['CYT', 'DC5', 'DC3', 'DCN', 'DCP'])
DA = DNAResidue('Adenine', 'DA',
                aliases=['ADE', 'DA5', 'DA3', 'DAN', 'DAP'])
DT = DNAResidue('Thymine', 'DT', aliases=['THY', 'DT5', 'DT3'])

# RNA residues
G = RNAResidue(
    'Guanine', 'G',
    aliases=['GUA', 'G5', 'G3', 'GN', 'RG', 'RG3', 'RG5', 'RGN', 'GF2',
             'M2G', 'YYG', '7MG', 'OMG', '2MG'],
)
C = RNAResidue(
    'Cytosine', 'C',
    aliases=['CYT', 'CP', 'C5', 'C3', 'CN', 'RC', 'RC5', 'RC3', 'RCN',
             'CFZ', '5MC', 'OMC'],
)
A = RNAResidue(
    'Adenine', 'A',
    aliases=['ADE', 'AP', 'A5', 'A3', 'AN', 'RA', 'RA3', 'RA5', 'AF2',
             '1MA'],
)
U = RNAResidue(
    'Uracil', 'U',
    aliases=['URA', 'U3', 'U5', 'UN', 'RU', 'RU3', 'RU5', 'RUN', 'UFT',
             '5MU', 'H2U', 'PSU'],
)
T = RNAResidue(
    'Thymine', 'T',
    aliases=['THY', 'T3', 'T5', 'TN', 'RT', 'RT3', 'RT5', 'RTN'],
)

# Sets of residue names, grouped by kind
WATER_NAMES = {'WAT', 'HOH', 'TIP3', 'TIP4', 'TIP5', 'SPCE', 'SPC'}
SOLVENT_NAMES = WATER_NAMES.union({'SOL'})
EXTRA_POINT_NAMES = {'EP', 'LP'}
CATION_NAMES = {
    'Na+', 'Li+', 'Mg+', 'Rb+', 'MG', 'Cs+', 'POT', 'SOD', 'MG2', 'CAL',
    'RUB', 'LIT', 'ZN2', 'CD2', 'NA', 'K+', 'K', 'NA+',
}
ANION_NAMES = {'Cl-', 'Br-', 'F-', 'I-', 'CLA', 'CL', 'BR', 'CL-'}
# Every ion name, regardless of charge sign
ALLION_NAMES = CATION_NAMES.union(ANION_NAMES)