Source code for idpet.data.topology

"""
Work with topologies of protein systems.
"""

import mdtraj
import numpy as np


aa_three_letters = ["GLN", "TRP", "GLU", "ARG", "THR",
                    "TYR", "ILE", "PRO", "ALA", "SER",
                    "ASP", "PHE", "GLY", "HIS", "LYS",
                    "LEU", "CYS", "VAL", "ASN", "MET"]

aa_one_to_three_dict = {
    "G": "GLY", "A": "ALA", "L": "LEU", "I": "ILE", "R": "ARG", "K": "LYS",
    "M": "MET", "C": "CYS", "Y": "TYR", "T": "THR", "P": "PRO", "S": "SER",
    "W": "TRP", "D": "ASP", "E": "GLU", "N": "ASN", "Q": "GLN", "F": "PHE",
    "H": "HIS", "V": "VAL", "X": "UNK"
}

[docs] def get_ca_topology(sequence: str, bead_name: str = "CA") -> mdtraj.Topology: """ input: amino acid sequence. output: a mdtraj topology with one bead per residue. """ topology = mdtraj.Topology() chain = topology.add_chain() for res in sequence: res_obj = topology.add_residue(aa_one_to_three_dict[res], chain) topology.add_atom(bead_name, mdtraj.core.topology.elem.carbon, res_obj) return topology
[docs] def slice_traj_to_com(traj: mdtraj.Trajectory) -> mdtraj.Trajectory: ha_ids = [a.index for a in traj.topology.atoms if \ a.residue.name in aa_three_letters and \ a.element.symbol != "H"] ha_traj = traj.atom_slice(ha_ids) residues = list(ha_traj.topology.residues) com_xyz = np.zeros((ha_traj.xyz.shape[0], len(residues), 3)) for i, residue_i in enumerate(residues): ha_ids_i = [a.index for a in residue_i.atoms] masses_i = np.array([a.element.mass for a in residue_i.atoms]) masses_i = masses_i[None,:,None] tot_mass_i = masses_i.sum() com_xyz_i = np.sum(ha_traj.xyz[:,ha_ids_i,:]*masses_i, axis=1)/tot_mass_i com_xyz[:,i,:] = com_xyz_i return mdtraj.Trajectory( xyz=com_xyz, topology=get_ca_topology( sequence="".join([r.code for r in ha_traj.topology.residues]) ))