Source code for idpet.featurization.distances

from typing import List, Tuple, Union
import numpy as np
import mdtraj
from idpet.data.topology import slice_traj_to_com
from idpet.featurization.utils import get_max_sep


#--------------------------------------------------------------------
# Calculate (N, L, L) distance maps. Mostly used for visualization. -
#--------------------------------------------------------------------

ca_selector = "protein and name CA"

def _calc_dmap(traj: mdtraj.Trajectory, 
               min_sep: int = 1, 
               max_sep: Union[int, None] = None) -> np.ndarray:
    ca_ids = traj.topology.select(ca_selector)
    ca_xyz = traj.xyz[:, ca_ids]  # shape: (n_frames, n_res, 3)
    n_res = ca_xyz.shape[1]

    # Compute full pairwise distance map
    dmap = np.sqrt(
        np.sum((ca_xyz[:, :, None, :] - ca_xyz[:, None, :, :]) ** 2, axis=3)
    )  # shape: (n_frames, n_res, n_res)

    # Compute sequence separation mask
    seq_sep = np.abs(np.arange(n_res)[:, None] - np.arange(n_res)[None, :])
    mask = seq_sep >= min_sep
    if max_sep is not None:
        mask &= seq_sep <= max_sep

    # Apply mask: set distances outside the range to NaN
    dmap[:, ~mask] = np.nan

    return dmap

[docs] def calc_ca_dmap(traj: mdtraj.Trajectory, min_sep: int = 1, max_sep: Union[int, None] = None) -> np.ndarray: """ Calculate the (N, L, L) distance maps between C-alpha atoms for visualization. Parameters ---------- traj : mdtraj.Trajectory The MDtraj trajectory object. Returns ------- dmap : numpy.ndarray The distance maps of shape (N, L, L), where N is the number of frames and L is the number of C-alpha atoms. Notes ----- This function calculates the distance maps between C-alpha atoms for visualization purposes. """ return _calc_dmap(traj=traj, min_sep=min_sep, max_sep=max_sep)
[docs] def calc_com_dmap(traj: mdtraj.Trajectory, min_sep: int = 1, max_sep: Union[int, None] = None) -> np.ndarray: """ Calculate the (N, L, L) distance maps between center of mass (COM) atoms for visualization. Parameters ---------- traj : mdtraj.Trajectory The MDtraj trajectory object. Returns ------- dmap : numpy.ndarray The distance maps of shape (N, L, L), where N is the number of frames and L is the number of center of mass (COM) atoms. Notes ----- This function calculates the distance maps between center of mass (COM) atoms for visualization purposes. """ traj = slice_traj_to_com(traj) return _calc_dmap(traj=traj, min_sep=min_sep, max_sep=max_sep)
#--------------------------------------------------------------------- # Calculate (N, *) distance features. Mostly used for featurization. - #--------------------------------------------------------------------- def _featurize_dist( traj: mdtraj.Trajectory, min_sep: int = 2, max_sep: Union[None, int, float] = None, inverse: bool = False, get_names: bool = True, atom_selector: str = "name == CA" ): # Get all C-alpha indices. ca_ids = traj.topology.select(atom_selector) atoms = list(traj.topology.atoms) max_sep = get_max_sep(L=len(atoms), max_sep=max_sep) # Get all pair of ids. pair_ids = [] names = [] for i, id_i in enumerate(ca_ids): for j, id_j in enumerate(ca_ids): if j - i >= min_sep: if j - i > max_sep: continue pair_ids.append([id_i, id_j]) if get_names: names.append( f"{repr(atoms[id_i].residue)}-{repr(atoms[id_j].residue)}" ) # Calculate C-alpha - C-alpha distances. ca_dist = mdtraj.compute_distances(traj=traj, atom_pairs=pair_ids) if inverse: ca_dist = 1 / ca_dist if get_names: return ca_dist, names else: return ca_dist
[docs] def featurize_ca_dist( traj: mdtraj.Trajectory, get_names: bool = True, atom_selector: str = "name CA", *args, **kwargs) -> Union[np.ndarray, Tuple[np.ndarray, List[str]]]: """ Calculate C-alpha distances between pairs of residues. Parameters ---------- traj : mdtraj.Trajectory The MDtraj trajectory object. min_sep : int, optional The minimum sequence separation for distance calculations. Default is 2. max_sep : int or None, optional The maximum sequence separation for distance calculations. Default is None. inverse : bool, optional Whether to calculate inverse distances. Default is False. get_names : bool, optional Whether to return the names of the calculated features. Default is True. atom_selector : str, optional The atom selection string. Default is "name CA". Returns ------- distances : numpy.ndarray or Tuple The calculated C-alpha distances. If get_names is True, returns a tuple containing distances and corresponding feature names. Notes ----- This function calculates C-alpha distances between pairs of residues. """ return _featurize_dist(traj=traj, get_names=get_names, atom_selector=atom_selector, *args, **kwargs)
[docs] def featurize_com_dist( traj: mdtraj.Trajectory, min_sep: int = 2, max_sep: int = None, inverse: bool = False, get_names: bool = True, atom_selector: str = "name == CA") -> Union[np.ndarray, Tuple[np.ndarray, List[str]]]: """ Calculate center of mass (COM) distances between pairs of residues. Parameters ---------- traj : mdtraj.Trajectory The MDtraj trajectory object. min_sep : int, optional The minimum sequence separation for distance calculations. Default is 2. max_sep : int or None, optional The maximum sequence separation for distance calculations. Default is None. inverse : bool, optional Whether to calculate inverse distances. Default is False. get_names : bool, optional Whether to return the names of the calculated features. Default is True. atom_selector : str, optional The atom selection string. Default is "name == CA". Returns ------- distances : numpy.ndarray or Tuple The calculated center of mass (COM) distances. If get_names is True, returns a tuple containing distances and corresponding feature names. Notes ----- This function calculates center of mass (COM) distances between pairs of residues. """ traj = slice_traj_to_com(traj) return _featurize_dist(traj=traj, min_sep=min_sep, max_sep=max_sep, inverse=inverse, get_names=get_names, atom_selector=atom_selector)
[docs] def rmsd( traj: mdtraj.Trajectory, ): rmsd_matrix = np.empty((traj.n_frames, traj.n_frames)) for i in range(traj.n_frames): rmsd_matrix[i] = mdtraj.rmsd(traj,traj, i) return rmsd_matrix