Source code for idpet.coord

import numpy as np
import mdtraj


[docs]
def get_distance_matrix(xyz):
    """
    Gets an ensemble of xyz conformations with shape (N, L, 3) and
    returns the corresponding distance matrices with shape (N, L, L).
    """
    return np.sqrt(np.sum(np.square(xyz[:,None,:,:]-xyz[:,:,None,:]), axis=3))



[docs]
def get_contact_map(dmap, threshold=0.8, pseudo_count=0.01):
    """
    Gets a trajectory of distance maps with shape (N, L, L) and
    returns a (L, L) contact probability map.
    """
    n = dmap.shape[0]
    cmap = ((dmap <= threshold).astype(int).sum(axis=0)+pseudo_count)/n
    return cmap




[docs]
def calc_chain_dihedrals(xyz, norm=False):
    # not sure where this function is used
    r_sel = xyz
    b0 = -(r_sel[:,1:-2,:] - r_sel[:,0:-3,:])
    b1 = r_sel[:,2:-1,:] - r_sel[:,1:-2,:]
    b2 = r_sel[:,3:,:] - r_sel[:,2:-1,:]
    b0xb1 = np.cross(b0, b1)
    b1xb2 = np.cross(b2, b1)
    b0xb1_x_b1xb2 = np.cross(b0xb1, b1xb2)
    y = np.sum(b0xb1_x_b1xb2*b1, axis=2)*(1.0/np.linalg.norm(b1, dim=2))
    x = np.sum(b0xb1*b1xb2, axis=2)
    dh_vals = np.atan2(y, x)
    if not norm:
        return dh_vals
    else:
        return dh_vals/np.pi

    


[docs]
def create_consecutive_indices_matrix(ca_indices):

    """This function gets the CA indices of (L,) shape and 
    create all possible 4 consecutive indices with the shape (L-3, 4) """

    n = len(ca_indices)
    if n < 4:
        raise ValueError("Input array must contain at least 4 indices.")

    # Calculate the number of rows in the resulting matrix
    num_rows = n - 3

    # Create an empty matrix to store the consecutive indices
    consecutive_indices_matrix = np.zeros((num_rows, 4), dtype=int)

    # Fill the matrix with consecutive indices
    for i in range(num_rows):
        consecutive_indices_matrix[i] = ca_indices[i:i+4]

    return consecutive_indices_matrix



[docs]
def contact_probability_map(traj, scheme='ca', contact='all', threshold=0.8):
    cmap_out = mdtraj.compute_contacts(traj, contacts=contact, scheme=scheme)
    distances, res_pair = cmap_out
    contact_distance = mdtraj.geometry.squareform(distances, res_pair)
    # Get the number of distances below `threshold` and divide by number of
    # conformers.
    matrix_prob_avg = np.mean(contact_distance < threshold, axis=0)
    return matrix_prob_avg



[docs]
def dict_phi_psi_normal_cases(dict_phi_psi):
    dict_phi_psi_normal_case={}
    for key in dict_phi_psi.keys():
        array_phi=dict_phi_psi[key][0][:,:-1]
        array_psi=dict_phi_psi[key][1][:,1:]
        dict_phi_psi_normal_case[key]=[array_phi,array_psi]
    return dict_phi_psi_normal_case



[docs]
def split_dictionary_phipsiangles(features_dict):
    dict_phi_psi={}
    for ens_code, features in features_dict.items():
        num_columns=len(features[0])
        split_index=num_columns//2
        phi_list=features[:,:split_index]
        psi_list=features[:,split_index:]
        dict_phi_psi[ens_code]=[phi_list,psi_list]
    return dict_phi_psi



[docs]
def ss_measure_disorder(features_dict:dict):
    
    """This function accepts the dictionary of phi-psi arrays
    which is saved in featurized_data attribute and as an output provide
    flexibility parameter for each residue in the ensemble
    Note: this function only works on phi/psi feature """

    f = {}
    R_square_dict = {}

    for key in dict_phi_psi_normal_cases(split_dictionary_phipsiangles(features_dict)).keys():
        Rsquare_phi = []
        Rsquare_psi = []

        phi_array = dict_phi_psi_normal_cases(split_dictionary_phipsiangles(features_dict))[key][0]
        psi_array = dict_phi_psi_normal_cases(split_dictionary_phipsiangles(features_dict))[key][1]
        if isinstance(phi_array, np.ndarray) and phi_array.ndim == 2:
            for i in range(phi_array.shape[1]):
                Rsquare_phi.append(round(np.square(np.sum(np.fromiter(((1 / phi_array.shape[0]) * np.cos(phi_array[c][i]) for c in range(phi_array.shape[0])), dtype=float))) + \
                        np.square(np.sum(np.fromiter(((1 / phi_array.shape[0]) * np.sin(phi_array[c][i]) for c in range(phi_array.shape[0])), dtype=float))),5))

        if isinstance(psi_array, np.ndarray) and psi_array.ndim == 2:
                for j in range(psi_array.shape[1]):
                    Rsquare_psi.append(round(np.square(np.sum(np.fromiter(((1 / psi_array.shape[0]) * np.cos(psi_array[c][j]) for c in range(psi_array.shape[0])), dtype=float))) + \
                          np.square(np.sum(np.fromiter(((1 / psi_array.shape[0]) * np.sin(psi_array[c][j]) for c in range(psi_array.shape[0])), dtype=float))),5))


        R_square_dict[key] = [Rsquare_phi, Rsquare_psi]

    for k in R_square_dict.keys():
        f_i=[]
        for z in range(len(R_square_dict[k][0])):
            f_i.append(round(1 - (1/2 * np.sqrt(R_square_dict[k][0][z])) - (1/2 * np.sqrt(R_square_dict[k][1][z])),5))
            f[k]=f_i
    return f




[docs]
def site_specific_order_parameter(ca_xyz_dict: dict) -> dict: 
    """
    Computes site-specific order parameters for a set of protein conformations.
    Parameters:
        ca_xyz_dict (dict): A dictionary where keys represent unique identifiers for proteins,
        and values are 3D arrays containing the coordinates of alpha-carbon (CA) atoms for different conformations of the protein.
    Returns:
        dict: A dictionary where keys are the same protein identifiers provided in `ca_xyz_dict`,
        and values are one-dimensional arrays containing the site-specific order parameters computed for each residue of the protein.
    """
    computed_data = {}
    for key, ca_xyz in ca_xyz_dict.items():
        starting_matrix = np.zeros((ca_xyz.shape[0], ca_xyz.shape[1]-1, ca_xyz.shape[1]-1))
        for conformation in range(ca_xyz.shape[0]):
            for i in range(ca_xyz.shape[1]-1):
                vector_i_i_plus1 = ca_xyz[conformation][i+1] - ca_xyz[conformation][i]
                vector_i_i_plus1 /= np.linalg.norm(vector_i_i_plus1)
                for j in range(ca_xyz.shape[1]-1):
                    vector_j_j_plus1 = ca_xyz[conformation][j+1] - ca_xyz[conformation][j]
                    vector_j_j_plus1 /= np.linalg.norm(vector_j_j_plus1)
                    cos_i_j = np.dot(vector_i_i_plus1, vector_j_j_plus1)
                    starting_matrix[conformation][i][j] = cos_i_j
        mean_cos_tetha = np.mean(starting_matrix, axis=0)
        square_diff = (starting_matrix - mean_cos_tetha) ** 2
        variance_cos_tetha = np.mean(square_diff, axis=0)
        K_ij = np.array([[1 - np.sqrt(2 * variance_cos_tetha[i][j]) for j in range(variance_cos_tetha.shape[1])] for i in range(variance_cos_tetha.shape[0])])
        o_i = np.mean(K_ij, axis=1)
        computed_data[key] = o_i
    return computed_data