import numpy as np
import mdtraj
[docs]
def get_distance_matrix(xyz):
    """
    Compute pairwise Euclidean distance matrices for an ensemble.

    Takes xyz conformations with shape (N, L, 3) and returns the
    corresponding distance matrices with shape (N, L, L).
    """
    # Broadcasting the two views against each other yields every pairwise
    # displacement vector, shape (N, L, L, 3).
    displacements = xyz[:, None, :, :] - xyz[:, :, None, :]
    squared_distances = np.sum(np.square(displacements), axis=3)
    return np.sqrt(squared_distances)
[docs]
def calc_chain_dihedrals(xyz, norm=False):
    """
    Compute the dihedral angle of every run of 4 consecutive points in a chain.

    Parameters:
        xyz: array of coordinates with shape (N, L, 3).
        norm (bool): if True, divide the angles by pi so they lie in [-1, 1].

    Returns:
        Array of shape (N, L-3) holding the dihedral angles in radians
        (or in units of pi when `norm` is True). The sign of the angle follows
        from the cross-product ordering used below.
    """
    # not sure where this function is used
    r_sel = xyz
    # Three consecutive bond vectors for each window of 4 points; the first
    # one is reversed so that it points away from the central bond.
    b0 = -(r_sel[:, 1:-2, :] - r_sel[:, 0:-3, :])
    b1 = r_sel[:, 2:-1, :] - r_sel[:, 1:-2, :]
    b2 = r_sel[:, 3:, :] - r_sel[:, 2:-1, :]
    # Normals of the two planes spanned by (b0, b1) and (b2, b1).
    b0xb1 = np.cross(b0, b1)
    b2xb1 = np.cross(b2, b1)
    normals_cross = np.cross(b0xb1, b2xb1)
    # NumPy's norm takes `axis` (not the torch-style `dim`).
    y = np.sum(normals_cross * b1, axis=2) * (1.0 / np.linalg.norm(b1, axis=2))
    x = np.sum(b0xb1 * b2xb1, axis=2)
    # `np.arctan2` is available in every NumPy version; `np.atan2` only in >= 2.0.
    dh_vals = np.arctan2(y, x)
    if norm:
        return dh_vals / np.pi
    return dh_vals
[docs]
def create_consecutive_indices_matrix(ca_indices):
    """Build every window of 4 consecutive entries of `ca_indices`.

    Takes the CA indices with shape (L,) and returns an integer matrix of
    shape (L-3, 4) whose row i is ca_indices[i:i+4].
    Raises ValueError when fewer than 4 indices are given."""
    window = 4
    total = len(ca_indices)
    if total < window:
        raise ValueError("Input array must contain at least 4 indices.")
    n_windows = total - window + 1
    windows = np.zeros((n_windows, window), dtype=int)
    # Column k of the result is the input shifted by k positions.
    for shift in range(window):
        windows[:, shift] = ca_indices[shift:shift + n_windows]
    return windows
[docs]
def dict_phi_psi_normal_cases(dict_phi_psi):
    """Trim the phi/psi arrays of every entry.

    For each key, the value is indexed as [phi, psi]; the last column of phi
    and the first column of psi are dropped. Returns a new dictionary with
    the same keys and [trimmed_phi, trimmed_psi] lists as values."""
    return {
        code: [angles[0][:, :-1], angles[1][:, 1:]]
        for code, angles in dict_phi_psi.items()
    }
[docs]
def split_dictionary_phipsiangles(features_dict):
    """Split each 2-D feature array column-wise into two halves.

    The split point is half the length of the first row (integer division);
    the left part is stored as phi and the right part as psi. Returns a
    dictionary with the same keys and [phi, psi] lists as values."""
    halves = {}
    for ens_code, features in features_dict.items():
        midpoint = len(features[0]) // 2
        left_half = features[:, :midpoint]
        right_half = features[:, midpoint:]
        halves[ens_code] = [left_half, right_half]
    return halves
[docs]
def _circular_r_square(angles):
    """Return, per column of `angles`, (mean cos)^2 + (mean sin)^2 rounded to 5 decimals.

    The means are taken over the rows. Anything that is not a 2-D numpy array
    yields an empty list.
    """
    if not (isinstance(angles, np.ndarray) and angles.ndim == 2):
        return []
    n_rows = angles.shape[0]
    # With zero rows the column sums are empty sums (0.0); avoid dividing by zero.
    weight = 1 / n_rows if n_rows else 0.0
    mean_cos = np.sum(weight * np.cos(angles), axis=0)
    mean_sin = np.sum(weight * np.sin(angles), axis=0)
    return list(np.round(np.square(mean_cos) + np.square(mean_sin), 5))


def ss_measure_disorder(features_dict: dict):
    """Compute a per-position flexibility parameter from phi/psi features.

    Parameters:
        features_dict (dict): maps an ensemble code to a 2-D array whose first
            half of the columns holds phi angles and second half psi angles
            (it is split with `split_dictionary_phipsiangles` and trimmed with
            `dict_phi_psi_normal_cases`).

    Returns:
        dict: maps each ensemble code to a list of values
            f = 1 - 0.5 * sqrt(R2_phi) - 0.5 * sqrt(R2_psi), rounded to
            5 decimals, where R2 is (mean cos)^2 + (mean sin)^2 of the angle
            over the rows of the array.

    Note: this function only works on phi/psi features.
    """
    # Split and trim once; this does not depend on the key being processed.
    phi_psi = dict_phi_psi_normal_cases(split_dictionary_phipsiangles(features_dict))
    flexibility = {}
    for key, (phi_array, psi_array) in phi_psi.items():
        r_square_phi = _circular_r_square(phi_array)
        r_square_psi = _circular_r_square(psi_array)
        # Values are paired position by position; extra psi entries (odd
        # number of feature columns) are ignored.
        flexibility[key] = [
            round(1 - (1 / 2 * np.sqrt(r_phi)) - (1 / 2 * np.sqrt(r_psi)), 5)
            for r_phi, r_psi in zip(r_square_phi, r_square_psi)
        ]
    return flexibility
[docs]
def site_specific_order_parameter(ca_xyz_dict: dict) -> dict:
    """
    Computes site-specific order parameters for a set of protein conformations.

    For every conformation, the unit vectors between consecutive CA atoms are
    built and the cosine of the angle between every pair of them is computed.
    The variance of each cosine over the conformations gives
    K_ij = 1 - sqrt(2 * var_ij), and the order parameter of site i is the mean
    of K_ij over j.

    Parameters:
        ca_xyz_dict (dict): A dictionary where keys represent unique identifiers for proteins,
            and values are 3D arrays of shape (n_conformations, L, 3) containing the
            coordinates of alpha-carbon (CA) atoms for different conformations of the protein.

    Returns:
        dict: A dictionary where keys are the same protein identifiers provided in `ca_xyz_dict`,
            and values are one-dimensional arrays of length L-1 (one entry per
            consecutive-CA vector) containing the site-specific order parameters.
    """
    computed_data = {}
    for key, ca_xyz in ca_xyz_dict.items():
        # Work in floating point so integer coordinate arrays are accepted too.
        coords = np.asarray(ca_xyz, dtype=float)
        # Vectors CA(i) -> CA(i+1), normalized to unit length: (n_conf, L-1, 3).
        bonds = coords[:, 1:, :] - coords[:, :-1, :]
        unit_bonds = bonds / np.linalg.norm(bonds, axis=2, keepdims=True)
        # Batched dot products of every pair of unit vectors: (n_conf, L-1, L-1).
        cos_theta = np.matmul(unit_bonds, np.transpose(unit_bonds, (0, 2, 1)))
        # Population variance of each cosine over the conformations.
        variance_cos_theta = np.var(cos_theta, axis=0)
        k_ij = 1.0 - np.sqrt(2.0 * variance_cos_theta)
        computed_data[key] = np.mean(k_ij, axis=1)
    return computed_data