Source code for evcouplings.compare.ecs

"""
Compare evolutionary couplings to distances in 3D structures

Authors:
  Thomas A. Hopf
"""

import numpy as np


[docs]def add_distances(ec_table, dist_map, target_column="dist"): """ Add pair distances to EC score table Parameters ---------- ec_table : pandas.DataFrame List of evolutionary couplings, with pair positions in columns i and j dist_map : DistanceMap Distance map that will be used to annotate distances in ec_table target_column : str Name of column in which distances will be stored Returns ------- pandas.DataFrame Couplings table with added distances in target_column. Pairs where no distance information is available will be np.nan """ ec_table = ec_table.copy() ec_table.loc[:, target_column] = [ dist_map.dist(i, j, raise_na=False) for i, j in zip(ec_table.i, ec_table.j) ] return ec_table
[docs]def add_precision(ec_table, dist_cutoff=5, score="cn", min_sequence_dist=6, target_column="precision", dist_column="dist"): """ Compute precision of evolutionary couplings as predictor of 3D structure contacts Parameters ---------- ec_table : pandas.DataFrame List of evolutionary couplings dist_cutoff : float, optional (default: 5) Upper distance cutoff (in Angstrom) for a pair to be considered a true positive contact score : str, optional (default: "cn") Column which contains coupling score. Table will be sorted in descending order by this score. min_sequence_dist : int, optional (default: 6) Minimal distance in primary sequence for an EC to be included in precision calculation target_column : str, optional (default: "precision") Name of column in which precision will be stored dist_column : str, optional (default: "dist") Name of column which contains pair distances Returns ------- pandas.DataFrame EC table with added precision values as a function of EC rank (returned table will be sorted by score column) """ # make sure list is sorted by score ec_table = ec_table.sort_values(by=score, ascending=False) if min_sequence_dist is not None: ec_table = ec_table.query("abs(i - j) >= @min_sequence_dist") ec_table = ec_table.copy() # number of true positive contacts true_pos_count = (ec_table.loc[:, dist_column] <= dist_cutoff).cumsum() # total number of contacts with specified distance pos_count = ec_table.loc[:, dist_column].notnull().cumsum() ec_table.loc[:, target_column] = true_pos_count / pos_count return ec_table
[docs]def coupling_scores_compared(ec_table, dist_map, dist_map_multimer=None, dist_cutoff=5, output_file=None, score="cn", min_sequence_dist=6): """ Utility function to create "CouplingScores.csv"-style table Parameters ---------- ec_table : pandas.DataFrame List of evolutionary couplings dist_map : DistanceMap Distance map that will be used to annotate distances in ec_table dist_map_multimer : DistanceMap, optional (default: None) Additional multimer distance map. If given, the distance for any EC pair will be the minimum out of the monomer and multimer distances. dist_cutoff : float, optional (default: 5) Upper distance cutoff (in Angstrom) for a pair to be considered a true positive contact output_file : str, optional (default: None) Store final table to this file score : str, optional (default: "cn") Column which contains coupling score. Table will be sorted in descending order by this score. min_sequence_dist : int, optional (default: 6) Minimal distance in primary sequence for an EC to be included in precision calculation Returns ------- pandas.DataFrame EC table with added distances, and precision if dist_cutoff is given. """ if dist_map_multimer is None: x = add_distances(ec_table, dist_map) else: x = add_distances(ec_table, dist_map, "dist_intra") x = add_distances(x, dist_map_multimer, "dist_multimer") x.loc[:, "dist"] = np.fmin( x.dist_intra, x.dist_multimer ) if min_sequence_dist is not None: x = x.query("abs(i - j) >= @min_sequence_dist") # if distance cutoff is given, add precision if dist_cutoff is not None: x = add_precision( x, dist_cutoff, score=score, min_sequence_dist=min_sequence_dist ) # also save to file if path is given if output_file is not None: x.to_csv(output_file, index=False) return x