Source code for evcouplings.utils.calculations

"""
General calculation functions.

Authors:
  Thomas A. Hopf
"""

import numpy as np


[docs]def entropy(X, normalize=False):
    """
    Calculate entropy of distribution

    Parameters
    ----------
    X : np.array
        Vector for which entropy will be calculated
    normalize:
        Rescale entropy to range from 0 ("variable", "flat")
        to 1 ("conserved")

    Returns
    -------
    float
        Entropy of X
    """
    X_ = X[X > 0]
    H = -np.sum(X_ * np.log2(X_))

    if normalize:
        return 1 - (H / np.log2(len(X)))
    else:
        return H


[docs]def entropy_vector(model, normalize=True):
    """
    Compute vector of positional entropies for
    single-site frequencies in a CouplingsModel

    Parameters
    ----------
    model : CouplingsModel
        Model for which entropy of sequence
        alignment will be computed (based on
        single-site frequencies f_i(A_i)
        contained in model)
    normalize : bool, default: True
        Normalize entropy to range 0 (variable)
        to 1 (conserved) instead of raw values

    Returns
    -------
    np.array
        Vector of length model.L containing
        entropy for each position
    """
    cons = np.apply_along_axis(
        lambda x: entropy(x, normalize=normalize),
        axis=1, arr=model.fi()
    )

    return cons


[docs]def entropy_map(model, normalize=True):
    """
    Compute dictionary of positional entropies for
    single-site frequencies in a CouplingsModel

    Parameters
    ----------
    model : CouplingsModel
        Model for which entropy of sequence
        alignment will be computed (based on
        single-site frequencies f_i(A_i)
        contained in model)
    normalize : bool, default: True
        Normalize entropy to range 0 (variable)
        to 1 (conserved) instead of raw values

    Returns
    -------
    dict
        Map from positions in sequence (int) to
        entropy of column (float) in alignment
    """
    cons = entropy_vector(model, normalize)

    return dict(
        zip(model.index_list, cons)
    )


[docs]def dihedral_angle(p0, p1, p2, p3):
    """
    Compute dihedral angle given four points
    
    Adapted from the following source:
    http://stackoverflow.com/questions/20305272/dihedral-torsion-angle-from-four-points-in-cartesian-coordinates-in-python
    (answer by user Praxeolitic)
    
    Parameters
    ----------
    p0 : np.array
        Coordinates of first point
    p1 : np.array
        Coordinates of second point
    p2 : np.array
        Coordinates of third point
    p3 : np.array
        Coordinates of fourth point

    Returns
    -------
    numpy.float
        Dihedral angle (in radians)
    """
    b0 = -1.0*(p1 - p0)
    b1 = p2 - p1
    b2 = p3 - p2

    # normalize b1 so that it does not influence magnitude of vector
    # rejections that come next
    b1 /= np.linalg.norm(b1)

    # vector rejections
    # v = projection of b0 onto plane perpendicular to b1
    #   = b0 minus component that aligns with b1
    # w = projection of b2 onto plane perpendicular to b1
    #   = b2 minus component that aligns with b1
    v = b0 - np.dot(b0, b1)*b1
    w = b2 - np.dot(b2, b1)*b1

    # angle between v and w in a plane is the torsion angle
    # v and w may not be normalized but that's fine since tan is y/x
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)

    return np.arctan2(y, x)


[docs]def median_absolute_deviation(x, scale=1.4826):
    """
    Compute median absolute deviation of a set of numbers
    (median of deviations from median)

    Parameters
    ----------
    x : list-like of float
        Numbers for which median absolute deviation
        will be computed
    scale : float, optional (default: 1.4826)
        Rescale median absolute deviation by this factor;
        default value is such that median absolute
        deviation will match regular standard deviation
        of Gaussian distribution
    """
    med = np.median(x)
    mad = np.median(np.abs(x - med))

    return scale * mad