Source code for dit.shannon.shannon

"""
Some basic Shannon information quantities.

"""

from ..math import LogOperations
from ..helpers import RV_MODES

import numpy as np


def entropy_pmf(pmf):
    """
    Returns the entropy of the probability mass function.

    Assumption: the pmf contains linear (not log-space) probabilities.

    Parameters
    ----------
    pmf : NumPy array, shape (k,) or (n, k)
        The probability mass function(s), as linear probabilities. The
        entropy is computed over the last axis.

    Returns
    -------
    H : float or NumPy array, shape (n,)
        The entropy, in bits, of each pmf.

    """
    pmf = np.asarray(pmf)
    return np.nansum(-pmf * np.log2(pmf), axis=-1)
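

# --- Usage sketch (illustrative; not part of the original dit source) ---
# entropy_pmf operates directly on arrays of linear probabilities and is
# vectorized over the last axis.  The helper below is hypothetical, shows the
# expected values for a couple of coins, and is defined but never called.
def _entropy_pmf_usage_sketch():
    fair_coin = np.array([0.5, 0.5])
    biased_coin = np.array([0.25, 0.75])
    assert np.isclose(entropy_pmf(fair_coin), 1.0)        # one fair bit
    assert np.isclose(entropy_pmf(biased_coin), 0.8112781244591328)
    # A (2, 2) array yields one entropy per row (i.e. per pmf).
    both = np.stack([fair_coin, biased_coin])
    assert np.allclose(entropy_pmf(both), [1.0, 0.8112781244591328])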


def entropy(dist, rvs=None, rv_mode=None):
    """
    Returns the entropy H[X] over the random variables in `rvs`.

    If the distribution represents linear probabilities, then the entropy
    is calculated with units of 'bits' (base-2). Otherwise, the entropy is
    calculated in whatever base matches the distribution's pmf.

    Parameters
    ----------
    dist : Distribution or float
        The distribution from which the entropy is calculated. If a float,
        then we calculate the binary entropy.
    rvs : list, None
        The indexes of the random variables used to calculate the entropy.
        If None, then the entropy is calculated over all random variables.
        This should remain `None` for ScalarDistributions.
    rv_mode : str, None
        Specifies how to interpret the elements of `rvs`. Valid options
        are: {'indices', 'names'}. If equal to 'indices', then the elements
        of `rvs` are interpreted as random variable indices. If equal to
        'names', then the elements are interpreted as random variable names.
        If `None`, then the value of `dist._rv_mode` is consulted.

    Returns
    -------
    H : float
        The entropy of the distribution.

    """
    try:
        # Handle binary entropy.
        float(dist)
    except TypeError:
        pass
    else:
        # Assume linear probability for binary entropy.
        import dit
        dist = dit.ScalarDistribution([dist, 1 - dist])

    if dist.is_joint():
        if rvs is None:
            # Set to entropy of entire distribution
            rvs = range(dist.outcome_length())  # pylint: disable=no-member
            rv_mode = RV_MODES.INDICES

        d = dist.marginal(rvs, rv_mode=rv_mode)  # pylint: disable=no-member
    else:
        d = dist

    pmf = d.pmf
    if d.is_log():
        base = d.get_base(numerical=True)
        terms = -base**pmf * pmf
    else:
        # Calculate entropy in bits.
        log = LogOperations(2).log
        terms = -pmf * log(pmf)

    H = np.nansum(terms)
    return H
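

# --- Usage sketch (illustrative; not part of the original dit source) ---
# `entropy` accepts either a bare probability (binary entropy) or a dit
# distribution.  The helper below is hypothetical, assumes the standard
# dit.Distribution constructor, and is defined but never called here.
def _entropy_usage_sketch():
    import dit
    # Two independent fair bits: outcomes '00', '01', '10', '11', each 1/4.
    d = dit.Distribution(['00', '01', '10', '11'], [1 / 4] * 4)
    assert np.isclose(entropy(0.5), 1.0)          # binary entropy of a fair coin
    assert np.isclose(entropy(d), 2.0)            # joint entropy of both bits
    assert np.isclose(entropy(d, rvs=[0]), 1.0)   # marginal entropy of bit 0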


def conditional_entropy(dist, rvs_X, rvs_Y, rv_mode=None):
    """
    Returns the conditional entropy H[X|Y].

    If the distribution represents linear probabilities, then the
    conditional entropy is calculated with units of 'bits' (base-2).

    Parameters
    ----------
    dist : Distribution
        The distribution from which the conditional entropy is calculated.
    rvs_X : list, None
        The indexes of the random variables defining X.
    rvs_Y : list, None
        The indexes of the random variables defining Y.
    rv_mode : str, None
        Specifies how to interpret the elements of `rvs_X` and `rvs_Y`.
        Valid options are: {'indices', 'names'}. If equal to 'indices',
        then the elements of `rvs_X` and `rvs_Y` are interpreted as random
        variable indices. If equal to 'names', then the elements are
        interpreted as random variable names. If `None`, then the value of
        `dist._rv_mode` is consulted.

    Returns
    -------
    H_XgY : float
        The conditional entropy H[X|Y].

    """
    if set(rvs_X).issubset(rvs_Y):
        # This is not necessary, but it makes the answer *exactly* zero,
        # instead of 1e-12 or something smaller.
        return 0.0

    MI_XY = mutual_information(dist, rvs_X, rvs_Y, rv_mode=rv_mode)
    H_X = entropy(dist, rvs_X, rv_mode=rv_mode)
    H_XgY = H_X - MI_XY
    return H_XgY
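

# --- Usage sketch (illustrative; not part of the original dit source) ---
# For perfectly correlated bits, knowing Y removes all uncertainty about X,
# so H[X|Y] = 0, and the subset shortcut above returns exactly 0.0 whenever
# X is contained in Y.  The helper below is hypothetical, assumes the
# standard dit.Distribution constructor, and is never called here.
def _conditional_entropy_usage_sketch():
    import dit
    correlated = dit.Distribution(['00', '11'], [0.5, 0.5])
    independent = dit.Distribution(['00', '01', '10', '11'], [1 / 4] * 4)
    assert np.isclose(conditional_entropy(correlated, [0], [1]), 0.0)
    assert np.isclose(conditional_entropy(independent, [0], [1]), 1.0)
    assert conditional_entropy(independent, [0], [0, 1]) == 0.0  # subset shortcut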


def mutual_information(dist, rvs_X, rvs_Y, rv_mode=None):
    """
    Returns the mutual information I[X:Y].

    If the distribution represents linear probabilities, then the mutual
    information is calculated with units of 'bits' (base-2).

    Parameters
    ----------
    dist : Distribution
        The distribution from which the mutual information is calculated.
    rvs_X : list, None
        The indexes of the random variables defining X.
    rvs_Y : list, None
        The indexes of the random variables defining Y.
    rv_mode : str, None
        Specifies how to interpret the elements of `rvs_X` and `rvs_Y`.
        Valid options are: {'indices', 'names'}. If equal to 'indices',
        then the elements of `rvs_X` and `rvs_Y` are interpreted as random
        variable indices. If equal to 'names', then the elements are
        interpreted as random variable names. If `None`, then the value of
        `dist._rv_mode` is consulted.

    Returns
    -------
    I : float
        The mutual information I[X:Y].

    """
    H_X = entropy(dist, rvs_X, rv_mode=rv_mode)
    H_Y = entropy(dist, rvs_Y, rv_mode=rv_mode)
    # Make sure to union the indexes. This handles the case when X and Y
    # do not partition the set of all indexes.
    H_XY = entropy(dist, set(rvs_X) | set(rvs_Y), rv_mode=rv_mode)
    I = H_X + H_Y - H_XY
    return I
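

# --- Usage sketch (illustrative; not part of the original dit source) ---
# Mutual information is zero for independent variables and one bit for
# perfectly correlated bits.  The helper below is hypothetical, assumes the
# standard dit.Distribution constructor, and is never called here.
def _mutual_information_usage_sketch():
    import dit
    correlated = dit.Distribution(['00', '11'], [0.5, 0.5])
    independent = dit.Distribution(['00', '01', '10', '11'], [1 / 4] * 4)
    assert np.isclose(mutual_information(correlated, [0], [1]), 1.0)
    assert np.isclose(mutual_information(independent, [0], [1]), 0.0)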