
# Copyright 2023 Wieger Wesselink.
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE or http://www.boost.org/LICENSE_1_0.txt)

"""Softmax and log-softmax functions together with stable variants.

This module provides plain functions as well as simple callable class wrappers.
"""


from nerva_torch.matrix_operations import Diag, column_repeat, exp, hadamard, identity, is_row_vector, log, reciprocal, \
    row_repeat, rows_max, rows_sum, Matrix


def softmax(X: Matrix) -> Matrix:
    """Row-wise softmax with explicit normalization (numerically unsafe)."""
    N, D = X.shape
    E = exp(X)
    return hadamard(E, column_repeat(reciprocal(rows_sum(E)), D))

def softmax_jacobian(x: Matrix) -> Matrix:
    """Jacobian matrix of softmax for a single row vector."""
    assert is_row_vector(x)
    y = softmax(x)
    return Diag(y) - y.T * y

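# A minimal sanity check for the closed-form Jacobian. This is a sketch that
# assumes Matrix is torch.Tensor and that the matrix_operations primitives are
# differentiable torch ops; _check_softmax_jacobian is a hypothetical helper,
# not part of the library API.
def _check_softmax_jacobian():
    import torch
    x = torch.tensor([[0.5, -1.0, 2.0]])
    J_analytic = softmax_jacobian(x)
    # autograd returns shape (1, D, 1, D) for a (1, D) -> (1, D) map
    J_autograd = torch.autograd.functional.jacobian(softmax, x).reshape(3, 3)
    assert torch.allclose(J_analytic, J_autograd, atol=1e-6)
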
def stable_softmax(X: Matrix) -> Matrix:
    """Row-wise softmax using max-subtraction for numerical stability."""
    N, D = X.shape
    Y = X - column_repeat(rows_max(X), D)
    E = exp(Y)
    return hadamard(E, column_repeat(reciprocal(rows_sum(E)), D))

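# Illustration of why the stable variant exists (a sketch, assuming Matrix is
# torch.Tensor; _demo_stable_softmax is hypothetical). With large logits,
# exp(X) overflows to inf, so the unsafe softmax yields nan, while the
# max-subtracted variant stays finite.
def _demo_stable_softmax():
    import torch
    X = torch.tensor([[1000.0, 1001.0, 1002.0]])
    print(softmax(X))         # tensor([[nan, nan, nan]]): exp overflowed
    print(stable_softmax(X))  # finite probabilities summing to one
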
def stable_softmax_jacobian(x: Matrix) -> Matrix:
    """Jacobian matrix of stable softmax for a single row vector."""
    assert is_row_vector(x)
    y = stable_softmax(x)
    return Diag(y) - y.T * y

def log_softmax(X: Matrix) -> Matrix:
    """Row-wise log-softmax (numerically unsafe version)."""
    N, D = X.shape
    return X - column_repeat(log(rows_sum(exp(X))), D)

def log_softmax_jacobian(x: Matrix) -> Matrix:
    """Jacobian matrix of log_softmax for a single row vector."""
    assert is_row_vector(x)
    N, D = x.shape
    return identity(D) - row_repeat(softmax(x), D)

def stable_log_softmax(X: Matrix) -> Matrix:
    """Row-wise log-softmax with max-subtraction for stability."""
    N, D = X.shape
    Y = X - column_repeat(rows_max(X), D)
    return Y - column_repeat(log(rows_sum(exp(Y))), D)

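# Cross-check sketch (assuming Matrix is torch.Tensor; _demo_stable_log_softmax
# is hypothetical): torch's built-in log_softmax uses the same max-subtraction
# trick, so the two should agree even for extreme inputs.
def _demo_stable_log_softmax():
    import torch
    X = torch.tensor([[500.0, 0.0, -500.0]])
    assert torch.allclose(stable_log_softmax(X), torch.log_softmax(X, dim=1))
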
def stable_log_softmax_jacobian(x: Matrix) -> Matrix:
    """Jacobian matrix of stable log_softmax.

    Subtracting the row maximum does not change the softmax output, so this
    Jacobian coincides with that of log_softmax.
    """
    return log_softmax_jacobian(x)

class SoftmaxFunction(object):
    """Callable implementing row-wise softmax and its Jacobian."""

    def __call__(self, X: Matrix) -> Matrix:
        return softmax(X)

    def jacobian(self, X: Matrix) -> Matrix:
        return softmax_jacobian(X)

class StableSoftmaxFunction(object):
    """Callable implementing numerically stable row-wise softmax and its Jacobian."""

    def __call__(self, X: Matrix) -> Matrix:
        return stable_softmax(X)

    def jacobian(self, X: Matrix) -> Matrix:
        return stable_softmax_jacobian(X)

class LogSoftmaxFunction(object):
    """Callable implementing row-wise log-softmax and its Jacobian."""

    def __call__(self, X: Matrix) -> Matrix:
        return log_softmax(X)

    def jacobian(self, X: Matrix) -> Matrix:
        return log_softmax_jacobian(X)

class StableLogSoftmaxFunction(object):
    """Callable implementing numerically stable row-wise log-softmax and its Jacobian."""

    def __call__(self, X: Matrix) -> Matrix:
        return stable_log_softmax(X)

    def jacobian(self, X: Matrix) -> Matrix:
        return stable_log_softmax_jacobian(X)

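# Usage sketch for the callable wrappers (assumes Matrix is torch.Tensor;
# _demo_activation_classes is hypothetical, not part of the module). The
# classes bundle a forward map with its Jacobian, e.g. for use inside a layer.
def _demo_activation_classes():
    import torch
    act = StableSoftmaxFunction()
    x = torch.tensor([[1.0, 2.0, 3.0]])
    y = act(x)           # forward pass: rows sum to one
    J = act.jacobian(x)  # D x D Jacobian for a single row vector
    print(y, J.shape)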