Source code for nerva_jax.multilayer_perceptron

# Copyright 2023 Wieger Wesselink.
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE or http://www.boost.org/LICENSE_1_0.txt)

"""A simple multilayer perceptron (MLP) class."""

from typing import List

import jax.numpy as jnp
from nerva_jax.layers import BatchNormalizationLayer, LinearLayer, parse_linear_layer
from nerva_jax.matrix_operations import Matrix
from nerva_jax.utilities import load_dict_from_npz, pp, save_dict_to_npz



[docs]
class MultilayerPerceptron(object):
    """
    Multilayer perceptron
    """
    def __init__(self, layers=None):
        if not layers:
            layers = []
        self.layers = layers


[docs]
    def feedforward(self, X: Matrix) -> Matrix:
        for layer in self.layers:
            X = layer.feedforward(X)
        return X



[docs]
    def backpropagate(self, Y: Matrix, DY: Matrix) -> None:
        for layer in reversed(self.layers):
            layer.backpropagate(Y, DY)
            Y, DY = layer.X, layer.DX



[docs]
    def optimize(self, eta: float):
        for layer in self.layers:
            layer.optimize(eta)



[docs]
    def info(self):
        index = 1
        for layer in self.layers:
            if isinstance(layer, LinearLayer):
                pp(f'W{index}', layer.W)
                pp(f'b{index}', layer.b)
                index += 1



[docs]
    def load_weights_and_bias(self, filename: str):
        """
        Loads the weights and biases from a file in .npz format

        The weight matrices are stored using the keys W1, W2, ... and the bias vectors using the keys b1, b2, ...
        :param filename: the name of the file
        """
        print(f'Loading weights and bias from {filename}')
        data = load_dict_from_npz(filename)
        index = 1
        for layer in self.layers:
            if isinstance(layer, LinearLayer):
                layer.W = jnp.array(data[f'W{index}'])
                layer.b = jnp.array(data[f'b{index}'])
                index += 1



[docs]
    def save_weights_and_bias(self, filename: str):
        """Saves the weights and biases to a file in compressed .npz format.

        The weight matrices are stored using the keys W1, W2, ... and the bias vectors using the keys b1, b2, ...
        """
        print(f"Saving weights and bias to {filename}")
        data = {}
        index = 1
        for layer in self.layers:
            if isinstance(layer, LinearLayer):
                data[f"W{index}"] = layer.W
                data[f"b{index}"] = layer.b
                index += 1
        save_dict_to_npz(filename, data)





[docs]
def parse_multilayer_perceptron(layer_specifications: List[str],
                                linear_layer_sizes: List[int],
                                optimizers: List[str],
                                linear_layer_weight_initializers: List[str]
                               ) -> MultilayerPerceptron:

    """Construct an MLP from textual layer specs and size/optimizer configs.

    layer_specifications: e.g. ["ReLU", "BatchNormalization", "LogSoftmax"]
    linear_layer_sizes: e.g. [784, 128, 10] for two linear layers
    optimizers: one per layer (including BatchNormalization)
    linear_layer_weight_initializers: one per linear layer
    """
    assert len(linear_layer_weight_initializers) == len(linear_layer_sizes) - 1
    layers = []

    linear_layer_index = 0
    optimizer_index = 0
    D = linear_layer_sizes[linear_layer_index]  # the input size of the current layer

    for specification in layer_specifications:
        if specification == 'BatchNormalization':
            layer = BatchNormalizationLayer(D)
            optimizer = optimizers[optimizer_index]
            layer.set_optimizer(optimizer)
            optimizer_index += 1
        else:
            K = linear_layer_sizes[linear_layer_index + 1]  # the output size of the layer
            optimizer = optimizers[optimizer_index]
            weight_initializer = linear_layer_weight_initializers[linear_layer_index]
            layer = parse_linear_layer(specification, D, K, optimizer, weight_initializer)
            optimizer_index += 1
            linear_layer_index += 1
            D = K
        layers.append(layer)
    return MultilayerPerceptron(layers)