Source code for nerva_numpy.multilayer_perceptron

# Copyright 2023 Wieger Wesselink.
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE or http://www.boost.org/LICENSE_1_0.txt)

"""A simple multilayer perceptron (MLP) class."""

from typing import List

import numpy as np
from nerva_numpy.layers import BatchNormalizationLayer, LinearLayer, parse_linear_layer
from nerva_numpy.matrix_operations import Matrix
from nerva_numpy.utilities import load_dict_from_npz, pp, save_dict_to_npz



[docs]
class MultilayerPerceptron(object):
    """
    Multilayer perceptron
    """
    def __init__(self, layers=None):
        if not layers:
            layers = []
        self.layers = layers


[docs]
    def feedforward(self, X: Matrix) -> Matrix:
        for layer in self.layers:
            X = layer.feedforward(X)
        return X



[docs]
    def backpropagate(self, Y: Matrix, DY: Matrix) -> None:
        for layer in reversed(self.layers):
            layer.backpropagate(Y, DY)
            Y, DY = layer.X, layer.DX



[docs]
    def optimize(self, eta: float):
        for layer in self.layers:
            layer.optimize(eta)



[docs]
    def info(self):
        index = 1
        for layer in self.layers:
            if isinstance(layer, LinearLayer):
                pp(f'W{index}', layer.W)
                pp(f'b{index}', layer.b)
                index += 1



[docs]
    def load_weights_and_bias(self, filename: str):
        """Loads the weights and biases from a file in .npz format

        The weight matrices are stored using the keys W1, W2, ... and the bias vectors using the keys b1, b2, ...
        """
        print(f'Loading weights and bias from {filename}')
        data = load_dict_from_npz(filename)
        index = 1
        for layer in self.layers:
            if isinstance(layer, LinearLayer):
                layer.W[:] = data[f'W{index}']
                layer.b[:] = data[f'b{index}']
                index += 1



[docs]
    def save_weights_and_bias(self, filename: str):
        """Saves the weights and biases to a file in compressed .npz format.

        The weight matrices are stored using the keys W1, W2, ... and the bias vectors using the keys b1, b2, ...
        """
        print(f"Saving weights and bias to {filename}")
        data = {}
        index = 1
        for layer in self.layers:
            if isinstance(layer, LinearLayer):
                data[f"W{index}"] = layer.W
                data[f"b{index}"] = layer.b
                index += 1
        save_dict_to_npz(filename, data)





[docs]
def parse_multilayer_perceptron(layer_specifications: List[str],
                                linear_layer_sizes: List[int],
                                optimizers: List[str],
                                linear_layer_weight_initializers: List[str]
                               ) -> MultilayerPerceptron:

    """Construct an MLP from textual layer specs and size/optimizer configs.

    layer_specifications: e.g. ["ReLU", "BatchNormalization", "LogSoftmax"]
    linear_layer_sizes: e.g. [784, 128, 10] for two linear layers
    optimizers: one per layer (including BatchNormalization)
    linear_layer_weight_initializers: one per linear layer
    """
    assert len(linear_layer_weight_initializers) == len(linear_layer_sizes) - 1
    layers = []

    linear_layer_index = 0
    optimizer_index = 0
    D = linear_layer_sizes[linear_layer_index]  # the input size of the current layer

    for specification in layer_specifications:
        if specification == 'BatchNormalization':
            layer = BatchNormalizationLayer(D)
            optimizer = optimizers[optimizer_index]
            layer.set_optimizer(optimizer)
            optimizer_index += 1
        else:
            K = linear_layer_sizes[linear_layer_index + 1]  # the output size of the layer
            optimizer = optimizers[optimizer_index]
            weight_initializer = linear_layer_weight_initializers[linear_layer_index]
            layer = parse_linear_layer(specification, D, K, optimizer, weight_initializer)
            optimizer_index += 1
            linear_layer_index += 1
            D = K
        layers.append(layer)
    return MultilayerPerceptron(layers)