{ "cells": [ { "cell_type": "markdown", "id": "3ca77ade", "metadata": { "id": "3ca77ade" }, "source": [ "## NN578_network.ipynb (Spring 2024)\n", "\n", "### Neural network code \"network.py\" from Mike Nielsen's NNDL book. Modified slightly for our course." ] }, { "cell_type": "code", "execution_count": 1, "id": "7d258c17", "metadata": { "id": "7d258c17" }, "outputs": [], "source": [ "import random\n", "import json\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "fda89966", "metadata": { "id": "fda89966" }, "outputs": [], "source": [ "class Network(object):\n", " def __init__(self, sizes):\n", " \"\"\"The list ``sizes`` contains the number of neurons in the\n", " respective layers of the network, for example [2, 3, 1].\n", " The biases and weights are initialized randomly. Note that\n", " the first layer is an input layer, and by convention we\n", " won't set any biases for those neurons.\"\"\"\n", " self.num_layers = len(sizes)\n", " self.sizes = sizes\n", " self.biases = [np.random.randn(y, 1) for y in sizes[1:]]\n", " self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]\n", " ##\n", " self.init_acts_shape = []\n", "\n", " def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):\n", " \"\"\"Train the neural network using mini-batch stochastic\n", " gradient descent. The ``training_data`` is a list of tuples\n", " ``(x, y)``. If ``test_data`` is provided then the\n", " network will be evaluated against the test data after each\n", " epoch -- essentially the role as a validation data.\"\"\"\n", " n = len(training_data)\n", " if test_data:\n", " nvalid = len(test_data)\n", "\n", " for j in range(epochs):\n", " # random.shuffle(training_data) # supressed for now\n", " mini_batches = [\n", " training_data[k : k + mini_batch_size]\n", " for k in range(0, n, mini_batch_size)\n", " ]\n", "\n", " for mini_batch in mini_batches:\n", " self.update_mini_batch(mini_batch, eta)\n", "\n", " # Compute the training result for this epoch.\n", " acc_train = self.evaluate(training_data)/n\n", " if not test_data:\n", " print(\"Epoch {}: train acc {:.4f}\".format(j, acc_train))\n", " else:\n", " acc_valid = self.evaluate(test_data)/nvalid\n", " print(\"Epoch {}: train acc {:.4f}, valid acc {:.4f}\".format\n", " (j, acc_train, acc_valid))\n", " # Early exit if applies\n", " if acc_train == 1.0:\n", " break\n", "\n", " def update_mini_batch(self, mini_batch, eta):\n", " \"\"\"Update the network's weights and biases by applying\n", " gradient descent using backpropagation to a single mini batch.\n", " The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``\n", " is the learning rate.\"\"\"\n", " nabla_b = [np.zeros(b.shape) for b in self.biases]\n", " nabla_w = [np.zeros(w.shape) for w in self.weights]\n", "\n", " for x, y in mini_batch:\n", " delta_nabla_b, delta_nabla_w = self.backprop(x, y)\n", " nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n", " nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n", "\n", " self.weights = [\n", " w - (eta / len(mini_batch)) * nw for w, nw in zip(self.weights, nabla_w)\n", " ]\n", " self.biases = [\n", " b - (eta / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)\n", " ]\n", "\n", " def backprop(self, x, y):\n", " \"\"\"Return a tuple ``(nabla_b, nabla_w)`` representing the\n", " gradient for the cost function C_x. ``nabla_b`` and\n", " ``nabla_w`` are layer-by-layer lists of numpy arrays, similar\n", " to ``self.biases`` and ``self.weights``.\"\"\"\n", " nabla_b = [np.zeros(b.shape) for b in self.biases]\n", " nabla_w = [np.zeros(w.shape) for w in self.weights]\n", "\n", " # forward pass\n", " activation = x\n", " activations = [x] # list to store all the activations, layer by layer\n", "\n", " ## nt: DO NOT REMOVE THIS LINE!!\n", " self.init_acts_shape = [act.shape for act in activations]\n", "\n", " zs = [] # list to store all the z vectors, layer by layer\n", " for b, w in zip(self.biases, self.weights):\n", " z = np.dot(w, activation) + b\n", " zs.append(z)\n", " activation = sigmoid(z)\n", " activations.append(activation)\n", "\n", " # backward pass\n", " delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])\n", " nabla_b[-1] = delta\n", " nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n", "\n", " # Note that the variable l in the loop below is used a little\n", " # differently to the notation in Chapter 2 of the book. Here,\n", " # l = 1 means the last layer of neurons, l = 2 is the\n", " # second-last layer, and so on.\n", " for l in range(2, self.num_layers):\n", " z = zs[-l]\n", " sp = sigmoid_prime(z)\n", " delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp\n", " nabla_b[-l] = delta\n", " nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())\n", " return (nabla_b, nabla_w)\n", "\n", " def evaluate(self, test_data):\n", " \"\"\"Return the number of test inputs for which the neural\n", " network outputs the correct result.\"\"\"\n", " test_results = [\n", " (np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data\n", " ]\n", " return sum(int(x == y) for (x, y) in test_results)\n", "\n", " def feedforward(self, a):\n", " \"\"\"Return the output of the network if ``a`` is input. Note this\n", " function is called during evaluation; not during training/backprop.\"\"\"\n", " for b, w in zip(self.biases, self.weights):\n", " a = sigmoid(np.dot(w, a) + b)\n", " return a\n", "\n", " def cost_derivative(self, output_activations, y):\n", " \"\"\"Return the vector of partial derivatives \\partial C_x /\n", " \\partial a for the output activations.\"\"\"\n", " return output_activations - y\n", "\n", " @classmethod\n", " def load_network(cls, filename):\n", " \"\"\"Load a neural network from a json file ``filename``. Returns an\n", " instance of Network. \"\"\"\n", " f = open(filename, \"r\")\n", " data = json.load(f)\n", " f.close()\n", " net = cls(data[\"sizes\"])\n", " net.weights = [np.array(w) for w in data[\"weights\"]]\n", " net.biases = [np.array(b) for b in data[\"biases\"]]\n", " return net\n", "\n", " def save_network(self, filename):\n", " \"\"\"Save the neural network to a json file ``filename``.\"\"\"\n", " data = {\n", " \"sizes\": self.sizes,\n", " \"weights\": [w.tolist() for w in self.weights],\n", " \"biases\": [b.tolist() for b in self.biases] # ,\n", " # \"cost\": str(self.cost.__name__)\n", " }\n", " f = open(filename, \"w\")\n", " json.dump(data, f)\n", " f.close()" ] }, { "cell_type": "code", "execution_count": 3, "id": "f747f644", "metadata": { "id": "f747f644" }, "outputs": [], "source": [ "def sigmoid(z):\n", " \"\"\"The sigmoid function.\"\"\"\n", " return 1.0 / (1.0 + np.exp(-z))\n", "\n", "def sigmoid_prime(z):\n", " \"\"\"Derivative of the sigmoid function.\"\"\"\n", " return sigmoid(z) * (1 - sigmoid(z))\n", "\n", "def vectorize_target(n, target):\n", " \"\"\"Return an array of shape (n,1) with a 1.0 in the target position\n", " and zeroes elsewhere. The parameter target is assumed to be\n", " an array of size 1, and the 0th item is the target position (1). \"\"\"\n", " e = np.zeros((n, 1))\n", " e[int(target[0])] = 1.0\n", " return e" ] }, { "cell_type": "code", "execution_count": 4, "id": "22046bf1", "metadata": { "id": "22046bf1" }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "def my_load_csv(fname, input_size, target_size, seednum=517):\n", " ''' Function to load the data from a csv file. Note the target (y)\n", " is assumed to be already in the one-hot-vector notation.\n", " Also each instance in the returned data is made into column vectors.'''\n", " # Read in the data into pandas dataframe\n", " df = pd.read_csv(fname, header=None)\n", "\n", " # Set the random seed if specified to shuffle, for reproducibility.\n", " # Otherwise no shuffling.\n", " if seednum:\n", " df = df.sample(frac=1, random_state=seednum)\n", "\n", " # Separate the X and Y parts\n", " X = df[df.columns[:input_size]].values.tolist()\n", " Y = df[df.columns[-target_size:]].values.tolist()\n", "\n", " # Combine the parts for each instance and put all in a list.\n", " # Note: x and y are both converted into a column vector/array.\n", " dataset = [(np.reshape(x, (input_size, 1)), np.reshape(y, (target_size, 1)))\n", " for x, y in zip(X, Y)]\n", " return dataset" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }