{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3ca77ade",
   "metadata": {
    "id": "3ca77ade"
   },
   "source": [
    "## NN578_network.ipynb (Spring 2024)\n",
    "\n",
    "### Neural network code \"network.py\" from Mike Nielsen's NNDL book.  Modified slightly for our course."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7d258c17",
   "metadata": {
    "id": "7d258c17"
   },
   "outputs": [],
   "source": [
    "import random\n",
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fda89966",
   "metadata": {
    "id": "fda89966"
   },
   "outputs": [],
   "source": [
    "class Network(object):\n",
    "    def __init__(self, sizes):\n",
    "        \"\"\"The list ``sizes`` contains the number of neurons in the\n",
    "        respective layers of the network, for example [2, 3, 1].\n",
    "        The biases and weights are initialized randomly.  Note that\n",
    "        the first layer is an input layer, and by convention we\n",
    "        won't set any biases for those neurons.\"\"\"\n",
    "        self.num_layers = len(sizes)\n",
    "        self.sizes = sizes\n",
    "        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]\n",
    "        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]\n",
    "        ##\n",
    "        self.init_acts_shape = []\n",
    "\n",
    "    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):\n",
    "        \"\"\"Train the neural network using mini-batch stochastic\n",
    "        gradient descent.  The ``training_data`` is a list of tuples\n",
    "        ``(x, y)``.  If ``test_data`` is provided then the\n",
    "        network will be evaluated against the test data after each\n",
    "        epoch -- essentially the role as a validation data.\"\"\"\n",
    "        n = len(training_data)\n",
    "        if test_data:\n",
    "            nvalid = len(test_data)\n",
    "\n",
    "        for j in range(epochs):\n",
    "            # random.shuffle(training_data) # supressed for now\n",
    "            mini_batches = [\n",
    "                training_data[k : k + mini_batch_size]\n",
    "                for k in range(0, n, mini_batch_size)\n",
    "            ]\n",
    "\n",
    "            for mini_batch in mini_batches:\n",
    "                self.update_mini_batch(mini_batch, eta)\n",
    "\n",
    "            # Compute the training result for this epoch.\n",
    "            acc_train = self.evaluate(training_data)/n\n",
    "            if not test_data:\n",
    "                print(\"Epoch {}: train acc {:.4f}\".format(j, acc_train))\n",
    "            else:\n",
    "                acc_valid = self.evaluate(test_data)/nvalid\n",
    "                print(\"Epoch {}: train acc {:.4f}, valid acc {:.4f}\".format\n",
    "                      (j, acc_train, acc_valid))\n",
    "            # Early exit if applies\n",
    "            if acc_train == 1.0:\n",
    "                break\n",
    "\n",
    "    def update_mini_batch(self, mini_batch, eta):\n",
    "        \"\"\"Update the network's weights and biases by applying\n",
    "        gradient descent using backpropagation to a single mini batch.\n",
    "        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``\n",
    "        is the learning rate.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "\n",
    "        for x, y in mini_batch:\n",
    "            delta_nabla_b, delta_nabla_w = self.backprop(x, y)\n",
    "            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n",
    "            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n",
    "\n",
    "        self.weights = [\n",
    "            w - (eta / len(mini_batch)) * nw for w, nw in zip(self.weights, nabla_w)\n",
    "        ]\n",
    "        self.biases = [\n",
    "            b - (eta / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)\n",
    "        ]\n",
    "\n",
    "    def backprop(self, x, y):\n",
    "        \"\"\"Return a tuple ``(nabla_b, nabla_w)`` representing the\n",
    "        gradient for the cost function C_x.  ``nabla_b`` and\n",
    "        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar\n",
    "        to ``self.biases`` and ``self.weights``.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "\n",
    "        # forward pass\n",
    "        activation = x\n",
    "        activations = [x]  # list to store all the activations, layer by layer\n",
    "\n",
    "        ## nt: DO NOT REMOVE THIS LINE!!\n",
    "        self.init_acts_shape = [act.shape for act in activations]\n",
    "\n",
    "        zs = []  # list to store all the z vectors, layer by layer\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            z = np.dot(w, activation) + b\n",
    "            zs.append(z)\n",
    "            activation = sigmoid(z)\n",
    "            activations.append(activation)\n",
    "\n",
    "        # backward pass\n",
    "        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])\n",
    "        nabla_b[-1] = delta\n",
    "        nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n",
    "\n",
    "        # Note that the variable l in the loop below is used a little\n",
    "        # differently to the notation in Chapter 2 of the book.  Here,\n",
    "        # l = 1 means the last layer of neurons, l = 2 is the\n",
    "        # second-last layer, and so on.\n",
    "        for l in range(2, self.num_layers):\n",
    "            z = zs[-l]\n",
    "            sp = sigmoid_prime(z)\n",
    "            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp\n",
    "            nabla_b[-l] = delta\n",
    "            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())\n",
    "        return (nabla_b, nabla_w)\n",
    "\n",
    "    def evaluate(self, test_data):\n",
    "        \"\"\"Return the number of test inputs for which the neural\n",
    "        network outputs the correct result.\"\"\"\n",
    "        test_results = [\n",
    "            (np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data\n",
    "        ]\n",
    "        return sum(int(x == y) for (x, y) in test_results)\n",
    "\n",
    "    def feedforward(self, a):\n",
    "        \"\"\"Return the output of the network if ``a`` is input.  Note this\n",
    "        function is called during evaluation; not during training/backprop.\"\"\"\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            a = sigmoid(np.dot(w, a) + b)\n",
    "        return a\n",
    "\n",
    "    def cost_derivative(self, output_activations, y):\n",
    "        \"\"\"Return the vector of partial derivatives \\partial C_x /\n",
    "        \\partial a for the output activations.\"\"\"\n",
    "        return output_activations - y\n",
    "\n",
    "    @classmethod\n",
    "    def load_network(cls, filename):\n",
    "        \"\"\"Load a neural network from a json file ``filename``.  Returns an\n",
    "        instance of Network. \"\"\"\n",
    "        f = open(filename, \"r\")\n",
    "        data = json.load(f)\n",
    "        f.close()\n",
    "        net = cls(data[\"sizes\"])\n",
    "        net.weights = [np.array(w) for w in data[\"weights\"]]\n",
    "        net.biases = [np.array(b) for b in data[\"biases\"]]\n",
    "        return net\n",
    "\n",
    "    def save_network(self, filename):\n",
    "        \"\"\"Save the neural network to a json file ``filename``.\"\"\"\n",
    "        data = {\n",
    "            \"sizes\": self.sizes,\n",
    "            \"weights\": [w.tolist() for w in self.weights],\n",
    "            \"biases\": [b.tolist() for b in self.biases]  # ,\n",
    "            # \"cost\": str(self.cost.__name__)\n",
    "        }\n",
    "        f = open(filename, \"w\")\n",
    "        json.dump(data, f)\n",
    "        f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f747f644",
   "metadata": {
    "id": "f747f644"
   },
   "outputs": [],
   "source": [
    "def sigmoid(z):\n",
    "    \"\"\"The sigmoid function.\"\"\"\n",
    "    return 1.0 / (1.0 + np.exp(-z))\n",
    "\n",
    "def sigmoid_prime(z):\n",
    "    \"\"\"Derivative of the sigmoid function.\"\"\"\n",
    "    return sigmoid(z) * (1 - sigmoid(z))\n",
    "\n",
    "def vectorize_target(n, target):\n",
    "    \"\"\"Return an array of shape (n,1) with a 1.0 in the target position\n",
    "    and zeroes elsewhere.  The parameter target is assumed to be\n",
    "    an array of size 1, and the 0th item is the target position (1). \"\"\"\n",
    "    e = np.zeros((n, 1))\n",
    "    e[int(target[0])] = 1.0\n",
    "    return e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "22046bf1",
   "metadata": {
    "id": "22046bf1"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "def my_load_csv(fname, input_size, target_size, seednum=517):\n",
    "    ''' Function to load the data from a csv file.  Note the target (y)\n",
    "        is assumed to be already in the one-hot-vector notation.\n",
    "        Also each instance in the returned data is made into column vectors.'''\n",
    "    # Read in the data into pandas dataframe\n",
    "    df = pd.read_csv(fname, header=None)\n",
    "\n",
    "    # Set the random seed if specified to shuffle, for reproducibility.\n",
    "    # Otherwise no shuffling.\n",
    "    if seednum:\n",
    "        df = df.sample(frac=1, random_state=seednum)\n",
    "\n",
    "    # Separate the X and Y parts\n",
    "    X = df[df.columns[:input_size]].values.tolist()\n",
    "    Y = df[df.columns[-target_size:]].values.tolist()\n",
    "\n",
    "    # Combine the parts for each instance and put all in a list.\n",
    "    # Note: x and y are both converted into a column vector/array.\n",
    "    dataset = [(np.reshape(x, (input_size, 1)), np.reshape(y, (target_size, 1)))\n",
    "               for x, y in zip(X, Y)]\n",
    "    return dataset"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}