 { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 1 Feature Linear Regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Python Modules" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from mpl_toolkits.mplot3d.axes3d import Axes3D\n", "\n", "from dp_new import Optimizer,Model,Node,SGD, SGD_Momentum,RMS_Prop, Adam\n", "import Animation_Error_Surface as ani" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Data\n", "\n", "True data:\n", "\n", "$$\n", " y_{True} = \\theta_0 x_0 + \\mathcal{Noise}\n", "$$\n", "\n", "Model:\n", "\n", "$$\n", " y = \\theta_0 x_0 + \\mathcal{Bias}\n", "$$\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "N = 200\n", "Feature = 1\n", "\n", "x = np.array(np.random.rand(N,Feature))\n", "Noise = np.array(np.random.normal(0,0.2,size=N))\n", "t0 = 3.\n", "t1 = 2.\n", "Theta = np.array((t0))\n", "y_True = np.sum(x*Theta,axis=1) + Noise\n", "\n", "#print(x)\n", "#print(x[:,0].shape)\n", "\n", "#print(Noise)\n", "#print(Noise.shape)\n", "\n", "#print(y_True)\n", "#print(y_True.shape)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "plt.plot(x[:,0],y_True,'bo',label='Data')\n", "plt.grid()\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set up Neural Net" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Net(Model): \n", " def __init__(self):\n", " super(Net, self).__init__()\n", " self.h = self.Linear_Layer(1, 1, \"h0\",initializer=\"Xavier\")\n", " #self.h2 = self.Linear_Layer(1, 10, \"h1\")\n", " #self.h3 = self.Linear_Layer(10, 10, \"h2\")\n", " #self.h4 = self.Linear_Layer(10, 1, \"h3\")\n", " \n", " def loss(self, x, y):\n", " #l = 
y.shape[0]\n", " #print('length:',l)\n", " if not type(y) == Node: \n", " y = Node(y)\n", " out = self.forward(x)\n", " loss = (out-y).square()\n", " return loss.sum() #/l\n", " \n", " def forward(self, x):\n", " if not type(x) == Node:\n", " x = Node(x)\n", " #x = self.h(x)\n", " #x = self.h2(x).tanh()\n", " #x = self.h3(x).relu()\n", " #out = self.h4(x)\n", " out = self.h(x)\n", " \n", " return out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Training Neural Net" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ " def train_NN(optimizer,learning_rate=0.001):\n", " np.random.seed(42)\n", " hyperpara = {'alpha' : learning_rate}\n", " net = Net()\n", " optim = optimizer(net,x,y_True,batch_size=64,hyperparam = hyperpara)\n", " loss,loss_hist,para_hist = optim.train(steps=1000,print_each=50,err_hist=True)\n", " plt.plot(np.arange(len(loss_hist)),loss_hist, label=optimizer.__name__)\n", " plt.xlabel('iterations')\n", " plt.ylabel('loss')\n", " plt.legend()\n", " plt.show()\n", " \n", " return loss_hist,para_hist" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Visualize Training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def model_linear(XData,Para):\n", " return np.sum(XData*Para[0] + Para[1],axis=1)\n", "\n", "def errf_MSE(YData,XData,Para,Model):\n", " return sum((YData - Model(XData,Para))**2)#/len(YData)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "def hist_adjust(loss_hist,para_hist):\n", " loss_hist = np.array(loss_hist)\n", " para_his = np.zeros((len(para_hist[:]),2))\n", " for i in range(len(para_hist)):\n", " para_his[i,0] = para_hist[i][\"h0_weight\"][0][0]\n", " para_his[i,1] = para_hist[i][\"h0_bias\"][0][0]\n", " return loss_hist,para_his" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], 
"source": [ "%matplotlib inline\n", "loss_hist_SGD, para_hist_SGD = train_NN(SGD)\n", "loss_hist_SGD_Momentum, para_hist_SGD_Momentum = train_NN(SGD_Momentum)\n", "loss_hist_RMS, para_hist_RMS = train_NN(RMS_Prop ,learning_rate=0.05)\n", "loss_hist_Adam, para_hist_Adam = train_NN(Adam ,learning_rate=0.05)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "loss_hist_SGD, para_his_SGD = hist_adjust(loss_hist_SGD, para_hist_SGD)\n", "loss_hist_SGD_Momentum, para_his_SGD_Momentum = hist_adjust(loss_hist_SGD_Momentum, para_hist_SGD_Momentum)\n", "loss_hist_RMS, para_his_RMS = hist_adjust(loss_hist_RMS, para_hist_RMS)\n", "loss_hist_Adam, para_his_Adam = hist_adjust(loss_hist_Adam, para_hist_Adam)\n", "\n", "para_his = np.array([para_his_SGD,para_his_SGD_Momentum,para_his_RMS,para_his_Adam])\n", "loss_hist = np.array([loss_hist_SGD,loss_hist_SGD_Momentum,loss_hist_RMS,loss_hist_Adam])\n", "\n", "#para_his = np.array([para_his_SGD,para_his_SGD_Momentum])\n", "#loss_hist = np.array([loss_hist_SGD,loss_hist_SGD_Momentum])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('Parameter start values')\n", "print(para_his[0][0])\n", "print(para_his[1][0])\n", "print(para_his[2][0])\n", "print(para_his[3][0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "%matplotlib notebook\n", "Limits = [-1,4,-2,2]\n", "Lables = [\"SGD\",\"SGD_Momentum\",'RMS l=0.05','Adam l=0.05']\n", "Animation = ani.optimizer_animation(Limits,x,y_True,errf_MSE,model_linear,para_his,loss_hist,opt_labels=Lables)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "plt.figure(figsize=(20,10))\n", "plt.plot(x[:,0],y_True,'bo',label='Data')\n", "\n", "xx = np.linspace(0,1,100)\n", "y_SGD = xx*para_his_SGD[-1,0] + para_his_SGD[-1,1] \n", "y_SGD_Momentum = 
xx*para_his_SGD_Momentum[-1,0] + para_his_SGD_Momentum[-1,1]\n", "y_RMS = xx*para_his_RMS[-1,0] + para_his_RMS[-1,1] \n", "y_Adam = xx*para_his_Adam[-1,0] + para_his_Adam[-1,1] \n", "\n", "\n", "plt.plot(xx,y_SGD,label='SGD')\n", "plt.plot(xx,y_SGD_Momentum,label='SGD_Momentum')\n", "plt.plot(xx,y_RMS,label='RMS')\n", "plt.plot(xx,y_Adam,label='Adam')\n", "\n", "plt.grid()\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "deep_teaching_kernel", "language": "python", "name": "deep_teaching_kernel" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }
 # -*- coding: utf-8 -*-
"""
Created on Wed Jun  5 11:18:27 2019

@author: Admin

Helpers for visualising a two-parameter error surface and animating the
path one or several optimizers take across it.
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.patches as mpatches
import mpl_toolkits.mplot3d.axes3d as p3
import matplotlib.lines as mlines


def _error_grid(Limit, XData, YData, errfunc, model, num):
    """Evaluate ``errfunc`` on a ``num`` x ``num`` grid of parameter pairs.

    Parameters
    ----------
    Limit : sequence of 4 numbers
        ``[p1_min, p1_max, p2_min, p2_max]`` bounds of the grid.
    XData, YData :
        Passed through unchanged to ``errfunc``.
    errfunc : callable
        Called as ``errfunc(YData, XData, [p1, p2], model)``; must return a
        scalar error.
    model : callable
        Passed through unchanged to ``errfunc``.
    num : int
        Number of grid points per parameter axis.

    Returns
    -------
    (P1_mesh, P2_mesh, Error) : tuple of 2-D ndarrays of identical shape.
    """
    P1 = np.linspace(Limit[0], Limit[1], num=num)
    P2 = np.linspace(Limit[2], Limit[3], num=num)
    P1_mesh, P2_mesh = np.meshgrid(P1, P2)

    # Shape is taken from the mesh itself so rows/columns can never be
    # swapped relative to P1_mesh / P2_mesh.
    Error = np.zeros(P1_mesh.shape)
    for i in range(P1_mesh.shape[0]):
        for j in range(P1_mesh.shape[1]):
            Error[i, j] = errfunc(YData, XData,
                                  [P1_mesh[i, j], P2_mesh[i, j]], model)
    return P1_mesh, P2_mesh, Error


def plot_err_surface(Limit, XData, YData, errfunc, model):
    """Draw the error surface over a 50 x 50 grid of two parameters.

    Parameters
    ----------
    Limit : sequence of 4 numbers
        ``[p1_min, p1_max, p2_min, p2_max]``.
    XData, YData :
        Data handed to ``errfunc``.
    errfunc : callable
        ``errfunc(YData, XData, [p1, p2], model) -> scalar``.
    model : callable
        Model function handed to ``errfunc``.

    The figure is created but not shown; nothing is returned.
    """
    P1_mesh, P2_mesh, Error = _error_grid(Limit, XData, YData,
                                          errfunc, model, num=50)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(P1_mesh, P2_mesh, Error, rstride=1, cstride=1,
                           cmap='viridis', linewidth=0, antialiased=False)
    fig.colorbar(surf)
    ax.set_xlabel('Para_1')
    ax.set_ylabel('Para_2')


def optimizer_animation(Limit, XData, YData, errfunc, model, para_hist,
                        err_hist, opt_labels=None):
    """Animate optimizer trajectories on top of the error surface.

    Parameters
    ----------
    Limit : sequence of 4 numbers
        ``[p1_min, p1_max, p2_min, p2_max]`` of the plotted surface.
    XData, YData :
        Data handed to ``errfunc``.
    errfunc : callable
        ``errfunc(YData, XData, [p1, p2], model) -> scalar``.
    model : callable
        Model function handed to ``errfunc``.
    para_hist : array-like
        Parameter history. Indexed as ``[step, 0..1]`` for a single run, or
        ``[optimizer, step, 0..1]`` when several runs are given.
    err_hist : array-like
        Error history. 1-D (``[step]``) for a single run, 2-D
        (``[optimizer, step]``) for several runs. Its dimensionality selects
        single- or multi-run mode.
    opt_labels : sequence of str, optional
        Legend labels, one per run. Defaults to ``'Opt <index>'``.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        Keep a reference to it, otherwise the animation is garbage collected.
    """
    # Accept plain lists as well as ndarrays; the tuple indexing below
    # requires ndarray semantics.
    para_hist = np.asarray(para_hist)
    err_hist = np.asarray(err_hist)

    P1_mesh, P2_mesh, Error = _error_grid(Limit, XData, YData,
                                          errfunc, model, num=10)

    fig = plt.figure()
    # add_subplot(projection='3d') attaches the axes to the figure on every
    # matplotlib version; bare Axes3D(fig) no longer does on newer releases.
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(P1_mesh, P2_mesh, Error, rstride=1, cstride=1,
                           cmap='viridis', linewidth=0, antialiased=False,
                           alpha=0.4)
    fig.colorbar(surf)
    ax.set_xlabel('Para_1')
    ax.set_ylabel('Para_2')

    colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k']

    def color_of(j):
        # Cycle through the palette so more than len(colors) runs still work.
        return colors[j % len(colors)]

    def err_dot(i):
        """Add the dot for step ``i`` of the single run; return its artists."""
        return ax.plot([para_hist[i, 0]], [para_hist[i, 1]], [err_hist[i]],
                       'ro')

    def err_dot_multi(i):
        """Add one dot per run for step ``i``; return the new artists."""
        artists = []
        for j in range(len(err_hist)):
            artists.extend(
                ax.plot([para_hist[j, i, 0]], [para_hist[j, i, 1]],
                        [err_hist[j, i]], color_of(j) + 'o',
                        markersize=6, label='opti' + str(j)))
        print('New Dot')
        return artists

    if err_hist.ndim >= 2:
        dot = err_dot_multi
        # Number of steps is the second axis; do not index row 1, which does
        # not exist when only one run is supplied in 2-D form.
        epochs = err_hist.shape[1]
        print('Training epochs: ', epochs)
        legend_items = []
        for j in range(len(err_hist)):
            labels = 'Opt ' + str(j) if opt_labels is None else opt_labels[j]
            legend_items.append(
                mpatches.Patch(color=color_of(j), label=labels))
    else:
        dot = err_dot
        epochs = len(err_hist)
        print('Training epochs: ', epochs)
        # Single run: there is no loop index here, so the default label is
        # fixed to index 0.
        labels = 'Opt 0' if opt_labels is None else opt_labels[0]
        legend_items = [mpatches.Patch(color='r', label=labels)]

    # blit=False: every frame adds dots that must stay visible as a trail,
    # so a full redraw is needed; blitting would also require the callbacks
    # to return every modified artist on each frame.
    DotAnimation = animation.FuncAnimation(fig, dot, epochs, interval=400,
                                           blit=False, repeat=True)
    ax.legend(handles=legend_items)
    plt.show()
    return DotAnimation
 { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### 2D convolution as matrix operation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Strictly speaking, we are working here with cross-correlation and not with convolution." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Kernel:\n", "\n", "$$\n", "\\begin{pmatrix}\n", "w_{11} & w_{12} \\\\\n", "w_{21} & w_{22} \n", "\\end{pmatrix}\n", "$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Image:\n", "\n", "$$\n", "\\begin{pmatrix}\n", "x_{11} & x_{12} & x_{13} & x_{14}\\\\\n", "x_{21} & x_{22} & x_{23} & x_{24}\\\\\n", "x_{31} & x_{32} & x_{33} & x_{34}\n", "\\end{pmatrix}\n", "$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Exercise\n", "\n", "The 2D cross-correlation can be expressed as a matrix multiplication:\n", "\n", "$$\n", "\\vec y = W \\cdot \\vec x\n", "$$\n", "\n", "What must the matrix $W$ and the vector $\\vec x$ look like?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Exercise\n", "\n", "Describe how this can be extended if we have more than one input (and output) channel in a CNN. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Programming exercise\n", " \n", " \n", "Implement a function with\n", "\n", "- Input:\n", "    - image (with k channels)\n", "    - k convolutional kernel(s)\n", "- Output:\n", "    - convolution kernels as matrix\n", "    - image as vector " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "deep_teaching_kernel", "language": "python", "name": "deep_teaching_kernel" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }
 ... ... @@ -82,7 +82,6 @@ ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ ... ... @@ -90,7 +89,7 @@ "\n", "### Recap Backpropagation\n", "\n", "Learning of (deep) neural networks relys on the backpropagation algorithm.\n", "Learning of (deep) neural networks relies on the backpropagation algorithm.\n", "\n", "Imagine a network with three hidden layers, each consisting of only one neuron:\n", "\n", ... ... @@ -105,7 +104,7 @@ "\n", "* First we calculate the error between our output and the true label:\n", " * $error = cost(a_3, y_{true})$\n", "* Second, we calculate the partial derivatives for the weights $\\frac{\\partial error}{\\partial w_j}$ and the bias $\\frac{\\partial error}{\\partial w_j}$ to find out in which direction we have to adjust them in order to lower the costs ($\\alpha$ the learning rate), e.g.:\n", "* Second, we calculate the partial derivatives for the weights $\\frac{\\partial error}{\\partial w_j}$ and the bias $\\frac{\\partial error}{\\partial b_j}$ to find out in which direction we have to adjust them in order to lower the costs ($\\alpha$ the learning rate), e.g.:\n", " * $w_1 \\leftarrow w_1 - \\alpha \\frac{\\partial error}{\\partial w_1}$" ] }, ... ... @@ -243,7 +242,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ "The range where $\\frac{\\partial ReLU(z)}{\\partial z} = 1$ (infinity points) is a lot bigger than for $\\frac{\\partial tanh(z)}{\\partial z}$ (only at one point)." "The range where $\\frac{\\partial ReLU(z)}{\\partial z} = 1$ (infinitely points) is a lot bigger than for $\\frac{\\partial tanh(z)}{\\partial z}$ (only at one point)." ] }, { ... ... @@ -300,7 +299,7 @@