Commit 1aab52f3 authored by Klaus Strohmenger

new notebooks and reviews

parent 84570d1a
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1 Feature Linear Regression"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Python Modules"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d.axes3d import Axes3D\n",
"\n",
"from dp_new import Optimizer,Model,Node,SGD, SGD_Momentum,RMS_Prop, Adam\n",
"import Animation_Error_Surface as ani"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Data\n",
"\n",
"True data:\n",
"\n",
"$$\n",
" y_{True} = \\theta_0 x_0 + \\mathcal{Noise}\n",
"$$\n",
"\n",
"Model:\n",
"\n",
"$$\n",
" y = \\theta_0 x_0 + \\mathcal{Bias}\n",
"$$\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"N = 200\n",
"Feature = 1\n",
"\n",
"x = np.array(np.random.rand(N,Feature))\n",
"Noise = np.array(np.random.normal(0,0.2,size=N))\n",
"t0 = 3.\n",
"t1 = 2.\n",
"Theta = np.array((t0))\n",
"y_True = np.sum(x*Theta,axis=1) + Noise\n",
"\n",
"#print(x)\n",
"#print(x[:,0].shape)\n",
"\n",
"#print(Noise)\n",
"#print(Noise.shape)\n",
"\n",
"#print(y_True)\n",
"#print(y_True.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"plt.plot(x[:,0],y_True,'bo',label='Data')\n",
"plt.grid()\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up Neural Net"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Net(Model): \n",
" def __init__(self):\n",
" super(Net, self).__init__()\n",
" self.h = self.Linear_Layer(1, 1, \"h0\",initializer=\"Xavier\")\n",
" #self.h2 = self.Linear_Layer(1, 10, \"h1\")\n",
" #self.h3 = self.Linear_Layer(10, 10, \"h2\")\n",
" #self.h4 = self.Linear_Layer(10, 1, \"h3\")\n",
" \n",
" def loss(self, x, y):\n",
" #l = y.shape[0]\n",
" #print('length:',l)\n",
" if not type(y) == Node: \n",
" y = Node(y)\n",
" out = self.forward(x)\n",
" loss = (out-y).square()\n",
" return loss.sum() #/l\n",
" \n",
" def forward(self, x):\n",
" if not type(x) == Node:\n",
" x = Node(x)\n",
" #x = self.h(x)\n",
" #x = self.h2(x).tanh()\n",
" #x = self.h3(x).relu()\n",
" #out = self.h4(x)\n",
" out = self.h(x)\n",
" \n",
" return out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training Neural Net"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" def train_NN(optimizer,learning_rate=0.001):\n",
" np.random.seed(42)\n",
" hyperpara = {'alpha' : learning_rate}\n",
" net = Net()\n",
" optim = optimizer(net,x,y_True,batch_size=64,hyperparam = hyperpara)\n",
" loss,loss_hist,para_hist = optim.train(steps=1000,print_each=50,err_hist=True)\n",
" plt.plot(np.arange(len(loss_hist)),loss_hist, label=optimizer.__name__)\n",
" plt.xlabel('iterations')\n",
" plt.ylabel('loss')\n",
" plt.legend()\n",
" plt.show()\n",
" \n",
" return loss_hist,para_hist"
]
},
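{
"cell_type": "markdown",
"metadata": {},
"source": [
"The optimizers imported from `dp_new` (SGD, SGD_Momentum, RMS_Prop, Adam) are used here as black boxes. As a rough orientation, the next cell sketches the standard update rules for a single parameter vector. This is a minimal sketch with assumed default hyperparameters; the actual `dp_new` implementations may differ in details such as parameter names or bias correction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only, not used by the training code in this notebook.\n",
"def sgd_step(theta, grad, alpha=0.001):\n",
"    return theta - alpha * grad\n",
"\n",
"def momentum_step(theta, grad, v, alpha=0.001, beta=0.9):\n",
"    v = beta * v + grad                        # exponentially averaged gradient\n",
"    return theta - alpha * v, v\n",
"\n",
"def rmsprop_step(theta, grad, s, alpha=0.05, beta=0.9, eps=1e-8):\n",
"    s = beta * s + (1 - beta) * grad**2        # running mean of squared gradients\n",
"    return theta - alpha * grad / (np.sqrt(s) + eps), s\n",
"\n",
"def adam_step(theta, grad, v, s, t, alpha=0.05, beta1=0.9, beta2=0.999, eps=1e-8):\n",
"    # t is the 1-based step count, used for bias correction\n",
"    v = beta1 * v + (1 - beta1) * grad         # first moment estimate\n",
"    s = beta2 * s + (1 - beta2) * grad**2      # second moment estimate\n",
"    v_hat = v / (1 - beta1**t)\n",
"    s_hat = s / (1 - beta2**t)\n",
"    return theta - alpha * v_hat / (np.sqrt(s_hat) + eps), v, s"
]
},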
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualize Training"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def model_linear(XData,Para):\n",
" return np.sum(XData*Para[0] + Para[1],axis=1)\n",
"\n",
"def errf_MSE(YData,XData,Para,Model):\n",
" return sum((YData - Model(XData,Para))**2)#/len(YData)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"def hist_adjust(loss_hist,para_hist):\n",
" loss_hist = np.array(loss_hist)\n",
" para_his = np.zeros((len(para_hist[:]),2))\n",
" for i in range(len(para_hist)):\n",
" para_his[i,0] = para_hist[i][\"h0_weight\"][0][0]\n",
" para_his[i,1] = para_hist[i][\"h0_bias\"][0][0]\n",
" return loss_hist,para_his"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"loss_hist_SGD, para_hist_SGD = train_NN(SGD)\n",
"loss_hist_SGD_Momentum, para_hist_SGD_Momentum = train_NN(SGD_Momentum)\n",
"loss_hist_RMS, para_hist_RMS = train_NN(RMS_Prop ,learning_rate=0.05)\n",
"loss_hist_Adam, para_hist_Adam = train_NN(Adam ,learning_rate=0.05)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loss_hist_SGD, para_his_SGD = hist_adjust(loss_hist_SGD, para_hist_SGD)\n",
"loss_hist_SGD_Momentum, para_his_SGD_Momentum = hist_adjust(loss_hist_SGD_Momentum, para_hist_SGD_Momentum)\n",
"loss_hist_RMS, para_his_RMS = hist_adjust(loss_hist_RMS, para_hist_RMS)\n",
"loss_hist_Adam, para_his_Adam = hist_adjust(loss_hist_Adam, para_hist_Adam)\n",
"\n",
"para_his = np.array([para_his_SGD,para_his_SGD_Momentum,para_his_RMS,para_his_Adam])\n",
"loss_hist = np.array([loss_hist_SGD,loss_hist_SGD_Momentum,loss_hist_RMS,loss_hist_Adam])\n",
"\n",
"#para_his = np.array([para_his_SGD,para_his_SGD_Momentum])\n",
"#loss_hist = np.array([loss_hist_SGD,loss_hist_SGD_Momentum])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Parameter start values')\n",
"print(para_his[0][0])\n",
"print(para_his[1][0])\n",
"print(para_his[2][0])\n",
"print(para_his[3][0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"%matplotlib notebook\n",
"Limits = [-1,4,-2,2]\n",
"Lables = [\"SGD\",\"SGD_Momentum\",'RMS l=0.05','Adam l=0.05']\n",
"Animation = ani.optimizer_animation(Limits,x,y_True,errf_MSE,model_linear,para_his,loss_hist,opt_labels=Lables)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"plt.figure(figsize=(20,10))\n",
"plt.plot(x[:,0],y_True,'bo',label='Data')\n",
"\n",
"xx = np.linspace(0,1,100)\n",
"y_SGD = xx*para_his_SGD[-1,0] + para_his_SGD[-1,1] \n",
"y_SGD_Momentum = xx*para_his_SGD_Momentum[-1,0] + para_his_SGD_Momentum[-1,1]\n",
"y_RMS = xx*para_his_RMS[-1,0] + para_his_RMS[-1,1] \n",
"y_Adam = xx*para_his_Adam[-1,0] + para_his_Adam[-1,1] \n",
"\n",
"\n",
"plt.plot(xx,y_SGD,label='SGD')\n",
"plt.plot(xx,y_SGD_Momentum,label='SGD_Momentum')\n",
"plt.plot(xx,y_RMS,label='RMS')\n",
"plt.plot(xx,y_Adam,label='Adam')\n",
"\n",
"plt.grid()\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "deep_teaching_kernel",
"language": "python",
"name": "deep_teaching_kernel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 5 11:18:27 2019
@author: Admin
"""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.patches as mpatches
import mpl_toolkits.mplot3d.axes3d as p3
import matplotlib.lines as mlines
def plot_err_surface(Limit, XData, YData, errfunc, model):
    Num = 50
    P1 = np.linspace(Limit[0], Limit[1], num=Num)
    P2 = np.linspace(Limit[2], Limit[3], num=Num)
    P1_mesh, P2_mesh = np.meshgrid(P1, P2)
    Error = np.zeros([len(P1), len(P2)])
    for i in range(Num):
        for j in range(Num):
            Error[i, j] = errfunc(YData, XData, [P1_mesh[i, j], P2_mesh[i, j]], model)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(P1_mesh, P2_mesh, Error, rstride=1, cstride=1,
                           cmap='viridis', linewidth=0, antialiased=False)
    fig.colorbar(surf)
    ax.set_xlabel('Para_1')
    ax.set_ylabel('Para_2')
def optimizer_animation(Limit, XData, YData, errfunc, model, para_hist, err_hist, opt_labels=None):
    # error surface over a coarse parameter grid (background of the animation)
    Num = 10
    P1 = np.linspace(Limit[0], Limit[1], num=Num)
    P2 = np.linspace(Limit[2], Limit[3], num=Num)
    P1_mesh, P2_mesh = np.meshgrid(P1, P2)
    Error = np.zeros([len(P1), len(P2)])
    for i in range(Num):
        for j in range(Num):
            Error[i, j] = errfunc(YData, XData, [P1_mesh[i, j], P2_mesh[i, j]], model)
    fig = plt.figure()
    ax = p3.Axes3D(fig)
    surf = ax.plot_surface(P1_mesh, P2_mesh, Error, rstride=1, cstride=1,
                           cmap='viridis', linewidth=0, antialiased=False, alpha=0.4)
    fig.colorbar(surf)
    ax.set_xlabel('Para_1')
    ax.set_ylabel('Para_2')
    colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k']

    def err_dot(i):
        # single optimizer: draw the (parameter, error) point of step i
        ax.plot([para_hist[i, 0]], [para_hist[i, 1]], [err_hist[i]], 'ro')

    def err_dot_multi(i):
        # several optimizers: draw one point per optimizer for step i
        for j in range(len(err_hist)):
            ax.plot([para_hist[j, i, 0]], [para_hist[j, i, 1]], [err_hist[j, i]],
                    colors[j] + 'o', markersize=6, label='opti' + str(j))
        print('New Dot')

    if len(err_hist.shape) >= 2:
        dot = err_dot_multi
        epochs = len(err_hist[1])
        print('Training epochs: ', epochs)
        legend_items = []
        for j in range(len(err_hist)):
            if opt_labels is None:
                labels = 'Opt ' + str(j)
            else:
                labels = opt_labels[j]
            legend_items.append(mpatches.Patch(color=colors[j], label=labels))
    else:
        dot = err_dot
        epochs = len(err_hist)
        print('Training epochs: ', epochs)
        if opt_labels is None:
            labels = 'Opt 0'
        else:
            labels = opt_labels[0]
        legend_items = [mpatches.Patch(color='r', label=labels)]
    DotAnimation = animation.FuncAnimation(fig, dot, epochs, interval=400, blit=True, repeat=True)
    plt.legend(handles=legend_items)
    plt.show()
    return DotAnimation
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2D convolution as matrix operation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Strictly speaking we are working here with cross correlation and not with a convolution."
]
},
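{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the next cell is a minimal sketch (not part of the exercises) of a direct 2D cross-correlation with `numpy`; a true convolution would additionally flip the kernel in both axes. The function name and the example shapes are only illustrative assumptions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def cross_correlate_2d(image, kernel):\n",
"    # 'valid' cross-correlation: slide the kernel over the image without padding\n",
"    ih, iw = image.shape\n",
"    kh, kw = kernel.shape\n",
"    out = np.zeros((ih - kh + 1, iw - kw + 1))\n",
"    for r in range(out.shape[0]):\n",
"        for c in range(out.shape[1]):\n",
"            out[r, c] = np.sum(image[r:r+kh, c:c+kw] * kernel)\n",
"    return out\n",
"\n",
"# convolution = cross-correlation with the kernel flipped in both axes:\n",
"# conv = cross_correlate_2d(image, kernel[::-1, ::-1])\n",
"\n",
"image = np.arange(12, dtype=float).reshape(3, 4)   # 3x4 example image as above\n",
"kernel = np.array([[1., 0.], [0., -1.]])           # an example 2x2 kernel\n",
"print(cross_correlate_2d(image, kernel))"
]
},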
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Kernel:\n",
"\n",
"$$\n",
"\\begin{pmatrix}\n",
"w_{11} & w_{12} \\\\\n",
"w_{21} & w_{22} \n",
"\\end{pmatrix}\n",
"$$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Image:\n",
"\n",
"$$\n",
"\\begin{pmatrix}\n",
"x_{11} & x_{12} & x_{13} & x_{14}\\\\\n",
"x_{21} & x_{22} & x_{23} & x_{24}\\\\\n",
"x_{31} & x_{32} & x_{33} & x_{34}\\\\\n",
"\\end{pmatrix}\n",
"$$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Exercise\n",
"\n",
"The 2D cross correlation can be expressed as a matrix multiplication:\n",
"\n",
"$$\n",
"\\vec y = W \\cdot \\vec x\n",
"$$\n",
"\n",
"How must the matrix $W$ and the vector $\\vec x$ looks like?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Exercise:\n",
"\n",
"Describe how this can be extended if we have more than one input (and output) channel in a CNN. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Programming exercise\n",
" \n",
" \n",
"Implement a function with\n",
"\n",
"- Input:\n",
" - image (with k-channels)\n",
" - k convolutional kernel(s)\n",
"- Output\n",
" - convolution kernels as matrix\n",
" - image as vector "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "deep_teaching_kernel",
"language": "python",
"name": "deep_teaching_kernel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -82,7 +82,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -90,7 +89,7 @@
"\n",
"### Recap Backpropagation\n",
"\n",
"Learning of (deep) neural networks relys on the backpropagation algorithm.\n",
"Learning of (deep) neural networks relies on the backpropagation algorithm.\n",
"\n",
"Imagine a network with three hidden layers, each consisting of only one neuron:\n",
"\n",
@@ -105,7 +104,7 @@
"\n",
"* First we calculate the error between our output and the true label:\n",
" * $error = cost(a_3, y_{true})$\n",
"* Second, we calculate the partial derivatives for the weights $\\frac{\\partial error}{\\partial w_j}$ and the bias $\\frac{\\partial error}{\\partial w_j}$ to find out in which direction we have to adjust them in order to lower the costs ($\\alpha$ the learning rate), e.g.:\n",
"* Second, we calculate the partial derivatives for the weights $\\frac{\\partial error}{\\partial w_j}$ and the bias $\\frac{\\partial error}{\\partial b_j}$ to find out in which direction we have to adjust them in order to lower the costs ($\\alpha$ the learning rate), e.g.:\n",
" * $w_1 \\leftarrow w_1 - \\alpha \\frac{\\partial error}{\\partial w_1}$"
]
},
@@ -243,7 +242,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The range where $\\frac{\\partial ReLU(z)}{\\partial z} = 1$ (infinity points) is a lot bigger than for $\\frac{\\partial tanh(z)}{\\partial z}$ (only at one point)."
"The range where $\\frac{\\partial ReLU(z)}{\\partial z} = 1$ (infinitely points) is a lot bigger than for $\\frac{\\partial tanh(z)}{\\partial z}$ (only at one point)."
]
},
{
@@ -300,7 +299,7 @@