Commit 9e6a8e58 authored by Jim Hefferon's avatar Jim Hefferon

line of best fit

parent c2d1d434
......@@ -215,6 +215,11 @@
\emph{An Introduction to Probability Theory and Its Applications}
(vol.~1, 3rd~ed.),
John Wiley, 1968.
\bibitem[Federal Reserve]{FedReserve}
Federal Reserve Bank of Richmond,
\emph{FAQs Currency and Coin},
\url{http://www.richmondfed.org/faqs/currency/},
Jan~2012.
\bibitem[Finkbeiner]{Finkbeiner}
Daniel T.\ Finkbeiner~III,
\emph{Introduction to Matrices and Linear Transformations},
......
This diff is collapsed.
......@@ -19258,18 +19258,18 @@
\end{equation*}
Projecting into the linear subspace gives this
\begin{equation*}
\frac{\colvec{4 \\ 9 \\ 13 \\ 17 \\20}
\frac{\colvec[r]{4 \\ 9 \\ 13 \\ 17 \\20}
\dotprod
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}}{%
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}}{%
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}
\dotprod
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}}
\cdot
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}
=
\frac{1832}{3520}
\cdot
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}
\end{equation*}
so the slope of the line of best fit is approximately $0.52$.
\begin{center} \small
......@@ -19281,13 +19281,13 @@
With this input
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}
1 & 1852.71 \\
1 & 1858.88 \\
\vdots &\vdots \\
1 & 1985.54 \\
1 & 1993.71
\end{pmatrix}
\end{mat}
\qquad
b = \colvec{ 292.0 \\
285.0 \\
......@@ -19308,13 +19308,13 @@
With this input (the years are zeroed at $1900$)
\begin{equation*}
A :=
\begin{pmatrix}
\begin{mat}
1 & .38 \\
1 & .54 \\
\vdots &\vdots \\
1 & 92.71 \\
1 & 95.54
\end{pmatrix}
\end{mat}
\qquad
b = \colvec{ 249.0 \\
246.2 \\
......@@ -19337,13 +19337,13 @@
With this input (the years are zeroed at $1900$)
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}
1 & 21.46 \\
1 & 32.63 \\
\vdots &\vdots \\
1 & 89.54 \\
1 & 96.63
\end{pmatrix}
\end{mat}
\qquad
b =
\colvec{ 373.2 \\
......@@ -19396,7 +19396,7 @@
\partsitem On the basis of this information
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}
1 & 53 \\
1 & 75 \\
% 1 & 57 \\
......@@ -19422,7 +19422,7 @@
\vdots &\vdots \\
1 & 80 \\
1 & 81
\end{pmatrix}
\end{mat}
\qquad
b =
\colvec{ 3 \\
......@@ -19482,7 +19482,7 @@
With this input
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}[r]
1 & 1 \\
1 & 2 \\
1 & 3 \\
......@@ -19490,9 +19490,9 @@
1 & 6 \\
1 & 7 \\
1 & 8
\end{pmatrix}
\end{mat}
\qquad
b = \colvec{-0.40893539 \\
b = \colvec[r]{-0.40893539 \\
-0.1426675 \\
0 \\
0.18184359 \\
......@@ -19519,7 +19519,7 @@
\partsitem With this input
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}[r]
1 & 306 \\
1 & 329 \\
1 & 356 \\
......@@ -19528,10 +19528,10 @@
1 & 427 \\
1 & 415 \\
1 & 424
\end{pmatrix}
\end{mat}
\qquad
b =
\colvec{975 \\
\colvec[r]{975 \\
969 \\
948 \\
910 \\
......@@ -2183,11 +2183,11 @@ beginfig(44) % least squares money denom vs life
numeric u; %scaling factor
numeric v; %vertical scaling factor
numeric w; %horizontal scaling factor
u:=.01in; w:=u; v:=0.618*4w;
u:=.02in; w:=u; v:=4w;
z0=(0w,0v);
z1=(105w,0v);
z2=(0w,22.5v);
z2=(0w,8.5v);
% the axes
pickup pensquare scaled line_width_light
......@@ -2200,25 +2200,25 @@ beginfig(44) % least squares money denom vs life
label.bot(btex {\tiny 70} etex,(70w,0v));
label.bot(btex {\tiny 90} etex,(90w,0v));
draw z0--z2; % heads axis (y-axis)
label.ulft(btex {\small \textit{avg life}} etex,z2);
sidetoside_ticks(4,(0w,5v),(0w,5v));
label.lft(btex {\tiny 5} etex,(0w,5v));
label.lft(btex {\tiny 15} etex,(0w,15v));
label.ulft(btex {\small \textit{life (yrs)}} etex,z2);
sidetoside_ticks(8,(0w,1v),(0w,1v));
label.lft(btex {\tiny 4} etex,(0w,4v));
label.lft(btex {\tiny 8} etex,(0w,8v));
% the line
pickup pencircle scaled line_width_light;
z3=(100w,(0.18*100v)+1.05v); %
z4=(0w,1.05v);
draw z4--z3 dashed evenly scaled .707;
z3=(100w,((0.7513/12)*100v)+(14.1582/12)*v); %
z4=(0w,(14.1582/12)*v);
draw z4--z3 dashed evenly scaled .707; % .7[white,black];
pickup pencircle scaled line_width_dark;
% the points
z10=(1w,1.5v);
z11=(5w,2v);
z12=(10w,3v);
z13=(20w,5v);
z14=(50w,9v);
z15=(100w,20v);
z10=(1w,(22/12)*v);
z11=(5w,(15.9/12)*v);
z12=(10w,(18.3/12)*v);
z13=(20w,(24.3/12)*v);
z14=(50w,(55.4/12)*v);
z15=(100w,(88.8/12)*v);
for k = 10 upto 15:
drawpoint(z[k]);
endfor
......
# currency.sage
# Data for the line of best fit topic: average life of U.S. currency
# by denomination.
# Inside sage, say
# load "currency.sage"
# (A.transpose()*A).inverse()*A.transpose()*v
# The second line is the least-squares (normal equations) solution; it
# returns the pair (intercept, slope), approximately (14.16, 0.75).

# Each row of A is [1, denomination in dollars]; the column of ones
# carries the intercept term of the line.
A = Matrix([[1, 1],
[1, 5],
[1, 10],
[1, 20],
[1, 50],
[1, 100]])
# Average life in months for each denomination, in the same row order as A.
v = vector([22.0,
15.9,
18.3,
24.3,
55.4,
88.8])
\ No newline at end of file
four_minute_mile.png

16 KB | W: | H:

four_minute_mile.png

10.3 KB | W: | H:

four_minute_mile.png
four_minute_mile.png
four_minute_mile.png
four_minute_mile.png
  • 2-up
  • Swipe
  • Onion skin
This diff is collapsed.
......@@ -208,9 +208,11 @@
\settoheight{\heightofcdot}{$\cdot$}
\settowidth{\widthofcdot}{$\cdot$}
\newsavebox{\dotprodcircle}
% 2012-Jan-06 JH too big: \savebox{\dotprodcircle}{$\bullet$}
\savebox{\dotprodcircle}{\includegraphics{dotprod.1}}
\newcommand{\dotprod}{\mathbin{\raisebox{.5\heightofcdot}{%
% 2012-Jan-06 JH \bullet too big:
\savebox{\dotprodcircle}{\scalebox{0.55}{$\bullet$}}
% 2012-Jan JH mpost won't take the graphic
% \savebox{\dotprodcircle}{\includegraphics{dotprod.1}}
\newcommand{\dotprod}{\mathbin{\raisebox{.25\heightofcdot}{%
\makebox[\widthofcdot]{$\smash{\usebox{\dotprodcircle}}$}}}}}
\newcommand{\nbyn}[1]{#1 \! \times \! #1 } % \! is negative thinspace
......
......@@ -6,7 +6,7 @@
\index{line of best fit|(}
\index{linear equation!inconsistent systems}
\textit{This Topic requires the formulas from the subsections on
Orthogonal Projection Into a Line, and Projection Into a
Orthogonal Projection Into a Line and Projection Into a
Subspace.}
Scientists are often presented with a system that
......@@ -36,22 +36,23 @@ So we expect that the system derived from the experiment has no solution.
90m &= &51
\end{linsys}
\end{equation*}
That is, the vector of experimental data is not in the subspace
of solutions.
That is, the vector of data that we collected is not in the subspace
where in theory we should find it.
\begin{equation*}
\colvec{16 \\ 34 \\ 51}\not\in
\set{ m\colvec{30 \\ 60 \\ 90} \suchthat m\in\Re}
\end{equation*}
However, we want to find the~$m$ that most nearly works.
We have to do something, so we look for the~$m$ that most nearly works.
An orthogonal projection of the data vector into the line subspace
gives our best guess.
gives this best guess.
\begin{equation*}
\frac{ \colvec{16 \\ 34 \\ 51}\dotprod\colvec{30 \\ 60 \\ 90} }{
\colvec{30 \\ 60 \\ 90}\dotprod\colvec{30 \\ 60 \\ 90} }
\cdot\colvec{30 \\ 60 \\ 90}
=\frac{7110}{12600}\cdot \colvec{30 \\ 60 \\ 90}
\frac{ \colvec[r]{16 \\ 34 \\ 51}\dotprod\colvec[r]{30 \\ 60 \\ 90} }{
\colvec[r]{30 \\ 60 \\ 90}\dotprod\colvec[r]{30 \\ 60 \\ 90} }
\cdot\colvec[r]{30 \\ 60 \\ 90}
=\frac{7110}{12600}\cdot \colvec[r]{30 \\ 60 \\ 90}
\end{equation*}
The estimate (\( m=7110/12600\approx 0.56 \)) is a bit high but not much,
The estimate (\( m=7110/12600\approx 0.56 \)) is a bit more than one half,
but not much,
so probably the penny is fair enough.
The line with the slope \( m\approx 0.56 \)
......@@ -82,14 +83,18 @@ We can also handle cases where the line need not
pass through the origin.
For example, the different denominations of U.S.\ money have different average
times in circulation
(the $\$2$~bill is left off as a special case).
How long should we expect a $\$25$~bill to last?
times in circulation~\cite{FedReserve}.
(The $\$2$~bill is left off as a special case because
Americans mistakenly believe that
it is a collectible and so do not circulate it.)
How long should a $\$25$~bill last?
\begin{center} \small
\begin{tabular}{r|cccccc}
\textit{denomination} &$1$ &$5$ &$10$ &$20$ &$50$ &$100$ \\
\textit{denomination}
&$1$ &$5$ &$10$ &$20$ &$50$ &$100$ \\
\hline
\textit{average life (years)} &$1.5$ &$2$ &$3$ &$5$ &$9$ &$20$ \\
\textit{average life (mos)}
&$22.0$ &$15.9$ &$18.3$ &$24.3$ &$55.4$ &$88.8$ \\
\end{tabular}
\end{center}
The plot (see below) looks roughly linear.
......@@ -101,16 +106,16 @@ Consider the matrix of coefficients of that linear system and also its
vector of constants, the experimentally-determined values.
\begin{equation*}
A=
\begin{pmatrix}
\begin{mat}[r]
1 &1 \\
1 &5 \\
1 &10 \\
1 &20 \\
1 &50 \\
1 &100
\end{pmatrix}
\end{mat}
\qquad
\vec{v}=\colvec{1.5 \\ 2 \\ 3 \\ 5 \\ 9 \\ 20}
\vec{v}=\colvec[r]{22.0 \\ 15.9 \\ 18.3 \\ 24.3 \\ 55.4 \\ 88.8}
\end{equation*}
The ending result in the subsection on Projection into a Subspace says
that coefficients $b$ and $m$ so that the linear combination
......@@ -162,6 +167,7 @@ we can then compare to the actual date.
% \end{tabular}
A few minutes in \textit{Sage} gives the slope and intercept.
% see mile.sage
\begin{indented}\small
\begin{verbatim}
sage: data=[[1870,268.8], [1880,264.5], [1890,258.4], [1900,255.6],
......@@ -183,8 +189,10 @@ sage: points(data)+plot(model(intercept=find_fit(data,model)[0].rhs(),
\end{indented}
gives this graph.
\begin{center}
\scannedpicture{four_minute_mile_greyscale}
% or the color version: \scannedpicture{four_minute_mile}
% black and white
% \scannedpicture{four_minute_mile_greyscale}
% or the color version:
\includegraphics{four_minute_mile.png}
\end{center}
Note that the progression is surprisingly linear.
Our prediction is $1958.73$; the actual date of Roger Bannister's
......@@ -211,7 +219,7 @@ record was 1954-May-06.
%With this input
%\begin{equation*}
% A=
% \begin{pmatrix}
% \begin{mat}
% 1 & 1860 \\
% 1 & 1870 \\
%% 1 & 1880 \\
......@@ -228,7 +236,7 @@ record was 1954-May-06.
% \vdots &\vdots \\
% 1 &1990 \\
% 1 &2000
% \end{pmatrix}
% \end{mat}
% \qquad
% \vec{v} = \colvec{280.0 \\
% 268.8 \\
......@@ -464,18 +472,18 @@ record was 1954-May-06.
\end{equation*}
Projecting into the linear subspace gives this
\begin{equation*}
\frac{\colvec{4 \\ 9 \\ 13 \\ 17 \\20}
\frac{\colvec[r]{4 \\ 9 \\ 13 \\ 17 \\20}
\dotprod
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}}{%
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}}{%
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}
\dotprod
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}}
\cdot
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}
=
\frac{1832}{3520}
\cdot
\colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}
\colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}
\end{equation*}
so the slope of the line of best fit is approximately $0.52$.
\begin{center} \small
......@@ -491,13 +499,13 @@ record was 1954-May-06.
With this input
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}
1 & 1852.71 \\
1 & 1858.88 \\
\vdots &\vdots \\
1 & 1985.54 \\
1 & 1993.71
\end{pmatrix}
\end{mat}
\qquad
b = \colvec{ 292.0 \\
285.0 \\
......@@ -521,13 +529,13 @@ record was 1954-May-06.
With this input (the years are zeroed at $1900$)
\begin{equation*}
A :=
\begin{pmatrix}
\begin{mat}
1 & .38 \\
1 & .54 \\
\vdots &\vdots \\
1 & 92.71 \\
1 & 95.54
\end{pmatrix}
\end{mat}
\qquad
b = \colvec{ 249.0 \\
246.2 \\
......@@ -550,13 +558,13 @@ record was 1954-May-06.
With this input (the years are zeroed at $1900$)
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}
1 & 21.46 \\
1 & 32.63 \\
\vdots &\vdots \\
1 & 89.54 \\
1 & 96.63
\end{pmatrix}
\end{mat}
\qquad
b =
\colvec{ 373.2 \\
......@@ -652,7 +660,7 @@ record was 1954-May-06.
\partsitem On the basis of this information
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}
1 & 53 \\
1 & 75 \\
% 1 & 57 \\
......@@ -678,7 +686,7 @@ record was 1954-May-06.
\vdots &\vdots \\
1 & 80 \\
1 & 81
\end{pmatrix}
\end{mat}
\qquad
b =
\colvec{ 3 \\
......@@ -772,7 +780,7 @@ record was 1954-May-06.
With this input
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}[r]
1 & 1 \\
1 & 2 \\
1 & 3 \\
......@@ -780,9 +788,9 @@ record was 1954-May-06.
1 & 6 \\
1 & 7 \\
1 & 8
\end{pmatrix}
\end{mat}
\qquad
b = \colvec{-0.40893539 \\
b = \colvec[r]{-0.40893539 \\
-0.1426675 \\
0 \\
0.18184359 \\
......@@ -838,7 +846,7 @@ record was 1954-May-06.
\partsitem With this input
\begin{equation*}
A =
\begin{pmatrix}
\begin{mat}[r]
1 & 306 \\
1 & 329 \\
1 & 356 \\
......@@ -847,10 +855,10 @@ record was 1954-May-06.
1 & 427 \\
1 & 415 \\
1 & 424
\end{pmatrix}
\end{mat}
\qquad
b =
\colvec{975 \\
\colvec[r]{975 \\
969 \\
948 \\
910 \\
......
# mile.sage
# Data for the men's mile, for the least squares topic.
# Inside sage run  load "mile.sage"  and four_minute_mile.png is saved.

# Each entry is [year, record]; the record values are presumably times
# in seconds -- TODO confirm against the data source.
data=[[1870,268.8],
      [1880,264.5],
      [1890,258.4],
      [1900,255.6],
      [1910,255.6],
      [1920,252.6],
      [1930,250.4],
      [1940,246.4],
      [1950,241.4]]
var('slope,intercept')
model(x) = slope*x+intercept
# Fit once and look the parameters up by name.  The earlier version ran
# find_fit twice and picked results by position, which silently depends
# on the order in which find_fit lists the parameters.
fit = find_fit(data, model, solution_dict=True)
best_line = model(intercept=fit[intercept], slope=fit[slope])
# Data points plus the fitted line in red over the years 1860 to 1960.
g = points(data) + plot(best_line, (x,1860,1960), color='red', figsize=3)
g.save("four_minute_mile.png")
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment