Commit 9e6a8e58 by Jim Hefferon

### line of best fit

parent c2d1d434
 ... ... @@ -215,6 +215,11 @@ \emph{An Introduction to Probability Theory and Its Applications} (vol.~1, 3rd~ed.), John Wiley, 1968. \bibitem[Federal Reserve]{FedReserve} Federal Reserve Bank of Richmond, \emph{FAQs Currency and Coin}, \url{http://www.richmondfed.org/faqs/currency/} Jan~2012. \bibitem[Finkbeiner]{Finkbeiner} Daniel T.\ Finkbeiner~III, \emph{Introduction to Matrices and Linear Transformations}, ... ...
This diff is collapsed.
 ... ... @@ -19258,18 +19258,18 @@ \end{equation*} Projecting into the linear subspace gives this \begin{equation*} \frac{\colvec{4 \\ 9 \\ 13 \\ 17 \\20} \frac{\colvec[r]{4 \\ 9 \\ 13 \\ 17 \\20} \dotprod \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}}{% \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}}{% \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40} \dotprod \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}} \cdot \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40} = \frac{1832}{3520} \cdot \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40} \end{equation*} so the slope of the line of best fit is approximately $0.52$. \begin{center} \small ... ... @@ -19281,13 +19281,13 @@ With this input \begin{equation*} A = \begin{pmatrix} \begin{mat} 1 & 1852.71 \\ 1 & 1858.88 \\ \vdots &\vdots \\ 1 & 1985.54 \\ 1 & 1993.71 \end{pmatrix} \end{mat} \qquad b = \colvec{ 292.0 \\ 285.0 \\ ... ... @@ -19308,13 +19308,13 @@ With this input (the years are zeroed at $1900$) \begin{equation*} A := \begin{pmatrix} \begin{mat} 1 & .38 \\ 1 & .54 \\ \vdots &\vdots \\ 1 & 92.71 \\ 1 & 95.54 \end{pmatrix} \end{mat} \qquad b = \colvec{ 249.0 \\ 246.2 \\ ... ... @@ -19337,13 +19337,13 @@ With this input (the years are zeroed at $1900$) \begin{equation*} A = \begin{pmatrix} \begin{mat} 1 & 21.46 \\ 1 & 32.63 \\ \vdots &\vdots \\ 1 & 89.54 \\ 1 & 96.63 \end{pmatrix} \end{mat} \qquad b = \colvec{ 373.2 \\ ... ... @@ -19396,7 +19396,7 @@ \partsitem On the basis of this information \begin{equation*} A = \begin{pmatrix} \begin{mat} 1 & 53 \\ 1 & 75 \\ % 1 & 57 \\ ... ... @@ -19422,7 +19422,7 @@ \vdots \\ 1 & 80 \\ 1 & 81 \end{pmatrix} \end{mat} \qquad b = \colvec{ 3 \\ ... ... @@ -19482,7 +19482,7 @@ With this input \begin{equation*} A = \begin{pmatrix} \begin{mat}[r] 1 & 1 \\ 1 & 2 \\ 1 & 3 \\ ... ... 
@@ -19490,9 +19490,9 @@ 1 & 6 \\ 1 & 7 \\ 1 & 8 \end{pmatrix} \end{mat} \qquad b = \colvec{-0.40893539 \\ b = \colvec[r]{-0.40893539 \\ -0.1426675 \\ 0 \\ 0.18184359 \\ ... ... @@ -19519,7 +19519,7 @@ \partsitem With this input \begin{equation*} A = \begin{pmatrix} \begin{mat}[r] 1 & 306 \\ 1 & 329 \\ 1 & 356 \\ ... ... @@ -19528,10 +19528,10 @@ 1 & 427 \\ 1 & 415 \\ 1 & 424 \end{pmatrix} \end{mat} \qquad b = \colvec{975 \\ b = \colvec[r]{975 \\ 969 \\ 948 \\ 910 \\
 ... ... @@ -2183,11 +2183,11 @@ beginfig(44) % least squares money denom vs life numeric u; %scaling factor numeric v; %vertical scaling factor numeric w; %horizontal scaling factor u:=.01in; w:=u; v:=0.618*4w; u:=.02in; w:=u; v:=4w; z0=(0w,0v); z1=(105w,0v); z2=(0w,22.5v); z2=(0w,8.5v); % the axes pickup pensquare scaled line_width_light ... ... @@ -2200,25 +2200,25 @@ beginfig(44) % least squares money denom vs life label.bot(btex {\tiny 70} etex,(70w,0v)); label.bot(btex {\tiny 90} etex,(90w,0v)); draw z0--z2; % heads axis (y-axis) label.ulft(btex {\small \textit{avg life}} etex,z2); sidetoside_ticks(4,(0w,5v),(0w,5v)); label.lft(btex {\tiny 5} etex,(0w,5v)); label.lft(btex {\tiny 15} etex,(0w,15v)); label.ulft(btex {\small \textit{life (yrs)}} etex,z2); sidetoside_ticks(8,(0w,1v),(0w,1v)); label.lft(btex {\tiny 4} etex,(0w,4v)); label.lft(btex {\tiny 8} etex,(0w,8v)); % the line pickup pencircle scaled line_width_light; z3=(100w,(0.18*100v)+1.05v); % z4=(0w,1.05v); draw z4--z3 dashed evenly scaled .707; z3=(100w,((0.7513/12)*100v)+(14.1582/12)*v); % z4=(0w,(14.1582/12)*v); draw z4--z3 dashed evenly scaled .707; % .7[white,black]; pickup pencircle scaled line_width_dark; % the points z10=(1w,1.5v); z11=(5w,2v); z12=(10w,3v); z13=(20w,5v); z14=(50w,9v); z15=(100w,20v); z10=(1w,(22/12)*v); z11=(5w,(15.9/12)*v); z12=(10w,(18.3/12)*v); z13=(20w,(24.3/12)*v); z14=(50w,(55.4/12)*v); z15=(100w,(88.8/12)*v); for k = 10 upto 15: drawpoint(z[k]); endfor ... ...
currency.sage 0 → 100644
 # currency.sage
# Data for the line of best fit topic: average life of U.S. bills
# by denomination.
#
# Inside Sage, say
#     load "currency.sage"
#     (A.transpose()*A).inverse()*A.transpose()*v
# The second expression solves the normal equations; because the first
# column of A is all 1's and the second holds the denominations, the
# result is the pair (intercept, slope) of the least-squares line.

# One row per denomination: [1, denomination in dollars].
A = Matrix([
    [1,   1],
    [1,   5],
    [1,  10],
    [1,  20],
    [1,  50],
    [1, 100],
])

# Observed average life for each denomination, in the same row order as A.
# (The accompanying table in the text labels these values as months.)
v = vector([22.0, 15.9, 18.3, 24.3, 55.4, 88.8])

16 KB | W: | H:

10.3 KB | W: | H:

• 2-up
• Swipe
• Onion skin
This diff is collapsed.
 ... ... @@ -208,9 +208,11 @@ \settoheight{\heightofcdot}{$\cdot$} \settowidth{\widthofcdot}{$\cdot$} \newsavebox{\dotprodcircle} % 2012-Jan-06 JH too big: \savebox{\dotprodcircle}{$\bullet$} \savebox{\dotprodcircle}{\includegraphics{dotprod.1}} \newcommand{\dotprod}{\mathbin{\raisebox{.5\heightofcdot}{% % 2012-Jan-06 JH \bullet too big: \savebox{\dotprodcircle}{\scalebox{0.55}{$\bullet$}} % 2012-Jan JH mpost won't take the graphic % \savebox{\dotprodcircle}{\includegraphics{dotprod.1}} \newcommand{\dotprod}{\mathbin{\raisebox{.25\heightofcdot}{% \makebox[\widthofcdot]{$\smash{\usebox{\dotprodcircle}}$}}}}} \newcommand{\nbyn}[1]{#1 \! \times \! #1 } % \! is negative thinspace ... ...
 ... ... @@ -6,7 +6,7 @@ \index{line of best fit|(} \index{linear equation!inconsistent systems} \textit{This Topic requires the formulas from the subsections on Orthogonal Projection Into a Line, and Projection Into a Orthogonal Projection Into a Line and Projection Into a Subspace.} Scientists are often presented with a system that ... ... @@ -36,22 +36,23 @@ So we expect that the system derived from the experiment has no solution. 90m &= &51 \end{linsys} \end{equation*} That is, the vector of experimental data is not in the subspace of solutions. That is, the vector of data that we collected is not in the subspace where in theory we should find it. \begin{equation*} \colvec{16 \\ 34 \\ 51}\not\in \set{ m\colvec{30 \\ 60 \\ 90} \suchthat m\in\Re} \end{equation*} However, we want to find the~$m$ that most nearly works. We have to do something, so we look for the~$m$ that most nearly works. An orthogonal projection of the data vector into the line subspace gives our best guess. gives this best guess. \begin{equation*} \frac{ \colvec{16 \\ 34 \\ 51}\dotprod\colvec{30 \\ 60 \\ 90} }{ \colvec{30 \\ 60 \\ 90}\dotprod\colvec{30 \\ 60 \\ 90} } \cdot\colvec{30 \\ 60 \\ 90} =\frac{7110}{12600}\cdot \colvec{30 \\ 60 \\ 90} \frac{ \colvec[r]{16 \\ 34 \\ 51}\dotprod\colvec[r]{30 \\ 60 \\ 90} }{ \colvec[r]{30 \\ 60 \\ 90}\dotprod\colvec[r]{30 \\ 60 \\ 90} } \cdot\colvec[r]{30 \\ 60 \\ 90} =\frac{7110}{12600}\cdot \colvec[r]{30 \\ 60 \\ 90} \end{equation*} The estimate ($$m=7110/12600\approx 0.56$$) is a bit high but not much, The estimate ($$m=7110/12600\approx 0.56$$) is a bit more than one half, but not much, so probably the penny is fair enough. The line with the slope $$m\approx 0.56$$ ... ... @@ -82,14 +83,18 @@ We can also handle cases where the line need not pass through the origin. For example, the different denominations of U.S.\ money have different average times in circulation (the $\$2$~bill is left off as a special case). 
How long should we expect a$\$25$~bill to last? times in circulation.\cite{FedReserve} (The $\$2$~bill is left off as a special case because Americans mistakenly believe that it is a collectible, and do not circulate these bills.) How long should a$\$25$~bill last? \begin{center} \small \begin{tabular}{r|cccccc} \textit{denomination} &$1$ &$5$ &$10$ &$20$ &$50$ &$100$ \\ \textit{denomination} &$1$ &$5$ &$10$ &$20$ &$50$ &$100$ \\ \hline \textit{average life (years)} &$1.5$ &$2$ &$3$ &$5$ &$9$ &$20$ \\ \textit{average life (mos)} &$22.0$ &$15.9$ &$18.3$ &$24.3$ &$55.4$ &$88.8$ \\ \end{tabular} \end{center} The plot (see below) looks roughly linear. ... ... @@ -101,16 +106,16 @@ Consider the matrix of coefficients of that linear system and also its vector of constants, the experimentally-determined values. \begin{equation*} A= \begin{pmatrix} \begin{mat}[r] 1 &1 \\ 1 &5 \\ 1 &10 \\ 1 &20 \\ 1 &50 \\ 1 &100 \end{pmatrix} \end{mat} \qquad \vec{v}=\colvec{1.5 \\ 2 \\ 3 \\ 5 \\ 9 \\ 20} \vec{v}=\colvec[r]{22.0 \\ 15.9 \\ 18.3 \\ 24.3 \\ 55.4 \\ 88.8} \end{equation*} The ending result in the subsection on Projection into a Subspace says that coefficients $b$ and $m$ so that the linear combination ... ... @@ -162,6 +167,7 @@ we can then compare to the actual date. % \end{tabular} A few minutes in \textit{Sage} gives the slope and intercept. % see mile.sage \begin{indented}\small \begin{verbatim} sage: data=[[1870,268.8], [1880,264.5], [1890,258.4], [1900,255.6], ... ... @@ -183,8 +189,10 @@ sage: points(data)+plot(model(intercept=find_fit(data,model)[0].rhs(), \end{indented} gives this graph. \begin{center} \scannedpicture{four_minute_mile_greyscale} % or the color version: \scannedpicture{four_minute_mile} % black and white % \scannedpicture{four_minute_mile_greyscale} % or the color version: \includegraphics{four_minute_mile.png} \end{center} Note that the progression is surprisingly linear. Our prediction is $1958.73$; the actual date of Roger Bannister's ... ... 
@@ -211,7 +219,7 @@ record was 1954-May-06. %With this input %\begin{equation*} % A= % \begin{pmatrix} % \begin{mat} % 1 & 1860 \\ % 1 & 1870 \\ %% 1 & 1880 \\ ... ... @@ -228,7 +236,7 @@ record was 1954-May-06. % \vdots &\vdots \\ % 1 &1990 \\ % 1 &2000 % \end{pmatrix} % \end{mat} % \qquad % \vec{v} = \colvec{280.0 \\ % 268.8 \\ ... ... @@ -464,18 +472,18 @@ record was 1954-May-06. \end{equation*} Projecting into the linear subspace gives this \begin{equation*} \frac{\colvec{4 \\ 9 \\ 13 \\ 17 \\20} \frac{\colvec[r]{4 \\ 9 \\ 13 \\ 17 \\20} \dotprod \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}}{% \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}}{% \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40} \dotprod \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40}} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40}} \cdot \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40} = \frac{1832}{3520} \cdot \colvec{8 \\ 16 \\ 24 \\ 32 \\ 40} \colvec[r]{8 \\ 16 \\ 24 \\ 32 \\ 40} \end{equation*} so the slope of the line of best fit is approximately $0.52$. \begin{center} \small ... ... @@ -491,13 +499,13 @@ record was 1954-May-06. With this input \begin{equation*} A = \begin{pmatrix} \begin{mat} 1 & 1852.71 \\ 1 & 1858.88 \\ \vdots &\vdots \\ 1 & 1985.54 \\ 1 & 1993.71 \end{pmatrix} \end{mat} \qquad b = \colvec{ 292.0 \\ 285.0 \\ ... ... @@ -521,13 +529,13 @@ record was 1954-May-06. With this input (the years are zeroed at $1900$) \begin{equation*} A := \begin{pmatrix} \begin{mat} 1 & .38 \\ 1 & .54 \\ \vdots &\vdots \\ 1 & 92.71 \\ 1 & 95.54 \end{pmatrix} \end{mat} \qquad b = \colvec{ 249.0 \\ 246.2 \\ ... ... @@ -550,13 +558,13 @@ record was 1954-May-06. With this input (the years are zeroed at $1900$) \begin{equation*} A = \begin{pmatrix} \begin{mat} 1 & 21.46 \\ 1 & 32.63 \\ \vdots &\vdots \\ 1 & 89.54 \\ 1 & 96.63 \end{pmatrix} \end{mat} \qquad b = \colvec{ 373.2 \\ ... ... @@ -652,7 +660,7 @@ record was 1954-May-06. 
\partsitem On the basis of this information \begin{equation*} A = \begin{pmatrix} \begin{mat} 1 & 53 \\ 1 & 75 \\ % 1 & 57 \\ ... ... @@ -678,7 +686,7 @@ record was 1954-May-06. \vdots \\ 1 & 80 \\ 1 & 81 \end{pmatrix} \end{mat} \qquad b = \colvec{ 3 \\ ... ... @@ -772,7 +780,7 @@ record was 1954-May-06. With this input \begin{equation*} A = \begin{pmatrix} \begin{mat}[r] 1 & 1 \\ 1 & 2 \\ 1 & 3 \\ ... ... @@ -780,9 +788,9 @@ record was 1954-May-06. 1 & 6 \\ 1 & 7 \\ 1 & 8 \end{pmatrix} \end{mat} \qquad b = \colvec{-0.40893539 \\ b = \colvec[r]{-0.40893539 \\ -0.1426675 \\ 0 \\ 0.18184359 \\ ... ... @@ -838,7 +846,7 @@ record was 1954-May-06. \partsitem With this input \begin{equation*} A = \begin{pmatrix} \begin{mat}[r] 1 & 306 \\ 1 & 329 \\ 1 & 356 \\ ... ... @@ -847,10 +855,10 @@ record was 1954-May-06. 1 & 427 \\ 1 & 415 \\ 1 & 424 \end{pmatrix} \end{mat} \qquad b = \colvec{975 \\ b = \colvec[r]{975 \\ 969 \\ 948 \\ 910 \\ ... ...
mile.sage 0 → 100644
 # mile.sage
# Data for the men's mile, for the least squares (line of best fit) topic.
# Inside Sage run
#     load "mile.sage"
# and four_minute_mile.png is saved.

# [year, record] pairs, one per decade from 1870 to 1950.
# NOTE(review): the record values are presumably times in seconds
# (268.8 = 4:28.8) -- confirm against the source table.
data = [[1870, 268.8], [1880, 264.5], [1890, 258.4], [1900, 255.6],
        [1910, 255.6], [1920, 252.6], [1930, 250.4], [1940, 246.4],
        [1950, 241.4]]

# Symbolic parameters of the linear model record = slope*year + intercept.
var('slope,intercept')
model(x) = slope*x+intercept

# Run the least-squares fit once and look the parameters up by name.
# The earlier version called find_fit twice and picked the results by
# position ([0] for intercept, [1] for slope), which both repeats the
# fit and silently depends on the order in which Sage lists parameters.
fit = find_fit(data, model, solution_dict=True)

# Scatter plot of the data with the fitted line drawn over 1860..1960.
g = points(data) + plot(model(intercept=fit[intercept], slope=fit[slope]),
                        (x, 1860, 1960), color='red', figsize=3)
g.save("four_minute_mile.png")
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!