Polynomial data fitting

$\newenvironment {prompt}{}{} \newcommand {\ungraded }[0]{} \newcommand {\definedTerm }[1]{\textbf {#1}} \newcommand {\dfn }[1]{\textbf {#1}} \newcommand {\wt }[0]{\widetilde } \newcommand {\ov }[0]{\overline } \newcommand {\inj }[0]{\rightarrowtail } \newcommand {\surj }[0]{\twoheadrightarrow } \newcommand {\harpoon }[0]{\overset {\rightharpoonup }} \newcommand {\orderof }[1]{\sim #1} \newcommand {\Z }[0]{\mathbb {Z}} \newcommand {\reals }[0]{\mathbb {R}} \newcommand {\real }[1]{\mathbb {R}^{#1}} \newcommand {\complexes }[0]{\mathbb {C}} \newcommand {\complex }[1]{\mathbb {C}^{#1}} \newcommand {\CC }[0]{\mathbb {C}} \newcommand {\conjugate }[1]{\overline {#1}} \newcommand {\modulus }[1]{\left \lvert #1\right \rvert } \newcommand {\zerovector }[0]{\vect {0}} \newcommand {\zeromatrix }[0]{\mathcal {O}} \newcommand {\innerproduct }[2]{\left \langle #1,\,#2\right \rangle } \newcommand {\norm }[1]{\left \lVert #1\right \rVert } \newcommand {\dimension }[1]{\dim \left (#1\right )} \newcommand {\nullity }[1]{n\left (#1\right )} \newcommand {\rank }[1]{r\left (#1\right )} \newcommand {\ds }[0]{\oplus } \newcommand {\detname }[1]{\det \left (#1\right )} \newcommand {\detbars }[1]{\left \lvert #1\right \rvert } \newcommand {\trace }[1]{t\left (#1\right )} \newcommand {\sr }[1]{#1^{1/2}} \newcommand {\spn }[1]{\left \langle #1\right \rangle } \newcommand {\nsp }[1]{\mathcal {N}\!\left (#1\right )} \newcommand {\csp }[1]{\mathcal {C}\!\left (#1\right )} \newcommand {\rsp }[1]{\mathcal {R}\!\left (#1\right )} \newcommand {\lns }[1]{\mathcal {L}\!\left (#1\right )} \newcommand {\per }[1]{#1^\perp } \newcommand {\augmented }[2]{\left \lbrack \left .#1\,\right \rvert \,#2\right \rbrack } \newcommand {\linearsystem }[2]{\mathcal {LS}\!\left (#1,\,#2\right )} \newcommand {\homosystem }[1]{\linearsystem {#1}{\zerovector }} \newcommand {\rowopswap }[2]{R_{#1}\leftrightarrow R_{#2}} \newcommand {\rowopmult }[2]{#1R_{#2}} \newcommand {\rowopadd }[3]{#1R_{#2}+R_{#3}} \newcommand 
{\leading }[1]{\fbox {#1}} \newcommand {\rref }[0]{\xrightarrow {\text {RREF}}} \newcommand {\elemswap }[2]{E_{#1,#2}} \newcommand {\elemmult }[2]{E_{#2}\left (#1\right )} \newcommand {\elemadd }[3]{E_{#2,#3}\left (#1\right )} \newcommand {\scalarlist }[2]{{#1}_{1},\,{#1}_{2},\,{#1}_{3},\,\ldots ,\,{#1}_{#2}} \newcommand {\vect }[1]{\mathbf {#1}} \newcommand {\colvector }[1]{\begin {bmatrix}#1\end {bmatrix}} \newcommand {\vectorcomponents }[2]{\colvector {#1_{1}\\#1_{2}\\#1_{3}\\\vdots \\#1_{#2}}} \newcommand {\vectorlist }[2]{\vect {#1}_{1},\,\vect {#1}_{2},\,\vect {#1}_{3},\,\ldots ,\,\vect {#1}_{#2}} \newcommand {\vectorentry }[2]{\left \lbrack #1\right \rbrack _{#2}} \newcommand {\matrixentry }[2]{\left \lbrack #1\right \rbrack _{#2}} \newcommand {\lincombo }[3]{#1_{1}\vect {#2}_{1}+#1_{2}\vect {#2}_{2}+#1_{3}\vect {#2}_{3}+\cdots +#1_{#3}\vect {#2}_{#3}} \newcommand {\matrixcolumns }[2]{\left \lbrack \vect {#1}_{1}|\vect {#1}_{2}|\vect {#1}_{3}|\ldots |\vect {#1}_{#2}\right \rbrack } \newcommand {\transpose }[1]{#1^{t}} \newcommand {\inverse }[1]{#1^{-1}} \newcommand {\submatrix }[3]{#1\left (#2|#3\right )} \newcommand {\adj }[1]{\transpose {\left (\conjugate {#1}\right )}} \newcommand {\adjoint }[1]{#1^\ast } \newcommand {\set }[1]{\left \{#1\right \}} \newcommand {\setparts }[2]{\left \lbrace #1\,\middle |\,#2\right \rbrace } \newcommand {\card }[1]{\left \lvert #1\right \rvert } \newcommand {\setcomplement }[1]{\overline {#1}} \newcommand {\charpoly }[2]{p_{#1}\left (#2\right )} \newcommand {\eigenspace }[2]{\mathcal {E}_{#1}\left (#2\right )} \newcommand {\eigensystem }[3]{\lambda &amp;=#2&amp;\eigenspace {#1}{#2}&amp;=\spn {\set {#3}}} \newcommand {\geneigenspace }[2]{\mathcal {G}_{#1}\left (#2\right )} \newcommand {\algmult }[2]{\alpha _{#1}\left (#2\right )} \newcommand {\geomult }[2]{\gamma _{#1}\left (#2\right )} \newcommand {\indx }[2]{\iota _{#1}\left (#2\right )} \newcommand {\ltdefn }[3]{#1\colon #2\rightarrow #3} \newcommand {\lteval }[2]{#1\left 
(#2\right )} \newcommand {\ltinverse }[1]{#1^{-1}} \newcommand {\restrict }[2]{{#1}|_{#2}} \newcommand {\preimage }[2]{#1^{-1}\left (#2\right )} \newcommand {\rng }[1]{\mathcal {R}\!\left (#1\right )} \newcommand {\krn }[1]{\mathcal {K}\!\left (#1\right )} \newcommand {\compose }[2]{{#1}\circ {#2}} \newcommand {\vslt }[2]{\mathcal {LT}\left (#1,\,#2\right )} \newcommand {\isomorphic }[0]{\cong } \newcommand {\similar }[2]{\inverse {#2}#1#2} \newcommand {\vectrepname }[1]{\rho _{#1}} \newcommand {\vectrep }[2]{\lteval {\vectrepname {#1}}{#2}} \newcommand {\vectrepinvname }[1]{\ltinverse {\vectrepname {#1}}} \newcommand {\vectrepinv }[2]{\lteval {\ltinverse {\vectrepname {#1}}}{#2}} \newcommand {\matrixrep }[3]{M^{#1}_{#2,#3}} \newcommand {\matrixrepcolumns }[4]{\left \lbrack \left .\vectrep {#2}{\lteval {#1}{\vect {#3}_{1}}}\right |\left .\vectrep {#2}{\lteval {#1}{\vect {#3}_{2}}}\right |\left .\vectrep {#2}{\lteval {#1}{\vect {#3}_{3}}}\right |\ldots \left |\vectrep {#2}{\lteval {#1}{\vect {#3}_{#4}}}\right .\right \rbrack } \newcommand {\cbm }[2]{C_{#1,#2}} \newcommand {\jordan }[2]{J_{#1}\left (#2\right )} \newcommand {\hadamard }[2]{#1\circ #2} \newcommand {\hadamardidentity }[1]{J_{#1}} \newcommand {\hadamardinverse }[1]{\widehat {#1}} \newcommand {\id }[0]{\mathrm {id}} \newcommand {\C }[0]{\mathbb {C}} \newcommand {\R }[0]{\mathbb {R}} \newcommand {\Q }[0]{\mathbb {Q}} \newcommand {\N }[0]{\mathbb {N}} \newcommand {\F }[0]{\mathbb {F}} \newcommand {\bbH }[0]{\mathbb {H}} \newcommand {\SO }[0]{\operatorname {SO}} \newcommand {\Dten }[0]{D_{10}} \newcommand {\calC }[0]{\mathcal {C}} \newcommand {\calD }[0]{\mathcal {D}} \newcommand {\calF }[0]{\mathcal {F}} \newcommand {\calO }[0]{\mathcal {O}} \newcommand {\calP }[0]{\mathcal {P}} \newcommand {\calN }[0]{\mathcal {N}} \newcommand {\calR }[0]{\mathcal {R}} \newcommand {\calS }[0]{\mathcal {S}} \newcommand {\calX }[0]{\mathcal {X}} \newcommand {\rmZ }[0]{\mathrm {Z}} \newcommand {\rmC }[0]{\mathrm {C}} 
\newcommand {\rmH }[0]{\mathrm {H}} \newcommand {\trans }[0]{\mathrm {T}} \newcommand {\Span }[0]{\operatorname {Span}} \newcommand {\Rep }[0]{\operatorname {Rep}} \newcommand {\Vect }[0]{\operatorname {Vec}} \newcommand {\Obj }[0]{\operatorname {Obj}} \newcommand {\Adj }[0]{\operatorname {Adj}} \newcommand {\Aut }[0]{\operatorname {Aut}} \newcommand {\Hom }[0]{\operatorname {Hom}} \newcommand {\End }[0]{\operatorname {End}} \newcommand {\tr }[0]{\operatorname {tr}} \newcommand {\Stab }[0]{\operatorname {Stab}} \newcommand {\FPdim }[0]{\operatorname {FPdim}} \newcommand {\lcm }[0]{\mathrm {l.c.m}} \newcommand {\proj }[0]{\operatorname {proj}} \newcommand {\Repart }[0]{\operatorname {Re}} \newcommand {\Impart }[0]{\operatorname {Im}} \newcommand {\im }[0]{\operatorname {im}} \newcommand {\rk }[0]{\operatorname {rank}} \newcommand {\diag }[0]{\operatorname {diag}} \newcommand {\Zmod }[1]{\Z /#1 \Z } \newcommand {\Reptwo }[0]{\Rep (\Z /2\Z )} \newcommand {\qaH }[0]{\mathrm {H}_{\mathrm {qa}}} \newcommand {\abH }[0]{\mathrm {H}_{\mathrm {ab}}} \newcommand {\qaZ }[0]{\mathrm {Z}_{\mathrm {qa}}} \newcommand {\qaB }[0]{\mathrm {B}_{\mathrm {qa}}} \newcommand {\1}[0]{\mathbf {1}} \newcommand {\Ctimes }[0]{\mathbb {C}^{\times }} \newcommand {\HyperFirstAtBeginDocument }[0]{\AtBeginDocument }$

Suppose we are given $n$ points $\{(x_1,y_1), (x_2,y_2),\dots ,(x_n,y_n)\}$ in the plane $\mathbb R^2$, with distinct $x$-coordinates (in practice, such sets of points can arise as data based on the measurement of some quantity - recorded as the $y$-coordinate - as a function of some parameter recorded as the $x$-coordinate). Then we would like to find the equation of the line that best fits these points (by exactly what measurement the line represents a best possible fit is explained below). If we write the equation of the line as $y = l(x) = c_0 + c_1x$ for indeterminates $c_0, c_1$, then what we are looking for is a least-squares solution to the $n\times 2$ system of equations

$\begin{align*} c_0 + c_1 x_1 &= y_1\\ c_0 + c_1 x_2 &= y_2\\ &\vdots\\ c_0 + c_1 x_n &= y_n \end{align*}$

Note that, in this system, the $x_i$ and $y_i$ are constants, and we are trying to solve for $c_0$ and $c_1$. For $n\le 2$ there will be a solution, but in the overdetermined case ($n > 2$) there almost always fails to be one. Hence the need to work in the least-squares setting.

We wish to find the least-squares fit by a linear equation to the set of points $(2,3), (4,6), (7,10), (9,14)$. This problem can be represented by the matrix equation $A1*{\bf c} = {\bf y}$, where $A1 = \begin {bmatrix} 1 & 2\\1 & 4\\1 & 7\\1 & 9\end {bmatrix}$, ${\bf c} = \begin {bmatrix}c_0\\c_1\end {bmatrix}$, and ${\bf y} = \begin {bmatrix} 3\\6\\10\\14\end {bmatrix}$. We note that the matrix $A1$ is full rank (it has rank 2). Therefore the least-squares solution is unique and given by ${\bf c} = (A1^T*A1)^{-1}*A1^T*{\bf y} = \begin {bmatrix}-0.18966\\ 1.53448\end {bmatrix}$. Thus the desired equation is given by $l(x) = -0.18966 + 1.53448 x$. We can also measure the degree to which this comes close to being an actual solution (which would only exist if the points were collinear). Given $\bf c$, the vector ${\bf y}_1 := A1*{\bf c} = \begin {bmatrix} 2.8793\\ 5.9483\\ 10.5517\\ 13.6207\end {bmatrix}$ (accurate to 4 decimal places) is (by the above) the least-squares approximation to $\bf y$ by a vector in the column space of $A1$. The accuracy can then be estimated by the distance of this approximation to the original vector $\bf y$: $e_1 := \|{\bf y} - {\bf y}_1\| = 0.68229$.

The last computation in this example indicates what is being minimized when one fits data points in this way.

Using least-squares linear approximation techniques to find the best linear fit to a set of $n$ data points $\{(x_1,y_1), (x_2,y_2),\dots ,(x_n,y_n)\}$ results in the equation of a line $l(x) = c_0 + c_1 x$ which minimizes the sum of the squares of the vertical distances from the given points to the line: $\sum _{i=1}^n (y_i - l(x_i))^2$. Note that, unless the line is horizontal, the vertical distance will be larger than the actual distance, which is measured in the direction orthogonal to the line, and minimizing the sum of squares of those orthogonal distances would correspond geometrically to what one might normally think of as constituting a least-squares fit. However, the computation needed to find the best fit with respect to that sum is quite a bit more involved. This linear algebraic approach provides a simple and efficient method for finding a good approximation by a line, which will be exact whenever the points are collinear.

The setup above provides a method for finding not just linear approximations, but higher order ones as well. The linear algebra is essentially the same. To illustrate, consider the following example.

Suppose instead we were asked to find the least-squares fit by a quadratic equation to the same set of points $(2,3), (4,6), (7,10), (9,14)$. As before, this problem can be represented by the matrix equation $A2*{\bf c} = {\bf y}$, where $A2 = \begin {bmatrix} 1 & 2 & 4\\1 & 4 & 16\\1 & 7 & 49\\1 & 9 & 81\end {bmatrix}$, ${\bf c} = \begin {bmatrix}c_0\\c_1\\c_2\end {bmatrix}$, and ${\bf y} = \begin {bmatrix} 3\\6\\10\\14\end {bmatrix}$. We note that the matrix $A2$ is again full rank (it has rank 3). Therefore the least-squares solution is unique and given by ${\bf c} = (A2^T*A2)^{-1}*A2^T*{\bf y} = \begin {bmatrix} 0.960345\\ 0.984483\\0.050000\end {bmatrix}$. Thus the desired equation is given by $q(x) = 0.960345 + 0.984483 x + 0.050000 x^2$. Measuring the degree to which this comes close to being an actual solution (which would only exist if the points all lay on the same quadratic graph), we compute ${\bf y}_2 := A2*{\bf c} = \begin {bmatrix} 3.1293\\ 5.6983\\ 10.3017\\ 13.8707\end {bmatrix}$ (accurate to 4 decimal places), which is (by the above) the least-squares approximation to $\bf y$ by a vector in the column space of $A2$. The accuracy can then be estimated by the distance of this approximation to the original vector $\bf y$: $e_2 := \|{\bf y} - {\bf y}_2\| = 0.46424$. As with the linear fit, the quantity being minimized is the sum of squares of vertical distances of the original points to the graph of this quadratic function. Notice the modest improvement, from $0.68229$ to $0.46424$. Because the column space of $A2$ contains the column space of $A1$, the least-squares approximation ${\bf y}_2$ has to be at least as good as the linear one ${\bf y}_1$, and almost always will be closer to the original vector $\bf y$.

We will illustrate our final point by looking at what happens if we go one degree higher.

We will find the least-squares fit by a cubic equation to the same set of points $(2,3), (4,6), (7,10), (9,14)$. As before, this problem can be represented by the matrix equation $A3*{\bf c} = {\bf y}$, where $A3 = \begin {bmatrix} 1 & 2 & 4 & 8\\1 & 4 & 16 & 64\\1 & 7 & 49 & 343\\1 & 9 & 81 & 729\end {bmatrix}$, ${\bf c} = \begin {bmatrix}c_0\\c_1\\c_2\\c_3\end {bmatrix}$, and ${\bf y} = \begin {bmatrix} 3\\6\\10\\14\end {bmatrix}$. The matrix $A3$ is still full rank (it has rank 4). Therefore the least-squares solution is unique and given by ${\bf c} = (A3^T*A3)^{-1}*A3^T*{\bf y} = \begin {bmatrix} -1.6\\ 2.890476\\-0.342857\\0.023810\end {bmatrix}$. Thus the desired equation is given by $f(x) = -1.6 + 2.890476 x - 0.342857 x^2 + 0.023810 x^3$. However, now when we compute the least-squares approximation we get ${\bf y}_3 := A3*{\bf c} = \begin {bmatrix} 3\\ 6\\ 10\\ 14\end {bmatrix}$, which is not just an approximation but rather the vector $\bf y$ on the nose; $e_3 = 0$. In other words, given these four points, there is a unique cubic equation which fits the points exactly. Inspecting the computation more carefully, we see why: the matrix $A3$ is both full rank and square; in other words, it is non-singular. In this case the system of equations is no longer overdetermined but rather balanced. And with a non-singular coefficient matrix, we get a unique solution. Symbolically, this can be seen by noting that the non-singularity of $A3$ results in a simplified expression for $\bf c$, confirming it is indeed an exact solution: ${\bf c} = (A3^T*A3)^{-1}*A3^T*{\bf y} = A3^{-1}*(A3^T)^{-1}*A3^T*{\bf y} = A3^{-1}*{\bf y}$.

This set of examples, in which we compute successively higher order approximations to a set of $n$ data points until we finally arrive at an exact fit, is part of a more general phenomenon, which we record without proof by the following theorem.

Given $n$ points in $\mathbb R^2$ with distinct $x$-coordinates $\{(x_1,y_1), (x_2,y_2),\dots ,(x_n,y_n)\}$, the least-squares fit by a polynomial of degree $k$ is computed by finding the least-squares solution to the matrix equation $A_k*{\bf c} = {\bf y}$, where ${\bf y} = [y_1\ y_2\ \dots\ y_n]^T$ and $A_k$ is the $n\times (k+1)$ matrix with $A_k(i,j) = x_i^{j-1}$. The matrix $A_k$ will have full column rank for all $k\le (n-1)$, and so the least-squares solution $\bf c$ is unique and given by $[c_0\ c_1\ \dots\ c_k]^T = {\bf c} = (A_k^T*A_k)^{-1}*A_k^T*{\bf y}$, with degree $k$ polynomial least-squares fit given by $p_k(x) = \sum _{i=0}^k c_i x^i$. Because $A_{n-1}$ is non-singular, there will be a polynomial of degree at most $(n-1)$ which fits the points exactly. Moreover, the polynomial of degree at most $(n-1)$ which accomplishes this will be unique.

Press...	...to do
left/right arrows	Move cursor
shift+left/right arrows	Select region
ctrl+a	Select all
ctrl+x/c/v	Cut/copy/paste
ctrl+z/y	Undo/redo
ctrl+left/right	Add entry to list or column to matrix
shift+ctrl+left/right	Add copy of current entry/column to list/matrix
ctrl+up/down	Add row to matrix
shift+ctrl+up/down	Add copy of current row to matrix
ctrl+backspace	Delete current entry in list or column in matrix
ctrl+shift+backspace	Delete current row in matrix

Type...	...to get
norm	$\|\blue{[?]}\|$
text	$\text{\blue{[?]}}$
sym_name	$\backslash\texttt{\blue{[?]}}$
abs	$\left|\blue{[?]}\right|$
sqrt	$\sqrt{\blue{[?]}}$
paren	$\left(\blue{[?]}\right)$
floor	$\lfloor \blue{[?]} \rfloor$
factorial	$\blue{[?]}!$
exp	${\blue{[?]}}^{\blue{[?]}}$
sub	${\blue{[?]}}_{\blue{[?]}}$
frac	$\dfrac{\blue{[?]}}{\blue{[?]}}$
int	$\displaystyle\int{\blue{[?]}}d\blue{[?]}$
defi	$\displaystyle\int_{\blue{[?]}}^{\blue{[?]}}\blue{[?]}d\blue{[?]}$
deriv	$\displaystyle\frac{d}{d\blue{[?]}}\blue{[?]}$
sum	$\displaystyle\sum_{\blue{[?]}}^{\blue{[?]}}\blue{[?]}$
prod	$\displaystyle\prod_{\blue{[?]}}^{\blue{[?]}}\blue{[?]}$
root	$\sqrt[\blue{[?]}]{\blue{[?]}}$
vec	$\left\langle \blue{[?]} \right\rangle$
mat	$\left(\begin{matrix} \blue{[?]} \end{matrix}\right)$
*	$\cdot$
infinity	$\infty$
arcsin	$\arcsin\left(\blue{[?]}\right)$
arccos	$\arccos\left(\blue{[?]}\right)$
arctan	$\arctan\left(\blue{[?]}\right)$
sin	$\sin\left(\blue{[?]}\right)$
cos	$\cos\left(\blue{[?]}\right)$
tan	$\tan\left(\blue{[?]}\right)$
sec	$\sec\left(\blue{[?]}\right)$
csc	$\csc\left(\blue{[?]}\right)$
cot	$\cot\left(\blue{[?]}\right)$
log	$\log\left(\blue{[?]}\right)$
ln	$\ln\left(\blue{[?]}\right)$
alpha	$\alpha$
beta	$\beta$
gamma	$\gamma$
delta	$\delta$
epsilon	$\epsilon$
zeta	$\zeta$
eta	$\eta$
theta	$\theta$
iota	$\iota$
kappa	$\kappa$
lambda	$\lambda$
mu	$\mu$
nu	$\nu$
xi	$\xi$
omicron	$\omicron$
pi	$\pi$
rho	$\rho$
sigma	$\sigma$
tau	$\tau$
upsilon	$\upsilon$
phi	$\phi$
chi	$\chi$
psi	$\psi$
omega	$\omega$
Gamma	$\Gamma$
Delta	$\Delta$
Theta	$\Theta$
Lambda	$\Lambda$
Xi	$\Xi$
Pi	$\Pi$
Sigma	$\Sigma$
Phi	$\Phi$
Psi	$\Psi$
Omega	$\Omega$

Controls

Symbols

Settings