Suppose we are given a matrix equation $A\mathbf{x} = \mathbf{b}$ with a vector variable $\mathbf{x}$ taking values in $\mathbb{R}^n$, and a fixed vector $\mathbf{b}$ in $\mathbb{R}^m$ (implying that $A$ is an $m\times n$ matrix). The consistency theorem for systems of equations tells us that the equation is consistent precisely when $\mathbf{b}$ is in the span of the columns of $A$, or alternatively, when $\mathbf{b}\in\mathrm{Col}(A)$. But what if it is not? In other words, what if the system is inconsistent? Up until now we have simply left it at that; inconsistency was the end of the story.
But it is not. Whether or not the original system is consistent, one can always find a solution to the related equation

$$A\mathbf{x} = \mathrm{Proj}_{\mathrm{Col}(A)}(\mathbf{b}) \qquad \text{(eqn:lss)}$$

because the projection of $\mathbf{b}$ onto the column space of $A$ will always be in the column space of $A$, regardless of whether or not the original vector $\mathbf{b}$ is. The question then becomes: given that we know (eqn:lss) has at least one solution, how do we go about finding it (or them)? The starting point for answering that question is the following theorem, often referred to as the Fundamental Subspaces theorem (originally proven by Gauss): for any $m\times n$ matrix $A$,

- $\mathrm{Col}(A)^\perp = \mathrm{Null}(A^T)$;
- $\mathrm{Null}(A^T)^\perp = \mathrm{Col}(A)$;
- $\mathrm{Col}(A^T)^\perp = \mathrm{Null}(A)$;
- $\mathrm{Null}(A)^\perp = \mathrm{Col}(A^T)$.
- Proof
- Because the theorem is stated for all matrices, and because $(W^\perp)^\perp = W$ for any subspace $W$, the second, third and fourth statements are consequences of the first, so it suffices to verify that case. To see this, we recall that $\mathrm{Col}(A)$ is the subspace of $\mathbb{R}^m$ spanned by the columns $\mathbf{a}_1,\dots,\mathbf{a}_n$ of $A$; then $\mathbf{x}\in\mathrm{Col}(A)^\perp$ iff $\mathbf{x}\cdot\mathbf{a}_i = 0$ for all $1\le i\le n$ iff $\mathbf{a}_i^T\mathbf{x} = 0$ for all $1\le i\le n$ iff $A^T\mathbf{x} = \mathbf{0}$, i.e., $\mathbf{x}\in\mathrm{Null}(A^T)$.
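To see the first statement in action, here is a minimal numerical sketch (the $4\times 2$ matrix and the use of NumPy/SciPy are illustrative assumptions, not part of the text): an orthonormal basis of $\mathrm{Null}(A^T)$ is computed and checked to be orthogonal to every column of $A$.

```python
import numpy as np
from scipy.linalg import null_space

# An arbitrary 4x2 example matrix (illustrative only).
A = np.array([[1., 2.],
              [0., 1.],
              [1., 0.],
              [2., 2.]])
m, n = A.shape

N = null_space(A.T)   # orthonormal basis for Null(A^T)

# Every basis vector of Null(A^T) is orthogonal to every column of A.
print(np.allclose(A.T @ N, 0))                      # True

# dim Col(A) + dim Null(A^T) = m, as expected for orthogonal complements in R^m.
print(np.linalg.matrix_rank(A) + N.shape[1] == m)   # True
```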
Write $\hat{\mathbf{b}}$ for $\mathrm{Proj}_{\mathrm{Col}(A)}(\mathbf{b})$. Then
- $A\mathbf{x} = \hat{\mathbf{b}}$ iff $\mathbf{b} - A\mathbf{x}\in\mathrm{Col}(A)^\perp$ (as $\hat{\mathbf{b}}$ is the unique vector in $\mathrm{Col}(A)$ with $\mathbf{b} - \hat{\mathbf{b}}\in\mathrm{Col}(A)^\perp$);
- $\mathbf{b} - A\mathbf{x}\in\mathrm{Col}(A)^\perp$ iff $A^T(\mathbf{b} - A\mathbf{x}) = \mathbf{0}$, i.e., iff $A^TA\mathbf{x} = A^T\mathbf{b}$ (by Theorem thm:fst).
This last equation, $A^TA\mathbf{x} = A^T\mathbf{b}$, has the same set of solutions as the equation that started the sequence, namely $A\mathbf{x} = \hat{\mathbf{b}}$, and is therefore always consistent. It is derived from our original equation by simply multiplying both sides on the left by $A^T$, and is often referred to as the associated normal equation of the original matrix equation from which it was derived.
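For a quick numerical illustration of this point (the $3\times 2$ system and the use of NumPy are assumptions made for the sketch): even when $A\mathbf{x}=\mathbf{b}$ is inconsistent, the normal equation $A^TA\mathbf{x}=A^T\mathbf{b}$ has a solution.

```python
import numpy as np

# A 3x2 system chosen to be inconsistent: b is not in Col(A).
A = np.array([[1., 0.],
              [0., 1.],
              [1., 1.]])
b = np.array([1., 1., 3.])

# The original system has no exact solution: rank([A | b]) > rank(A).
print(np.linalg.matrix_rank(np.column_stack([A, b])) > np.linalg.matrix_rank(A))  # True

# The normal equation A^T A x = A^T b is nevertheless consistent;
# here A^T A happens to be invertible, so we can solve it directly.
x_hat = np.linalg.solve(A.T @ A, A.T @ b)
print(x_hat)                                    # least-squares solution (4/3, 4/3)
print(np.allclose(A.T @ (b - A @ x_hat), 0))    # residual lies in Null(A^T): True
```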
This yields a straightforward procedure for finding a least-squares solution to our original equation $A\mathbf{x} = \mathbf{b}$; i.e., a solution to the associated normal equation $A^TA\mathbf{x} = A^T\mathbf{b}$, which by the above is equivalent to a solution to the related equation $A\mathbf{x} = \hat{\mathbf{b}}$. Note that the original equation is consistent precisely when $\mathbf{b}\in\mathrm{Col}(A)$, or equivalently when $\hat{\mathbf{b}} = \mathbf{b}$; in other words, when the least-squares solution is an exact solution. The advantages to seeking a least-squares solution are i) it always exists (regardless of whether or not the original equation is consistent), and ii) it yields an actual solution whenever an actual solution exists. Because this procedure finds the least-squares solution first, it can also be applied to finding the least-squares approximation to $\mathbf{b}$ as $\hat{\mathbf{b}} = A\hat{\mathbf{x}}$, where $\hat{\mathbf{x}}$ is a least-squares solution to the original equation.
The steps are:
- Form the associated normal equation $A^TA\mathbf{x} = A^T\mathbf{b}$;
- find the solution(s) to the normal equation by computing $\mathrm{rref}\!\left(\left[\,A^TA \mid A^T\mathbf{b}\,\right]\right)$. These will be the least-squares solution(s) to the original equation;
- for any least-squares solution $\hat{\mathbf{x}}$ from Step 2, compute $A\hat{\mathbf{x}}$. This will yield the least-squares approximation $\hat{\mathbf{b}}$ to $\mathbf{b}$ by a vector in the column space of $A$ (the sketch following this list carries out these steps on a small example).
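Here is a minimal sketch of the three steps, assuming SymPy for the row reduction and reusing the small inconsistent system from the sketch above (both choices are illustrative, not from the text).

```python
from sympy import Matrix

# Illustrative data (not from the text): an inconsistent 3x2 system.
A = Matrix([[1, 0],
            [0, 1],
            [1, 1]])
b = Matrix([1, 1, 3])

# Step 1: form the associated normal equation  A^T A x = A^T b.
AtA = A.T * A          # [[2, 1], [1, 2]]
Atb = A.T * b          # [4, 4]

# Step 2: row-reduce the augmented matrix [A^T A | A^T b].
aug = AtA.row_join(Atb)
R, pivots = aug.rref()
print(R)               # [[1, 0, 4/3], [0, 1, 4/3]]

# Here A^T A is invertible, so the last column of the rref is the unique
# least-squares solution x_hat = (4/3, 4/3).
x_hat = R[:, -1]

# Step 3: the least-squares approximation to b is b_hat = A x_hat.
b_hat = A * x_hat      # (4/3, 4/3, 8/3), the projection of b onto Col(A)
print(x_hat, b_hat)
```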
Again, there will only be one least-squares approximation to $\mathbf{b}$ by a vector in $\mathrm{Col}(A)$, because we have already seen that such a vector, namely $\hat{\mathbf{b}} = \mathrm{Proj}_{\mathrm{Col}(A)}(\mathbf{b})$, is unique. However, the least-squares solution to the original equation may not be unique. Thus another consequence of this theory is that every least-squares solution $\hat{\mathbf{x}}$ to $A\mathbf{x} = \mathbf{b}$, whether or not it is unique, yields the same least-squares approximation $A\hat{\mathbf{x}} = \hat{\mathbf{b}}$.
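A short sketch of this phenomenon, with an arbitrarily chosen rank-deficient matrix (again an illustrative assumption): two different least-squares solutions produce the same approximation $A\hat{\mathbf{x}}$.

```python
import numpy as np

# Rank-deficient A: the second column is twice the first, so Null(A) is nontrivial
# and least-squares solutions are not unique.
A = np.array([[1., 2.],
              [1., 2.],
              [0., 0.]])
b = np.array([1., 0., 1.])

# One least-squares solution from NumPy (lstsq returns the minimum-norm one).
x1, *_ = np.linalg.lstsq(A, b, rcond=None)

# Adding any vector in Null(A) leaves A x unchanged; (2, -1) is in Null(A).
x2 = x1 + np.array([2., -1.])

print(np.allclose(A @ x1, A @ x2))   # True: both give the same approximation b_hat
print(np.allclose(A.T @ A @ x1, A.T @ b),
      np.allclose(A.T @ A @ x2, A.T @ b))   # True True: both solve the normal equation
```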
A final question then remains: when will there be a unique least-squares solution? We say that the matrix $A$ has full column rank (or just full rank when there is no confusion) if the columns of $A$ are linearly independent; namely, that $\mathrm{rank}(A) = n$. If $A$ is $m\times n$, this imposes the constraint that $m\ge n$ (otherwise the rank would have to be less than the number of columns). A useful fact about the ranks of matrices (which we do not prove here) is that $\mathrm{rank}(A^TA) = \mathrm{rank}(A)$.
As the normal equation is always consistent, we see that it has a unique solution precisely when $\mathrm{rank}(A^TA) = \mathrm{rank}(A) = n$, that is, precisely when $A$ has full column rank. In that case $A^TA$ is invertible, and the unique least-squares solution is $\hat{\mathbf{x}} = (A^TA)^{-1}A^T\mathbf{b}$.
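A closing sketch under the same illustrative assumptions (NumPy, an arbitrary full-column-rank matrix): the rank fact holds numerically, and the unique least-squares solution can be computed directly from the invertible normal equation.

```python
import numpy as np

# A 4x2 matrix with linearly independent columns (full column rank).
A = np.array([[1., 1.],
              [1., 2.],
              [1., 3.],
              [1., 4.]])
b = np.array([6., 5., 7., 10.])

# The useful rank fact: rank(A^T A) = rank(A).
print(np.linalg.matrix_rank(A.T @ A) == np.linalg.matrix_rank(A))   # True

# Full column rank makes A^T A invertible, so the normal equation has
# the unique solution x_hat = (A^T A)^{-1} A^T b.
x_hat = np.linalg.solve(A.T @ A, A.T @ b)

# It agrees with NumPy's built-in least-squares solver.
print(np.allclose(x_hat, np.linalg.lstsq(A, b, rcond=None)[0]))      # True
```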