\documentclass[10pt]{article}
\usepackage{amsfonts}
\usepackage{fullpage}
%\usepackage{epsfig}
\usepackage[usenames,dvipsnames]{pstricks}
\usepackage{epsfig}
\usepackage{pst-grad} % For gradients
\usepackage{pst-plot} % For axes
\begin{document}
\input{preamble.tex}
\setlength\parindent{0pt}
\lecture{16}{April 8 2013}{Madhu Sudan}{Themis Gouleakis}
\section{Overview}
In the previous lecture we discussed a graph based code due to Spielman \cite{Spielman96} which has a linear-time encoder and decoder. The main result can be summarized in the following theorem.
\begin{theorem}\label{Spielman}
There exist values $R>0,\tau>0$ and a family of codes $\{C_n\subseteq \mathbb{F}_2^n\}$ of rate $R$ correcting $\tau$ fraction of errors with linear time encoder and decoder.
\end{theorem}
The following corollary of the above theorem is for the case of binary symmetric channel:
\begin{corollary}
For every $p\in (0,1/2)$ and $\epsilon >0 $ there exists some $\delta>0$ and encoding/decoding functions:
\[E: \{0,1\}^{R\cdot n}\rightarrow \{0,1\}^{n}\]
\[D: \{0,1\}^{n}\rightarrow \{0,1\}^{R\cdot n}\]
for $R=1-H(p)-\epsilon$ such that $E,D$ are computable in linear time and
\[\Pr [D(E(x)+\eta)\not = x]\leq 2^{-\delta\cdot n}\]
where $\eta$ is the (random) error introduced by the channel.
\end{corollary}
\section{Error reduction codes}
The key ingredient we will need in order to prove Theorem~\ref{Spielman} will be the notion of error reduction codes. In the following, we will see how the encoding and decoding works for these codes.
\subsection{Encoder}
\begin{figure}[h]
\begin{center}
% Generated with LaTeXDraw 2.0.8
% Sat Apr 13 19:27:11 EDT 2013
% \usepackage[usenames,dvipsnames]{pstricks}
% \usepackage{epsfig}
% \usepackage{pst-grad} % For gradients
% \usepackage{pst-plot} % For axes
\scalebox{1} % Change this value to rescale the drawing.
{
\begin{pspicture}(0,-4.51)(12.373125,4.51)
\psellipse[linewidth=0.04,dimen=outer](0.93,0.0)(0.93,4.51)
\psellipse[linewidth=0.04,dimen=outer](6.12,0.09)(0.78,2.34)
\psdots[dotsize=0.12](6.06,0.73)
\psdots[dotsize=0.12](0.84,3.43)
\psdots[dotsize=0.12](0.84,2.49)
\psdots[dotsize=0.12](0.84,1.49)
\psdots[dotsize=0.12](0.86,0.51)
\psdots[dotsize=0.12](0.84,-0.41)
\psdots[dotsize=0.12](0.84,-1.09)
\psdots[dotsize=0.12](0.82,-2.01)
\psdots[dotsize=0.12](0.86,-2.79)
\psdots[dotsize=0.12](0.84,-3.59)
\usefont{T1}{ptm}{m}{n}
\rput(9.85125,0.48){$y_i=(x_2+x_4+x_6+x_7) \;\;mod\;\; 2$}
\psline[linewidth=0.04cm](6.0,0.73)(0.82,2.51)
\psline[linewidth=0.04cm](6.04,0.71)(0.88,-1.11)
\psline[linewidth=0.04cm](6.0,0.71)(0.86,0.53)
\psline[linewidth=0.04cm](6.04,0.73)(0.78,-2.01)
\usefont{T1}{ptm}{m}{n}
\rput(0.5228125,3.2){$x_1$}
\usefont{T1}{ptm}{m}{n}
\rput(0.52671874,-3.34){$x_9$}
\usefont{T1}{ptm}{m}{n}
\rput(0.38140625,2.48){$x_2$}
\end{pspicture}
}
\caption{Example of a parity check bit.}
\end{center}
\end{figure}
We are given a $(c,d)$-regular bipartite graph $G$, which has $k$ left vertices (and thus $k\cdot c/d$ right vertices).
The left vertices of the graph correspond to the bits $\{x_1, ..., x_k\}$ of the message, whereas the right vertices are the parity check bits $\{y_1,...,y_m\}$, where
\[y_j=\sum_{\{i,j\}\in G} x_i \;\;mod\;\; 2\]
Then the codeword is computed as follows:
\[E(x_1,...,x_k)=(x_1,...,x_k,y_1,...,y_m)\]
So, the encoding can be done in linear time, since we consider the degrees of the graph $G$ to be constant.
\subsection{Decoder (error reducer)}
Let $(\tilde{x},\tilde{y})$ be the (possibly) corrupted version of $(x,y)$ that is given to the decoder and let $\hat{x}$ be the output of the decoder. We will call a right vertex $j$ of $G$ \textbf{satisfied} if $\displaystyle \tilde{y}_j=\sum_{\{i,j\}\in G} \tilde{x}_i \;\;mod\;\; 2$.
The decoding (or more accurately, error reducing) in this kind of codes is done using the following flipping algorithm:\\
\textbf{ERROR-Reducer$(\tilde{x},\tilde{y})\rightarrow \hat{x}$ }\\
\begin{enumerate}
\item \textbf{While} there exists some $i\in[k]$ such that the $i$-th vertex on the left has more unsatisfied neighbors than satisfied ones \textbf{do} $\tilde{x_i}\leftarrow \neg \tilde{x_i}$.
\item \textbf{Return} $\hat{x}=\tilde{x}$
\end{enumerate}
In other words, we flip the values of the bits in $\tilde{x}$, such that for each left vertex of $G$, the majority of its neighbors are satisfied when the algorithm terminates.
\subsection{Good graphs for error reduction}
The following definition describes a desirable property for graphs used in error reduction.
\begin{definition}
A bipartite graph $G$ is a $\tau$-error reducer if for every $\tau_1,\tau_2\leq \tau$ and $(x,y)=E_G(x)$, if $\delta(\tilde{x},x)\leq \tau_1\cdot k \;,\; \delta(\tilde{y},y)\leq \tau_2\cdot k$ then $\delta(\hat{x},x)\leq \frac{\tau_2}{2}\cdot k$,
where $\delta(a,b)$ denotes the Hamming distance between $a$ and $b$ and $k$ is the size of the left vertex set of $G$.
\end{definition}
\textbf{Remark:} Note that the upper bound on number of errors in the first part of the codeword after the algorithm has terminated, depends only on the number of errors in the second part. More importantly, if $\tau_2=0$, then $\hat{x}=x$. \\
In the following lemma, we are going to give a sufficient condition for a graph $G$ to be a $\tau$-error reducer for some $\tau$.
\begin{lemma}
If $G$ is a $(c,d)$-regular bipartite graph and a $(\gamma,\delta)$-expander for some $\gamma>\frac{7}{8}c$ and an odd integer $c > 8$, then $G$ is a $\frac{\delta}{2+c}$-error reducer.
\begin{proof}
Let $\tau_1, \tau_2$ be such that
\[\delta(\tilde{x},x)\leq \tau_1\cdot k\;,\; \delta(\tilde{y},y)\leq \tau_2\cdot k\]
We first prove the following two claims: \\
\textbf{Claim 1}: The number of iterations of the flipping algorithm is at most: $\tau_2\cdot k+\tau_1\cdot c\cdot k$. \\
\textbf{Proof}: This can be addressed as follows: Since the degrees of all left vertices are odd, after each flip, the number of unsatisfied right vertices decreases by at least $1$. Moreover, this number is initially at most $\tau_2\cdot k+\tau_1\cdot c\cdot k$, since there can be at most $\tau_2\cdot k$ unsatisfied vertices due to their value being incorrect ($\tilde{y_i}\not = y_i$) and each of the at most $\tau_1\cdot k$ incorrect left vertices ($\tilde{x_i}\not = x_i$), can affect at most $c$ right vertices and make them unsatisfied.
\textbf{Claim 2}: Throughout the execution of the flipping algorithm: $\delta(\tilde{x},x)\leq \tau_1\cdot k+\tau_2\cdot k+\tau_1\cdot c\cdot k$. \\
\textbf{Proof}: This follows easily from claim 1 using the fact that in each iteration, at most $1$ bit can be changed. \\
Furthermore, we want to choose the parameter $\tau$ to be such that $\delta(\tilde{x},x)\leq \delta\cdot k$. In this way, we can use the expander graph property.
So, we get \[\delta(\tilde{x},x)\leq \tau_1\cdot k+\tau_2\cdot k+\tau_1\cdot c\cdot k \leq \tau\cdot(2+c)\cdot k\leq \delta\cdot k\quad\Leftrightarrow\]
\[\tau\leq \frac{\delta}{2+c}\]
\textbf{Claim 3:} After the flipping algorithm terminates, the following holds: \[(2\gamma-c)\cdot \vert S\vert -\tau_2\cdot k\leq\frac{c}{2}\vert S\vert\] \\
\textbf{Proof:}
Consider the set $S$ of the left vertices for which $\tilde{x_i}\not = x_i$.
Using a lemma from previous lectures, we can get that $S$ has at least $(2\gamma-c)\cdot \vert S\vert$ unique neighbors. Thus, it has at least $(2\gamma-c)\cdot \vert S\vert-\tau_2\cdot k$ unaltered ($\tilde{y_i}=y_i$) unique neighbors. Notice that all these right vertices are unsatisfied, since only one of their neighbors is altered and they are not (so they have the different parity).
So, if \[(2\gamma-c)\cdot \vert S\vert -\tau_2\cdot k>\frac{c}{2}\vert S\vert\] after termination,
then there will be more than $\frac{c}{2}\vert S\vert$ unsatisfied neighbors of $S$ and by the pigeonhole principle at least one left vertex in $S$ will have more than $\frac{c}{2}$ out of its $c$ neighbors unsatisfied. So, the algorithm should not have terminated. So, claim 3 holds by contradiction.
From claim 3, we directly get: \[\vert S\vert\leq \frac{\tau_2\cdot k}{2\gamma-\frac{3}{2}c}\]
By comparing with the $\tau$-error-reducer definition, we can see that we need: \[2\gamma-\frac{3}{2}c\geq 2\Leftrightarrow\]
\[\gamma\geq 1+\frac{3}{4}c\]
We have that $\gamma> \frac{7}{8}c$, so it suffices that \[ \frac{7}{8}c\geq 1+\frac{3}{4}c\Leftrightarrow c\geq 8,\]
which holds from the hypothesis as well.
\end{proof}
\end{lemma}
\section{Error correction codes from error reduction codes}
Now, we are ready to use our error reduction codes in order to make error correction codes that correct all the errors given that they are not too many. The main idea behind that is that we can use the error reduction guarantees of error reduction codes and apply them recursively in order to make error correcting codes.
Let \[E_k: \{0,1\}^k \rightarrow \{0,1\}^{4k}\] \[D_k:\{0,1\}^{4k} \rightarrow \{0,1\}^{k}\] be the encoder and decoder of our error-correcting code (of message length $k$) respectively and \[ERE_k: \{0,1\}^k \rightarrow \{0,1\}^{3k/2}\]\[ERD_k:\{0,1\}^{3k/2} \rightarrow \{0,1\}^{k}\] be the encoder and decoder of a $\tau$-error reduction code of message length $k$.
Our error correcting code, will be able to correct $\tau\cdot k$ errors (which is a $\tau/4$ fraction of the codeword).
\subsection{Encoding}
\begin{figure}[h]
\begin{center}
% Generated with LaTeXDraw 2.0.8
% Sat Apr 13 19:09:05 EDT 2013
% \usepackage[usenames,dvipsnames]{pstricks}
% \usepackage{epsfig}
% \usepackage{pst-grad} % For gradients
% \usepackage{pst-plot} % For axes
\scalebox{0.8} % Change this value to rescale the drawing.
{
\begin{pspicture}(0,-6.679219)(11.18,6.679219)
\psellipse[linewidth=0.04,dimen=outer](0.78,0.9657813)(0.78,4.94)
\psellipse[linewidth=0.04,dimen=outer](4.2998314,1.1273566)(0.620168,2.5455322)
\psline[linewidth=0.04cm](0.88890755,5.7857814)(4.3342853,3.6527662)
\psline[linewidth=0.04cm](0.82,-4.0341372)(4.2826047,-1.4584211)
\psline[linewidth=0.04cm](4.2653775,-1.4785439)(4.2653775,-1.4181755)
\psframe[linewidth=0.04,dimen=outer](11.18,3.2857811)(6.8,-1.0342188)
\psellipse[linewidth=0.04,dimen=outer](7.63,-4.874219)(3.33,0.96)
\psline[linewidth=0.04cm](3.9,-0.95421875)(4.34,-4.7342186)
\psline[linewidth=0.04cm](11.12,-1.0542188)(10.92,-4.894219)
\usefont{T1}{ptm}{m}{n}
\rput(0.9585937,6.4757814){$k$ bits}
\usefont{T1}{ptm}{m}{n}
\rput(4.788594,4.795781){$k/2$ bits}
\usefont{T1}{ptm}{m}{n}
\rput(9.70125,3.9757812){$3k/2$ bits}
\usefont{T1}{ptm}{m}{n}
\rput(7.838594,-6.5242186){$k$ bits}
\usefont{T1}{ptm}{m}{n}
\rput(0.8154687,1.5757812){$x$}
\usefont{T1}{ptm}{m}{n}
\rput(4.275781,1.4357812){$y$}
\usefont{T1}{ptm}{m}{n}
\rput(9.018907,1.2757813){$z$}
\usefont{T1}{ptm}{m}{n}
\rput(7.6121874,-4.9642186){$w$}
\usefont{T1}{ptm}{m}{n}
\rput(2.6470313,1.1957812){$ERE_k(x)$}
\psline[linewidth=0.04cm,arrowsize=0.05291667cm 2.0,arrowlength=1.4,arrowinset=0.4]{->}(1.06,1.8457812)(3.6,1.7657813)
\psline[linewidth=0.04cm,arrowsize=0.05291667cm 2.0,arrowlength=1.4,arrowinset=0.4]{->}(4.44,2.3257813)(7.3,2.3657813)
\usefont{T1}{ptm}{m}{n}
\rput(5.907031,2.0757813){$E_{k/2}(y)$}
\psline[linewidth=0.04cm](4.52,-1.1742188)(6.18,-2.4142187)
\psline[linewidth=0.04cm](6.14,-2.3742187)(7.68,-1.0342188)
\psline[linewidth=0.04cm,arrowsize=0.05291667cm 2.0,arrowlength=1.4,arrowinset=0.4]{->}(6.16,-2.3342187)(6.48,-4.7342186)
\usefont{T1}{ptm}{m}{n}
\rput(7.2770314,-3.0042188){$ERE_{2k}(yz)$}
\end{pspicture}
}
\end{center}
\caption{ Construction of the $E_k(x)$ error correcting code}
\end{figure}
The encoder $E_k$ does the following transformation: $x\rightarrow xyzw$.
The bit strings $y,z,w$ are computed using the following transformations:
\[xy\leftarrow ERE_k(x)\]
\[z\leftarrow E_{k/2}(y)\]
\[yzw\leftarrow ERE_{2k}(yz)\]
Note that the second step is a recursive application of the encoding function on a bitstring with half the size. When the size falls below some constant $n$, we can use any error-correcting code $C$ of rate $R=1/4$ which corrects a $\tau/4$ fraction of the errors.
\subsection{Decoding}
For the decoding procedure ($D_k(\tilde{x}\tilde{y}\tilde{z}\tilde{w})=\hat{x}=x$), we work as follows:\\
\[\hat{y}\hat{z}\leftarrow ERD_{2k}(\tilde{y}\tilde{z}\tilde{w})\]
\[\hat{\hat{y}} \leftarrow D_{k/2}(\hat{y}\hat{z})\]
\[\hat{x}\leftarrow ERD_k(\tilde{x}\hat{\hat{y}})\]
Our middle step for the decoding is recursive as well and we use the decoder of the code $C$ we used above at the base case. Note that for the base case, we can use any type of code with the desired properties without affecting the running time, since the size of the message being encoded and decoded will be constant.\\
In order to prove the correctness of decoding (i.e $\hat{x}=x$ if at most $\tau\cdot k$ bits are corrupted), we do the following:
\[\delta(\tilde{x}\tilde{y}\tilde{z}\tilde{w},xyzw)\leq \tau\cdot k \Rightarrow \delta(\tilde{w},w)\leq \tau\cdot k\leq \tau\cdot (2k)\wedge \delta(\tilde{y}\tilde{z},yz)\leq \tau\cdot (2k)\]
Since the graph involved in the error reduction code is a $\tau$-error reducer, it follows that :
\[\delta(\hat{y}\hat{z},yz)\leq \frac{\tau\cdot k}{2}=\frac{\tau}{4}(2k)\]
Since, by the induction hypothesis, our code can handle $\tau/4$ fraction of errors, we can recurse up to an $O(\log k)$ depth until we can apply the error-correcting code $C$ which corrects all errors. Then, making our way up in the recursion we can prove by induction that $\hat{\hat{y}}=y$.
In other words $\delta(\hat{\hat{y}},y)\leq \tau_2\cdot k, \;\tau_2=0$ and it also holds that $\delta(\tilde{x},x)\leq \tau\cdot k$. So, by the properties of the error reduction codes, since $\tau_2=0$, it follows that $\hat{x}=x$. \\
So, by applying a $\tau$-error reducer recursively, we managed to construct an error correcting code of rate $R=1/4$ which corrects a $\tau/4$ fraction of errors. The key intuition for why this works is that in the worst case (i.e.\ when all the $\tau\cdot k$ errors appear in $w$) the error reducer halves the number of errors that are handed over to a similar subproblem of half the size. So, the conditions are still satisfied and we can reduce the size of the problem to constant and then apply any suitable error correcting code.
\subsection{Running time analysis}
We have already established that the running time for the error reduction encoding and decoding is linear. So, let $c_0,c_1,c_2$ be constants such that $T_{ERE}(k)=c_1\cdot k$ is the running time for the encoding $ERE_k$, $T_{ERD}(k)=c_2\cdot k$ is the running time for the decoding $ERD_k$ and $c_0$ is an upper bound for either encoding or decoding the code $C$ (remember that we only use codewords of up to some constant size for the code $C$). \\
We will prove by induction that the total running time of the encoding is $T_E(k)=6c_1\cdot k+c_0$ and for the decoding it is $T_D(k)=6c_2\cdot k+c_0=O(k)$.
We have the following:
\[T_{E}(k)=T_{ERE}(k)+T_E(k/2)+T_{ERE}(2k)\Leftrightarrow\]
\[6c_1\cdot k+c_0=c_1\cdot k+(3c_1\cdot k+c_0)+2c_1\cdot k\]
which holds.
The analysis for the decoding running time is completely analogous.
\section{Erasure correcting codes\cite{LubyMSS01}}
Consider the model of a binary erasure channel, where each transmitted bit is erased (i.e.\ turns into a ``?'' symbol) with probability $p$ and is received correctly with probability $1-p$. It turns out that the capacity of this channel is $1-p$. In this case we can do a similar encoding using bipartite graphs where the right vertices do a parity check of their neighbors. Now, consider the erased bits $S$, which correspond to a subset of the left vertices in the graph, and its neighborhood $\Gamma(S)$. In order to recover the erased bits, we run the following algorithm:
\begin{enumerate}
\item Find a right vertex $v$ with exactly one neighbor in $S$ (i.e.\ of degree $1$ into $S$), by which you can deduce the correct value of its neighbor in $S$ (since it is the only erased neighbor, we can just compute the XOR of the parity bit of $v$ with its non-erased neighbors and find the correct value).
\item Remove the left vertex corresponding to the newly discovered message bit from the set $S$ and repeat step $1$.
\end{enumerate}
Notice that in step $2$, by removing the vertex, the degrees of its right neighbors into the new set $S$ will decrease by $1$. So, it is possible that some degree $2$ vertices will become degree $1$ and under certain conditions we will be able to correct all the erasures using this algorithm. In fact, there exist codes of rate $1-p-\epsilon$ that correct a $p$ fraction of random erasures (i.e.\ probability $p$ of erasure) in time $O(n\log (\frac{1}{\epsilon}))$.
\bibliography{lect16}{}
\bibliographystyle{plain}
\end{document}