\documentclass[10pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{epsfig}
\usepackage{fullpage}
\usepackage[colorlinks,bookmarks=true]{hyperref}
\usepackage[capitalize,nameinlink,noabbrev]{cleveref}
\input{preamble.tex}
\newcommand{\bin}{\{0, 1\}}
\newcommand{\Vol}{\operatorname{Vol}}
\newcommand{\f}{\frac}
\newcommand{\sbe}{\subseteq}
\newcommand{\la}{\langle}
\newcommand{\ra}{\rangle}
\newcommand{\im}{\operatorname{im}}
\begin{document}
\renewcommand{\binset}{\bbF_2}
\handout{CS 229r Essential Coding Theory}{Feb 2, 2017}{Instructor: Madhu Sudan}{Scribes: Greg Yang}{Lecture 4}
\section{Constructivity}
What do we mean by constructivity in coding theory?
For example, we say that the Gilbert construction is ``nonconstructive'' because finding the code takes $\exp(n)$ time.
Ideally, the encoding function $E: \{0, 1\}^k \to \{0, 1\}^n$ should be polytime for a ``constructive'' result.
But what if, say, we have a linear code, whose generator matrix requires $\exp(n)$ time to find, but the actual matrix multiplication is polytime?
Most people would agree this is ``constructive.''
In general, we could have a nonuniform family of encoding functions $E_i: \{0, 1\}^{k_i} \to \{0, 1\}^{n_i}$, each of which requires $\poly(k_i)$ to compute; another example would be nonuniform families of polysized circuits.
A refresher on notation: For a code $E: \Sigma^k \to \Sigma^n$, the rate $R$ is $\f k n$.
The distance $\Delta(E) = \min_{u, v \in \im E} \Delta(u, v)$ is often written $d$.
The relative distance $\delta$ of $\im E$ is $\f d n$.
\section{Positive Results}
\subsection{The GV bound}
Last time we established the following Gilbert bound.
\begin{theorem}[Gilbert]\label{theorem:Gilbert}
There exists a parity check matrix $H \in \bin^{n \times (n-k)}$ with
$$2^k \ge 2^n/\Vol(n, d-1)$$
such that $C := \{y \mid yH = 0\}$ has $\Delta(C) \ge d$.
\end{theorem}
This implies the corollary
\begin{corollary}[GV bound]\label{cor:GV}
For the binary alphabet, the following is achievable:
$$R \ge 1 - H(\delta)$$
\end{corollary}
Today we sketch the proof of the Varshamov bound, which slightly improves \cref{theorem:Gilbert}.
\begin{theorem}[Varshamov]\label{theorem:Varshamov}
There exists a parity check matrix $H \in \bin^{n \times (n-k)}$ with
$$2^k \ge 2^n/\Vol(n, d-2)$$
such that $C := \{y \mid yH = 0\}$ has $\Delta(C) \ge d$.
\end{theorem}
\begin{proof}[Proof sketch]
Construct the parity check matrix greedily, row by row, such that no $d-1$ rows are linearly dependent.
As long as $2^{n-k} > \Vol(n, d-2)$, this is possible.
\end{proof}
\begin{exercise}
Complete the proof above.
\end{exercise}
As with the Gilbert bound, we recover \cref{cor:GV}.
\subsection{Random construction}
We can try constructing codes randomly, which will again recover \cref{cor:GV} (asymptotically): 1) pick $2^{k+1}$ codewords at random from $\bin^n$, and 2) remove one codeword from each pair at distance less than $d$.
A counting argument shows that the probability that at most $2^k$ codewords are removed, leaving a code of distance $\ge d$ and relative distance $(\f 1 2 - \epsilon)$, is nonzero.
Therefore, taking the relative distance $\delta = \f d n$ to approach $\f 1 2$ from below as $\f 1 2 - \epsilon$, we can achieve the rate $\Omega(\epsilon^2)$, although nonconstructively.
\subsection{Partition proofs}
Suppose $C_1, \ldots, C_m$ are all linear codes with $C_i \sbe \{0, 1\}^n$.
\begin{definition}
We call $C_1, \ldots, C_m$ a \textbf{partition} if $C_i \cap C_j = \{\vec 0\}$ for all $i \neq j$, $\bigcup C_i = \bin^n$, and $|C_i| = |C_j|$.
\end{definition}
\begin{lemma} \label{lemma:partition_proof}
If $C_1, \ldots, C_m$ forms a partition and $m > \Vol(n, d-1)$, then $\Delta(C_i) \ge d$ for some $i$.
\end{lemma}
\begin{exercise}
Prove \cref{lemma:partition_proof}.
\end{exercise}
Note that $m(|C_i| - 1) = 2^n -1$ for each $i$, so that if the above bound on $m$ holds, we get something like the Gilbert bound:
$$\f{2^n-1}{\Vol(n, d-1)} > \f{2^n - 1}{m} = 2^k - 1.$$
\section{Negative Results}
We have a very simple bound using the pigeonhole principle.
\begin{theorem}[Singleton Bound]
For any code over any alphabet $\Sigma$,
$R \le 1 - \delta$.
\end{theorem}
\begin{proof}
Let $E: \Sigma^k \to \Sigma^n$ be the encoding function, and $\pi: \Sigma^n \to \Sigma^{k-1}$ be projection to the first $k-1$ coordinates.
By pigeonhole applied to $\pi \circ E: \Sigma^k \to \Sigma^{k-1}$, there are $x, x' \in \Sigma^k$, distinct, s.t., $\pi(E(x)) = \pi(E(x'))$.
Thus $\Delta(C) \le \Delta(E(x), E(x')) \le n -k + 1$, where $C = \im E$ is the set of codewords.
Dividing by $n$ and taking $n \to \infty$ gives the desired bound.
\end{proof}
Using the familiar packing arguments, we can also show the following.
\begin{theorem}
[Hamming (Packing) Bound]
For any code in the binary alphabet,
$R \le 1 - H(\delta/2)$.
\end{theorem}
\begin{proof}
A code of distance $d$ corrects up to $\f{d-1}2$ errors.
By packing arguments, (over binary alphabet)
\begin{align*}
|C| \Vol(n, \f{d-1} 2) &\le 2^n\\
2^k 2^{H(\delta/2) n - o(n)} &\le 2^n\\
k&\le n - H(\delta/2) n + o(n)\\
R &\le 1 - H(\delta/2)
\end{align*}
\end{proof}
Let's now work with the alphabet $\{-1, 1\}$, identifying each bit $b \in \{0, 1\}$ with $(-1)^b \in \{1, -1\}$.
For $x \in \{1, -1\}^n$,
\begin{align*}
\la x, x \ra &= n\\
\la x, y \ra &= n - 2\Delta(x, y)
\end{align*}
Suppose $C = (V_1, \ldots, V_K) \sbe \{-1, 1\}^n$ has relative distance $> \f 1 2$, i.e., $\Delta(V_i, V_j) > \f n 2$ for all $i \neq j$.
Then by the identity above, $\la V_i, V_j \ra < 0$ for $i \not = j$.
We claim that the number of such vectors can be at most $n+1$.
If instead of strict inequality we only have relative distance $\ge \f 1 2$, so that $\la V_i, V_j \ra \le 0$ for $i \neq j$, then we claim that the number of such vectors can be at most $2n$.
If every pair is at relative distance exactly $\f 1 2$, then the $V_i$ are pairwise orthogonal, so there are at most $n$ of them.
Let's prove one of these claims.
\begin{lemma}
If $\la V_i, V_j \ra < 0$ for $i \not = j$, then $K \le n+1$.
\end{lemma}
\begin{proof}
Suppose $K = n+2$.
Then there is a nontrivial linear dependence
$$\sum_{i=1}^{n+1} \lambda_i V_i = 0.$$
If $\lambda_i \ge 0$ for all $i$, then
\begin{align*}
0 &= \la V_{n+2}, \sum_{i=1}^{n+1} \lambda_i V_i\ra \\
&= \sum_{i=1}^{n+1} \lambda_i \la V_{n+2}, V_i \ra\\
& < 0,
\end{align*}
a contradiction.
We reach a similar contradiction if all $\lambda_i \le 0$.
Therefore, if $I = \{i: \lambda_i > 0\}$ and $J = \{j: \lambda_j < 0\}$, then neither $I$ nor $J$ is empty.
We can therefore write
$$\sum_{i \in I} \lambda_i V_i = \sum_{j \in J} - \lambda_j V_j.$$
But then
$$0 \le \|\sum_{i \in I} \lambda_i V_i\|^2 = \la \sum_{i \in I} \lambda_i V_i, \sum_{j \in J} - \lambda_j V_j \ra < 0,$$
a contradiction again.
Therefore $K < n+2$.
\end{proof}
\begin{exercise}
Show that if $\la V_i, V_j \ra \le 0$ for all $i \neq j$, then $K\le 2n$.
\end{exercise}
We have shown the following.
\begin{theorem}[Plotkin]
Over a binary alphabet, if the relative distance of the code is $> \f 1 2$, then the rate is $O(\log n / n)$ which goes to 0 as $n \to \infty$.
\end{theorem}
\begin{exercise}
Show that if the relative distance of the code is $\f 1 2 + \epsilon$, then the rate is $\le \f 1 n \log(1 + \f 1 {2 \epsilon}).$
\end{exercise}
Suppose we have an $(n, k, d)_q$ code.
We can obtain an $(n-1, k, d-1)_q$ code by ``puncturing the code,'' i.e.\ chopping off the last coordinate.
We can also obtain an $(n-1, k-1, d)_q$ code by finding the most popular letter in the first coordinate, taking the codewords that start with this letter, and removing this coordinate.
In the first case, we get slightly better rate at the cost of some relative distance; in the second case, we get slightly better relative distance at the cost of some rate.
\begin{exercise}
Show $R \le 1 - 2 \delta$ given our observations above.
\end{exercise}
%%%%%%%%
\bibliographystyle{alpha}
\bibliography{bib}
\end{document}