\documentclass[11pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb,bbm}
\usepackage{array}
\usepackage{graphicx}
\usepackage{fullpage}
\usepackage{color, soul}
\usepackage{comment}
\usepackage{enumitem}
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator{\Bern}{Bern}
\newcommand{\x}{\times}
\newcommand{\ep}{\epsilon}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\BF}{\mathbb{F}}
\newcommand{\MC}{\mathcal{C}}
\newcommand{\p}{\mathbb{P}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\Aen}{A_\eps^{(n)}}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
\newcommand*{\op}[1]{\operatorname{#1}}
\newcommand{\mcT}{\mathcal{T}}
\begin{document}
\input{preamble.tex}
\newcount\Sols % 0 suppresses notes to selves in text
\Sols=1
\newcommand{\soln}[1]{\ifnum\Sols=1 {\color{blue} [SOLUTION: #1]}\fi}
\newcount\Comments % 0 suppresses notes to selves in text
\Comments=1
\newcommand{\todo}[1]{\ifnum\Comments=1 {\color{red} [TODO: #1]}\fi}
\handout{CS 229r Information Theory in CS}{{\bf Due: March 15, 2019}}{Instructor:
Madhu Sudan}{TAs: Mitali Bafna, Noah Golowich}{Problem Set 3}
\section*{Usual Instructions --- See Pset 2.}
\iffalse{
\begin{description}
\item[Collaboration:]
Collaboration is allowed, but limit yourselves to
groups of size at most four.
\item[References:]
In general, try not to run to reference material to answer
questions. Try to think about the problem to see if you can
solve it without consulting any external sources. If this fails,
you may look up any reference material.
\item[Writeup:]
You must write the solutions by yourselves.
Cite all references and collaborators.
Explain why you needed to consult any of the references,
if you did consult any.
\end{description}
}\fi
\section*{Problems}
\begin{enumerate}
\item {\bf Channel capacity.} Suppose we are given finite alphabets $\Omega_X, \Omega_Y$, and a channel $P(Y|X)$. Recall that the {\it channel capacity} is defined as $\max_{P_X} I(X;Y)$, where the maximum is taken over all distributions $P_X$ on $\Omega_X$.
Construct a channel $P(Y|X)$ such that the uniform distribution on $\Omega_X$ does {\it not} achieve the channel capacity.
\item {\bf Random linear codes.} Recall that a {\it linear code} of {\it block length} $n$ and {\it dimension} $k$ (where $k \leq n$) is specified by a matrix $G \in \BF_2^{n \times k}$ with rank $k$. The encoding of a codeword $a \in \BF_2^k$ is given by $E(a) = G a \in \BF_2^n$.
\begin{enumerate}
\item {\bf (Not to be turned in.)} Show that there exists a matrix $H \in \BF_2^{(n-k) \times n}$ of rank $n-k$ such that $H\cdot G = 0$. Conclude that the code $\MC := \{Ga : a \in \BF_2^k \}$ is equivalently given by $\{ x \in \BF_2^n : Hx = 0\}$. The matrix $H$ is known as the {\it parity check matrix} corresponding to the code $\MC$.
\item {\bf (Not to be turned in.)} Given $G$, find an algorithm (as efficient as you can) that computes any parity check matrix $H$ as described in part (a). What is the runtime?
\item Apply your solution from parts (a) and (b) to give the matrix $H$ corresponding to \[G^T = \left[ \begin{array}{ccccccc}
1 & 0 & 0 & 1 & 1 & 0 & 1 \\
0 & 1 & 0 & 1 & 0 & 1 & 1 \\
0 & 0 & 1 & 0 & 1 & 1 & 1 \\
\end{array} \right].\]
\item Show that a random linear encoder achieves capacity over the binary symmetric channel with parameter $p$. Specifically show that for every $\eps > 0$ if $n$ is large enough, then a random encoder given by a uniformly random matrix $G \in \F_2^{n \times k}$ has a decoder that correctly decodes the code (with error probability going to zero exponentially fast) for some $k \geq (1-h(p) - \eps)n$, over the {\em worst case} choice of the message!
\end{enumerate}
\item {\bf Polarization Preprocessing:} Recall that the algorithm for compression based on polarization started with $Z \in \Bern (p)^n$ and compressed it using the recursive expression $P_n(U,V) = (P_{n/2}(U+V),P_{n/2}(V))$, where $Z = (U,V)$ for $U,V \in \F_2^{n/2}$. The final compressed output is $P_n(Z)_S$ for some set $S \subseteq [n]$, where
$S = \{j \in [n] \,|\, H(P_n(Z)_j \mid P_n(Z)_{<j}) \geq \eps\}$, for a suitably small threshold $\eps > 0$. The key step in the analysis was the following ``local polarization'' claim: for every $\tau > 0$ there exists $\sigma > 0$ such that if $H(U|A) = H(V|B) \in (\tau,1-\tau)$ then $H(U+V|A,B) \geq H(U|A) +\sigma$.
\begin{enumerate}
\item {\bf (Not to be turned in.)} In what step in the analysis of Polar coding is such a statement useful? What choices of $U$, $V$, $A$ and $B$ do we use?
\item Prove that if $X,Y$ are independent random variables with $X \sim \Bern(p_1)$ and $Y \sim \Bern(p_2)$ then $H(X+Y) \geq \max\{H(X),H(Y)\}$. Furthermore if $H(X),H(Y) \in (\tau/2,1-\tau/2)$ prove that there is some $\epsilon > 0$ (depending only on $\tau$) such that $H(X+Y) \geq \max\{H(X),H(Y)\} + \epsilon$.
\item {\bf (Not to be turned in.)} Use case analysis on $\Pr_a[H(U|A=a) \in (\tau/2,1-\tau/2)]$ to show that $H(U+V|A,B) \geq H(U|A) +\sigma$. (Roughly, if $H(U|A= a)$ is in the interval with noticeable probability then Part (b) yields the entropy increase. And if this probability is tiny then with noticeable probability $H(U|a)$ is low, $H(V|b)$ is high and so $H(U+V|a,b)$ is high compared to $H(U|a)$ and this leads to an entropy increase in $H(U+V|A,B)$ compared to $H(U|A)$.)
\end{enumerate}
\item {\bf Generalizations of local polarization.}
Our construction of polar codes was based on the primitive polarizing matrix $P_2 = \left[ \begin{matrix} 1 & 1 \\ 0 & 1\end{matrix} \right]$ corresponding to the transformation $(U,V) \mapsto (U+V,V)$. We then defined a {\it basic polarizing matrix} $P_n$, for $n = 2^t$, recursively, via $P_n((U,V)) = (P_{n/2}(U+V), P_{n/2}(V))$ for $U,V \in \BF_2^{n/2}$.\footnote{The matrix $P_n$ is actually the $\log n$th {\it tensor power} of $P_2$.} This construction can be generalized to the setting where we replace $P_2$ by an arbitrary square matrix. For instance, consider the basic polarizing matrix
$$
P_4 := \left[ \begin{matrix}
1 & 1 & 1 & 0\\
0 & 0 & 1 & 1\\
0 & 1 & 0 & 0 \\
1 & 0 & 0 & 0
\end{matrix} \right],
$$
which leads to the recursion, for $n = 4^t$, $P_n((U,V,W,X)) = (P_{n/4}(U+V+W), P_{n/4}(W+X), P_{n/4}(V), P_{n/4}(U))$, where $U,V,W,X \in \BF_2^{n/4}$.
Will this lead to a Polar code that gets $\epsilon$ close to capacity at polynomial block lengths? If so, what aspects of the analysis need to change to show this? If not, what aspects of the analysis break down?
\end{enumerate}
\end{document}