\documentclass[11pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{graphicx}% epsfig is obsolete; graphicx is its maintained replacement
\usepackage{fullpage}
\usepackage{color, soul}
\usepackage{comment}
\usepackage{enumitem}
\usepackage{bbm}% provides \mathbbm, used in the definition of \1 below
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\newcommand{\x}{\times}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\Aen}{A_\eps^{(n)}}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
\newcommand*{\op}[1]{\operatorname{#1}}
\newcommand{\mcT}{\mathcal{T}}
\begin{document}
\input{preamble.tex}
\handout{CS 229r Information Theory in CS}{{\bf Due: February 26, 2019}}{Instructor:
Madhu Sudan}{TA: Mitali Bafna}{Problem Set 2}
\section*{Instructions}
\begin{description}
\item[Collaboration:]
Collaboration is allowed, but limit yourselves to
groups of size at most four.
\item[References:]
In general, try not to run to reference material to answer
questions. Try to think about the problem to see if you can
solve it without consulting any external sources. If this fails,
you may look up any reference material.
\item[Writeup:]
You must write the solutions by yourselves.
Cite all references and collaborators.
Explain why you needed to consult any of the references,
if you did consult any.
\end{description}
\section*{Problems}
\begin{enumerate}
\item {\bf Worst-case Lempel-Ziv.}
\begin{enumerate}
\item What is the maximum possible length (expressed in big-Oh notation) of the Lempel-Ziv compression of an $n$-bit string? (Prove the upper bound, and display a string whose compression matches your bound to within constant factors.)
\item What is the shortest length (again in big-Oh notation) of the Lempel-Ziv compression of an $n$-bit string? (Prove the lower bound, and display a string whose compression matches your bound to within constant factors.)
\item Suggest a variation of Lempel-Ziv that would achieve a minimum compression length (as defined in Part (b)) of $\operatorname{poly}\log n$.
\end{enumerate}
\item {\bf Fixed-length Encoding.} In the lectures we focussed on zero-error compression methods, i.e., where the decompressor always recovered the original message, and used the expected length of the encoding as the performance measure. The goal of this exercise is to show that if one prefers to work with worst-case encoding length, one can do so, at the expense of an exponentially small probability of error.
\begin{enumerate}
\item State a precise theorem that captures the ability to compress using a fixed-length encoding, at rates arbitrarily close to the entropy of the source, for an i.i.d. source over a finite alphabet, with an exponentially small probability of decoding error.
\item Prove your theorem above.
\end{enumerate}
(Note that an aim of this exercise is to test your ability to express yourselves mathematically. Watch out for imprecision, inaccuracy, and weaknesses in the theorem statement in Part (a).)
\item {\bf Typical sets and divergence.}
Recall that a set $S \subseteq \Omega^n$ is $(\epsilon,n)$-typical for a distribution $P$ supported on $\Omega$ if the probability under $P^n$ of every element in the set is in the interval $(1/|S|^{1+\epsilon},1/|S|^{1-\epsilon})$ and the probability of $S$ under $P^n$ is at least $1-\epsilon$. For distributions $P,Q$ supported on $\Omega$ prove that for every $\epsilon > 0$ and for sufficiently large $n$, there is an $(\epsilon,n)$-typical set $S$ for $Q$ such that $\Pr_{Z \sim P^n} [ Z \in S] \leq 2^{-(1-\epsilon) \cdot D(Q||P)\cdot n}$.
\newcommand{\bern}{\mathrm{Bern}}
\item {\bf Compressing Hidden Markov Models and Learning Parity with Noise.}
The ``Learning Parity with Noise'' (LPN) problem is a classic in learning theory and asks to solve the following problem. We are given black box access to a source $L = L_S$ for some (unknown) $S \subseteq [k]$. Every time we press a button on the black box it outputs a pair $(v,b)$ where $v \sim \mathrm{Uniform}(\{0,1\}^k)$ and $b = \bigoplus_{i \in S} v_i \oplus \eta$ where $\eta \sim \bern(p)$. Each time the button is pressed the box outputs a sample from this distribution independently. The goal of the LPN problem is to discover the secret $S$ after sampling from $L$ polynomially many times (in $k$) with running time also polynomial in $k$. It is believed by many that such an algorithm does not exist and that any algorithm must take time exponential in $k$ (even if it is allowed exponentially many samples).
\begin{enumerate}
\item For every $p < 1/2$ show that there is an exponential time algorithm that samples $L$ polynomially many times and produces the secret $S$ with high probability.
\item Describe an $O(k)$-state hidden Markov model $M$ that generates samples according to $L$. I.e., the output sequence of the model $M$ can be divided into blocks of length $k+1$ where each block has distribution identical to that of the black-box $L$.
\item Determine the entropy rate $H(M)$ of the source $M$.
\item Show that if a polynomial time universal compression algorithm for HMMs can compress $n$ length sequences from this source to an expected length of $(H(M)+\epsilon)\cdot n$ in time $\operatorname{poly}(k,1/\epsilon)$ for some $n = k^{O(1)}$ then there is a polynomial time algorithm to solve LPN.
(Conclude that universal compression algorithms must take time that is exponential in $k$ or exponential in $1/\epsilon$.)
\end{enumerate}
\item {\bf Markov Chains.}
Recall that a $k$-state Markovian source is given by a $k\times k$ stochastic matrix, whose entries specify the transition probabilities, i.e. $M_{ij} = \Pr[Z_2 = j \mid Z_1 = i]$.
\begin{enumerate}
\item
Given as input a matrix $M$ describing a Markovian chain that produces a state sequence $Z_1,Z_2,\ldots$, describe an algorithm to compute its entropy rate, i.e., $\lim_{n \to \infty} H(Z_n | Z_1,\ldots, Z_{n-1})$. (You may assume standard linear algebra.)
\item {\bf The entropy rate of a dog looking for a bone:}
A dog walks on the integers, moving one step in its current direction on typical time steps, while pausing to dig for a bone on some steps and reversing directions occasionally. Specifically, if the dog is at integer $i$ at a given moment of time and $i \equiv 0 \pmod{3}$ then the dog pauses with probability $0.2$ and continues w.p. $0.8$ (in its current direction). If $i \not\equiv 0 \pmod{3}$ then the dog continues in its current direction with probability $0.9$ and reverses directions with probability $0.1$.
Let $X_1,X_2,\ldots,X_t,\ldots$ describe the sequence of locations of the dog over time. (Is this sequence a Markov chain?)
Use your algorithm from Part (a) to compute the entropy rate of this sequence, i.e., the quantity $\lim_{t \to \infty} H(X_t | X_1,\ldots,X_{t - 1})$.
\end{enumerate}
\end{enumerate}
\end{document}