\documentclass[11pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{bbm} % provides \mathbbm, used by the \1 macro below
\usepackage{array}
\usepackage{graphicx} % epsfig is obsolete; graphicx provides \includegraphics
\usepackage{fullpage}
\usepackage{color, soul}
\usepackage{comment}
\usepackage{enumitem}
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator{\Bern}{Bern}
\newcommand{\x}{\times}
\newcommand{\ep}{\epsilon}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\BF}{\mathbb{F}}
\newcommand{\MC}{\mathcal{C}}
\newcommand{\p}{\mathbb{P}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\Aen}{A_\eps^{(n)}}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
\newcommand*{\op}[1]{\operatorname{#1}}
\newcommand{\mcT}{\mathcal{T}}
\begin{document}
\input{preamble.tex}
\newcount\Sols % 0 suppresses notes to selves in text
\Sols=1
\newcommand{\soln}[1]{\ifnum\Sols=1 {\color{blue} [SOLUTION: #1]}\fi}
\newcount\Comments % 0 suppresses notes to selves in text
\Comments=1
\newcommand{\todo}[1]{\ifnum\Comments=1 {\color{red} [TODO: #1]}\fi}
\handout{CS 229r Information Theory in CS}{{\bf Due: Never}}{Instructor:
Madhu Sudan}{TAs: Mitali Bafna, Noah Golowich}{Practice Problem Set 4}
\section*{Instructions}
For self-testing your understanding --- not to be turned in.
\iffalse{
\begin{description}
\item[Collaboration:]
Collaboration is allowed, but limit yourselves to
groups of size at most four.
\item[References:]
In general, try not to run to reference material to answer
questions. Try to think about the problem to see if you can
solve it without consulting any external sources. If this fails,
you may look up any reference material.
\item[Writeup:]
You must write the solutions by yourselves.
Cite all references and collaborators.
Explain why you needed to consult any of the references,
if you did consult any.
\end{description}
}\fi
\section*{Problems}
\begin{enumerate}
\item {\bf Indexing:} In the Indexing function Alice gets an input $X \sim \mathrm{Unif}(\{0,1\}^n)$ and Bob gets an input $i \sim \mathrm{Unif}([n])$, and their goal is to compute $\mathrm{Index}(X,i) = X_i$. Interest in the problem is in the ``one-way'' communication complexity of the problem where one player sends a message to the other and the other player outputs the function value.
\begin{enumerate}
\item Show that the one-way communication complexity of Indexing is $\log_2 n - \Theta(1)$ if Bob sends the message and Alice is to output the answer. (For starters, you can prove a lower bound of $\Omega(\log n)$.)
\item Show that the one-way communication complexity of Indexing is $\Theta(n)$ if Alice sends the message and Bob is to output the answer. Try to prove the lower bound using the properties of the entropy function (and chain rule etc.).
\end{enumerate}
\newcommand{\IP}{\mathrm{IP}}
\item {\bf Biased Inner Product:} Recall that the $n$-bit inner product function $\IP_n(X,Y) = \oplus_{i \in [n]} X_i \cdot Y_i$. Give a lower bound as a function of $p$ and $n$ on the distributional communication complexity of $\IP_n(X,Y)$ when $X,Y \sim \Bern(p)^n$ are independent. How tight is your bound?
\item {\bf Information Complexity of AND.} Recall the protocol for the two bit AND function where Alice gets $U\in \{0,1\}$, Bob gets $V \in \{0,1\}$ and they wish to compute $U \wedge V$. The protocol involves Alice picking a time $t_A \in [0,1]$ uniformly at random.
If $U=0$ then at time $t_A$ Alice sends $U$ to Bob, unless Bob has already sent her a message.
Similarly Bob picks a time $t_B \in [0,1]$ uniformly at random.
If $V=0$ then at time $t_B$ Bob sends $V$ to Alice, unless Alice has already sent him a message. At time $t=1$, if neither party has spoken, both exchange $U$ and $V$. The ``rough'' argument in class that this protocol has information complexity $5/4$ is {\em not} correct. Compute the correct information complexity of the protocol. Specifically if an observer sees Alice sending a $0$ to Bob at time $t$, what can the observer infer about $V$? (What is its bias, and what is its entropy). Based on the above compute the Information Complexity of this protocol (taking expectations over $t$ etc.).
\item {\bf Divergence, TVD, and Mutual Information:}
In the concluding parts of the lower bound for Disjointness we considered two distributions, let's call them $\Pi_0$ and $\Pi_1$, on some finite universe $\Omega$ and established a lower bound on $\delta(\Pi_0,\Pi_1)$, the total variation distance between the distributions. We then used this lower bound to get a lower bound on the mutual information $I(U;X)$ where $U \sim \Bern(1/2)$ and $X \sim \Pi_U$. We used Pinsker's Inequality to establish this lower bound, but we didn't apply Pinsker directly to bound $D(\Pi_0||\Pi_1)$. Instead we did something much more complicated. This exercise aims to explain why.
\begin{enumerate}
\item Suppose we only have a lower bound on $D(\Pi_0||\Pi_1)$. Prove that this is insufficient for a lower bound on $I(U;X)$ by giving examples of distributions $\Pi_0,\Pi_1$ where $D(\Pi_0||\Pi_1)$ is arbitrarily high while $I(U;X)$ is arbitrarily close to zero.
\item Having established the above, now give the best lower bound you can give on $I(U;X)$ in terms of $\delta(\Pi_0,\Pi_1)$. Show your bound is tight!
\item Establish an upper bound on $I(U;X)$ in terms of $\delta(\Pi_0,\Pi_1)$. Again show your bound is tight.
\end{enumerate}
\item {\bf Applying Disjointness Lower Bound:} Consider the following ``real'' communication problem. Alice's input is $X \in \R^n$ and Bob's is $Y \in \R^n$. Their goal is to exchange few {\bf bits} to determine if $X$ and $Y$ are drawn from $\mu^n_Y$ or from $\mu^n_N$ where the ``No'' distribution $\mu^n_N$ samples $X_1,\ldots,X_n$ and $Y_1,\ldots,Y_n$ i.i.d. from $N(0,1)$ (the normal distribution with mean $0$ and variance $1$), while the ``Yes'' distribution samples $(X_i,Y_i)$ i.i.d. from the bivariate normal distribution with mean $0$, variance $1$ and correlation $\rho$. (I describe this as sampling $X_i,Z_i$ independently from $N(0,1)$ and then outputting $(X_i,Y_i)$ where $Y_i = \rho X_i + \sqrt{1-\rho^2} Z_i$ though people who know better can write the density function.)
\begin{enumerate}
\item Show that there is a protocol with $O(1/\rho^2)$ bits of communication solving this problem (in the distributional sense). (In other words, the protocol should output $1$ with probability at least $2/3$ if the input is drawn from $\mu^n_Y$ and should output $0$ with probability at least $2/3$ if the input is drawn from $\mu^n_N$.)
\item Show that any protocol solving this problem should communicate at least $\Omega(1/\rho)$ bits. (Reduce disjointness to this problem!)
\end{enumerate}
\end{enumerate}
\end{document}