\documentclass[10pt]{article}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{bm}
\usepackage[margin=1in]{geometry}
\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
\newcommand{\Lim}[1]{\raisebox{0.1ex}{{$\displaystyle \lim_{#1}\;$}}}
\def\independenT#1#2{\mathrel{\rlap{$#1#2$}\mkern2mu{#1#2}}}
%\usepackage{epsfig}
\begin{document}
\input{preamble.tex}
\lecture{11}{March 01, 2016}{Madhu Sudan}{Tanay Mehta}
%%%% body goes in here %%%%
Today, we will finish proving the lower bound for the set disjointness problem. To do this, we will introduce new concepts such as embeddings and the Hellinger distance.
\section*{Information Complexity and Set Disjointness (Minjae)}
Last time, we introduced the set disjointness problem for communication protocols. In order to prove lower bounds for this, we defined the concept of information cost as the mutual information between the inputs to the problem and communication protocol. The information cost of a protocol computing a function is a lower bound for the communication complexity. Therefore, we can prove lower bounds for the communication complexity of computing a function by showing lower bounds for its information cost. In particular, our goal is to prove the following theorem.
\begin{theorem}
$\mathrm{IC}_{\mu, f}(\mathrm{Disj}) = \Omega(n)$
\end{theorem}
\noindent We will show this theorem by proving the series of lemmas outlined in the previous lecture. Last time, we proved the following.
\begin{lemma}
$\mathrm{I}(X, Y; \Pi | Z) \geq \sum_{i=1}^n \mathrm{I}(X_i, Y_i; \Pi | Z)$
\end{lemma}
\noindent We will now prove the following.
\begin{lemma}
$\mathrm{I}(X_i, Y_i; \Pi |Z) \geq \mathrm{IC}_{\mu_i|Z_i}(\mathrm{Disj}^1)$
\end{lemma}
\noindent Recall that the distribution $\mu$ is defined as follows. Let $\tilde{X}_1, \dots, \tilde{X}_n$, $\tilde{Y}_1, \dots, \tilde{Y}_n$, and $Z_1, \dots, Z_n$ be i.i.d. from the uniform distribution $U(\{0,1\})$. Define $X_i = \tilde{X}_i \wedge Z_i$ and $Y_i = \tilde{Y}_i \wedge \bar{Z}_i$. Then, $\mu$ is the distribution of the triple $(X, Y, Z)$. \\
\\Note that Lemma 3 does not follow immediately from definitions because $\Pi$ does not depend on one-bit inputs but $n$-bit inputs. We can get around this issue by using \emph{embeddings}.\\
\\ \textbf{Embedding}:
\begin{eqnarray*}
R &=& \{Z_j \}_{j=1, j \neq i}^n \\
R_A &=& \{\tilde{X}_j\}_{j=1; j \neq i}^n \\
R_B &=& \{\tilde{Y}_j\}_{j=1; j \neq i}^n \\
\\
X_j &:=& \tilde{X}_j \wedge Z_j \\
Y_j &:=& \tilde{Y}_j \wedge \bar{Z_j} \\
\\
& X & \leftarrow \{X_j\} \\
& Y & \leftarrow \{Y_j\} \\
\end{eqnarray*}
Embeddings can be thought of as a zero-communication protocol with common randomness independent of the inputs $X_i$ and $Y_i$. For this specific protocol, we claim the following.
\begin{claim}
$\mathrm{Disj}^n(X, Y) = \mathrm{Disj}^1(X_i, Y_i)$
\end{claim}
\begin{proof}
\begin{eqnarray*}
\mathrm{Disj}^n(X, Y) &=& \bigvee_{j=1}^n X_j \wedge Y_j \\
&=& \left(\bigvee_{j=1, j \neq i}^n X_j \wedge Y_j \right) \vee \left(X_i \wedge Y_i \right) \\
&=& \left(\bigvee_{j=1, j \neq i}^n \tilde{X}_j \wedge \tilde{Y}_j \wedge Z_j \wedge \bar{Z}_j \right) \vee \left(X_i \wedge Y_i \right)\\
&=& X_i \wedge Y_i
\end{eqnarray*}
where the last equality follows since $Z_j \wedge \bar{Z}_j = 0$ from the previous line.\\
\end{proof}
\begin{proof}[Proof of Lemma 3]
\noindent Define the following protocol making use of the above embedding.
\begin{eqnarray*}
\Pi^1 &:=& \Pi(\mathrm{embed}(X_i), \mathrm{embed}(Y_i))
\end{eqnarray*}
From definitions, we have that
\begin{eqnarray*}
\mathrm{I}(X_i, Y_i; \Pi^1 |Z) & \geq & \mathrm{IC}_{\mu_i | Z_i} (\mathrm{Disj}^1)
\end{eqnarray*}
since $\mathrm{IC}$ is taken as the minimum over distributions. Furthermore, we have by construction that
\begin{eqnarray*}
\mathrm{I}(X_i, Y_i ; \Pi | Z) &=& \mathrm{I}(X_i, Y_i ; \Pi^1 | Z)
\end{eqnarray*}
Combining these two, we have the statement of lemma 3. \\
\end{proof}
\noindent Note that the embedding construction fully requires common randomness $Z$. \\
\\We will now prove one final lemma that implies Theorem 1.
\begin{lemma}
$\mathrm{I}(X_i, Y_i ; \Pi | Z) > 0$ where $X_i, Y_i \in \{0, 1\}$
\end{lemma}
\noindent Since $X_i, Y_i$ are one-bit values, we will suppress the subscripts for ease and write them as $X, Y$. We can also now restrict $Z$ to one-bit. There is some protocol to account for throwing out $n-1$ bits. We will also define the notation $\Pi^{XY} := \Pi(X, Y)$. Therefore, we have $4$ possible transcripts: $\Pi^{00}, \Pi^{01}, \Pi^{10}, \Pi^{11}$. Note that we have $$\mathrm{I}(X, Y; \Pi | Z) = \frac{1}{2}(\mathrm{I}(Y; \Pi^{0Y}) + \mathrm{I}(X; \Pi^{X0}))$$
Now, we claim that all possible values of $X, Y$ cannot be identically distributed. Suppose that $\Pi^{00} \equiv \Pi^{01}$ and $\Pi^{00} \equiv \Pi^{10}$. This implies $\mathrm{I}(Y; \Pi^{0Y}) = 0$ and $\mathrm{I}(X; \Pi^{X0}) = 0$ due to the \emph{rectangle property}. \\
\\\textbf{Rectangle property}:\\
Suppose we have $X, Y$ and $X', Y'$ such that $$\Pi(X, Y) = t = \Pi(X', Y')$$
then for deterministic protocols, we have that $$\Pi(X', Y) = t = \Pi(X, Y')$$
We can extend this property to private randomized communication protocols by factoring out the private randomness and making it part of the input.
\begin{eqnarray*}
& \Pi((X, R_A), (Y, R_B)) &= t =\Pi((X', R_A'), (Y', R_B')) \\
\implies & \Pi((X', R_A'), (Y, R_B)) &= t = \Pi((X, R_A), (Y', R_B')) \\
\end{eqnarray*}
\noindent This statement implies the following by looking at the probability for $X, Y$ to yield a specific transcript $t$.
$$\Pr[\Pi(X, Y) = t] \cdot \Pr[\Pi(X', Y') = t] = \Pr[\Pi(X, Y') = t] \cdot \Pr[\Pi(X', Y) = t]$$
\noindent The rectangle property for probabilistic protocols suggests that there is distance property between the input distributions. The correct distance measure for this case turns out to be the \emph{Hellinger distance}.\\
\noindent \textbf{Hellinger distance}:
For a finite universe, we define the Hellinger distance to be
\begin{eqnarray*}
h(P, Q) & = & \frac{1}{\sqrt{2}} || \sqrt{P} - \sqrt{Q}||_2 \\
& = & \sqrt{1 - \sum_{w \in S} \sqrt{P(w) \cdot Q(w)}}
\end{eqnarray*}
where $S$ is the support. The above sum is known as the \emph{Bhattacharyya coefficient}. The $\frac{1}{\sqrt{2}}$ factor scales the distance to be between $0$ and $1$. The Hellinger distance satisfies the triangle inequality and the other properties of a distance measure. As a side note, the Hellinger distance can be generalized to continuous settings with the Lebesgue measure. \\
\\ Recall the \emph{total variation distance}.
$$\mathrm{TVD}(P, Q) = \frac{1}{2} ||P-Q||_1$$
By the properties of the $1$ and $2$-norms, we have the following bounds.
$$h^2(P, Q) \leq \mathrm{TVD}(P,Q) \leq \sqrt{2} h(P, Q)$$
Ideally, we would like to find the total variation distance between the distributions in question. However, this is difficult due to the rectangle property. The Hellinger distance has a property similar to the rectangle property.
\begin{lemma}[Cut \& Paste Lemma]
$h(\Pi(X, Y), \Pi(X', Y')) = h(\Pi(X', Y), \Pi(X, Y'))$
\end{lemma}
Therefore, we have $h(\Pi^{01}, \Pi^{10}) = 0 \implies h(\Pi^{00}, \Pi^{11}) = 0$. Now, we can prove Lemma 5. \\ \\
\begin{proof}[Proof of Lemma 5]
Take $Z = 0$. Then, we can view $Y$ as an indicator in $\mathrm{I}(Y; \Pi^{0Y})$ where
\begin{eqnarray*}
Y = 1 \rightarrow t \sim \Pi^{01} \\
Y = 0 \rightarrow t \sim \Pi^{00}
\end{eqnarray*}
Define the \emph{Jensen-Shannon divergence} for arbitrary distributions $P, Q$ to be
$$D_{JS}(P || Q) = \frac{1}{2} D_{KL}(P || M) + \frac{1}{2} D_{KL}(Q || M)$$
where $M := \frac{1}{2}(P+Q)$ and $D_{KL}$ is the KL-divergence. $D_{JS}$ can be thought of as a symmetric (on the inputs) version of the KL-divergence. \\
\\Then, we have that
\begin{eqnarray*}
\mathrm{I}(Y; \Pi^{0Y}) &=& D_{JS}(\Pi^{01} || \Pi^{00}) \\
&=& \frac{1}{2} \left( D_{KL}(\Pi^{01} || \Pi^{0\frac{1}{2}}) + D_{KL}(\Pi^{00} || \Pi^{0\frac{1}{2}}) \right)
\end{eqnarray*}
where $\Pi^{0\frac{1}{2}} = \frac{(\Pi^{00} + \Pi^{01})}{2}$. By Pinsker's inequality,
\begin{eqnarray*}
D_{KL}(\Pi^{01} || \Pi^{0\frac{1}{2}}) & \geq & \frac{1}{2} \left(\mathrm{TVD}(\Pi^{01}, \Pi^{0\frac{1}{2}}) \right)^2 \\
&=& \frac{1}{2} \left( \frac{\mathrm{TVD}(\Pi^{01}, \Pi^{00})}{2} \right)^2
\end{eqnarray*}
Note that $\mathrm{TVD}(\Pi^{01}, \Pi^{10}) \geq \beta$ for some $\beta$. This implies that $\mathrm{TVD}(\Pi^{01}, \Pi^{00}) \geq \beta/2$, or $\mathrm{TVD}(\Pi^{00}, \Pi^{10}) \geq \beta/2$. Therefore we have a lower bound of $\frac{\beta^2}{16}$. \\
\\We want a lower bound for the following.
\begin{eqnarray*}
\mathrm{TVD}(\Pi^{01}, \Pi^{10}) & \geq & h^2(\Pi^{01}, \Pi^{10}) \\
&=& h^2(\Pi^{00}, \Pi^{11}) \\
& \geq & \frac{\mathrm{TVD}(\Pi^{00}, \Pi^{11})^2}{2}
\end{eqnarray*}
For these distributions, note that $\Pi^{11}$ accepts transcript $t$ with probability $1-\epsilon$ and $\Pi^{00}$ accepts with probability $\epsilon$. Therefore, $\mathrm{TVD}(\Pi^{00}, \Pi^{11}) \geq 1 - 2 \epsilon$. This gives us a final, positive lower bound of $\frac{(1-2\epsilon)^4}{64}$. \\
\end{proof}
\end{document}