\documentclass[11pt]{article}
\usepackage{enumerate, graphicx}
\usepackage{fullpage}
\usepackage{amsmath,amsfonts,amssymb}
\usepackage{url}
\usepackage{bbm}
\usepackage{float}
\usepackage{framed}
\usepackage{enumerate}
\usepackage{color}
\usepackage[usenames,dvipsnames,svgnames,table]{xcolor}
\usepackage[colorlinks=true, linkcolor=red, urlcolor=blue, citecolor=blue]{hyperref}
\usepackage{xspace}
\newcommand{\bs}[1]{\boldsymbol{#1}}
\newcommand{\norm}[1]{\|#1\|}
\newcommand*\xor{\oplus}
\newcommand{\fancy}[1]{\textup{\fontencoding{T1}\fontfamily{ppl}\fontseries{m
}\fontshape{n}\selectfont #1}\xspace}
\def\etal{\text{et al.}\xspace}
\def\cc{\fancy{cc}}
\def\script#1{\mathcal{#1}}
\def\sA{\script{A}}
\def\sB{\script{B}}
\def\sC{\script{C}}
\def\sE{\script{E}}
\def\sI{\script{I}}
\def\sL{\script{L}}
\def\sM{\script{M}}
\def\sP{\script{P}}
\def\sQ{\script{Q}}
\def\sR{\script{R}}
\def\sT{\script{T}}
\begin{document}
\input{preamble.tex}
\lecture{9}{Feb 23, 2016}{Madhu Sudan}{Ali Vakilian}
%%%% body goes in here %%%%
In this lecture we explain the \fancy{Communication Complexity (CC)} model introduced by Yao~\cite{Yao79} and motivate the model with some well-studied examples. Furthermore, we define the probabilistic variant of the \fancy{CC} model and at the end describe some of the general techniques, such as \fancy{Rank, Discrepancy and Information theory}, for proving lower bounds in this model.
\section{Basic Concepts and Definitions}
\paragraph{What's the goal in \fancy{Communication Complexity}?} Intuitively speaking, the goal of this model is to study problems in which two (or more) players each have some input and they want to compute some pre-specified function $f$ on their inputs. However, each of the players is only aware of its own input and (usually) does not know much about the other player's input. The goal is to give upper/lower bounds on the \emph{size of the messages} (in bits) they have to communicate in order to evaluate the function $f$ over their inputs. More precisely, there are two players: Alice and Bob. Alice has input $x$ and Bob has input $y$ (see Figure~\ref{fig:cc-model}). Alice and Bob want to compute $f(x,y)$ together while communicating the fewest possible number of bits (via a protocol that works for all $x,y$ in the input domain).
\begin{figure}[h]
\centering
\includegraphics[height=1.15in]{figs/cc-model.pdf}
\caption{Alice has input $x$ and Bob receives input $y$ and the goal is to compute $f(x,y)$.}
\label{fig:cc-model}
\end{figure}
A more interesting question arises in the probabilistic variant of \fancy{CC} in which we assume that Alice and Bob are allowed to use \emph{randomized} protocols and at the end they need to output $f(x,y)$ correctly with probability $(1-\epsilon)$. In general we consider the following variants in \fancy{CC}.
\begin{itemize}
\item{\fancy{Deterministic Communication Complexity:} As described above, the protocols of Alice and Bob are \emph{deterministic} and at the end they need to compute $f(x,y)$ correctly (no error is allowed). $\cc_{0}(f)$ denotes the minimum number of bits required for computing $f$ in this model.}
\item{\fancy{Private Randomized Communication Complexity:} In this model each of the players has some \emph{private} random bits and their protocols are randomized (based on their private random bits) and at the end they must compute $f(x,y)$ correctly with probability $1-\epsilon$ (the probability is over the random bits). Here, $\fancy{Priv-}\cc_{\epsilon}(f)$ denotes the minimum number of bits required for computing $f$ in this model. }
\item{\fancy{Public/Shared Randomized Communication Complexity:} In this model, in addition to the private random bits, Alice and Bob are allowed to share some public random bits which are \emph{independent} of their inputs $x,y$. As before, Alice and Bob are required to compute $f(x,y)$ correctly with probability at least $1-\epsilon$ and their protocol can be randomized based on both their private random bits and the shared random bits. Here, $\cc_{\epsilon}(f)$ denotes the minimum number of bits required for computing $f$ in this model. }
\end{itemize}
Note that it is straightforward to show that for any function $f$, $\cc_{0}(f) \geq \fancy{Priv-}\cc_{\epsilon}(f) \geq \cc_{\epsilon}(f)$.
\paragraph{Examples:} In the following we compute $\cc_0$ of different functions $f:\set{0,1}^n \times \set{0,1}^n \rightarrow \set{0,1}$.
\begin{itemize}
\item{The trivial upper bound we can give for any arbitrary function $f$ is $\cc_{0}(f) \leq n+1$. Alice sends $x$ to Bob ($n$ bits). Then Bob computes $f(x,y)$ and sends it ($1$ bit) to Alice. In fact, for many functions $f$, this naive approach is the best we can do.}
\item{$\fancy{Parity}(x,y) = \xor^{n}_{i=1} (x_i \xor y_i)$:} Alice sends $\xor^n_{i=1}x_i$ to Bob (the parity of her bit string, which is a single bit) and then Bob computes $\fancy{Parity}(x,y)$ and returns the solution to Alice. Thus $\cc_{0}(\fancy{Parity}) \leq 2$.
\item{$\fancy{EQ}(x,y) = 1$ if $x=y$ and $0$ otherwise:} It is known that $\cc_{0}(\fancy{EQ}) =n+1$ and in this lecture we prove that $\cc_{0}(\fancy{EQ}) \geq n$.
\end{itemize}
\noindent
For proving lower bounds in this model, it is crucial to specify the \fancy{CC} model carefully. Here, we give a formal description of protocols in \fancy{Deterministic CC} which is due to Yao~\cite{Yao79}.
\paragraph{Problem)} $f:\set{0,1}^n \times \set{0,1}^n \rightarrow \set{0,1,?}$ (In many cases we are only interested in computing $f$ over some parts of the domain and for the rest we do not care about the output of $f$. Here ``$?$'' denotes the output to the parts we do not care. However, in this lecture we do not work with ``$?$''.)
\paragraph{Solution)} A \textbf{protocol} specifies at each step \emph{who} speaks and \emph{what} they say. A $k$-bit protocol $\Pi = (\Pi_1,\cdots,\Pi_k)$ can be specified as follows:
\begin{itemize}
\item{$\Pi_i = (z_i, f_i)$ where $z_i \in \set{x,y}$ and $f_i(z_i,b_1,\cdots,b_{i-1}) \in \set{0,1}$, where $b_j$ denotes the $j^{\mathrm{th}}$ bit communicated between Alice and Bob. Let $g_A$ and $g_B$ be, respectively, the output functions of Alice and Bob.}
\begin{itemize}
\color{blue}\item{In randomized \fancy{CC} models, $z_i\in \set{(x,R_A,R), (y,R_B,R)}$ where $R_A$, $R_B$ and $R$ respectively denote the random bits of Alice, Bob and the shared random bits.\color{black}}
\end{itemize}
\end{itemize}
\noindent
We also need to specify the \emph{correctness} and \emph{efficiency} of a given protocol.
\paragraph{Correctness)} If $\forall i, b_i=f_i(z_i,b_1,\cdots,b_{i-1})$ then $\forall x,y:$ $g_{A}(x,b_1,\cdots, b_k) = g_B(y,b_1,\cdots,b_k) = f(x,y)$.
\begin{itemize}
\color{blue}\item In randomized \fancy{CC} models, the condition is: if $\forall i, b_i=f_i(z_i,b_1,\cdots,b_{i-1})$ then $\forall x,y:$ $\fancy{Pr}[g_{A}(x,b_1,\cdots, b_k)$ $= g_B(y,b_1,\cdots,b_k) = f(x,y)] \geq 1-\epsilon$.
\color{red}\item Another \fancy{CC} model which is widely used in the literature is \fancy{One-way CC}. In this model the correctness condition is relaxed and we only require Bob to output $f(x,y)$. More precisely, the correctness condition in this model is: if $\forall i, b_i=f_i(z_i,b_1,\cdots,b_{i-1})$ then $\forall x,y:$ $\fancy{Pr}[g_B(y,b_1,\cdots,b_k) = f(x,y)] \geq 1-\epsilon$.\color{black}
\end{itemize}
\paragraph{Efficiency)} is defined as the length of the protocol. Here it is $k$.
\section{Communication Complexity of \fancy{EQ}}
In this section we focus on giving the promised lower bound for $\cc_0(\fancy{EQ})$ via a \emph{matrix} representation of the problem. Any function $f$ (where $f:\set{0,1}^n \times \set{0,1}^n \rightarrow \set{0,1}$) can be represented by a $2^n \times 2^n$ matrix $M_f$ whose rows correspond to the set of all possible inputs of Alice and whose columns correspond to all possible values of $y$, with $M_f[x_i, y_j] = f(x_i,y_j)$.
\begin{definition}[Rectangle] A (combinatorial) rectangle in $\set{0,1}^n \times \set{0,1}^n$ is a subset $R \subseteq \set{0,1}^n \times \set{0,1}^n$ such that $R = A \times B$ for some $A \subseteq \set{0,1}^n$ and $B \subseteq\set{0,1}^n$.
\end{definition}
A useful observation in the matrix view of $f$ in \fancy{CC} model is that at the end of $\Pi_i$, $M_{f}$ is partitioned into a set of rectangles (at the beginning $M_f$ is the only rectangle) and as Alice (Bob) communicates the next bit each existing rectangle may partition into at most two (combinatorial) rectangles (be careful about the definition of rectangles) by horizontal (vertical) lines. Let $\sR_i$ be the set of the rectangles at the end of $\Pi_i$.
Then for all values of $x$ and $y$ that lie in a rectangle $R \in \sR_i$, the set of communicated bits is the same and if the protocol has to output after $\Pi_i$, the output would be the same for all such $x,y$ (see Figure~\ref{fig:chromatic_rec}).
The rectangles for which the output is the same are called \emph{monochromatic} rectangles (Figure~\ref{fig:chromatic_rec}(b)).
\begin{figure}[h]
\centering
\includegraphics[height=2.5in]{figs/chromatic_rec.pdf}
\caption{$(a)$ shows the set of communicated bits in each rectangle and $(b)$ is the value of $f$ at each rectangle.}
\label{fig:chromatic_rec}
\end{figure}
The transcript of a protocol $\Pi$ on $(x,y)$ is defined as $t(x,y):=(b_1,\cdots,b_k)$, the set of communicated bits given $(x,y)$. Then we have the following result which formalizes what we described above.
\begin{claim}\label{clm:rectangle}
For all $t \in \set{0,1}^k$, there exists a rectangle $R = S\times T$, where $S\subseteq \set{0,1}^n$ and $T \subseteq\set{0,1}^n$, such that $t(x,y) = t \iff (x,y)\in R$.
\end{claim}
\begin{proof}
By induction.
\end{proof}
Claim~\ref{clm:rectangle} implies that if $t(x,y)=t(x',y')$ then $t(x',y) = t(x,y')$. Since $g_A$ ($g_B$) only depends on $t(x,y)$ and $x$ ($y$), the correctness condition implies that the outputs of all points in a rectangle of $\sR_k$ have the same value $r \in\set{0,1}$.
\paragraph{Conclusion)} $\Pi = (\Pi_1, \cdots, \Pi_k)$ partitions $M_f$ into $2^k$ monochromatic rectangles.
\paragraph{Algebraic view of monochromatic rectangles.} If the number of monochromatic rectangles in $M_f$ is $2^k$ ($R_1,\cdots,R_{2^k}$) then $\fancy{rank}(M_f)$ is at most $2^k$. The reason is that we can write $M_f$ as the sum of $2^k$ matrices $M_1,\cdots,M_{2^k}$ such that $M_i[x,y] =0$ if $(x,y) \notin R_i$ and $r_i$ if $(x,y)\in R_i$, where $r_i$ is the value of a cell in $R_i$. It is straightforward to verify that $\fancy{rank}(M_i)\le 1$; hence, $\fancy{rank}(M_f)$ is at most $2^k$.
This implies that $\fancy{rank}(M_f) \leq 2^{\cc_0(f)} \iff \cc_0(f) \geq \log(\fancy{rank}(M_f))$. This observation is particularly useful in proving lower bounds for many problems in \fancy{Deterministic CC}. As an application of \emph{rank} method, we show how to bound the number of required bits of communication of \fancy{EQ}.
In order to apply the rank method, first we need to figure out the matrix representation of \fancy{EQ}. $M_{\fancy{EQ}} = \mathbb{I}$ and it is straightforward to check that the minimum number of required monochromatic rectangles of any protocol $\Pi_{\fancy{EQ}}$ is $2^{n}$. Thus any protocol $\Pi_{\fancy{EQ}}$ requires at least $n$ bits of communication.
\begin{theorem}[\cite{Yao79}]\label{thm:lb-EQ}
$\cc_{0}(\fancy{EQ}) \geq n$.
\end{theorem}
\subsection{Randomized Protocol for \fancy{EQ}}
Using the following result from \emph{error correcting codes}, we can design an efficient protocol for \fancy{EQ} with randomness.
\begin{theorem}\label{thm:error-correcting-codes}
There exists a function $E:\set{0,1}^n \rightarrow \set{0,1}^n$ such that for all $x \neq y$, $E(x)$ and $E(y)$ disagree in at least ${n \over 10}$ coordinates.
\end{theorem}
\begin{figure}[h]
\centering
\includegraphics[height=1.5in]{figs/rand-eq.pdf}
\caption{$(a)$ shows an efficient protocol for \fancy{EQ} with public randomness and $(b)$ shows the adaptation of the same protocol with private randomness.}
\label{fig:rand_eq}
\end{figure}
\noindent
Consider the following protocol $\sP$ for \fancy{EQ} using an error correcting function $E$.
\begin{enumerate}
\item{Alice computes $E(x)$ and sends its $i^{\mathrm{th}}$ coordinate (where the index $i$ is determined by the public random bits) to Bob.}
\item{Bob also computes $E(y)$ and sends its $i^{\mathrm{th}}$ coordinate to Alice.}
\item{They accept if their $i^{\mathrm{th}}$ coordinates agree.}
\end{enumerate}
\begin{remark}
If we want to work with private randomness, Alice has to communicate the random index she selected as well (which requires $O(\log n)$ bits of communication). In fact, the protocol $\sP$ gives the following bounds on the communication complexity of \fancy{EQ}: $\cc_{\epsilon}(\fancy{EQ}) = O(1)$ and $\fancy{Priv-}\cc_{\epsilon}(\fancy{EQ}) = O(\log n)$ where $\epsilon = O(1)$.
\end{remark}
\noindent
To show the correctness of $\sP$ with constant probability note that,
\begin{itemize}
\item{If $x=y$, \qquad $\fancy{Pr}[$Alice and Bob accept$] = 1$.}
\item{If $x\neq y$, \qquad $\fancy{Pr}[$Alice and Bob accept$] \leq {9\over 10}$.}
\end{itemize}
We can decrease the failure probability by repeating the protocol several times.
%\noindent
So far we have proved lower bounds on the number of required bits of communication for problems in the \fancy{Deterministic CC} model. However, it is more interesting to obtain lower bounds that rule out certain bounds for randomized protocols as well. The main tool for proving lower bounds for randomized protocols is Yao's principle, which is stated as follows.
Proving a lower bound for the worst case performance of any randomized protocol solving a problem $P$ is the same as proving a lower bound for the performance of any deterministic protocol for $P$ over a random instance. This leads to another variant of \fancy{CC} which is called \fancy{Distributional Communication Complexity} (see Section~\ref{sec:distributed-cc}).
\section{Set Disjointness Problem}
Another problem in \fancy{CC} which has lots of applications in other areas of TCS, such as streaming algorithms, is the \fancy{Set Disjointness (DISJ)} problem. In $\fancy{DISJ}_{n}$, each of Alice and Bob gets a subset of $[n]$ and their goal is to determine whether their sets intersect. More precisely, for $S\subseteq [n]$ and $T\subseteq [n]$, \fancy{DISJ}$(S,T)=1$ if $S\cap T \neq \emptyset$ and $0$ otherwise.
Proving lower bounds for deterministic protocols of $\fancy{DISJ}_n$ is not so hard; however, a more challenging problem is to give a lower bound for randomized protocols for the problem. One of the seminal results in \fancy{CC} is the following lower bound on $\cc_{\epsilon}(\fancy{DISJ}_n)$, whose proof we will see later in this course.
\begin{theorem}[\cite{ks-pccsi-92,r-dcd-92}]\label{thm:set-disj-rnd}
$\cc_{\epsilon}(\fancy{DISJ}_n) \geq \Omega(n)$.
\end{theorem}
Theorem~\ref{thm:set-disj-rnd} has in particular many applications in proving lower bounds for problems in the \emph{streaming} setup. As an example, one of the well-known problems in the streaming setup is the \fancy{Frequent Item} problem in which, given a data stream of items $\sE \in [n]^m$, the goal is to output the most frequent item in the stream using the least possible memory space. Suppose there exists an algorithm $\sA$ for \fancy{Frequent Item} that returns the correct answer with probability $1-\epsilon$ and uses $o(n)$ space. Then we can come up with the following protocol $\sP$ for $\fancy{DISJ}_n$:
\begin{enumerate}[(a)]
\item Alice runs $\sA$ on her input, $S$, (as a stream) and then sends the memory state of $\sA$ to Bob.
\item Bob resumes $\sA$ on his input, $T$, from the state sent by Alice and sends to her the final output of $\sA$ and a bit stating whether Bob has the item or not.
\item Alice sends a bit to Bob stating whether she has the most frequent item or not.
\end{enumerate}
It is straightforward to check that the described protocol returns $1$ iff $S$ and $T$ intersect and $0$ otherwise. Thus $\sP$ solves $\fancy{DISJ}_{n}$ with probability at least $1-\epsilon$ using $o(n)$ bits of communication, which is a contradiction. \begin{corollary}
Any (randomized) streaming algorithm of $\fancy{Frequent Item}$ that with probability at least $1-\epsilon$ returns a most frequent item requires $\Omega(n)$ space.
\end{corollary}
\section{Distributional Communication Complexity}\label{sec:distributed-cc}
In this setting we assume that the inputs of Alice and Bob come from a distribution $\mu$ over $\set{0,1}^n\times \set{0,1}^n$: $(x,y) \sim \mu$.
\begin{definition} [\fancy{Distributional Communication Complexity}] The distributional communication
complexity of a function $f: \set{0,1}^n \times \set{0,1}^n \rightarrow \set{0, 1}$ over the distribution $\mu$ and with success probability at
least $1-\epsilon$, denoted by $\cc_{\mu,\epsilon}(f)$, is the cost of the best deterministic protocol that gives the correct
answer on at least $(1-\epsilon)$ fraction of all the inputs, weighted by $\mu$.
\end{definition}
\noindent
For any function $f$, we have the following relation between the $\cc_{\epsilon}$ and $\cc_{\mu, \epsilon}$.
\begin{proposition}
If $\cc_{\epsilon}(f) \leq k$, then $\cc_{\mu, \epsilon}(f) \leq k$.
\end{proposition}
\begin{proof}
If there exists a randomized protocol $\Pi$ for function $f$ with expected error at most $\epsilon$ (over $\mu$ and $R$), then there exists a deterministic protocol $\Pi'$ whose expected error is at most $\epsilon$ (over $\mu$).
\begin{align*}
\mathbb{E}_{R,(x,y)\sim \mu}[\fancy{Error}(f,\Pi,x,y,R)] \leq \epsilon \Rightarrow \exists r, {\tt s.t.}\quad \mathbb{E}_{(x,y)\sim \mu}[\fancy{Error}(f,\Pi,x,y,R=r)] \leq \epsilon.
\end{align*}
\end{proof}
\noindent
It is usually useful to work with matrix view of \fancy{Distributional CC}.
\begin{itemize}
\item{$f:\set{0,1}^n \times \set{0,1}^n \rightarrow \set{-1,1}$. For convenience, we change $0$ to $-1$ which you see that will help a lot!}
\item{$M_{\mu,f}(x,y) = \mu(x,y)f(x,y)$}
\end{itemize}
\begin{observation}
$\sum_{x,y}|M_{\mu,f}(x,y)|=1$.
\end{observation}
Consider (combinatorial) rectangles in $M_f$. By definition, $R_{S\times T}$ denotes the set of pairs $(x,y)$ such that $x\in S$ and $y\in T$, on which the entries of $f$ are all either $-1$ or $1$. Furthermore, we define the matrix $M_{S,T}$ as follows: $M_{S,T}[x,y] = 0$ if $(x,y) \notin S\times T$ and $r$ otherwise, where $r\in \set{-1,1}$.
\begin{definition}[Discrepancy function]
Let $f: \set{0,1}^n \times \set{0,1}^n \rightarrow \set{0,1}$ be a function and let $\mu$ be a probability distribution on $\set{0,1}^n\times\set{0,1}^n$. For a rectangle $R$ in $M_f$, $\fancy{Disc}_{\mu,f}(R) = |\sum_{(x,y)\in R} M_{\mu,f}(x,y)|$. Moreover, the discrepancy of $f$ under $\mu$ is defined as the maximum discrepancy over all its rectangles, $\fancy{Disc}_{\mu}(f) = \max_{R \in\sR_f} \fancy{Disc}_{\mu,f}(R)$.
\end{definition}
\begin{lemma}\label{lem:error}
If the error of protocol $\Pi$ on $f$ is at most $\epsilon$, then $|\sR| \cdot \fancy{Disc}_{\mu}(f) \geq 1-2\epsilon$.
\end{lemma}
\begin{proof}
Let $\epsilon = \fancy{Error}(\Pi,f)$. Then,
$(1-\epsilon) -\epsilon = \sum_{x,y}\mu(x,y)\cdot \fancy{Pr}[\text{$\Pi$ returns correctly on }(x,y)] - \sum_{x,y}\mu(x,y)\cdot \fancy{Pr}[\text{$\Pi$ returns incorrectly on }(x,y)] \leq \sum_{R\in \sR}\fancy{Disc}_{\mu,f}(R) \leq |\sR| \cdot \fancy{Disc}_{\mu}(f).$
\end{proof}
\begin{lemma}\label{lem:error-disc}
If $\cc_{\mu,\epsilon}(f) \leq k$ then there exists a rectangle $R$ such that $\fancy{Disc}_{\mu,f}(R) \geq 2^{-k}(1-2\epsilon)$.
\end{lemma}
\begin{proof}
It follows from Lemma~\ref{lem:error} and the fact that $|\sR| \leq 2^k$.
\end{proof}
\subsection{Inner Product Function}
In this part, we study the \fancy{Inner Product (IP)} function and prove a lower bound on its communication complexity with (private) randomness. The \fancy{IP} function of two vectors $x,y$ is defined as $\fancy{IP}(x,y) = \xor_{i=1}^n (x_i \wedge y_i)$.
Using Lemma~\ref{lem:error-disc} and the following lemma, which will be shown in the next lecture, we obtain a lower bound on $\cc_{\epsilon}(\fancy{IP})$.
\begin{lemma}
For all rectangles $R$, $\fancy{Disc}(M_{\mathrm{uniform},\fancy{IP}},R) \leq 2^{-n/2}$.
\end{lemma}
\begin{thebibliography}{lect9}
\bibitem[KS92]{ks-pccsi-92}
B.~Kalyanasundaram and G.~Schintger.
\newblock The probabilistic communication complexity of set intersection.
\newblock {\em SIAM J. Discrete Math.}, 5(4):545--557, 1992.
\bibitem[Raz92]{r-dcd-92}
A.~A. Razborov.
\newblock On the distributional complexity of disjointness.
\newblock {\em Theo. Comp. Sci.}, 106(2):385--390, 1992.
\bibitem[Yao79]{Yao79}
A.~ C. Yao.
\newblock Some complexity questions related to distributive computing
(preliminary report).
\newblock In {\em Proceedings of the eleventh annual ACM symposium on Theory of
computing}, pages 209--213. ACM, 1979.
\end{thebibliography}
\end{document}