\documentclass[11pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{epsfig}
\usepackage{fullpage}
\usepackage{color, soul}
\usepackage{comment}
\usepackage{enumitem}
\usepackage[
n,
advantage,
sets,
adversary,
landau,
probability,
notions,
logic,
ff,
mm,
primitives,
events,
complexity,
asymptotics,
keys]{cryptocode}
%\setlength\parindent{0pt}
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\newcommand{\x}{\times}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\Aen}{A_\eps^{(n)}}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
\newcommand*{\op}[1]{\operatorname{#1}}
\newcommand{\mcT}{\mathcal{T}}
\newcommand{\SD}{\textsc{SD}}
\newcommand{\GI}{\textsc{GI}}
\newcommand{\GNI}{\textsc{GNI}}
\newcommand{\SZK}{\textsc{SZK}}
\begin{document}
\input{preamble.tex}
\handout{CS 229r Information Theory in CS}{{\bf April 18, 2019}}{Instructor: Madhu Sudan}{Scribe: Jayshree Sarathy}{Lecture 22}
\section{Administrative Notes}
\begin{itemize}
\item Project check-in on Canvas and email or in person. Look at Piazza post for details.
\item Project presentations should be less than 15 minutes each, including the time for transitions, Q\&A, and projector trouble.
\item Think carefully about what really needs to be in the presentation (i.e., proofs should be skippable) and the division of work within the team.
\item Try to link your presentations to the course material.
\end{itemize}
\section{Today's Agenda}
\begin{itemize}
\item Distributions in theoretical CS
\item Sampleable distributions
\item Statistical distance problem
\item Statistical distance reduces to its complement
\item Motivation and context for statistical distance problem
\end{itemize}
\section{Distributions in TCS}
The story of derandomization and randomness extraction starts with Von Neumann \cite{vonneumann}. He considered the question: how can we conduct an unbiased coin toss using a biased coin? For example, consider the following biased coin that outputs
$$
\begin{cases}
\text{Heads} & \text{w.p.}~~ p \\
\text{Tails} & \text{w.p.}~~ 1-p
\end{cases}
$$
where $p$ is unknown.
The insight is that the event of getting heads and tails on consecutive tosses has probability $p (1-p)$, and the event of getting tails and heads on consecutive tosses also has probability $(1-p)p$. Thus, we can toss the biased coin repeatedly, only recording when we get one of these two events, which gives us an unbiased outcome.
Next, Blum \cite{blum} considered the problem of generating a large number of independent bits from a Markovian source. Instead of the strong independence in the previous problem, the independence here is weaker, as we have some hidden state.
Santha and Vazirani \cite{santhai-vazirani} took this idea further to consider ``slightly random sources,'' in which each bit may be biased adversarially given the previous bits, subject only to the constraint
$$ \delta \leq \Pr[X_n = 1 \mid X_1, \ldots, X_{n-1}] \leq 1 - \delta. $$
Finally, Srinivasan and Zuckerman \cite{sz} considered min-entropy sources. Min-entropy is similar to the Shannon entropy, except that we replace the expectation with a minimum.
$$ H_\infty(x) \triangleq \min_{w \in \{0, 1\}^n} \log \frac{1}{\Pr[x=w]} $$
Since the expectation of a random variable is always greater than or equal to its minimum, we have that
$$ H_\infty(x) \leq H(x) $$
Even if $H(x)$ is large, it might be insufficient for randomness extraction. For example, if we define $x$ to be
$$ x =
\begin{cases}
y & \text{w.p.}~~ 1/2 \\
\text{Unif}(\Omega) & \text{w.p.}~~ 1/2
\end{cases}
$$
then the entropy is high, but the min-entropy is low, and thus $x$ is not conducive for extracting randomness.
\begin{exercise} Show a second example of a random variable or distribution for which the min-entropy is low but the entropy is high.
\end{exercise}
Therefore, Srinivasan and Zuckerman consider min-entropy sources where
$$ H_\infty (x) \geq \epsilon \cdot n $$
The ideas of randomized algorithms we have discussed are also used in cryptography, distributed algorithms, and game theory. For example, in cryptography, randomness is the key idea that allows us to encrypt a message with low entropy, such as a single bit. In distributed algorithms, it is shown that randomness is required for $n$ players to select a leader amongst themselves.
\section{Sampleable Distributions}
To define sampleable distributions, we consider Boolean circuits that take as input random bits and encode a sampling algorithm.
\begin{definition}[Distributions encoded as circuits]
Let $C: \{0,1\}^m \rightarrow \{0,1\}^n$ be a circuit with $m$ inputs and $n$ outputs, where $m, |C| = \textrm{poly}(n)$. The distribution associated with $C$ is $Z = C(Y)$, where $Y \sim \textrm{Bern}(1/2)^m$.
\end{definition}
\noindent Note that circuits can be evaluated in time polynomial in their size and can express general computations. Since the circuits defined above are polynomial in size, they capture the notion of efficiently sampleable distributions.
% Note that while we started with pure randomness in the inputs, there can be all kinds of correlations between bits of $Z$.
\section{Statistical Distance}
\begin{definition}[Statistical Difference] If $P$ and $Q$
are distributions (or random variables) supported on $\Omega$, the statistical difference between $P$ and $Q$ is defined in two ways.
\begin{align*}
\delta(P, Q)
&\triangleq \frac{1}{2} \sum_{w \in \Omega}|P(w) - Q(w)| \\
&\triangleq \max_{T:\Omega \rightarrow \{0,1\}} \E_{x \sim P}[T(x)] - \E_{x \sim Q}[T(x)]
\end{align*}
\end{definition}
\noindent The statistical difference problem is the problem of deciding whether a pair of efficiently sampleable distributions is statistically close or far, as measured by statistical difference. The parameters for the problem are $c$ and $f$, where $0 \leq c \leq f \leq 1$, which are used to define the sets \textsc{CLOSE} and \textsc{FAR}.
\begin{align*}
\textsc{CLOSE}^c &= \{(c_1, c_2) : \delta(c_1, c_2) \leq c\} \\
\textsc{FAR}^f &= \{(c_1, c_2) : \delta(c_1, c_2) > f\}
\end{align*}
\begin{definition}[Statistical Difference Problem]
Given input $(c_1, c_2) \in \textsc{CLOSE}^c \cup \textsc{FAR}^f$, the statistical difference problem $\textsc{SD}^{c,f}$ is to decide whether the input is in $\textsc{CLOSE}$ or whether it is in $\textsc{FAR}$.
\end{definition}
How hard is it to solve $\textsc{SD}^{1/3, 2/3}$? We don't know how to solve it efficiently. Only recently have we found a near-polytime algorithm for the problem of Graph Isomorphism (GI), which is contained in $\textsc{SD}^{1/3, 2/3}$. However, we suspect that there do not exist efficient algorithms for $\textsc{SD}^{1/3, 2/3}$.
Next, we ask whether we can amplify $\textsc{SD}^{1/3, 2/3}$, in which the two thresholds are relatively close, into $\textsc{SD}^{2^{-\sqrt{n}}, 1-2^{-\sqrt{n}}}$, in which the gap in statistical distance is very large. The answer is yes, from a result by Sahai and Vadhan in 2000. However, it is still an open problem whether we can amplify $\textsc{SD}^{.49, .5}$ into $\textsc{SD}^{1/3, 2/3}$. A good reference for this is the survey by Goldreich and Vadhan.
\begin{definition}[Complement of Statistical Difference Problem]
$\bar{\textsc{SD}}^{c, f}$ returns NO if $(c_1, c_2)$ are close and YES if they are far.
\end{definition}
It turns out that $\textsc{SD} = \bar{\textsc{SD}}$.
\begin{theorem}[$\SD = \bar{\SD}$] There exists a poly-time reduction $R$ that, when given $(c_1, c_2)$, returns $(d_1, d_2)$ such that
\begin{itemize}
\item if $(c_1, c_2)$ are close, then $(d_1, d_2)$ are far.
\item if $(c_1, c_2)$ are far, then $(d_1, d_2)$ are close.
\end{itemize}
\end{theorem}
\section{Motivation and Context}
Where does the notion of statistical distance come up? One important motivation is from work in cryptography and zero-knowledge by Goldwasser, Micali, and Rackoff. They define a complexity class called statistical zero knowledge (\textsc{SZK}) and show that Graph Non-Isomorphism (\textsc{GNI}) is in \textsc{SZK}.
Before describing this result, we define Graph Isomorphism (\textsc{GI}) and Non-Isomorphism (\textsc{GNI}).
\begin{definition}[Isomorphic graphs] Let $G = (V, E)$ be an undirected graph, where $V = [n]$. Let $\pi: [n] \rightarrow [n]$ be a permutation on $V$, such that $\pi(G) = (V, E')$, where $E' = \{(\pi(u), \pi(v)) : (u, v) \in E\}$. Let $H = (V, F)$ be a second graph that has the same vertex set as $G$. If there exists a $\pi$ such that $H = \pi(G)$, then $G$ and $H$ are isomorphic, denoted by $G \cong H$.
\end{definition}
\begin{definition}[GI and GNI] Graph isomorphism is the language $\textsc{GI} = \{(G, H) : G \cong H\}$. Graph non-isomorphism is the complement of $\textsc{GI}$.
\end{definition}
$\textsc{GI} \in NP$, so graph isomorphism can be proved easily; the certificate is simply the isomorphism, which can be checked efficiently. Graph non-isomorphism, however, is harder. There are no classical proofs known for $\textsc{GNI}$.
However, there is an efficient interactive proof for $\textsc{GNI}$, which relies on two observations. Let $S_G$ and $S_H$ be the set of graphs that are isomorphic to $G$ and $H$, respectively.
\begin{enumerate}
\item If $G \ncong H$, then $S_G$ is disjoint from $S_H$.
\item If $G \cong H$, then a uniform draw from $S_G$ is indistinguishable from a uniform draw from $S_H$.
\end{enumerate}
Thus, the interactive proof for GNI below tests whether the prover can distinguish between a uniformly selected draw from $S_G$ versus $S_H$. Note that we will switch notation, so that the inputs are $G_0, G_1$ instead of $G, H$.\\
{\centering
\fbox{
\pseudocode{%
\textbf{Verifier} \< \< \textbf{Prover} \\
[0.1 \baselineskip][\hline]
\< \< \\
[-0.5 \baselineskip]
b \leftarrow_{\$} \{0, 1\} \< \< \\
\pi \leftarrow_{\$} \Sigma_n \< \< \\
K = \pi(G_b) \< \< \\
\< \sendmessageright*{K} \< \\
\< \< \textrm{If } G_0 \cong K, \\
\< \< \textrm{ set } \hat{b} = 0 \\
\< \< \textrm{Else, set } \hat{b} = 1 \\
\< \sendmessageleft*{\hat{b}} \\
\textrm{Accept if } \hat{b} = b. \< \< \\
\textrm{Otherwise, reject.}
}
}
\par}
\bigskip
Note that instead of testing all possible permutations, the verifier only has to test one randomly sampled permutation $\pi$.
Now, we informally outline three properties necessary to show that $\textsc{GNI} \in \textsc{SZK}$.
\begin{itemize}
\item Completeness: If $G_0 \ncong G_1$, then an unbounded prover can find $\hat{b} = b$, and the verifier will accept with probability $1$.
\item Soundness: If $G_0 \cong G_1$, then
$I(K; b|G_0, G_1) = 0$,
which implies that $\{K | b=0\} \cong_d \{K | b=1\}$. Hence even an unbounded prover can guess $b$ from $K$ with probability at most $1/2$, so the verifier accepts with probability at most $1/2$.
\item Zero-Knowledge: The verifier should learn nothing from the interaction other than the statement that $G_0 \ncong G_1$.
\end{itemize}
\begin{exercise}
Prove that this interactive \textsc{GNI} protocol fulfills the first two properties. Proving the third will require reading more on the formal definition of zero-knowledge and simulator-based proofs.
\end{exercise}
We will now describe some main results in this area. First, $\SD^{1/3, 2/3} \in \SZK$, and it is in fact $\SZK$-complete. Second, $\GNI \leq \bar{\SD}^{0,1}$, and by a simple negation, $\GI \leq \SD^{0,1}$. By the equivalence of $\SD$ and $\bar{\SD}$, the first result implies that $\bar{\SD}^{1/3, 2/3}$ also has an $\SZK$ proof. Since $\GI \in \SD^{1/3, 2/3}$, $\GI$ has an $\SZK$ proof as well.
\begin{exercise}
Show that $\bar{\SD}^{2^{-\sqrt{n}}, 1-2^{-\sqrt{n}}}$ has an $\SZK$ proof. Remember that the zero-knowledge condition is approximate, so we allow for being $2^{-\sqrt{n}}$-close to zero-knowledge.
\end{exercise}
\begin{thebibliography}{999}
\bibitem{GMR} Goldwasser, S., Micali, S. and Rackoff, C., 1989. The knowledge complexity of interactive proof systems. SIAM Journal on computing, 18(1), pp.186-208.
\bibitem{blum}
Blum, M., 1986. Independent unbiased coin flips from a correlated biased source---a finite state Markov chain. Combinatorica, 6(2), pp.97-108.
\bibitem{complete}
Sahai, A. and Vadhan, S., 2003. A complete problem for statistical zero knowledge. Journal of the ACM (JACM), 50(2), pp.196-249.
\bibitem{santhai-vazirani} Santha, M. and Vazirani, U.V., 1986. Generating quasi-random sequences from semi-random sources. Journal of computer and system sciences, 33(1), pp.75-87.
\bibitem{sz} Srinivasan, A. and Zuckerman, D., 1999. Computing with very weak random sources. SIAM Journal on Computing, 28(4), pp.1433-1459.
\bibitem{thesis}
Vadhan, S.P., 1999. A study of statistical zero-knowledge proofs (Doctoral dissertation, Massachusetts Institute of Technology).
\bibitem{vonneumann} Von Neumann, J., 1951. 13. various techniques used in connection with random digits. Appl. Math Ser, 12(36-38), p.5.
\end{thebibliography}
\end{document}