\documentclass[10pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{graphicx}
\usepackage{fullpage}
\usepackage{amssymb}
\usepackage[colorlinks = false]{hyperref}
\usepackage{bbm} % provides \mathbbm, needed by the \1 macro below
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\newcommand{\x}{\times}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
% CN
\usepackage{color}
\newcommand{\Cnote}[1]{{\color{red}[CN: #1]}}
\begin{document}
\input{preamble.tex}
\newtheorem{example}[theorem]{Example}
\theoremstyle{definition}
\newtheorem{defn}[theorem]{Definition}
\handout{CS 229r Essential Coding Theory}{Apr 22, 2020}{Instructor:
Madhu Sudan}{Scribe: Pratap Singh}{Lecture 23}
\section{Today}
\subsection{Admin}
\begin{itemize}
\item Optional, updated problem set 6 now out on Piazza.
\end{itemize}
\subsection{Coding Theory in Complexity/Cryptography}
Two major topic areas for today:
\begin{itemize}
\item Hardcore predicates for one-way permutations, using list decoding.
\item Worst-case to average-case reduction (example: matrix permanent), using a locally decodable code.
\end{itemize}
\section{Hardcore predicates for one-way permutations}
\subsection{One-way permutations}
\begin{defn}[One-way permutation]\label{def:owp}
A function $f : \{0,1\}^k \to \{0,1\}^k$ is a \emph{one-way permutation} iff the following hold:
\begin{enumerate}
\item $f$ is a permutation: $\exists f^{-1}: \{0,1\}^k \to \{0,1\}^k$ s.t.\ $\forall x \in \{0,1\}^k, f^{-1}(f(x)) = x$.
\item $f$ is computable in polynomial time in the worst case.
\item $f^{-1}$ is ``very hard'' to compute on average.
\end{enumerate}
\end{defn}
We define ``very hard'' as follows:
\begin{defn}[$\epsilon(k)$-approximable]\label{def:eps_approx}
A function $g : \{0,1\}^k \to \{0,1\}^m$ is $\epsilon(k)$-approximable if there exists some polynomial-time computable algorithm $A$ s.t.\
$ \Pr_{x \sim \mathrm{Unif}(\{0,1\}^k)}\left[ A(x) = g(x) \right] \geq \epsilon(k) $.
\end{defn}
\begin{defn}[``Very hard'' function]\label{def:very_hard}
A function $f : \{0,1\}^k \to \{0,1\}^k$ is said to be ``very hard'' to compute if for every polynomial $p(k)$, $f$ is not $(1/p(k))$-approximable.
\end{defn}
\subsection{Hardcore predicates}
One-way permutations are relevant in cryptography---we could encrypt a message efficiently using an o.w.p., but an eavesdropper would not be able to recover the message from its encryption. But some o.w.p.s may ``leak'' information---some part of the message may be present unchanged in the output. Consider the following exercise:
\begin{exercise}\label{ex:padding}
Let $f : \{0,1\}^k \to \{0,1\}^k$ be an o.w.p., and let $F : \{0,1\}^{2k} \to \{0,1\}^{2k}$ be defined by $F(x,y) = (f(x),y)$. Prove that $F$ is an o.w.p.
\end{exercise}
\begin{proof-sketch}
Clearly, if $f$ is a permutation then $F$ is also a permutation, and if $f$ is computable in worst-case polynomial time then $F$ is also computable in worst-case polynomial time. In order to $\epsilon$-approximate $F^{-1}$ we would need to $\epsilon$-approximate $f^{-1}$, which by definition is not possible for any polynomial accuracy $\epsilon$; therefore $F^{-1}$ is not $\epsilon$-approximable for any polynomial accuracy $\epsilon$.
\end{proof-sketch}
The $F$ defined in Exercise~\ref{ex:padding} is an o.w.p., but the last $k$ bits of the message are visible in the last $k$ bits of the output. This is not much good for encryption. In general, we want a way of discussing what information about $x$ can be gleaned just by looking at $f(x)$. This gives us the following definition.
\begin{defn}[Hardcore predicate]\label{def:hard_pred}
For some function $f : \{0,1\}^k \to \{0,1\}^k$, a \emph{hardcore predicate} is some function $b$ that satisfies the following properties:
\begin{itemize}
\item $b$ is a predicate: $b : \{0,1\}^k \to \{0,1\}$.
\item $b$ is ``hardcore'':
\begin{enumerate}
\item[1.\phantom{$'$}] $b$ is computable in polynomial time given $x$.
\item[2.\phantom{$'$}] $b$ is hard to compute given $f(x)$.
\item[$2'$.] It is ``very hard'' to compute $b(x)$ given $f(x)$.\\ Specifically, for every polynomial $p$, $b \circ f^{-1}$ is not $\left(\frac{1}{2} + \frac{1}{p(k)}\right)$-approximable.
\end{enumerate}
\end{itemize}
\end{defn}
Note that property $2'$ implies property $2$ in Definition~\ref{def:hard_pred}. Since $b$ is a predicate, one of the constant 0 or constant 1 functions would be a $1/2$-approximator; hence we require that we cannot do any better than a $1/2$-approximation.
\begin{exercise}\label{ex:hardcore_implies_owp}
Suppose $b$ is a hardcore predicate for some permutation $f$. If $f$ is easy (poly-time computable), show that $f$ is an o.w.p. (This should be immediate from definitions.)
\end{exercise}
\begin{proof-sketch}
Immediate from definitions. If $b$ is hardcore for $f$, then since $b$ is computable in poly-time (property 1 from Definition~\ref{def:hard_pred}) but $b \circ f^{-1}$ is not poly-time approximable (property 2 from Definition~\ref{def:hard_pred}), it cannot be the case that $f^{-1}$ is poly-time approximable. If $f^{-1}$ were poly-time approximable, we could approximate $b \circ f^{-1}$ in the obvious way. Therefore, since $f$ is easy but $f^{-1}$ is hard, $f$ is an o.w.p.
\end{proof-sketch}
\subsection{Motivation: PRGs from hardcore predicates and o.w.p.s}
We shall see that it is possible to create pseudorandom generators from hardcore predicates and o.w.p.s. For this class, we will use a much stronger cryptographic definition of PRG than last class: a PRG that can ``fool every polynomial-time function to $\epsilon$''.
\begin{definition}[Cryptographic PRG]\label{def:prg}
A function $G : \{0,1\}^n \to \{0,1\}^m$ is a \emph{pseudorandom generator} iff $m > n$ and for all polynomial-time functions $A$ and all polynomials $P$, for $x$ drawn uniformly from $\{0,1\}^n$,
$$ A(G(x)) \approx_{\frac{1}{P(n)}} A(\mathrm{Unif}(\{0,1\}^m)) $$
where $\mathrm{Unif}(\{0,1\}^m)$ is the uniform distribution over $\{0,1\}^m$.
If $\frac{1}{P(n)} = \epsilon(n)$, we say that $G$ is an $\epsilon$-PRG.
\end{definition}
We can generate a PRG from an o.w.p.\ and a hardcore predicate, as follows.
\begin{theorem}[PRG from o.w.p. and hardcore predicate]\label{thm:prg_from_owp}
Suppose $f : \{0,1\}^k \to \{0,1\}^k$ is an o.w.p.\ and $b$ is hardcore for $f$. Then
$$ G(x) = (b(x), b(f(x)), b(f^2(x)), \dots, b(f^{m-1}(x))) $$ is a PRG (for $m > k$).
\end{theorem}
\begin{proof}
We omit many of the details, but the general structure is as follows. Let $G_i(x) = (b(x), G_{i-1}(f(x)))$, $G_0(x) = x$. First, show that if $b$ is hardcore for $f$, $G_1$ is an $\epsilon$-PRG. Second, show that if $G_1(x)$ is an $\epsilon$-PRG, then $G_m$ is an $m \cdot \epsilon$-PRG.
\end{proof}
\begin{exercise}\label{ex:prg_from_owp}
Fill in the details of the proof of Theorem~\ref{thm:prg_from_owp}.
\end{exercise}
\begin{proof-sketch}
We first show that $G_1(x) = (b(x), f(x))$ is a PRG. Arguing by contraposition, assume $G_1$ is not a PRG and there exists some algorithm $A$ that distinguishes it from the uniform distribution. In this case we could break property $2'$ of Definition~\ref{def:hard_pred} as follows. Suppose we want to compute $b \circ f^{-1}$ on some input $y$. We compute $A(1, y)$, which will tell us if $(1,y)$ is a uniform random string or the output of $G_1$. If it is the output of $G_1$, we know that $b\circ f^{-1}(y) = 1$; otherwise, $b\circ f^{-1}(y)$ must be $0$ and we can output $0$. Since $A$ is poly-time computable and gives us advantage $\epsilon$, we can thus poly-time approximate $b\circ f^{-1}$ with advantage $\epsilon$.
We can use a similar argument to show that if the $m$-bit construction $G$ is not a PRG, then $G_1$ is not a PRG. Suppose we can distinguish $G$ from the uniform distribution using algorithm $A$. Now, consider $h(x) = (U_1, b(f(x)), b(f^2(x)), \dots, b(f^{m-1}(x)))$, where $U_1$ is a single uniform bit. If we can distinguish $h(x)$ from $G(x)$ then we are clearly able to break the PRG property of $G_1$, since we would then be able to compute $b(x)$ given $f(x)$ (as described above). We can use the algorithm $A$ to distinguish $h(x)$ from $G(x)$ by applying $A$ on $m$ samples from $h$. Note also that this argument can extend to $t$ uniform bits at the start of the message, thus giving us the required inductive argument. Each step reduces the advantage of the PRG by $\epsilon$, so that if $G_1$ is an $\epsilon$-PRG, then $G$ is an $(m \cdot \epsilon)$-PRG.
\end{proof-sketch}
\subsection{A hardcore predicate for (many) o.w.p.s}
We would like to be able to generate hardcore predicates for every o.w.p. At present, specific examples are known, such as $f = $ RSA and $b = $ most significant bit. But these constructions are not known to generalize nicely; for example, $f = $ RSA and $b = $ least significant bit. The ideal result would be if there were one $b$ that is hardcore for every o.w.p., but this is unlikely to be the case. Today we will get close by using a list-decodable code.
\begin{theorem}[Hardcore predicate for many o.w.p.s]\label{thm:hardcore_owps}
For all $k$, there exists some $m > k$ and some function $b : \{0,1\}^m \to \{0,1\}$ such that for every o.w.p.\ $f : \{0,1\}^k \to \{0,1\}^k$, there exists some o.w.p.\ $F : \{0,1\}^m \to \{0,1\}^m$ that is a padding of $f$ (in the sense of Exercise~\ref{ex:padding}) and such that $b$ is hardcore for $F$.
\end{theorem}
\begin{proof}
We first give the $m$ and $b$, then prove that the theorem holds. We use some code $C : \{0,1\}^k \to \{0,1\}^n$ which is efficiently encodable and list-decodable from a $(\frac{1}{2} - \epsilon)$ fraction of errors. Let $m = k + \log n$, and view an input to $F$ as a pair $(x, i)$ with $x \in \{0,1\}^k$ and $i \in [n]$. Then let $F(x, i) \triangleq (f(x), i)$, and let $b(x, i) = C(x)_i$.
We give a contrapositive proof of the theorem: suppose that $b$ is not hardcore for $F$, then show that $f$ is not an o.w.p. In particular, assume that there exists some poly-time computable $A$ s.t.\ $A(f(x), i) = C(x)_i$ with probability at least $\frac{1}{2} + \alpha$ over $x$ and $i$.
By Markov's inequality, we have that $$ \Pr_x \left[ \Pr_i \left[ A(f(x), i) = C(x)_i \right] \geq \frac{1}{2} + \frac{\alpha}{2} \right] \geq \frac \alpha 2. $$
(We apply Markov's inequality by thinking of $\Pr_i \left[ A(f(x), i) = C(x)_i \right]$ as a random variable over $[0,1]$, then bounding the tail.)
Let a particular $x$ be ``good'' if it satisfies the inner condition that $\Pr_i \left[ A(f(x), i) = C(x)_i \right] \geq \frac{1}{2} + \frac{\alpha}{2}$. We claim that the following algorithm will approximate the inverse of $f$.
\begin{itemize}
\item Input: $y = f(x)$ for some $x$.
\item for $i$ in $[n]$ let $w_i = A(y, i)$.
\item List-decode $w$ as $\{x_1, x_2, \dots, x_L\}$.
\item If $f(x_j) = y$ for some $j \in [L]$, output $x_j$.
\end{itemize}
Now, if $x$ is ``good'' then $\Delta(w, C(x)) \leq \frac{1}{2} - \frac{\alpha}{2}$. Provided $\frac{\alpha}{2} \geq \epsilon$, list-decoding is therefore successful and the inverter algorithm will output $x$. Since $x$ is good with probability at least $\alpha/2$, we can invert $f$ with probability at least $\alpha/2$. Thus, if $b \circ F^{-1}$ is $(\frac{1}{2} + \alpha)$-approximable, then $f^{-1}$ is $(\alpha/2)$-approximable. This gives us the contraposition we need. We also note that since we can list-decode $C$ in polynomial time (by assumption), our construction yields a polynomial-time algorithm.
\end{proof}
\section{Worst-case to average-case reduction}
In general it is (relatively) easy to reason about worst-case complexity of functions, and we have lots of results about worst-case complexity. But often we are more interested in the complexity of an ``average'' case, or the kind of case we are likely to encounter in applications. For example, SAT is a well-known NP-complete problem, for which the best known deterministic algorithms are exponential-time, but in practice there are many widely-used SAT solvers which can efficiently solve SAT for many instances.
In cryptography, average-case complexity is particularly important. If complexity measures the security of a particular encryption scheme, then it is not enough that there exists some pathological key which causes high complexity. We want a randomly chosen key to give high complexity, such that all (or most) keys will be secure.
We haven't been able to do worst-case to average-case reduction for NP as a whole; but note that the worst-case to average-case reduction is trivial for problems in P. We give an example of a non-trivial reduction below.
\subsection{The matrix permanent}
\newcommand{\Perm}{\mathrm{Perm}}
\begin{defn}[Matrix permanent]\label{def:perm}
$\forall M \in \F_q^{n\times n}$, the \emph{permanent} of $M$, $\Perm(M)$, is defined as:
$$ \Perm(M) = \sum_{\pi \in S_n} \prod_{i=1}^n M_{i \pi(i)} $$
where $S_n$ is the set of all permutations of $[n]$.
\end{defn}
Intuitively, the permanent is the determinant without sign. In the determinant calculation we multiply each permutation product by the sign of the permutation. Unlike the determinant, however, the permanent is very hard to compute (over any field except $\F_2$, where $\Perm(M) = \det(M)$). In fact, $\Perm$ is $\#P$-complete, meaning that (in particular) $\#$SAT $\leq_p$ $\Perm$.
Notice that $\Perm$ is a degree-$n$ polynomial over $n^2$ variables, the $M_{ij}$. This is the key to the worst-case to average-case reduction.
\begin{theorem}[Worst-case to average-case for permanent]\label{thm:perm_reduc}
If $q > n^2$ and there exists some polynomial time algorithm $A$ s.t.\
$$ \Pr_{R \sim \F_q^{n\times n}} \left[ A(R) = \Perm(R) \right] \geq \frac{7}{8}, $$
then there exists some polynomial time algorithm $B$ s.t.\
$ \forall M \in \F_q^{n \times n}, B(M) = \Perm(M)$ w.p.\ $1 - o(1)$.
\end{theorem}
(Note the contraposition: we show that if the random case is ``easy'', then the worst case is ``easy''. Equivalently, if the worst case is ``hard'', then the random case is ``hard''.)
\begin{proof}
The idea of the proof is to consider calculating the matrix permanent as a local decoding problem. Suppose we list the permanents of every matrix in $\F_q^{n \times n}$ as a single vector $v_P$ of length $q^{n^2}$, as follows:
\begin{center}
\includegraphics[width = 0.8\textwidth]{fig1.pdf}
\end{center}
If we could access arbitrary elements of $v_P$, we would be done. Of course, we can't access arbitrary elements of this vector. The best we can do is construct arbitrary elements of the corresponding vector $v_A$ built using $A$ instead of $\Perm$:
\begin{center}
\includegraphics[width = 0.8\textwidth]{fig2.pdf}
\end{center}
Now, since $\Perm$ is just a degree-$n$ polynomial in $n^2$ variables over $\F_q$, $v_P$ is a codeword in the Reed-Muller code RM$(q, n, n^2)$. So in order to construct $v_P$, we could just decode $v_A$ using the Reed-Muller list-decoding algorithm we have seen before. However, this would not be efficient since the codewords have size exponential in $n$. Instead, we can use the local decoding algorithm for RM codes.
We can specify the algorithm $B$:
\begin{itemize}
\item Input: $M \in \F_q^{n \times n}$
\item Choose $R$ uniformly at random in $\F_q^{n \times n}$.
\item Choose $\{\alpha_1, \alpha_2, \dots, \alpha_{10n}\}$ distinct from $\F_q \setminus \{0\}$.
\item For each $i \in [10n]$, let $\beta_i = A(M + \alpha_i R)$.
\item Reed-Solomon decode $\{(\alpha_i, \beta_i)\}_i$ to polynomial $P(\alpha)$.
\item Output $P(0)$.
\end{itemize}
This algorithm effectively follows the pattern of the RM local decoding algorithm we have seen before. We now prove it correct.
Let $g(t) \triangleq \Perm(M + t R)$; then we want to output $g(0)$. For fixed $M$ and uniformly random $R$, each matrix $M + \alpha_i R$ (with $\alpha_i \neq 0$) is uniformly distributed over $\F_q^{n \times n}$, so by definition of $A$, $\Pr_R[\beta_i = g(\alpha_i)] \geq 7/8$ for every $i$. By Markov's inequality, the fraction of incorrect $\beta_i$ exceeds $1/4$ with probability at most $1/2$. But a $1/4$ fraction of errors is within the tolerance of the RS decoding algorithm, since $g$ has degree $n$ and we have $10n$ evaluation points. So with probability at least $1/2$ (which can be amplified to $1 - o(1)$ by repeating with fresh randomness and taking a majority vote), we will output $\Perm(M)$.
\end{proof}
The lower bound of $7/8$ on the probability that $A$ succeeds can be improved to $1/2 + \epsilon$ by using a more careful decoding and reduction. Further, we can improve to $1/\mathrm{poly}(n)$ by using a concept called ``local list decoding'' which is not discussed in this course.
The above is an instance of a general class of theorems due to Sudan, Trevisan, and Vadhan~\cite{stv01}. An example of this is (roughly) that if there exists some $f \in \mathrm{TIME}(2^{O(n)})$ but $f \notin \P_{/\mathrm{poly}}$, then there exists some $F \in \mathrm{TIME}(2^{O(n)})$ that is ``very hard'' relative to $\P_{/\mathrm{poly}}$. The proof of this result relies again on locally list-decodable codes.
\begin{thebibliography}{9}
\bibitem{stv01}
Madhu Sudan, Luca Trevisan, and Salil Vadhan.
``Pseudorandom Generators without the XOR Lemma.''
In: \textit{J. Computer and System Sciences} 62.2 (March 2001).
\textsc{issn:} 0022-0000. \textsc{doi:} \url{10.1006/jcss.2000.1730}.
\textsc{url:} \url{https://doi.org/10.1006/jcss.2000.1730}.
\end{thebibliography}
\end{document}