\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\varepsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 5.78in { {\bf CS 229r: Algorithms for Big Data } \hfill #2 }
\vspace{4mm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 5.78in { {\em #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex
\begin{document}
\lecture{7 --- September 24, 2013}{Fall 2013}{Prof.\ Jelani Nelson}{David Liu}
\section{Overview}
In this lecture, we cover a randomized approximate algorithm to solve the DTM (distance to monotonicity) problem.
\section{Distance to Monotonicity}
For a given sequence $\{x_i\}$, the distance to monotonicity is the fewest number of elements we must remove from the sequence such that the remaining subsequence is an increasing subsequence. If we have $n$ elements in our sequence, this number is precisely $n-K$ where $K$ is the length of the longest increasing subsequence in our original sequence.
\subsection{DP algorithm}
Below is the DP algorithm to solve this problem. Essentially, we remember for each position $i$ in the sequence the fewest number of elements we can remove from $\{1,2,..., i-1\}$ such that we have a subsequence ending at $i$ that is an increasing subsequence.
\begin{enumerate}
\item Give each item $i$ a weight $w(i) = 1$
\item Prepend and postpend items of value $-\infty$ and $\infty$, with weights $w(0) = w(n+1) = \infty$
\item Initialize $R=\{0\}$, $W(0) = 0$, $s(0) = 0$. $W(t)$ will be just the total weight of the first $t$ elements.
\item For $t=1$ to $n+1$, do the following:
\begin{itemize}
\item Set $W(t) = W(t-1) + w(t)$
\item Set $s(t) = \min\{s(i) + W(t-1) - W(i): i\in R, x[i]\leq x[t]\}$
\item Add $t$ to the set $R$
\end{itemize}
\item Output $s(n+1)$
\end{enumerate}
This algorithm uses $O(n)$ space.
\section{Streaming algorithms}
Suppose that for each $i$ that $x[i]\in [m]$. \\
A deterministic 1-pass streaming algorithm for the LIS (longest increasing subsequence) problem that gives a $(1+\epsilon)$ approximation to the solution and uses space $O(\sqrt{\frac{n}{\epsilon}}\log m)$ was demonstrated in a paper by Gopalan, Jayram, Krauthgamer, and Kumar \cite{GopalanTJ07}. Furthermore, in papers by Erg\"un and Jowhari \cite{ErgunHJ08} and G\'al and Gopalan \cite{GalPG10} it was shown that any deterministic 1-pass algorithm needs $\Omega(\sqrt{\frac{n}{\epsilon}}\log(\frac{m}{n\epsilon}))$ space. An open question is whether there exists a polylogarithmic space algorithm for a 2-approximation for the LIS problem.
\section{DTM algorithm}
Below, we will present a 1-pass streaming algorithm discovered by Saks and Seshadhri \cite{SaksCS13} that provides a $(1+\epsilon)$ approximation to the DTM problem, uses $O(\frac{\log^2 n}{\epsilon})$ space, and succeeds with probability $1 - \frac{1}{n^3}$.
\subsection{Description}
Essentially, the algorithm is the same as the one presented on the previous page, except now we sometimes forget values with a certain probability.
\begin{enumerate}
\item Give each item $i$ a weight $w(i) = 1$
\item Prepend and postpend items of value $-\infty$ and $\infty$, with weights $w(0) = w(n+1) = \infty$
\item Initialize $R=\{0\}$, $W(0) = 0$, $S(0) = 0$.
\item For $t=1$ to $n+1$, do the following:
\begin{itemize}
\item Set $W(t) = W(t-1) + w(t)$
\item Set $r(t) = \min\{r(i) + W(t-1) - W(i): i\in R, x[i]\leq x[t]\}$
\item Add $t$ to the set $R$
\item For each $i\in R$, remove $i$ from $R$ with probability $1-p(i,t)$
\end{itemize}
\item Output $r(n+1)$
\end{enumerate}
It is clear that $r(n+1)$ (the output value for this algorithm) is always greater than or equal to $s(n+1)$ (the true optimal solution). Thus, we need to show the following about our algorithm:
\begin{enumerate}
\item With probability greater than $1 - \frac{1}{\text{poly } n}$, $r(n+1) \leq (1+\epsilon)s(n+1)$
\item With probability greater than $1-\frac{1}{\text{poly } n}$, $|R| \leq O(\frac{\log^2 n}{\epsilon})$ at all times during the algorithm
\end{enumerate}
Write the above $1 - \frac{1}{\text{poly } n}$ as $1 - \frac{\delta}{2}$.
$p(i,t)$ is defined as follows. First, we define
\begin{equation*}
q(i,t) = \min\{1, \frac{\epsilon}{1+\epsilon}\ln(\frac{4t^3}{\delta})\frac{w(i)}{W([i,t])}\}
\end{equation*}
We then define
\begin{equation*}
p(i,i) = 1
\end{equation*}
\begin{equation*}
p(i,t) = \frac{q(i,t)}{q(i, t - 1)} \quad \text{for } t > i
\end{equation*}
Thus, defining $R^t$ to be the set $R$ after $t$ steps, this tells us that $\Pr(i\in R^t) = q(i,t)$.
\subsection{Proof}
Let $C$ be the indices of some optimal solution to the LIS problem. We say that $i\in C$ is unsafe at time $t$ if the following is true:
\begin{equation*}
(C\cap[i,t])\cap R^t = \emptyset
\end{equation*}
The above is the same as saying that we have forgotten everything in $C$ between $i$ and $t$ at time $t$. \\
Let $U^t$ be the set of unsafe indices at time $t$. Define
$$
U = \bigcup_{t=1}^{n+1}U^t
$$
By definition, $U\subset C$. \\
\begin{lemma}
$r(n+1)\leq W(\overline{C}\cup U)$
\end{lemma}
Note: $\overline{C}$ is the complement of $C$\\
We induct to show that $r(t)\leq W(\overline{C}_{\leq t - 1}\cup U^{t-1})$, where $\overline{C}_{\leq t - 1} = \overline{C}\cap [1,t-1]$. The base case is clear as $r(1) = 0$. \\
For the inductive step, there are two cases. The first is that $U^{t-1} = C_{\leq t - 1}$, i.e. the case when everything is unsafe. In this case $W(\overline{C}_{\leq t - 1}\cup U^{t - 1}) = W([1, t - 1])\geq r(t)$. For the second case, $U^{t-1}\backslash C\neq\emptyset$. Choose $j\in C, j