\documentclass[10pt]{article}
\usepackage{amsfonts}
\usepackage{graphicx}
\newtheorem{define}{Definition}
%\newcommand{\Z}{{\bf Z}}
% \usepackage{psfig} % obsolete; graphicx (loaded above) provides \includegraphics
\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in
\begin{document}
\input{preamble.tex}
\lecture{22}{November 28, 2005}{Madhu Sudan}{Krzysztof Onak}
\noindent Today we present various algebraic models of computation, and discover
a few lower bounds.
\section{Algebraic models of computation}
\subsection{Considered problems}
Let $f:R^n \to R^m$ be a function that maps $n$ elements of a ring $R$
into $m$ elements of the same ring. Given $x_1, x_2, \ldots, x_n \in R$,
compute $f(x_1,x_2,\ldots,x_n)$.
Alternatively, for a function $f:R^n \times R^m \to R$, given
$x_1, x_2, \ldots, x_n \in R$, determine
$y_1, y_2, \ldots, y_m \in R$ such that
$f(x_1, x_2, \ldots, x_n,y_1,y_2,\ldots,y_m) = 0$.
\subsection{Uniform model of computation}
In the late 1980's Blum, Shub and Smale came up with a uniform model
of computation. It was a ``Turing machine'' over a ring.
In this lecture we consider only non-uniform models of computation.
\subsection{Algebraic circuits (Straight line programs)}
An \emph{algebraic circuit} is an acyclic network of
gates with the following properties:
\begin{itemize}
\item the circuit has $n$ inputs accepting $x_1,x_2,\ldots,x_n\in R$
and an arbitrary number of constants $\alpha_i$ in $R$,
\item the circuit has $m$ outputs, $y_1,y_2,\ldots,y_m \in R$,
and computes a function $f$, i.e.\ $f(x_1,\ldots,x_n)=(y_1,\ldots,y_m)$,
\item each gate has two inputs and one output, and computes
either the sum or product of input values (if $R$ is a field,
we also allow division).
\end{itemize}
The number of gates is a complexity measure of algebraic circuits.
\bigskip
\begin{center}
\includegraphics[scale=0.9]{ac.eps}
\end{center}
\bigskip
Note that every algebraic circuit is equivalent to a straight line
program in which every instruction
corresponds to a single gate and
has the form
``$v_i \leftarrow v_j \mathbin{\diamondsuit} v_k$'',
where $\diamondsuit$ is one of the allowed operations.
\subsection{Algebraic decision trees}
An \emph{algebraic decision tree} is a decision tree with the
following properties:
\begin{itemize}
\item at each internal node we evaluate a polynomial
of input elements $x_1$, $x_2$, \ldots, $x_n$,
and branch, depending on whether the computed value
of the polynomial equals 0 or not,
\item each leaf contains a polynomial of the input elements
whose value is the required output that we want to compute.
\end{itemize}
\begin{center}
\includegraphics[scale=0.85]{adt.eps}
\end{center}
There are two complexity measures:
\begin{enumerate}
\item The depth of a tree.
\item The degree of polynomials at internal nodes.
\end{enumerate}
\subsection{Algebraic computation trees}
An \emph{algebraic computation tree} is a tree in which at each
internal node we perform a single instruction of the form
``$v_i \leftarrow v_j \mathbin{\diamondsuit} v_k$'',
where $\diamondsuit$ is one of the basic operations allowed
over $R$, and branch, depending on whether the computed value
equals 0 or not.
\begin{center}
\includegraphics{act.eps}
\end{center}
\section{Ostrowski's conjecture}
\subsection{The problem: univariate polynomial evaluation}
Given $a_0,a_1,\ldots,a_n \in R$ and $x \in R$,
compute \[\sum_{i=0}^{n}a_i x^i.\]
\subsection{Horner's rule}
Horner's rule enables us to evaluate a polynomial by
$n$ additions and $n$ multiplications in the following way:
\begin{eqnarray}
\nonumber v_1 & \leftarrow & a_n \cdot x + a_{n-1}\\
\nonumber v_2 & \leftarrow & v_1 \cdot x + a_{n-2}\\
\nonumber & \ldots & \\
\nonumber v_i & \leftarrow & v_{i-1} \cdot x + a_{n-i}\\
\nonumber & \ldots & \\
\nonumber v_n & \leftarrow & v_{n-1} \cdot x + a_{0}
\end{eqnarray}
\subsection{The conjecture}
Ostrowski came up in 1954 with the conjecture that
Horner's rule is optimal, i.e.\ one needs $n$ additions
and $n$ multiplications (in the algebraic circuit model).
He managed to prove that $n$ additions are necessary,
and in 1966 Pan proved that so are $n$ multiplications.
\subsection{Ostrowski's lower bound}
To show that we need $n$ additions, we substitute $x=1$,
and the problem of evaluation of the polynomial
reduces to the problem of computing the sum of
coefficients.
\begin{claim}
To evaluate the sum of $a_0$ to $a_n$ over a ring,
at least $n$ additions are necessary in the algebraic circuit model.
\end{claim}
\begin{proof}
The proof goes by induction on $n$. For $n=1$, all that we can compute,
not using additions, is $ca_0^{d_0}a_1^{d_1}$, where $c\in R$,
which definitely differs
from $a_0 + a_1$. For $n>1$, the first addition in any straight line
program looks like
$$c_1 \prod_{i=0}^{n}a_i^{d_i} + c_2 \prod_{i=0}^{n}a_i^{e_i},$$
and since it does not make sense to add constants as they can
be hardcoded, we can assume that one of $d_i$'s or $e_i$'s is
nonzero. Without loss of generality $d_n \ne 0$; then setting
$a_n=0$ makes the first addend disappear, and
by the induction assumption we still need to spend
$n-1$ additions to compute $a_0 + a_1 + \cdots + a_{n-1}$.
\end{proof}
\subsection{Pan's lower bound}
This time we substitute $a_0 = 0$. Note first that any
algebraic circuit computes some polynomial in
$R[a_1,a_2,\ldots,a_n,x]$. A multiplication $v_j \cdot v_k$
is \emph{insignificant} if one of the following holds:
\begin{enumerate}
\item Both $v_j$ and $v_k$ belong to $R[x]$.
\item One of $v_j$ and $v_k$ belongs to $R$.
\end{enumerate}
Certainly, a multiplication that is not insignificant
is \emph{significant}.
We will show that the number of significant multiplications
is large enough in some more general case.
\newcommand{\rank}{\mathop{\mbox{\rm rank}}\nolimits}
\begin{claim}
Let $f:R^{n+1} \to R$ be a function of the form
$$f(a_1,a_2,\ldots,a_{n},x) = \sum_{i=1}^{k} l_i(a_1,\ldots,a_n) x^i + r(x)
+ l_0(a_1,a_2,\ldots,a_n),$$
where each $l_i$ is a linear function, and $R$ is a field.
An algebraic circuit computing $f$ has at least $\rank\{l_1,l_2,\ldots,l_k\}$
significant multiplications.
\end{claim}
\begin{proof}
Look at the first significant multiplication. It has the following form:
$$\left(\sum_i c_i a_i + c_0(x)\right) \cdot
\left(\sum_i d_i a_i + d_0(x)\right).$$
Without loss of generality $c_1 \ne 0$, and we
restrict $(a_1,\ldots,a_n,x)$ so that the first term equals $c \in R$,
achieving
\begin{eqnarray}
\nonumber c&=&\sum_i c_i a_i + c_0(x),\\
\nonumber a_1&=&\frac{c - c_0(x) - \sum_{i=2}^{n}c_i a_i}{c_1}
= l(a_2,\ldots,a_n)+p(x)
\end{eqnarray}
for some linear function $l$ and polynomial $p$.
Now we have a circuit that using one fewer significant multiplication
computes
$$\sum_{i=1}^{k}l_i\left(l(a_2,\ldots,a_n)+p(x),a_2,\ldots,a_n\right)x^i + r(x)+l_0(l(a_2,\ldots,a_n)+p(x),a_2,\ldots,a_n)$$
$$=\sum_{i=1}^{k}l'_i(a_2,\ldots,a_n)x^i + r'(x) + l'_0(a_2,\ldots,a_n),$$
where $l'_i(a_2,\ldots,a_n) = l_i(l(a_2,\ldots,a_n),a_2,\ldots,a_n)$,
and by basic linear algebra
$$\rank\{l'_1,l'_2,\ldots,l'_k\} \ge \rank\{l_1,l_2,\ldots,l_k\} - 1.$$
This implies by induction on the number of $a_i$'s that we need at least
$\rank\{l_1,l_2,\ldots,l_k\}$ significant multiplications.
\end{proof}
\section{Fixed coefficients}
If coefficients of the polynomial are fixed, that is we compute
a function $f_{a_0 \ldots a_n}:R \to R$ such that
$$f_{a_0 \ldots a_n}(x) = \sum a_i x^i,$$
it turns out that we need at most $n/2 + 1$ multiplications,
and that for most choices of coefficients this number of
multiplications is necessary. The main idea is that
we can express $f$ as
$$f(x) = q_1(x)(x^2-b_1)+r_1(x),$$
there exists $b_1$ so that $r_1$ is of degree 0,
and both $b_1$ and $r_1$ can be hardwired into a circuit.
To show the lower bound we take $a_0$, $a_1$, \ldots, $a_n$ transcendental
over $\widetilde R$, and prove that if a program computes
$\sum a_i x^i$ with $k$ multiplications, then
$(a_1,\ldots,a_n)$ lie in a $2k$-dimensional extension of $\widetilde R$.
\section{Evaluation in $n$ points}
Given $a_0$, \ldots, $a_n$, $x_0$, \ldots, $x_n$ in a field $K$,
our goal is to compute $z_0$ to $z_n$ such that
$z_i = \sum a_j x_i^j$. Using fast Fourier transform, we
can achieve this in $O(n \log^{O(1)}n)$ time, and
Strassen has proven that we need $\Omega(n \log n)$ operations
in any algebraic computation tree. We will cover this topic in the
next lecture.
\end{document}