\documentclass[10pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{epsfig}
\usepackage{fullpage}
\usepackage{amssymb}
\usepackage[colorlinks = false]{hyperref}
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\newcommand{\x}{\times}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
\begin{document}
\input{preamble.tex}
\newtheorem{example}[theorem]{Example}
\theoremstyle{definition}
\newtheorem{defn}[theorem]{Definition}
\handout{CS 229r Essential Coding Theory}{March 2, 2020}{Instructor:
Madhu Sudan}{Scribe: David Xiang}{Lecture 10}
\section{Today}
\begin{itemize}
\item Reed-Solomon Decoding + List Decoding
\item Abstracting Reed-Solomon
\item Decoding Concatenated Codes
\end{itemize}
\section{Reed-Solomon Decoding}
We recall from last lecture the Reed-Solomon Decoding problem:
\textbf{Input}: code parameters $n,k, \F_q$ and points $\{(\alpha_i, \beta_i)\}_{i=1}^n$, $\alpha_i, \beta_i \in \F_q$.
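\textbf{Output}: Find a polynomial $M(x)$ of degree $< k$ such that $M(\alpha_i) = \beta_i$ for at least $n-t$ values of $i$.

For list decoding, we first find a nonzero bivariate polynomial $Q(x,y)$ with $\deg_x Q \leq \sqrt{n}$ and $\deg_y Q \leq \sqrt{n}$ (ignoring integrality issues) such that $Q(\alpha_i,\beta_i) = 0$ for all $i$, and then output every $M$ for which $y - M(x)$ is a factor of $Q$. We need to show that (1) such a $Q$ exists and (2) $y - M(x)$ divides $Q$ under an assumption on $t$.
\begin{enumerate}
\item[(1)] The constraints $Q(\alpha_i,\beta_i) = 0$ form a homogeneous linear system of $n$ equations in the coefficients of $Q$. Since $Q$ has $(\sqrt{n}+1)^2 > n$ unknowns, dimension considerations will tell us that a nonzero solution will exist.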
\item[(2)] As in regular RS decoding, we will need an additional assumption on the number of errors. Let us assume for now that the quantity $n-t$ (the number of agreements of $M$ with the $(\alpha_i, \beta_i)$) is larger than $2k\sqrt{n}$. We claim that under this assumption $y - M(x)$ divides $Q(x,y)$.
\begin{proof}
Let $S = \{(\alpha_i,\beta_i): M(\alpha_i) = \beta_i\}$. Every point of $S$ is a zero of the polynomial $P(x,y) \triangleq y-M(x)$, and it is also a zero of $Q(x,y)$. By assumption, $|S| > 2k\sqrt{n}$.
By B\'ezout's theorem, if $P$ and $Q$ do not have a common factor, then since $\deg P < k$ and $\deg Q \leq 2\sqrt{n}$, they can have no more than $2k\sqrt{n}$ common zeros. This contradicts the assumption that we have a large number of agreements.
We conclude that $P$ and $Q$ must share a common factor. Since $P$ is irreducible, it follows that $P \mid Q$, as desired.
\end{proof}
\end{enumerate}
Since the $y$ degree of $Q$ is at most $\sqrt{n}$ we will not output more than $\sqrt{n}$ factors (in fact, by considering the $x$-degree we see we will not output more than $\sqrt{n}/k$ list elements). Putting this all together, we've shown that
\begin{theorem}
The Reed-Solomon Code is $(1-2\frac{k}{\sqrt{n}}, \sqrt{n})$-list-decodable.
\end{theorem}
What have we shown? We've shown that we can list-decode from a very large fraction of errors, but in order to get this we need $k$ to be sublinear in $n$ (in particular, we needed to assume that the number of agreements is at least $2k\sqrt{n}$, so $k$ must be $O(\sqrt{n})$).
The fact that there exists asymmetry in the $x$-degree and $y$-degree (as we observed earlier, by considering the $x$-degree we actually show that the lists are not larger than $\sqrt{n}/k$) gives us room to exploit. More concretely, we should think of $Q(x,y)$ as a polynomial in $x$, into which we substitute $y = M(x)$ (from this perspective, a factor $y-M(x)$ corresponds to a root of $Q(x,y)$ viewed as a polynomial in $y$ over $\F_q[x]$), and so in reality the $y$-degrees are much more impactful than the $x$-degrees.
So, let's repeat the above analysis, except now take $\deg_x Q \leq \sqrt{kn}$ and $\deg_y Q \leq \sqrt{\frac{n}{k}}$ (ignore integrality issues). We show the same claims: (1) there exists a solution $Q$, and (2) $y-M(x)$ divides $Q$ under assumptions on $t$.
\begin{enumerate}
\item[(1)] The analysis here works out to be basically the same (again, we just compare number of unknowns to number of equations).
\item[(2)] We take $n-t \geq 2\sqrt{kn}$ now. Again, define $S = \{(\alpha_i,\beta_i): M(\alpha_i) = \beta_i\}$, so that $|S| \geq 2\sqrt{kn}$. Define $g(x) = Q(x,M(x))$. We observe that $\deg g \leq \deg_x Q + (k-1)\deg_y Q < 2\sqrt{kn}$, since $\deg M < k$, $\deg_x Q \leq \sqrt{kn}$, and $\deg_y Q \leq \sqrt{n/k}$.
On the other hand, $g(\alpha_i) = 0$ whenever $(\alpha_i, \beta_i) \in S$; this holds by the definition of $S$, since $Q(\alpha_i,\beta_i) = 0$. By the assumption on the size of $S$, it then follows that $g(x)$ is identically zero, so $Q(x,M(x))$ is identically zero. This is just the statement that $y-M(x)$ divides $Q(x,y)$ (again, view $Q(x,y)$ as an element of $\F_q[x][y]$).
\end{enumerate}
Interpreting this in the language of list-decoding shows that
\begin{theorem}
The Reed-Solomon Code is $(1-2\sqrt{R}, \sqrt{1/R})$-list-decodable.
\end{theorem}
More careful fine-tuning lets one relax the requirement to $n-t \geq \sqrt{2kn}$. One can get rid of the factor of 2 altogether by complicated algebraic geometry (so we have explicit algorithms achieving the bound of exercise 2); we will take a different route (in a later class).
\section{Abstracting Reed-Solomon}
If one carefully follows the analysis for Reed-Solomon decoding, one observes that essentially the main property we used is the fact that Reed-Solomon codes have large distance. The only algebraic fact we really exploited is that the product of a degree $a$ and degree $b$ polynomial has degree at most $a+b$. This lets us generalize our Reed-Solomon decoding algorithm to arbitrary codes by generalizing the notion of polynomial multiplication to a coordinatewise product of codewords.
This gives us an abstract decoding algorithm (to half the distance) for general codes.
Let $\mathcal{E}, \mathcal{W}$ be error-correcting codes (an ``error-locating pair'' for the code $\mathcal{C}$) such that
\begin{enumerate}
\item $\dim \mathcal{E} \geq t+1$
\item The distance of $\mathcal{W}$ is $\geq t+1$
\item $\mathcal{E}*\mathcal{C}\subseteq \mathcal{W}$
\end{enumerate}
What is the $*$ operation?
\begin{definition}
Given $a,b \in \F_q^n$, we define $(a*b) \in \F_q^n$ to be the vector whose $j$th coordinate equals $a_jb_j$ (i.e. $a*b$ is the coordinatewise product of $a$ and $b$).
\end{definition}
The motivation here is that if $a$ and $b$ are the evaluations of polynomials $f$ and $g$ on $n$ points in $\F_q$, then $a*b$ is the evaluation of $fg$ on the same $n$ points in $\F_q$.
\begin{definition}
Given two codes $\mathcal{E},\mathcal{C}$, we define $\mathcal{E}*\mathcal{C}$ to be the set $\{e*c : e \in \mathcal{E},\ c \in\mathcal{C}\}$.
\end{definition}
Note that $\mathcal{E}*\mathcal{C}$ is not generally a vector space.
This lets us define a generalization of our previous Reed-Solomon algorithm. Given $r$ in $\F_q^n$, we decode it to a codeword in $C$ by
\begin{enumerate}
\item[(1)] Find $a \in \mathcal{E}$ and $b \in \mathcal{W}$ with $b = a*r$ and $a$ not identically zero.
\item[(2)] Let $b'_i = \frac{b_i}{a_i}$ if $a_i \neq 0$, and $b'_i = {?}$ (an erasure) if $a_i = 0$. Finally, erasure decode $b'$ for $\mathcal{C}$.
\end{enumerate}
\begin{exercise}
Fill in the details/prove correctness of the above scheme.
\end{exercise}
\end{document}