\documentclass[11pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{graphicx}
\usepackage{fullpage}
\usepackage{color, soul}
\usepackage{comment}
\usepackage{enumitem}
\usepackage{bbm}
\newcommand{\1}{\mathbbm{1}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\newcommand{\x}{\times}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\E}{\mathop{\mathbb{E}}}
\renewcommand{\bar}{\overline}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\varepsilon}
\newcommand{\DTIME}{\textbf{DTIME}}
\renewcommand{\P}{\textbf{P}}
\newcommand{\SPACE}{\textbf{SPACE}}
\begin{document}
\input{preamble.tex}
\handout{CS 229r Information Theory in CS}{{\bf Due: Feb 8, 2019 at 8pm}}{Instructor:
Madhu Sudan}{TA: Mitali Bafna}{Problem Set 1}
\section*{Instructions}
\begin{description}
\item[Collaboration:]
Collaboration is allowed, but limit yourselves to
groups of size at most four.
\item[References:]
In general, try not to run to reference material to answer
questions. Try to think about the problem to see if you can
solve it without consulting any external sources. If this fails,
you may look up any reference material.
\item[Writeup:]
You must write the solutions in latex, by yourselves.
Cite all references and collaborators.
Explain why you needed to consult any of the references,
if you did consult any.
\end{description}
\section*{Problems}
\begin{enumerate}
\item {\bf Conditional Probabilities:}\\
Suppose that in a certain city, $3/4$ of the high-school students pass and $1/4$ fail. Of those who pass, $10$ percent own cars, while $50$ percent of the failing students own cars. All of the car-owning students belong to clubs, while $40$ percent of those who do not own cars but pass, as well as $40$ percent of those who do not own cars but fail, belong to clubs.
\begin{enumerate}
\item How much information is conveyed about a student's academic standing by their ownership of a car?
\item How much information is conveyed about a student's academic standing by their membership in a club?
\item If three successive bits convey a student's academic status, their car owning status and their membership in clubs, how much entropy is there in each bit conditioned on the previous bits?
\end{enumerate}
\item {\bf Shearer's Lemma:}\\
Let $S \subset \mathbb{R}^n$ be a set of points and $T_1,\ldots,T_m$ be subsets of $[n]$ such that, for every $i \in [m]$, the set $S$ projected to the coordinate positions picked by $T_i$ has at most $n_i$ distinct elements. Suppose each position $j \in [n]$ is included in at least $k$ of the sets $T_1, \ldots, T_m$. Show that
\[|S|^k \leq \prod_{i \in [m]} n_i.\]
\item {\bf Homomorphism Counting:}
Prove that every directed graph has more ``$\vee$'s'' than ``$C_3$'s'' (see definition below), in the sense formalized below.
For directed graphs $H$ and $G$, let $\phi: H \rightarrow G$ be a homomorphism if for every $(u,v)$ in $E(H)$ it is the case that $(\phi(u),\phi(v))$ is in $E(G)$. Note
that homomorphisms are not required to be injective, and our graphs do not have self-loops---so $(u,u)$ is not in $E(H)$. (An undirected graph $G$ is simply a directed graph with $(u,v) \in E(G)$ iff $(v,u) \in E(G)$.)
\begin{enumerate}
\item {\bf Pre-Warmup:} What can you say about $H$ if there is a homomorphism from $H$ to $K_3$, the complete graph on 3 vertices?
\end{enumerate}
Let $\#(H,G)$ denote the number of homomorphisms from $H$ to $G$. (So roughly $\#(H,G)$ counts the number of copies of $H$ in $G$.) Let $\vee$ be
the graph on 3 vertices $\{A,B,C\}$ with edges $A \rightarrow B$ and $A \rightarrow C$. Let $C_3$ be the
graph on 3 vertices $\{D,E,F\}$ with edges $D \rightarrow E$, $E \rightarrow F$, and $F \rightarrow D$. Our
question above formalizes to the assertion: ``For all $G$, $\#(\vee,G) \geq \#(C_3,G).$''
\begin{enumerate}[resume]
\item {\bf Warmup:} Give an example of a graph $G$ for which $\#(\vee,G) = \#(C_3,G) = 3$.
\end{enumerate}
The rest of the parts refer to an arbitrary graph $G$.
\begin{enumerate}[resume]
\item Give a distribution on triples $(X,Y,Z)$ such that $(X,Y), (Y,Z)$ and
$(Z,X)$ are all edges of $G$ and $H(X,Y,Z) = \log \#(C_3,G)$.
\item
Give a distribution on triples $(X',Y',Z')$ such that $(X',Y'), (X',Z')$ are both edges of $G$ and $H(X',Y',Z') \geq H(X,Y,Z)$.
\item
Conclude that $\#(\vee,G) \geq \#(C_3,G)$ for every $G$.
\end{enumerate}
\item {\bf Kraft's inequality:}
\begin{enumerate}
\item An encoding $E:[n]\to \{0,1\}^*$ is prefix-free, if for every $i \ne j$, $E(i)$ is not a prefix of $E(j)$. Given a sequence $\ell_1,\ldots,\ell_n \in \Z^{\geq 0}$ show that a prefix-free encoding $E:[n]\to\{0,1\}^*$ satisfying $E(i) \in \{0,1\}^{\ell_i}$ exists if and only if $\sum_{i\in [n]} 2^{-\ell_i} \leq 1$.
\item Let $A_1,\ldots,A_n \subseteq [m]$ with $\sum 1/2^{|A_i|} < 1$. Paul and Carole alternately select distinct vertices from $[m]$, Paul having the first move, until all vertices have been selected. Carole wins if she has selected all the vertices of some set $A_i$. Paul wins if Carole does not win. Give a winning strategy for Paul.
\item {\bf (optional) Open Question:} An encoding $E:[n]\to \{0,1\}^*$ is fix-free if it is simultaneously prefix-free and suffix-free. As above let $\ell_i$ denote the length of the encoding $E(i)$. Give best possible bounds $L$ and $U$ such that if $\sum_{i\in [n]} 2^{-\ell_i} \leq L$ then a fix-free encoding exists, and if a fix-free encoding $E$ exists then $\sum_{i\in [n]} 2^{-\ell_i} \leq U$.
\end{enumerate}
\item {\bf Fano's inequality and (weak) converse:}
The aim of this problem is to see that the conditional entropy of $Y$ is small given $X$ if and only if $Y$ is ``predictable'' given $X$.
\begin{enumerate}
\item Show that there is a deterministic function $g(X)$ such that $\Pr[Y \ne g(X)] \leq H(Y|X)$. (Note that this is useful only when $H(Y|X) < 1$.)
\item Show that if there is a function $g(X)$ such that $\Pr[Y \ne g(X)] = e$ then $H(Y|X) \leq h(e) + e\cdot \log (|\mathcal{Y}| - 1)$ where $h(p) = -p\log p - (1-p)\log(1-p)$ is the binary entropy function, and $\mathcal{Y}$ is the domain of $Y$.
\end{enumerate}
\end{enumerate}
\end{document}