\documentclass[11pt]{article}
\usepackage{latexsym}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{epsfig}
\usepackage{blkarray}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 5.78in { {\bf MATH888: High-dimensional probability and statistics } \hfill #2 }
\vspace{4mm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 5.78in { {\em #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex
%\renewcommand{\baselinestretch}{1.25}
\begin{document}
\lecture{39 --- December 8, 2021}{Fall 2021}{Sebastien Roch, UW-Madison}{Joowon Lee, Sebastien Roch}
\section{Overview}
Community recovery remains a popular research area in statistical network analysis, where we seek to find a latent community structure in a network. This technique has been applied to many real-world networks from various scientific domains such as social, biological, physical, and economic contexts. A statistical guarantee necessarily requires assuming some underlying random graph model. In this lecture, we introduce community recovery under the framework of the Stochastic Block Model (SBM), which is a multi-community generalization of the well-known Erd\H{o}s--R\'enyi random graph. Specifically, we introduce the relevant notions of recovery, examine conditions under which recovery is possible, and investigate a desired estimator for recovery.
This lecture is based on~\cite{abbe_community_2018}.
\section{The Stochastic Block Model}
Let us consider a simple case with two (strictly) balanced communities.
\subsection{Definition}
We consider a random graph model on $n$ (even) nodes where there are two communities, say, $+1$ and $-1$, each consisting of $n/2$ nodes. For each pair of nodes $(i, j)$, we randomly assign an edge between them with probability $q_{in}$ if they belong to the same community, and with probability $q_{out}$ otherwise. Then, the following $2\times 2$ matrix describes the edge density within and across the two communities:
\begin{align*}
W =
\begin{blockarray}{ccc}
& +1 & -1 \\
\begin{block}{c[cc]}
+1 & q_{in} & q_{out} \\
-1 & q_{out} & q_{in} \\
\end{block}
\end{blockarray}.
\end{align*}
Since nodes belonging to the same community should be more likely to share an edge (at least in some applications), we assume $q_{in} \ge q_{out}$. Let $\textup{SBM}(n,q_{in},q_{out})$ denote the resulting random graph model, which is a probability distribution on the set of all $n$-node simple graphs. Namely, the model defines an $n$-vertex random graph with vertices
split in two balanced communities, where each vertex is assigned a community label in $\{+1, -1\}$ uniformly at random subject to the two communities having equal size, and pairs of vertices with labels $i$ and $j$
connect independently with probability $W_{i, j}$ where $i, j \in \{ +1, -1\}$.
More specifically, we say that $(X, G) \sim$ SBM$(n, q_{in}, q_{out})$ if
\begin{enumerate}
\item {\em [Community assignment]} $X$ is uniformly random over
\begin{align*}
\Pi_2^{n} := \{\mathbf{x} \in \{+1,-1\}^{n}: \mathbf{x}^{T}\mathbf{1} = 0 \} \text{ where } \mathbf{1}= (1, \cdots, 1)^{T}
\end{align*}
which indicates that the numbers of $+1$ and $-1$ labels are the same \textit{(balanced)}.
\item {\em [Graph]} $G$ has independent edges where $(i, j)$ is present with probability $W_{X_i, X_j}$ for all $i \neq j$.
\end{enumerate}
\iffalse
i) [Community assignment] $X$ is uniformly random over
\begin{align*}
\Pi_2^{n} = \{\mathbf{x} \in \{+1,-1\}^{n}: \mathbf{x}^{T}\mathbf{1} = 0 \} \text{ where } \mathbf{1}= (1, \cdots, 1)^{T}
\end{align*}
which indicates there is the same number of $+1$ and $-1$ cases (balanced).
ii) [Graph] $G$ has independent edges where $(i, j)$ is present with probability $W_{X_i, X_j}$ for $\forall i \neq j$ .
\fi
\subsection{Recovery Requirement}
The goal of community detection is to estimate (recover) the labels $X$ by observing $G$. We first define the notion of agreement.
\begin{definition}(Agreement)
The agreement between two community vectors $\mathbf{x}, \mathbf{y} \in \{+1, -1\}^{n}$ is obtained by maximizing the common components between $\mathbf{x}$ and any relabelling of $\mathbf{y}$, i.e.,
\begin{align*}
A(\mathbf{x}, \mathbf{y}) = \max_{s \in \{+1, -1\}} \frac{1}{n}\sum_{i=1}^{n}\mathbf{1}\{x_{i} = s y_{i}\}.
\end{align*}
\end{definition}
Now consider the following recovery requirements, which are asymptotic, holding with high probability as $n$ tends to infinity.
\begin{definition}
Let $(X, G) \sim$ SBM$(n, q_{in}, q_{out})$ and for $\hat{X} = \hat{X}(G) \in \Pi_2^{n}$, an estimate of $X$, we say that we achieve
\end{definition}
\begin{itemize}
\item \textbf{\em Exact recovery:} $\mathbb{P}(A(X, \hat{X}) = 1) = 1 - o(1)$
\item \textbf{\em Almost exact recovery:} $\mathbb{P}(A(X, \hat{X}) = 1 - o(1)) = 1 - o(1)$
\end{itemize}
Then when is it possible to achieve (almost) exact recovery? The following theorem provides conditions on $q_{in}$ and $q_{out}$ for exact recovery.
\begin{theorem} Exact recovery in SBM$(n, \alpha \log(n)/n, \beta \log(n)/n)$ is achievable, and efficiently so, if $\sqrt{\alpha} - \sqrt{\beta} > \sqrt{2}$, and is not achievable if $\sqrt{\alpha} - \sqrt{\beta} < \sqrt{2}$.
\end{theorem}
%A few remarks regarding this result:
% O(logn /n) - one isolated vertex - which community is coming from
% connect regine - alpha, beta large enough - achieve exact
% if special relationship is satisfied
\subsection{MAP estimator}
A natural starting point is to resolve the estimation of $X$ from the noisy observation $G$ by taking the \textbf{Maximum A Posteriori (MAP)} estimator.
Let $\Omega(X)$ be the partition corresponding to $X$ and $\hat{\Omega}(G)$ be the partition corresponding to $\hat{X}(G)$. The probability of error (not recovering the true partition), $\mathbb{P}_e$, is given by
\begin{align*}
\mathbb{P}_e
:= \mathbb{P}(\Omega \neq \hat{\Omega}(G))
= \sum_{g} \mathbb{P}(\hat{\Omega}(g) \neq \Omega | G = g) \mathbb{P}(G=g),
\end{align*}
and an estimator $\hat{\Omega}^{MAP}(\cdot)$ minimizing the above must minimize $\mathbb{P}(\hat{\Omega}(g) \neq \Omega | G = g)$ for every $g$. To minimize $\mathbb{P}(\hat{\Omega}(g) \neq \Omega | G = g)$, we need to choose $\omega$ that maximizes the posterior probability
\begin{align*}
\mathbb{P}(\Omega = \omega|G=g)
&= \frac{\mathbb{P}(G=g|\Omega=\omega) \mathbb{P}(\Omega=\omega)}{\mathbb{P}(G=g)} \qquad (\because \text{Bayes rule}) \\
& \propto \mathbb{P}(G=g|\Omega=\omega) \mathbb{P}(\Omega=\omega) \\
& \propto \mathbb{P}(G=g|\Omega=\omega) \qquad \left(\because \mathbb{P}(\Omega=\omega)=\frac{1}{\# \text{ of partitions}}\right)
\end{align*}
The MAP estimator is thus equivalent to the Maximum Likelihood estimator: maximize $\mathbb{P}(G=g|\Omega=\omega)$ over equal-size partitions $\omega$.
For fixed $g$, let $N := N(g)$ be the number of edges in $g$. For any $\omega$, let $N_{in} := N_{in}(g, \omega)$ and $N_{out} := N_{out}(g, \omega)$ denote the numbers of edges within and across communities, respectively, and note that $N_{in} = N - N_{out}$. Then
\begin{align*}
\mathbb{P}(G=g|\Omega=\omega)
&= (q_{out})^{N_{out}} (1-q_{out})^{(\frac{n}{2})^{2} - N_{out}} (q_{in})^{N-N_{out}} (1-q_{in})^{\{ \binom{n}{2} - (\frac{n}{2})^{2} \} - \{N-N_{out}\}} \\
&\propto \left[ \frac{q_{out}}{1-q_{out}} \times \frac{1-q_{in}}{q_{in}} \right]^{N_{out}}
\end{align*}
Since we assume $q_{in} \ge q_{out}$, we have $\left[ \frac{q_{out}}{1-q_{out}} \times \frac{1-q_{in}}{q_{in}} \right] \le 1$. Therefore, to maximize $\mathbb{P}(G=g|\Omega=\omega)$, we need to choose $\omega$ that minimizes $N_{out}$. In this sense, MAP is equivalent to solving the \textit{min-bisection problem}.
Alternatively, the same problem can be written as follows:
\begin{align*}
\max_{\mathbf{x} \in \{+1, -1\}^{n}, \mathbf{x}^{T}\mathbf{1}=0} \mathbf{x}^{T}A\mathbf{x}
\end{align*}
where $A$ is the $n \times n$ adjacency matrix. Due to the constraint $\mathbf{x}^{T}\mathbf{1}=0$, it is reasonable to take the eigenvector corresponding to the second largest eigenvalue of $A$ for an appropriate relaxation of MAP. This will be discussed in more detail next time.
%\bibliography{roch-hdps-scribe39}
\bibliographystyle{alpha}
\begin{thebibliography}{1}
\bibitem[Abb18]{abbe_community_2018}
Emmanuel Abbe.
\newblock Community {Detection} and {Stochastic} {Block} {Models}.
\newblock {\em Foundations and Trends{\textregistered} in Communications and Information
Theory}, 14(1-2):1--162, June 2018.
\newblock Publisher: Now Publishers, Inc.
%
% \bibitem{abbe2016community}
% Emmanuel Abbe.
% \newblock Community Detection and Stochastic Block Models
% \newblock {\em Manuscript}, 2016.
%
% \bibitem{abbe2017community}
% Emmanuel Abbe.
% \newblock Community detection and stochastic block models: recent developments.
% \newblock {\em The Journal of Machine Learning Research}, 18(1):6446--6531,
% 2017.
%
\end{thebibliography}
\end{document}