diff --git a/poster/figures.jl b/poster/figures.jl
index 0ff5fa2..695bbf3 100644
--- a/poster/figures.jl
+++ b/poster/figures.jl
@@ -121,7 +121,7 @@ function pvalues_comparison()
         grid = "major",
         title_style = { align = "center" },
         width = raw"0.325\linewidth",
-        height = raw"0.26\linewidth",
+        height = raw"0.25\linewidth",
         "every x tick scale label/.style" = {at = "{(1,0)}", anchor = "west"},
         ylabel_style = { font = raw"\small" },
         xmin = 0, xmax = 1, ymin=-0.1, ymax=1.1
diff --git a/poster/figures/pvalues_comparison.tex b/poster/figures/pvalues_comparison.tex
index b1a0267..fac66c5 100644
--- a/poster/figures/pvalues_comparison.tex
+++ b/poster/figures/pvalues_comparison.tex
@@ -1,5 +1,5 @@
 \begin{tikzpicture}
-\begin{groupplot}[group style={group size={3 by 6}, xlabels at={edge bottom}, ylabels at={edge left}, horizontal sep={0.07\linewidth}, vertical sep={0.03\linewidth}, xticklabels at={edge bottom}, yticklabels at={edge left}}, no markers, tick label style={font={\tiny}}, grid={major}, title style={align={center}}, width={0.325\linewidth}, height={0.26\linewidth}, every x tick scale label/.style={at={{(1,0)}}, anchor={west}}, ylabel style={font={\small}}, xmin={0}, xmax={1}, ymin={-0.1}, ymax={1.1}]
+\begin{groupplot}[group style={group size={3 by 6}, xlabels at={edge bottom}, ylabels at={edge left}, horizontal sep={0.07\linewidth}, vertical sep={0.03\linewidth}, xticklabels at={edge bottom}, yticklabels at={edge left}}, no markers, tick label style={font={\tiny}}, grid={major}, title style={align={center}}, width={0.325\linewidth}, height={0.25\linewidth}, every x tick scale label/.style={at={{(1,0)}}, anchor={west}}, ylabel style={font={\small}}, xmin={0}, xmax={1}, ymin={-0.1}, ymax={1.1}]
     \nextgroupplot[title={$\symbf{M1}$}, ylabel={$\symbf{C}$}]
     \addplot+[thick]
         table[row sep={\\}]
diff --git a/poster/neurips.pdf b/poster/neurips.pdf
index 23b5273..d4603a3 100644
Binary files a/poster/neurips.pdf and b/poster/neurips.pdf differ
diff --git a/poster/neurips.tex b/poster/neurips.tex
index 5a79196..8e2a5e7 100644
--- a/poster/neurips.tex
+++ b/poster/neurips.tex
@@ -158,7 +158,7 @@
 % metadata
 \title{Calibration tests in multi-class classification:\\ A unifying framework}
 \author{David Widmann$^\star$ Fredrik Lindsten$^\ddagger$ Dave Zachariah$^\star$}
-\date{\today}
+\date{}
 \makeatletter
 \pgfkeys{%
   /my poster/.cd,
@@ -484,6 +484,13 @@
       \end{center}
     \end{minipage}
 
+    \vspace{\baselineskip}
+
+    We see that the standard estimator of the $\ECE$ exhibits both
+    negative and positive bias, whereas, as theoretically guaranteed,
+    $\biasedestimator$ is biased upwards and $\unbiasedestimator$
+    and $\linearestimator$ are unbiased.
+
     \tcbsubtitle{Empirical test errors}
 
     \begin{minipage}[t]{0.4\linewidth}
@@ -494,20 +501,19 @@
       $\symbf{A}_{\mathup{l}}$ of the p-value based on the
       $\squaredkernelmeasure$. We compare them with a previously
       proposed hypothesis test for the standard $\ECE$ estimator
-      ($\symbf{C}$).
+      ($\symbf{C}$). We show the empirical test errors computed
+      from the p-value approximations for different significance
+      levels.
 
       \vspace{\baselineskip}
 
-      For a chosen significance level $\alpha$ we compute from the
-      p-value approximations $p_1,\ldots,p_{10^4}$ the empirical test
-      error
-      \begin{equation*}
-        \frac{1}{10^4} \sum_{i=1}^{10^4} \mathbb{1}_{[0, \alpha]}(p_i) \quad \text{ (for } \symbf{M1} \text{)}
-      \end{equation*}
-      and
-      \begin{equation*}
-        \frac{1}{10^4} \sum_{i=1}^{10^4} \mathbb{1}_{(\alpha, 1]}(p_i) \quad \text{ (for } \symbf{M2} \text{ and } \symbf{M3} \text{)}.
-      \end{equation*}
+      We see that consistency resampling can lead to unreliable
+      calibration tests. The bounds $\symbf{D}_{\mathup{b}}$,
+      $\symbf{D}_{\mathup{uq}}$, and $\symbf{D}_{\mathup{ul}}$ yield
+      reliable but usually not powerful tests, whereas the
+      approximations $\symbf{A}_{\mathup{uq}}$ and
+      $\symbf{A}_{\mathup{l}}$ yield reliable and powerful
+      calibration tests in our experiments.
     \end{minipage}%
     \begin{minipage}[t]{0.6\linewidth}
       \vspace*{0pt}