import numpy as np
from scipy.stats import beta as Beta, binom, norm, rv_continuous
from scipy.special import gamma
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
np.random.seed(42)
Verify 11.7:
Let $X_1, \dots, X_n \sim N(\theta, \sigma^2)$. For simplicity, let us assume that $\sigma$ is known. Let us take as a prior $\theta \sim N(a, b^2)$. Show the posterior for $\theta$ is
$$\theta \mid X^n \sim N(\bar{\theta}, \tau^2)$$where
$$\bar{\theta} = w\bar{X} + (1-w)a,$$$$w = \frac{\frac{1}{\text{se}^2}}{\frac{1}{\text{se}^2} + \frac{1}{b^2}}, \frac{1}{\tau^2} = \frac{1}{\text{se}^2} + \frac{1}{b^2},$$and $\text{se} = \sigma / \sqrt{n}$ is the standard error of the MLE $\bar{X}$.
Solution:
Note that it suffices to show the posterior pdf is proportional to $\exp \left(-\frac{1}{2} \left(\frac{\theta - \bar{\theta}}{\tau}\right) ^ 2 \right)$. Ignoring constant factors will be a common theme in this chapter. Letting $\mathcal{L}(\theta \mid X^n)$ and $f(\theta)$ denote the likelihood and prior, respectively, we have
\begin{align*} f(\theta \mid X^n) &\propto \mathcal{L}(\theta \mid X^n) f(\theta) \\ &\propto \exp \left( -\frac{1}{2\sigma^2} \sum_i (X_i - \theta)^2 \right) \cdot \exp \left( -\frac{1}{2b^2}(\theta - a)^2 \right) \\ &= \exp \left( -\frac{1}{2} \left[ \frac{\sum_i (X_i - \theta)^2}{\sigma^2} + \frac{(\theta - a)^2}{b^2} \right] \right) \end{align*}Now note that it suffices to show the contents of the square brackets are equal to $\left(\frac{\theta - \bar{\theta}}{\tau}\right) ^ 2 + C$ for some $C$ that doesn't depend on $\theta$. Note that $\sum_i (X_i - \theta)^2 = n(\bar{X} - \theta)^2 + nS^2$, where $S^2 = n^{-1}\sum_i (X_i - \bar{X})^2$ (shown by adding $(-\bar{X} + \bar{X})$ inside the square and expanding). Also note that $\frac{1}{\text{se}^2} = \frac{w}{\tau^2}$ and $\frac{1}{b^2} = \frac{1-w}{\tau^2}$.
\begin{align*} \frac{\sum_i (X_i - \theta)^2}{\sigma^2} + \frac{(\theta - a)^2}{b^2} &= \frac{n(\theta - \bar{X})^2 + nS^2}{\sigma^2} + \frac{(\theta - a)^2}{b^2} \\ &= \frac{(\theta - \bar{X})^2}{\text{se}^2} + \frac{(\theta - a)^2}{b^2} + \frac{S^2}{\text{se}^2} \\ &= \frac{\theta^2 - 2\bar{X}\theta + \bar{X}^2}{\text{se}^2} + \frac{\theta^2 - 2a\theta + a^2}{b^2} + C' \tag{$C' = \frac{S^2}{\text{se}^2}$} \\ &= \left(\frac{1}{\text{se}^2} + \frac{1}{b^2} \right)\theta^2 - 2\left(\frac{\bar{X}}{\text{se}^2} + \frac{a}{b^2}\right)\theta + \left(\frac{\bar{X}^2}{\text{se}^2} + \frac{a^2}{b^2} \right) + C'\\ &= \frac{1}{\tau^2} \theta^2 - 2 \left(\frac{w}{\tau^2}\bar{X} + \frac{1-w}{\tau^2} a\right)\theta + \frac{w\bar{X}^2 + (1-w)a^2}{\tau^2} +C' \\ &= \frac{1}{\tau^2} \left[\theta^2 - 2 \bar{\theta}\theta + \bar{\theta}^2\right] + \frac{w\bar{X}^2 + (1-w)a^2 - \bar{\theta}^2}{\tau^2} +C' \\ &= \frac{(\theta - \bar{\theta})^2}{\tau^2} + C \tag{$C = \frac{w\bar{X}^2 + (1-w)a^2 - \bar{\theta}^2}{\tau^2} + C'$} \end{align*}Since neither $C'$ nor $C$ depends on $\theta$, this completes the proof.

Let $X_1, \dots, X_n \sim \text{Normal}(\mu, 1)$.
(a) Simulate a data set (using $\mu = 5$) consisting of $n=100$ observations.
mu = 5
n = 100
X = norm.rvs(loc=mu, scale=1, size=n)
(b) Take $f(\mu) = 1$ and find the posterior density. Plot the density.
The posterior density is given by: \begin{align*} f(\mu \mid X^n) &\propto \mathcal{L}_n(\mu \mid X^n) f(\mu) \\ &= (2 \pi)^{-n/2} \exp \left( -\frac12 \sum_i (X_i - \mu)^2 \right) \\ &= (2 \pi)^{-n/2} \exp \left( -\frac{n}{2}\left[(\bar{X} - \mu)^2 + S^2 \right]\right) \\ &\propto \exp \left[-\frac12 \left(\frac{\bar{X} - \mu}{1 / \sqrt{n}} \right)^2 \right], \end{align*} which is proportional to the $N(\bar{X}, 1/n)$ density $\sqrt{\frac{n}{2\pi}} \exp \left[-\frac12 \left(\frac{\bar{X} - \mu}{1 / \sqrt{n}} \right)^2 \right]$.
Thus, $\mu \mid X^n \sim N(\bar{X},1/ n)$.
X_bar = np.mean(X)
sigma = np.sqrt(1/n)
XX = np.arange(4, 6, .01)
plt.plot(XX, norm.pdf(XX, loc=X_bar, scale=sigma))
plt.title("Posterior Density for $\mu$")
plt.grid()
plt.show()
(c) Simulate 1000 draws from the posterior. Plot a histogram of the simulated values and compare the histogram to the answers in (b).
k = 1000
mu_post = norm.rvs(loc=X_bar, scale=sigma, size=k)
plt.plot(XX, norm.pdf(XX, loc=X_bar, scale=np.sqrt(1/n)), label='Posterior Density')
plt.hist(mu_post, bins=30, histtype='step', density=True, label='(Normalized) Histogram of Draws')
plt.grid()
plt.legend(loc='upper right')
plt.show()
(d) Let $\theta = e^\mu$. Find the posterior density for $\theta$ analytically and by simulation.
The posterior cdf is given by: \begin{align*} H(\theta | X^n) &= P(e^\mu \le \theta | X^n) \\ &= \int_{\{\mu : e^\mu \le \theta\}} f(\mu \mid X^n) \, d\mu \\ &= \int_{-\infty}^{\log(\theta)} f(\mu \mid X^n) \, d\mu \\ &= \Phi \left( \frac{\log(\theta) - \bar{X}}{\sqrt{1/n}}\right) \end{align*} Meaning the posterior pdf is:
\begin{align*} H'(\theta | X^n) = \phi \left( \frac{\log(\theta) - \bar{X}}{\sqrt{1/n}}\right)\frac{1}{\theta \sqrt{1/n}} \end{align*}
TT = np.arange(100, 200, .1)
theta_pdf = norm.pdf((np.log(TT) - X_bar) / np.sqrt(1/n)) * (1 / (TT * np.sqrt(1/n)))
plt.plot(TT, theta_pdf, label='Analytic Posterior Density')
theta_post = np.exp(mu_post)  # posterior draws of theta, obtained by transforming the draws of mu
plt.hist(theta_post, bins=30, histtype='step', density=True, label='(Normalized) Histogram of Draws')
plt.grid()
plt.legend()
plt.show()
(e) Find a 95% posterior interval for $\mu$.
As $\mu | X^n \sim N(\bar{X}, 1/n)$, a 95% posterior interval is given by $(\bar{X} - n^{-1/2}z_{0.025},\, \bar{X} + n^{-1/2}z_{0.025})$.
alpha = 0.05
z = norm.ppf(1 - alpha/2)
print(f"{100 *(1-alpha):.0f}% C.I. for mu: ({X_bar - (1/np.sqrt(n)) * z:.3f}, {X_bar + (1/np.sqrt(n)) * z:.3f})")
95% C.I. for mu: (4.700, 5.092)
(f) Find a 95% posterior interval for $\theta$.
We can transform the interval for $\mu$, yielding $(\exp(\bar{X} - n^{-1/2}z_{0.025}),\exp(\bar{X} + n^{-1/2}z_{0.025}))$:
print(f"{100 *(1-alpha):.0f}% C.I. for theta: " \
f"({np.exp(X_bar - (1/np.sqrt(n)) * z):.3f}," \
f" {np.exp(X_bar + (1/np.sqrt(n)) * z):.3f})")
95% C.I. for theta: (109.964, 162.739)
or we can get it from our simulation:
print(f"{100 *(1-alpha):.0f}% C.I. for theta: " \
f"({np.quantile(theta_post, 0.025):.3f}," \
f" {np.quantile(theta_post, 0.975):.3f})")
95% C.I. for theta: (111.968, 164.394)
Let $X_1, \dots, X_n \sim \text{Uniform}(0, \theta)$. Let $f(\theta) \propto 1/\theta$. Find the posterior density.
Solution:
Let $X_{\max} = \max\{X_1, \dots, X_n\}$. Recalling $$\mathcal{L}_n(\theta \mid X^n) = \begin{cases} 0 & \theta \le X_{\max} \\ \frac{1}{\theta^n} & X_{\max} < \theta, \end{cases} $$
we have
\begin{align*} f(\theta \mid X^n) &\propto \mathcal{L}_n(\theta \mid X^n) f(\theta) \\ &= \begin{cases} 0 & \theta \le X_{\max}\\ \frac{1}{\theta^{n+1}} & X_{\max} < \theta \end{cases} \\ &= \mathbb{1}_{X_{\max} < \theta} \cdot \theta^{-(n+1)}. \end{align*}Normalizing, we get
$$f(\theta \mid X^n) = \mathbb{1}_{X_{\max} < \theta} \cdot \frac{n \, X_{\max}^n}{\theta^{n+1}},$$ since $\int_{X_{\max}}^{\infty} \theta^{-(n+1)} \, d\theta = \frac{1}{n \, X_{\max}^n}$.

Suppose that 50 people are given a placebo and 50 are given a new treatment. 30 placebo patients show improvement while 40 treated patients show improvement. Let $\tau = p_2 - p_1$ where $p_2$ is the probability of improving under treatment and $p_1$ is the probability of improving under placebo.
(a) Find the MLE of $\tau$. Find the standard error and 90 percent confidence interval using the delta method.
Let $\mathbf{p} = [p_1, p_2]^T$, and let $g(\mathbf{p}) = p_2 - p_1$. Note that the likelihood of $\mathbf{p}$ can be expressed as the product of two independent likelihoods:
\begin{align*} \mathcal{L}_n(p_1,p_2) &= {50 \choose 30}p_1^{30}(1-p_1)^{20} \cdot {50 \choose 40}p_2^{40}(1-p_2)^{10} \\ &= \mathcal{L}_n(p_1) \cdot \mathcal{L}_n(p_2) \end{align*}Hence $\max \mathcal{L}_n(p_1,p_2) = \max \mathcal{L}_n(p_1) \cdot \max \mathcal{L}_n(p_2)$.
Maximizing the individual likelihoods:
$\ell(p_1) = 30\log(p_1) + 20\log(1-p_1)$
$\ell'(p_1) = \frac{30}{p_1} - \frac{20}{1-p_1} = 0 \Rightarrow 30(1-\hat{p}_1) = 20\hat{p}_1 \Rightarrow \hat{p}_1 = \frac{3}{5}$
Similarly, $\hat{p}_2 = \frac{4}{5}$, and so $\hat{\mathbf{p}} = [\hat{p}_1, \hat{p}_2]^T$. By the invariance property of the MLE, $\hat{\tau} = g(\hat{\mathbf{p}}) = \hat{p}_2 - \hat{p}_1 = \frac{1}{5}$.
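As a quick numerical check of these closed-form MLEs (a sketch using scipy.optimize, not part of the original solution), maximizing each log-likelihood should recover $3/5$ and $4/5$:
from scipy.optimize import minimize_scalar
# Sketch: numerically maximize each binomial log-likelihood.
neg_ll_1 = lambda p: -(30 * np.log(p) + 20 * np.log(1 - p))
neg_ll_2 = lambda p: -(40 * np.log(p) + 10 * np.log(1 - p))
print(minimize_scalar(neg_ll_1, bounds=(1e-6, 1 - 1e-6), method='bounded').x)
print(minimize_scalar(neg_ll_2, bounds=(1e-6, 1 - 1e-6), method='bounded').x)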
We now seek the standard error. We have the gradient of $g$:
$\nabla g = \begin{bmatrix} -1 \\ 1 \end{bmatrix}$
the Fisher Information matrix (the negated expected value of the Hessian of the log likelihood):
$I_n = \begin{bmatrix} \frac{50}{p_1(1-p_1)} & 0 \\ 0 & \frac{50}{p_2(1-p_2)} \end{bmatrix}$
and its inverse:
$J_n = I_n^{-1} = \begin{bmatrix} \frac{p_1(1-p_1)}{50} & 0 \\ 0 & \frac{p_2(1-p_2)}{50} \end{bmatrix}.$
By Theorem 9.28, we have
$\hat{\text{se}}(\hat{\tau}) = \sqrt{(\hat{\nabla}g)^T \hat{J}_n \hat{\nabla}g} = \sqrt{\frac{\hat{p}_1(1-\hat{p}_1)}{50} + \frac{\hat{p}_2(1-\hat{p}_2)}{50}} = \frac{\sqrt{5}}{25} \approx 0.0894$
where $\hat{\nabla}g = \nabla g(\hat{\mathbf{p}})$ ($=\nabla g$, in our case), and $\hat{J}_n = J_n(\hat{\mathbf{p}})$.
Thus, a 90 percent confidence interval is given by:
$(\frac{1}{5} - z_{.05}\frac{\sqrt{5}}{25}, \frac{1}{5} + z_{0.05}\frac{\sqrt{5}}{25}) \approx (0.0529, 0.3471)$
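The same numbers can be obtained by evaluating $(\hat{\nabla}g)^T \hat{J}_n \hat{\nabla}g$ directly; a quick numerical sketch (not part of the original solution):
# Sketch: delta-method standard error and 90% C.I. for tau = p2 - p1 in matrix form.
p_hat_vec = np.array([3/5, 4/5])
grad_g = np.array([-1.0, 1.0])  # gradient of g(p) = p2 - p1
J_hat = np.diag(p_hat_vec * (1 - p_hat_vec) / 50)  # estimated inverse Fisher information
se_hat = np.sqrt(grad_g @ J_hat @ grad_g)
z_05 = norm.ppf(1 - 0.05)  # z_{0.05} for a 90% interval
print(f"se: {se_hat:.4f}, 90% C.I.: ({0.2 - z_05 * se_hat:.4f}, {0.2 + z_05 * se_hat:.4f})")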
(b) Find the standard error and 90 percent confidence interval using the parametric bootstrap.
B = 100000
p_1_hat = 3 / 5
p_2_hat = 4 / 5
tau_hat = p_2_hat - p_1_hat
tau_boot = np.empty(B)
for i in range(B):
    p_1_boot = binom.rvs(n=50, p=p_1_hat) / 50
    p_2_boot = binom.rvs(n=50, p=p_2_hat) / 50
    tau_boot[i] = p_2_boot - p_1_boot
se_boot = np.std(tau_boot)
z = norm.ppf(1-0.05)
print(f"Standard error: {se_boot:.3f}")
print(f"90% C.I.: ({tau_hat - z * se_boot:.3f},{tau_hat + z * se_boot:.3f})")
Standard error: 0.089
90% C.I.: (0.053,0.347)
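As an aside, the bootstrap loop above can be vectorized by drawing all $B$ binomial counts at once; a sketch that should agree with the loop up to simulation noise:
# Sketch: vectorized parametric bootstrap for tau.
p_1_boot_vec = binom.rvs(n=50, p=p_1_hat, size=B) / 50
p_2_boot_vec = binom.rvs(n=50, p=p_2_hat, size=B) / 50
print(f"Vectorized bootstrap standard error: {np.std(p_2_boot_vec - p_1_boot_vec):.3f}")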
(c) Use the prior $f(p_1, p_2) = 1$. Use simulation to find the posterior mean and posterior 90 percent interval for $\tau$.
We have the posterior:
\begin{align*} p(\mathbf{p} \mid X) &\propto \mathcal{L}(\mathbf{p})f(\mathbf{p}) \\ &= p_1^{30}(1-p_1)^{20}p_2^{40}(1-p_2)^{10} \\ &\propto \frac{p_1^{30}(1-p_1)^{20}p_2^{40}(1-p_2)^{10}}{\int \int p_1^{30}(1-p_1)^{20}p_2^{40}(1-p_2)^{10} \,dp_1 \, dp_2} \\ &\propto p_1^{(30 + 1) - 1} (1-p_1)^{(20 + 1)-1}p_2^{(40 + 1) - 1} (1-p_2)^{(10 + 1)-1} \end{align*}Thus we can sample $p_1$ from $\text{Beta}(31, 21)$ and $p_2$ from $\text{Beta}(41, 11)$.
k = 100000
p_1_samples = Beta.rvs(a=31, b=21, size=k)
p_2_samples = Beta.rvs(a=41, b=11, size=k)
tau_samples = p_2_samples - p_1_samples
print(f"Simulated posterior mean: {np.mean(tau_samples):.3f}")
print(f"90% C.I.: " \
f"({np.quantile(tau_samples, 0.05):.3f}," \
f" {np.quantile(tau_samples, 0.95):.3f})")
Simulated posterior mean: 0.193
90% C.I.: (0.047, 0.336)
(d) Let $$\psi = \log \left( \left(\frac{p_1}{1-p_1}\right) \div \left(\frac{p_2}{1-p_2}\right) \right)$$
be the log-odds ratio. Note that $\psi = 0$ if $p_1 = p_2$. Find the MLE of $\psi$. Use the delta method to find a 90 percent confidence interval for $\psi$.
By the equivariance of the MLE, the MLE of $\psi$ is simply
$$\hat{\psi} = \log \left( \left(\frac{\hat{p}_1}{1-\hat{p}_1}\right) \div \left(\frac{\hat{p}_2}{1-\hat{p}_2}\right) \right) \approx -0.981$$We now compute the standard error using the delta method: $$\hat{\text{se}}(\hat{\psi}) = \sqrt{(\hat{\nabla}g)^T \hat{J}_n \hat{\nabla}g}$$ where $\hat{J}_n$ is as in part (a) and now $$g(p_1, p_2) = \log \left( \left(\frac{p_1}{1-p_1}\right) \div \left(\frac{p_2}{1-p_2}\right) \right),$$ meaning $$\nabla g = \begin{bmatrix} \frac{1}{p_1(1-p_1)} \\ -\frac{1}{p_2(1-p_2)}\end{bmatrix}.$$
Thus, \begin{align*} \hat{\text{se}}(\hat{\psi}) &= \sqrt{\frac{1}{50\hat{p}_1(1-\hat{p}_1)} + \frac{1}{50\hat{p}_2(1-\hat{p}_2)}} \\ &= \frac{\sqrt{30}}{12} \approx 0.456 \end{align*}
psi_hat = np.log((p_1_hat / (1 - p_1_hat)) / (p_2_hat / (1 - p_2_hat)))
z = norm.ppf(1-0.05)
print(f"90% C.I.: ({psi_hat - z * np.sqrt(30)/12:.3f}, {psi_hat + z * np.sqrt(30)/12:.3f})")
90% C.I.: (-1.732, -0.230)
(e) Use simulation to find the posterior mean and posterior 90 percent interval for $\psi$.
psi_samples = np.log((p_1_samples / (1 - p_1_samples)) / (p_2_samples / (1 - p_2_samples)))
print(f"Posterior mean: {np.mean(psi_samples):.3f}")
print(f"90% C.I.: ({np.quantile(psi_samples, 0.05):.3f}, "
f"{np.quantile(psi_samples, 0.95):.3f})")
Posterior mean: -0.954
90% C.I.: (-1.705, -0.226)
Consider the $\text{Bernoulli}(p)$ observations $$ 0\,1\,0\,1\,0\,0\,0\,0\,0\,0 $$ Plot the posterior for $p$ using these priors: $\text{Beta}(1/2,1/2)$, $\text{Beta}(1,1)$, $\text{Beta}(10,10)$, $\text{Beta}(100,100)$.
Solution:
Recall a random variable has a $\text{Beta}$ distribution with parameters $\alpha$ and $\beta$ if its density is $$ f(p; \alpha, \beta) = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha)\Gamma(\beta)} p^{\alpha - 1}(1-p)^{\beta - 1}$$
Since
$\mathcal{L}_n(p \mid X) = \prod_i p^{X_i} (1-p)^{1-X_i} = p^2(1-p)^8$,
\begin{align*} f(p \mid X) &\propto \mathcal{L}_n(p \mid X) f(p) \\ &\propto p^{\alpha + 1}(1-p)^{\beta + 7} \\ &= p^{(\alpha + 2) - 1}(1-p)^{(\beta + 8)-1} \\ &\propto \frac{\Gamma(\alpha + \beta + 10)}{\Gamma(\alpha + 2)\Gamma(\beta + 8)} p^{(\alpha + 2) - 1}(1-p)^{(\beta + 8)-1}. \\ \end{align*}That is, $p \mid X \sim \text{Beta}(\alpha + 2, \beta + 8)$.
p_hat = 2 / 10
alphas = [.5, 1, 10, 100]
betas = [.5, 1, 10, 100]
X = np.arange(0.01, 1, 0.01)  # avoid the left endpoint, where the Beta(1/2, 1/2) density diverges
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
for alpha, beta in zip(alphas, betas):
ax1.plot(X, Beta.pdf(X, alpha, beta))
ax2.plot(X, Beta.pdf(X, alpha + 2, beta + 8), label=f"a={alpha}, b={beta}")
ax1.set_title("Prior Distributions")
ax2.set_title("Posterior Distributions")
ax2.vlines(p_hat, 0, 12, linestyle='--', color='black', label='MLE')
ax1.grid(True)
ax2.grid(True)
ax2.legend()
plt.show()
Let $X_1, \dots, X_n \sim \text{Poisson}(\lambda)$.
(a) Let $\lambda \sim \text{Gamma}(\alpha, \beta)$ be the prior. Show that the posterior is also a Gamma. Find the posterior mean.
$\mathcal{L}_n(\lambda) = \prod_i e^{-\lambda}\frac{\lambda^{X_i}}{X_i!} = \frac{1}{\prod_i X_i!} e^{-n\lambda}\lambda^{(\sum_i X_i)}$
\begin{align*} f(\lambda \mid X) &\propto \mathcal{L}_n(\lambda)f(\lambda) \\ &=\frac{1}{\prod_i X_i!} e^{-n\lambda}\lambda^{(\sum_i X_i)} \frac{1}{\beta^{\alpha} \Gamma(\alpha)} \lambda^{\alpha - 1} e^{-\lambda / \beta} \\ &= \frac{1}{\beta^{\alpha} \Gamma(\alpha) \prod_i X_i!} \lambda^{(\alpha + \sum_i X_i) - 1} e^{-\lambda (n + \frac{1}{\beta})} \\ &\propto \lambda^{\alpha' - 1} e^{-\lambda/\beta'} \end{align*}where $\alpha' = \alpha + \sum_i X_i$ and $\beta' = \frac{\beta}{n\beta + 1}$. Thus, $\lambda \mid X \sim \text{Gamma}(\alpha + \sum_i X_i, \frac{\beta}{n\beta + 1})$. The posterior mean is thus the mean of this distribution:
$$E(\lambda \mid X) = \frac{\beta(\alpha + \sum_i X_i)}{n\beta + 1}$$
(b) Find the Jeffreys prior. Find the posterior.
Computing the Jeffreys prior: \begin{align*} f(x;\lambda) &= e^{-\lambda} \frac{\lambda^x}{x!} \\ \Rightarrow \log f(x;\lambda) &= -\lambda + x\log(\lambda) - \log(x!) \\ \Rightarrow s(x, \lambda) = \frac{\partial}{\partial \lambda} \log f(x;\lambda) &= -1 + \frac{x}{\lambda} \\ \Rightarrow \frac{\partial s(x, \lambda)}{\partial \lambda} &= -\frac{x}{\lambda^2} \\ \Rightarrow I(\lambda) = -\mathbb{E}_{\lambda}\left(\frac{\partial s(x, \lambda)}{\partial \lambda}\right) &= \frac{1}{\lambda} \\ \Rightarrow f(\lambda) \propto \sqrt{I(\lambda)} &= \sqrt{1 / \lambda} \\ \end{align*}
Thus, the Jeffreys prior is $f(\lambda) \propto \sqrt{1 / \lambda}$, an improper prior.
Computing the posterior:
\begin{align*} f(\lambda \mid X) &\propto \mathcal{L}_n(\lambda)f(\lambda) \\ &=\frac{1}{\prod_i X_i!} e^{-n\lambda}\lambda^{(\sum_i X_i)} \lambda^{-1/2} \\ &=\frac{1}{\prod_i X_i!} e^{-n\lambda}\lambda^{(\sum_i X_i) - 1/2} \\ &\propto \lambda^{\alpha' - 1} e^{-\lambda/\beta'} \end{align*}where $\alpha' = (\sum_i X_i) + 1/2$ and $\beta' = 1/n$. Hence $\lambda \mid X \sim \text{Gamma}(\sum_i X_i + 1/2, 1/n)$.
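This exercise has no accompanying code, so here is a quick numerical check of the conjugate update from part (a) (a sketch; the choices $\alpha = 2$, $\beta = 1$, true $\lambda = 3$, and $n = 50$ are arbitrary): compare the $\text{Gamma}(\alpha + \sum_i X_i, \beta/(n\beta + 1))$ density to the grid-normalized product of likelihood and prior.
from scipy.stats import gamma as Gamma, poisson
# Sketch: compare the conjugate Gamma posterior to a brute-force grid posterior.
alpha_0, beta_0, lam_true, n_obs = 2, 1, 3, 50
X_pois = poisson.rvs(mu=lam_true, size=n_obs)
grid = np.linspace(0.01, 8, 1000)
log_unnorm = poisson.logpmf(X_pois[:, None], mu=grid).sum(axis=0) + Gamma.logpdf(grid, a=alpha_0, scale=beta_0)
unnorm = np.exp(log_unnorm - log_unnorm.max())
grid_post = unnorm / (unnorm.sum() * (grid[1] - grid[0]))  # normalize numerically on the grid
conj_post = Gamma.pdf(grid, a=alpha_0 + X_pois.sum(), scale=beta_0 / (n_obs * beta_0 + 1))
print(f"Max absolute difference on the grid: {np.max(np.abs(grid_post - conj_post)):.2e}")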
In Example 11.9, verify 11.11.
See Example 11.9 for definitions.
We seek to show the Horwitz-Thompson estimator:
$$\hat{\psi} = \frac{1}{n} \sum_{i=1}^n \frac{R_iY_i}{\xi_{X_i}}$$is unbiased with variance not exceeding $\frac{1}{n\delta^2}$.
Solution: Since the triples $(X_i, R_i, Y_i)$ are i.i.d., $\mathbb{E}\left[ \frac{R_iY_i}{\xi_{X_i}} \right]$ is the same for every $i$, so it suffices to compute it for a single term. Conditioning on $X_i$ and applying the tower property, we may establish that $\hat{\psi}$ is unbiased: \begin{align*} \mathbb{E}(\hat{\psi}) &= \frac{1}{n} \sum_{j=1}^n \mathbb{E}\left[ \frac{R_jY_j}{\xi_{X_j}} \right] \\ &= \mathbb{E}\left[ \frac{R_iY_i}{\xi_{X_i}} \right] \\ &= \mathbb{E}\left[ \frac{1}{\xi_{X_i}}\, \mathbb{E}\left[R_iY_i \mid X_i \right] \right] \\ &= \mathbb{E}\left[ \frac{1}{\xi_{X_i}}\, \xi_{X_i} \theta_{X_i} \right] \tag{$R_i \perp Y_i \mid X_i$} \\ &= \mathbb{E}\left[\theta_{X_i}\right] = P(Y_i = 1) = \psi. \end{align*}
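A small simulation makes this concrete (a sketch only; the choices $X_i \sim \text{Uniform}(0,1)$, $\xi_x = 0.1 + 0.8x$, and $\theta_x = 0.3 + 0.4x$, giving $\delta = 0.1$ and $\psi = 0.5$, are arbitrary stand-ins for the Example 11.9 setup, not taken from the text):
# Sketch: check that the Horwitz-Thompson estimator is (approximately) unbiased in an assumed setup.
n_ht, reps, delta = 500, 2000, 0.1
psi_true = 0.5  # E[theta_X] for theta_x = 0.3 + 0.4x and X ~ Uniform(0, 1)
psi_hats = np.empty(reps)
for r in range(reps):
    X_ht = np.random.uniform(size=n_ht)
    xi = 0.1 + 0.8 * X_ht  # known selection probabilities, bounded below by delta
    R = np.random.binomial(1, xi)  # R_i | X_i ~ Bernoulli(xi_{X_i})
    Y = np.random.binomial(1, 0.3 + 0.4 * X_ht)  # Y_i | X_i ~ Bernoulli(theta_{X_i})
    psi_hats[r] = np.mean(R * Y / xi)  # only R_i * Y_i enters, so unobserved Y_i never matter
print(f"psi = {psi_true}, mean of estimates = {psi_hats.mean():.4f}, "
      f"variance = {psi_hats.var():.5f} (bound 1/(n*delta^2) = {1 / (n_ht * delta ** 2):.3f})")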
Similarly, $\mathbb{V}\left[ \frac{R_iY_i}{\xi_{X_i}} \right]$ is the same for every $i$, and the terms are independent, so we may bound the variance:
\begin{align*} \mathbb{V}(\hat{\psi}) &= \mathbb{V} \left[\frac{1}{n} \sum_{i=1}^n \frac{R_iY_i}{\xi_{X_i}} \right] \\ &= \frac{1}{n^2} \mathbb{V} \left[\sum_{i=1}^n \frac{R_iY_i}{\xi_{X_i}} \right] \\ &= \frac{1}{n} \mathbb{V} \left[ \frac{R_iY_i}{\xi_{X_i}} \right] \tag{Independence + identical distribution}\\ &= \frac{1}{n} \left[\mathbb{E}\left[ \left(\frac{R_iY_i}{\xi_{X_i}} \right)^2\right] - \psi^2\right] \\ &\le \frac{1}{n} \mathbb{E}\left[ \left(\frac{R_iY_i}{\xi_{X_i}} \right)^2\right] \tag{$\psi^2 \ge 0$} \\ &= \frac{1}{n} \mathbb{E}\left[ \frac{R_i^2Y_i^2}{\xi_{X_i}^2} \right] \\ &\le \frac{1}{n\delta^2} \mathbb{E} \left[R_i^2Y_i^2\right] \tag{$\xi_{X_i} \ge \delta$}\\ &\le \frac{1}{n\delta^2} \tag{$R_i^2Y_i^2 \le 1$} \end{align*}
Let $X \sim N(\mu, 1)$. Consider testing
$$H_0 : \mu = 0 \text{ versus } H_1 : \mu \ne 0$$Take $P(H_0) = P(H_1) = \frac12$. Let the prior for $\mu$ under $H_1$ be $\mu \sim N(0, b^2)$. Find an expression for $P(H_0 \mid X = x)$. Compare $P(H_0 \mid X = x)$ to the $p$-value of the Wald test. Do the comparison numerically for a variety of values of $x$ and $b$. Now repeat the problem using a sample of size $n$. You will see that the posterior probability of $H_0$ can be large even when the $p$-value is small, especially when $n$ is large. This disagreement between Bayesian and frequentist testing is called the Jeffreys-Lindley paradox.
Solution:
The MLE is $\bar{X}$, with standard error $\sigma / \sqrt{n} = n^{-1/2}$ (since $\sigma = 1$). So the Wald statistic is $W = \sqrt{n}\bar{X}$, with $p$-value $2 \Phi(-|W|)$.
We have
\begin{align*} P(H_0 \mid X^n) &= \frac{f(X^n \mid H_0) P(H_0)}{f(X^n \mid H_0) P(H_0) + f(X^n \mid H_1) P(H_1)} \\ &= \frac{f(X^n \mid H_0)}{f(X^n \mid H_0) + f(X^n \mid H_1)} \\ &= \frac{f(X^n \mid \mu = 0)}{f(X^n \mid \mu = 0) + \int f(X^n \mid \mu) f(\mu) \, d\mu} \\ &= \frac{1}{1 + \gamma} \end{align*}where
$$\gamma = \frac{\int f(X^n \mid \mu) f(\mu) \, d\mu}{f(X^n \mid \mu = 0)}.$$The integrand is:
\begin{align*} f(X^n \mid \mu) f(\mu) &= (2 \pi)^{-n/2} \exp \left\{ -\frac12 \sum_i (X_i - \mu)^2 \right\} (2 \pi b^2)^{-1/2} \exp \left\{ -\frac12 (\mu / b)^2 \right\} \\ &= \frac{1}{b} (2 \pi)^{-(n+1)/2} \exp \left\{ -\frac12 \left[\sum_i (X_i - \mu)^2 + (\mu / b)^2 \right] \right\}. \end{align*}Letting
$$\sigma^2 = \frac{b^2}{1 + nb^2},$$we can expand and complete the square:
\begin{align*} \sum_i (X_i - \mu)^2 + (\mu / b)^2 &= \frac{1}{b^2} \mu^2 + \sum_i [X_i^2 - 2X_i\mu + \mu^2] \\ &= \left(\frac{1}{b^2} + n\right)\mu^2 - 2n\bar{X}\mu + \sum_i X_i^2 \\ &= \frac{1}{\sigma^2}\mu^2 - 2n\bar{X}\mu + \sum_i X_i^2 \\ &= \frac{1}{\sigma^2}\left(\mu^2 - 2n\sigma^2\bar{X}\mu\right) + \sum_i X_i^2 \\ &= \frac{1}{\sigma^2}\left(\mu - n\sigma^2\bar{X}\right)^2 + \sum_i X_i^2 - (n\sigma\bar{X})^2. \\ \end{align*}Thus, we can express the integrand as the product of a constant and a density:
\begin{align*} f(X^n \mid \mu) f(\mu) &= \frac{1}{b}(2 \pi)^{-(n+1)/2} \exp \left\{ -\frac{\left(\mu - n\sigma^2\bar{X}\right)^2}{2\sigma^2} \right\} \exp \left\{ -\frac12 \left[ \sum_i X_i^2 - (n\sigma\bar{X})^2 \right] \right\} \\ &= \frac{\sigma}{b}(2 \pi)^{-n/2}\exp \left\{ -\frac12 \left[\sum_i X_i^2 - (n\sigma\bar{X})^2 \right] \right\} \cdot (2 \pi \sigma^2)^{-1/2} \exp \left\{ -\frac{\left(\mu - n\sigma^2\bar{X}\right)^2}{2\sigma^2} \right\}. \end{align*}Meaning:
$$\int f(X^n \mid \mu) f(\mu) \, d\mu = \frac{\sigma}{b}(2 \pi)^{-n/2}\exp \left\{ -\frac12 \left[\sum_i X_i^2 - (n\sigma\bar{X})^2 \right] \right\}$$Meanwhile,
\begin{align*} f(X^n \mid \mu = 0) &= (2\pi)^{-n/2} \exp\left\{-\frac{1}{2} \sum_i X_i^2 \right\} \end{align*}Thus,
$$ \frac{\int f(X^n \mid \mu) f(\mu) \, d\mu}{f(X^n \mid \mu = 0)} = \frac{\sigma}{b} \exp \left\{ (n\sigma\bar{X})^2 / 2 \right\},$$and
\begin{align*} P(H_0 \mid X^n) &= \frac{1}{1 + \frac{\sigma}{b} \exp \left\{(n\sigma\bar{X})^2 / 2 \right\}}. \end{align*}
# For each n, simulate data with true mu = 0.1, report the Wald p-value,
# and plot the posterior probability of H0 as a function of the prior scale b.
for n in [10, 100, 1000]:
    X = norm.rvs(loc=0.1, scale=1, size=n)
    X_bar = X.mean()
    p_value = 2 * norm.cdf(-np.sqrt(n) * np.abs(X_bar))
    print(f"n = {n}, p-value: {p_value:.3f}")
    b = np.logspace(-4, 5, num=50, base=10)
    sigma_2 = b ** 2 / (1 + n * (b ** 2))
    z = (np.sqrt(sigma_2) / b) * np.exp((np.sqrt(sigma_2) * n * X_bar) ** 2 / 2)
    posterior = 1 / (1 + z)
    plt.plot(b, posterior, label=f"n={n}")
plt.xscale('log')
plt.xlabel('b')
plt.ylabel("Posterior Probability of $H_0$")
plt.legend()
plt.grid()
plt.show()
n = 10, p-value: 0.008
n = 100, p-value: 0.193
n = 1000, p-value: 0.000
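The exercise also asks for the single-observation comparison. With $n = 1$ the formulas above give a $p$-value of $2\Phi(-|x|)$ and $P(H_0 \mid x) = \left(1 + \frac{\sigma}{b} e^{(\sigma x)^2/2}\right)^{-1}$ with $\sigma^2 = b^2/(1+b^2)$; a quick tabulation for a few values of $x$ and $b$ (a sketch):
# Sketch: p-value vs. posterior probability of H0 for a single observation X = x.
for x in [1.5, 2.0, 2.5, 3.0]:
    row = [f"p-value = {2 * norm.cdf(-abs(x)):.3f}"]
    for b in [0.5, 1, 10, 100]:
        s2 = b ** 2 / (1 + b ** 2)
        row.append(f"P(H0|x, b={b}) = {1 / (1 + np.sqrt(s2) / b * np.exp(s2 * x ** 2 / 2)):.3f}")
    print(f"x = {x}: " + ", ".join(row))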