import numpy as np
import pandas as pd
import math
from numpy.random import default_rng
rng = default_rng(42)
from scipy.stats import norm
import matplotlib as mpl
from matplotlib import pyplot as plt
import plotly.graph_objects as go
from tqdm.notebook import tqdm
mpl.rcParams['font.size'] = 18
Let $X_1, \dots, X_n \sim \text{Gamma}(\alpha,\beta)$. Find the method of moments estimator for $\alpha$ and $\beta$.
Solution:
The first and second moments of $X_1$ are $\alpha \beta$ and $\alpha ^ 2 \beta ^ 2 + \alpha \beta ^ 2$, respectively (in the shape-scale parameterization, where $E(X_1) = \alpha\beta$ and $V(X_1) = \alpha\beta^2$). We thus solve the system:
\begin{align*} \hat{\alpha} \hat{\beta} &= \frac1n \sum_{i=1}^n X_i \\ \hat{\alpha} ^ 2 \hat{\beta} ^ 2 + \hat{\alpha} \hat{\beta} ^ 2&= \frac1n \sum_{i=1}^n X_i^2 \\ \end{align*}yielding
\begin{align*} \hat{\alpha} &= \frac{\bar{X}^2}{\frac1n \sum_{i=1}^n X_i^2 - \bar{X}^2}\\ \hat{\beta} &= \frac{\frac1n \sum_{i=1}^n X_i^2 - \bar{X}^2}{\bar{X}}\\ \end{align*}
alpha = 54
beta = 11
n = 1000
X = rng.gamma(alpha, beta, size = n)
X_bar = np.mean(X)
X2_bar = np.mean(np.power(X, 2))
alpha_hat = X_bar ** 2 / (X2_bar - X_bar ** 2)
beta_hat = (X2_bar - X_bar ** 2) / X_bar
print(f"True parameter values: alpha: {alpha:.0f} beta: {beta:.0f}")
print(f"Method of Moments estimators (n={n}): alpha_hat: {alpha_hat:.3f}, beta_hat: {beta_hat:.3f}")
True parameter values: alpha: 54 beta: 11 Method of Moments estimators (n=1000): alpha_hat: 54.032, beta_hat: 10.932
Let $X_1, \dots, X_n \sim \text{Uniform}(a,b)$ where $a$ and $b$ are unknown parameters and $a < b$.
(a) Find the method of moments estimators for $a$ and $b$.
(b) Find the MLE $\hat{a}$ and $\hat{b}$.
(c) Let $\tau = \int x \, dF(x)$. Find the MLE of $\tau$.
(d) Let $\hat{\tau}$ be the MLE of $\tau$. Let $\tilde{\tau}$ be the nonparametric plug-in estimator of $\tau = \int x \, dF(x)$. Suppose that $a=1, b=3$ and $n=10$. Find the MSE of $\hat{\tau}$ by simulation. Find the MSE of $\tilde{\tau}$ analytically. Compare.
Solution:
(a) We have the system of equations
\begin{align*} \mu = \frac{\hat{a} + \hat{b}}{2} &= \frac1n \sum_{i=1}^n X_i\\ \mu^2 + \sigma^2 = \left(\frac{\hat{a} + \hat{b}}{2}\right)^2 + \frac{(\hat{b}-\hat{a})^2}{12} &= \frac1n \sum_{i=1}^n X_i^2 \end{align*}yielding
\begin{align*} \hat{a} &= \bar{X} - \sqrt{3 \left( \frac1n \sum_{i=1}^n X_i^2 - \bar{X}^2 \right)} \\ \hat{b} &= \bar{X} + \sqrt{3 \left( \frac1n \sum_{i=1}^n X_i^2 - \bar{X}^2 \right)} \end{align*}
a = 3
b = 7
n = 100000
X = rng.uniform(low=a, high=b, size = n)
X_bar = np.mean(X)
X2_bar = np.mean(np.power(X, 2))
a_hat = X_bar - np.sqrt(3 * (X2_bar - X_bar ** 2))
b_hat = X_bar + np.sqrt(3 * (X2_bar - X_bar ** 2))
print(f"True parameter values: a: {a:.0f} b: {b:.0f}")
print(f"Method of Moments estimators (n={n}): a_hat: {a_hat:.3f}, b_hat: {b_hat:.3f}")
True parameter values: a: 3 b: 7 Method of Moments estimators (n=100000): a_hat: 3.005, b_hat: 7.002
(b) By reasoning analogous to that in Example 9.12, the MLEs are $\hat{a} = \min\{X_i\}_{i=1}^n$ and $\hat{b} = \max\{X_i\}_{i=1}^n$. In particular, let
$$\mathcal{L}_n(a, b) = \prod_{i=1}^n f(X_i, a, b)$$be the likelihood function, where
\begin{align*} f(X_i, a, b) = \begin{cases} \frac{1}{b-a} & a \le X_i \le b \\ 0 & \text{otherwise} \end{cases} \end{align*}Observe that if $a > \hat{a}$, then there is a $j$ such that $X_j < a$, and thus $f(X_j, a, b) = 0$, meaning $\mathcal{L}_n(a,b) = 0$. A similar argument shows that if $b < \hat{b}$, $\mathcal{L}_n(a,b) = 0$. If $a \le \hat{a}$ and $b \ge \hat{b}$, the likelihood is $\mathcal{L}_n(a, b) = \left(\frac{1}{b-a}\right)^n$, which is increasing in $a$ and decreasing in $b$, and is therefore maximized at $a=\hat{a}$ and $b=\hat{b}$.
(c) Note that $\tau = \frac{a + b}{2}$, so, by equivariance of the MLE, its MLE is $\frac{\hat{a} + \hat{b}}{2}$.
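As a quick numerical check of (b) and (c), here is a sketch reusing the simulated Uniform(3, 7) sample X (and a, b, n) from the method of moments cell above:
a_mle = np.min(X)  # MLE of a: sample minimum
b_mle = np.max(X)  # MLE of b: sample maximum
tau_mle = (a_mle + b_mle) / 2  # MLE of tau by equivariance
print(f"MLEs (n={n}): a_hat: {a_mle:.3f}, b_hat: {b_mle:.3f}, tau_hat: {tau_mle:.3f}")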
(d) The nonparametric plug-in estimator for $\tau$ is $\bar{X}$, which is an unbiased estimator of the mean with variance $\sigma^2 / n$, where $\sigma^2 = (b-a)^2 / 12$. By the bias-variance decomposition, its mean squared error is thus $(b-a)^2 / (12n) = 1 / 30 \approx 0.033$. Meanwhile, we estimate the MSE of the MLE $\hat{\tau}$ by simulation:
n = 10
k = 1000000 # one million simulations
a = 1
b = 3
mu = (a + b) / 2
X = rng.uniform(low=a, high=b, size=(k,n))
a_hat = np.min(X, axis=1)
b_hat = np.max(X, axis=1)
tau_hat= (a_hat + b_hat) / 2
mse = np.mean(np.power(tau_hat - mu, 2))
print(f"MSE of MLE for tau: {mse:.3f}")
MSE of MLE for tau: 0.015
The simulated MSE of the MLE ($\approx 0.015$) is less than half the analytic MSE of the plug-in estimator ($\approx 0.033$), so here the MLE is the more efficient estimator of $\tau$.
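As a further check, the MSE of the plug-in estimator $\tilde{\tau} = \bar{X}$ can also be estimated from the same simulated samples (a sketch reusing the array X of shape (k, n) and mu from the cell above); it should land near the analytic value $1/30$:
tau_tilde = np.mean(X, axis=1)  # plug-in estimator for each simulated sample
mse_plugin = np.mean(np.power(tau_tilde - mu, 2))
print(f"MSE of plug-in estimator for tau: {mse_plugin:.3f}")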
Let $X_1, \dots, X_n \sim N(\mu, \sigma^2)$. Let $\tau$ be the .95 percentile, i.e. $P(X < \tau) = 0.95$.
(a) Find the MLE of $\tau$.
(b) Find an expression for an approximate $1 - \alpha$ confidence interval for $\tau$.
(c) Suppose the data are
3.23, -2.50, 1.88, -0.68, 4.43, 0.17,
1.03, -0.07, -0.01, 0.76, 1.76, 3.18,
0.33, -0.31, 0.30, -0.61, 1.52, 5.43,
1.54, 2.28, 0.42, 2.33, -1.03, 4.00,
0.39
Find the MLE $\hat{\tau}$. Find the standard error using the delta method. Find the standard error using the parametric bootstrap.
Solution:
(a) Note that $\tau = \mu + \Phi^{-1}(.95)\sigma = \mu + z_{0.05} \sigma$, where $\Phi(t)$ is the standard normal CDF. By equivariance of the MLE, we have $\hat{\tau} = \hat{\mu} + z_{0.05}\hat{\sigma}$, where $\hat{\mu} = \bar{X}$ and $\hat{\sigma} = S = \sqrt{n^{-1} \sum_i (X_i - \bar{X})^2}$ are the MLEs of $\mu$ and $\sigma$, respectively. All together,
$$\hat{\tau} = \bar{X} + z_{0.05}\sqrt{n^{-1} \sum_i (X_i - \bar{X})^2}.$$
(b) We use the multiparameter delta method to find the standard error. The Fisher information matrix is (see Exercise 8)
\begin{align*} I_n(\mu, \sigma) = \begin{bmatrix} \frac{n}{\sigma^2} & 0 \\ 0 & \frac{2n}{\sigma^2} \end{bmatrix}. \end{align*}Hence,
\begin{align*} J_n = I_n^{-1}(\mu, \sigma) = \frac1n \begin{bmatrix} \sigma^2 & 0 \\ 0 & \frac{\sigma^2}{2} \end{bmatrix}. \end{align*}We have $\tau = g(\mu, \sigma) = \mu + z_{0.05} \sigma$, so
\begin{align*} \nabla g = \begin{bmatrix} 1 \\ z_{0.05} \end{bmatrix} \end{align*}and thus
\begin{align*} \hat{\textrm{se}}(\hat{\tau}) &= \sqrt{ (\hat{\nabla} g)^T \hat{J}_n (\hat{\nabla}g)} \\ &= \hat{\sigma} \sqrt{\frac{1 + z_{0.05}^2 / 2}{n}} \end{align*}and a $1-\alpha$ confidence interval is given by
\begin{align*} (\hat{\tau} - z_{\alpha / 2} \hat{\textrm{se}}(\hat{\tau}), \hat{\tau} + z_{\alpha / 2} \hat{\textrm{se}}(\hat{\tau})). \end{align*}
(c)
X = np.array([
3.23, -2.50, 1.88, -0.68, 4.43, 0.17,
1.03, -0.07, -0.01, 0.76, 1.76, 3.18,
0.33, -0.31, 0.30, -0.61, 1.52, 5.43,
1.54, 2.28, 0.42, 2.33, -1.03, 4.00,
0.39
])
# computing the MLE of tau, tau_hat = x_bar + z_{0.05} * s
def tau(X):
    x_bar = np.mean(X)
    s = np.std(X)  # np.std defaults to ddof=0, i.e. the MLE of sigma
    z = norm.ppf(0.95)
    return x_bar + z * s
tau_hat = tau(X)
print(f'MLE of tau: {tau_hat:.3f}')
# computing standard error of tau_hat
# using the delta method
n = X.size
x_bar = np.mean(X)
s = np.std(X)
z = norm.ppf(0.95)
se_delta = s * math.sqrt((1 + ((z ** 2) / 2)) / n)
print(f'Standard error (delta method): {se_delta:.3f}')
# using the parametric bootstrap
B = 1000
tau_boot = np.empty(B)
x_bar = np.mean(X)
s = np.std(X)
for i in range(B):
    X_boot = rng.normal(loc=x_bar, scale=s, size=n)
    tau_boot[i] = tau(X_boot)
se_boot = math.sqrt(np.sum(np.power(tau_boot - tau_hat, 2)) / B)
print(f'Standard error (bootstrap method): {se_boot:.3f}')
MLE of tau: 4.180 Standard error (delta method): 0.558 Standard error (bootstrap method): 0.548
Let $X_1, \dots, X_n \sim \text{Uniform}(0, \theta)$. Show that the MLE is consistent. Hint: Let $Y = \max \{ X_1, \dots, X_n \}$. For any $c$, $P(Y < c) = P(X_1 < c, X_2 < c, \dots, X_n < c) = P(X_1 < c)P(X_2 < c)\dots P(X_n < c)$.
Solution:
The MLE of $\theta$ is $\hat{\theta}_n = \max \{ X_1, \dots, X_n \}$. Observe that since $0 \le X_i \le \theta$ for all $i \in \{1, \dots, n\}$, $0 \le \hat{\theta}_n \le \theta$, and therefore $P(|\hat{\theta}_n - \theta| > \epsilon) = P(\hat{\theta}_n < \theta - \epsilon)$, and, if $\epsilon \ge \theta$, then $P(\hat{\theta}_n < \theta - \epsilon) = 0$. Let $0 < \epsilon < \theta$. We have
\begin{align*} P(\hat{\theta}_n < \theta - \epsilon) &= \prod_{i=1}^n P(X_i < \theta - \epsilon) \tag{hint} \\ &= \left( \frac{\theta - \epsilon}{\theta}\right) ^ n \rightarrow 0 \text{ as } n \rightarrow \infty \end{align*}Thus $\hat{\theta}_n \xrightarrow{P} \theta$.
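A quick simulation illustrates the consistency (a sketch, assuming $\theta = 1$ and a few increasing sample sizes):
theta = 1
for n_sim in [10, 100, 1000, 10000]:
    X_sim = rng.uniform(low=0, high=theta, size=n_sim)
    print(f"n = {n_sim:>6}: theta_hat = {np.max(X_sim):.5f}")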
Let $X_1, \dots, X_n \sim \text{Poisson}(\lambda)$. Find the method of moments estimator, the maximum likelihood estimator and the Fisher information $I(\lambda)$.
Solution:
The first moment of the Poisson distribution is $\lambda$, and the first sample moment is $\bar{X}$. Thus, the method of moments estimator is $\bar{X}$.
The likelihood function is:
\begin{align*} \mathcal{L}_n(\lambda) &= \prod_{i=1}^n e^{-\lambda} \frac{\lambda ^ {X_i}}{X_i !} \\ &= e^{-n\lambda} \prod_{i=1}^n \frac{\lambda ^ {X_i}}{X_i !} \end{align*}and, hence, the log-likelihood function is:
\begin{align*} \mathcal{l}_n(\lambda) &= \log(\mathcal{L}_n(\lambda)) \\ &= -n \lambda + \log(\lambda) \sum_{i=1}^n X_i - \sum_{i=1}^n \log(X_i !). \end{align*}Setting $\frac{d}{d \lambda} \mathcal{l}_n(\lambda) \mid_{\hat{\lambda}} = 0$, we have $\hat{\lambda} = \bar{X}$; i.e., the MLE is also $\bar{X}$.
We now compute the Fisher information. The score function is $s(x;\lambda) = \frac{\partial}{\partial \lambda} \log(f(x;\lambda))$, where $f(x;\lambda) = e^{-\lambda} \frac{\lambda^{x}}{x!}$, hence
\begin{align*} s(X; \lambda) = \frac{X}{\lambda} - 1 \end{align*}and
\begin{align*} -s'(X; \lambda) = \frac{X}{\lambda ^ 2} \end{align*}so
\begin{align*} I(\lambda) = E_{\lambda}(-s'(X;\lambda)) = \frac{1}{\lambda} \end{align*}and the Fisher information is thus $I_n(\lambda) = nI(\lambda) = \frac{n}{\lambda}$.
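As a numerical check (a sketch, assuming $\lambda = 3$ and $n = 500$), the estimate is the sample mean and its estimated standard error $\sqrt{1/I_n(\hat{\lambda})} = \sqrt{\hat{\lambda}/n}$ should be close to the true value $\sqrt{\lambda/n}$:
lam = 3
n = 500
X = rng.poisson(lam, size=n)
lam_hat = np.mean(X)  # method of moments estimate = MLE
se_hat = np.sqrt(lam_hat / n)  # estimated standard error from the Fisher information
print(f"lambda_hat: {lam_hat:.3f}, estimated se: {se_hat:.3f}, true se: {np.sqrt(lam / n):.3f}")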
Let $X_1, \dots, X_n \sim N(\theta, 1)$. Define
\begin{align*} Y_i = \begin{cases} 1 & \text{if } X_i > 0 \\ 0 & \text{if } X_i \le 0 \end{cases}. \end{align*}Let $\psi = P(Y_1 = 1)$.
(a) Find the maximum likelihood estimator $\hat{\psi}$ of $\psi$.
We have
\begin{align*} \psi &= P(Y_1 = 1) \\ &= P(X_1 > 0) \\ &= P(X_1 - \theta > -\theta) \\ &= 1 - P(Z \le -\theta) \\ &= 1 - \Phi(-\theta) \end{align*}where $Z$ is a standard normal r.v., and $\Phi$ is the standard normal CDF. The MLE of the mean of a normal distribution is the sample mean, and, thus, by equivariance of the MLE, $\hat{\psi} = 1 - \Phi(-\bar{X}) = \Phi(\bar{X})$.
(b) Find an approximate 95 percent confidence interval for $\psi$.
From Example 9.21, we know that the Fisher information is $I_n(\theta) = n$, and thus we have the standard error $\hat{se}(\hat{\theta}) = \sqrt{1 / I_n(\hat{\theta}_n)} = n^{-1/2}$. By the delta method, the standard error of $\hat{\psi}$ is $|g'(\hat{\theta})|\hat{se}(\hat{\theta}_n)$. Here, $g(\hat{\theta}) = 1 - \Phi(-\hat{\theta})$, so $|g'(\hat{\theta})| = \phi(-\hat{\theta}) = \phi(\hat{\theta})$, where $\phi$ is the standard normal PDF, and the standard error of $\hat{\psi}$ is $\phi(\bar{X})n^{-1/2}$.
A 95 percent confidence interval is then given by:
$$(\Phi(\bar{X}) - z_{0.025}\phi(\bar{X})n^{-1/2}, \Phi(\bar{X}) + z_{0.025}\phi(\bar{X})n^{-1/2})$$
theta = 0.3
n = 100
X = rng.normal(loc=theta, scale=1, size=n)
X_bar = np.mean(X)
psi_hat = norm.cdf(X_bar)
alpha = 0.05
z = norm.ppf(1 - alpha / 2)
se = norm.pdf(X_bar) * n ** (-0.5)
print(f"95% CI for psi: ({psi_hat - z * se:.3f},{psi_hat + z * se:.3f})")
95% CI for psi: (0.486,0.641)
(c) Define $\tilde{\psi} = (1 / n) \sum_i Y_i$. Show that $\tilde{\psi}$ is a consistent estimator of $\psi$.
Observe that $Y_i$ is a Bernoulli r.v. with expectation $\Phi(\theta)$, so, by the (weak) Law of Large Numbers, $\tilde\psi = \bar{Y} \xrightarrow{P} \Phi(\theta) = \psi$.
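A small simulation illustrates this (a sketch reusing $\theta = 0.3$ from the cell above): as $n$ grows, both $\tilde{\psi}$ and the MLE $\hat{\psi}$ settle near $\psi = \Phi(\theta)$.
psi_true = norm.cdf(theta)
for n_sim in [10, 100, 1000, 10000]:
    X_sim = rng.normal(loc=theta, scale=1, size=n_sim)
    psi_tilde = np.mean(X_sim > 0)      # proportion of Y_i = 1
    psi_mle = norm.cdf(np.mean(X_sim))  # the MLE from part (a)
    print(f"n = {n_sim:>6}: psi_tilde = {psi_tilde:.4f}, psi_hat = {psi_mle:.4f}, psi = {psi_true:.4f}")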
(d) Compute the asymptotic relative efficiency of $\tilde{\psi}$ to $\hat{\psi}$. Hint: Use the delta method to get the standard error of the MLE. Then compute the standard error (i.e. the standard deviation) of $\tilde{\psi}$.
We know the standard error of the MLE, $\hat{\psi}$, to be $\phi(\bar{X})n^{-1/2}$.
Meanwhile, the variance of $\tilde{\psi}$ is
\begin{align*} Var(\tilde{\psi}) &= Var(\frac{1}{n} \sum_i Y_i) \\ &= \frac{1}{n^2} \sum_i Var(Y_i) \\ &= \frac{1}{n} \Phi(\theta)(1 - \Phi(\theta)). \end{align*}Thus, the asymptotic relative efficiency is
\begin{align*} \text{ARE}(\tilde{\psi}, \hat{\psi}) = \frac{\phi(\theta)^2}{\Phi(\theta)(1 - \Phi(\theta))} \end{align*}
(e) Suppose that the data are not really normal. Show that $\hat{\psi}$ is not consistent. What, if anything, does $\hat{\psi}$ converge to?
Now, the true value of $\psi$ is $\psi = P(Y_1 = 1) = P(X_1 > 0) = 1 - F_X(0)$, where $F_X$ is the CDF of the data.
By the law of large numbers, $\bar{X} \xrightarrow{P} \mu_X$, where $\mu_X$ is the mean of the data. Then, since $\Phi$ is continuous, Theorem 5.5f gives $\hat{\psi} = \Phi(\bar{X}) \xrightarrow{P} \Phi(\mu_X)$. So $\hat{\psi}$ converges in probability to $\Phi(\mu_X)$, which is not necessarily equal to $1 - F_X(0)$.
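To see this concretely, here is a sketch with non-normal data (an assumed centered exponential, $X_i = E_i - 1$ with $E_i \sim \text{Exp}(1)$), for which $\mu_X = 0$, so $\hat{\psi} \rightarrow \Phi(0) = 0.5$, while $\psi = P(X_1 > 0) = e^{-1} \approx 0.368$:
n_sim = 100000
X_sim = rng.exponential(scale=1, size=n_sim) - 1  # non-normal data with mean 0
psi_true = np.exp(-1)               # P(X_1 > 0) = P(Exp(1) > 1)
psi_mle = norm.cdf(np.mean(X_sim))  # converges to Phi(mu_X) = Phi(0) = 0.5
psi_tilde = np.mean(X_sim > 0)      # the consistent estimator from (c)
print(f"psi: {psi_true:.3f}, psi_hat (MLE): {psi_mle:.3f}, psi_tilde: {psi_tilde:.3f}")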
(Comparing two treatments.) $n_1$ people are given treatment 1 and $n_2$ people are given treatment 2. Let $X_1$ be the number of people on treatment 1 who respond favorably to the treatment and let $X_2$ be the number of people on treatment 2 who respond favorably. Assume $X_1 \sim \text{Binomial}(n_1, p_1)$ and $X_2 \sim \text{Binomial}(n_2, p_2)$. Let $\psi = p_1 - p_2$.
(a) Find the MLE $\hat{\psi}$ for $\psi$.
We have
\begin{align*} \mathcal{L}(p_1,p_2) &= {n_1 \choose x_1} p_1 ^ {x_1} (1 - p_1) ^ {n_1 - x_1} {n_2 \choose x_2} p_2 ^ {x_2} (1 - p_2) ^ {n_2 - x_2} \\ \Rightarrow \mathcal{l}(p_1,p_2) &= \sum_{i=1}^2 \log {n_i \choose x_i} + x_i \log(p_i) + (n_i - x_i) \log(1 - p_i) \end{align*}Setting the partial derivatives to zero, we find the MLEs $\hat{p}_1 = \frac{X_1}{n_1}$ and $\hat{p}_2 = \frac{X_2}{n_2}$. By equivariance of the MLE, $\hat{\psi} = \hat{p}_1 - \hat{p}_2 = \frac{X_1}{n_1} - \frac{X_2}{n_2}$.
(b) Find the Fisher information matrix, $I(p_1, p_2)$.
The Fisher information matrix is the negative expected Hessian of the log-likelihood. We have
\begin{align*} \frac{\partial \mathcal{l}(p_1, p_2)}{\partial p_i} &= \frac{x_i}{p_i} + \frac{x_i - n_i}{1 - p_i}, \\ H_{ii} = \frac{\partial^2 \mathcal{l}(p_1, p_2)}{\partial p_i^2} &= -\frac{x_i}{p_i^2} + \frac{x_i - n_i}{(1 - p_i)^2}, \\ H_{12} = H_{21} = \frac{\partial^2 \mathcal{l}(p_1, p_2)}{\partial p_1 \partial p_2} &= 0, \end{align*}so
\begin{align*} E[H_{ii}] &= -\frac{n_i}{p_i} + \frac{n_i p_i - n_i}{(1 - p_i)^2} = - \frac{n_i}{p_i(1-p_i)} \\ E[H_{12}] &= E[H_{21}] = 0\\ \end{align*}and the Fisher Information Matrix is then
\begin{align*} I(p_1, p_2) = \begin{bmatrix} \frac{n_1}{p_1(1-p_1)} & 0 \\ 0 & \frac{n_2}{p_2(1-p_2)} \\ \end{bmatrix}. \end{align*}
(c) Use the multiparameter delta method to find the asymptotic standard error of $\hat{\psi}$.
We have the inverse of the Fisher Information Matrix:
\begin{align*} J = J(p_1, p_2) = I^{-1}(p_1, p_2) = \begin{bmatrix} \frac{p_1(1-p_1)}{n_1} & 0 \\ 0 & \frac{p_2(1-p_2)}{n_2} \\ \end{bmatrix}. \end{align*}Since $\psi = g(p_1, p_2) = p_1 - p_2$,
\begin{align*} \nabla g = \begin{bmatrix} 1 \\ -1 \end{bmatrix}. \end{align*}Thus the estimated standard error of $\hat{\psi}$ is
\begin{align*} \hat{\text{se}}(\hat{\psi}) &= \sqrt{(\hat{\nabla} g)^T \hat{J} (\hat{\nabla} g)} \\ &= \left( \frac{\hat{p}_1(1-\hat{p}_1)}{n_1} + \frac{\hat{p}_2(1-\hat{p}_2)}{n_2} \right) ^ {1 / 2} \end{align*}
(d) Suppose that $n_1 = n_2 = 200$, $X_1 = 160$ and $X_2 = 148$. Find $\hat{\psi}$. Find an approximate 90 percent confidence interval for $\psi$ using (i) the delta method and (ii) the parametric bootstrap.
n_1 = 200
n_2 = n_1
X_1 = 160
X_2 = 148
p_1_hat = X_1 / n_1
p_2_hat = X_2 / n_2
psi_hat = p_1_hat - p_2_hat
alpha = 0.10
z = norm.ppf(1 - alpha / 2)
se_delta = math.sqrt((p_1_hat * (1 - p_1_hat) / n_1) + (p_2_hat * (1 - p_2_hat) / n_2))
print(f"90% confidence interval (delta method): ({psi_hat - z * se_delta:.3f}, {psi_hat + z * se_delta:.3f})")
# parametric bootstrap method
B = 10000
psi_boot = np.empty(B)
for i in range(B):
    X_1_boot = rng.binomial(n_1, p_1_hat)
    X_2_boot = rng.binomial(n_2, p_2_hat)
    psi_boot[i] = X_1_boot / n_1 - X_2_boot / n_2  # bootstrap replication of psi_hat = p_1_hat - p_2_hat
se_boot = math.sqrt(np.var(psi_boot))
print(f"90% confidence interval (bootstrap method): ({psi_hat - z * se_boot:.3f}, {psi_hat + z * se_boot:.3f})")
90% confidence interval (delta method): (-0.009, 0.129) 90% confidence interval (bootstrap method): (-0.009, 0.129)
Find the Fisher information matrix for Example 9.29.
Solution:
Let $X_1, \dots, X_n \sim N(\mu, \sigma^2)$. The likelihood function is (ignoring some constants)
\begin{align*} \mathcal{L}_n(\mu, \sigma) = \sigma^{-n} \exp \left\{ -\frac{1}{2\sigma^2} \sum_i (X_i - \mu)^2 \right\} \end{align*}and the log-likelihood function is
\begin{align*} \mathcal{l}_n(\mu, \sigma) = -n \log(\sigma) -\frac{1}{2\sigma^2} \sum_i (X_i - \mu)^2. \end{align*}Computing the Hessian, $H$:
\begin{align*} \frac{\partial \mathcal{l}_n}{\partial \mu} &= \frac{1}{\sigma^2} \sum_i (X_i - \mu), \\ H_{11} = \frac{\partial^2 \mathcal{l}_n}{\partial \mu^2} &= -\frac{n}{\sigma^2}\\ H_{12} = \frac{\partial^2 \mathcal{l}_n}{\partial \sigma \partial \mu} &= -\frac{2}{\sigma^3} \sum_i (X_i - \mu)\\ \frac{\partial \mathcal{l}_n}{\partial \sigma} &= -\frac{n}{\sigma} +\frac{1}{\sigma^3} \sum_i (X_i - \mu)^2. \\ H_{21} = \frac{\partial^2 \mathcal{l}_n}{\partial \mu \partial \sigma} &= -\frac{2}{\sigma^3} \sum_i (X_i - \mu)\\ H_{22} = \frac{\partial^2 \mathcal{l}_n}{\partial \sigma^2} &= \frac{n}{\sigma^2} - \frac{3}{\sigma^4} \sum_i (X_i - \mu) ^ 2. \end{align*}The Fisher information matrix is the negative expected value of the Hessian:
\begin{align*} I_n(\mu, \sigma) &= \begin{bmatrix} \frac{n}{\sigma^2} & 0 \\ 0 & \frac{2n}{\sigma^2} \end{bmatrix} \end{align*}where, for $E(H_{12})$ and $E(H_{21})$, we observe $E(X_i) = \mu$, and for $E(H_{22})$, we observe the bias-variance decomposition:
\begin{align*} E\left(\sum_i (X_i - \mu)^2\right) = n(0)^2 + n\sigma^2 \end{align*}
Let $X_1, \dots, X_n \sim \text{Normal}(\mu, 1)$. Let $\theta = e^{\mu}$ and let $\hat{\theta} = e^{\bar{X}}$ be the MLE. Create a data set (using $\mu = 5$) consisting of $n=100$ observations.
(a) Use the delta method to get $\hat{se}$ and a 95 percent confidence interval for $\theta$. Use the parametric bootstrap to get $\hat{se}$ and 95 percent confidence interval for $\theta$. Use the nonparametric bootstrap to get $\hat{se}$ and 95 percent confidence interval for $\theta$. Compare your answers.
Solution: In this case, the score function is $s(X; \mu) = X - \mu$ and $I(\mu) = 1$, so $I_n(\mu) = n$ and $\hat{se}(\hat{\mu}) = \frac{1}{\sqrt{n}}$. By the delta method with $g(\mu) = e^{\mu}$, $\hat{se} = \hat{se}(\hat{\theta}) = |g'(\hat{\mu})|\,\hat{se}(\hat{\mu}) = \frac{e^{\bar{X}}}{\sqrt{n}}$.
mu = 5
n = 100
X = rng.normal(mu, scale=1, size=n)
X_bar = np.mean(X)
theta = np.e ** 5
theta_hat = np.e ** X_bar
alpha = 0.05
z = norm.ppf(1 - alpha / 2)
print(f"True value of theta: {theta:.3f}")
print(f"MLE: {theta_hat:.3f}")
# delta method
se_delta = np.sqrt(1 / n) * np.abs(np.e ** X_bar)
print(f"{100 * (1 - alpha):.0f}% C.I. (delta method): ({theta_hat - z * se_delta:.3f}, {theta_hat + z * se_delta:.3f})")
# parametric bootstrap
B = 100000
theta_param_boot = np.empty(B)
for i in range(B):
    X_boot = rng.normal(X_bar, scale=1, size=n)
    theta_param_boot[i] = np.e ** np.mean(X_boot)
se_param_boot = np.std(theta_param_boot)
print(f"{100 * (1 - alpha):.0f}% C.I. (parametric bootstrap): ({theta_hat - z * se_param_boot:.3f}, {theta_hat + z * se_param_boot:.3f})")
# nonparametric bootstrap
theta_boot = np.empty(B)
for i in range(B):
    X_boot = rng.choice(X, size=n, replace=True)
    theta_boot[i] = np.e ** np.mean(X_boot)
se_boot = np.std(theta_boot)
print(f"{100 * (1 - alpha):.0f}% C.I. (nonparametric bootstrap): ({theta_hat - z * se_boot:.3f}, {theta_hat + z * se_boot:.3f})")
True value of theta: 148.413 MLE: 143.331 95% C.I. (delta method): (115.239, 171.424) 95% C.I. (parametric bootstrap): (115.071, 171.592) 95% C.I. (nonparametric bootstrap): (118.237, 168.425)
We observe good agreement between the delta method and the parametric bootstrap. In this run, the C.I. from the nonparametric bootstrap is somewhat narrower than those produced by the other two methods.
(b) Plot a histogram of the bootstrap replications for the parametric and nonparametric bootstraps. These are estimates of the distribution of $\hat{\theta}$. The delta method also gives an approximation to this distribution, namely, $\text{Normal}(\hat{\theta}, se^2)$. Compare these to the true sampling distribution of $\hat{\theta}$ (which you can get by simulation). Which approximation - parametric bootstrap, nonparametric bootstrap, or delta method - is closer to the true distribution?
# true sampling distribution of theta_hat = exp(X_bar), where X_bar ~ N(mu, 1/n)
def theta_cdf(x):
    return norm.cdf(math.log(x), loc=mu, scale=np.sqrt(1 / n))

bins = np.linspace(50, 200, 150)
theta_cdf_bins = list(map(theta_cdf, bins))
# approximate the density by differencing the CDF and dividing by the bin width
theta_cdf_bins_delta = np.empty(len(bins))
theta_cdf_bins_delta[0] = 0
theta_cdf_bins_delta[1:] = np.diff(theta_cdf_bins) / np.diff(bins)
fig = go.Figure()
fig.add_trace(go.Histogram(x=theta_param_boot,
opacity=0.5,
histnorm='probability density',
name='Parametric Bootstrap'))
fig.add_trace(go.Scatter(x=bins, y=theta_cdf_bins_delta, name='True Sampling Distribution'))
fig.show()
fig = go.Figure()
fig.add_trace(go.Histogram(x=theta_boot,
opacity=0.5,
histnorm='probability density',
name='Nonparametric Bootstrap'))
fig.add_trace(go.Scatter(x=bins, y=theta_cdf_bins_delta, name='True Sampling Distribution'))
fig.show()
fig = go.Figure()
fig.add_trace(go.Scatter(x=bins, y=norm.pdf(bins, loc=theta_hat, scale=se_delta), name='Delta Approximation'))
fig.add_trace(go.Scatter(x=bins, y=theta_cdf_bins_delta, name='True Sampling Distribution'))
fig.show()
No clear winner as far as I can tell.
Let $X_1, \dots, X_n \sim \text{Uniform}(0, \theta)$. The MLE is $\hat{\theta} = X_{(n)} = \max \{X_1, \dots, X_n\}$. Generate a dataset of size 50 with $\theta = 1$.
(a) Find the distribution of $\hat{\theta}$ analytically. Compare the true distribution of $\hat{\theta}$ to the histograms from the parametric and nonparametric bootstraps.
Solution: From Chapter 8, Problem #7, the CDF of $\hat{\theta}$ is
\begin{equation*} F_{\hat{\theta}}(x) = \begin{cases} 0 & x < 0 \\ \left(\frac{x}{\theta} \right)^n & 0 \le x < \theta \\ 1 & \theta \le x \end{cases} \end{equation*}which has PDF $\frac{n}{\theta^n} x^{n-1} \mathbb{1}_{(0,\theta)}(x)$
n = 50
theta = 1
# true distribution of theta_hat
def theta_hat_pdf(x):
    return (n / (theta ** n)) * (x ** (n - 1))

xx = np.linspace(0, theta, 100)
theta_hat_density = list(map(theta_hat_pdf, xx))
# get data
X = rng.uniform(low=0, high=theta, size=n)
theta_hat = max(X)
# bootstrap
B = 1000
T_boot = np.empty(B)
for i in range(B):
    X_boot = rng.choice(X, size=n, replace=True)
    T_boot[i] = max(X_boot)
# parametric bootstrap
T_param_boot = np.empty(B)
for i in range(B):
    X_param_boot = rng.uniform(0, theta_hat, size=n)
    T_param_boot[i] = max(X_param_boot)
# plot nonparametric bootstrap vs. true density
fig = go.Figure(data=go.Histogram(x=T_boot,
nbinsx=10,
opacity=0.5,
histnorm='probability density',
name='Nonparametric Bootstrap Replications'))
fig.add_trace(go.Scatter(x=xx, y=theta_hat_density, name='True Sampling Distribution'))
fig.update_xaxes(range=[0.85, 1.05])
fig.show()
# plot parametric bootstrap vs. true density
fig = go.Figure(data=go.Histogram(x=T_param_boot,
nbinsx=10,
opacity=0.5,
histnorm='probability density',
name='Parametric Bootstrap Replications'))
fig.add_trace(go.Scatter(x=xx, y=theta_hat_density, name='True Sampling Distribution'))
fig.update_xaxes(range=[0.85, 1.05])
fig.show()
(b) This is a case where the bootstrap does very poorly. In fact, we can prove that this is the case. Show that $P(\hat{\theta} = \theta) = 0$ and yet $P(\hat{\theta}^* = \hat{\theta}) \approx .632$. Hint: show that $P(\hat{\theta}^* = \hat{\theta}) = 1 - (1 - \frac{1}{n})^n$ then take the limit as $n$ gets large.
Solution: Since $\hat{\theta}$ has a continuous distribution (its CDF is given in part (a)), $P(\hat{\theta} = \theta) = 0$. For the nonparametric bootstrap, $\hat{\theta}^* = \hat{\theta}$ exactly when at least one of the $n$ bootstrap draws equals $X_{(n)}$, so $P(\hat{\theta}^* = \hat{\theta}) = 1 - \left(1 - \frac{1}{n}\right)^n \rightarrow 1 - e^{-1} \approx 0.632$ as $n \rightarrow \infty$. See also Chapter 8, Problem #7.
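A quick empirical check (a sketch reusing the nonparametric bootstrap replications T_boot and theta_hat from part (a)): the fraction of replications exactly equal to $\hat{\theta}$ should be close to $1 - (1 - 1/n)^n$.
frac_equal = np.mean(T_boot == theta_hat)
print(f"Fraction of bootstrap replications equal to theta_hat: {frac_equal:.3f}")
print(f"1 - (1 - 1/n)^n = {1 - (1 - 1 / n) ** n:.3f}")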