import numpy as np
import pandas as pd
import math
from numpy.random import default_rng
rng = default_rng(42)
from scipy.stats import norm
import matplotlib as mpl
from matplotlib import pyplot as plt
import plotly.graph_objects as go
from tqdm.notebook import tqdm

mpl.rcParams['font.size'] = 18


Y = np.array([576, 635, 558, 578, 666, 580, 555, 661, 
                 651, 605, 653, 575, 545, 572, 594]) # LSAT
Z = np.array([3.39, 3.30, 2.81, 3.03, 3.44, 3.07, 3.00, 3.43,
                3.36, 3.13, 3.12, 2.74, 2.76, 2.88, 3.96]) # GPA
n_1 = Y.size
n_2 = Z.size

def corr(Y, Z):
    Y_bar = np.mean(Y)
    Z_bar = np.mean(Z)
    return np.sum((Y - Y_bar) * (Z - Z_bar)) / np.sqrt(np.sum(np.power((Y - Y_bar), 2)) * np.sum(np.power((Z - Z_bar), 2)))
theta_hat = corr(Y, Z)
print(f"Estimate of correlation coefficient: {theta_hat:.3f}")

B = 10000
corr_boot = np.empty(B)
for i in range(B):
    indices = rng.choice(list(range(n_1)), size=n_1, replace=True)
    Y_B = Y[indices]
    Z_B = Z[indices]
    corr_boot[i] = corr(Y_B, Z_B)
se = math.sqrt(np.var(corr_boot))
print(f"Bootstrap estimate of standard error: {se:.3f}")

Estimate of correlation coefficient: 0.546
Bootstrap estimate of standard error: 0.196


alpha = 0.05
z = norm.ppf(1-alpha/2)
print("95% confidence intervals:")
print(f"Normal method: ({corr(Y,Z) - z * se:.3f}, {corr(Y,Z) + z * se:.3f})")

a_hat = 2 * theta_hat - np.quantile(corr_boot, 1 - alpha / 2)
b_hat = 2 * theta_hat - np.quantile(corr_boot, alpha / 2)
print(f"Pivot method: ({a_hat:.3f}, {b_hat:.3f})")

lb = np.quantile(corr_boot, alpha / 2)
ub = np.quantile(corr_boot, 1 - alpha / 2)
print(f"Percentile method: ({lb:.3f}, {ub:.3f})")

95% confidence intervals:
Normal method: (0.162, 0.930)
Pivot method: (0.153, 0.894)
Percentile method: (0.198, 0.939)


n = 50

def skew(x):
    mu_bar = np.mean(x)
    sigma = np.std(x, ddof=1)
    return np.sum((x - mu_bar) ** 3) / (n * sigma ** 3)

Y = rng.normal(loc=0, scale=1, size=n)
X = np.exp(Y)
theta_hat = skew(X)
print(f'Estimated skewness: {theta_hat:.3f}')

Estimated skewness: 3.365


def get_bootstrap_se(X):
    B = 10000
    skew_boot = np.empty(B)
    for i in range(B):
        X_boot = rng.choice(X, size=n, replace=True)
        skew_boot[i] = skew(X_boot)
    return math.sqrt(np.var(skew_boot)), skew_boot

se, skew_boot = get_bootstrap_se(X)
print(f'Bootstrap standard error: {se:.3f}')

alpha = 0.05
z = norm.ppf(1-alpha/2)
print("95% confidence intervals:")
print(f"Normal method: ({theta_hat - z * se:.3f}, {theta_hat + z * se:.3f})")

a_hat = 2 * theta_hat - np.quantile(skew_boot, 1 - alpha / 2)
b_hat = 2 * theta_hat - np.quantile(skew_boot, alpha / 2)
print(f"Pivot method: ({a_hat:.3f}, {b_hat:.3f})")

lb = np.quantile(skew_boot, alpha / 2)
ub = np.quantile(skew_boot, 1 - alpha / 2)
print(f"Percentile method: ({lb:.3f}, {ub:.3f})")

Bootstrap standard error: 0.817
95% confidence intervals:
Normal method: (1.764, 4.966)
Pivot method: (2.168, 5.542)
Percentile method: (1.188, 4.562)


m = 100 # num trials
normal_coverage = 0
pivot_coverage = 0
percentile_coverage = 0

alpha = 0.05
z = norm.ppf(1-alpha/2)
theta = (math.e + 2) * math.sqrt(math.e - 1)

for j in range(m):
    Y = rng.normal(loc=0, scale=1, size=n)
    X = np.exp(Y)
    theta_hat = skew(X)
    se, skew_boot = get_bootstrap_se(X)
    if (theta_hat - z * se <= theta) and (theta <= theta_hat + z * se):
        normal_coverage += 1
        
    a_hat = 2 * theta_hat - np.quantile(skew_boot, 1 - alpha / 2)
    b_hat = 2 * theta_hat - np.quantile(skew_boot, alpha / 2)
    if (a_hat <= theta) and (theta <= b_hat):
        pivot_coverage += 1
    
    lb = np.quantile(skew_boot, alpha / 2)
    ub = np.quantile(skew_boot, 1 - alpha / 2)
    if (lb <= theta) and (theta <= ub):
        percentile_coverage += 1
print(f'Normal Coverage: {100 * normal_coverage / m}%')
print(f'Pivot Coverage: {100 * pivot_coverage / m}%')
print(f'Percentile Coverage: {100 * percentile_coverage / m}%')

Normal Coverage: 11.0%
Pivot Coverage: 14.0%
Percentile Coverage: 1.0%


from scipy.stats import t

theta = (t.ppf(.75, df=3) - t.ppf(0.25, df=3)) / 1.34
print(f"Theta: {theta:.3f}")

n = 25
X = rng.standard_t(df=3, size=n)

def T(x):
    q_b = np.quantile(x, 0.75)
    q_a = np.quantile(x, 0.25)
    return (q_b - q_a) / 1.34

theta_hat = T(X)
print(f"Theta_hat: {theta_hat:.3f}")

print("Getting bootstrap standard error...")
def get_bootstrap(X):
    n = X.size
    B = 1000
    T_boot = np.empty(B)
    for i in range(B):
        X_boot = rng.choice(X, size=n, replace=True)
        T_boot[i] = T(X_boot)
    return T_boot

T_boot = get_bootstrap(X)

theta_hat = T(X)
se = math.sqrt(np.var(T_boot))
print(f"Bootstrap standard error: {se:.3f}")

def get_confidence_intervals(theta_hat, T_boot, show=False):
    
    se = math.sqrt(np.var(T_boot))
    
    alpha = 0.05
    z = norm.ppf(1 - alpha / 2)
    normal_ci = (theta_hat - z * se, theta_hat + z * se)
    
    a_hat = 2 * theta_hat - np.quantile(T_boot, 1 - alpha / 2)
    b_hat = 2 * theta_hat - np.quantile(T_boot, alpha / 2)
    pivot_ci = (a_hat, b_hat)
    
    lb = np.quantile(T_boot, alpha / 2)
    ub = np.quantile(T_boot, 1 - alpha / 2)
    percentile_ci = (lb, ub)
    if show:
        print("95% confidence intervals:")
        print(f"Normal method: ({normal_ci[0]:.3f}, {normal_ci[1]:.3f})")
        print(f"Pivot method: ({pivot_ci[0]:.3f}, {pivot_ci[1]:.3f})")
        print(f"Percentile method: ({percentile_ci[0]:.3f}, {percentile_ci[1]:.3f})")
    return normal_ci, pivot_ci, percentile_ci

normal_ci, pivot_ci, percentile_ci = get_confidence_intervals(theta_hat, T_boot, show=True)

Theta: 1.142
Theta_hat: 1.378
Getting bootstrap standard error...
Bootstrap standard error: 0.330
95% confidence intervals:
Normal method: (0.731, 2.025)
Pivot method: (0.822, 2.027)
Percentile method: (0.729, 1.935)


results = {'Normal LB':[], 'Normal UB':[], 'Pivot LB':[], 'Pivot UB':[], 'Percentile LB':[], 'Percentile UB':[]}
for m in range(100):
    X = rng.standard_t(df=3, size=n)
    theta_hat = T(X)
    T_boot = get_bootstrap(X)
    normal_ci, pivot_ci, quantile_ci = get_confidence_intervals(theta_hat, T_boot)
    results['Normal LB'] += [normal_ci[0]]
    results['Normal UB'] += [normal_ci[1]]
    results['Pivot LB'] += [pivot_ci[0]]
    results['Pivot UB'] += [pivot_ci[1]]
    results['Percentile LB'] += [quantile_ci[0]]
    results['Percentile UB'] += [quantile_ci[1]]
df = pd.DataFrame.from_records(results)
for method in ['Normal', 'Pivot', 'Percentile']:
    df[f'{method} Length'] = df[f'{method} UB'] - df[f'{method} LB']
    df[f'{method} Cover'] = (theta <= df[f'{method} UB']) & (df[f'{method} LB'] <= theta)


summary_data = []
for method in ['Normal', 'Pivot', 'Percentile']:
    summary_data.append([method, df[f'{method} Length'].mean(), df[f'{method} Cover'].mean()])
summary = pd.DataFrame(summary_data, columns=['Method','Average Length','Coverage'])
summary


mu = 5
n = 100
theta = math.e ** mu
X = rng.normal(loc=mu, scale=1, size=n)
X_bar = np.mean(X)
math.e ** X_bar

def T(x):
    x_bar = np.mean(x)
    return math.e ** x_bar

B = 100000
T_boot = np.empty(B)
for i in range(B):
    X_boot = rng.choice(X, size=n, replace=True)
    T_boot[i] = T(X_boot)

theta_hat = T(X)
se = math.sqrt(np.var(T_boot))

alpha = 0.05
z = norm.ppf(1 - alpha / 2)

print(f"True Value: {theta:.3f}")
print(f"Estimate: {theta_hat:.3f}")
print(f"Normal 95% CI: ({theta_hat - z * se:.3f}, {theta_hat + z * se:.3f})")

True Value: 148.413
Estimate: 167.502
Normal 95% CI: (134.503, 200.500)


def theta_cdf(x):
    return norm.cdf(math.log(x), loc=5, scale = np.sqrt(1 / n))

bins = np.linspace(50, 200, 150)
theta_cdf_bins = list(map(theta_cdf, bins))
theta_cdf_bins_delta = np.empty(len(bins))
theta_cdf_bins_delta[0] = 0
theta_cdf_bins_delta[1:] = np.diff(theta_cdf_bins)


fig = go.Figure(data=go.Histogram(x=T_boot, 
                                  opacity=0.5, 
                                  histnorm='probability density', 
                                  name='Bootstrap Replications'))

fig.add_trace(go.Scatter(x=bins, y=theta_cdf_bins_delta, name='True Sampling Distribution'))

fig.update_layout(
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=0.99
    ),
    
)
fig.show()


n = 50
theta = 1
X = rng.uniform(low=0, high=theta, size=n)
theta_hat = max(X)

B = 1000
T_boot = np.empty(B)
for i in range(B):
    X_boot = rng.choice(X, size=n, replace=True)
    T_boot[i] = max(X_boot)

xx = np.linspace(0, theta, 100)

def theta_hat_density(x):
    return (n / (theta ** n)) * (x ** (n - 1))

theta_hat_density = list(map(theta_hat_density, xx))

fig = go.Figure(data=go.Histogram(x=T_boot, 
                                  opacity=0.5, 
                                  histnorm='probability density', 
                                  name='Bootstrap Replications'))

fig.add_trace(go.Scatter(x=xx, y=theta_hat_density, name='True Sampling Distribution'))

fig.show()

1¶

2¶

3¶

4¶

5¶

6¶

7¶

8¶

	Method	Average Length	Coverage
0	Normal	1.302371	0.97
1	Pivot	1.287781	0.84
2	Percentile	1.287781	0.97