import math
from matplotlib import pyplot as plt
import numpy as np
import plotly.graph_objects as go
from scipy.stats import norm
np.random.seed(42)
Prove Theorem 14.1:
Let $a$ be a vector of length $k$ and let $X$ be a random vector of the same length with mean $\mu$ and variance $\Sigma$. Then $\mathbb{E}(a^TX) = a^T \mu$ and $\mathbb{V}(a^T X) = a^T \Sigma a$. If $A$ is a matrix with $k$ columns, then $\mathbb{E}(AX) = A\mu$ and $\mathbb{V}(AX) = A\Sigma A^T$.
Solution: By linearity of expectation for scalar random variables, \begin{align*} \mathbb{E}(a^TX) &= \mathbb{E}\left(\sum_{i=1}^k a_i X_i\right) = \sum_{i=1}^k a_i \mathbb{E}(X_i) = a^T \mu. \end{align*}
Meanwhile, since $\text{Cov}(aX, bY) = ab\,\text{Cov}(X, Y)$ for constants $a, b$, and the variance of a sum is the sum of all pairwise covariances, \begin{align*} \mathbb{V}(a^TX) &= \text{Cov}\left(\sum_{i=1}^k a_i X_i, \sum_{j=1}^k a_j X_j\right) \\ &= \sum_{i=1}^k \sum_{j=1}^k a_i a_j \text{Cov}(X_i, X_j) \\ &= \begin{bmatrix} a_1 & \dots & a_k \end{bmatrix} \begin{bmatrix} \mathbb{V}(X_1) & \text{Cov}(X_1, X_2) & \dots & \text{Cov}(X_1, X_k) \\ \text{Cov}(X_2, X_1) & \mathbb{V}(X_2) & \dots & \text{Cov}(X_2, X_k) \\ \vdots & \vdots & \ddots & \vdots \\ \text{Cov}(X_k, X_1) & \text{Cov}(X_k, X_2) & \dots & \mathbb{V}(X_k) \end{bmatrix} \begin{bmatrix} a_1 \\ \vdots \\ a_k \end{bmatrix} = a^T \Sigma a. \end{align*}
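As a quick Monte Carlo sanity check of the two scalar identities just derived (a sketch only; the particular $a$, $\mu$, $\Sigma$, and the `_check` variable names below are arbitrary illustrative choices, not part of the exercise):
a_check = np.array([1.0, -2.0, 0.5])
mu_check = np.array([0.5, -1.0, 2.0])
sigma_check = np.array([[2.0, 0.3, 0.1],
                        [0.3, 1.0, -0.2],
                        [0.1, -0.2, 0.5]])
X_check = np.random.multivariate_normal(mu_check, sigma_check, size=int(1e6))
# empirical mean and variance of a^T X vs. the theoretical values a^T mu and a^T Sigma a
print(np.mean(X_check @ a_check), a_check @ mu_check)
print(np.var(X_check @ a_check), a_check @ sigma_check @ a_check)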
Now suppose $A$ has $m$ rows, and let $a_{l\cdot}$ denote the vector of length $k$ corresponding to the $l^{\text{th}}$ row of $A$. Then
\begin{align*} \mathbb{E}(AX) &= \mathbb{E}\left( \begin{bmatrix} \sum_{i=1}^k a_{1i}X_i \\ \vdots \\ \sum_{i=1}^k a_{mi}X_i \end{bmatrix} \right) \\ &= \begin{bmatrix} \mathbb{E}(a_{1\cdot}^T X) \\ \vdots \\ \mathbb{E}(a_{m\cdot}^T X) \\ \end{bmatrix} \\ &= \begin{bmatrix} a_{1\cdot}^T \mu \\ \vdots \\ a_{m\cdot}^T \mu \\ \end{bmatrix} = A\mu. \end{align*}Note that we can similarly show that $\mathbb{E}(X^T A^T) = \mu^T A^T$.
Finally, using the definition $\mathbb{V}(X) = \mathbb{E}((X - \mu)(X - \mu)^T)$ and the previous facts,
\begin{align*} \mathbb{V}(AX) &= \mathbb{E}\left[(AX - \mathbb{E}(AX))(AX - \mathbb{E}(AX))^T\right] \\ &= \mathbb{E}\left[(AX - A\mu)(AX - A\mu)^T \right] \\ &= \mathbb{E}\left[A(X - \mu)(A(X - \mu))^T \right] \\ &= \mathbb{E}\left[A(X - \mu)(X - \mu)^T A^T\right] \\ &= A\mathbb{E}\left[(X - \mu)(X - \mu)^T\right] A^T = A \Sigma A^T.\\ \end{align*}Find the Fisher information matrix for the MLE of a Multinomial.
Solution: Let $X \sim \text{Multinomial}(n, p)$, where $p$ is a vector of length $k$ consisting of nonnegative entries that sum to 1. Theorem 14.5 establishes that the MLE of $p$ is $\hat{p} = X / n$.
The probability mass function of $X$ is given by:
$$\frac{n!}{\prod_{j=1}^k X_j!} \prod_{j=1}^k p_j^{X_j},$$whence the log-likelihood of any estimate of $p$ (up to an additive constant) is
$$\ell(p) = \sum_{j=1}^k X_j \log p_j.$$The gradient of the above with respect to $p$ is:
$$\begin{bmatrix} X_1 / p_1 \\ \vdots \\ X_k / p_k \end{bmatrix},$$
whence we get the entries of the Hessian:
$$H_{ij} = \begin{cases} -X_i / p_i^2 & i = j \\ 0 & i \ne j. \end{cases}$$Since $\mathbb{E}(X_i) = np_i$, negating the expectation of the Hessian yields the Fisher information, with diagonal entries $-\mathbb{E}(H_{ii}) = n p_i / p_i^2 = n / p_i$:
$$I_n = n \begin{bmatrix} \frac{1}{p_1} & 0 & \dots & 0 \\ 0 & \frac{1}{p_2} & \dots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & \dots & \frac{1}{p_k} \\ \end{bmatrix}.$$(Computer Experiment.) Write a function to generate nsim
observations from a Multinomial($n, p$) distribution.
nsim = int(1e5)
n = int(1e2)
p = np.random.random(5)
p /= sum(p)
def get_obs(nsim, n, p):
    """
    Generates nsim observations from a Multinomial(n, p) distribution.
    For each observation, draws n Uniform(0, 1) variables and bins them
    using the cumulative sums of p; the bin counts form the observation.
    """
    data = np.random.random((nsim, n))
    bins = np.append([0], p.cumsum())
    obs = np.apply_along_axis(lambda x: np.histogram(x, bins)[0], 1, data)
    return obs
result = get_obs(nsim, n, p)
print(p)
result
[0.13319703 0.33810082 0.26031769 0.21289984 0.05548463]
array([[18, 32, 26, 19,  5],
       [12, 35, 26, 24,  3],
       [14, 26, 34, 21,  5],
       ...,
       [14, 33, 28, 18,  7],
       [18, 31, 14, 32,  5],
       [ 7, 35, 27, 24,  7]])
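As a rough sanity check of the sampler (not part of the exercise), the sample mean of the simulated observations should be close to $np$, and the sample covariance close to $n(\text{diag}(p) - pp^T)$, the mean and covariance of a Multinomial($n, p$):
print(result.mean(axis=0))
print(n * p)
print(np.cov(result, rowvar=False))
print(n * (np.diag(p) - np.outer(p, p)))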
(Computer Experiment.) Write a function to generate nsim
observations from a Multivariate normal with given mean $\mu$ and covariance matrix $\Sigma$.
Solution: We shall implement a generator rather than use a library, using the following steps:
1. Generate pairs of Uniform(0, 1) samples $U_1, U_2$ and apply the Box–Muller transform, $Z_1 = \sqrt{-2\log U_1}\cos(2\pi U_2)$ and $Z_2 = \sqrt{-2\log U_1}\sin(2\pi U_2)$, to obtain independent standard normal samples.
2. Using the above process, generate an $\text{nsim} \times k$ matrix of standard normally distributed samples, $Z$, where $k$ is the number of variables in the desired distribution.
3. Get the desired result via the transform $X = \mu + ZL^T$, where $L$ is the Cholesky factor of $\Sigma$, i.e., $L L^T = \Sigma$.
k = 2
mu = np.array([1, 1])
sigma = np.array([[1, 3 / 5],
[3 / 5, 2]])
nsim = int(1e5)
def get_samples(mu, sigma, nsim):
    k = mu.size
    # Box-Muller: two independent uniforms yield two independent standard normals
    u_1 = np.random.uniform(size=((nsim // 2) + 1, k))
    u_2 = np.random.uniform(size=((nsim // 2) + 1, k))
    z_1 = np.sqrt(-2 * np.log(u_1)) * np.cos(2 * np.pi * u_2)
    z_2 = np.sqrt(-2 * np.log(u_1)) * np.sin(2 * np.pi * u_2)
    z = np.concatenate((z_1, z_2), axis=0)[:nsim]
    L = np.linalg.cholesky(sigma)  # Cholesky factor of sigma, L @ L.T == sigma
    return mu + z @ L.T
z = get_samples(mu, sigma, nsim)
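A quick sanity check (not required by the exercise): the sample mean and covariance of the generated draws should be close to $\mu$ and $\Sigma$.
print(z.mean(axis=0))           # should be close to mu
print(np.cov(z, rowvar=False))  # should be close to sigma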
Plotting a 2D histogram:
import plotly.graph_objects as go
fig = go.Figure(
    go.Histogram2d(
        x=z[:, 0],
        y=z[:, 1]
    )
)
fig.update_layout(
    width=500,
    height=500,
    paper_bgcolor="LightSteelBlue",
    xaxis_range=[max(-5, min(z[:, 0])),
                 min(max(z[:, 0]), 5)],
    yaxis_range=[max(-5, min(z[:, 1])),
                 min(max(z[:, 1]), 5)]
)
fig.show()
(Computer Experiment.) Generate 100 random vectors from a $N(\mu, \Sigma)$ distribution where
$$ \mu = \begin{pmatrix} 3 \\ 8 \end{pmatrix}, \,\, \Sigma = \begin{pmatrix} 1 & 1 \\ 1 & 2 \\ \end{pmatrix}. $$Plot the simulation as a scatterplot. Estimate the mean and covariance matrix $\Sigma$. Find the correlation $\rho$ between $X_1$ and $X_2$. Compare this with the sample correlations from your simulation. Find a 95 percent confidence interval for $\rho$. Use two methods: the bootstrap and Fisher's method. Compare.
mu = np.array([3, 8])
sigma = np.array([
    [1, 1],
    [1, 2],
])
n = 100
X = get_samples(mu, sigma, nsim=n)
plt.grid()
plt.scatter(x=X[:, 0], y=X[:, 1])
plt.show()
mu_hat = np.mean(X, axis=0)
sigma_hat = (X - mu_hat).T @ (X - mu_hat) / (n - 1)
sigma_hat_numpy = np.cov(X, rowvar=False)
assert np.allclose(sigma_hat, sigma_hat_numpy)
print(f"mu_hat:")
print(f"{mu_hat}")
print(f"sigma_hat: ")
print(f"{sigma_hat}")
mu_hat:
[3.04491859 8.00680898]
sigma_hat: 
[[0.9948494  1.14602857]
 [1.14602857 2.32019661]]
The true correlation can be computed:
\begin{align*} \rho &= \frac{\mathbb{E}[(X_1 - \mu_1)(X_2 - \mu_2)]}{\sigma_1 \sigma_2} \\ &= \frac{\text{Cov}(X_1, X_2)}{\sigma_1 \sigma_2} \\ &= \frac{1}{\sqrt{1}\sqrt{2}} \\ &\approx 0.7071. \end{align*}Computing the sample correlation via the formula
$$\hat{\rho} = \frac{\sum_{i=1}^n (X_{1i} - \bar{X}_1)(X_{2i} - \bar{X}_2)} {\sqrt{\sum_{i}(X_{1i} - \bar{X}_1)^2} \sqrt{\sum_i(X_{2i} - \bar{X}_2)^2}}.$$Note the corresponding equation in the book (which keeps this numerator but uses the sample standard deviations $s_1 s_2$ in the denominator) is missing a factor of $\frac{1}{n-1}$.
s_1 = np.sqrt(np.power(np.linalg.norm(X[:, 0] - mu_hat[0]), 2) / (n - 1))
s_2 = np.sqrt(np.power(np.linalg.norm(X[:, 1] - mu_hat[1]), 2) / (n - 1))
rho_hat = np.sum((X[:, 0] - mu_hat[0]) * (X[:, 1] - mu_hat[1])) / ((n-1) * s_1 * s_2)
rho_hat_numpy = np.corrcoef(X[:,0], X[:, 1])[0,1]
assert np.isclose(rho_hat, rho_hat_numpy)
print(f"Sample correlation: {rho_hat:.3f}")
Sample correlation: 0.754
Computing a 95% confidence interval using the bootstrap (using B = 1000):
def get_rho_hat(X):
    n = X.shape[0]  # number of observations
    mu_hat = np.mean(X, axis=0)
    s_1 = np.sqrt(np.power(np.linalg.norm(X[:, 0] - mu_hat[0]), 2) / (n - 1))
    s_2 = np.sqrt(np.power(np.linalg.norm(X[:, 1] - mu_hat[1]), 2) / (n - 1))
    sample_cov = np.sum((X[:, 0] - mu_hat[0]) * (X[:, 1] - mu_hat[1]))
    return sample_cov / ((n - 1) * s_1 * s_2)
def get_bootstrap_ci(X):
    rho_hat = get_rho_hat(X)
    n = X.shape[0]
    B = 1000
    T = np.empty(B)
    for i in range(B):
        # resample rows with replacement and recompute the correlation
        X_star = X[np.random.choice(X.shape[0], size=n, replace=True)]
        T[i] = get_rho_hat(X_star)
    se_boot = np.sqrt(np.var(T))  # bootstrap estimate of the standard error
    lb = rho_hat - z_crit * se_boot
    ub = rho_hat + z_crit * se_boot
    return (lb, ub)
z_crit = norm.ppf(1 - 0.05 / 2)  # two-sided 95% critical value
ci_boot = get_bootstrap_ci(X)
print(f"Normal 95% CI: ({ci_boot[0]:.3f}, {ci_boot[1]:.3f})")
Normal 95% CI: (0.670, 0.839)
Now we use Fisher's method, which has the following steps:
1. Compute the Fisher transform $\hat{\theta} = \frac{1}{2}\log\left(\frac{1 + \hat{\rho}}{1 - \hat{\rho}}\right)$, whose approximate standard error is $\hat{\text{se}}(\hat{\theta}) = 1 / \sqrt{n - 3}$.
2. Form the interval $(a, b) = \left(\hat{\theta} - z_{\alpha/2} / \sqrt{n - 3},\; \hat{\theta} + z_{\alpha/2} / \sqrt{n - 3}\right)$.
3. Transform back to the $\rho$ scale: the confidence interval is $\left(\frac{e^{2a} - 1}{e^{2a} + 1}, \frac{e^{2b} - 1}{e^{2b} + 1}\right)$.
def get_fishers_method_ci(X):
    rho_hat = get_rho_hat(X)
    n = X.shape[0]
    theta_hat = (1 / 2) * (math.log(1 + rho_hat) - math.log(1 - rho_hat))
    se_theta_hat = 1 / math.sqrt(n - 3)
    a = theta_hat - z_crit * se_theta_hat
    b = theta_hat + z_crit * se_theta_hat
    # map the endpoints back to the rho scale
    lb = (math.exp(2 * a) - 1) / (math.exp(2 * a) + 1)
    ub = (math.exp(2 * b) - 1) / (math.exp(2 * b) + 1)
    return (lb, ub)
ci_fisher = get_fishers_method_ci(X)
print(f"Fisher's Method 95% CI: ({ci_fisher[0]:.3f}, {ci_fisher[1]:.3f})")
Fisher's Method 95% CI: (0.655, 0.828)
In this instance, both methods yielded a confidence interval containing the true value $\sqrt{2} / 2$, with Fisher's method giving a slightly wider interval.
boot_ci_width = ci_boot[1] - ci_boot[0]
fisher_ci_width = ci_fisher[1] - ci_fisher[0]
print(f"Bootstrap C.I. width: {boot_ci_width:.3f}")
print(f"Fisher's Method C.I. width: {fisher_ci_width:.3f}")
Bootstrap C.I. width: 0.169
Fisher's Method C.I. width: 0.173
(Computer Experiment.) Repeat the previous exercise 1000 times. Compare the coverage of the two confidence intervals for $\rho$.
rho = math.sqrt(2) / 2
K = 1000
coverage = {"Bootstrap": np.zeros(K),
"Fisher": np.zeros(K)}
width = {"Bootstrap": np.zeros(K),
"Fisher": np.zeros(K)}
for k in range(K):
    X = get_samples(mu=mu, sigma=sigma, nsim=100)
    ci_boot = get_bootstrap_ci(X)
    ci_fisher = get_fishers_method_ci(X)
    if (ci_boot[0] <= rho) and (rho <= ci_boot[1]):
        coverage["Bootstrap"][k] = 1
    if (ci_fisher[0] <= rho) and (rho <= ci_fisher[1]):
        coverage["Fisher"][k] = 1
    width["Bootstrap"][k] = ci_boot[1] - ci_boot[0]
    width["Fisher"][k] = ci_fisher[1] - ci_fisher[0]
Confidence intervals generated by Fisher's method tend to contain the true value more often, and tend to be wider, than those generated by the bootstrap.
print(f"Bootstrap Coverage: {100 * coverage['Bootstrap'].mean():.1f}%")
print(f"Fisher's Method Coverage: {100 * coverage['Fisher'].mean():.1f}%")
print(f"Bootstrap C.I. Avg. Width: {width['Bootstrap'].mean():.4f}%")
print(f"Fisher's Method C.I. Avg. Width: {width['Fisher'].mean():.4f}%")
Bootstrap Coverage: 95.4%
Fisher's Method Coverage: 96.0%
Bootstrap C.I. Avg. Width: 0.1967
Fisher's Method C.I. Avg. Width: 0.2001