import matplotlib.pyplot as plt
import numpy as np
from numpy.random import default_rng
import pandas as pd
from scipy.integrate import quad
from scipy.stats import gamma, norm, uniform
from scipy.optimize import minimize

np.set_printoptions(formatter={"float": "{: 0.3f}".format})
np.random.seed(0)


def h(x):
    return np.where(a <= x, np.where(x <= b, 1, 0), 0)


N = 100_000
a = 1
b = 2
X = norm.rvs(loc=0, scale=1, size=N)
Y = h(X)
I_hat = np.mean(Y)
s2 = (np.linalg.norm(Y - I_hat) ** 2) / (N - 1)  # sample variance
se = np.sqrt(s2) / np.sqrt(N)  # estimated standard error
print(f"I_hat : {I_hat:.5f}, estimated standard error: {se:.3e}")
print(f"Approximate 95% C.I.: ({I_hat - 2 * se:.3e}, {I_hat + 2 * se:.3e})")

I_hat : 0.13457, estimated standard error: 1.079e-03
Approximate 95% C.I.: (1.324e-01, 1.367e-01)


I = norm.cdf(2) - norm.cdf(1)
se_true = np.sqrt(I * (1 - I) / (N - 1))
print(f"True standard error: {se_true:.3e}")
print(f"Estimated standard error: {se:.3e}")
print(f"Difference: {se_true - se:.3e}")

True standard error: 1.084e-03
Estimated standard error: 1.079e-03
Difference: 4.503e-06


def f(x):
    return norm.pdf(x, loc=0, scale=1)


def g(x):
    return norm.pdf(x, loc=1.5, scale=v**2)


fig, ax = plt.subplots(figsize=(14, 4), nrows=1, ncols=3)

estimates = {}
for i, v in enumerate([0.1, 1, 10]):
    X = norm.rvs(loc=1.5, scale=v**2, size=N)
    Y = (h(X) * f(X)) / g(X)
    estimates[v] = np.mean(Y)
    ax[i].hist(Y, bins=100)
    ax[i].set_title(f"$v$ = {v}")
print(estimates)
fig.suptitle("Histograms for $Y_i$", fontsize=14)
plt.show()

{0.1: 0.013070056030923984, 1: 0.13611405986928374, 10: 0.131887993546821}


m = 1.5


def f(x):
    return (v / np.sqrt(2 * np.pi)) * np.exp(
        -(x**2) + ((x - m) ** 2) / (2 * (v**2))
    )


vs = [0.1, 1, 10]
ses = {}
for v in vs:
    true_value, _ = quad(f, 1, 2)
    sigma_y_2 = true_value - I**2
    sigma_y_2 = sigma_y_2
    ses[v] = sigma_y_2 / np.sqrt(N)
pd.DataFrame.from_dict({"Standard Error": ses}, orient="index").T


vv = np.logspace(-1, 1)
sigma_y_2s = {}
ses = {}
for i, v in enumerate(vv):

    def f(x):
        return (v / np.sqrt(2 * np.pi)) * np.exp(
            -(x**2) + ((x - m) ** 2) / (2 * (v**2))
        )

    true_value, _ = quad(f, 1, 2)
    sigma_y_2 = true_value - I**2
    sigma_y_2s[v] = sigma_y_2
    ses[v] = sigma_y_2 / np.sqrt(N)

plt.plot(ses.keys(), ses.values())
plt.xscale("log")
plt.yscale("log")
plt.xlabel("$v$")
plt.ylabel("Standard Error")
plt.grid()
plt.show()


optimal_v = list(ses.keys())[np.array(list(ses.values())).argmin()]
print(f"Optimal v: {optimal_v:.3e}, Standard Error: {ses[optimal_v]:.3e}")

Optimal v: 3.393e-01, Standard Error: 3.548e-05


N = 100_000

Y = norm.rvs(loc=0, scale=1, size=N)
X = norm.rvs(loc=Y, scale=np.sqrt(1 + Y**2))


def f(x, y):
    coef = 1 / (2 * np.pi * np.sqrt(1 + y**2))
    exp = -(1 / 2) * ((x - y) ** 2 / (1 + y**2) + y**2)
    return coef * np.exp(exp)


def w(x):
    return norm.pdf(x, loc=5, scale=10)


xx = np.arange(-10, 10, step=0.05)
f_X = np.zeros_like(xx)
for i, x in enumerate(xx):
    f_X[i] = np.nanmean(f(x, Y) * w(X) / f(X, Y))
plt.figure(figsize=(8, 4))
plt.plot(xx, f_X)
plt.xlabel("x")
plt.ylabel("Estimate of f_X")
plt.grid()
plt.show()


from scipy.stats import cauchy, uniform


def f(x):
    return norm.pdf(x)


def g(x):
    return 1 / (1 + x**2)


np.random.seed(0)
M = 1
N = 1000
counter = 0
Ys = []
while counter < N:
    X = cauchy.rvs()
    U = uniform.rvs()
    if U <= f(X) / (M * g(X)):
        Ys.append(X)
    counter += 1
plt.hist(Ys, histtype="step", bins=20, label="samples", density=True)
xx = np.linspace(-3, 3)
plt.plot(xx, norm.pdf(xx), label="N(0,1)")
plt.legend()
plt.show()


from numpy.random import default_rng
from scipy.integrate import quad


def f(z):
    exponent = (
        -theta_1 * z
        - theta_2 / z
        + 2 * np.sqrt(theta_1 * theta_2)
        + np.log(np.sqrt(2 * theta_2))
    )
    return (z ** (-3 / 2)) * np.exp(exponent)


def g(z, a):
    return gamma.pdf(z, a)


theta_1 = 1.5
theta_2 = 2
mu = np.sqrt(theta_2 / theta_1)
nu = np.sqrt(theta_1 / theta_2) + 1 / (2 * theta_2)
# get normalization constant numerically
C, _ = quad(f, a=-1e-6, b=1e3)


def independence_MCMC(func, n, a):
    x = gamma.rvs(a=a, random_state=rng)  # initialize x
    cursor = 0
    X = np.zeros(n)
    while cursor < n:
        y = gamma.rvs(a=a, random_state=rng)  # get proposal
        r = min(1, func(y) / func(x) * g(x, a=a) / g(y, a=a))
        if uniform.rvs(random_state=rng) < r:
            x = y
            X[cursor] = x
            cursor += 1
    return X


def plot_results(values):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
    ax[0].plot(values, mu_hats, label="Estimated")
    ax[0].hlines(mu, min(values), max(a_values), color="orange", label="True")
    ax[0].set_xlabel("a")
    ax[0].legend()
    ax[0].set_title("Mean")

    ax[1].plot(values, nu_hats, label="Estimated")
    ax[1].hlines(
        nu,
        min(values),
        max(values),
        color="orange",
        label="True",
    )
    ax[1].set_xlabel("a")
    ax[1].legend()
    ax[1].set_title("Mean of Inverse")
    plt.show()


n = 1000
a_values = np.linspace(0.3, 2, 21)
samples = np.zeros((len(a_values), n))
mu_hats = np.zeros_like(a_values)
nu_hats = np.zeros_like(a_values)
print(f"Actual mu: {mu:.3f} \t Actual nu: {nu:.3f}")
print("a\t\tmu_hat\t\tnu_hat\tmu_hat_error\tnu_hat_error\ttotal_abs_error")
for i, a in enumerate(a_values):
    rng = default_rng(seed=0)
    samples[i] = independence_MCMC(f, n, a)

    mu_hats[i] = np.mean(samples[i])
    nu_hats[i] = np.mean(np.power(np.array(samples[i]), -1))
    print(
        f"{a:.3e}\t{mu_hats[i]:.3f}\t\t{nu_hats[i]:.3f}\t{abs(mu_hats[i] - mu):.3f}\t\t{abs(nu_hats[i] - nu):.3f}"
        f"\t\t{np.abs((mu_hats[i] - mu)) + np.abs((nu_hats[i] - nu)):.3f}"
    )
plot_results(a_values)

Actual mu: 1.155 	 Actual nu: 1.116
a		mu_hat		nu_hat	mu_hat_error	nu_hat_error	total_abs_error
3.000e-01	1.085		1.261	0.069		0.145		0.215
3.850e-01	1.085		1.216	0.070		0.100		0.170
4.700e-01	1.084		1.204	0.070		0.088		0.158
5.550e-01	1.092		1.182	0.063		0.066		0.129
6.400e-01	1.119		1.191	0.036		0.075		0.111
7.250e-01	1.113		1.195	0.041		0.079		0.120
8.100e-01	1.119		1.200	0.036		0.084		0.120
8.950e-01	1.124		1.191	0.031		0.075		0.106
9.800e-01	1.155		1.158	0.000		0.042		0.043
1.065e+00	1.185		1.138	0.030		0.022		0.053
1.150e+00	1.205		1.136	0.050		0.020		0.070
1.235e+00	1.201		1.146	0.046		0.030		0.076
1.320e+00	1.210		1.133	0.055		0.017		0.072
1.405e+00	1.205		1.146	0.051		0.030		0.080
1.490e+00	1.187		1.134	0.033		0.018		0.051
1.575e+00	1.212		1.120	0.057		0.004		0.061
1.660e+00	1.220		1.114	0.065		0.002		0.068
1.745e+00	1.232		1.092	0.077		0.024		0.101
1.830e+00	1.244		1.084	0.089		0.032		0.121
1.915e+00	1.256		1.062	0.101		0.054		0.155
2.000e+00	1.261		1.067	0.106		0.049		0.155


rng = default_rng(seed=0)
Z_ind = independence_MCMC(f, n, a=0.98)
mu_hat_ind = np.mean(Z_ind)
nu_hat_ind = np.mean(1 / Z_ind)
plt.hist(Z_ind, bins=50, density=True, label="Samples")
xx = np.linspace(1e-3, 5, 100)
plt.plot(xx, f(xx) / C, label="True density")
plt.legend()
plt.show()


def f_W(z):
    return np.exp(z) * f(np.exp(z))


def random_walk_MCMC(func, n, b):
    x = 0  # initialize x
    cursor = 0
    W = np.zeros(n)
    while cursor < n:
        eps = norm.rvs(scale=b, random_state=rng)  # get proposal
        y = x + eps
        r = min(1, func(y) / func(x))
        if uniform.rvs(random_state=rng) < r:
            x = y
            W[cursor] = x
            cursor += 1
    return W


b_values = np.logspace(-3, 1, 20)
samples = np.zeros((len(b_values), n))
mu_hats = np.zeros_like(b_values)
nu_hats = np.zeros_like(b_values)
print(f"Actual mu: {mu:.3f} \t Actual nu: {nu:.3f}")

print("b\t\tmu_hat\t\tnu_hat\tmu_hat_error\tnu_hat_error\ttotal_abs_error")
for i, b in enumerate(b_values):
    rng = default_rng(seed=0)
    samples[i] = np.exp(random_walk_MCMC(f_W, n, b))
    mu_hats[i] = np.mean(samples[i])
    nu_hats[i] = np.mean(np.power(np.array(samples[i]), -1))
    print(
        f"{b:.3e}\t{mu_hats[i]:.3f}\t\t{nu_hats[i]:.3f}\t{abs(mu_hats[i] - mu):.3f}\t\t{abs(nu_hats[i] - nu):.3f}"
        f"\t\t{np.abs((mu_hats[i] - mu)) + np.abs((nu_hats[i] - nu)):.3f}"
    )

Actual mu: 1.155 	 Actual nu: 1.116
b		mu_hat		nu_hat	mu_hat_error	nu_hat_error	total_abs_error
1.000e-03	0.988		1.013	0.167		0.103		0.270
1.624e-03	0.980		1.021	0.175		0.095		0.270
2.637e-03	0.968		1.034	0.187		0.082		0.269
4.281e-03	0.949		1.056	0.206		0.060		0.266
6.952e-03	0.922		1.091	0.233		0.025		0.259
1.129e-02	0.884		1.146	0.270		0.030		0.300
1.833e-02	0.830		1.257	0.325		0.141		0.466
2.976e-02	0.964		1.175	0.191		0.059		0.249
4.833e-02	1.109		1.087	0.046		0.029		0.074
7.848e-02	1.217		1.042	0.062		0.074		0.137
1.274e-01	1.178		1.069	0.023		0.047		0.071
2.069e-01	1.136		1.094	0.019		0.022		0.041
3.360e-01	1.148		1.140	0.007		0.024		0.030
5.456e-01	1.147		1.116	0.008		0.000		0.008
8.859e-01	1.177		1.105	0.023		0.011		0.034
1.438e+00	1.174		1.130	0.020		0.014		0.034
2.336e+00	1.180		1.168	0.025		0.052		0.078
3.793e+00	1.157		1.175	0.002		0.059		0.060
6.158e+00	1.188		1.140	0.033		0.024		0.057
1.000e+01	1.247		1.144	0.093		0.028		0.120


rng = default_rng(seed=0)
Z_rw = np.exp(random_walk_MCMC(f_W, n, b=5.456e-01))
mu_hat_rw = np.mean(Z_rw)
nu_hat_rw = np.mean(1 / Z_rw)

plt.hist(Z_rw, bins=50, density=True, label="Samples")
xx = np.linspace(1e-3, 5, 100)
plt.plot(xx, f(xx) / C, label="True density")
plt.legend()
plt.show()


results = {
    "mu": {
        "actual": mu,
        "Independence MCMC Estimate": mu_hat_ind,
        "Random-Walk MCMC Estimate": mu_hat_rw,
    },
    "nu": {
        "actual": nu,
        "Independence MCMC Estimate": nu_hat_ind,
        "Random-Walk MCMC Estimate": nu_hat_rw,
    },
}
pd.DataFrame.from_dict(results, orient="index")


data = pd.read_csv(
    "data/coris.dat",
    skiprows=3,
    names="sbp tobacco ldl adiposity famhist typea obesity alcohol age chd".split(),
)
target = "chd"
features = data.columns.drop(target)
X, Y = data[features].values, data[target].values

# add a constant to design matrix
features = features.insert(0, "constant")
X = np.insert(X, 0, values=np.ones_like(X.shape[0]), axis=1)

n = int(1e4)  # desired sample size


from scipy.special import expit


def log_likelihood(beta):
    logits = X @ beta
    y_pred = expit(logits)
    y_true = Y
    epsilon = 1e-15  # small value to avoid taking the logarithm of zero

    y_pred = np.clip(
        y_pred, epsilon, 1 - epsilon
    )  # clip values to avoid extreme probabilities

    likelihood = np.sum(
        y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)
    )

    return likelihood


res = minimize(fun=lambda beta: -log_likelihood(beta), x0=np.zeros(10))
my_beta_star = res.x
import statsmodels.api as sm

model = sm.Logit(Y, X)
result = model.fit()
stats_models_beta_star = result.params
assert np.isclose(my_beta_star, stats_models_beta_star, atol=1e-4).all()

Optimization terminated successfully.
         Current function value: 0.510974
         Iterations 6


def gibbs_metropolis(n, sigmas):
    k = len(features)
    acceptance_rates = np.zeros(k)
    B = np.zeros((n, k))
    for i in range(n):
        if i > 0:
            B[i] = B[i - 1]
        for j in range(k):
            B_current = B[i].copy()
            B_ij_current = B[i, j]
            Z = norm.rvs(loc=B_ij_current, scale=sigmas[j])
            B_proposal = B_current.copy()
            B_proposal[j] = Z

            r = min(
                np.exp(log_likelihood(B_proposal))
                / np.exp(log_likelihood(B_current)),
                1,
            )
            if uniform.rvs() < r:
                B[i, j] = Z
                acceptance_rates[j] += 1
    return B, acceptance_rates / n


# sigmas selected heuristically so that acceptance rates are ~50%.
sigmas = np.zeros(10)
sigmas[0] = 0.23
sigmas[1] = 0.0015
sigmas[2] = 0.04
sigmas[3] = 0.05
sigmas[4] = 0.01
sigmas[5] = 0.3
sigmas[6] = 0.005
sigmas[7] = 0.01
sigmas[8] = 0.005
sigmas[9] = 0.005

samples, acceptance_rates = gibbs_metropolis(n, sigmas)


pd.DataFrame.from_dict(
    dict(zip(features, acceptance_rates)),
    columns=["Acceptance Rates"],
    orient="index",
)


fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(20, 8))
for index, feature_name in enumerate(features):
    ax[index // 5][index % 5].hist(samples[:, index], histtype="step", bins=30)
    ax[index // 5][index % 5].set_title(feature_name)
plt.show()


posterior_mean, lb, ub = (
    samples.mean(axis=0),
    np.quantile(samples, 0.025, axis=0),
    np.quantile(samples, 0.975, axis=0),
)
posterior = pd.DataFrame(
    {"mean": posterior_mean, "95% LB": lb, "95% UB": ub},
    index=features,
)
plt.figure(figsize=(10, 5))
plt.bar(posterior.index, posterior["mean"], label="Posterior Mean")
plt.vlines(
    posterior.index,
    ymin=posterior["95% LB"],
    ymax=posterior["95% UB"],
    color="black",
    label="95% Posterior Interval",
)
plt.grid()
plt.legend()
plt.show()


res = minimize(fun=lambda beta: -log_likelihood(beta), x0=np.zeros(10))
frequentist_beta = res.x

plt.figure(figsize=(10, 5))
plt.bar(posterior.index, frequentist_beta)
plt.title("Maximum Likelihood Estimates")
plt.grid()
plt.show()


bayesian_beta = posterior_mean
k = len(features)
for j in range(0, k):
    plt.plot(
        [frequentist_beta[j], bayesian_beta[j]],
        [0, 1],
        color="black",
        marker="o",
    )
plt.yticks([0, 1], ["Frequentist", "Bayesian"])
plt.grid()
plt.show()

	Standard Error
0.1	0.263522
1.0	0.000121
10.0	0.001649

	actual	Independence MCMC Estimate	Random-Walk MCMC Estimate
mu	1.154701	1.154551	1.147062
nu	1.116025	1.158405	1.116451

1¶

2¶

3¶

4¶

5¶

	Acceptance Rates
constant	0.5036
sbp	0.5256
tobacco	0.4716
ldl	0.4446
adiposity	0.4400
famhist	0.5273
typea	0.4536
obesity	0.4500
alcohol	0.6196
age	0.4850