import math
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm, chi2, beta as Beta
plt.style.use("default.mplstyle")
Prove Theorem 21.5:
The risk of $\hat{f}$ is
$$R(J) = \sum_{j=1}^J \frac{\sigma_j^2}{n} + \sum_{j=J+1}^{\infty} \beta_j^2.$$Solution:
Recall the squared bias-variance decomposition of the mean integrated squared error (MISE), aka risk:
\begin{align*} R(J) &= \int \mathbb{V}(\hat{f}(x)) + \left(b(x)\right)^2 \, dx. \end{align*}After computing the bias:
\begin{align*} b(x) &= f(x) - \mathbb{E}[\hat{f}(x)] \\ &= \sum_{j=1}^{\infty}\beta_j \phi_j(x) - \sum_{j=1}^J\mathbb{E}[\hat{\beta}_j]\phi_j(x) \\ &= \sum_{j=J+1}^{\infty}\beta_j \phi_j(x) \tag{$\hat{\beta}_j$ is unbiased}\\ \end{align*}then the variance:
\begin{align*} \mathbb{V}[\hat{f}(x)] &= \mathbb{V}\left[ \sum_{j=1}^J \hat{\beta}_j \phi_j(x)\right] \\ &= \sum_{j=1}^J \phi_j^2(x) \mathbb{V}[\hat{\beta}_j] + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] \phi_j(x) \phi_k(x) \\ &= \sum_{j=1}^J \phi_j^2(x) \frac{\sigma_j^2}{n} + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] \phi_j(x) \phi_k(x), \\ \end{align*}we may express the risk
\begin{align*} R(J) &= \int \left[\sum_{j=1}^J \phi_j^2(x) \frac{\sigma_j^2}{n} + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] \phi_j(x) \phi_k(x) + \sum_{j=J+1}^{\infty}\beta_j^2 \phi_j^2(x) + \sum_{j=J+1}^{\infty} \sum_{k=J+1, k\ne j}^{\infty} \beta_j \beta_{k} \phi_j(x) \phi_k(x) \right] \, dx \\ &= \sum_{j=1}^J \frac{\sigma_j^2}{n} \int \phi_j^2(x) \, dx + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] \int \phi_j(x) \phi_k(x) \, dx + \sum_{j=J+1}^{\infty} \beta_j^2 \int \phi_j^2(x) \, dx + \sum_{j=J+1}^{\infty} \sum_{k=J+1, k\ne j}^{\infty} \beta_j \beta_{k} \int \phi_j(x) \phi_k(x) \, dx \\ &= \sum_{j=1}^J \frac{\sigma_j^2}{n} + \sum_{j=J+1}^{\infty} \beta_j^2. \tag{orthonormality of basis functions} \end{align*}Prove Theorem 21.9:
The risk $R(J)$ of the estimator $\hat{r}_n(x) = \sum_{j=1}^J \hat{\beta}_j \phi_j(x)$ is
$$R(J) = \frac{J\sigma^2}{n} + \sum_{j=J+1}^{\infty} \beta_j^2$$Solution: This follows the same lines as the previous question. From the bias-variance decomposition:
\begin{align*} R(J) &= \int \mathbb{V}[\hat{r}(x)] + b(x)^2 \, dx \end{align*}\begin{align*} b(x) &= r(x) - \mathbb{E}[\hat{r}(x)] \\ &= \sum_{j=1}^{\infty} \beta_j \phi_j(x) - \sum_{j=1}^J \mathbb{E}[\hat{\beta}_j]\phi_j(x) \\ &= \sum_{j=J+1}^{\infty} \beta_j \phi_j(x) \tag{$\hat{\beta}_j$ is unbiased} \end{align*}\begin{align*} \mathbb{V}[\hat{r}(x)] &= \mathbb{V}\left[\sum_{j=1}^J \hat{\beta}_j \phi_j(x) \right] \\ &= \sum_{j=1}^J \mathbb{V}[\hat{\beta}_j]\phi_j^2(x) + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \phi_j(x) \phi_k(x) \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] \end{align*}Thus,
\begin{align*} R(J) &= \int \left[ \sum_{j=1}^J \mathbb{V}[\hat{\beta}_j]\phi_j^2(x) + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \phi_j(x) \phi_k(x) \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] + \sum_{j=J+1}^{\infty} \beta_j^2 \phi_j^2(x) \right] \, dx \\ &= \sum_{j=1}^J \mathbb{V}[\hat{\beta}_j]\int \phi_j^2(x) \, dx + \sum_{j=1}^J \sum_{k=1, k\ne j}^J \text{Cov}[\hat{\beta}_j, \hat{\beta}_k] \int \phi_j(x) \phi_k(x) \, dx + \sum_{j=J+1}^{\infty} \beta_j^2 \int \phi_j^2(x) \, dx \\ &= \frac{J\sigma^2}{n} + \sum_{j=J+1}^{\infty} \beta_j^2 \tag{$\mathbb{V}[\hat{\beta}_j] = \sigma^2/n$ and orthonormality} \end{align*}Let
$$\psi_1 = \left(\frac{1}{\sqrt{3}}, \frac{1}{\sqrt{3}}, \frac{1}{\sqrt{3}} \right), \psi_2 = \left(\frac{1}{\sqrt{2}}, -\frac{1}{\sqrt{2}}, 0 \right), \psi_3 = \left(\frac{1}{\sqrt{6}}, \frac{1}{\sqrt{6}}, -\frac{2}{\sqrt{6}} \right).$$Show that these vectors have norm 1 and are orthogonal.
Solution:
Computing norms:
\begin{align*} ||\psi_1|| &= \left( \left(\frac{1}{\sqrt{3}} \right)^2 + \left(\frac{1}{\sqrt{3}} \right)^2 + \left(\frac{1}{\sqrt{3}} \right)^2 \right)^{1/2} = \left( \frac13 + \frac13 + \frac13 \right)^{1/2} = 1 \\ ||\psi_2|| &= \left( \left(\frac{1}{\sqrt{2}} \right)^2 + \left(-\frac{1}{\sqrt{2}} \right)^2 + 0^2 \right)^{1/2} = \left( \frac12 + \frac12 + 0\right)^{1/2} = 1 \\ ||\psi_3|| &= \left( \left(\frac{1}{\sqrt{6}} \right)^2 + \left(\frac{1}{\sqrt{6}} \right)^2 + \left(-\frac{2}{\sqrt{6}} \right)^2 \right)^{1/2} = \left( \frac16 + \frac16 + \frac23 \right)^{1/2} = 1 \\ \end{align*}Taking inner products (noting, by commutativity of the inner product, we need only show three such products):
\begin{align*} \langle \psi_1, \psi_2 \rangle = \frac{1}{\sqrt{3}}\frac{1}{\sqrt{2}} + \frac{1}{\sqrt{3}}\left(-\frac{1}{\sqrt{2}}\right) + \frac{1}{\sqrt{3}}(0) = 0 \\ \langle \psi_1, \psi_3 \rangle = \frac{1}{\sqrt{3}}\frac{1}{\sqrt{6}} + \frac{1}{\sqrt{3}}\frac{1}{\sqrt{6}} + \frac{1}{\sqrt{3}}\left(-\frac{2}{\sqrt{6}} \right) = 0 \\ \langle \psi_2, \psi_3 \rangle = \frac{1}{\sqrt{2}}\frac{1}{\sqrt{6}} + \left(-\frac{1}{\sqrt{2}}\right)\frac{1}{\sqrt{6}} + 0\left(-\frac{2}{\sqrt{6}} \right) = 0 \\ \end{align*}
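A quick numerical check (a sketch): stacking the vectors as rows, the Gram matrix should be the $3 \times 3$ identity.
psi = np.array([
    [1 / np.sqrt(3), 1 / np.sqrt(3), 1 / np.sqrt(3)],
    [1 / np.sqrt(2), -1 / np.sqrt(2), 0.0],
    [1 / np.sqrt(6), 1 / np.sqrt(6), -2 / np.sqrt(6)],
])
print(np.round(psi @ psi.T, 10))  # entry (i, j) is <psi_i, psi_j>; expect the identity matrix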
Prove Parseval's relation equation (21.6):
$$||f||^2 \equiv \int f^2(x) \, dx = \sum_{j=1}^{\infty} \beta_j^2 \equiv ||\beta||^2$$Solution:
\begin{align*} \int f^2(x) \, dx &= \int \left[ \sum_{j=1}^{\infty} \beta_j^2 \phi_j^2(x) + \sum_{j=1}^{\infty}\sum_{k=1, k\ne j}^{\infty} \beta_j \beta_k \phi_j(x) \phi_k(x) \right] \, dx \\ &= \sum_{j=1}^{\infty} \beta_j^2 \int \phi_j^2(x) \, dx + \sum_{j=1}^{\infty} \sum_{k=1, k\ne j}^{\infty} \beta_j \beta_k \int \phi_j(x) \phi_k(x) \, dx \tag{$f \in L_2$, plus Fubini's theorem} \\ &= \sum_{j=1}^{\infty} \beta_j^2 \tag{orthonormality of basis functions} \end{align*}
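A quick numerical illustration (a sketch): for $f(x) = \sin(\pi x)$ in the cosine basis $\phi_0 \equiv 1$, $\phi_j(x) = \sqrt{2}\cos(j \pi x)$ on $[0,1]$, the squared norm $\int_0^1 f^2 = 1/2$ should match the sum of squared coefficients.
xx = np.linspace(0, 1, 100_001)
f_vals = np.sin(np.pi * xx)
norm_sq = np.mean(f_vals ** 2)  # Riemann approximation of ||f||^2 = 1/2
betas = [np.mean(f_vals)]  # beta_0, since phi_0 = 1
betas += [np.mean(f_vals * np.sqrt(2) * np.cos(j * np.pi * xx)) for j in range(1, 50)]
print(f"||f||^2 ~ {norm_sq:.5f}, sum of beta_j^2 ~ {np.sum(np.square(betas)):.5f}")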
Plot the first five Legendre polynomials. Verify, numerically, that they are orthonormal.
Solution:
The first few are given in the text:
We compute $P_4$ using the recursion relation (with $j=3$):
\begin{align*} P_4(x) &= \frac{(2 \cdot 3 + 1)xP_3(x) - 3P_2(x)}{4} \\ &= \frac{7x \left[\frac{1}{2}\left(5x^3 - 3x\right)\right] - 3\left[\frac{1}{2}\left(3x^2 - 1\right)\right]}{4} \\ &=\frac{1}{8}\left(35x^4 - 30x^2 + 3\right) \end{align*}
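As a quick cross-check (a sketch), numpy's Legendre utilities convert the Legendre-series representation of $P_4$ into the same power-series coefficients:
from numpy.polynomial import legendre as npleg
print(npleg.leg2poly([0, 0, 0, 0, 1]))  # coefficients of P_4 by increasing degree: [3/8, 0, -30/8, 0, 35/8]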
Plotting the functions (after normalization):
def get_polynomial(x, coefficients):
yy = np.zeros(len(x))
for order, value in enumerate(coefficients):
yy += value * np.power(x, order)
return yy
legendre_coefficients = {"P_0": [1],
"P_1": [0, 1],
"P_2": [-1/2, 0, 3/2],
"P_3": [0, -3/2, 0, 5/2],
"P_4": [3/8, 0, -30/8, 0, 35/8]}
fig = plt.figure(figsize=(10, 6))
x = np.linspace(-1, 1, 100)
for j, (key, coefs) in enumerate(legendre_coefficients.items()):
normalized_coefs = np.array(coefs) * math.sqrt((2 * j + 1) / 2) # normalize
y = get_polynomial(x, normalized_coefs)
plt.plot(x, y, label=f"${key}(x)$")
plt.legend()
plt.show()
Verifying orthonormality:
N = int(1e6)
x = np.linspace(-1, 1, N + 1)
for j_a, (key_a, value_a) in enumerate(legendre_coefficients.items()):
value_a = np.array(value_a) * math.sqrt((2 * j_a + 1) / 2) # normalization
for j_b, (key_b, value_b) in enumerate(legendre_coefficients.items()):
if key_b > key_a:
value_b = np.array(value_b) * math.sqrt((2 * j_b + 1) / 2) # normalization
y_a = get_polynomial(x, value_a)
y_b = get_polynomial(x, value_b)
print(f"<{key_a},{key_b}> = {(2 / (N)) * np.sum(y_a * y_b):.3f}")
<P_0,P_1> = -0.000
<P_0,P_2> = 0.000
<P_0,P_3> = -0.000
<P_0,P_4> = 0.000
<P_1,P_2> = -0.000
<P_1,P_3> = 0.000
<P_1,P_4> = -0.000
<P_2,P_3> = -0.000
<P_2,P_4> = 0.000
<P_3,P_4> = -0.000
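The printout above covers the off-diagonal inner products (orthogonality). A check of the diagonal terms (the squared norms), using the same Riemann approximation and normalization, completes the verification:
for j, (key, coefs) in enumerate(legendre_coefficients.items()):
    normalized_coefs = np.array(coefs) * math.sqrt((2 * j + 1) / 2)  # normalization
    y = get_polynomial(x, normalized_coefs)
    print(f"<{key},{key}> = {(2 / N) * np.sum(y * y):.3f}")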
Expand the following functions in the cosine basis on [0, 1]. For (a) and (b), find the coefficients $\beta_j$ analytically. For $(c)$ and $(d)$, find the coefficients $\beta_j$ numerically, i.e.
$$\beta_j = \int_0^1 f(x) \phi_j(x) \, dx \approx \frac{1}{N} \sum_{r=1}^N f \left( \frac{r}{N} \right) \phi_j \left(\frac{r}{N}\right)$$for some large integer $N$. Then plot the partial sum $\sum_{j=1}^n \beta_j \phi_j(x)$ for increasing values of $n$.
(a) $f(x) = \sqrt{2} \cos(3\pi x)$
(b) $f(x) = \sin(\pi x)$
(c) $f(x) = \sum_{j=1}^{11} h_j K(x-t_j)$ where $K(t) = (1 + \text{sign}(t))/2$, $\text{sign}(x) = -1$ if $x < 0$, $\text{sign}(x) = 0$ if $x = 0$, and $\text{sign}(x) = 1$ if $x > 0$,
$$(t_j) = (.1, .13, .15, .23, .25, .40, .44, .65, .76, .78, .81),$$
$$(h_j) = (4, −5, 3, −4, 5, −4.2, 2.1, 4.3, −3.1, 2.1, −4.2).$$
(d) $f(x) = \sqrt{x(1-x)}\sin\left(\frac{2.1 \pi}{(x + 0.05)}\right)$.
Solution:
(a) Observe $f(x) = \phi_3(x)$. Thus, the coefficients are
$$ \beta_j = \begin{cases} 1 & j = 3 \\ 0 & \text{otherwise.} \\ \end{cases} $$(b)
\begin{align*} \beta_j &= \int_0^1 f(x) \phi_j(x) \, dx \\ &= \sqrt{2} \int_0^1 \sin(\pi x) \cos(j \pi x) \, dx \\ \end{align*}Integrating by parts: \begin{align*} I = \int_0^1 \cos(j \pi x) \sin(\pi x) \, dx &= \left.\left[-\frac{\cos(j \pi x) \cos(\pi x)}{\pi}\right]\right\rvert_0^1 - j \int_0^1 \cos(\pi x) \sin(j \pi x) \, dx \\ &= \frac{\cos(j \pi) + 1}{\pi} - jI_2, \end{align*}
where $I_2 = \int_0^1 \cos(\pi x) \sin(j \pi x) \, dx$. Applying integration by parts to $I_2$, we get:
\begin{align*} I_2 = \int_0^1 \cos(\pi x) \sin(j \pi x) \, dx &= \left.\left[\frac{\sin(j \pi x) \sin(\pi x)}{\pi} \right]\right\rvert_0^1 - j \int_0^1 \cos(j \pi x) \sin(\pi x) \, dx \\ &= 0 - jI. \end{align*}Substituting $I_2 = -jI$ back into the expression for $I$ and solving (valid for $j \ne 1$) gives
$$\beta_j = \sqrt{2} \int_0^1 \cos(j \pi x) \sin(\pi x) \, dx = \frac{\sqrt{2}(\cos(j \pi) + 1)}{\pi(1-j^2)}, \qquad j \ge 2.$$For $j = 1$, a direct computation gives $\beta_1 = \sqrt{2}\int_0^1 \sin(\pi x)\cos(\pi x)\,dx = 0$, and for $j = 0$ (where $\phi_0 \equiv 1$), $\beta_0 = \int_0^1 \sin(\pi x)\,dx = 2/\pi$; these are the special cases handled in the code below.
def f_a(x):
return np.sqrt(2) * np.cos(3 * np.pi * x)
def f_b(x):
return np.sin(np.pi * x)
t = np.array([.1, .13, .15, .23, .25, .40, .44, .65, .76, .78, .81])
h = np.array([4, -5, 3, -4, 5, -4.2, 2.1, 4.3, -3.1, 2.1, -4.2])
def f_c(x):
    # K(t) = (1 + sign(t)) / 2; the constant 1/2 terms drop out here because the h_j sum to 0,
    # so using sign(t) / 2 gives the same function.
    return np.sum((np.sign(x - np.repeat(np.reshape(t, [-1, 1]), len(x), axis=1)) / 2).T * h, axis=1)
def f_d(x):
return np.sqrt(x * (1 - x)) * np.sin(2.1 * np.pi / (x + 0.05))
def beta_function_a(j):
return 1 if j == 3 else 0
def beta_function_b(j):
if j == 0:
return 2 / np.pi
elif j == 1:
return 0
else:
return (math.sqrt(2) * (np.cos(np.pi * j) + 1)) / (np.pi * (1 - j ** 2))
def cosine_phi(x, j):
"""cosine basis function on [0,1]"""
if j == 0:
return np.ones(len(x))
else:
return math.sqrt(2) * np.cos(j * np.pi * x)
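As a quick sanity check on the integration by parts above, we can compare the analytic coefficients for (b) against a numerical approximation of $\int_0^1 f(x)\phi_j(x)\,dx$ on a fine grid (a sketch):
zz = np.linspace(0, 1, 100_001)
for j in range(5):
    numeric = np.mean(f_b(zz) * cosine_phi(zz, j))  # Riemann approximation of the integral
    print(f"j={j}: analytic = {beta_function_b(j):+.6f}, numeric = {numeric:+.6f}")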
class orthogonal_function_estimator():
def __init__(self, f, phi, beta_function=None):
self.f = f
self.phi = phi
        self.N = 10000  # size of mesh used for numerical integration and plotting
        self.n = 1028  # number of coefficients to compute
        self.beta = {}
        if beta_function is None:
            inputs = np.linspace(0, 1, self.N)
            for j in range(self.n):
                self.beta[j] = (1 / self.N) * np.sum(f(inputs) * self.phi(inputs, j))
        else:
            for j in range(self.n):
                self.beta[j] = beta_function(j)
    def plot_f(self):
        xx = np.linspace(0, 1, self.N)
        plt.plot(xx, self.f(xx), label="$f(x)$")
    def plot_phi(self, k):
        xx = np.linspace(0, 1, self.N)
        plt.plot(xx, self.phi(xx, k), label=f"$\\phi_{{{k}}}(x)$")
def partial_sum(self, x, J):
y = np.zeros(len(x))
for j in range(0, J):
y = y + (self.beta[j] * self.phi(x, j))
return y
def plot(self):
_, ax = plt.subplots(nrows=1, ncols=4, figsize=(20,4))
        xx = np.linspace(0, 1, self.N)
ax[0].plot(xx, self.f(xx))
ax[0].set_title("$f(x)$")
y_min = min(np.array(self.f(xx))) * 1.5
y_max = max(np.array(self.f(xx))) * 1.5
ax[0].set_ylim(y_min, y_max)
for index, J in enumerate([16, 64, 512]):
ax[index + 1].plot(xx, self.partial_sum(xx, J))
ax[index + 1].set_title("$\hat{f}$" + f"$[{J}](x)$")
ax[index + 1].set_ylim(y_min, y_max)
plt.show()
ofe = orthogonal_function_estimator(f_a, cosine_phi, beta_function_a)
ofe.plot()
ofe = orthogonal_function_estimator(f_b, cosine_phi, beta_function_b)
ofe.plot()
ofe = orthogonal_function_estimator(f_c, cosine_phi)
ofe.plot()
ofe = orthogonal_function_estimator(f_d, cosine_phi)
ofe.plot()
Consider the glass fragments data from the book's website. Let $Y$ be refractive index and let $X$ be aluminum content (the fourth variable).
(a) Do a nonparametric regression to fit the model $Y = f(x) + \epsilon$ using the cosine basis method. The data are not on a regular grid. Ignore this when estimating the function. (But do sort the data first according to $x$.) Provide a function estimate, an estimate of the risk, and a confidence band.
(b) Use the wavelet method to estimate $f$.
data = pd.read_csv("data/glass.dat", sep=r"\s+", usecols=["RI", "Al"])
Y = data["RI"].values
X = data["Al"].values
sorted_indices = X.argsort()
X = X[sorted_indices]
Y = Y[sorted_indices]
X = (X - min(X)) / (max(X) - min(X))
class orthogonal_function_regressor():
def __init__(self, x, y, phi):
self.x = x
self.y = y
self.phi = phi
self.n = 1000
self.beta = {}
for j in range(self.n):
self.beta[j] = np.mean(y * self.phi(x, j))
self.k = int(self.n / 4)
        self.sigma_2_hat = (self.n / self.k) * np.sum(np.power(np.array([self.beta[j] for j in range(self.n - self.k, self.n)]), 2))
def risk_estimate(self, J):
R = (J * self.sigma_2_hat / self.n)
        betas = np.array([self.beta[j] for j in range(J, self.n)])
terms = np.power(betas, 2) - self.sigma_2_hat / self.n
positive_terms = np.where(terms > 0, terms, 0)
R += np.sum(positive_terms)
return R
def partial_sum(self, x, J, confidence_band=False):
y = np.zeros(len(x))
for j in range(0, J):
y = y + (self.beta[j] * self.phi(x, j))
if confidence_band:
            a = np.sqrt(np.sum([np.power(self.phi(x, j), 2) for j in range(J)], axis=0))
            c = (a * np.sqrt(self.sigma_2_hat) * np.sqrt(chi2.ppf(0.95, df=J))) / np.sqrt(self.n)  # sqrt of the upper-0.05 chi^2 quantile
l = y - c
u = y + c
return y, l, u
else:
return y
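A brief justification for the confidence-band half-width computed above (a sketch of the argument, assuming the $\chi^2$ confidence-ball construction from the text): the estimated coefficients are approximately normal, so
\begin{align*} \left\{ \beta : \sum_{j < J} (\hat{\beta}_j - \beta_j)^2 \le \frac{\hat{\sigma}^2 \chi^2_{J, \alpha}}{n} \right\} \end{align*}is an approximate $1-\alpha$ confidence set for the first $J$ coefficients, and for any $\beta$ in this set, Cauchy-Schwarz gives
\begin{align*} \left| \sum_{j < J} (\beta_j - \hat{\beta}_j) \phi_j(x) \right| \le a(x)\, \hat{\sigma} \sqrt{\frac{\chi^2_{J, \alpha}}{n}}, \qquad a(x) = \left(\sum_{j < J} \phi_j^2(x)\right)^{1/2}, \end{align*}which is the half-width $c$ computed in the code.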
ofr = orthogonal_function_regressor(X, Y, cosine_phi)
Js = np.arange(20)
plt.plot(Js, [ofr.risk_estimate(j) for j in Js], linewidth=1)
plt.xticks(Js)
plt.xlabel("$J$")
plt.ylabel("Estimated Risk")
plt.show()
xx = np.linspace(min(X), max(X), 1000)
plt.scatter(X, Y, alpha=0.25)
y, l, u = ofr.partial_sum(xx, 6, confidence_band=True)
plt.plot(xx, y, label="Orthogonal Function Regressor (J=6)")
plt.plot(xx, l, color='orange')
plt.plot(xx, u, color='orange', label="95% Confidence Band")
plt.xlabel("Aluminum Content")
plt.ylabel("Refractive Index")
plt.legend()
plt.show()
print(f"Estimated Risk: {ofr.risk_estimate(6):.3f}")
Estimated Risk: 25.075
Using the wavelet method:
def haar_phi(x):
return (np.where(((0 <= x) & (x < 1)), 1, 0)).astype(np.float64)
def haar_psi(x):
return np.where((x >= 0) & (x < 1), np.where((x < 1/2), -1, 1), 0)
def haar_psi_jk(x, j, k):
return (2 ** (j / 2)) * haar_psi(((2 ** j) * x) - k)
n = len(X)
alpha_hat = (haar_phi(X) @ Y) / n
J = int(np.ceil(np.log2(n)))
D = {}
for j in range(J):
    for k in range(2 ** j):
D[j,k] = (haar_psi_jk(X, j, k) @ Y) / n
# universal thresholding
D_values = np.array([D[J - 1, k] for k in range(2 ** (J - 1))])
sigma_hat = np.sqrt(n) * np.median(np.abs(D_values)) / 0.6745  # (21.37): each D[J-1, k] has standard deviation ~ sigma / sqrt(n)
for j in range(J):
    for k in range(2 ** j):
if np.abs(D[j,k]) <= sigma_hat * np.sqrt(2 * np.log(n) / n):
D[j,k] = 0
x = np.linspace(0, 1, 1000)
partial_sum = alpha_hat * haar_phi(x)
for j in range(J):
    for k in range(2 ** j):
partial_sum += D[j,k] * haar_psi_jk(x, j, k)
plt.scatter(X,Y, alpha=0.5)
plt.plot(x, partial_sum)
plt.show()
Show that the Haar wavelets are orthonormal.
Solution:
The Haar scaling function is defined:
$$ \phi(x) = \begin{cases} 1 & \text{if} \,\, 0 \le x < 1 \\ 0 & \text{otherwise,} \\ \end{cases} $$the mother Haar wavelet is defined:
$$ \psi(x) = \begin{cases} -1 & \text{if} \,\, 0 \le x \le \frac12 \\ 1 & \text{if} \,\, \frac12 < x \le 1, \end{cases} $$and finally, for any integers $j$ and $k$, we define the daughter wavelet:
$$ \psi_{jk}(x) = 2 ^ {j / 2} \psi(2^j x - k).$$We first establish normality:
\begin{align*} ||\phi||^2 &= \int \phi^2(x) \, dx = \int_0^1 \, dx = 1 \end{align*}\begin{align*} ||\psi||^2 &= \int_{0}^{1/2} (-1)^2 \, dx + \int_{1/2}^1 1^2 \, dx = 1 \end{align*}\begin{align*} ||\psi_{jk}||^2 &= \int_{2^{-j}k}^{(k + 1/2)2^{-j}} (-2^{j/2})^2 \, dx + \int_{(k + 1/2)2^{-j}}^{(k + 1)2^{-j}} (2^{j/2})^2 \, dx = 2^j \cdot 2^{-j-1} + 2^j \cdot 2^{-j-1} = 1. \end{align*}Orthogonality can be established by the following observations. If $(j_1, k_1) \ne (j_2, k_2)$, it must be the case that either the supports of $\psi_{j_1k_1}$ and $\psi_{j_2k_2}$ are disjoint, or the support of one is contained in one of the two halves of the support of the other (dyadic intervals are either disjoint or nested). In the first case, the pointwise product of the functions is zero, and hence so is the inner product. Suppose the second case holds, and WLOG, suppose $\text{supp}(\psi_{j_1k_1}) \subseteq \text{supp}(\psi_{j_2k_2})$. Then $\psi_{j_2k_2}$ is constant on $\text{supp}(\psi_{j_1k_1})$ (taking the value $-2^{j_2/2}$ or $2^{j_2/2}$, depending on whether that support lies in the left or right half). In either case, the pointwise product is a constant multiple of $\psi_{j_1k_1}$, which has integral 0. Finally, $\langle \phi, \psi_{jk} \rangle = \int_0^1 \psi_{jk}(x) \, dx = 0$ for every daughter wavelet supported in $[0, 1]$, since $\phi \equiv 1$ there and each $\psi_{jk}$ integrates to zero.
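A quick numerical spot check of these claims (a sketch, reusing the haar_phi and haar_psi_jk functions defined above and a Riemann approximation on a fine grid):
zz = np.linspace(0, 1, 200_001)
pairs = [((0, 0), (0, 0)), ((1, 0), (1, 0)), ((2, 3), (2, 3)), ((0, 0), (1, 0)), ((1, 0), (1, 1)), ((0, 0), (2, 3))]
for (j1, k1), (j2, k2) in pairs:
    inner_product = np.mean(haar_psi_jk(zz, j1, k1) * haar_psi_jk(zz, j2, k2))
    print(f"<psi_({j1},{k1}), psi_({j2},{k2})> = {inner_product:.3f}")
print(f"<phi, psi_(0,0)> = {np.mean(haar_phi(zz) * haar_psi_jk(zz, 0, 0)):.3f}")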
Consider again the doppler signal:
$$f(x) = \sqrt{x(1 - x)} \sin \left( \frac{2.1\pi}{x + 0.05}\right).$$Let $n = 1,024$, $\sigma = 0.1$, and let $(x_1, \dots, x_n) = (1 / n, \dots, 1)$. Generate data
$$Y_i = f(x_i) + \sigma \epsilon_i$$where $\epsilon_i \sim N(0, 1)$.
(a) Fit the curve using the cosine basis method. Plot the function estimate and confidence band for $J = 10, 20, \dots, 100$.
(b) Use Haar wavelets to fit the curve.
N = 1_024
sigma = 0.1
x = np.arange(1 / N, 1 + 1/N, 1/N)
eps = norm.rvs(size=N)
Y = f_d(x) + sigma * eps
# fitting using cosine basis method
def cosine_phi(x, j):
if j == 0:
return np.ones(len(x))
else:
return math.sqrt(2) * np.cos(j * np.pi * x)
ofr = orthogonal_function_regressor(x, Y, cosine_phi)
Js = np.arange(10, 110, 10)
fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(22,8))
ax = ax.flatten()
for index, J in enumerate(Js):
xx = np.linspace(0, 1, 1000)
y_est, lb, ub = ofr.partial_sum(xx, J=J, confidence_band=True)
ax[index].plot(xx, y_est, color='blue')
ax[index].plot(xx, lb, color='orange')
ax[index].plot(xx, ub, color='orange')
ax[index].set_ylim(-1, 1)
ax[index].set_title(f"J={J}")
ax[index].grid(False)
ax[index].set_yticks([])
# computing the coefficients using the DWT for Haar Wavelets
n = len(Y)
J = int(np.ceil(np.log2(n)))
D = {x: None for x in range(J)}
temp = Y / np.sqrt(n)
for j in range(J - 1, -1, -1):
m = 2 ** j
    I = np.arange(1, m + 1)
    D[j] = (temp[2 * I - 1] - temp[2 * I - 2]) / np.sqrt(2)
    temp = (temp[2 * I - 1] + temp[2 * I - 2]) / np.sqrt(2)
# applying universal thresholding
D_values = np.array([D[J-1][k] for k in range(0, int(2 ** (J-1)))])
sigma_hat = np.sqrt(n) * np.median(np.abs(D_values)) / 0.6745
for j in range(J):
for k in range(0, int(((2 ** (j))))):
if np.abs(D[j][k]) <= sigma_hat * np.sqrt(2 * np.log(n) / n):
D[j][k] = 0
# computing the approximation
xx = np.linspace(0, 1, 1_000)
alpha_hat = (haar_phi(x) @ Y) / n
partial_sum = alpha_hat * haar_phi(xx)
for j in range(J):
for k in range(0, int(2 ** j)):
partial_sum = partial_sum + D[j][k] * haar_psi_jk(xx, j, k)
plt.scatter(x, Y, alpha=0.03)
plt.plot(xx, partial_sum)
(Haar density Estimation.) Let $X_1, \dots, X_n \sim f$ for some density $f$ on $[0, 1]$. Let's consider constructing a wavelet histogram. Let $\phi$ and $\psi$ be the Haar father and mother wavelet. Write
$$f(x) \approx \phi(x) + \sum_{j=0}^{J-1}\sum_{k=0}^{2^j - 1} \beta_{j,k} \psi_{j,k}(x)$$where $J \approx \log_2(n)$. Let
$$\hat{\beta}_{jk} = \frac{1}{n} \sum_{i=1}^n \psi_{j,k}(X_i).$$(a) Show that $\hat{\beta}_{j,k}$ is an unbiased estimate of $\beta_{j,k}$.
(b) Define the Haar histogram
$$\hat{f}(x) = \phi(x) + \sum_{j=0}^B \sum_{k=0}^{2^j - 1} \hat{\beta}_{j,k} \psi_{j,k}(x)$$for $0 \le B \le J-1$.
(c) Find an approximate expression for the MSE as a function of $B$.
(d) Generate $n=1,000$ observations from a $\text{Beta}(15,4)$ density. Estimate the density using the Haar histogram. Use leave-one-out cross validation to choose $B$.
Solution:
Establishing unbiasedness. Note: we must assume $f \in L^2[0,1]$; this holds if, e.g., $f$ is bounded or continuous on $[0,1]$.
\begin{align*} \mathbb{E}[\hat{\beta}_{jk}] &= \frac{1}{n} \sum_{i=1}^n \mathbb{E}[\psi_{jk}(X_i)] \\ &= \mathbb{E}[\psi_{jk}(X_1)] \\ &= \int_0^1 \psi_{jk}(x) f(x) \, dx = \beta_{jk} \tag{Requires $f \in L^2[0,1]$} \end{align*}Finding an expression for the Mean Squared Error:
\begin{align*} \mathbb{E}[(\hat{f}(x) - f(x))^2] &= \mathbb{V}[\hat{f}(x) - f(x)] + \mathbb{E}[\hat{f}(x) - f(x)]^2 \\ &= \mathbb{V}[\hat{f}(x)] + \mathbb{E}[\hat{f}(x) - f(x)]^2 \\ \end{align*}\begin{align*} f(x) - \mathbb{E}[\hat{f}(x)] &= \left[\phi(x) + \sum_{j=0}^{J-1}\sum_{k=0}^{2^j - 1} \beta_{j,k} \psi_{j,k}(x) \right] - \left[\phi(x) + \sum_{j=0}^B \sum_{k=0}^{2^j - 1} \mathbb{E}[\hat{\beta}_{j,k}] \psi_{j,k}(x)\right] \\ &= \sum_{j=B+1}^{J-1}\sum_{k=0}^{2^j - 1} \beta_{j,k} \psi_{j,k}(x) \end{align*}\begin{align*} \mathbb{V}[\hat{f}(x)] &= \mathbb{V} \left[\phi(x) + \sum_{j=0}^{B}\sum_{k=0}^{2^j - 1} \hat{\beta}_{j,k} \psi_{j,k}(x) \right] \\ &= \sum_{j=0}^{B}\sum_{k=0}^{2^j - 1} \frac{\sigma^2_{jk}}{n}\psi_{j,k}^2(x) + \sum_{j=0}^{B}\sum_{k=0}^{2^j - 1}\sum_{j'=0}^{B}\sum_{k'=0, (j',k') \ne (j,k)}^{2^{j'} - 1} \text{Cov}[\hat{\beta}_{j,k}, \hat{\beta}_{j', k'}] \psi_{jk}(x) \psi_{j'k'}(x), \\ \end{align*}where $\sigma^2_{jk} = \mathbb{V}[\psi_{j,k}(X_1)]$. When integrating the squared bias plus the variance, the cross-terms vanish due to orthonormality, and we get:
\begin{align*} \text{MISE} = \sum_{j=B+1}^{J-1}\sum_{k=0}^{2^j - 1} \beta^2_{j,k} + \sum_{j=0}^{B}\sum_{k=0}^{2^j - 1} \frac{\sigma^2_{jk}}{n} \end{align*}
def haar_histogram(X, B):
def compute_beta(x):
beta = {}
for j in range(B + 1):
for k in range(2 ** j):
                beta[j, k] = np.sum(haar_psi_jk(x, j, k)) / len(x)
return beta
beta = compute_beta(X)
def f_hat(z):
f_hat = haar_phi(z).astype(np.float64)
for j in range(B + 1):
for k in range(2 ** j):
f_hat += beta[j,k] * haar_psi_jk(z, j, k)
return f_hat
return f_hat
n = 1_000
J = np.round(np.log2(n)).astype(int)
X = Beta.rvs(a=15, b=4, size=n)
f_hat = haar_histogram(X, 4)
step = 1e-4
xx = np.arange(0, 1 + step, step)
plt.plot(xx, f_hat(xx))
plt.show()
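For reference, we can overlay the true Beta(15, 4) density on the Haar histogram estimate (a quick sketch using the Beta object imported at the top):
xx_plot = np.arange(step, 1, step)
plt.plot(xx_plot, f_hat(xx_plot), label="Haar histogram ($B=4$)")
plt.plot(xx_plot, Beta.pdf(xx_plot, a=15, b=4), label="Beta(15, 4) density")
plt.legend()
plt.show()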
def risk_of_haar_histogram(X, B):
f_hat = haar_histogram(X, B)
xx = np.arange(0, 1 + step, step)
second_moment = np.sum((f_hat(xx) ** 2) * step)
return second_moment - (2 / n) * np.sum([haar_histogram(np.delete(X, i), B)(X[i]) for i in range(n)])
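The function above implements the standard leave-one-out cross-validation estimate of the risk (a sketch of the reasoning): up to the constant $\int f^2$, which does not depend on $B$,
\begin{align*} \int (\hat{f}(x) - f(x))^2 \, dx - \int f^2(x) \, dx = \int \hat{f}^2(x) \, dx - 2\int \hat{f}(x) f(x) \, dx, \end{align*}and $\int \hat{f}(x) f(x)\,dx = \mathbb{E}_{X \sim f}[\hat{f}(X)]$ is estimated by $\frac{1}{n}\sum_{i=1}^n \hat{f}_{(-i)}(X_i)$, where $\hat{f}_{(-i)}$ is the estimate computed without $X_i$. This yields the cross-validation score $\int \hat{f}^2(x)\,dx - \frac{2}{n}\sum_{i=1}^n \hat{f}_{(-i)}(X_i)$ returned above.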
risk = {}
for B in range(J+1):
risk[B] = risk_of_haar_histogram(X, B)
plt.plot(list(risk.keys()), list(risk.values()))
plt.xlabel("$B$")
plt.ylabel("$\hat{J}$", rotation=0)
plt.title("Risk of Haar Histogram by $B$")
print(f"Risk minimizer: B={np.argmin(list(risk.values()))}")
Risk minimizer: B=4
plt.figure(figsize=(20, 8))
for index, B in enumerate(range(J)):
f_hat = haar_histogram(X, B)
ax = plt.subplot(2, 5, index + 1)
ax.plot(xx, f_hat(xx))
ax.grid(False)
ax.set_title(f"$B = {B}$, $J = {risk[B]:.3f}$")
plt.show()
In this question, we will explore the motivation for equation (21.37). Let $X_1, \dots, X_n \sim N(0, \sigma^2)$. Let
$$\hat{\sigma} = \sqrt{n} \times \frac{\text{median}(|X_1|, \dots, |X_n|)}{0.6745}.$$(a) Show that $\mathbb{E}[\hat{\sigma}] = \sigma$.
(b) Simulate $n = 100$ observations from a $N(0,1)$ distribution. Compute $\hat{\sigma}$ as well as the usual estimate of $\sigma$. Repeat 1,000 times and compare the MSE.
(c) Repeat (b) but add some outliers to the data. To do this, simulate each observation from a $N(0, 1)$ with probability 0.95 and simulate each observation from a $N(0, 10)$ with probability 0.05.
Solution:
Note: the equation is incorrect; we do not scale by $\sqrt{n}$. The estimate should be
$$\hat{\sigma} = \frac{\text{median}(|X_1|, \dots, |X_n|)}{0.6745}.$$Fact: the sample median is a consistent estimator of the population median, and for large $n$ it is approximately unbiased; that is, if $Y_1, \dots, Y_n \sim f$, then $\mathbb{E}[\text{median}(Y_1, \dots, Y_n)] \approx F_{Y_1}^{-1}(1/2)$. Therefore, approximately,
$$\mathbb{E}[\hat{\sigma}] \approx \frac{F_{|X|}^{-1}(1/2)}{0.6745}$$where $X \sim N(0, \sigma^2)$. The result follows from observing that $|X|$ follows a half-normal distribution with parameter $\sigma$, which has median
$$F_{|X|}^{-1}(1/2) = \sigma \sqrt{2} \, \text{erf}^{-1}(1/2) = \sigma \, \Phi^{-1}(3/4) \approx 0.6745 \sigma.$$
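A one-line numerical check of that constant:
print(f"{norm.ppf(0.75):.4f}")  # Phi^{-1}(3/4) ~ 0.6745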
The new estimate improves on the MSE of the usual estimate, here taken to be the sample variance (since $\sigma = 1$ in the simulation, both estimates target the same value $\sigma^2 = \sigma = 1$):
$$\hat{\sigma}^2_{\text{old}} = \frac{1}{n-1}\sum_{i=1}^n (X_i - \bar{X})^2.$$
n = 100
k = 1000
X = norm.rvs(size=(n, k))
sigma_hat_old = np.sum(np.power(X - np.mean(X, axis=0), 2), axis=0) / (n - 1)
sigma_hat_new = np.median(np.abs(X), axis=0) / 0.6745
mse_old = np.mean(np.power(sigma_hat_old - 1, 2))
mse_new = np.mean(np.power(sigma_hat_new - 1, 2))
print(f"MSE of old estimate: {mse_old:.4f}")
print(f"MSE of new estimate: {mse_new:.4f}")
MSE of old estimate: 0.0195 MSE of new estimate: 0.0132
The second experiment demonstrates the new estimate is much more robust to outliers:
rand_ints = np.random.randint(0, 100, size=(n, k))
mask_5_percent = rand_ints < 5
normal_1 = norm.rvs(0, 1, size=(n, k))
normal_10 = norm.rvs(0, 10, size=(n, k))
X = np.where(mask_5_percent, normal_10, normal_1)
sigma_hat_old = np.sum(np.power(X - np.mean(X, axis=0), 2), axis=0) / (n - 1)
sigma_hat_new = np.median(np.abs(X), axis=0) / 0.6745
mse_old = np.mean(np.power(sigma_hat_old - 1, 2))
mse_new = np.mean(np.power(sigma_hat_new - 1, 2))
print(f"MSE of old estimate: {mse_old:.4f}")
print(f"MSE of new estimate: {mse_new:.4f}")
MSE of old estimate: 41.2768 MSE of new estimate: 0.0183
Repeat question 6 using the Haar basis.
Solution:
(a)
The coefficient of a basis function is computed by taking the inner product of the basis function and the function of interest:
\begin{align*} \alpha = \int_0^1 f(x) \phi(x) \, dx. \end{align*}\begin{align*} \beta_{jk} = \int_0^1 f(x) \psi_{jk}(x) \, dx. \end{align*}\begin{align*} \alpha &= \sqrt{2} \int_0^1 \cos(3 \pi x) \, dx = \frac{\sqrt{2}}{3 \pi} \sin(3 \pi) = 0 \end{align*}\begin{align*} \beta_{jk} &= \sqrt{2} \cdot 2^{j/2} \left[ -\int_{2^{-j}k}^{2 ^ {-j} (k + 1/2)} \cos(3 \pi x) \, dx + \int_{2^{-j}(k + 1/2)}^{2 ^ {-j} (k + 1)} \cos(3 \pi x) \, dx \right]\\ &= \frac{2^{(j + 1)/2}}{3 \pi}\left[\sin(3 \pi (k + 1) 2^{-j}) - 2\sin(3 \pi (k + 1/2) 2^{-j}) + \sin(3 \pi k 2^{-j}) \right] \end{align*}
def beta_function_a(j, k):
first_term = np.sin(3 * np.pi * (k + 1) * 2 ** (-j))
second_term = - 2 * np.sin(3 * np.pi * (k + 0.5) * 2 ** (-j))
third_term = np.sin(3 * np.pi * (k) * 2 ** (-j))
return (2 ** ((j + 1) / 2)/ (3 * np.pi)) * (first_term + second_term + third_term)
def wavelet_regressor(f, father_wavelet, daughter_wavelet, alpha=None, beta_function=None):
beta = {}
J = 6
z = np.arange(0, 1, 1e-4)
Y = f(z)
if beta_function is None:
alpha = (Y @ father_wavelet(z)) / len(Y)
for j in range(J + 1):
for k in range(2 ** j):
if beta_function is None:
beta[j,k] = (Y @ daughter_wavelet(z, j, k)) / len(Y)
else:
beta[j, k] = beta_function(j, k)
def f_hat(x):
result = alpha * father_wavelet(x)
for j in range(J + 1):
for k in range(2 ** j):
result += beta[j, k] * daughter_wavelet(x, j, k)
return result
return f_hat
f_hat = wavelet_regressor(f_a, haar_phi, haar_psi_jk, alpha=0, beta_function=beta_function_a)
xx = np.arange(0, 1, 1e-4)
plt.plot(xx, f_hat(xx))
(b)
\begin{align*} \alpha = \int_0^1 \sin(\pi x) \, dx = \left.-\frac{\cos(\pi x)}{\pi}\right\rvert_0^1 = \frac{2}{\pi} \end{align*}\begin{align*} \beta_{jk} &= 2^{j/2} \left[-\int_{2^{-j}k}^{2 ^ {-j} (k + 1/2)} \sin(\pi x) \, dx + \int_{2^{-j}(k + 1/2)}^{2 ^ {-j} (k + 1)} \sin(\pi x) \, dx \right]\\ &=2^{j /2} \left[ \left. \frac{\cos(\pi x)}{\pi} \right\rvert_{2^{-j}k}^{2 ^ {-j} (k + 1/2)} - \left. \frac{\cos(\pi x)}{\pi} \right\rvert_{2^{-j}(k + 1/2)}^{2 ^ {-j} (k + 1)} \right] \\ &= \frac{2^{j/2}}{\pi}\left[ 2\cos \left(\frac{(k + 1/2) \pi}{ 2 ^ {j}}\right) - \cos \left(\frac{k \pi}{ 2 ^ {j}}\right) - \cos \left(\frac{(k + 1) \pi}{ 2 ^ {j}}\right)\right] \end{align*}
def beta_function_b(j,k):
return ((2 ** (j / 2)) / np.pi) \
* (2 * np.cos(((k + (1/2)) * np.pi)/(2 ** j)) \
- np.cos(k * np.pi / (2 ** j)) - np.cos(((k + 1) * np.pi)/(2 ** j)))
f_hat = wavelet_regressor(f_b, haar_phi, haar_psi_jk, alpha = (2 / np.pi), beta_function=beta_function_b)
xx = np.arange(0, 1, 1e-4)
plt.plot(xx, f_hat(xx))
f_hat = wavelet_regressor(f_c, haar_phi, haar_psi_jk)
xx = np.arange(0, 1, 1e-4)
plt.plot(xx, f_hat(xx))
f_hat = wavelet_regressor(f_d, haar_phi, haar_psi_jk)
xx = np.arange(0, 1, 1e-4)
plt.plot(xx, f_hat(xx))