import numpy as np
import sklearn.datasets
def load_iris():
    """Load the Iris dataset with the data matrix transposed.

    Returns:
        A tuple ``(D, L)`` where ``D`` is the transpose of the ``'data'``
        entry returned by ``sklearn.datasets.load_iris()`` and ``L`` is its
        ``'target'`` entry (also passed through ``.T``).
    """
    data = sklearn.datasets.load_iris()
    # Transpose so the result is laid out as features x samples, which is the
    # column-per-sample layout the rest of this file indexes with D[:, idx].
    D = data['data'].T
    # NOTE(review): presumably 'target' is 1-D, in which case .T is a no-op;
    # it is kept so the returned value is exactly what callers got before.
    L = data['target'].T
    return D, L
def split_db_2to1(D, L, seed=0):
    """Randomly split a dataset into 2/3 training and 1/3 evaluation samples.

    Args:
        D: 2-D array whose columns are the samples (it is indexed as
            ``D[:, idx]``).
        L: Array of labels indexed by sample position (``L[idx]``).
        seed: Value passed to ``np.random.seed`` before the permutation is
            drawn, so the same seed always produces the same split.

    Returns:
        ``((DTR, LTR), (DTE, LTE))``: the training data and labels followed
        by the evaluation data and labels.
    """
    n_samples = D.shape[1]
    # Truncating conversion: the training set gets floor(2/3 * n_samples).
    nTrain = int(n_samples * 2.0 / 3.0)

    # NOTE: this reseeds NumPy's *global* random state, which also affects
    # any later np.random.* call made by the caller.
    np.random.seed(seed)
    idx = np.random.permutation(n_samples)

    idxTrain = idx[:nTrain]
    idxTest = idx[nTrain:]

    DTR = D[:, idxTrain]
    DTE = D[:, idxTest]
    LTR = L[idxTrain]
    LTE = L[idxTest]
    return (DTR, LTR), (DTE, LTE)
def ml_estimate_parameters(DTR, LTR):
    """Compute the ML estimates of mean and covariance for each class.

    Args:
        DTR: 2-D training data array with one sample per column.
        LTR: Array of training labels, one per column of ``DTR``.

    Returns:
        A tuple ``(mus, sigmas)`` of two lists ordered like
        ``np.unique(LTR)`` (i.e. by sorted class label):
            mus    = class means, each a column vector of shape (n_features, 1)
            sigmas = class covariance matrices, each (n_features, n_features)
    """
    mus = []
    sigmas = []
    for c in np.unique(LTR):
        # All training samples (columns) that belong to class c.
        Dc = DTR[:, LTR == c]
        # keepdims=True keeps the mean as a column so it broadcasts over Dc.
        mu_c = Dc.mean(axis=1, keepdims=True)
        Dc_centered = Dc - mu_c
        # ML covariance estimate: normalise by Nc (not Nc - 1).
        Sigma_c = (Dc_centered @ Dc_centered.T) / Dc.shape[1]
        mus.append(mu_c)
        sigmas.append(Sigma_c)
    return mus, sigmas
# --- Main execution ---
# Load Iris, split it 2:1 with a fixed seed, fit one Gaussian per class on the
# training part, and print the estimated parameters.
D, L = load_iris()
(DTR, LTR), (DTE, LTE) = split_db_2to1(D, L, seed=0)
mus, sigmas = ml_estimate_parameters(DTR, LTR)

# mus / sigmas are ordered like np.unique(LTR), so pair them by position.
for c, mu_c, Sigma_c in zip(np.unique(LTR), mus, sigmas):
    # "\n" (a real line break) puts the values on the line after the header;
    # the previous "\\n" printed a literal backslash-n instead.
    print(f"Class {c} ML mean (mu_{c}):\n", mu_c.flatten())
    print(f"Class {c} ML covariance: (Sigma_{c}):\n", Sigma_c, "\n")
Explanation of the key steps:
load_iris()
Loads the Iris dataset from sklearn.datasets, returning the data matrix $\bold{D}$ transposed (so shape = features $\times$ samples) and the label array $\bold{L}$.
split_db_2to1(D, L, seed = 0)
Splits the dataset into a training set (2/3 of the samples) and an evaluation set (1/3) using a random permutation generated with a fixed seed (seed = 0) to ensure reproducibility.
ml_estimate_parameters(DTR, LTR)
For each class $c\in\{0,1,2\}$, extract the training samples belonging to that class $\bold{D}_c$.
Compute the empirical mean $\bold{\mu}_c$ as
$$ \bold{\mu}_c=\frac{1}{N_c}\sum_{i=1}^{N_c}\bold{x}_{c,i} $$
Compute the empirical covariance $\bold{\Sigma}_c$ as
$$ \bold{\Sigma}_c=\frac{1}{N_c}\sum_{i=1}^{N_c}(\bold{x}_{c,i}-\bold{\mu}_c)(\bold{x}_{c,i}-\bold{\mu}_c)^T $$