# STA 414/2104, Spring 2014, Assignment #2 - Mixture model functions. # CONSTANTS REGARDING THE DIGIT CLASSIFICATION PROBLEM. ndigits <- 10 # Number of different digits npixels <- 14*14 # Numbers of pixels in digit images # FIT A MIXTURE MODEL WITH K COMPONENTS FOR EACH DIGIT. Arguments are # the matrix of pixels for each training case (trn), the vector of # labels for the training cases (trnlab), the number of components for # each digit (K), the penalty magnitude (alpha), and the number of # iterations of EM to do. The result is a list with elements pi # (component probabilities), theta (pixel probabilities for each # component), and r (responsibilities). Responsibilities are set # randomly to start, then M and E steps alternate for the specified # number of iterations. The log likelihood and log likelihood plus # penalty are printed after each iteration. digit_mix_train <- function(trn,trnlab,K,alpha,iters) { # Allocate the matrix of pixel probabilities, with rows for pixels and # columns for mixture components. theta <- matrix(NA,npixels,ndigits*K) # Set the responsibilities randomly to start. r <- matrix(0,ntrn,ndigits*K) for (i in 1:ntrn) { sub <- K*trnlab[i]+(1:K) r[i,sub] <- runif(K,1,2) r[i,sub] <- r[i,sub] / sum(r[i,sub]) } # Do "iters" iterations of the EM algorithm, beginning with an M step. prev_pen_ll <- -Inf for (iter in 1:iters) { # The M step. Re-estimates pi and theta. pi <- colMeans(r) for (k in 1:(ndigits*K)) { theta[,k] <- (colSums(r[,k]*trn) + alpha) / (sum(r[,k]) + 2*alpha) } # The E step. Computes responsibilities, and also the log likelihood. ll <- 0 for (i in 1:ntrn) { sub <- K*trnlab[i]+(1:K) pixels <- trn[i,] p <- pi[sub] * exp (colSums (log (theta[,sub,drop=FALSE]^pixels * (1-theta[,sub,drop=FALSE])^(1-pixels)))) r[i,sub] <- p / sum(p) ll <- ll + log(sum(p)) } # Print the log likelihood (ll) and log likelihood plus penalty (pen_ll). pen_ll <- ll + alpha * sum(log(theta*(1-theta))) cat("Iteration",iter,": log likelihood =",round(ll,3), " log likelihood + penalty =",round(pen_ll,3),"\n") # Warn if the log likelihood plus penalty has gone down (it shouldn't!). # But ignore decreases of less than 1e-6 (possibly just due to rounding). if (pen_ll < prev_pen_ll-1e-6) { cat("WARNING: log likelihood + penalty decreased!\n") } prev_pen_ll <- pen_ll } # Return the parameter estimates and responsibilities. list(pi=pi,theta=theta,r=r) } # FIND THE PROBABILITY DISTRIBUTIONS FOR LABELS IN TEST CASES. The # arguments are the pi and theta parameters (as found by digit_mix_train), # and the matrix of pixels for test cases (tst). The result is a matrix # of label probabilities for each test case. digit_mix_predict <- function (pi,theta,tst) { K <- length(pi) / ndigits pr <- matrix(NA,nrow(tst),ndigits) for (i in 1:nrow(tst)) { # Compute the matrix of probabilities that this digit's image would be # generated from each component. Note that theta is a matrix with # rows for pixels and columns for components, so when it is raised # to a vector of pixel values, the operation is done column-by-column. p <- pi * exp (colSums (log (theta^tst[i,] * (1-theta)^(1-tst[i,])))) # Sum these probabilities separately for each possible digit label. for (y in 0:(ndigits-1)) pr[i,y+1] <- sum(p[K*y+(1:K)]) # Normalize these sums to produce a probability distribution over # labels for this test case. pr[i,] <- pr[i,] / sum(pr[i,]) } pr }