# STA 414/2104 ASSIGNMENT 3, Spring 2013. # COMPUTE THE PENALIZED LOG LIKELIHOOD. This isn't actually required for # the assignment, but does let one see how well the EM algorithm is working. # Arguments are the data matrix, the inverse covariance matrix for the # component means, the mixing proportions, the noise standard deviation, # and the matrix of component means for all variables. gp_mix_pll <- function (X, Cinv, mpi, sigma, mu) { N <- nrow(X) # Number of data items M <- ncol(X) # Number of variables K <- nrow(mu) # Number of mixture components # Set lp to the log density of the mu parameters under the GP model. lp <- ( - 0.5*K*M*log(2*pi) + 0.5*M*determinant(Cinv)$modulus - 0.5 * sum (mu * (Cinv %*% mu)) ) # Set ll to the log likelihood. ll <- 0 for (i in 1:N) { pr <- 0 for (k in 1:K) { pr <- pr + mpi[k] * prod (dnorm(X[i,],mu[k,],sigma)) } ll <- ll + log(pr) } # Return the penalty plus log likelihood. lp + ll } # RUN THE EM ALGORITHM TO FIT THE MODEL. Arguments are the data matrix, the # number of mixture components, the scale factor for the covariance of component # means, the initial noise standard deviation to use, and the number of # iterations of EM to do. Optional arguments are a matrix of responsibilities # and a flag for enabling trace output. gp_mix_em <- function (X, K, s, sigma, iters, r = matrix(runif(K*N,0.1,1),N,K), trace=FALSE) { N <- nrow(X) # Number of data items M <- ncol(X) # Number of variables if (nrow(r)!=N || ncol(r)!=K) { stop("Matrix of responsibilities is the wrong size") } # Make sure initial responsibilities sum to one. for (i in 1:N) { r[i,] <- r[i,] / sum(r[i,]) } # Compute the inverse covariance matrix for the mu parameters for a variable. C <- matrix(numeric(0),K,K) for (i in 1:K) { for (j in 1:K) { C[i,j] <- 2^2 + 2^2*exp(-((i-j)/(K*s))^2) } } diag(C) <- diag(C) + 0.001^2 Cinv <- solve(C) # Do EM iterations, starting with M step, then E step. The M step does # not find the final maximum, but instead maximizes mu given the previous # value of sigma (hence the need for passing an initial value for sigma). mu <- matrix(numeric(0),K,M) for (t in seq_len(iters)) { if (trace) cat("\nITERATION",t,"\n\n") # DO THE M STEP. # Find number of items each component is (fractionally) responsible for. rt <- colSums(r) # Re-estimate the mixing proportions. mpi <- rt / sum(rt) # Re-estimate mu, using current sigma. D <- Cinv diag(D) <- diag(D) + rt/sigma^2 for (j in 1:M) { mu[,j] <- solve(D,as.vector(X[,j]%*%r/sigma^2)) } # Re-estimate sigma, using the mu just estimated. sigma_sq <- 0 for (j in 1:M) { for (i in 1:N) { sigma_sq <- sigma_sq + sum(r[i,]*(X[i,j]-mu[,j])^2) } } sigma <- sqrt (sigma_sq / (N*M)) if (trace) { cat("log penalized likelihood:",gp_mix_pll(X, Cinv, mpi, sigma, mu),"\n") cat ("mpi:", round(mpi,4), "\nsigma:",round(sigma,4), "\nmu:\n") print(round(mu,4)) } # DO THE E STEP. for (i in 1:N) { for (k in 1:K) { r[i,k] <- mpi[k] * prod (dnorm(X[i,],mu[k,],sigma)) } r[i,] <- r[i,] / sum(r[i,]) } } # Return a list with the parameter estimates and final responsibilities. list (mpi=mpi, mu=mu, sigma=sigma, r=r) } # FIND AVERAGE LOG PROBABILITY OF DATA ITEMS. Arguments are the mixing # proportions, the component means, the noise standard deviation, and # a matrix of data items. Returns the average log probability of the # data items. mix_pred <- function (mpi, mu, sigma, X) { K <- length(mpi) lpd <- 0 for (i in 1:nrow(X)) { pr <- 0 for (k in 1:K) { pr <- pr + mpi[k] * prod(dnorm(X[i,],mu[k,],sigma)) } lpd <- lpd + log(pr) } lpd / nrow(X) }