# STA 414, 2012, ASSIGNMENT 2 SOLUTION, SCRIPT FOR DATA SET 1.
#
# Radford M. Neal, 2012

source("mlp2.r")


# Read and scale data.  The first column of each file is used as the
# response and the remaining columns as the inputs.  The test inputs are
# scaled with the centres and scales computed from the training inputs, so
# both sets go through exactly the same transformation.

trn <- as.matrix(read.table("artc-trn.dat", header = TRUE))
test <- as.matrix(read.table("artc-test.dat", header = TRUE))

test.y <- test[, 1]
trn.y <- trn[, 1]

trn.x <- scale(trn[, -1])
test.x <- scale(test[, -1],
                center = attr(trn.x, "scaled:center"),
                scale = attr(trn.x, "scaled:scale"))


# Print performance just predicting the mean of the training responses.
# The training mean is used as the predicted probability for test cases
# whose response equals 1, and one minus it for all other test cases.

cat("Average test log probability with training mean:",
    mean(log(ifelse(test.y == 1, mean(trn.y), 1 - mean(trn.y)))),
    "\n\n")


# Learning constants.  These are handed on to the functions from mlp2.r
# under the same names.

iters <- 10000   # passed as iters (presumably training iterations - confirm)
m <- 6           # passed as m (presumably number of hidden units - confirm)
S <- 4           # passed as S (presumably number of CV folds - confirm)


# Run cross-validation with stepsize adjustment switched off or on.
#
# Arguments:
#   rel.eta   value passed on as the rel.eta argument of mlp.cross.val
#   pdf.file  name of the PDF file that receives the plots (2x2 layout)
#   heading   text written with cat() before the run starts
#
# Prints the result of mlp.cross.val after removing its best.tst.pred
# element, and returns that result invisibly.  try1 and try2 are presumably
# the candidate values for the two penalty parameters - confirm in mlp2.r.
#
# The data (trn.x, trn.y, test.x, test.y) and the learning constants
# (iters, m, S) are taken from the script level.  The random seed is reset
# to 1 before every run so that runs are comparable.  The graphics device is
# closed through on.exit, so it is not left open if the run stops with an
# error.

run.cv <- function (rel.eta, pdf.file, heading)
{
  pdf(pdf.file, width = 6, height = 6)
  on.exit(dev.off(), add = TRUE)
  par(mfrow = c(2, 2))

  cat(heading)

  set.seed(1)
  r <- mlp.cross.val(trn.y, trn.x, eta1 = 0.0003, eta2 = 0.0003,
                     try1 = c(0, 1, 2, 4, 8, 16), try2 = 0,
                     rel.eta = rel.eta, S = S, iters = iters, m = m,
                     Xtst = test.x, ytst = test.y, cv.plot = TRUE)

  # Drop best.tst.pred before printing, so it does not appear in the output.
  r$best.tst.pred <- NULL
  print(r)

  invisible(r)
}


# Do runs with no stepsize adjustment.

r.noadj <- run.cv(rel.eta = FALSE, pdf.file = "artc-noadj.pdf",
                  heading = "No stepsize adjustment:\n\n")


# Do runs with stepsize adjustment.

r.adj <- run.cv(rel.eta = TRUE, pdf.file = "artc-adj.pdf",
                heading = "\nWith stepsize adjustment:\n\n")


# Compare weight magnitudes over runs with and without stepsize adjustment.

# Plot, for rel.eta = FALSE and rel.eta = TRUE side by side, how the sum of
# squared input-hidden weights attached to each input evolves over the
# training iterations.  Uses trn.x, trn.y, iters, m and S from earlier in
# the script, and mlp.train from mlp2.r.

pdf("artc-cmp.pdf", width = 6, height = 4)

# The finally clause closes the PDF device even if training or plotting
# stops with an error.  Assignments made inside the braces still happen at
# the script level, exactly as they would without the tryCatch wrapper.

invisible(tryCatch({

  par(mfrow = c(1, 2))

  n <- nrow(trn.x)
  p <- ncol(trn.x)

  # Train on rows 1 to round(n/S) of the training data only.
  wh <- (round(0 * n / S) + 1):round(1 * n / S)

  for (rel.eta in c(FALSE, TRUE)) {

    # Same seed for both settings, so the two runs differ only in rel.eta.
    set.seed(1)
    res <- mlp.train(trn.y[wh], trn.x[wh, ], eta1 = 0.0003, eta2 = 0.0003,
                     lambda1 = 0, lambda2 = 0, rel.eta = rel.eta,
                     iters = iters, m = m)

    # Keep the first iters rows of the parameter trace and the columns from
    # 2*m+2 onwards, which are treated below as p*m input-hidden weights.
    # NOTE(review): this offset assumes the first 2*m+1 columns of
    # res$params hold the other parameters - confirm against mlp2.r.
    wts <- res$params[seq_len(iters), (2 * m + 2):ncol(res$params),
                      drop = FALSE]
    stopifnot(ncol(wts) == p * m)

    # wsq[i, j] is the sum over k of the squared weight in column
    # j + (k-1)*p at iteration i.  Viewing the squared weights as an
    # iters x p x m array and summing over the last dimension gives the
    # same numbers as reshaping every row into a p x m matrix and taking
    # its row sums, but in a single vectorised call.
    wsq <- rowSums(array(wts^2, c(iters, p, m)), dims = 2)

    # Empty frame with a fixed, log-scaled vertical range, so that the two
    # panels can be compared directly.
    plot(c(1, iters), c(1e-4, 1e1), type = "n", log = "y",
         xlab = "iteration",
         ylab = "log sum of squares of input-hidden weights")
    title(paste("rel.eta =", rel.eta))

    # One line per input; colours cycle through 1..5 and the line type
    # changes after every five inputs, so each input gets a distinct
    # colour/type pair.
    for (j in seq_len(p)) {
      lines(wsq[, j], col = (j - 1) %% 5 + 1, lty = (j - 1) %/% 5 + 1)
    }
  }

}, finally = dev.off()))