# GAUSSIAN BASIS FUNCTION MODEL EXAMPLE.
#
# Radford Neal, 2011.
#
# These functions are for the examples in lectures of a Gaussian basis
# function model for a real-valued target given a single real-valued input
# in the interval (0,1).  Includes selection of the basis function width
# and penalty magnitude by cross validation.


# CREATE A MATRIX OF BASIS FUNCTION VALUES.  Takes the vector of input
# values, xv, and width of a basis function, s, as arguments.  Returns the
# matrix of basis function values, in which the first column is all 1s,
# and subsequent columns are for basis functions centred at -2s, -s, 0,
# s, ..., 1, 1+s, 1+2s (assuming that 1/s is an integer).

Phi <- function (xv, s)
{
  # Centres of the Gaussian basis functions, spaced s apart, extending
  # two basis-function widths beyond each end of the (0,1) interval.
  m <- seq (-2*s, 1+2*s, by=s)

  # One column per basis function, plus a leading column of 1s for the
  # intercept.  (Named B rather than Phi to avoid shadowing this function.)
  B <- matrix (1, length(xv), length(m)+1)

  for (j in seq_along(m))
  { B[,1+j] <- exp (-0.5*(xv-m[j])^2/s^2)
  }

  B
}


# FIND A PENALIZED LEAST SQUARES ESTIMATE.  Takes a matrix of basis function
# values, Phi, (with first column all 1s), the vector of target values, tv,
# and the magnitude of the penalty, lambda, as arguments.  A penalty magnitude
# of zero (the default) will give the unpenalized least squares estimate.
# Returns the vector of estimates for the regression coefficients.

penalized.least.squares <- function (Phi, tv, lambda=0)
{
  # S selects which coefficients the penalty applies to - all of them
  # except the intercept, whose diagonal entry is zeroed.
  S <- diag(ncol(Phi))
  S[1,1] <- 0

  as.vector (solve (lambda*S + t(Phi)%*%Phi) %*% t(Phi) %*% tv)
}


# COMPUTE PREDICTIONS FOR TEST CASES.  Takes a matrix of basis function
# values for test cases, Phi, and a vector of regression coefficients, w,
# as arguments.  Returns a vector of predicted values for the targets in
# these test cases.

predictions <- function (Phi, w)
{
  as.vector (Phi %*% w)
}
# FIND VALIDATION ERROR.  Computes the validation squared error using specified
# values of s and lambda, with a specified subset of training cases reserved
# for a validation set.  Arguments are the input and target vectors for the
# training cases, xv and tv, the values of s and lambda, and a vector of
# indexes for the validation cases, vix.

validation.error <- function (xv, tv, s, lambda, vix)
{
  # Fit using only the non-validation cases, then predict the held-out ones.
  w <- penalized.least.squares (Phi(xv[-vix],s), tv[-vix], lambda)
  p <- predictions (Phi(xv[vix],s), w)

  mean ((tv[vix]-p)^2)
}


# FIND ARRAY OF VALIDATION ERRORS.  Computes the validation error for
# all combinations of s and lambda in vectors try.s and try.lambda, using
# a specified validation set, given by vix, with inputs and targets given
# by xv and tv.  Returns an array of average squared errors on validation
# cases, with row and column names indicating the values of s and lambda used.

val.array <- function (xv, tv, try.s, try.lambda, vix)
{
  V <- matrix (NA, length(try.s), length(try.lambda))
  rownames(V) <- paste0("s=",try.s)
  colnames(V) <- paste0("lambda=",try.lambda)

  for (i in seq_along(try.s))
  { for (j in seq_along(try.lambda))
    { V[i,j] <- validation.error (xv, tv, try.s[i], try.lambda[j], vix)
    }
  }

  V
}


# FIND ARRAY OF S-FOLD CROSS-VALIDATION ERRORS.  Computes the S-fold
# cross-validation error for all combinations of s and lambda in vectors
# try.s and try.lambda, with inputs and targets given by xv and tv.
# Returns an array of average squared errors on validation cases, with
# row and column names indicating the values of s and lambda used.  The
# training cases are assumed to be in random order, so that the S
# validation sets can be taken consecutively.

cross.val.array <- function (xv, tv, try.s, try.lambda, S)
{
  n <- length(tv)

  for (r in seq_len(S))
  {
    # Validation fold r: a consecutive block of roughly n/S cases.
    vix <- floor(1 + n*(r-1)/S) : floor(n*r/S)

    V <- val.array (xv, tv, try.s, try.lambda, vix)
    V.sum <- if (r==1) V else V.sum + V
  }

  V.sum / S
}
# FIND THE POSTERIOR MEAN OF THE REGRESSION COEFFICIENTS.  Takes as arguments
# a matrix of basis function values, Phi, (with first column all 1s), the
# vector of target values, tv, the noise standard deviation, sigma, the
# prior standard deviation of the regression coefficients (other than w0),
# omega, and the prior standard deviation of w0, omega0 (default the same
# as omega).  Returns the mean (which is also the mode) of the posterior
# distribution of the regression coefficients.

posterior.mean <- function (Phi, tv, sigma, omega, omega0=omega)
{
  # Inverse of the (diagonal) prior covariance of the coefficients.  The
  # explicit nrow guards against diag's scalar-argument behaviour (which
  # would treat a length-one vector as a dimension) when Phi has only one
  # column.
  S0.inv <- diag (1/c(omega0^2,rep(omega^2,ncol(Phi)-1)), nrow=ncol(Phi))

  as.vector (solve (sigma^2*S0.inv + t(Phi)%*%Phi) %*% t(Phi) %*% tv)
}


# COMPUTE MARGINAL LIKELIHOOD FOR A LINEAR BASIS FUNCTION MODEL.  Computes
# the log of the marginal likelihood for a data set with targets in vector
# tv and basis function values in the matrix Phi of a model with noise
# standard deviation sigma, prior standard deviation of the regression
# coefficients (other than w0) omega, and prior standard deviation of w0
# omega0 (default the same as omega).

marg.like <- function (Phi, tv, sigma, omega, omega0=omega)
{
  N <- length(tv)
  M <- ncol(Phi)

  # Prior covariance of the coefficients, with its inverse and log
  # determinant (all diagonal, so these are cheap).  The explicit nrow
  # guards against diag's scalar-argument behaviour when M is 1.
  S0 <- diag (c(omega0^2,rep(omega^2,M-1)), nrow=M)
  S0.inv <- diag (1/diag(S0), nrow=M)
  S0.log.det <- sum(log(diag(S0)))

  ml <- -(N/2)*log(2*pi) - (N/2)*log(sigma^2) - (1/2)*S0.log.det

  # Posterior covariance of the coefficients; its log determinant is
  # found from the diagonal of its Cholesky factor.
  SN.inv <- S0.inv + t(Phi) %*% Phi / sigma^2
  SN <- solve(SN.inv)
  SN.chol <- chol(SN)
  SN.log.det <- 2*sum(log(diag(SN.chol)))

  ml <- ml + (1/2)*SN.log.det

  # Posterior mean of the coefficients.
  mN <- SN %*% t(Phi) %*% tv / sigma^2

  # Subtract the data misfit and the prior penalty, both evaluated at the
  # posterior mean.
  ml <- ml - (1/2) * sum((tv-Phi%*%mN)^2) / sigma^2 -
             (1/2) * t(mN) %*% S0.inv %*% mN

  as.vector(ml)
}