# GAUSSIAN PROCESS REGRESSION FUNCTIONS.
#
# For STA 414/2104, Spring 2014, Assignment #1.


# CREATE THE COVARIANCE MATRIX FOR TRAINING RESPONSES.  The arguments are
# the matrix of training inputs, the covariance function (taking an input
# vector and a matrix as arguments), and the vector of hyperparameters.
# The first element in the hyperparameter vector is the residual standard
# deviation, whose square (the residual variance) is added to the diagonal
# of the covariance matrix, but which is ignored by the covariance
# function.  The result is the matrix of covariances for responses in
# training cases.

gp_cov_matrix <- function (x_train, covf, hypers)
{
  n <- nrow(x_train)
  C <- matrix(NA,n,n)

  # Fill each row of the covariance matrix using the covariance function.
  for (i in 1:n)
  { C[i,] <- covf(x_train[i,],x_train,hypers)
  }

  diag(C) <- diag(C) + hypers[1]^2

  C
}


# PREDICT THE RESPONSE FOR TEST CASES.  The arguments are the matrix of
# inputs for training cases, the vector of responses for training cases,
# the matrix of inputs for test cases, and the covariance function and
# hyperparameter vector as described for gp_cov_matrix.  The result is
# a vector of predictions of the responses for test cases (the predictive
# mean).

gp_predict <- function (x_train, y_train, x_test, covf, hypers)
{
  n <- nrow(x_train)

  C <- gp_cov_matrix(x_train,covf,hypers)
  b <- solve(C,y_train)

  r <- numeric(nrow(x_test))
  for (i in 1:nrow(x_test))
  { k <- covf(x_test[i,],x_train,hypers)
    r[i] <- k %*% b
  }

  r
}


# FIND THE LOG LIKELIHOOD BASED ON RESPONSES IN THE TRAINING SET.
# The arguments are the matrix of inputs for training cases, the
# vector of responses for training cases, and the covariance function
# and hyperparameter vector as described for gp_cov_matrix.  The
# result is the log of the probability density for the training
# responses (with all normalizing factors included).

gp_log_likelihood <- function (x_train, y_train, covf, hypers)
{
  n <- nrow(x_train)

  C <- gp_cov_matrix(x_train,covf,hypers)
  b <- solve(C,y_train)

  # Log density of the multivariate normal; determinant() returns the log
  # of the determinant's modulus by default.
  as.numeric (- (n/2)*log(2*pi) - determinant(C)$modulus/2 - y_train %*% b / 2)
}


# FIND HYPERPARAMETER VALUES MAXIMIZING LOG PROBABILITY.  Arguments
# are the matrix of training inputs, the vector of training responses,
# the covariance function (as for gp_cov_matrix), and a vector of
# initial values for the hyperparameters.  (Any additional arguments are
# passed on to the nlm function.)  The result is a vector of values
# for the hyperparameters that should usually at least locally maximize
# the likelihood.
#
# The maximization is done in terms of the log likelihood, with hyperparameter
# h transformed to sqrt(h-0.01) in order to avoid problems that would arise
# with hyperparameters that are negative or near zero (less than 0.01).

gp_find_hypers <- function (x_train, y_train, covf, hypers0, ...)
{
  m <- nlm (
    function (h) - gp_log_likelihood(x_train,y_train,covf,h^2+0.01),
    sqrt(hypers0-0.01),
    ...
  )

  cat ("Maximum log likelihood:", -m$minimum, "\n")

  m$estimate^2 + 0.01
}
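
# EXAMPLE: MAXIMUM LIKELIHOOD FITTING AND PREDICTION.  An illustrative
# sketch only, not part of the original assignment code.  The
# squared-exponential covariance function and the simulated data below are
# assumptions, chosen to match the interface expected above: covf takes one
# input vector, a matrix of inputs, and the hyperparameter vector (with
# hypers[1] ignored), and returns a vector of covariances.  Here hypers[2]
# plays the role of the signal scale and hypers[3] the length scale.

sq_exp_cov <- function (x, X, hypers)
{
  # Squared distances from the single input x to every row of X.
  d2 <- rowSums ((X - matrix(x,nrow(X),ncol(X),byrow=TRUE))^2)
  hypers[2]^2 * exp (- d2 / hypers[3]^2)
}

set.seed(1)
x_train <- matrix (runif(40,0,5), 40, 1)
y_train <- sin (x_train[,1]) + rnorm (40, 0, 0.2)
x_test <- matrix (seq(0,5,length.out=11), 11, 1)

ml_hypers <- gp_find_hypers (x_train, y_train, sq_exp_cov, c(0.5,1,1))
ml_pred <- gp_predict (x_train, y_train, x_test, sq_exp_cov, ml_hypers)
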

# FIND HYPERPARAMETERS USING CROSS VALIDATION.  Arguments are the matrix
# of training inputs, the vector of training responses, the covariance
# function (as for gp_cov_matrix), the number of cross-validation folds,
# and a vector of initial values for the hyperparameters.  (Any additional
# arguments are passed on to the nlm function.)  The result is a vector of
# values for the hyperparameters that should usually at least locally
# minimize the squared error from cross validation.
#
# The minimization is done in terms of hyperparameters h transformed to
# sqrt(h-0.01) in order to avoid problems that would arise with
# hyperparameters that are negative or near zero (less than 0.01).

gp_find_hypers_cv <- function (x_train, y_train, covf, K, hypers0, ...)
{
  # Boundaries dividing the training cases into K folds of roughly equal size.
  div <- floor(seq(0,length(y_train),length.out=K+1))
  e <- rep(0,length(y_train))

  m <- nlm (
    function (h)
    { for (i in 1:K)
      { r <- (div[i]+1):div[i+1]
        e[r] <- (y_train[r] - gp_predict (x_train[-r,,drop=FALSE], y_train[-r],
                                          x_train[r,,drop=FALSE], covf,
                                          h^2 + 0.01))^2
      }
      mean(e)
    },
    sqrt(hypers0-0.01),
    ...
  )

  cat ("Minimum cv error:", m$minimum, "\n")

  m$estimate^2 + 0.01
}


# BAYESIAN PREDICTION USING IMPORTANCE SAMPLING.  Arguments are the matrix
# of training inputs, the vector of training responses, the matrix of test
# inputs, the covariance function (as for gp_cov_matrix), the vector of
# prior means for the logs of the hyperparameters, the vector of prior
# standard deviations for the logs of the hyperparameters, and the number
# of hyperparameter values to sample from the prior.  The value returned
# is a list with element "pred" being the vector of mean predictions for
# test cases, element "hypers" being the mean values for the
# hyperparameters, and element "ml" being the estimated marginal
# likelihood.

gp_imp_samp <- function (x_train, y_train, x_test, covf, prmean, prsd, K)
{
  h <- length(prmean)

  pred <- 0
  hypers <- 0
  tot <- 0

  for (i in 1:K)
  { hyp <- exp(rnorm(h,prmean,prsd))
    # The importance weight is the likelihood of the sampled hyperparameter
    # values.  (Note that exp may underflow to zero for large data sets.)
    wgt <- exp(gp_log_likelihood(x_train,y_train,covf,hyp))
    pred <- pred + wgt * gp_predict(x_train,y_train,x_test,covf,hyp)
    hypers <- hypers + wgt * hyp
    tot <- tot + wgt
  }

  # The average of the K importance weights estimates the marginal likelihood.
  list (pred=pred/tot, hypers=hypers/tot, ml=tot/K)
}
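
# EXAMPLE: CROSS VALIDATION AND BAYESIAN PREDICTION.  An illustrative
# sketch only, reusing sq_exp_cov and the simulated data from the example
# above.  The number of folds, the prior means and standard deviations for
# the logs of the hyperparameters, and the number of prior samples are all
# arbitrary assumptions.

cv_hypers <- gp_find_hypers_cv (x_train, y_train, sq_exp_cov, 5, c(0.5,1,1))

imp <- gp_imp_samp (x_train, y_train, x_test, sq_exp_cov,
                    prmean=log(c(0.5,1,1)), prsd=rep(1,3), K=100)

imp$pred    # predictive means for the test cases
imp$hypers  # posterior mean hyperparameter values
imp$ml      # estimate of the marginal likelihood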