# GAUSSIAN BASIS FUNCTION MODEL EXAMPLE.
#
# Radford Neal, 2011.
#
# These functions are for the examples in lectures of a Gaussian basis
# function model for a real-valued target given a single real-valued input
# in the interval (0,1).  Includes selection of the basis function width
# and penalty magnitude by cross validation.


# CREATE A MATRIX OF BASIS FUNCTION VALUES.  Takes the vector of input
# values, xv, and width of a basis function, s, as arguments.  Returns the
# matrix of basis function values, in which the first column is all 1s,
# and subsequent columns are for basis functions centred at -2s, -s, 0,
# s, ..., 1, 1+s, 1+2s (assuming that 1/s is an integer).

Phi <- function (xv, s)
{
  # Centres of the Gaussian basis functions, spaced s apart, extending
  # two basis-function widths beyond each end of the (0,1) interval.
  m <- seq (-2*s, 1+2*s, by=s)

  # One column per basis function, plus a leading column of 1s for the
  # intercept.  (Named B rather than Phi to avoid shadowing this function.)
  B <- matrix (1, length(xv), length(m)+1)

  for (j in seq_along(m))
  { B[,1+j] <- exp (-0.5*(xv-m[j])^2/s^2)
  }

  B
}


# FIND A PENALIZED LEAST SQUARES ESTIMATE.  Takes a matrix of basis function
# values, Phi, (with first column all 1s), the vector of target values, tv,
# and the magnitude of the penalty, lambda, as arguments.  A penalty magnitude
# of zero (the default) will give the unpenalized least squares estimate.
# Returns the vector of estimates for the regression coefficients.

penalized.least.squares <- function (Phi, tv, lambda=0)
{
  # S selects which coefficients the penalty applies to - all of them
  # except the intercept, whose diagonal entry is zeroed.
  S <- diag(ncol(Phi))
  S[1,1] <- 0

  as.vector (solve (lambda*S + t(Phi)%*%Phi) %*% t(Phi) %*% tv)
}


# COMPUTE PREDICTIONS FOR TEST CASES.  Takes a matrix of basis function
# values for test cases, Phi, and a vector of regression coefficients, w,
# as arguments.  Returns a vector of predicted values for the targets in
# these test cases.

predictions <- function (Phi, w)
{
  as.vector (Phi %*% w)
}
# FIND VALIDATION ERROR.  Computes the validation squared error using specified
# values of s and lambda, with a specified subset of training cases reserved
# for a validation set.  Arguments are the input and target vectors for the
# training cases, xv and tv, the values of s and lambda, and a vector of
# indexes for the validation cases, vix.

validation.error <- function (xv, tv, s, lambda, vix)
{
  # Fit using only the non-validation cases, then predict the held-out ones.
  w <- penalized.least.squares (Phi(xv[-vix],s), tv[-vix], lambda)
  p <- predictions (Phi(xv[vix],s), w)

  mean ((tv[vix]-p)^2)
}


# FIND ARRAY OF VALIDATION ERRORS.  Computes the validation error for
# all combinations of s and lambda in vectors try.s and try.lambda, using
# a specified validation set, given by vix, with inputs and targets given
# by xv and tv.  Returns an array of average squared errors on validation
# cases, with row and column names indicating the values of s and lambda used.

val.array <- function (xv, tv, try.s, try.lambda, vix)
{
  V <- matrix (NA, length(try.s), length(try.lambda))
  rownames(V) <- paste0("s=",try.s)
  colnames(V) <- paste0("lambda=",try.lambda)

  for (i in seq_along(try.s))
  { for (j in seq_along(try.lambda))
    { V[i,j] <- validation.error (xv, tv, try.s[i], try.lambda[j], vix)
    }
  }

  V
}


# FIND ARRAY OF S-FOLD CROSS-VALIDATION ERRORS.  Computes the S-fold
# cross-validation error for all combinations of s and lambda in vectors
# try.s and try.lambda, with inputs and targets given by xv and tv.
# Returns an array of average squared errors on validation cases, with
# row and column names indicating the values of s and lambda used.  The
# training cases are assumed to be in random order, so that the S
# validation sets can be taken consecutively.

cross.val.array <- function (xv, tv, try.s, try.lambda, S)
{
  n <- length(tv)

  for (r in seq_len(S))
  {
    # Validation fold r: a consecutive block of roughly n/S cases.
    vix <- floor(1 + n*(r-1)/S) : floor(n*r/S)

    V <- val.array (xv, tv, try.s, try.lambda, vix)
    V.sum <- if (r==1) V else V.sum + V
  }

  V.sum / S
}
# FIND THE POSTERIOR MEAN OF THE REGRESSION COEFFICIENTS.  Takes as arguments
# a matrix of basis function values, Phi, (with first column all 1s), the
# vector of target values, tv, the noise standard deviation, sigma, the
# prior standard deviation of the regression coefficients (other than w0),
# omega, and the prior standard deviation of w0, omega0 (default the same
# as omega).  Returns the mean (which is also the mode) of the posterior
# distribution of the regression coefficients.

posterior.mean <- function (Phi, tv, sigma, omega, omega0=omega)
{
  # Inverse of the (diagonal) prior covariance of the coefficients.  The
  # explicit nrow guards against diag's scalar-argument behaviour (which
  # would treat a length-one vector as a dimension) when Phi has only one
  # column.
  S0.inv <- diag (1/c(omega0^2,rep(omega^2,ncol(Phi)-1)), nrow=ncol(Phi))

  as.vector (solve (sigma^2*S0.inv + t(Phi)%*%Phi) %*% t(Phi) %*% tv)
}


# COMPUTE MARGINAL LIKELIHOOD FOR A LINEAR BASIS FUNCTION MODEL.  Computes
# the log of the marginal likelihood for a data set with targets in vector
# tv and basis function values in the matrix Phi of a model with noise
# standard deviation sigma, prior standard deviation of the regression
# coefficients (other than w0) omega, and prior standard deviation of w0
# omega0 (default the same as omega).

marg.like <- function (Phi, tv, sigma, omega, omega0=omega)
{
  N <- length(tv)
  M <- ncol(Phi)

  # Prior covariance of the coefficients, with its inverse and log
  # determinant (all diagonal, so these are cheap).  The explicit nrow
  # guards against diag's scalar-argument behaviour when M is 1.
  S0 <- diag (c(omega0^2,rep(omega^2,M-1)), nrow=M)
  S0.inv <- diag (1/diag(S0), nrow=M)
  S0.log.det <- sum(log(diag(S0)))

  ml <- -(N/2)*log(2*pi) - (N/2)*log(sigma^2) - (1/2)*S0.log.det

  # Posterior covariance of the coefficients; its log determinant is
  # found from the diagonal of its Cholesky factor.
  SN.inv <- S0.inv + t(Phi) %*% Phi / sigma^2
  SN <- solve(SN.inv)
  SN.chol <- chol(SN)
  SN.log.det <- 2*sum(log(diag(SN.chol)))

  ml <- ml + (1/2)*SN.log.det

  # Posterior mean of the coefficients.
  mN <- SN %*% t(Phi) %*% tv / sigma^2

  # Subtract the data misfit and the prior penalty, both evaluated at the
  # posterior mean.
  ml <- ml - (1/2) * sum((tv-Phi%*%mN)^2) / sigma^2 -
             (1/2) * t(mN) %*% S0.inv %*% mN

  as.vector(ml)
}