# FIND PRINCIPAL COMPONENT VECTORS.  Takes the training cases (as an N by p
# matrix) as the first argument.  The number of principal components to find,
# k, is the second argument.  The input variables are centred, but not
# rescaled, before the first k principal components are found.
#
# Arguments:
#   x.train  N by p matrix (or data frame, which is converted with as.matrix)
#            of training cases; rows are cases, columns are input variables.
#   k        Number of principal components to keep, from 0 to p.
#
# The result is a list with these elements:
#   vectors  p by k matrix whose columns are unit vectors in the principal
#            component directions (eigenvectors).  It is always a matrix,
#            even when k is 1.
#   values   Length-k vector with the variance of the training data in each
#            direction (eigenvalues).  The variance is computed with divisor
#            N (the number of training cases), not N-1.
#   means    Vector of sample means of the input variables (needed later to
#            centre inputs).

pca.vectors <- function(x.train, k) {

  x.train <- as.matrix(x.train)

  if (!is.numeric(k) || length(k) != 1 || is.na(k) ||
      k < 0 || k > ncol(x.train)) {
    stop("k must be a single number between 0 and ncol(x.train)",
         call. = FALSE)
  }

  # Subtract the sample mean from each input variable.

  means <- colMeans(x.train)
  centred <- sweep(x.train, 2, means)

  # Find the eigenvectors and eigenvalues of the covariance matrix.
  # crossprod(a) computes t(a) %*% a, and the result is symmetric by
  # construction, so tell eigen() that directly rather than have it test
  # for symmetry.

  eig <- eigen(crossprod(centred) / nrow(centred), symmetric = TRUE)

  # seq_len avoids the 1:0 trap when k is zero, and drop = FALSE keeps
  # "vectors" a matrix when k is one.

  keep <- seq_len(k)
  vectors <- eig$vectors[, keep, drop = FALSE]
  values <- eig$values[keep]

  # Return the results.

  list(vectors = vectors, values = values, means = means)
}


# FIND PROJECTIONS OF DATA ONTO PRINCIPAL COMPONENTS.  The first argument is
# a list as returned by pca.vectors.  The second argument is a matrix of data
# to project, with rows being cases and columns input values.  A plain vector
# is treated as a single case (one row).  The result is a matrix containing
# the projections of each case onto the principal component directions, with
# one row per case and one column per principal component.

pca.proj <- function(pc, x) {

  # A vector without dimensions (for example from x[i, ]) is one case.

  if (is.null(dim(x))) {
    x <- matrix(x, nrow = 1)
  } else {
    x <- as.matrix(x)
  }

  if (ncol(x) != length(pc$means)) {
    stop("x must have one column per input variable used in training",
         call. = FALSE)
  }

  # Subtract the sample mean over training cases from each input variable.

  centred <- sweep(x, 2, pc$means)

  # Return the projections onto the principal component directions.

  centred %*% pc$vectors
}