# STA 437/1005, Fall 2008, Assignment #3. # Get the PCA functions. source("pca.r") # Read the gene expression data, and define some parts of it. d <- read.table("gene.txt") prev <- d[,1] M <- t(as.matrix(d)) alpha <- 2:19 cdc15 <- 20:43 both <- c(alpha,cdc15) # Do PCA in six ways: alpha, or cdc15, or both, and cov or corr matrix. k <- 4 # number of PCs to find pc.both <- pca.vectors(M[both,],k) pc.both.s <- pca.vectors(M[both,],k,scale=TRUE) pc.alpha <- pca.vectors(M[alpha,],k) pc.alpha.s <- pca.vectors(M[alpha,],k,scale=TRUE) pc.cdc15 <- pca.vectors(M[cdc15,],k) pc.cdc15.s <- pca.vectors(M[cdc15,],k,scale=TRUE) # Find projections of the alpha and cdc15 data onto each of the sets of PCs. pp.both.alpha <- pca.proj(pc.both,M[alpha,]) pp.both.cdc15 <- pca.proj(pc.both,M[cdc15,]) pp.both.s.alpha <- pca.proj(pc.both.s,M[alpha,]) pp.both.s.cdc15 <- pca.proj(pc.both.s,M[cdc15,]) pp.alpha.alpha <- pca.proj(pc.alpha,M[alpha,]) pp.alpha.cdc15 <- pca.proj(pc.alpha,M[cdc15,]) pp.alpha.s.alpha <- pca.proj(pc.alpha.s,M[alpha,]) pp.alpha.s.cdc15 <- pca.proj(pc.alpha.s,M[cdc15,]) pp.cdc15.alpha <- pca.proj(pc.cdc15,M[alpha,]) pp.cdc15.cdc15 <- pca.proj(pc.cdc15,M[cdc15,]) pp.cdc15.s.alpha <- pca.proj(pc.cdc15.s,M[alpha,]) pp.cdc15.s.cdc15 <- pca.proj(pc.cdc15.s,M[cdc15,]) # Produce plots of the projections onto the PCs against time. pdf("ass3-plots.pdf",horizontal=FALSE,pointsize=8) par(mfrow=c(k,2)) for (i in 1:k) { plot(pp.both.alpha[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from both experiments, using covariance matrix")) title("alpha experiment") plot(pp.both.cdc15[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from both experiments, using covariance matrix")) title("cdc15 experiment") } for (i in 1:k) { plot(pp.both.s.alpha[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from both experiments, using correlation matrix")) title("alpha experiment") plot(pp.both.s.cdc15[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from both experiments, using correlation matrix")) title("cdc15 experiment") } for (i in 1:k) { plot(pp.alpha.alpha[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from alpha experiment, using covariance matrix")) title("alpha experiment") plot(pp.alpha.cdc15[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from alpha experiment, using covariance matrix")) title("cdc15 experiment") } for (i in 1:k) { plot(pp.alpha.s.alpha[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from alpha experiment, using correlation matrix")) title("alpha experiment") plot(pp.alpha.s.cdc15[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from alpha experiment, using correlation matrix")) title("cdc15 experiment") } for (i in 1:k) { plot(pp.cdc15.alpha[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from cdc15 experiment, using covariance matrix")) title("alpha experiment") plot(pp.cdc15.cdc15[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from cdc15 experiment, using covariance matrix")) title("cdc15 experiment") } for (i in 1:k) { plot(pp.cdc15.s.alpha[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from cdc15 experiment, using correlation matrix")) title("alpha experiment") plot(pp.cdc15.s.cdc15[,i],type="b",pch=20,ylab="", xlab=paste("PC",i,"from cdc15 experiment, using correlation matrix")) title("cdc15 experiment") } # Produce plots of some of the projections onto PCs against each other. par(mfrow=c(3,2)) plot(pp.both.alpha[,2],pp.both.alpha[,4],type="b",pch=20, xlab="PC 2 from both experiments, using covariance matrix", ylab="PC 4 from both experiments, using covariance matrix") title("alpha experiment") plot(pp.both.cdc15[,2],pp.both.cdc15[,4],type="b",pch=20, xlab="PC 2 from both experiments, using covariance matrix", ylab="PC 4 from both experiments, using covariance matrix") title("cdc15 experiment") plot(pp.alpha.alpha[,2],pp.alpha.alpha[,3],type="b",pch=20, xlab="PC 2 from alpha experiment, using covariance matrix", ylab="PC 3 from alpha experiment, using covariance matrix") title("alpha experiment") plot(pp.alpha.cdc15[,2],pp.alpha.cdc15[,3],type="b",pch=20, xlab="PC 2 from alpha experiment, using covariance matrix", ylab="PC 3 from alpha experiment, using covariance matrix") title("cdc15 experiment") plot(pp.cdc15.alpha[,3],pp.cdc15.alpha[,4],type="b",pch=20, xlab="PC 3 from cdc15 experiment, using covariance matrix", ylab="PC 4 from cdc15 experiment, using covariance matrix") title("alpha experiment") plot(pp.cdc15.cdc15[,3],pp.cdc15.cdc15[,4],type="b",pch=20, xlab="PC 3 from cdc15 experiment, using covariance matrix", ylab="PC 4 from cdc15 experiment, using covariance matrix") title("cdc15 experiment") # Identify genes involved in cell cycle based on PCs 2 and 4 from PCA on # covariance matrix of data from both experiments, using the sum of squares # of eigenvector components. cyc.measure <- pc.both$e[,2]^2 + pc.both$e[,4]^2 cyc.genes <- order(-cyc.measure)[1:800] # Plot a histogram of the log of the cyclic measure, marking cut-off point. par(mfrow=c(2,1)) hist(log(cyc.measure),nclass=30) abline(v=log(cyc.measure[cyc.genes[800]]),lwd=2) # See how well we did on previously identified cyclic genes. cat("\nFraction of previously identified genes in our top 800\n\n") print (sum(prev[cyc.genes]) / sum(prev)) # Close plot file. dev.off()