# STA 437/1005, Fall 2009, Assignment #1, Dataset 2.

# Read data, separate class (cls) and other variables (as matrix ds).
# The class label is taken from the last column of the file.

d0 <- read.table("sat.dat", header = TRUE)
cls <- d0[, ncol(d0)]
ds <- as.matrix(d0[, -ncol(d0)])

# Separate data by class.  drop = FALSE keeps each subset a matrix even if
# a class has only one row, so the column lookups by name below still work.

ds.1 <- ds[cls == 1, , drop = FALSE]
ds.2 <- ds[cls == 2, , drop = FALSE]
ds.3 <- ds[cls == 3, , drop = FALSE]

# Draw a grid of histograms into a PDF file: one row per variable, one
# column per class.
#
#   file        name of the PDF file to write
#   class_data  named list of matrices, one per class; the names are used
#               in the plot titles and axis labels
#   vars        names of the columns to plot
#   xlims       list of c(lo, hi) ranges on the raw scale, one per variable
#   powers      optional exponents, one per variable, applied to both the
#               data and the range; NULL means no transformation
#
# The range of data values is the same for the histograms of a variable in
# different classes, so that the distributions can be compared.  Titles and
# axis labels are spelled out so they read the same as those hist() derives
# from a direct call such as hist(ds.1[,"E1"]^0.2,...).

plot_class_hists <- function(file, class_data, vars, xlims, powers = NULL) {
  pdf(file, pointsize = 9)
  on.exit(dev.off(), add = TRUE)  # close the device even if a plot fails
  par(mfrow = c(length(vars), length(class_data)))
  for (i in seq_along(vars)) {
    pw <- if (is.null(powers)) 1 else powers[i]
    suffix <- if (is.null(powers)) "" else paste0("^", pw)
    for (nm in names(class_data)) {
      label <- paste0(nm, "[, \"", vars[i], "\"]", suffix)
      hist(class_data[[nm]][, vars[i]]^pw,
           breaks = 20,
           xlim = xlims[[i]]^pw,
           main = paste("Histogram of", label),
           xlab = label)
    }
  }
  invisible(NULL)
}

hist_vars <- c("E1", "E2", "E3", "E4")
hist_xlims <- list(c(40, 105), c(30, 130), c(70, 140), c(60, 160))
by_class <- list(ds.1 = ds.1, ds.2 = ds.2, ds.3 = ds.3)

# Look at histograms for each class, for E1, E2, E3, E4.

plot_class_hists("a2-sat-hist.pdf", by_class, hist_vars, hist_xlims)

# See if histograms look better with power transformations.

plot_class_hists("a2-sat-hist-t.pdf", by_class, hist_vars, hist_xlims,
                 powers = c(0.2, 0.2, 1.2, 2.0))

# Set up vector of colours according to class.

cols <- c("red", "green", "gray")[cls]

# Scatterplot of jittered values for centre pixel, for all classes
# (with colour) and for each class separately.
# Jitter every value by a uniform amount of at most 0.5 in either direction
# so that tied values do not land exactly on top of each other.

dsj <- as.data.frame(jitter(ds, amount = 0.5))

centre_vars <- c("E1", "E2", "E3", "E4")

# First page: all classes together, coloured by class.  Following pages:
# one page per class, in the default colour.

pdf("a2-sat-centre.pdf", pointsize = 9)
plot(dsj[, centre_vars], col = cols, pch = 20)
for (k in 1:3) {
  plot(dsj[cls == k, centre_vars], pch = 20)
}
dev.off()

# Reduce data by finding means and medians.  For each suffix k in 1..4 the
# nine columns A<k>, B<k>, ..., I<k> are summarised row by row.

# Names of the nine columns A..I that share the numeric suffix k.
suffix_cols <- function(k) paste0(LETTERS[1:9], k)

# rowMeans() replaces apply(..., 1, mean); the values agree up to
# floating-point rounding.  drop = FALSE keeps a one-row ds a matrix.

mean.s1 <- rowMeans(ds[, suffix_cols(1), drop = FALSE])
mean.s2 <- rowMeans(ds[, suffix_cols(2), drop = FALSE])
mean.s3 <- rowMeans(ds[, suffix_cols(3), drop = FALSE])
mean.s4 <- rowMeans(ds[, suffix_cols(4), drop = FALSE])

median.s1 <- apply(ds[, suffix_cols(1), drop = FALSE], 1, median)
median.s2 <- apply(ds[, suffix_cols(2), drop = FALSE], 1, median)
median.s3 <- apply(ds[, suffix_cols(3), drop = FALSE], 1, median)
median.s4 <- apply(ds[, suffix_cols(4), drop = FALSE], 1, median)

# cbind() names the columns after the variables, which become the panel
# labels in the scatterplot matrices below.

dmean <- cbind(mean.s1, mean.s2, mean.s3, mean.s4)
dmedian <- cbind(median.s1, median.s2, median.s3, median.s4)

# Scatter plots with reduced data.

pdf("a2-sat-mn-md.pdf", pointsize = 9)
plot(as.data.frame(dmean), pch = 20, col = cols)
plot(as.data.frame(dmedian), pch = 20, col = cols)
dev.off()