library(MASS)
library(mda)

# The mars() function from the mda library does NOT use any of Friedman's
# MARS code. Therefore the results are similar to Friedman's but NOT the same.
# This function does NOT properly handle CATEGORICAL predictors and is NOT
# well-tested.
# Among the arguments of mars():
#   nk     - the maximum number of model terms;
#            default: nk = max(21, 2 * ncol(x) + 1)
#   degree - optional integer specifying the maximum interaction degree
# =========================================================================

# Load the data ----
spam <- read.table("C:\\AnaMaria\\DATA\\spam.data", sep = "")

# Use only a fragment of the data (size = 3000), split into
# a test set of size 1000 and a train set of size 2000.
# Row counts are taken from the data rather than hard-coded so that the
# sampling never indexes past the end of the table.
spam.red <- spam[sample(nrow(spam), 3000), ]
rand <- sample(nrow(spam.red), 1000)
test.spam <- spam.red[rand, ]
train.spam <- spam.red[-rand, ]

# Columns 1-57 are used as predictors and column 58 as the response.
X.spam <- train.spam[, 1:57]
y.spam <- train.spam[, 58]

# Reference fit with the default number of terms ----
train.MARS <- mars(X.spam, y.spam, degree = 2)
# train.MARS$gcv
# [1] 0.05910394   (value recorded from one earlier run; sampling is random)

X.spam.test <- test.spam[, 1:57]
y.spam.test <- test.spam[, 58]

# Test error as a function of the maximum number of terms ----
max.nk <- 100
n.test <- nrow(X.spam.test)

# One column of test-set predictions per value of nk.
missclassif <- matrix(0, n.test, max.nk)
error <- rep(0, max.nk)

for (i in seq_len(max.nk)) {
  train.MARS <- mars(X.spam, y.spam, degree = 2, nk = i)
  # Use the predict() generic; it dispatches to the method for mars fits.
  missclassif[, i] <- predict(train.MARS, X.spam.test)
  # NOTE(review): this is the mean squared difference between the response
  # and the raw predictions, not a thresholded misclassification rate, even
  # though the plot below labels it "Test Misclassification Error" - confirm
  # which quantity is intended.
  error[i] <- sum((y.spam.test - missclassif[, i])^2) / n.test
}

# Plot the error curve ----
rank <- seq_len(max.nk)
plot(
  x = rank, y = error,
  xlab = "Rank of Model", ylab = "Test Misclassification Error",
  xlim = c(0, 100), ylim = c(0.05, 0.4),
  cex = 0.5, col = "green", pch = 7
)
# The original call used `rank.red` / `err.red`, which are never defined in
# this script; connect the points that were just plotted instead.
lines(x = rank, y = error, lty = 1, col = "green")
# Dotted horizontal reference line at 0.055.
abline(h = 0.055, lty = 3)