> library(ElemStatLearn)
> library(splines)             ## needed for ns() below
> data(phoneme)
> phoneme.aa = phoneme[phoneme$g=="aa",]
> phoneme.ao = phoneme[phoneme$g=="ao",]
> dim(phoneme.aa)
[1]  695 258
> dim(phoneme.ao)
[1] 1022 258
> rowsaa = sample(695,400)     ## a random selection of 400 training "aa"s
> rowsao = sample(1022,600)    ## and 600 "ao"s (no seed was set, so a rerun gives a different split)
> aa.train = phoneme.aa[rowsaa,]
> aa.test  = phoneme.aa[-rowsaa,]
> ao.train = phoneme.ao[rowsao,]
> ao.test  = phoneme.ao[-rowsao,]
> plotaa = sample(400,15)      ## plot a few green "aa" rows (aa.train has 400 rows)
> plotao = sample(600,15)      ## and red "ao" rows (ao.train has 600)
## columns 1:256 hold the log-periodogram values; g and speaker are columns 257 and 258
> plot(1:256, unlist(aa.train[plotaa[1],1:256]), type="l", col="green")
> lines(1:256, unlist(ao.train[plotao[1],1:256]), col="red")
> lines(1:256, unlist(ao.train[plotao[2],1:256]), col="red")
> lines(1:256, unlist(ao.train[plotao[3],1:256]), col="red")
> lines(1:256, unlist(aa.train[plotaa[2],1:256]), col="green")
> lines(1:256, unlist(aa.train[plotaa[3],1:256]), col="green")
## try it -- this reproduces the top panel of Figure 5.5
> train = rbind(aa.train, ao.train)   ## we eliminated rows for other sounds
> test  = rbind(aa.test, ao.test)     ## so now we have our dataset
> rm(ao.train, ao.test, aa.train, aa.test, plotaa, plotao)
> logreg = glm(g ~ ., data=train[,-258], family=binomial)
## I dropped "speaker" (column 258) from the data set rather than writing
## "- speaker" in the formula, because I got into trouble later with predictions.
## Note that g keeps all five levels; glm's binomial coding takes the first
## level ("aa") as 0 and everything else (here, only "ao") as 1.
> plot(1:256, logreg$coef[2:257], type="l", col="gray", ylim=c(-0.4,0.4), xlab="Frequency")
> smooth.fit = lm(logreg$coef[2:257] ~ ns(1:256, df=12))
> lines(1:256, smooth.fit$fitted.values, col="red")
> abline(h=0)
## not quite as much shrinkage as they show in Fig 5.5

## classification errors
> dim(test)
[1] 717 258
> phat = rep(0, 717)
> for(j in 1:717) phat[j] = sum(unlist(test[j,1:256]) * smooth.fit$fitted.values)
## this uses the red curve as the coefficients, instead of the original
## gray coefficients; note that this score has no intercept term
> table(phat < 0, test$g)

          aa  ao dcl  iy  sh
  FALSE    1  71   0   0   0
  TRUE   294 351   0   0   0
> 72/717
[1] 0.1004184

## now see how it does on training data
> phat2 = rep(0, 1000)
> for(j in 1:1000) phat2[j] = sum(unlist(train[j,1:256]) * smooth.fit$fitted.values)
> table(phat2 < 0, train$g)

          aa  ao dcl  iy  sh
  FALSE    0  75   0   0   0
  TRUE   400 525   0   0   0
## error rate 0.075

## what about predicting with the original coefficients?
> table(predict.glm(logreg, newdata=test[,1:257], type="response") > 0.5, test$g)

          aa  ao dcl  iy  sh
  FALSE  207  87   0   0   0
  TRUE    88 335   0   0   0
> 294/717
[1] 0.4100418

## and on training data
> table(predict.glm(logreg, type="response") > 0.5, train$g)

          aa  ao dcl  iy  sh
  FALSE  366  31   0   0   0
  TRUE    34 569   0   0   0
## error rate (366+31)/1000 = 0.397

## The results look more dramatic than in the text, and the apparent switch of
## the sign is a puzzle. Two things seem to be going on. First, the "error
## rates" above sum the FALSE row of each table, but with glm's coding a
## fitted probability above 0.5 predicts "ao", so the misclassified cases are
## the off-diagonal cells: for the original coefficients that gives
## (88+87)/717 = 0.244 on test data and (34+31)/1000 = 0.065 on training data,
## far more plausible for a 256-parameter logistic fit. Second, the smoothed
## score phat omits the intercept, so thresholding it at 0 is not the decision
## rule the glm fit implies; counted honestly it misclassifies 352/717 or
## 365/717 depending on which class "phat < 0" is taken to predict, i.e.
## essentially chance. Did I also get unlucky with my random subset? Perhaps,
## but these two issues seem the likelier explanation.
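
## Postscript 1: counting errors from the glm fit directly. This sketch was
## not run in the session above; it assumes logreg and test still exist.
## With glm's coding, a fitted probability above 0.5 predicts "ao", so the
## error rate is the fraction of off-diagonal cases:

p.test    = predict(logreg, newdata = test, type = "response")
pred.test = ifelse(p.test > 0.5, "ao", "aa")
mean(pred.test != as.character(test$g))
## for the test table shown above this comes to (88+87)/717 = 0.244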
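
## Postscript 2: repairing the smoothed score. phat above has no intercept,
## so comparing it with 0 is arbitrary. One heuristic repair -- my own
## assumption here, not the book's procedure -- is to pair the original
## intercept with the smoothed slopes; again a sketch against the session's
## objects:

eta.sm  = drop(logreg$coef[1] + as.matrix(test[,1:256]) %*% smooth.fit$fitted.values)
pred.sm = ifelse(eta.sm > 0, "ao", "aa")
mean(pred.sm != as.character(test$g))   ## smoothed-coefficient test error, on the same footing as above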
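
## Postscript 3: the book does not smooth the fitted coefficients after the
## fact; in Section 5.2.3 it restricts beta(f) to a natural cubic spline
## basis H before fitting, by regressing on the filtered inputs x* = t(H) x.
## A sketch of that approach (df = 12 to match the smoother used above),
## assuming train and test as constructed in the session:

library(splines)
H     = ns(1:256, df = 12)                ## 256 x 12 basis matrix
XH.tr = as.matrix(train[,1:256]) %*% H    ## filtered training inputs
XH.te = as.matrix(test[,1:256])  %*% H
y.tr  = as.numeric(train$g == "ao")       ## "ao" = 1, "aa" = 0
filt  = glm(y.tr ~ XH.tr, family = binomial)
beta.filt = H %*% coef(filt)[-1]          ## implied smooth coefficient curve
eta.te    = drop(coef(filt)[1] + XH.te %*% coef(filt)[-1])
mean((eta.te > 0) != (test$g == "ao"))    ## test error of the filtered fit
## lines(1:256, beta.filt, col="blue")    ## overlay on the coefficient plot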