# ---------------------------------------------------------------------------
# Prostate data: least-squares fit on standardized predictors, with
# train/test evaluation.
#
# Runnable script reconstructed from an interactive R 1.7.1 (2003-06-16)
# console transcript. Numbers marked "recorded" are the values printed in
# that session; they are kept for comparison and are not recomputed here.
#
# Deliberate differences from the transcript:
#   * no attach(); every column is referenced through its data frame
#   * the fit is assigned directly instead of being taken from .Last.value
#   * the test design matrix carries its intercept column from the start
#     (the session's first attempt, an 8-column matrix against 9
#     coefficients, failed with "non-conformable arguments")
#   * test errors are computed against the test-set lpsa explicitly
#   * save.image() is given a quoted file name
#   * the closing q() is omitted so that source()-ing this file does not
#     terminate the R session
# ---------------------------------------------------------------------------

# ---- Load ------------------------------------------------------------------

# Recorded shape: 97 rows x 10 columns
#   lcavol lweight age lbph svi lcp gleason pgg45 lpsa train
prostate.frame <- read.table("prostate.data")
print(dim(prostate.frame))                 # recorded: 97 10
print(prostate.frame[1:4, ])
print(is.factor(prostate.frame$gleason))   # recorded: FALSE

pairs(prostate.frame)

# ---- Standardize -----------------------------------------------------------

# Centre a numeric vector to mean 0 and scale it to unit standard deviation.
# No NA handling: an NA in `x` propagates to every element of the result.
std <- function(x) {
  (x - mean(x)) / sd(x)
}

# Means and standard deviations are taken over all rows (training and test
# together), exactly as in the session.
lcavol.z  <- std(prostate.frame$lcavol)
lweight.z <- std(prostate.frame$lweight)
age.z     <- std(prostate.frame$age)
lbph.z    <- std(prostate.frame$lbph)
svi.z     <- std(prostate.frame$svi)
lcp.z     <- std(prostate.frame$lcp)
gleason.z <- std(prostate.frame$gleason)
pgg45.z   <- std(prostate.frame$pgg45)

# `train` is stored as 0/1. The session built this table with cbind(), which
# coerced the logical flag to numeric; the same encoding is kept here so the
# saved object has the same column types.
prostate.z.frame <- data.frame(
  lpsa      = prostate.frame$lpsa,
  lcavol.z  = lcavol.z,
  lweight.z = lweight.z,
  age.z     = age.z,
  lbph.z    = lbph.z,
  svi.z     = svi.z,
  lcp.z     = lcp.z,
  gleason.z = gleason.z,
  pgg45.z   = pgg45.z,
  train     = as.numeric(prostate.frame$train)
)
print(dim(prostate.z.frame))               # recorded: 97 10

# ---- Train / test split ----------------------------------------------------

pr.z.train <- subset(prostate.z.frame, train == 1, select = -train)
pr.z.test  <- subset(prostate.z.frame, train == 0, select = -train)
print(dim(pr.z.train))                     # recorded: 67 9
print(dim(pr.z.test))                      # recorded: 30 9

# ---- Fit on the training rows ----------------------------------------------

pr.z.lm <- lm(
  lpsa ~ lcavol.z + lweight.z + age.z + lbph.z + svi.z + lcp.z +
    gleason.z + pgg45.z,
  data = pr.z.train
)
print(summary(pr.z.lm))

# Recorded summary:
#               Estimate Std. Error t value Pr(>|t|)
#   (Intercept)  2.47951    0.08963  27.665  < 2e-16 ***
#   lcavol.z     0.67953    0.12663   5.366 1.47e-06 ***
#   lweight.z    0.30494    0.11086   2.751  0.00792 **
#   age.z       -0.14146    0.10134  -1.396  0.16806
#   lbph.z       0.21015    0.10222   2.056  0.04431 *
#   svi.z        0.30520    0.12360   2.469  0.01651 *
#   lcp.z       -0.28849    0.15453  -1.867  0.06697 .
#   gleason.z   -0.02131    0.14525  -0.147  0.88389
#   pgg45.z      0.26696    0.15361   1.738  0.08755 .
#   Residual standard error: 0.7123 on 58 degrees of freedom
#   Multiple R-Squared: 0.6944, Adjusted R-squared: 0.6522
#   F-statistic: 16.47 on 8 and 58 DF, p-value: 2.042e-12

print(mean(pr.z.train$lpsa))               # recorded: 2.452345
print(mean(prostate.frame$lpsa))           # recorded: 2.478387

# ---- Evaluate on the test rows ---------------------------------------------

# model.matrix() builds the design matrix from the fitted formula, so the
# "(Intercept)" column is present and the column order follows the formula.
test.x <- model.matrix(formula(pr.z.lm), data = pr.z.test)
stopifnot(identical(colnames(test.x), names(coef(pr.z.lm))))

test.fitted <- test.x %*% coef(pr.z.lm)

# The transcript used a bare `lpsa` here. From the commands it shows, that
# name resolves to the attached 97-value column, which does not line up with
# the 30 fitted values; the test-set response is what is being predicted.
test.resid <- pr.z.test$lpsa - test.fitted
print(sum(test.resid^2))                   # recorded: 17.58988
print(mean(test.resid^2))                  # recorded: 0.5863292

# Baseline: predict the fitted intercept for every test row. The session
# typed the intercept in by hand as 2.47951205; it is read from the model
# here instead.
base.pred <- coef(pr.z.lm)[["(Intercept)"]]
print(mean((pr.z.test$lpsa - base.pred)^2))  # recorded: 1.052896

# ---- Save ------------------------------------------------------------------

# Drop scratch values so the saved workspace holds the same objects the
# session left behind.
rm(test.resid, base.pred)

# The transcript called save.image(jan16) with an unquoted symbol; no object
# of that name is created anywhere in the session, so the name is taken to be
# the intended file name.
save.image("jan16")