# Solution to STA 410/2102 Assignment #1, Spring 2003 - Program code. # GENERATE DATA FROM A ONE-WAY RANDOM EFFECTS ANOVA MODEL. The arguments # are the number of groups, the number of subjects in each group, the # overall mean, the variance of group means, and the variance of subjects # within groups. The result is a data frame with ng*ns rows, and two columns # called "group" and "value", giving the group id for each subject (1 to ng), # and the value for that subject. gen.data <- function (ng, ns, mean, varg, vars) { # Check for bad arguments. if (ng<1 || ns<1 || varg<0 || vars<0) { stop("Bad argument for gen.data") } # Initialize "g" and "v" to empty vectors of group ids and values. g <- c() v <- c() # Generate data for each group in turn. Append data and ids to "g" and "v". for (i in 1:ng) { # The ns subjects in this group all have id i. g <- c (g, rep(i,ns)) # The values for subjects in this group are the sum of the overall mean, a # mean for this group (randomly generated), and variation for each subject. v <- c (v, mean + rnorm(1,0,sqrt(varg)) + rnorm(ns,0,sqrt(vars))) } data.frame (group=g, value=v) } # PERFORM THE STANDARD T TEST OF MEAN=0 FOR THE ONE-WAY RANDOM EFFECTS MODEL. # The argument is a data frame with columns called "group" and "value", holding # group ids and values for each subject. All groups must have the same number # of subjects. The value returned is a list in which "p.value" contains the # p-value for the standard two-sided test of the overall mean being zero, and # "group.means" is the vector of means of values within each group. # # An error is reported if the number of groups is less than two, or if the # number of subjects per group varies. standard.t <- function (d) { # Set "g.vec" to the vector of group ids, "ng" to the number of groups, # and "ns" to the number of subjects per group. g.vec <- unique(d$group) ng <- length(g.vec) ns <- length(d$group)/ng if (ng<2) { stop("Can't do a t test with less than two groups") } # Set "m" to the vector of means for subjects within each group. Also # check that the number of subjects in each group is the same. m <- numeric(ng) for (i in g.vec) { if (sum(d$group==i)!=ns) { stop("number of subjects not the same for all groups") } m[i] <- mean(d$value[d$group==i]) } # Find the standard t statistic for testing whether the mean of the group # means is zero. This is done the hard way rather than using t.test so # that the relationship with the modified test will be clear. t <- mean(m) / sqrt(var(m)/ng) # Return the p.value for the two-sided test, along with the vector of # group means. list (p.value = 2 * pt (-abs(t), ng-1), group.means = m) } # PERFORM THE MODIFIED T TEST OF MEAN=0 FOR THE ONE-WAY RANDOM EFFECTS MODEL. # The argument is a data frame with columns called "group" and "value", holding # group ids and values for each subject. All groups must have the same number # of subjects. The value returned is a list in which "p.value" contains the # p-value for the modified two-sided test of the overall mean being zero, and # "group.means" is the vector of means of values within each group. # # An error is reported if the number of groups is less than two, or if the # number of subjects per group varies. modified.t <- function (d) { # Set "g.vec" to the vector of group ids, "ng" to the number of groups, # and "ns" to the number of subjects per group. g.vec <- unique(d$group) ng <- length(g.vec) ns <- length(d$group)/ng if (ng<2) { stop("Can't do a t test with less than two groups") } if (ns<2) { stop("Can't do the modified t test with less than two subjects per group") } # Set "m" to the vector of means for subjects within each group, and "s2" # to the vector of variances for subjects within each group. Also check # that the number of subjects in each group is the same. m <- numeric(ng) s2 <- numeric(ng) for (i in g.vec) { if (sum(d$group==i)!=ns) { stop("number of subjects not the same for all groups") } m[i] <- mean(d$value[d$group==i]) s2[i] <- var(d$value[d$group==i]) } # Estimate the variance of the sample means for groups as the larger # of the sample variance of the actual sample means and the variance in # sample means that would be expected just due to the (estimated) variability # within a group. v <- max (var(m), mean(s2)/ns) # Find the modified t statistic for testing whether the mean of the group # means is zero. t <- mean(m) / sqrt(v/ng) # Return the p.value for the two-sided test, along with the vectors of # group means and group variances. list (p.value = 2 * pt (-abs(t), ng-1), group.means = m, group.vars=s2) } # APPLY A TEST TO SIMULATED ONE-WAY ANOVA DATA SETS. The arguments are the # number of simulated data sets, the number of groups in a data set, the # number of subjects per group, the overall mean, the variance of group means, # the variance within a group, and the function to perform the test (which # must return a list with a "p.value" element). The result of simulate.test # is the vector of p-values from the tests. simulate.test <- function (k, ng, ns, mean, varg, vars, test) { if (k<1 || ng<2 || ns<1 || varg<0 || vars<0) { stop("Bad argument for simulate.test") } p.values <- numeric(k) for (i in 1:k) { d <- gen.data(ng,ns,mean,varg,vars) p.values[i] <- test(d)$p.value } p.values } # ESTIMATE THE REJECTION PROBABILITY FOR A TEST AT A GIVEN LEVEL. The test # is performed for various values of the overall mean, and the rejection # probability estimated for each such value. The arguments are the number # of data sets to use in finding each estimate, the significance level, the # vector of means for the data sets, the number of groups, the number of # subjects per group, the variance of group means, the within-group variance, # and the function for performing the test (which must return a list with # a "p.value" element). The result of estimate.rejection.prob is the vector # of estimated rejection probabilities for each mean. estimate.rejection.prob <- function (k, level, means, ng, ns, varg, vars, test) { if (k<1 || level<=0 || level>=1 || ng<1 || ns<1 || varg<0 || vars<0) { stop("Bad argument for estimate.rejection.prob") } rej.prob <- numeric(length(means)) for (i in 1:length(means)) { p.values <- simulate.test (k, ng, ns, means[i], varg, vars, test) rej.prob[i] <- mean (p.values<=level) } rej.prob }