# SOLUTION TO STA 410/2102 ASSIGNMENT 1, SPRING 2004. # "SUM TEST" OF NULL HYPOTHESIS OF UNIFORM DISTRIBUTION. Takes a vector # of angular data as its argument, and returns the p-value for the test. sum.test <- function (a) { if (length(a)<2) { stop("This test makes no sense with less than two points") } S <- (2/length(a)) * (sum(cos(a))^2 + sum(sin(a))^2) 1 - pchisq(S,2) } # "GAP TEST" OF NULL HYPOTHESIS OF UNIFORM DISTRIBUTION. Takes a vector # of angular data as its first argument, and returns the p-value for the test. # An optional second argument (default 99) gives the number of uniformly # distributed data sets to simulate in order to calculate the p-value. gap.test <- function (a, r=99) { n <- length(a) if (n<2) { stop("This test makes no sense with less than two points") } # Find the size of the largest gap in the actual data set. G <- gap.size(a) # Count how many simulated data sets have a gap at least as large as the # largest gap in the actual data set, add this to a count of one for the # actual data set. count <- 1 for (i in 1:r) { if (gap.size(runif(n,0,2*pi))>=G) { count <- count+1 } } # Return the p-value, computed as the fraction of the total number of # data sets (one actual, r simulated) that have gaps at least as large # as the actual data set. count/(r+1) } # COMPUTE THE SIZE OF THE LARGEST GAP IN AN ANGULAR DATA SET. gap.size <- function (a) { a <- sort(a) n <- length(a) max (a[1]+2*pi-a[n], a[2:n]-a[1:(n-1)]) } # SIMULATE THE BEHAVIOUR OF A TEST WHEN THE DATA IS UNIFORMLY DISTRIBUTED. # The first argument is the function that computes the p-value for the test # to be evaluated. The second argument is the size of data set to simulate. # The third argument is the number of data sets to simulate (more is better, # but slower). sim.unif <- function (test, n, k) { p.values <- numeric(k) for (i in 1:k) { p.values[i] <- test(runif(n,0,2*pi)) } p.values } # SIMULATE THE BEHAVIOUR OF A TEST WHEN THE DATA IS NOT UNIFORMLY DISTRIBUTED. # The first argument is the function that computes the p-value for the test # to be evaluated. The second argument is the size of data set to simulate. # The third argument is the number of data sets to simulate (more is better, # but slower). The fourth and fifth arguments specify the distribution, by # giving values to add to uniformly distributed data points, and to then # multiply them by. (Hence, shift=0 and scale=1 would produce uniformly # distributed data sets.) sim.nonunif <- function (test, n, k, shift, scale) { p.values <- numeric(k) for (i in 1:k) { p.values[i] <- test(gen.nonunif(n,shift,scale)) } p.values } # GENERATE NON-UNIFORMLY DISTRIBUTED DATA. Arguments are the size of data # set, and shift and scale parameters, as for sim.nonunif. gen.nonunif <- function (n, shift, scale) { # Generate angles uniformly. a <- runif(n,0,2*pi) # Get x and y coordinates of points on the circle, and then change x by # shifting and scaling. x <- scale*(cos(a)+shift) y <- sin(a) # Convert back to angles, making sure that they are represented by numbers # in the range 0 to 2pi. a <- atan2(y,x) a[a<0] <- a[a<0]+2*pi # Return the modified angles as the result. a } # TRY OUT THE TESTS ON UNIFORMLY DISTRIBUTED DATA. The arguments are the # sample sizes to try and the numbers of data sets to simulate for the # sum test and the gap test. These default to the values requested in the # assignment sheet. do.unif.tests <- function (ns=c(2,4,10,20), k.sum=20000, k.gap=1000) { par(mfrow=c(4,2)) for (n in ns) { hist (sim.unif (sum.test, n, k.sum), prob=T, nclass=20, xlim=c(0,1), main="", xlab= paste(k.sum,"p-values from sum test for",n,"uniformly distributed points")) hist (sim.unif (gap.test, n, k.gap), prob=T, nclass=20, xlim=c(0,1), main="", xlab= paste(k.gap,"p-values from gap test for",n,"uniformly distributed points")) } invisible() } # TRY OUT THE TESTS ON NON-UNIFORMLY DISTRIBUTED DATA. The arguments are the # sample sizes to try, the shifts and scales to try for each sample size, and # the numbers of data sets to simulate for the sum test and the gap test. # These default to the values requested in the assignment sheet. do.nonunif.tests <- function (ns=c(2,4,10,20), shift=c(0.5,0), scale=c(1.0,1.5), k.sum=20000, k.gap=1000) { if (length(shift)!=length(scale)) { stop("Number of shift values must be the same as number of scale values") } par(mfrow=c(4,2)) for (i in 1:length(shift)) { for (n in ns) { hist (sim.nonunif (sum.test, n, k.sum, shift[i], scale[i]), prob=T, nclass=20, xlim=c(0,1), main="", xlab= paste(k.sum,"p-values from sum test, n =",n, "shift =",shift[i],"scale =",scale[i])) hist (sim.nonunif (gap.test, n, k.gap, shift[i], scale[i]), prob=T, nclass=20, xlim=c(0,1), main="", xlab= paste(k.gap,"p-values from gap test, n =",n, "shift =",shift[i],"scale =",scale[i])) } } invisible() }