############################### Study4Template.txt ################################### # # # Study4 is on F-tests with significance level 0.05 under conditions # # of publication bias and heterogeneity in both sample size and effect size. # # Sample size after selection is Poisson(86). The 9 variables are # # Simulation Sigma Nstudies AvePower MeanTruePower # # Pcurve, Puniform, MaxLike, Zcurve # # AvePower is population mean power, while MeanTruePower is SAMPLE mean true power. # # Copy to template.txt, and run with runR on Depth Charge or sniffR on PsychLab # # # # # # Note the degrees of freedom could be 1, 3 or 5 in separate runs. # # This version writes one simulation at a time to the output file. # # # ###################################################################################### # Need to loop through sigma values, and now there will be 9 columns. The # analysis program is affected. ################################ Setup ################################ rm(list=ls()); options(scipen=999) # To avoid scientific notation # Run separately for each df. Change name of output file accordingly. DFree1 = c(1,3,5); index = 1; Dfree1 = DFree1[index] nsim=37 # Number of simulated data sets in each condition: 36*280 = 10080 nrows = 5*3*3*nsim # Number of studies x Number of power values x Number of SD values simdata = matrix(0,nrows,9) # Simulation number plus eight variables Lambda = 86 # Poisson mean sample size top = round(Lambda + 6*sqrt(Lambda)) # Poisson probabilities are zero above here nval = Dfree1+2:top Dfree2val = nval - Dfree1 - 1 # Assuming a one-way ANOVA or something like that. critvalval = qf(0.95,Dfree1,Dfree2val) source("http://www.utstat.toronto.edu/~brunner/Rfunctions/estimatR.txt") # Loop through these variables Nstudies = c(100,250,500,1000,2000) # 5 levels Sigmaval = c(0.1, 0.2, 0.3) # SD of effect size AvePower = c(0.25,0.50,0.75) # 3 levels of population mean power Methods = c("Pcurve","Puniform","MaxLike","Zcurve") # Minimize fun over (a,b) to find values giving desired mean power # and desired SD of effect size after selection for significance. # This function uses material above. fun = function(ab, wantpow, wantsigma, info=F) { aa = abs(ab[1]); bb = abs(ab[2]) # w is effect size, with ncp = n*w^2 fun1 = function(w,A,B) # Function to be integrated for E(G) { sum( (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) } # End fun1 fun1v = Vectorize(fun1,vectorize.args="w") fun2 = function(w,A,B) # Function to be integrated for E(G^2) { sum( (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2))^2 * dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) } # End fun2 fun2v = Vectorize(fun2,vectorize.args="w") EG = integrate(fun1v,0,Inf,A=aa,B=bb)$value EG2 = integrate(fun2v,0,Inf,A=aa,B=bb)$value # E(G^2) meanPOW = EG2/EG # E(G|T>c) by Principle 3. fun3 = function(w,A,B) # Function to be integrated for E(W|T>c) { sum( w * (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) } # End fun3 fun3v = Vectorize(fun3,vectorize.args="w") fun4 = function(w,A,B) # Function to be integrated for E(W^2|T>c) { sum( w^2 * (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) } # End fun4 fun4v = Vectorize(fun4,vectorize.args="w") EW = integrate(fun3v,0,Inf,A=aa,B=bb)$value / EG EW2 = integrate(fun4v,0,Inf,A=aa,B=bb)$value / EG sdES = sqrt(EW2-EW^2) if(info) cat("(a,b) =",c(aa,bb), " sdES =",sdES," meanPOW =",meanPOW,"\n") value = (wantpow-meanPOW)^2 + (wantsigma-sdES)^2 if(info) cat(" value =",value,"\n\n") return(value) } # End of all the fun # fun1 is used outside the function fun too. fun1 = function(w,A,B) # Function to be integrated for E(G) { sum( (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) } # End fun1 fun1v = Vectorize(fun1,vectorize.args="w") ################################################## # Find gamma parameters yielding desired mean power and SD of effect size after selection. # This takes a while. Speed up and avoid numerical problems by starting at the answer for # df1 = 3. Note that these values are specific to Poisson(86) sample size. ################################################## Astart = rbind( c(1.73478097844601730770364, 4.8650903590956895428121, 10.693977766200100987248), c(0.10739168893767299384212, 0.6824296598904118216211, 2.413408765712715897678), c(0.03154680672117692735723, 0.2190798181397437538465, 1.030222513479821522253) ) Bstart = rbind( c(0.05533061323905076600571, 0.04426501626266488020889, 0.03316626165486077221223), c(0.22637392549508644767897, 0.16549567597017253439695, 0.12899207873748869124242), c(0.51182312306869570672774, 0.33533287204483941401634, 0.27073123024297296446150) ) cat("\nCalculating required gamma parameters ... \n\n") Amat = Bmat = matrix(0,3,3) for(i in 1:3) # Loop over rows = SD of effect size {cat("Sigma =",Sigmaval[i],"\n") for(j in 1:3) # Loop over desired power {cat(" Mean Power =",AvePower[j],"\n") startvals = c(Astart[i,j],Bstart[i,j]) Search = nlm(fun, p=startvals, wantpow=AvePower[j], wantsigma=Sigmaval[i], info=F) Amat[i,j] = abs(Search$estimate[1]); Bmat[i,j] = abs(Search$estimate[2]) cat("(a,b) =",c(Amat[i,j],Bmat[i,j]),"\n") } # Next j } # Next i rownames(Amat) = rownames(Bmat) = Sigmaval colnames(Amat) = colnames(Bmat) = AvePower cat("\n\n") cat(" Amat \n") print(Amat,digits=22); cat("\n") cat(" Bmat \n") print(Bmat,digits=22); cat("\n\n") system("touch simoutX.txt") # Append results to this file, one simulation at a time for(Simulation in 1:nsim) {cat("Simulation ",Simulation,"\n") datta = matrix(0,45,9); rowno=0 # Accumulate the results for one simulation in datta colnames(datta) = c("Simulation", "SD_EffectSize", "Nstudies", "PopMeanPower", "SampleMeanPower", "Pcurve", "Puniform", "MaxLike", "Zcurve") for(h in 1:length(Sigmaval)) {cat(" Sigma =",Sigmaval[h],"\n") for(i in 1:length(Nstudies)) {cat(" K =",Nstudies[i],"\n") k = Nstudies[i] for(j in 1:length(AvePower)) {cat(" Mean power =",AvePower[j],"\n") AA = Amat[h,j]; BB = Bmat[h,j] # Gamma parameters ExpectedG = integrate(fun1v,0,Inf,A=AA,B=BB)$value # Expected proportion significant is ExpectedG. aveneed = round(k/ExpectedG) # cat("Simulating ",aveneed, " test statistics before selection ...\n") N = rpois(aveneed,lambda=Lambda) DF2 = N-Dfree1-1 critval = qf(0.95,df1=Dfree1,df2=DF2) W = rgamma(aveneed,shape=AA,scale=BB) Y = rf(aveneed,df1=Dfree1,df2=DF2,ncp=N*W^2) # Debug: G holds true power values before selection # G = 1 - pf(critval,df1=Dfree1,df2=DF2,ncp=N*W^2) # c(mean(G^2)/mean(G),AvePower[j] ) # For debugging: Should be very close # Select significant results N = subset(N,Y>critval); W = subset(W,Y>critval) DF2 = subset(DF2,Y>critval); Y = subset(Y,Y>critval) critval = qf(0.95,df1=Dfree1,df2=DF2) # Re-calculate rather than select # cat(length(Y), ", or",length(Y)/aveneed," are significant.", # " Compare E(G) =", ExpectedG,"\n") # Add more tests if there are not enough nmore = nsim while(length(Y)morecrit)) DF2 = c(DF2,subset(moreDF,moreY>morecrit)) critval = c(critval,subset(morecrit,moreY>morecrit)) W = c(W,subset(moreW,moreY>morecrit)) Y = c(Y,subset(moreY,moreY>morecrit)) } # End while not enough significant results # Discard extras N = N[1:k]; W = W[1:k] Y = Y[1:k]; DF2 = DF2[1:k] critval = critval[1:k] # Calculate true power values for the significant results TruePower = 1 - pf(critval,df1=Dfree1,df2=DF2,ncp=N*W^2) # cat("Mean power of the significant tests is",mean(TruePower), # ", compared to expected true power of",AvePower[j],"\n\n") # Compute Estimates MeanTruePower = mean(TruePower) # Sample mean of the True Power values # cat("Calculating p-curve \n") Pcurve = heteroNpcurveF(FF=Y,dfree1=Dfree1,dfree2=DF2) # cat("Calculating p-uniform \n") Puniform = heteroNpunifF(FF=Y,dfree1=Dfree1,dfree2=DF2,CI=F) ######## Calculate the MLE with multiple starts - slow! ######## MLEmatrix = matrix(0,3,4) colnames(MLEmatrix) = c("astart","bstart","MLE","Minus LL") begin = c(rexp(1),runif(1)) # Random start trapmle = try(heteromleF(FF=Y, dfree1=Dfree1, dfree2=DF2, startvals=begin)) MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) MLEmatrix[1,] = c(begin,MaxLike) begin = rexp(2) # Another random start trapmle = try(heteromleF(FF=Y, dfree1=Dfree1, dfree2=DF2, startvals=begin)) MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) MLEmatrix[2,] = c(begin,MaxLike) begin = c(rexp(1,rate=1/2),runif(1)) # A third random start trapmle = try(heteromleF(FF=Y, dfree1=Dfree1, dfree2=DF2, startvals=begin)) MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) MLEmatrix[3,] = c(begin,MaxLike) MaxLike = subset(MLEmatrix[,3],MLEmatrix[,4]==min(MLEmatrix[,4])) if(is.na(MaxLike)) # Will catch NaN too. { # Discard NA and NaN values, then choose minimum MLE = MLEmatrix[,3]; MLL = MLEmatrix[,4] MLE[MLE>1] = NA bad = subset(1:3,is.na(MLE)) MLE = MLE[-bad]; MLL = MLL[-bad] MaxLike = subset( MLE , MLL==min(MLL) ) if(length(MaxLike)==0) MaxLike=NA } # End if MaxLike is NA or NaN # Length of MaxLike could be more than one if there is a tie in # the minus log likelihood. MaxLike = MaxLike[1] # cat(" MLEmatrix = \n"); print(MLEmatrix) # cat(" MaxLike = ",MaxLike,"\n") ######## End of Maximum Likelihood ######## pval = 1-pf(Y,df1=Dfree1,df2=DF2 ) Zcurve = zcurve(pval,Plot=0,Verbose=F) resultz = c(Pcurve, Puniform, MaxLike, Zcurve) # cat(" resultz =",resultz,"\n\n") # Put them in the table rowno = rowno+1 datta[rowno,] = c(Simulation, Sigmaval[h], Nstudies[i], AvePower[j], MeanTruePower, resultz) } # Next value of TruePower } # Next value of Nstudies } # Next value of Sigmaval write(t(datta),file="simoutX.txt",ncolumns=9,append=T) } # Next Simulation