############################### Study7Template.txt ################################### # # # Study7 is about estimation when sample size and effect size are correlated. # # Because of conscious or unconscious power analysis, the correlation should be # # negative. Just F-tests with df1=1 and significance level 0.05 under conditions # # of selection and heterogeneity in both sample size and effect size. # # Effect size is beta with a Poisson regression model for sample size. Mean sample # # size is roughly 86. Everything is after selection. The 9 variables are # # Simulation Correlation Ntests AvePower MeanTruePower # # Pcurve, Puniform, MaxLike, Zcurve # # AvePower is population mean power, while MeanTruePower is SAMPLE mean true power. # # Copy to template.txt, and run with runR on Depth Charge or sniffR on PsychLab # # # # # # This version writes one simulation at a time to the output file. # # # ###################################################################################### ################################ Setup ################################ rm(list=ls()); options(scipen=999) # To avoid scientific notation nsim=35 # Number of simulated data sets in each condition: 35*73*4 = 10220 nrows = 5*3*5*nsim # Number of tests x Number of power values x Number of Corr values simdata = matrix(0,nrows,9) # Simulation number plus eight variables v = 0.09 # Variance of beta effect size, yielding SD(es) = 0.3 source("http://www.utstat.toronto.edu/~brunner/Rfunctions/estimatR.txt") # Loop through these variables Ntests = c(100,250,500,1000,2000) # 5 levels Corr = c(0.00, -0.20, -0.40, -0.60, -0.80) # Correlation between N and effect size AvePower = c(0.25,0.50,0.75) # 3 levels of population mean power Methods = c("Pcurve","Puniform","MaxLike","Zcurve") ######## Find (m,v,beta0,beta1) to give desired mean power and desired correlation ######## mMAT = matrix(0,5,3); rownames(mMAT) = Corr; colnames(mMAT) = AvePower b1MAT = matrix(0,5,3); rownames(b1MAT) = Corr; colnames(b1MAT) = AvePower # Define functions -- see hand-written notes, because Corr(X,N) is not obvious. fun1 = function(w,M,V,B0,B1) # Function to be integrated for mean power { nval = 5:200; DF2 = nval-2; critvalval = qf(0.95,1,DF2) A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V LAMBDA = exp(B0+B1*w) sum( (1-pf(critvalval,df1=1,df2=DF2,ncp=nval*w^2)) * dpois(nval,LAMBDA) * dbeta(w,A,B) ) } # End fun1 fun1v = Vectorize(fun1,vectorize.args="w") fun2 = function(w,M,V,B0,B1) # Integrate to get E(N) { A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V exp(B0+B1*w) * dbeta(w,shape1=A,shape2=B) } # End fun2 fun3 = function(w,M,V,B0,B1) # Integrate to get E(N^2) { A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V Lambda = exp(B0+B1*w) Lambda*(1+Lambda) * dbeta(w,shape1=A,shape2=B) } # End fun3 fun4 = function(w,M,V,B0,B1) # Integrate to get E(XN) { A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V w * exp(B0+B1*w) * dbeta(w,shape1=A,shape2=B) } # End fun4 fun = function(theta,wantpow,wantcorr,v=0.09) # theta = (m,beta1) # Minimize this to find (m,beta0,beta1) yielding desired mean power and correlation # between sample size and effect size. Note v < m(1-m), because Bernoulli variance is greatest. { # cat("m, beta1 =",theta,"\n") # Debug m = theta[1]; beta1 = theta[2] beta0 = log(86)-beta1*m # Gives average sample size of 86 at average effect size # Calculate mean power and correlation epow = integrate(fun1v,0,1,M=m,V=v,B0=beta0,B1=beta1)$value # Calculate exact Corr(X,N) EN = integrate(fun2,0,1,M=m,V=v,B0=beta0,B1=beta1)$value ENsq = integrate(fun3,0,1,M=m,V=v,B0=beta0,B1=beta1)$value SDN = sqrt(ENsq-EN^2) EXN = integrate(fun4,0,1,M=m,V=v,B0=beta0,B1=beta1)$value rho = (EXN-m*EN)/(sqrt(v)*SDN) # cat("Mean power, Correlation =",c(epow,rho),"\n") # Debug return( abs(epow-wantpow) + abs(rho-wantcorr) ) } # End of all the fun # Takes a while -- comment out and enter values manually # for(i in 1:length(Corr)) # # { # cat("Correlation =",Corr[i],"\n") # for(j in 1:length(AvePower)) # { # cat(" Power =",AvePower[j],"\n") # begin=c(0.50,-0.25) # Starting values for (m,corr) # if(Corr[i]==0) begin[2] = 0 # best = optim(par=begin,fun,lower=c(0.12,-1),upper=c(0.88,0), # method="L-BFGS-B",wantcorr=Corr[i],wantpow=AvePower[j]) # mMAT[i,j] = best$par[1]; b1MAT[i,j] = best$par[2] # } # Next j (Power value) # } # Next i (Correlation) # print(mMAT,digits=22); print(b0MAT,digits=22) mMAT = rbind( c(0.1547286643407420336782, 0.2965565361381660713924, 0.4784694650994398279487), c(0.1548377731227441322925, 0.2963263361789551519898, 0.4768297807020639433873), c(0.1549738174955386238452, 0.2960695854355749889741, 0.4749670005746801648705), c(0.1551798189476286671251, 0.2957151958000013292072, 0.4724730358897300530607), c(0.1556586764473793516039, 0.2950544431790082522404, 0.4678675710361732131837) ) b0mat = rbind( c(0.00000000000000000000000, 0.00000000000000000000000, 0.00000000000000000000000), c(-0.0749290999810406227466, -0.07403899402777110172469, -0.07343577116871970178469), c(-0.1642299932679451845985, -0.15997368512198306689243, -0.15719474079343254135921), c(-0.2923671731818158758820, -0.27898222343141670931388, -0.27067199714252238029744), c(-0.5593194803292570460584, -0.51158446526716483404584, -0.48446671984845995906355) ) system("touch simoutX.txt") # Append results to this file, one simulation at a time for(Simulation in 1:nsim) {cat("Simulation ",Simulation,"\n") datta = matrix(0,nrows/nsim,9); rowno=0 # Accumulate the results for one simulation in datta colnames(datta) = c("Simulation", "Correlation", "Ntests", "PopMeanPower", "SampleMeanPower", "Pcurve", "Puniform", "MaxLike", "Zcurve") for(h in 1:length(Corr)) {cat(" Correlation =",Corr[h],"\n") for(i in 1:length(Ntests)) {cat(" K =",Ntests[i],"\n") k = Ntests[i] for(j in 1:length(AvePower)) {cat(" Mean power =",AvePower[j],"\n") m = mMAT[h,j]; beta1 = b1MAT[h,j] beta0 = log(86)-beta1*m # Average N = 86 at average effect size a = -(m^3 - m^2 + m*v)/v; b = (m^3 + (m - 1)*v - 2*m^2 + m)/v es = rbeta(Ntests[i],a,b) lambda = exp(beta0+beta1*es) N = rpois(Ntests[i],lambda) NCP = N*es^2 df2 = N-2; critval = qf(0.95,1,df2) Fstat = rsigF(Ntests[i],1,df2,NCP) TruePower = 1-pf(critval,1,df2,ncp=NCP) MeanTruePower = mean(TruePower) # Sample mean of the True Power values # cat("Mean power of the significant tests is",MeanTruePower, # ", compared to expected true power of",AvePower[j],"\n\n") # COMPUTE ESTIMATES # cat("Calculating p-curve \n") Pcurve = heteroNpcurveF(FF=Fstat,dfree1=1,dfree2=df2) # cat("Calculating p-uniform \n") Puniform = heteroNpunifF(FF=Fstat,dfree1=1,dfree2=df2,CI=F) ######## Calculate the MLE with multiple starts - slow! ######## MLEmatrix = matrix(0,3,4) colnames(MLEmatrix) = c("astart","bstart","MLE","Minus LL") begin = c(rexp(1),runif(1)) # Random start trapmle = try(heteromleF(FF=Fstat, dfree1=1, dfree2=df2, startvals=begin)) MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) MLEmatrix[1,] = c(begin,MaxLike) begin = rexp(2) # Another random start trapmle = try(heteromleF(FF=Fstat, dfree1=1, dfree2=df2, startvals=begin)) MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) MLEmatrix[2,] = c(begin,MaxLike) begin = c(rexp(1,rate=1/2),runif(1)) # A third random start trapmle = try(heteromleF(FF=Fstat, dfree1=1, dfree2=df2, startvals=begin)) MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) MLEmatrix[3,] = c(begin,MaxLike) MaxLike = subset(MLEmatrix[,3],MLEmatrix[,4]==min(MLEmatrix[,4])) if(is.na(MaxLike)) # Will catch NaN too. { # Discard NA and NaN values, then choose minimum MLE = MLEmatrix[,3]; MLL = MLEmatrix[,4] MLE[MLE>1] = NA bad = subset(1:3,is.na(MLE)) MLE = MLE[-bad]; MLL = MLL[-bad] MaxLike = subset( MLE , MLL==min(MLL) ) if(length(MaxLike)==0) MaxLike=NA } # End if MaxLike is NA or NaN # Length of MaxLike could be more than one if there is a tie in # the minus log likelihood. MaxLike = MaxLike[1] # cat(" MLEmatrix = \n"); print(MLEmatrix) # cat(" MaxLike = ",MaxLike,"\n") ######## End of Maximum Likelihood ######## logp = pf(Fstat,1,df2,lower.tail=F,log.p=T) pval = exp(logp) # Z = qnorm(logp-log(2),lower.tail=F,log.p=T) Zcurve = zcurve(pval,Plot=0,Verbose=F) resultz = c(Pcurve, Puniform, MaxLike, Zcurve) # cat(" resultz =",resultz,"\n\n") # Put them in the table rowno = rowno+1 datta[rowno,] = c(Simulation, Corr[h], Ntests[i], AvePower[j], MeanTruePower, resultz) } # Next value of TruePower } # Next value of Ntests } # Next value of Corr write(t(datta),file="simoutX.txt",ncolumns=9,append=T) } # Next Simulation