############################### Study4Template.txt ###################################
#                                                                                    #
# Study4 is on F-tests with significance level 0.05 under conditions                 #
# of publication bias and heterogeneity in both sample size and effect size.         #
# Sample size after selection is Poisson(86).  The 9 variables are                   #
#    Simulation Sigma Nstudies AvePower MeanTruePower                                #
#               Pcurve, Puniform, MaxLike, Zcurve                                    #
# AvePower is population mean power, while MeanTruePower is SAMPLE mean true power.  #
# Copy to template.txt, and run with runR on Depth Charge or sniffR on PsychLab      #
#                                                                                    #
#                                                                                    #
#     Note the degrees of freedom could be 1, 3 or 5 in separate runs.               #  
#     This version writes one simulation at a time to the output file.               # 
#                                                                                    #
######################################################################################
# Need to loop through sigma values, and now there will be 9 columns. The 
# analysis program is affected. 
################################ Setup ################################
rm(list=ls()); options(scipen=999) # To avoid scientific notation
# Run separately for each df. Change name of output file accordingly.
DFree1 = c(1,3,5); index = 1; Dfree1 = DFree1[index]
nsim=37 # Number of simulated data sets in each condition: 36*280 = 10080
nrows = 5*3*3*nsim # Number of studies x Number of power values x Number of SD values
simdata = matrix(0,nrows,9) # Simulation number plus eight variables  
Lambda = 86 # Poisson mean sample size
top = round(Lambda + 6*sqrt(Lambda)) # Poisson probabilities are zero above here
nval = Dfree1+2:top
Dfree2val = nval - Dfree1 - 1 # Assuming a one-way ANOVA or something like that.
critvalval = qf(0.95,Dfree1,Dfree2val)
source("http://www.utstat.toronto.edu/~brunner/Rfunctions/estimatR.txt")
# Loop through these variables
Nstudies = c(100,250,500,1000,2000) # 5 levels
Sigmaval = c(0.1, 0.2, 0.3) # SD of effect size 
AvePower = c(0.25,0.50,0.75) # 3 levels of population mean power
Methods = c("Pcurve","Puniform","MaxLike","Zcurve") 

# Minimize fun over (a,b) to find values giving desired mean power 
# and desired SD of effect size after selection for significance.
# This function uses material above.
fun = function(ab, wantpow, wantsigma, info=F) 
    {
    aa = abs(ab[1]); bb = abs(ab[2])
    # w is effect size, with ncp = n*w^2
    fun1 = function(w,A,B) # Function to be integrated for E(G)
        { 
        sum( (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * 
                dgamma(w,shape=A,scale=B) 
              * dpois(nval,Lambda) ) 
        } # End fun1
    fun1v = Vectorize(fun1,vectorize.args="w")
    fun2 = function(w,A,B) # Function to be integrated for E(G^2)
        { 
        sum( (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2))^2 * 
            dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) 
        } # End fun2
    fun2v = Vectorize(fun2,vectorize.args="w")
    EG = integrate(fun1v,0,Inf,A=aa,B=bb)$value 
    EG2 = integrate(fun2v,0,Inf,A=aa,B=bb)$value # E(G^2)
    meanPOW = EG2/EG # E(G|T>c) by Principle 3.
    fun3 = function(w,A,B) # Function to be integrated for E(W|T>c)
        { 
        sum( w * (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * 
              dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) 
        } # End fun3
    fun3v = Vectorize(fun3,vectorize.args="w")
    fun4 = function(w,A,B) # Function to be integrated for E(W^2|T>c)
        { 
        sum( w^2 * (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * 
              dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) 
        } # End fun4
    fun4v = Vectorize(fun4,vectorize.args="w")
    EW = integrate(fun3v,0,Inf,A=aa,B=bb)$value / EG
    EW2 = integrate(fun4v,0,Inf,A=aa,B=bb)$value / EG
    sdES = sqrt(EW2-EW^2)
    if(info) cat("(a,b) =",c(aa,bb), " sdES =",sdES," meanPOW =",meanPOW,"\n") 
    value = (wantpow-meanPOW)^2 + (wantsigma-sdES)^2
    if(info) cat("   value =",value,"\n\n")
    return(value)
    } # End of all the fun
# fun1 is used outside the function fun too.
    fun1 = function(w,A,B) # Function to be integrated for E(G)
        { 
        sum( (1-pf(critvalval,df1=Dfree1,df2=Dfree2val,ncp=nval*w^2)) * 
                dgamma(w,shape=A,scale=B) 
              * dpois(nval,Lambda) ) 
        } # End fun1
    fun1v = Vectorize(fun1,vectorize.args="w")

################################################## 
# Find gamma parameters yielding desired mean power and SD of effect size after selection.
# This takes a while. Speed up and avoid numerical problems by starting at the answer for 
# df1 = 3. Note that these values are specific to Poisson(86) sample size.
##################################################


Astart = rbind(
c(1.73478097844601730770364, 4.8650903590956895428121, 10.693977766200100987248),
c(0.10739168893767299384212, 0.6824296598904118216211,  2.413408765712715897678),
c(0.03154680672117692735723, 0.2190798181397437538465,  1.030222513479821522253)  )
Bstart = rbind(
c(0.05533061323905076600571, 0.04426501626266488020889, 0.03316626165486077221223),
c(0.22637392549508644767897, 0.16549567597017253439695, 0.12899207873748869124242),
c(0.51182312306869570672774, 0.33533287204483941401634, 0.27073123024297296446150) )

cat("\nCalculating required gamma parameters ...  \n\n")
Amat = Bmat = matrix(0,3,3)
for(i in 1:3) # Loop over rows = SD of effect size
    {cat("Sigma =",Sigmaval[i],"\n")
    for(j in 1:3) # Loop over desired power
        {cat("  Mean Power =",AvePower[j],"\n")
        startvals = c(Astart[i,j],Bstart[i,j])
        Search = nlm(fun, p=startvals, wantpow=AvePower[j], wantsigma=Sigmaval[i], info=F)
        Amat[i,j] = abs(Search$estimate[1]); Bmat[i,j] = abs(Search$estimate[2])
        cat("(a,b) =",c(Amat[i,j],Bmat[i,j]),"\n")
        } # Next j
    } # Next i
rownames(Amat) = rownames(Bmat) = Sigmaval
colnames(Amat) = colnames(Bmat) = AvePower
cat("\n\n")
cat("       Amat \n")
print(Amat,digits=22); cat("\n")
cat("       Bmat \n")
print(Bmat,digits=22); cat("\n\n")

system("touch simoutX.txt") # Append results to this file, one simulation at a time
for(Simulation in 1:nsim)
    {cat("Simulation ",Simulation,"\n")
    datta = matrix(0,45,9); rowno=0 # Accumulate the results for one simulation in datta
    colnames(datta) = c("Simulation", "SD_EffectSize", "Nstudies", "PopMeanPower", "SampleMeanPower", 
                     "Pcurve", "Puniform", "MaxLike", "Zcurve")
    for(h in 1:length(Sigmaval))
        {cat("  Sigma =",Sigmaval[h],"\n")
            for(i in 1:length(Nstudies))
                {cat("      K =",Nstudies[i],"\n")
                k = Nstudies[i]
                for(j in 1:length(AvePower))
                {cat("          Mean power =",AvePower[j],"\n")
                AA = Amat[h,j]; BB = Bmat[h,j] # Gamma parameters
                ExpectedG = integrate(fun1v,0,Inf,A=AA,B=BB)$value
                # Expected proportion significant is ExpectedG.
                aveneed = round(k/ExpectedG) 
                # cat("Simulating ",aveneed, " test statistics before selection ...\n")
                N = rpois(aveneed,lambda=Lambda)
                DF2 = N-Dfree1-1 
                critval = qf(0.95,df1=Dfree1,df2=DF2)
                W = rgamma(aveneed,shape=AA,scale=BB)
                Y = rf(aveneed,df1=Dfree1,df2=DF2,ncp=N*W^2)
                # Debug: G holds true power values before selection
                # G = 1 - pf(critval,df1=Dfree1,df2=DF2,ncp=N*W^2) 
                # c(mean(G^2)/mean(G),AvePower[j] ) # For debugging: Should be very close
                # Select significant results
                N = subset(N,Y>critval); W = subset(W,Y>critval)
                DF2 = subset(DF2,Y>critval); Y = subset(Y,Y>critval)
                critval = qf(0.95,df1=Dfree1,df2=DF2) # Re-calculate rather than select
                # cat(length(Y), ", or",length(Y)/aveneed," are significant.",
                # " Compare E(G) =", ExpectedG,"\n")
                # Add more tests if there are not enough
                nmore = nsim
                while(length(Y)<k)
                    { 
                    # Simulate more if needed
                    moreN = rpois(nmore,lambda=Lambda)
                    moreDF = moreN-Dfree1-1 
                    morecrit = qf(0.95,df1=Dfree1,df2=moreDF)
                    moreW = rgamma(nmore,shape=AA,scale=BB)
                    moreY = rf(nmore,df1=Dfree1,df2=moreDF,ncp=moreN*moreW^2)
                    # Add significant ones to the end of what we have already
                    N = c(N,subset(moreN,moreY>morecrit))
                    DF2 = c(DF2,subset(moreDF,moreY>morecrit))
                    critval = c(critval,subset(morecrit,moreY>morecrit))
                    W = c(W,subset(moreW,moreY>morecrit))
                    Y = c(Y,subset(moreY,moreY>morecrit))
                } # End while not enough significant results
                # Discard extras
                N = N[1:k]; W = W[1:k]
                Y = Y[1:k]; DF2 = DF2[1:k]
                critval = critval[1:k]
                # Calculate true power values for the significant results
                TruePower = 1 - pf(critval,df1=Dfree1,df2=DF2,ncp=N*W^2) 
                # cat("Mean power of the significant tests is",mean(TruePower),
                # ", compared to expected true power of",AvePower[j],"\n\n")
                # Compute Estimates
            MeanTruePower = mean(TruePower) # Sample mean of the True Power values
            # cat("Calculating p-curve \n")
            Pcurve     = heteroNpcurveF(FF=Y,dfree1=Dfree1,dfree2=DF2)
            # cat("Calculating p-uniform \n")
            Puniform   = heteroNpunifF(FF=Y,dfree1=Dfree1,dfree2=DF2,CI=F) 

            ######## Calculate the MLE with multiple starts - slow! ########
            MLEmatrix = matrix(0,3,4)
            colnames(MLEmatrix) = c("astart","bstart","MLE","Minus LL")
            begin = c(rexp(1),runif(1)) # Random start
            trapmle = try(heteromleF(FF=Y, dfree1=Dfree1, dfree2=DF2, startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[1,] = c(begin,MaxLike)
            begin = rexp(2) # Another random start
            trapmle = try(heteromleF(FF=Y, dfree1=Dfree1, dfree2=DF2, startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[2,] = c(begin,MaxLike)
            begin = c(rexp(1,rate=1/2),runif(1)) # A third random start
            trapmle = try(heteromleF(FF=Y, dfree1=Dfree1, dfree2=DF2, startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[3,] = c(begin,MaxLike)
            MaxLike = subset(MLEmatrix[,3],MLEmatrix[,4]==min(MLEmatrix[,4]))
            if(is.na(MaxLike)) # Will catch NaN too.
                { # Discard NA and NaN values, then choose minimum
                MLE = MLEmatrix[,3]; MLL = MLEmatrix[,4]
                MLE[MLE>1] = NA
                bad = subset(1:3,is.na(MLE)) 
                MLE = MLE[-bad]; MLL = MLL[-bad] 
                MaxLike = subset( MLE , MLL==min(MLL) )
                if(length(MaxLike)==0) MaxLike=NA
                } # End if MaxLike is NA or NaN
            # Length of MaxLike could be more than one if there is a tie in 
            # the minus log likelihood.
            MaxLike = MaxLike[1]
            # cat("          MLEmatrix = \n"); print(MLEmatrix) 
            # cat("          MaxLike = ",MaxLike,"\n")        
            ######## End of Maximum Likelihood ########

            pval = 1-pf(Y,df1=Dfree1,df2=DF2 )
            Zcurve = zcurve(pval,Plot=0,Verbose=F)
            resultz = c(Pcurve, Puniform, MaxLike, Zcurve)
            # cat("          resultz =",resultz,"\n\n")
            # Put them in the table
            rowno = rowno+1
            datta[rowno,] = c(Simulation, Sigmaval[h], Nstudies[i], AvePower[j], MeanTruePower, resultz)
            }  # Next value of TruePower
        } # Next value of Nstudies
    } # Next value of Sigmaval
    write(t(datta),file="simoutX.txt",ncolumns=9,append=T)
} # Next Simulation