############################### Study3Template.txt ###################################
#                                                                                    #
# Study3 is on chi-squared tests with significance level 0.05 under conditions        #
# of publication bias and heterogeneity in both sample size and effect size.         #
# Sample size after selection is Poisson(86).  The 9 variables are                   #
#    Simulation Sigma Nstudies AvePower MeanTruePower                                #
#               Pcurve, Puniform, MaxLike, Zcurve                                    #
# AvePower is population mean power, while MeanTruePower is SAMPLE mean true power.  #
# Copy to template.txt, and run with runR on Depth Charge or sniffR on PsychLab      #
#                                                                                    #
#                                                                                    #
#     Note the degrees of freedom could be 1, 3 or 5 in separate runs.               #   
#                                                                                    #
######################################################################################
# Need to loop through sigma values, and now there will be 9 columns. The 
# analysis program is affected. 
################################ Setup ################################
rm(list=ls()); options(scipen=999) # To avoid scientific notation
# Run separately for each df. Change name of output file accordingly.
DFree = c(1,3,5); index = 1; Dfree = DFree[index] 
nsim=37 # Number of simulated data sets in each condition: 36*280 = 10080
nrows = 5*3*3*nsim # Number of studies x Number of power values x Number of SD values
simdata = matrix(0,nrows,9) # Simulation number plus eight variables  
Lambda = 86 # Poisson mean sample size
top = round(Lambda + 6*sqrt(Lambda)) # Poisson probabilities are zero above here
nval = 1:top
alpha=0.05; critval = qchisq(1-alpha,df=Dfree)
source("http://www.utstat.toronto.edu/~brunner/Rfunctions/estimatR.txt")
# Loop through these variables
Nstudies = c(100,250,500,1000,2000) # 5 levels
Sigmaval = c(0.1, 0.2, 0.3) # SD of effect size 
AvePower = c(0.25,0.50,0.75) # 3 levels of population mean power
Methods = c("Pcurve","Puniform","MaxLike","Zcurve") 

# Minimize fun over (a,b) to find values giving desired mean power 
# and desired SD of effect size after selection for significance.
# This function uses material above.
fun = function(ab, wantpow, wantsigma, info=F) 
    {
    aa = abs(ab[1]); bb = abs(ab[2])
    # w is effect size, with ncp = n*w^2
    fun1 = function(w,A,B) # Function to be integrated for E(G)
        { 
        sum( (1-pchisq(critval,df=Dfree,ncp=nval*w^2)) * dgamma(w,shape=A,scale=B) 
              * dpois(nval,Lambda) ) 
        } # End fun1
    fun1v = Vectorize(fun1,vectorize.args="w")
    fun2 = function(w,A,B,BEFORE) # Function to be integrated for E(G^2)
        { 
        sum( (1-pchisq(critval,df=Dfree,ncp=nval*w^2))^2 * 
              dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) 
        } # End fun2
    fun2v = Vectorize(fun2,vectorize.args="w")
    EG = integrate(fun1v,0,Inf,A=aa,B=bb)$value 
    EG2 = integrate(fun2v,0,Inf,A=aa,B=bb)$value # E(G^2)
    meanPOW = EG2/EG # E(G|T>c) by Principle 3.
    fun3 = function(w,A,B) # Function to be integrated for E(W|T>c)
        { 
        sum( w * (1-pchisq(critval,df=Dfree,ncp=nval*w^2)) * 
              dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) 
        } # End fun3
    fun3v = Vectorize(fun3,vectorize.args="w")
    fun4 = function(w,A,B) # Function to be integrated for E(W^2|T>c)
        { 
        sum( w^2 * (1-pchisq(critval,df=Dfree,ncp=nval*w^2)) * 
              dgamma(w,shape=A,scale=B) * dpois(nval,Lambda) ) 
        } # End fun4
    fun4v = Vectorize(fun4,vectorize.args="w")
    EW = integrate(fun3v,0,Inf,A=aa,B=bb)$value / EG
    EW2 = integrate(fun4v,0,Inf,A=aa,B=bb)$value / EG
    sdES = sqrt(EW2-EW^2)
    if(info) cat("(a,b) =",c(aa,bb), " sdES =",sdES," meanPOW =",meanPOW,"\n") 
    value = (wantpow-meanPOW)^2 + (wantsigma-sdES)^2
    if(info) cat("   value =",value,"\n\n")
    return(value)
    } # End of all the fun
# fun1 is used outside the function fun too.
    fun1 = function(w,A,B) # Function to be integrated for E(G)
        { 
        sum( (1-pchisq(critval,df=Dfree,ncp=nval*w^2)) * dgamma(w,shape=A,scale=B) 
              * dpois(nval,Lambda) ) } # End fun1
    fun1v = Vectorize(fun1,vectorize.args="w")

################################################## 
# Find gamma parameters yielding desired mean power and SD of effect size after selection.
# This takes a while. Speed up and avoid numerical problems by starting at the answer for 
# df = 3. Note that these values are specific to Poisson(86) sample size.
##################################################
Astart = rbind(
c(1.60798217773761198401417, 4.5732203271733151694889, 10.1541999491541137246031),
c(0.09773736934680485322069, 0.6355680205035049157303,  2.2914204131213717907656),
c(0.02996460139985784865146, 0.2063546030805227526894,  0.9806863762555257935105) )
Bstart = rbind(
c(0.05410720104557791010347, 0.04325098169457004121385, 0.03240131560907458241338),
c(0.23294385735951750326933, 0.16873326287976297965798, 0.13193325352073351219850),
c(0.52171952359795192855074, 0.34079192712814232457674, 0.27547718176653751553928) )

cat("\nCalculating required gamma parameters ...  \n\n")
Amat = Bmat = matrix(0,3,3)
for(i in 1:3) # Loop over rows = SD of effect size
    {cat("Sigma =",Sigmaval[i],"\n")
    for(j in 1:3) # Loop over desired power
        {cat("  Mean Power =",AvePower[j],"\n")
        startvals = c(Astart[i,j],Bstart[i,j])
        Search = nlm(fun, p=startvals, wantpow=AvePower[j], wantsigma=Sigmaval[i], info=F)
        Amat[i,j] = abs(Search$estimate[1]); Bmat[i,j] = abs(Search$estimate[2])
        cat("(a,b) =",c(Amat[i,j],Bmat[i,j]),"\n")
        } # Next j
    } # Next i
rownames(Amat) = rownames(Bmat) = Sigmaval
colnames(Amat) = colnames(Bmat) = AvePower
cat("\n\n")
cat("       Amat \n")
print(Amat,digits=22); cat("\n")
cat("       Bmat \n")
print(Bmat,digits=22); cat("\n\n")

# Make the main simulation a function so it's easier to time when run from the unix 
# command line.
# doit = function()
# {
rowno = 0 # Getting ready to replace one row at a time, much faster than rbind.
for(h in 1:length(Sigmaval))
{cat("Sigma =",Sigmaval[h],"\n")
for(i in 1:length(Nstudies))
    {cat("  K =",Nstudies[i],"\n")
    k = Nstudies[i]
    for(j in 1:length(AvePower))
        {cat("      Mean power =",AvePower[j],"\n")
        # Generate data all at once in matrices to process one row at a time.
        AA = Amat[h,j]; BB = Bmat[h,j] # Gamma parameters
        ExpectedG = integrate(fun1v,0,Inf,A=AA,B=BB)$value
        # cat("E(G) =",ExpectedG,"\n")
        # Assemble a big matrix of significant test statistics, because vector 
        # simulation is faster. Expected proportion significant is ExpectedG.
        aveneed = round(nsim*k/ExpectedG) 
        # cat("\n(SD of effect size, Number of studies, Expected power) =",
        #     c(Sigmaval[h], k,AvePower[j]),"\n")
        # cat("Simulating ",aveneed, " test statistics before selection ...\n")
        Nmat = rpois(aveneed,lambda=Lambda)
        Wmat = rgamma(aveneed,shape=AA,scale=BB)
        Ymat = rchisq(aveneed,df=Dfree,ncp=Nmat*Wmat^2)
        G = 1 - pchisq(critval,df=Dfree,ncp=Nmat*Wmat^2) # True power values before selection
        # c(mean(G^2)/mean(G),AvePower[j] ) # For debugging: Should be very close
        # Select significant results
        Nmat = subset(Nmat,Ymat>critval); Wmat = subset(Wmat,Ymat>critval)
        Ymat = subset(Ymat,Ymat>critval)
        # cat(length(Ymat), ", or",length(Ymat)/aveneed," are significant.",
        # " Compare E(G) =", ExpectedG,"\n")
        # Add more tests if there are not enough
        nmore = nsim
        while(length(Ymat)<k*nsim)
            { 
            # Simulate more if needed
            moreN = rpois(nmore,lambda=Lambda)
            moreW = rgamma(nmore,shape=AA,scale=BB)
            moreY = rchisq(nmore,df=Dfree,ncp=Nmat*Wmat^2)
            # Add significant ones to the end of what we have already
            Nmat = c(Nmat,subset(moreN,moreY>critval))
            Wmat = c(Wmat,subset(moreW,moreY>critval))
            Ymat = c(Ymat,subset(moreY,moreY>critval))
            } # End while not enough significant results
        # Discard extras
        Nmat = Nmat[1:(k*nsim)]; Wmat = Wmat[1:(k*nsim)]
        Ymat = Ymat[1:(k*nsim)]
        # Calculate true power values for the significant results
        Gmat = 1 - pchisq(critval,df=Dfree,ncp=Nmat*Wmat^2)
        # cat("Mean power of the significant tests is",mean(Gmat),
        # ", compared to expected true power of",AvePower[j],"\n\n")
        # Format matrices to process by row
        dim(Nmat) = dim(Wmat) =  dim(Ymat) =  dim(Gmat) =  c(nsim,k)
        # Estimate
        for(Simulation in 1:nsim)
            {cat("          Simulation ",Simulation,"\n")
            N = Nmat[Simulation,]; Y = Ymat[Simulation,]
            # DF2 = DF2mat[Simulation,] # For F tests
            TruePower = Gmat[Simulation,] # k values, all potentially different
            MeanTruePower = mean(TruePower) # Sample mean of the True Power values
            rowno = rowno+1
            Pcurve     = heteroNpcurveCHI(Y=Y,dfree=Dfree,nn=N)
            Puniform   = heteroNpunifCHI(Y=Y,dfree=Dfree,nn=N,CI=F) 
            ######## Calculate the MLE with multiple starts - slow! ########
            MLEmatrix = matrix(0,3,4)
            colnames(MLEmatrix) = c("astart","bstart","MLE","Minus LL")
            begin = c(rexp(1),runif(1)) # Random start
            trapmle = try(heteromleCHI(YY=Y,NN=N,dfree=Dfree,info=F,startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[1,] = c(begin,MaxLike)
            begin = rexp(2) # Another random start
            trapmle = try(heteromleCHI(YY=Y,NN=N,dfree=Dfree,info=F,startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[2,] = c(begin,MaxLike)
            begin = c(rexp(1,rate=1/2),runif(1)) # A third random start
            trapmle = try(heteromleCHI(YY=Y,NN=N,dfree=Dfree,info=F,startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[3,] = c(begin,MaxLike)
            MaxLike = subset(MLEmatrix[,3],MLEmatrix[,4]==min(MLEmatrix[,4]))
            if(is.na(MaxLike)) # Will catch NaN too.
                { # Discard NA and NaN values, then choose minimum
                MLE = MLEmatrix[,3]; MLL = MLEmatrix[,4]
                MLE[MLE>1] = NA
                bad = subset(1:3,is.na(MLE)) 
                MLE = MLE[-bad]; MLL = MLL[-bad] 
                MaxLike = subset( MLE , MLL==min(MLL) )
                if(length(MaxLike)==0) MaxLike=NA
                } # End if MaxLike is NA or NaN
            # Length of Maxlike could be more than one if there is a tie in 
            # the minus log likelihood.
            MaxLike = MaxLike[1]
            # cat("          MLEmatrix = \n"); print(MLEmatrix) 
            # cat("          MaxLike = ",MaxLike,"\n")        
            ######## End of Maximum Likelihood ########
            pval = 1-pchisq(Y,df=Dfree)
            Zcurve = zcurve(pval,Plot=0,Verbose=F)
            resultz = c(Pcurve, Puniform, MaxLike[1], Zcurve)
            # cat("          resultz =",resultz,"\n\n")
            # Put them in the table
            simdata[rowno,] = c(Simulation, Sigmaval[h], Nstudies[i], AvePower[j], MeanTruePower,
                                resultz)
            } # Next Simulation
        }  # Next value of TruePower
    } # Next value of Nstudies
} # Next value of Sigmaval
# Write the data frame to a file
colnames(simdata) = c("Simulation", "SD_EffectSize", "Nstudies", "PopMeanPower", "SampleMeanPower", 
                     "Pcurve", "Puniform", "MaxLike", "Zcurve")
# Write the data frame to a file. File will not have names. Could do something like
# write.table(data.frame(simdata),"Study1.1.Data.txt",quote=F) 
# But to accomodate large data files, ...
# Data are written one column at a time, so write the transpose.
write(t(simdata),"simoutX.txt",ncolumns=9) # The only appearance of upper case x.
#} # End function doit, which just does it.

# system.time(doit()) # How long did it take to do it?