############################### Study5Template.txt ###################################
#                                                                                    #
# Study5 is on full heterogeneity under conditions of publication bias and           #
# heterogeneity in sample size, effect size and F vs. chi-square.                    #
# Sample size and df after selection is Psych Science.  The 8 variables are          #
#                                                                                    #
#    Simulation Nstudies AvePower MeanTruePower                                      #
#               Pcurve, Puniform, MaxLike, Zcurve                                    #
# AvePower is population mean power, while MeanTruePower is SAMPLE mean true power.  #
# Copy to template.txt, and run with runR on Depth Charge or sniffR on PsychLab      #
#                                                                                    #
#                                                                                    #
#     This version writes one simulation at a time to the output file.               # 
#                                                                                    #
######################################################################################

################################ Setup ################################
rm(list=ls()); options(scipen=999) # To avoid scientific notation
options(warn = -1) # Suppress warnings -- they can safely be ignored here.
nsim=36 # Number of simulated data sets in each condition: 36*280 = 10080
source("http://www.utstat.toronto.edu/~brunner/Rfunctions/estimatR.txt")
PsychScience2 = read.table("http://www.utstat.toronto.edu/~brunner/data/power/PsychScience.urn2.txt",header=T)
attach(PsychScience2) # Now have df1, df2, n
cvF = qf(0.95,df1,df2); cvCHI = qchisq(0.95,df1)
ntests = length(n) # 7,000
probF = 0.80 # Probability of F-test rather than chi-squared
# Either H0 is true, or effect size is continuous (beta), or power = 1
probH0 = 0.10; probpower1 = 0.05; probcontinuous = 1-probH0-probpower1
esprob = c(probH0,probcontinuous,probpower1)
p1 = probH0; p2 = probF*probcontinuous
p3 = (1-probF)*probcontinuous; p4 = probpower1
# Loop through these variables
Nstudies = c(100,250,500,1000,2000) # 5 levels
AvePower = c(0.25,0.50,0.75) # 3 levels of population mean power
Methods = c("Pcurve","Puniform","MaxLike","Zcurve") 

# Minimize fun over M = mean of beta (V is fixed) to find value giving desired
# mean power. All of this is after selection. Uses material above.
fun = function(M,wantpow,V,info=F)
    {
    if(info) cat("M =",M,"      ")
    fun1 = function(w,M,V) # Function to be integrated for middle
        {
        A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V
        p2*mean(1 - pf(cvF,df1,df2,ncp=n*w^2)) * dbeta(w,A,B) + 
        p3*mean(1 - pchisq(cvCHI,df1,ncp=n*w^2)) * dbeta(w,A,B)
        } # End fun1
    fun1v = Vectorize(fun1,vectorize.args="w")
    fun2 = function(w) # Function to be integrated for high
        {
        p4*mean(1 - pf(cvF,df1,df2,ncp=n*w^2)) * exp(-(w-1)) + 
        p5*mean(1 - pchisq(cvCHI,df1,ncp=n*w^2)) * exp(-(w-1))
        } # End fun2
    fun2v = Vectorize(fun2,vectorize.args="w")
    middle = integrate(fun1v,0,Inf,M=M,V=V)$value 
    high = integrate(fun2v,1,Inf)$value
    POW = p1*0.05 + middle + high    
    value = (POW-wantpow)^2
    if(info) cat("Mean power =",POW,"   Value =",value,"\n")
    return(value)
    } # End of all the fun

# Mean and variance of Beta effect size
v = 0.011 
meanes = numeric(3)
# This takes a while -- comment out and enter manually.
# meanes[1] = optimize(fun,interval=c(0,1),wantpow=0.25,V=v,info=T)$minimum
# meanes[2] = optimize(fun,interval=c(0,1),wantpow=0.50,V=v,info=T)$minimum
# meanes[3] = optimize(fun,interval=c(0,1),wantpow=0.75,V=v,info=T)$minimum
meanes = c(0.1287981213780003408864, 0.2737284441811304591674, 0.4555767070170602672796 ) 
names(meanes) = c("Power = 0.25", "Power = 0.50", "Power = 0.75")
cat("Required mean beta effect sizes for desired mean power: \n")
print(meanes,digits=22)
# Manual meanes values are specific to variance of beta effect size = 0.011

system("touch simoutX.txt") # Append results to this file, one simulation at a time
for(Simulation in 1:nsim)
    {cat("Simulation ",Simulation,"\n")
    datta = matrix(0,15,8); rowno=0 # Accumulate the results for one simulation in datta
    colnames(datta) = c("Simulation", "Nstudies", "PopMeanPower", "SampleMeanPower", 
                     "Pcurve", "Puniform", "MaxLike", "Zcurve")
    for(i in 1:length(Nstudies))
        {cat("      K =",Nstudies[i],"\n")
        k = Nstudies[i]
        for(j in 1:length(AvePower))
            {cat("          Mean power =",AvePower[j],"\n")

            m = meanes[j]
            # cat("Mean and Variance of beta (m,v) =",c(m,v),"\n")
            # Beta parameters for effect size
            a = -(m^3 - m^2 + m*v)/v; b = (m^3 + (m - 1)*v - 2*m^2 + m)/v
            # cat("(a,b) =",c(a,b),"\n")
            # cat("Mean effect size =",a/(a+b),"\n")
            # Choose number of F-tests and number of chi-squared tests.
            numF = rbinom(1,k,probF); numCHI = k-numF
            # cat("There are ",numF,"F-tests and ",numCHI,"chi-square tests.\n")
            # Sample df and n values for F
            whichF = sample(1:ntests,size=numF,replace=T)
            DF1 = df1[whichF]; DF2 = df2[whichF]
            nF = n[whichF] # Sample sizes for F, not to be confused with numF
            critF = qf(0.95,DF1,DF2)
            # Choose number of F-tests to have effect size 0, beta or 2
            Fesfreq = as.numeric(rmultinom(1,numF,esprob))
            # Effect sizes for the F-tests
            Wf = c( numeric(Fesfreq[1]), rbeta(Fesfreq[2],a,b), rexp(Fesfreq[3])+1 )  
            ncpF = nF*Wf^2 # Non-centrality parameters for F
            powF = 1-pf(critF,df1=DF1,df2=DF2,ncp=ncpF) # Power for F
            # Sample df and n values for CHI
            whichCHI = sample(1:ntests,size=numCHI,replace=T)
            DF = df1[whichCHI]
            nCHI = n[whichCHI] # Chi-squared sample sizes: Not to be confused with numCHI
            critCHI = qchisq(0.95,df=DF)
            # Choose number of F-tests to have effect size 0, 2 or beta
            CHIesfreq = as.numeric(rmultinom(1,numCHI,esprob))
            # Effect sizes for the Chi-squared tests
            Wchi = c( numeric(CHIesfreq[1]), rbeta(CHIesfreq[2],a,b), rexp(CHIesfreq[3])+1 )  
            ncpCHI = nCHI*Wchi^2 # Non-centrality parameters for CHI
            powCHI = 1-pchisq(critCHI,df=DF,ncp=ncpCHI) # Power for CHI
            # Put them together
            pow = c(powF,powCHI)
            MeanTruePower = mean(pow)
            # cat("          Mean power after selection is ",mean(pow),"\n")
            W = c(Wf,Wchi)
            # cat("          Standard deviation of effect size after selection is ",sd(W),"\n")
            # Simulate significant test statistics
            Fstat = rsigF(numF,DF1,DF2,ncpF); pvalF = 1-pf(Fstat,DF1,DF2)
            CHIstat = rsigCHI(numCHI,DF,ncpCHI); pvalCHI = 1-pchisq(CHIstat,DF)
            pval = c(pvalF,pvalCHI)
            # Calculate estimates
            propF = numF/k; propCHI = numCHI/k
            # P-curve estimate
            pcurveCHI = mixedCHIpcurve(CHIstat,N=nCHI,DF)
            pcurveF = mixedFpcurve(Fstat,DF1,DF2)
            Pcurve = pcurveCHI*propCHI + pcurveF*propF
            # P-uniform estimate
            puniformCHI = mixedCHIpuniform(CHIstat,N=nCHI,DF)
            puniformF = mixedFpuniform(Fstat,DF1,DF2)
            Puniform = puniformCHI*propCHI + puniformF*propF
            # Maximum Likelihood estimate
            maxlikeCHI = mixedCHImle(CHIstat,nCHI,DF)
            maxlikeF = mixedFmle(Fstat,DF1,DF2)
            MaxLike =  maxlikeCHI*propCHI + maxlikeF*propF
            # Z-curve estimate
            Zcurve = zcurve(pval,Plot=0,Verbose=F,ncomp=5)
            resultz = c(Pcurve, Puniform, MaxLike, Zcurve)
            names(resultz) = Methods
            # cat("          resultz ="); print(resultz)
            # Put them in the table
            rowno = rowno+1
            datta[rowno,] = c(Simulation, Nstudies[i], AvePower[j], MeanTruePower, resultz)

            }  # Next value of TruePower
        } # Next value of Nstudies
    write(t(datta),file="simoutX.txt",ncolumns=8,append=T)
    
} # Next Simulation