############################### Study7Template.txt ###################################
#                                                                                    #
# Study7 is about estimation when sample size and effect size are correlated.        #
# Because of conscious or unconscious power analysis, the correlation should be      #
# negative. Just F-tests with df1=1 and significance level 0.05 under conditions     #
# of selection and heterogeneity in both sample size and effect size.                #
# Effect size is beta with a Poisson regression model for sample size. Mean sample   #
# size is roughly 86.  Everything is after selection. The 9 variables are            #
#    Simulation Correlation Ntests AvePower MeanTruePower                            #
#               Pcurve, Puniform, MaxLike, Zcurve                                    #
# AvePower is population mean power, while MeanTruePower is SAMPLE mean true power.  #
# Copy to template.txt, and run with runR on Depth Charge or sniffR on PsychLab      #
#                                                                                    #
#                                                                                    #
#     This version writes one simulation at a time to the output file.               # 
#                                                                                    #
######################################################################################

################################ Setup ################################
rm(list=ls()); options(scipen=999) # To avoid scientific notation
nsim=35 # Number of simulated data sets in each condition: 35*73*4 = 10220
nrows = 5*3*5*nsim # Number of tests x Number of power values x Number of Corr values
simdata = matrix(0,nrows,9) # Simulation number plus eight variables  
v = 0.09 # Variance of beta effect size, yielding SD(es) = 0.3
source("http://www.utstat.toronto.edu/~brunner/Rfunctions/estimatR.txt")
# Loop through these variables
Ntests = c(100,250,500,1000,2000) # 5 levels
Corr = c(0.00, -0.20, -0.40, -0.60, -0.80) # Correlation between N and effect size
AvePower = c(0.25,0.50,0.75) # 3 levels of population mean power
Methods = c("Pcurve","Puniform","MaxLike","Zcurve") 

######## Find (m,v,beta0,beta1) to give desired mean power and desired correlation ########
mMAT = matrix(0,5,3); rownames(mMAT) = Corr; colnames(mMAT) = AvePower
b1MAT = matrix(0,5,3); rownames(b1MAT) = Corr; colnames(b1MAT) = AvePower

# Define functions -- see hand-written notes, because Corr(X,N) is not obvious.
fun1 = function(w,M,V,B0,B1) # Function to be integrated for mean power
    { 
    nval = 5:200; DF2 = nval-2; critvalval = qf(0.95,1,DF2)
    A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V
    LAMBDA = exp(B0+B1*w)
    sum( (1-pf(critvalval,df1=1,df2=DF2,ncp=nval*w^2)) * 
            dpois(nval,LAMBDA) * dbeta(w,A,B)  ) 
    } # End fun1
fun1v = Vectorize(fun1,vectorize.args="w")
fun2 = function(w,M,V,B0,B1) # Integrate to get E(N)
    {
    A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V
    exp(B0+B1*w) * dbeta(w,shape1=A,shape2=B) 
    } # End fun2
fun3 = function(w,M,V,B0,B1) # Integrate to get E(N^2)
     {
     A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V
     Lambda = exp(B0+B1*w)
     Lambda*(1+Lambda) * dbeta(w,shape1=A,shape2=B) 
     } # End fun3
fun4 = function(w,M,V,B0,B1) # Integrate to get E(XN)
    {
    A = -(M^3 - M^2 + M*V)/V; B = (M^3 + (M - 1)*V - 2*M^2 + M)/V
    w * exp(B0+B1*w) * dbeta(w,shape1=A,shape2=B) 
    } # End fun4

fun = function(theta,wantpow,wantcorr,v=0.09) # theta = (m,beta1)
# Minimize this to find (m,beta0,beta1) yielding desired mean power and correlation
# between sample size and effect size. Note v < m(1-m), because Bernoulli variance is greatest.
    {
    # cat("m, beta1 =",theta,"\n") # Debug
    m = theta[1]; beta1 = theta[2]
    beta0 = log(86)-beta1*m # Gives average sample size of 86 at average effect size
    # Calculate mean power and correlation
    epow = integrate(fun1v,0,1,M=m,V=v,B0=beta0,B1=beta1)$value
    # Calculate exact Corr(X,N) 
    EN = integrate(fun2,0,1,M=m,V=v,B0=beta0,B1=beta1)$value
    ENsq = integrate(fun3,0,1,M=m,V=v,B0=beta0,B1=beta1)$value
    SDN = sqrt(ENsq-EN^2)
    EXN = integrate(fun4,0,1,M=m,V=v,B0=beta0,B1=beta1)$value
    rho = (EXN-m*EN)/(sqrt(v)*SDN)
    # cat("Mean power, Correlation =",c(epow,rho),"\n") # Debug
    return( abs(epow-wantpow) + abs(rho-wantcorr) ) 
    } # End of all the fun

# Takes a while -- comment out and enter values manually
# for(i in 1:length(Corr))
#  #    {
#     cat("Correlation =",Corr[i],"\n")
#         for(j in 1:length(AvePower))
#             {
#             cat("     Power =",AvePower[j],"\n")
#             begin=c(0.50,-0.25) # Starting values for (m,corr)
#             if(Corr[i]==0) begin[2] = 0
#             best = optim(par=begin,fun,lower=c(0.12,-1),upper=c(0.88,0),
#                     method="L-BFGS-B",wantcorr=Corr[i],wantpow=AvePower[j])
#             mMAT[i,j] = best$par[1]; b1MAT[i,j] = best$par[2]
#             } # Next j (Power value)
#     } # Next i (Correlation)
# print(mMAT,digits=22); print(b0MAT,digits=22)

mMAT = rbind(
c(0.1547286643407420336782, 0.2965565361381660713924, 0.4784694650994398279487),
c(0.1548377731227441322925, 0.2963263361789551519898, 0.4768297807020639433873),
c(0.1549738174955386238452, 0.2960695854355749889741, 0.4749670005746801648705),
c(0.1551798189476286671251, 0.2957151958000013292072, 0.4724730358897300530607),
c(0.1556586764473793516039, 0.2950544431790082522404, 0.4678675710361732131837)  )
b0mat = rbind(
c(0.00000000000000000000000,  0.00000000000000000000000,  0.00000000000000000000000),
c(-0.0749290999810406227466, -0.07403899402777110172469, -0.07343577116871970178469),
c(-0.1642299932679451845985, -0.15997368512198306689243, -0.15719474079343254135921),
c(-0.2923671731818158758820, -0.27898222343141670931388, -0.27067199714252238029744),
c(-0.5593194803292570460584, -0.51158446526716483404584, -0.48446671984845995906355)  )

system("touch simoutX.txt") # Append results to this file, one simulation at a time
for(Simulation in 1:nsim)
    {cat("Simulation ",Simulation,"\n")
    datta = matrix(0,nrows/nsim,9); rowno=0 # Accumulate the results for one simulation in datta
    colnames(datta) = c("Simulation", "Correlation", "Ntests", "PopMeanPower", "SampleMeanPower", 
                     "Pcurve", "Puniform", "MaxLike", "Zcurve")
    for(h in 1:length(Corr))
        {cat("  Correlation =",Corr[h],"\n")
            for(i in 1:length(Ntests))
                {cat("      K =",Ntests[i],"\n")
                k = Ntests[i]
                for(j in 1:length(AvePower))
                {cat("          Mean power =",AvePower[j],"\n")
                m = mMAT[h,j]; beta1 = b1MAT[h,j]
                beta0 = log(86)-beta1*m # Average N = 86 at average effect size
                a = -(m^3 - m^2 + m*v)/v; b = (m^3 + (m - 1)*v - 2*m^2 + m)/v
                es = rbeta(Ntests[i],a,b)
                lambda = exp(beta0+beta1*es)
                N = rpois(Ntests[i],lambda)
                NCP = N*es^2
                df2 = N-2; critval = qf(0.95,1,df2)
                Fstat = rsigF(Ntests[i],1,df2,NCP)
                TruePower = 1-pf(critval,1,df2,ncp=NCP)
                MeanTruePower = mean(TruePower) # Sample mean of the True Power values
                # cat("Mean power of the significant tests is",MeanTruePower,
                # ", compared to expected true power of",AvePower[j],"\n\n")
                # COMPUTE ESTIMATES
                # cat("Calculating p-curve \n")
                Pcurve     = heteroNpcurveF(FF=Fstat,dfree1=1,dfree2=df2)
                # cat("Calculating p-uniform \n")
                Puniform   = heteroNpunifF(FF=Fstat,dfree1=1,dfree2=df2,CI=F) 

            ######## Calculate the MLE with multiple starts - slow! ########
            MLEmatrix = matrix(0,3,4)
            colnames(MLEmatrix) = c("astart","bstart","MLE","Minus LL")
            begin = c(rexp(1),runif(1)) # Random start
            trapmle = try(heteromleF(FF=Fstat, dfree1=1, dfree2=df2, startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[1,] = c(begin,MaxLike)
            begin = rexp(2) # Another random start
            trapmle = try(heteromleF(FF=Fstat, dfree1=1, dfree2=df2, startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[2,] = c(begin,MaxLike)
            begin = c(rexp(1,rate=1/2),runif(1)) # A third random start
            trapmle = try(heteromleF(FF=Fstat, dfree1=1, dfree2=df2, startvals=begin))
            MaxLike = c(as.numeric(trapmle[1]),as.numeric(trapmle[2])) 
            MLEmatrix[3,] = c(begin,MaxLike)
            MaxLike = subset(MLEmatrix[,3],MLEmatrix[,4]==min(MLEmatrix[,4]))
            if(is.na(MaxLike)) # Will catch NaN too.
                { # Discard NA and NaN values, then choose minimum
                MLE = MLEmatrix[,3]; MLL = MLEmatrix[,4]
                MLE[MLE>1] = NA
                bad = subset(1:3,is.na(MLE)) 
                MLE = MLE[-bad]; MLL = MLL[-bad] 
                MaxLike = subset( MLE , MLL==min(MLL) )
                if(length(MaxLike)==0) MaxLike=NA
                } # End if MaxLike is NA or NaN
            # Length of MaxLike could be more than one if there is a tie in 
            # the minus log likelihood.
            MaxLike = MaxLike[1]
            # cat("          MLEmatrix = \n"); print(MLEmatrix) 
            # cat("          MaxLike = ",MaxLike,"\n")        
            ######## End of Maximum Likelihood ########

            logp = pf(Fstat,1,df2,lower.tail=F,log.p=T)
            pval = exp(logp)
            # Z = qnorm(logp-log(2),lower.tail=F,log.p=T)
            Zcurve = zcurve(pval,Plot=0,Verbose=F)
            resultz = c(Pcurve, Puniform, MaxLike, Zcurve)
            # cat("          resultz =",resultz,"\n\n")
            # Put them in the table
            rowno = rowno+1
            datta[rowno,] = 
            c(Simulation, Corr[h], Ntests[i], AvePower[j], MeanTruePower, resultz)
            }  # Next value of TruePower
        } # Next value of Ntests
    } # Next value of Corr
    write(t(datta),file="simoutX.txt",ncolumns=9,append=T)
} # Next Simulation