# SOLUTION TO STA 410/2102, SPRING 2002, ASSIGNMENT #2.
#
# Radford Neal, March 2002


# THE LOGISTIC FUNCTION.  This is the "g" function in the assignment writeup.

logistic <- function (z) 1/(1+exp(-z))


# LOG LIKELIHOOD FUNCTION.  Takes as arguments the vector of values
# for the explanatory variable, x, the vector of values for the
# response variable, y (which are either -1 or +1), and the vector of
# parameters (gamma, beta0, beta1).

log.like <- function (x,y,params)
{ 
  g <- logistic (y * (params[2]+params[3]*x))
  a <- logistic (params[1])
  p <- a/2 + (1-a)*g
  sum(log(p))
}


# GRADIENT OF THE LOG LIKELIHOOD.  Takes arguments as for log.like.
# Returns the vector of partial derivatives of the log likelihood
# with respect to the parameters.

log.like.grad <- function (x,y,params)
{ 
  g <- logistic (y * (params[2]+params[3]*x))
  a <- logistic (params[1])
  p <- a/2 + (1-a)*g

  a1 <- a*(1-a)
  g1 <- g*(1-g)

  c (sum ((0.5-g)*a1/p), sum ((1-a) * (y*g1) / p), sum ((1-a) * (y*x*g1) / p))
}


# HESSIAN OF THE LOG LIKELIHOOD.  Takes arguments as for log.like.
# Returns the matrix of second derivatives of the log likelihood with
# respect to the parameters.

log.like.hessian <- function (x,y,params)
{ 
  g <- logistic (y * (params[2]+params[3]*x))
  a <- logistic (params[1])
  p <- a/2 + (1-a)*g

  a1 <- a * (1-a)
  g1 <- g * (1-g)
  g2 <- g1 * (2*g-1)

  H <- matrix(0,3,3)
  
  H[1,1] <- sum (-((0.5-g)*a1/p)^2 + (0.5-g)*a1*(1-2*a)/p)
  H[2,2] <- sum ((1-a) * (-(1-a)*(g1/p)^2 - g2/p))
  H[3,3] <- sum ((1-a) * (-(1-a)*(x*g1/p)^2 - x^2*g2/p))

  H[1,2] <- H[2,1] <- sum ((y*g1) * (-a1/p - a1*(1-a)*(0.5-g)/p^2))
  H[1,3] <- H[3,1] <- sum ((y*x*g1) * (-a1/p - a1*(1-a)*(0.5-g)/p^2))
  H[2,3] <- H[3,2] <- sum ((1-a) * (-(1-a)*x*(g1/p)^2 - x*g2/p))
  H
}


# FIND MLE BY NEWTON-RAPHSON ITERATION AND PERHAP GRADIENT ASCENT.  Takes 
# arguments as follows:
#
#    x         Vector of values for the explanatory variable.
#    y         Vector of values for the response variable (-1 or +1).
#    iters     Number of iterations to do (defaults to 100)
#    g.ascent  Should gradient ascent be used if Newton fails? (default T)
#    p.info    Should information be printed at each iteration? (default T)
#    initial   Vector of initial values for the parameters (defaults to
#              gamma=-2 (ie, alpha=0.12) and beta0 and beta1 from glm).
#
# The value returned is a list with elements "mle" for the final estimate
# and "se" for the vector of standard errors for the parameters.  
#
# If p.info is T, the parameter values, the log likelihood, the gradient of 
# the log likelihood, and the type of step done (newton or gradient/factor) 
# are done are printed for each iteration.  The eigenvalues of the hessian
# at the final point found are also printed; they allow determination of
# whether this point is a local maximum, local minimum, or saddle point.

mle <- function (x, y, iters=100, g.ascent=T, p.info=T,
                 initial = c(-2, coef (glm ((y+1)/2 ~ x, family=binomial))))
{
  params <- initial

  cur.log.like <- log.like(x,y,params)

  if (p.info) 
  { cat("Iteration",0,":",round(cur.log.like,3),":",params,":",
        round(log.like.grad(x,y,params),3),"\n");
  }

  for (i in 1:iters)
  { 
    ll.grad    <- log.like.grad(x,y,params)
    ll.hessian <- log.like.hessian(x,y,params)

    v <- solve(ll.hessian,ll.grad)

    new.params <- params-v
    new.log.like <- log.like(x,y,new.params)
    type <- "newton"

    if (g.ascent && new.log.like<cur.log.like)
    { 
      f <- 0.5

      while (new.log.like<cur.log.like)
      { f <- f*2
        new.params <- params + ll.grad/f
        new.log.like <- log.like(x,y,new.params)
      }

      type <- paste("grad/",f,sep="")
    }

    params <- new.params
    cur.log.like <- new.log.like
    
    if (p.info) 
    { cat("Iteration",i,":",round(cur.log.like,3),":",params,":",
           round(log.like.grad(x,y,params),3),":",type,"\n");
    }
  }

  ll.hessian <- log.like.hessian(x,y,params)

  if (p.info) 
  { cat("\nEigenvalues of Hessian:",eigen(ll.hessian)$values,"\n")
  }

  rbind (mle=params, se=sqrt(diag(solve(-ll.hessian))))
}