# AN EM ALGORITHM FOR FINDING THE MLE FOR A CENSORED POISSON MODEL.
#
# The data consists of n observed counts, whose mean is m, plus c counts
# that are observed to be less than 2 (ie, 0 or 1), but whose exact value
# is not known. The counts are assumed to be Poisson distributed with
# unknown mean, lambda.
#
# The function below finds the maximum likelihood estimate for lambda given
# the data, using the EM algorithm started from the specified guess at lambda
# (default being the mean count with censored counts set to 1), run for the
# specified number of iterations (default 10). The log likelihood is printed
# at each iteration if the debug argument is TRUE. It should never decrease.
censored_poisson_em <- function (n, m, c, lambda0=(n*m+c)/(n+c), iterations=10,
debug=FALSE)
{
# Function for computing log likelihood. Needed only for printing debugging
# information.
log_likelihood <- function (n, m, c, lambda)
{
n*m*log(lambda) - (n+c)*lambda + c*log(1+lambda)
}
# Set initial guess, and print it and its log likelihood.
lambda <- lambda0
cat (0, lambda, log_likelihood(n,m,c,lambda), "\n")
# Do EM iterations.
for (i in 1:iterations)
{
# The E step: Figure out the distribution of the unobserved data. For
# this model, we need the probability that an unobserved count that is
# either 0 or 1 is actually equal to 1, which is p1 below.
p1 <- lambda / (1+lambda)
# The M step: Find the lambda that maximizes the expected log likelihood
# with unobserved data filled in according to the distribution found in
# the E step.
lambda <- (n*m + c*p1) / (n+c)
# Print the new guess for lambda and its log likelihood.
if (debug) cat (i, lambda, log_likelihood(n,m,c,lambda), "\n")
}
# Return the value for lambda from the final EM iteration.
lambda
}