[R] using mle2 for multinomial model optimization

Fri Feb 12 21:12:29 CET 2010

Ravi Varadhan <rvaradhan <at> jhmi.edu> writes:

> 
> 
> Use "Nelder-Mead" as the method in optim.  This will give you the correct
solution.
> 
> m1 <- mle2(mfun, start=svec, vecpar=TRUE, fixed=svec[1:(l-1)], method="Nelder")
> 
> Hope this is helpful,
> Ravi.
> 

  {Resending because it doesn't seem to have gone through, hope
it doesn't go twice}

  Nelder-Mead works OK in this case, but won't work in general.
The two problems are (1) dmultinom() rounds its 'x' argument,
leading to a likelihood surface with little flat spots -- see
below; (2) the likelihood minimum is subtle enough that even
after I fixed up the function (by creating a version of the
likelihood that doesn't round), naive optimization ended up
on a flat spot at large negative values, so I used L-BFGS-B
(bounded optimization) instead.

cohort<-c(50,54,50)
##imaginary harvest numbers of a cohort harvested 
## over 3 years

## avoid "l" as a variable name, easy to confuse with "1"
L <- length(cohort)+1    #nr of cell probabilities
h <- c(0.2,0.3,1)        #harvest rates for the 3 diferent years
S <- c(0.9,0.8)          #survival rates

## more efficient way to do it -- avoid looping
predprob <- function(S=S,h=h) {
  L <- length(h)+1
  p <- h*c(1,cumprod((1-h[1:length(S)])*S))
  p[L] <- 1-sum(p[-L]) ## add one more value
  p
}

## multinom WITHOUT ROUNDING & without
##   error-checking
dmnom <- function(x,prob,log=FALSE) {
  r <- lgamma(sum(x) + 1) + sum(x * log(prob) - lgamma(x + 1))
  if (log) r else exp(r)
}

## pass S and h as parameters
## (would be more efficient to precompute predprob() since
##  it doesn't depend on parameters ...)
mfun <- function(d,S=S,h=h) {
  -dmnom(exp(d),prob=predprob(S,h),log=TRUE)
}

## use dmultinom instead (for illustrative purposes, below)
mfun0 <- function(d,S=S,h=h) {
  -dmultinom(exp(d),prob=predprob(S,h),log=TRUE)
}

## avoid using 'c' as a variable name 
c0 <- c(cohort,100)            ## untransformed initial values for the 
                               ## optimization,->100=N-x1-x2-x3
nvec<-c(rep("x",L-1),"N")     #names for the initial vector
nrs<-c(-L,1)            #nrs for the initial vector
svec = log(c0)             ##transformation of the data to avoid 
                                        #  constraints (x>0)
names(svec) <- paste(nvec,nrs,sep="")
parnames(mfun0) <- parnames(mfun) <- names(svec)

library(bbmle)

## use bounded optimization
m1 = mle2(mfun2,start=svec,
  method="L-BFGS-B",lower=0,upper=5,
  vecpar=TRUE,fixed=svec[-L],
  data=list(S=S,h=h))

## try with Nelder-Mead -- does actually work here
mle2(mfun0,start=svec,
     method="Nelder-Mead",vecpar=TRUE,
     fixed=svec[-L],
       data=list(S=S,h=h))

coef(m1)
plot(profile(m1))

N1vec <- seq(0,5,length=500)
LLvec <-sapply(N1vec,
               function(x) { mfun0(c(svec[-L],N1=x),S=S,h=h)})

LLvec2 <-sapply(N1vec,
               function(x) { mfun(c(svec[-L],N1=x),S=S,h=h)})

plot(N1vec,llvec,type="l")
lines(N1vec,llvec2,col="red")
plot(N1vec[-1],diff(LLvec),type="l")  ## ???
lines(N1vec[-1],diff(LLvec2),col=2)

## zoom in
plot(N1vec[-1],diff(llvec),type="l",
     xlim=c(1,4),ylim=c(-0.2,0.3))
lines(N1vec[-1],diff(llvec2),col=2,lwd=2)