[R] Plotting survival curves after multiple imputation

Thu Feb 14 15:07:34 CET 2013

I am working with some survival data with missing values.

I am using the mice package to do multiple imputation.

I have found code in this thread which handles pooling of the MI results:
https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html

Now I would like to plot a survival curve using the pooled results.

Here is a reproducible example:

require(survival)
require(mice)

set.seed(2)

dt <- colon

fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon)

dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40))
plot(survfit(fit, newdata=dummy) )

# now create some missing values in the data
dt <- colon
dt$rx[sample(1:nrow(dt),50)] <- NA
dt$sex [sample(1:nrow(dt),50)] <- NA
dt$age[sample(1:nrow(dt),50)] <- NA

imp <-mice(dt)

fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp)
# Note, this function is defined below...

imputed=summary.impute(pool.impute(fit.imp))
print(imputed)

# now, how to plot a survival curve with the pooled results ?

########## begin code from linked thread above

coxph.mids <- function (formula, data, ...) {

     call <- match.call()
     if (!is.mids(data)) stop("The data must have class mids")

     analyses <- as.list(1:data$m)

     for (i in 1:data$m) {
       data.i        <- complete(data, i)
       analyses[[i]] <- coxph(formula, data = data.i, ...)
     }

     object <- list(call = call, call1 = data$call,
                    nmis = data$nmis, analyses = analyses)

     return(object)
}

pool.impute <- function (object, method = "smallsample") {

     if ((m <- length(object$analyses)) < 2)
       stop("At least two imputations are needed for pooling.\n")

     analyses <- object$analyses

     k     <- length(coef(analyses[[1]]))
     names <- names(coef(analyses[[1]]))
     qhat  <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names))
     u     <- array(NA, dim = c(m, k, k),
                    dimnames = list(1:m, names, names))

     for (i in 1:m) {
       fit       <- analyses[[i]]
       qhat[i, ] <- coef(fit)
       u[i, , ]  <- vcov(fit)
     }

     qbar <- apply(qhat, 2, mean)
     ubar <- apply(u, c(2, 3), mean)
     e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
     b <- (t(e) %*% e)/(m - 1)
     t <- ubar + (1 + 1/m) * b
     r <- (1 + 1/m) * diag(b/ubar)
     f <- (1 + 1/m) * diag(b/t)
     df <- (m - 1) * (1 + 1/r)^2

     if (method == "smallsample") {

       if( any( class(fit) == "coxph" ) ){

         ### this loop is the hack for survival analysis ###

         status   <- fit$y[ , 2]
         n.events <- sum(status == max(status))
         p        <- length( coefficients( fit )  )
         dfc      <- n.events - p

       } else {

         dfc <- fit$df.residual
       }

       df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
     }

     names(r) <- names(df) <- names(f) <- names
     fit <- list(call = call, call1 = object$call, call2 = object$call1,
                 nmis = object$nmis, m = m, qhat = qhat, u = u,
                 qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df,
                 f = f)

     return(fit)
}

summary.impute <- function(object){

      if (!is.null(object$call1)){
        cat("Call: ")
        dput(object$call1)
      }

      est  <- object$qbar
      se   <- sqrt(diag(object$t))
      tval <- est/se
      df   <- object$df
      pval <- 2 * pt(abs(tval), df, lower.tail = FALSE)

      coefmat <- cbind(est, se, tval, pval)
      colnames(coefmat) <- c("Estimate", "Std. Error",
                                           "t value", "Pr(>|t|)")

      cat("\nCoefficients:\n")
      printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T )

      cat("\nFraction of information about the coefficients
                                      missing due to nonresponse:","\n")
      print(object$f)

      ans <- list( coefficients=coefmat, df=df,
                   call=object$call1, fracinfo.miss=object$f )
      invisible( ans )
  }

### end code from linked thread above