[R] Plotting survival curves after multiple imputation

Sun Feb 24 01:42:33 CET 2013

Can anyone help with this ?

On 14/02/2013 14:07, Robert Long wrote:
> I am working with some survival data with missing values.
>
> I am using the mice package to do multiple imputation.
>
> I have found code in this thread which handles pooling of the MI results:
> https://stat.ethz.ch/pipermail/r-help/2007-May/132180.html
>
> Now I would like to plot a survival curve using the pooled results.
>
> Here is a reproducible example:
>
> require(survival)
> require(mice)
>
> set.seed(2)
>
> dt <- colon
>
> fit <- coxph(Surv(time,etype)~rx + sex + age, data=colon)
>
> dummy <- data.frame(sex=c(1,1,1),rx=c("Obs","Lev","Lev+5FU"),age=c(40,40,40))
> plot(survfit(fit, newdata=dummy) )
>
> # now create some missing values in the data
> dt <- colon
> dt$rx[sample(1:nrow(dt),50)] <- NA
> dt$sex [sample(1:nrow(dt),50)] <- NA
> dt$age[sample(1:nrow(dt),50)] <- NA
>
> imp <-mice(dt)
>
> fit.imp <- coxph.mids(Surv(time,etype)~rx + sex + age,imp)
> # Note, this function is defined below...
>
> imputed=summary.impute(pool.impute(fit.imp))
> print(imputed)
>
> # now, how to plot a survival curve with the pooled results ?
>
>
>
>
> ########## begin code from linked thread above
>
> coxph.mids <- function (formula, data, ...) {
>
>       call <- match.call()
>       if (!is.mids(data)) stop("The data must have class mids")
>
>       analyses <- as.list(1:data$m)
>
>       for (i in 1:data$m) {
>         data.i        <- complete(data, i)
>         analyses[[i]] <- coxph(formula, data = data.i, ...)
>       }
>
>       object <- list(call = call, call1 = data$call,
>                      nmis = data$nmis, analyses = analyses)
>
>       return(object)
> }
>
> pool.impute <- function (object, method = "smallsample") {
>
>       if ((m <- length(object$analyses)) < 2)
>         stop("At least two imputations are needed for pooling.\n")
>
>       analyses <- object$analyses
>
>       k     <- length(coef(analyses[[1]]))
>       names <- names(coef(analyses[[1]]))
>       qhat  <- matrix(NA, nrow = m, ncol = k, dimnames = list(1:m,names))
>       u     <- array(NA, dim = c(m, k, k),
>                      dimnames = list(1:m, names, names))
>
>       for (i in 1:m) {
>         fit       <- analyses[[i]]
>         qhat[i, ] <- coef(fit)
>         u[i, , ]  <- vcov(fit)
>       }
>
>       qbar <- apply(qhat, 2, mean)
>       ubar <- apply(u, c(2, 3), mean)
>       e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
>       b <- (t(e) %*% e)/(m - 1)
>       t <- ubar + (1 + 1/m) * b
>       r <- (1 + 1/m) * diag(b/ubar)
>       f <- (1 + 1/m) * diag(b/t)
>       df <- (m - 1) * (1 + 1/r)^2
>
>       if (method == "smallsample") {
>
>         if( any( class(fit) == "coxph" ) ){
>
>           ### this loop is the hack for survival analysis ###
>
>           status   <- fit$y[ , 2]
>           n.events <- sum(status == max(status))
>           p        <- length( coefficients( fit )  )
>           dfc      <- n.events - p
>
>         } else {
>
>           dfc <- fit$df.residual
>         }
>
>         df <- dfc/((1 - (f/(m + 1)))/(1 - f) + dfc/df)
>       }
>
>       names(r) <- names(df) <- names(f) <- names
>       fit <- list(call = call, call1 = object$call, call2 = object$call1,
>                   nmis = object$nmis, m = m, qhat = qhat, u = u,
>                   qbar = qbar, ubar = ubar, b = b, t = t, r = r, df = df,
>                   f = f)
>
>       return(fit)
> }
>
> summary.impute <- function(object){
>
>        if (!is.null(object$call1)){
>          cat("Call: ")
>          dput(object$call1)
>        }
>
>        est  <- object$qbar
>        se   <- sqrt(diag(object$t))
>        tval <- est/se
>        df   <- object$df
>        pval <- 2 * pt(abs(tval), df, lower.tail = FALSE)
>
>        coefmat <- cbind(est, se, tval, pval)
>        colnames(coefmat) <- c("Estimate", "Std. Error",
>                                             "t value", "Pr(>|t|)")
>
>        cat("\nCoefficients:\n")
>        printCoefmat( coefmat, P.values=T, has.Pvalue=T, signif.legend=T )
>
>        cat("\nFraction of information about the coefficients
>                                        missing due to nonresponse:","\n")
>        print(object$f)
>
>        ans <- list( coefficients=coefmat, df=df,
>                     call=object$call1, fracinfo.miss=object$f )
>        invisible( ans )
>    }
>
> ### end code from linked thread above
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.