[R] error message from predict.coxph

Tue Feb 12 12:58:42 CET 2013

In one particular situation predict.coxph gives an error message.  Namely: stratified data, predict='expected', new data, se=TRUE.  I think I found the error but I'll leave that to you to decide.

Thanks,
Chris

######## CODE
library(survival)

set.seed(20121221)
nn <- 10 # sample size in each group
lambda0 <- 0.1 # event rate in group 0
lambda1 <- 0.2 # event rate in group 1
t0 <- 10 # time to estimate expected numbers of events

# 'correct' number of events at time t0 = rate of events (lambda) times time (t0)
t0*lambda0
t0*lambda1

# generate uncensored data
T0 <- rexp(nn, rate=lambda0)
T1 <- rexp(nn, rate=lambda1)
thedata <- data.frame(Time=c(T0, T1), X=rep(0:1, e=nn), X2=rep(0:1, nn))

# 4 covariate combinations
(ndf <- data.frame(Time=t0, X=rep(0:1,e=2), X2=rep(0:1, 2)))

# model with 2 effects
mod <- coxph(Surv(Time) ~ X + X2, data=thedata)
summary(mod)
# coefficient of X2 should be 0
# coefficient of X  should be log(lambda1/lambda0)
log(lambda1/lambda0)

predict(mod , newdata=ndf, type="expected", se.fit=TRUE)

# stratified model
mod2 <- coxph(Surv(Time) ~ strata(X) + X2, data=thedata)
predict(mod2, newdata=ndf, type="expected"             ) # no se
predict(mod2, newdata=ndf,                  se.fit=TRUE) # type="lp"
predict(mod2,              type="expected", se.fit=TRUE) # prediction at old data, which has different Time
try(predict(mod2, newdata=ndf, type="expected", se.fit=TRUE)) # error?
#Error in predict.coxph(mod2, newdata = ndf, type = "expected", se.fit = TRUE) : 
#  subscript out of bounds

#mypc <- getAnywhere(predict.coxph)
#mypc2 <- mypc$objs[[1]]
#debug(mypc2)
#mypc2(mod2, newdata=ndf, type="expected", se.fit=TRUE)

# The error appears to be an incorrect subscripting of xbar (and xbar2) in defining dt (and dt2) in one part of predict.coxph:
#
#if (ncol(y) == 2) {
#  if (se.fit) {
#    dt <- (chaz * newx[indx2, ]) - xbar[indx2, ]  # *** SHOULD BE JUST xbar
#    se[indx2] <- sqrt(varh + rowSums((dt %*% object$var) * dt)) * newrisk[indx2]
#  }
#}
#else {
#  j2 <- approx(afit$time, 1:afit.n, newy[indx2, 2], method = "constant", f = 0, yleft = 0, yright = afit.n)$y #  chaz2 <- approx(-afit$time, afit$cumhaz, -newy[indx2, 2], method = "constant", rule = 2, f = 0)$y #  chaz2 <- c(0, afit$cumhaz)[j2 + 1] #  pred[indx2] <- (chaz2 - chaz) * newrisk[indx2] #  if (se.fit) {
#    varh2 <- c(0, cumsum(afit$varhaz))[j1 + 1]
#    xbar2 <- rbind(0, afit$xbar)[j1 + 1, , drop = F]
#    dt <- (chaz * newx[indx2, ]) - xbar[indx2, ] # *** SHOULD BE JUST xbar
#    dt2 <- (chaz2 * newx[indx2, ]) - xbar2[indx2, ] # *** SHOULD BE JUST xbar2
#   
# To be like other sections of the code, xbar (and xbar2) should not be subscripted.

sessionInfo()
maintainer('survival')

########################## OUTPUT

> library(survival)
Loading required package: splines

> set.seed(20121221)

> nn <- 10 # sample size in each group

> lambda0 <- 0.1 # event rate in group 0

> lambda1 <- 0.2 # event rate in group 1

> t0 <- 10 # time to estimate expected numbers of events

> # 'correct' number of events at time t0 = rate of events (lambda) 
> times time (t0)
> t0*lambda0
[1] 1

> t0*lambda1
[1] 2

> # generate uncensored data
> T0 <- rexp(nn, rate=lambda0)

> T1 <- rexp(nn, rate=lambda1)

> thedata <- data.frame(Time=c(T0, T1), X=rep(0:1, e=nn), X2=rep(0:1, 
> nn))

> # 4 covariate combinations
> (ndf <- data.frame(Time=t0, X=rep(0:1,e=2), X2=rep(0:1, 2)))
  Time X X2
1   10 0  0
2   10 0  1
3   10 1  0
4   10 1  1

> # model with 2 effects
> mod <- coxph(Surv(Time) ~ X + X2, data=thedata)

> summary(mod)
Call:
coxph(formula = Surv(Time) ~ X + X2, data = thedata)

  n= 20, number of events= 20 

     coef exp(coef) se(coef)     z Pr(>|z|)
X  0.8788    2.4081   0.5531 1.589    0.112
X2 0.4000    1.4918   0.4856 0.824    0.410

   exp(coef) exp(-coef) lower .95 upper .95
X      2.408     0.4153    0.8145     7.120
X2     1.492     0.6703    0.5759     3.864

Concordance= 0.605  (se = 0.078 )
Rsquare= 0.132   (max possible= 0.985 )
Likelihood ratio test= 2.83  on 2 df,   p=0.2433
Wald test            = 2.68  on 2 df,   p=0.2613
Score (logrank) test = 2.78  on 2 df,   p=0.2485

> # coefficient of X2 should be 0
> # coefficient of X  should be log(lambda1/lambda0)
> log(lambda1/lambda0)
[1] 0.6931472

> predict(mod , newdata=ndf, type="expected", se.fit=TRUE)
$fit
[1] 0.6994528 1.0434404 1.6843646 2.5127274

$se.fit
[1] 0.3686963 0.4900025 0.6499294 1.2322156

> # stratified model
> mod2 <- coxph(Surv(Time) ~ strata(X) + X2, data=thedata)

> predict(mod2, newdata=ndf, type="expected"             ) # no se
[1] 0.5997226 1.0753357 1.7129833 3.0714738

> predict(mod2, newdata=ndf,                  se.fit=TRUE) # type="lp"
$fit
         1          2          3          4 
-0.2919605  0.2919605 -0.2919605  0.2919605 

$se.fit
        1         2         3         4 
0.2593819 0.2593819 0.2593819 0.2593819 

> predict(mod2,              type="expected", se.fit=TRUE) # prediction at old data, which has different Time
$fit
         1          2          3          4          5          6          7          8          9         10         11 
0.46420590 0.26669055 0.34486228 1.07533571 1.40040859 1.39632027 1.04237772 0.42718283 0.07160617 3.51100997 0.67101482 
        12         13         14         15         16         17         18         19         20 
0.89364858 0.36657448 0.27570098 1.71298334 0.44845622 2.71298334 1.57726107 1.21298334 0.12839383 

$se.fit
 [1] 0.26085117 0.19422979 0.20792982 0.48950350 0.70220792 0.60752909 0.52192083 0.25906143 0.07547268 1.48037985 [11] 0.32793825 0.46759948 0.21152685 0.20306531 0.72580338 0.27877094 1.23563366 0.78323843 0.52610887 0.13058964

> try(predict(mod2, newdata=ndf, type="expected", se.fit=TRUE)) # error?
Error in predict.coxph(mod2, newdata = ndf, type = "expected", se.fit = TRUE) : 
  subscript out of bounds

> #Error in predict.coxph(mod2, newdata = ndf, type = "expected", se.fit = TRUE) : 
> #  subscript out of bounds
> 
> 
> 
> #mypc <- getAnywhere(predi .... [TRUNCATED]
R version 2.15.2 (2012-10-26)
Platform: x86_64-w64-mingw32/x64 (64-bit)

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    

attached base packages:
[1] splines   stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] survival_2.37-2

loaded via a namespace (and not attached):
[1] tools_2.15.2

> maintainer('survival')
[1] "Terry Therneau <therneau.terry at mayo.edu>"

**********************************************************
Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues