[R] error message from predict.coxph
Andrews, Chris
chrisaa at med.umich.edu
Tue Feb 12 12:58:42 CET 2013
In one particular situation predict.coxph gives an error message. Namely: stratified data, predict='expected', new data, se=TRUE. I think I found the error but I'll leave that to you to decide.
Thanks,
Chris
######## CODE
library(survival)
set.seed(20121221)
nn <- 10 # sample size in each group
lambda0 <- 0.1 # event rate in group 0
lambda1 <- 0.2 # event rate in group 1
t0 <- 10 # time to estimate expected numbers of events
# 'correct' number of events at time t0 = rate of events (lambda) times time (t0)
t0*lambda0
t0*lambda1
# generate uncensored data
T0 <- rexp(nn, rate=lambda0)
T1 <- rexp(nn, rate=lambda1)
thedata <- data.frame(Time=c(T0, T1), X=rep(0:1, e=nn), X2=rep(0:1, nn))
# 4 covariate combinations
(ndf <- data.frame(Time=t0, X=rep(0:1,e=2), X2=rep(0:1, 2)))
# model with 2 effects
mod <- coxph(Surv(Time) ~ X + X2, data=thedata)
summary(mod)
# coefficient of X2 should be 0
# coefficient of X should be log(lambda1/lambda0)
log(lambda1/lambda0)
predict(mod , newdata=ndf, type="expected", se.fit=TRUE)
# stratified model
mod2 <- coxph(Surv(Time) ~ strata(X) + X2, data=thedata)
predict(mod2, newdata=ndf, type="expected" ) # no se
predict(mod2, newdata=ndf, se.fit=TRUE) # type="lp"
predict(mod2, type="expected", se.fit=TRUE) # prediction at old data, which has different Time
try(predict(mod2, newdata=ndf, type="expected", se.fit=TRUE)) # error?
#Error in predict.coxph(mod2, newdata = ndf, type = "expected", se.fit = TRUE) :
# subscript out of bounds
#mypc <- getAnywhere(predict.coxph)
#mypc2 <- mypc$objs[[1]]
#debug(mypc2)
#mypc2(mod2, newdata=ndf, type="expected", se.fit=TRUE)
# The error appears to be an incorrect subscripting of xbar (and xbar2) in defining dt (and dt2) in one part of predict.coxph:
#
#if (ncol(y) == 2) {
# if (se.fit) {
# dt <- (chaz * newx[indx2, ]) - xbar[indx2, ] # *** SHOULD BE JUST xbar
# se[indx2] <- sqrt(varh + rowSums((dt %*% object$var) * dt)) * newrisk[indx2]
# }
#}
#else {
# j2 <- approx(afit$time, 1:afit.n, newy[indx2, 2], method = "constant", f = 0, yleft = 0, yright = afit.n)$y # chaz2 <- approx(-afit$time, afit$cumhaz, -newy[indx2, 2], method = "constant", rule = 2, f = 0)$y # chaz2 <- c(0, afit$cumhaz)[j2 + 1] # pred[indx2] <- (chaz2 - chaz) * newrisk[indx2] # if (se.fit) {
# varh2 <- c(0, cumsum(afit$varhaz))[j1 + 1]
# xbar2 <- rbind(0, afit$xbar)[j1 + 1, , drop = F]
# dt <- (chaz * newx[indx2, ]) - xbar[indx2, ] # *** SHOULD BE JUST xbar
# dt2 <- (chaz2 * newx[indx2, ]) - xbar2[indx2, ] # *** SHOULD BE JUST xbar2
#
# To be like other sections of the code, xbar (and xbar2) should not be subscripted.
sessionInfo()
maintainer('survival')
########################## OUTPUT
> library(survival)
Loading required package: splines
> set.seed(20121221)
> nn <- 10 # sample size in each group
> lambda0 <- 0.1 # event rate in group 0
> lambda1 <- 0.2 # event rate in group 1
> t0 <- 10 # time to estimate expected numbers of events
> # 'correct' number of events at time t0 = rate of events (lambda)
> times time (t0)
> t0*lambda0
[1] 1
> t0*lambda1
[1] 2
> # generate uncensored data
> T0 <- rexp(nn, rate=lambda0)
> T1 <- rexp(nn, rate=lambda1)
> thedata <- data.frame(Time=c(T0, T1), X=rep(0:1, e=nn), X2=rep(0:1,
> nn))
> # 4 covariate combinations
> (ndf <- data.frame(Time=t0, X=rep(0:1,e=2), X2=rep(0:1, 2)))
Time X X2
1 10 0 0
2 10 0 1
3 10 1 0
4 10 1 1
> # model with 2 effects
> mod <- coxph(Surv(Time) ~ X + X2, data=thedata)
> summary(mod)
Call:
coxph(formula = Surv(Time) ~ X + X2, data = thedata)
n= 20, number of events= 20
coef exp(coef) se(coef) z Pr(>|z|)
X 0.8788 2.4081 0.5531 1.589 0.112
X2 0.4000 1.4918 0.4856 0.824 0.410
exp(coef) exp(-coef) lower .95 upper .95
X 2.408 0.4153 0.8145 7.120
X2 1.492 0.6703 0.5759 3.864
Concordance= 0.605 (se = 0.078 )
Rsquare= 0.132 (max possible= 0.985 )
Likelihood ratio test= 2.83 on 2 df, p=0.2433
Wald test = 2.68 on 2 df, p=0.2613
Score (logrank) test = 2.78 on 2 df, p=0.2485
> # coefficient of X2 should be 0
> # coefficient of X should be log(lambda1/lambda0)
> log(lambda1/lambda0)
[1] 0.6931472
> predict(mod , newdata=ndf, type="expected", se.fit=TRUE)
$fit
[1] 0.6994528 1.0434404 1.6843646 2.5127274
$se.fit
[1] 0.3686963 0.4900025 0.6499294 1.2322156
> # stratified model
> mod2 <- coxph(Surv(Time) ~ strata(X) + X2, data=thedata)
> predict(mod2, newdata=ndf, type="expected" ) # no se
[1] 0.5997226 1.0753357 1.7129833 3.0714738
> predict(mod2, newdata=ndf, se.fit=TRUE) # type="lp"
$fit
1 2 3 4
-0.2919605 0.2919605 -0.2919605 0.2919605
$se.fit
1 2 3 4
0.2593819 0.2593819 0.2593819 0.2593819
> predict(mod2, type="expected", se.fit=TRUE) # prediction at old data, which has different Time
$fit
1 2 3 4 5 6 7 8 9 10 11
0.46420590 0.26669055 0.34486228 1.07533571 1.40040859 1.39632027 1.04237772 0.42718283 0.07160617 3.51100997 0.67101482
12 13 14 15 16 17 18 19 20
0.89364858 0.36657448 0.27570098 1.71298334 0.44845622 2.71298334 1.57726107 1.21298334 0.12839383
$se.fit
[1] 0.26085117 0.19422979 0.20792982 0.48950350 0.70220792 0.60752909 0.52192083 0.25906143 0.07547268 1.48037985 [11] 0.32793825 0.46759948 0.21152685 0.20306531 0.72580338 0.27877094 1.23563366 0.78323843 0.52610887 0.13058964
> try(predict(mod2, newdata=ndf, type="expected", se.fit=TRUE)) # error?
Error in predict.coxph(mod2, newdata = ndf, type = "expected", se.fit = TRUE) :
subscript out of bounds
> #Error in predict.coxph(mod2, newdata = ndf, type = "expected", se.fit = TRUE) :
> # subscript out of bounds
>
>
>
> #mypc <- getAnywhere(predi .... [TRUNCATED]
R version 2.15.2 (2012-10-26)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] splines stats graphics grDevices utils datasets methods base
other attached packages:
[1] survival_2.37-2
loaded via a namespace (and not attached):
[1] tools_2.15.2
> maintainer('survival')
[1] "Terry Therneau <therneau.terry at mayo.edu>"
**********************************************************
Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues
More information about the R-help
mailing list