[R] doubt with Odds ratio - URGENT HELP NEEDED
Michael Dewey
lists at dewey.myzen.co.uk
Wed Sep 23 17:29:37 CEST 2015
Dear Rosa
Can you remove all the code which is not relevant to calculating the
odds ratio so we can see what is going on?
On 23/09/2015 16:06, Rosa Oliveira wrote:
> Dear Michael,
>
>
> I found some of the errors, but others I wasn’t able to.
>
> And my huge huge problem concerns OR and OR confidence interval :(
>
>
> *New Corrected code:*
>
>
> casedata <-read.spss("tas_05112008.sav")
> tas.data<-data.frame(casedata)
>
> #Delete patients that were not discharged
> tas.data <- tas.data[ tas.data$hosp!="si ",]
> tas.data$resultado.hosp <- ifelse(tas.data$hosp=="l", 0, 1)
>
> tas.data$tas_d2 <-
> log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA,
> tas.data$tas_d2))
> tas.data$tas_d3 <-
> log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA,
> tas.data$tas_d3))
> tas.data$tas_d4 <-
> log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA,
> tas.data$tas_d4))
> tas.data$tas_d5 <-
> log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA,
> tas.data$tas_d5))
> tas.data$tas_d6 <-
> log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA,
> tas.data$tas_d6))
>
> tas.data$age <-
> ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>
>
> tas.data <- data.frame(tas1 =
> tas.data$tas_d2, tas2 = tas.data$tas_d3,
> tas3 = tas.data$tas_d4,
> tas4 = tas.data$tas_d5,
> tas5 = tas.data$tas_d6,
> age = tas.data$age,
> discharge =
> tas.data$resultado.hosp, id.pat=tas.data$id)
>
> # tas.data$discharge <- factor( tas.data$discharge ,
> levels=c(0,1), labels = c("dead", "alive"))
>
> #select only cases that have more than 3 tas
> tas.data <- tas.data[apply(tas.data[,-8:-6],
> 1, function(x) sum(!is.na(x)))>2,]
>
>
> nsample <- n.obs <- dim(tas.data)[1] #nr of patients
> with more than 2 tas measurements
>
> tas.data.long <- data.frame(
> tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs),
> age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
> id=rep(c(1:n.obs), 5))
> tas.data.long <- tas.data.long
> [order(tas.data.long$id),]
>
> age=tas.data$age
>
> ##################################################################################################
> #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE DEATh
> ##################################################################################################
> mean.alive <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
> mean.dead <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
> stderr <- function(x)
> sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
> se.alive <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
> se.dead <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
> summarytas <- data.frame (media = c(mean.dead,
> mean.alive),
> standarderror = c( se.dead,
> se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>
>
> ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
> geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2*
> standarderror), width=.1) +
> scale_color_manual(values=c("blue", "red")) +
> theme(legend.text=element_text(size=20),
> axis.text=element_text(size=16),
> axis.title=element_text(size=20,face="bold")) +
> scale_x_continuous(name="Days") +
> scale_y_continuous(name="log tas") +
> geom_line() +
> geom_point()
>
>
> library(verification)
> prev <- summary(glm(tas.data[,7]~tas.data[,4],family=binomial(link=probit)))
> answer = c(prev$coefficients[,1:2])
>
>
> roc.plot(tas.data[,7], prev, show.thres = FALSE, legen=F )
>
>
>
> modelo<-function (dataainit)
> {
>
> #dataa<-tas.data
> dataa<-dataainit
>
> dataa$ident<-seq(1:90)
> tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
> dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
> time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5),
> ident=rep(dataa$ident,5))
> tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
> tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
> #mixed model for the longitudinal tas
> lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days,
> na.action=na.exclude )
> #Random intercept and slopes
> pred.lme<-predict(lme.1)
> lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
> lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
> selector<-as.numeric(names(lme.intercept)) #to select not NA rows. Apply
> to the vector in the dataset
> test(dataa$intercept[resultado.hosp==1],
> dataa$intercept[resultado.hosp==0])
> print(summary(model.surv1))
> return(model.surv1$coef)
> }
>
>
> *CONSOLE RESULT: (errors in red)*
>
> > casedata <-read.spss("tas_05112008.sav")
> Warning message:
> In read.spss("tas_05112008.sav") :
> tas_05112008.sav: Unrecognized record type 7, subtype 18 encountered
> in system file
> > tas.data<-data.frame(casedata)
> >
> > #Delete patients that were not discharged
> > tas.data <- tas.data[ tas.data$hosp!="si ",]
> > tas.data$resultado.hosp <- ifelse(tas.data$hosp=="l", 0, 1)
> >
> > tas.data$tas_d2 <-
> log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA,
> tas.data$tas_d2))
> > tas.data$tas_d3 <-
> log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA,
> tas.data$tas_d3))
> > tas.data$tas_d4 <-
> log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA,
> tas.data$tas_d4))
> > tas.data$tas_d5 <-
> log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA,
> tas.data$tas_d5))
> > tas.data$tas_d6 <-
> log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA,
> tas.data$tas_d6))
> >
> > tas.data$age <-
> ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
> >
> >
> > tas.data <- data.frame(tas1 =
> tas.data$tas_d2, tas2 = tas.data$tas_d3,
> + tas3 = tas.data$tas_d4,
> tas4 = tas.data$tas_d5,
> + tas5 = tas.data$tas_d6,
> age = tas.data$age,
> + discharge =
> tas.data$resultado.hosp, id.pat=tas.data$id)
> >
> > # tas.data$discharge <- factor( tas.data$discharge
> , levels=c(0,1), labels = c("dead", "alive"))
> >
> > #select only cases that have more than 3 tas
> > tas.data <- tas.data[apply(tas.data[,-8:-6],
> 1, function(x) sum(!is.na(x)))>2,]
> >
> >
> >
> > nsample <- n.obs <- dim(tas.data)[1] #nr of
> patients with more than 2 tas measurements
> >
> > tas.data.long <- data.frame(
> tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs),
> age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
> + id=rep(c(1:n.obs), 5))
> > tas.data.long <- tas.data.long
> [order(tas.data.long$id),]
> >
> > age=tas.data$age
> >
> >
> ##################################################################################################
> > #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE DEATh
> >
> ##################################################################################################
> > mean.alive <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
> > mean.dead <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
> > stderr <- function(x)
> sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
> > se.alive <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
> > se.dead <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
> > summarytas <- data.frame (media = c(mean.dead,
> mean.alive),
> + standarderror = c( se.dead,
> se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
> >
> >
> > ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
> + geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2*
> standarderror), width=.1) +
> + scale_color_manual(values=c("blue", "red")) +
> + theme(legend.text=element_text(size=20),
> axis.text=element_text(size=16),
> axis.title=element_text(size=20,face="bold")) +
> + scale_x_continuous(name="Days") +
> + scale_y_continuous(name="log tas") +
> + geom_line() +
> + geom_point()
> Error in mean - 2 * standarderror :
> non-numeric argument to binary operator
> >
> >
> > library(verification)
> > prev <-
> summary(glm(tas.data[,7]~tas.data[,4],family=binomial(link=probit)))
> > answer = c(prev$coefficients[,1:2])
> >
> >
> > roc.plot(tas.data[,7], prev, show.thres = FALSE, legen=F )
> Error in is.finite(x) : default method not implemented for type 'list'
> >
> >
> >
> > modelo<-function (dataainit)
> +
> + {
> +
> + #dataa<-tas.data
> + dataa<-dataainit
> +
> + dataa$ident<-seq(1:90)
> + tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
> + dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
> + time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5),
> ident=rep(dataa$ident,5))
> +
> + tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
> + tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
> +
> + #mixed model for the longitudinal tas
> + lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days,
> na.action=na.exclude )
> +
> + #Random intercept and slopes
> + pred.lme<-predict(lme.1)
> + lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
> + lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
> + selector<-as.numeric(names(lme.intercept)) #to select not NA rows.
> Apply to the vector in the dataset
> +
> + test(dataa$intercept[resultado.hosp==1],
> dataa$intercept[resultado.hosp==0])
> +
> + print(summary(model.surv1))
> + return(model.surv1$coef)
> +
> + }
> >
>
> I can’t get the OR and OR CI :(
>
>
> *DATA:*
>
>
>
>
>
>
> Best,
>
> RO
>
>
>
>
> Atenciosamente,
> Rosa Oliveira
>
> --
> ____________________________________________________________________________
>
>
>
>
> Rosa Celeste dos Santos Oliveira,
>
> E-mail:rosita21 at gmail.com <mailto:rosita21 at gmail.com>
> Tlm: +351 939355143
> Linkedin: https://pt.linkedin.com/in/rosacsoliveira
> ____________________________________________________________________________
> "Many admire, few know"
> Hippocrates
>
>> On 23 Sep 2015, at 12:02, Michael Dewey <lists at dewey.myzen.co.uk
>> <mailto:lists at dewey.myzen.co.uk>> wrote:
>>
>> Dear Rosa
>>
>> It would help if you posted the error messages where they occur so
>> that we can see which of your commands caused which error. However see
>> comment inline below.
>>
>> On 22/09/2015 22:17, Rosa Oliveira wrote:
>>> Dear all,
>>>
>>>
>>> I’m trying to compute Odds ratio and OR confidence interval.
>>>
>>> I’m really naive, sorry for that.
>>>
>>>
>>> I attach my data and my code.
>>>
>>> I’m having lots of errors:
>>>
>>> 1. Error in data.frame(tas1 = tas.data$tas_d2, tas2 =
>>> tas.data$tas_d3, tas3 = tas.data$tas_d4, :
>>> arguments imply differing number of rows: 90, 0
>>>
>>
>> At least one of tas_d2, tas_d3, tas_d4 does not exist
>>
>> I suggest fixing that one and hoping the rest go away.
>>
>>> 2. Error in data.frame(tas = c(unlist(tas.data[, -8:-6])), time =
>>> rep(c(0:4), :
>>> arguments imply differing number of rows: 630, 450, 0
>>>
>>> 3. Error: object 'tas.data.long' not found
>>>
>>> 4. Error in data.frame(media = c(mean.dead, mean.alive),
>>> standarderror = c(se.dead, :
>>> arguments imply differing number of rows: 14, 10
>>>
>>> 5. Error in ggplot(summarytas, aes(x = c(c(1:5), c(1:5)), y = mean,
>>> colour = discharge)) :
>>> object 'summarytas' not found
>>>
>>> 6. Error in summary(glm(tas.data[, 6] ~ tas.data[, 4], family =
>>> binomial(link = probit))) :
>>> error in evaluating the argument 'object' in selecting a method for
>>> function 'summary': Error in eval(expr, envir, enclos) : y values
>>> must be 0 <= y <= 1
>>>
>>> 7. Error in wilcox.test.default(pred[obs == 1], pred[obs == 0],
>>> alternative = "great") :
>>> not enough (finite) 'x' observations
>>> In addition: Warning message:
>>> In is.finite(x) & apply(pred, 1, f) :
>>> longer object length is not a multiple of shorter object length
>>>
>>>
>>> and off course I’m not getting OR.
>>>
>>> Nonetheless all this errors, I think I have not been able to compute
>>> de code to get OR and OR confidence interval.
>>>
>>>
>>> Can anyone help me please. It’s really urgent.
>>>
>>> PLEASE
>>>
>>> THE CODE:
>>>
>>> the hospital outcome is discharge.
>>>
>>> require(gdata)
>>> library(foreign)
>>> library(nlme)
>>> library(lme4)
>>> library(boot)
>>> library(MASS)
>>> library(Hmisc)
>>> library(plotrix)
>>> library(verification)
>>> library(mvtnorm)
>>> library(statmod)
>>> library(epiR)
>>>
>>> #########################################################################################
>>> # Data preparation
>>> #
>>> #########################################################################################
>>>
>>> setwd("/Users/RO/Desktop")
>>>
>>> casedata <-read.spss("tas_05112008.sav")
>>> tas.data<-data.frame(casedata)
>>>
>>> #Delete patients that were not discharged
>>> tas.data <- tas.data[ tas.data$hosp!="si ",]
>>> tas.data$resultado.hosp <- ifelse(tas.data$hosp=="l", 0, 1)
>>>
>>> tas.data$tas_d2 <-
>>> log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA,
>>> tas.data$tas_d2))
>>> tas.data$tas_d3 <-
>>> log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA,
>>> tas.data$tas_d3))
>>> tas.data$tas_d4 <-
>>> log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA,
>>> tas.data$tas_d4))
>>> tas.data$tas_d5 <-
>>> log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA,
>>> tas.data$tas_d5))
>>> tas.data$tas_d6 <-
>>> log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA,
>>> tas.data$tas_d6))
>>>
>>> tas.data$age <-
>>> ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>>>
>>>
>>> tas.data <- data.frame(tas1 =
>>> tas.data$tas_d2, tas2 = tas.data$tas_d3,
>>> tas3 = tas.data$tas_d4,
>>> tas4 = tas.data$tas_d5,
>>> tas5 = tas.data$tas_d6,
>>> age = tas.data$age,
>>> discharge =
>>> tas.data$resultado.hosp, id.pat=tas.data$ID)
>>>
>>> # tas.data$discharge <- factor( tas.data$discharge
>>> , levels=c(0,1), labels = c("dead", "alive"))
>>>
>>> #select only cases that have more than 3 tas
>>> tas.data <- tas.data[apply(tas.data[,-8:-6],
>>> 1, function(x) sum(!is.na(x)))>2,]
>>>
>>>
>>>
>>> nsample <- n.obs <- dim(tas.data)[1] #nr of
>>> patients with more than 2 tas measurements
>>>
>>> tas.data.long <- data.frame(
>>> tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs),
>>> age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
>>> id=rep(c(1:n.obs), 5))
>>> tas.data.long <- tas.data.long
>>> [order(tas.data.long$id),]
>>>
>>> age=tas.data$age
>>>
>>> ##################################################################################################
>>> #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE DEATh
>>> ##################################################################################################
>>> mean.alive <-
>>> apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
>>> mean.dead <-
>>> apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
>>> stderr <- function(x)
>>> sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
>>> se.alive <-
>>> apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
>>> se.dead <-
>>> apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
>>> summarytas <- data.frame (media = c(mean.dead,
>>> mean.alive),
>>> standarderror = c( se.dead,
>>> se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>>>
>>>
>>> ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
>>> geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2*
>>> standarderror), width=.1) +
>>> scale_color_manual(values=c("blue", "red")) +
>>> theme(legend.text=element_text(size=20),
>>> axis.text=element_text(size=16),
>>> axis.title=element_text(size=20,face="bold")) +
>>> scale_x_continuous(name="Days") +
>>> scale_y_continuous(name="log tas") +
>>> geom_line() +
>>> geom_point()
>>>
>>>
>>> library(verification)
>>> prev <-
>>> summary(glm(tas.data[,6]~tas.data[,4],family=binomial(link=probit)))
>>> answer = c(prev$coefficients[,1:2])
>>>
>>>
>>> roc.plot(tas.data[,6], prev, show.thres = FALSE, legen=F )
>>>
>>>
>>> modelo<-function (dataainit)
>>>
>>> {
>>>
>>> #dataa<-tas.data
>>> dataa<-dataainit
>>>
>>> dataa$ident<-seq(1:90)
>>> tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
>>> dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
>>> time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5),
>>> ident=rep(dataa$ident,5))
>>>
>>> tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
>>> tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
>>>
>>> #mixed model for the longitudinal tas
>>> lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days,
>>> na.action=na.exclude )
>>>
>>> #Random intercept and slopes
>>> pred.lme<-predict(lme.1)
>>> lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
>>> lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
>>> selector<-as.numeric(names(lme.intercept)) #to select not NA rows.
>>> Apply to the vector in the dataset
>>>
>>> test(dataa$intercept[resultado.hosp==1],
>>> dataa$intercept[resultado.hosp==0])
>>>
>>> print(summary(model.surv1))
>>> return(model.surv1$coef)
>>>
>>> }
>>>
>>>
>>>
>>>
>>> Best,
>>> RO
>>>
>>>
>>>
>>> ______________________________________________
>>> R-help at r-project.org <mailto:R-help at r-project.org> mailing list -- To
>>> UNSUBSCRIBE and more, see
>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>> PLEASE do read the posting guide
>>> http://www.R-project.org/posting-guide.html
>>> and provide commented, minimal, self-contained, reproducible code.
>>>
>>
>> --
>> Michael
>> http://www.dewey.myzen.co.uk/home.html
>
--
Michael
http://www.dewey.myzen.co.uk/home.html
More information about the R-help
mailing list