[R] spearman correlation and p-value as a matrix
arun
smartpink111 at yahoo.com
Thu Feb 14 16:50:05 CET 2013
Hi Ozgul,
Try this:
#Here I used the "pearson correlation" as `spearman` was not significant
resr<-do.call(rbind,lapply(split(bg_ag,1:nrow(bg_ag)),function(x) {res<-rcorr(cbind(bg[,x[,1]],ag[,x[,2]]),type="pearson")$r; row.names(res)<- rep(paste(x[1],x[2],sep="_"),2);res}))
#p-values
resP<-do.call(rbind,lapply(split(bg_ag,1:nrow(bg_ag)),function(x) {res<-rcorr(cbind(bg[,x[,1]],ag[,x[,2]]),type="pearson")$P; row.names(res)<- rep(paste(x[1],x[2],sep="_"),2);res}))
indx<-row(resr)%%2!=1
resPnew<-as.matrix(resP[indx[,1],1])
resrnew<-as.matrix(resr[indx[,1],1])
resrnew1<-data.frame(read.table(text=row.names(resrnew),sep="_",stringsAsFactors=FALSE),value=resrnew)
row.names(resrnew1)<-1:nrow(resrnew1)
library(reshape2)
resrnew2<- dcast(resrnew1,V2~V1,value.var="value")
resPnew1<-data.frame(read.table(text=row.names(resPnew),sep="_",stringsAsFactors=FALSE),value=resPnew)
row.names(resPnew1)<-1:nrow(resPnew1)
resPnew2<-dcast(resPnew1,V2~V1,value.var="value")
resrnew2[,-1][resPnew2[,-1]>0.10|is.na(resPnew2[,-1])]<- 0 #also I used the cut-off limit as 0.10 because:
#correlation value for a p-value of 0.07 was also included in the ?cor2m() result
resPnew2
# V2 Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125
#1 ag1 0.81044832 NaN 0.81044832 NaN 0.17157638 NaN NaN
#2 ag2 0.19359426 NaN 0.19359426 NaN 0.07062624 NaN NaN
#3 ag3 0.01745244 NaN 0.01745244 NaN 0.61105696 NaN NaN
#4 ag4 0.95446965 NaN 0.95446965 NaN 0.24387489 NaN NaN
#5 ag5 0.87628767 NaN 0.87628767 NaN 0.62117658 NaN NaN
# Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190
#1 NaN 0.81044832 NaN NaN NaN NaN NaN 0.81044832
#2 NaN 0.19359426 NaN NaN NaN NaN NaN 0.19359426
#3 NaN 0.01745244 NaN NaN NaN NaN NaN 0.01745244
#4 NaN 0.95446965 NaN NaN NaN NaN NaN 0.95446965
#5 NaN 0.87628767 NaN NaN NaN NaN NaN 0.87628767
# Otu00209 Otu00218
#1 NaN 0.81044832
#2 NaN 0.19359426
#3 NaN 0.01745244
#4 NaN 0.95446965
#5 NaN 0.87628767
cor2m(bg,ag,trim=TRUE,alpha=0.05)
# Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125
#ag1 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
#ag2 0.0000000 NaN 0.0000000 NaN 0.7743597 NaN NaN
#ag3 -0.8901029 NaN -0.8901029 NaN 0.0000000 NaN NaN
#ag4 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
#ag5 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
# Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190
#ag1 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
#ag2 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
#ag3 NaN -0.8901029 NaN NaN NaN NaN NaN -0.8901029
#ag4 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
#ag5 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
# Otu00209 Otu00218
#ag1 NaN 0.0000000
#ag2 NaN 0.0000000
#ag3 NaN -0.8901029
#ag4 NaN 0.0000000
#ag5 NaN 0.0000000
resrnew2
# V2 Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125
#1 ag1 0.0000000 0 0.0000000 0 0.0000000 0 0
#2 ag2 0.0000000 0 0.0000000 0 0.7743597 0 0
#3 ag3 -0.8901029 0 -0.8901029 0 0.0000000 0 0
#4 ag4 0.0000000 0 0.0000000 0 0.0000000 0 0
#5 ag5 0.0000000 0 0.0000000 0 0.0000000 0 0
# Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190
#1 0 0.0000000 0 0 0 0 0 0.0000000
#2 0 0.0000000 0 0 0 0 0 0.0000000
#3 0 -0.8901029 0 0 0 0 0 -0.8901029
#4 0 0.0000000 0 0 0 0 0 0.0000000
#5 0 0.0000000 0 0 0 0 0 0.0000000
# Otu00209 Otu00218
#1 0 0.0000000
#2 0 0.0000000
#3 0 -0.8901029
#4 0 0.0000000
#5 0 0.0000000
#If you want the Na values to remain as such use:
resrnew2[,-1][resPnew2[,-1]>0.10]<- 0
resrnew2
# V2 Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125
#1 ag1 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
#2 ag2 0.0000000 NaN 0.0000000 NaN 0.7743597 NaN NaN
#3 ag3 -0.8901029 NaN -0.8901029 NaN 0.0000000 NaN NaN
#4 ag4 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
#5 ag5 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
# Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190
#1 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
#2 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
#3 NaN -0.8901029 NaN NaN NaN NaN NaN -0.8901029
#4 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
#5 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
# Otu00209 Otu00218
#1 NaN 0.0000000
#2 NaN 0.0000000
#3 NaN -0.8901029
#4 NaN 0.0000000
#5 NaN 0.0000000
Hope it helps.
A.K.
----- Original Message -----
From: Ozgul Inceoglu <Ozgul.Inceoglu at ulb.ac.be>
To: arun <smartpink111 at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Thursday, February 14, 2013 10:22 AM
Subject: re:Re: [R] spearman correlation and p-value as a matrix
you are right! I was so happy to find out the p-value criteria, I totally forgot about the spearman correlation, no I dont think there is an option of spearman correlation. Thank you for pointing that out!
Cheers,
Ö
>Dear Ozgul,
>
>I made a comment in the previous email, which I didn't check it.
>Thank you for the ?cor2m().
>I checked the results of the example dataset with cor2m(). Does it have an option for spearman correlation?
>
>The values below seem to be pearson
>#cor2m
> cor2m(bg,ag,trim=TRUE,alpha=0.05) ######I
> Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125
>ag1 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
>ag2 0.0000000 NaN 0.0000000 NaN 0.7743597 NaN NaN
>ag3 -0.8901029 NaN -0.8901029 NaN 0.0000000 NaN NaN
>ag4 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
>ag5 0.0000000 NaN 0.0000000 NaN 0.0000000 NaN NaN
> Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190
>ag1 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
>ag2 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
>ag3 NaN -0.8901029 NaN NaN NaN NaN NaN -0.8901029
>ag4 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
>ag5 NaN 0.0000000 NaN NaN NaN NaN NaN 0.0000000
> Otu00209 Otu00218
>ag1 NaN 0.0000000
>ag2 NaN 0.0000000
>ag3 NaN -0.8901029
>ag4 NaN 0.0000000
>ag5 NaN 0.0000000
>
>#Pearson correlation from my solution:
>resr<-do.call(rbind,lapply(split(bg_ag,1:nrow(bg_ag)),function(x)
> {res<-rcorr(cbind(bg[,x[,1]],ag[,x[,2]]))$r; row.names(res)<-
>rep(paste(x[1],x[2],sep="_"),2);res}))
> indx<-row(resr)%%2!=1
>resrnew<-as.matrix(resr[indx[,1],1])
>resrnew1<-data.frame(read.table(text=row.names(resrnew),sep="_",stringsAsFactors=FALSE),value=resrnew)
> row.names(resrnew1)<-1:nrow(resrnew1)
>library(reshape2)
> dcast(resrnew1,V2~V1,value.var="value") V2 Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125
>#1 ag1 -0.12705141 NaN -0.12705141 NaN -0.6394308 NaN NaN
>#2 ag2 -0.61522514 NaN -0.61522514 NaN 0.7743597 NaN NaN
>#3 ag3 -0.89010286 NaN -0.89010286 NaN -0.2655363 NaN NaN
>#4 ag4 0.03036290 NaN 0.03036290 NaN -0.5638320 NaN NaN
>#5 ag5 0.08266317 NaN 0.08266317 NaN -0.2582930 NaN NaN
> # Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190
>#1 NaN -0.12705141 NaN NaN NaN NaN NaN -0.12705141
>#2 NaN -0.61522514 NaN NaN NaN NaN NaN -0.61522514
>#3 NaN -0.89010286 NaN NaN NaN NaN NaN -0.89010286
>#4 NaN 0.03036290 NaN NaN NaN NaN NaN 0.03036290
>#5 NaN 0.08266317 NaN NaN NaN NaN NaN 0.08266317
> # Otu00209 Otu00218
>#1 NaN -0.12705141
>#2 NaN -0.61522514
>#3 NaN -0.89010286
>#4 NaN 0.03036290
>#5 NaN 0.08266317
>
>A.K.
>
>
>
>----- Original Message -----
>From: Ozgul Inceoglu <Ozgul.Inceoglu at ulb.ac.be>
>To: R help <r-help at r-project.org>
>Cc:
>Sent: Thursday, February 14, 2013 1:10 AM
>Subject: Re: [R] spearman correlation and p-value as a matrix
>
>Thank you all for the answers. I really need to learn a lot.
>Bu I also discover cor2m(x, y, trim = TRUE, alpha = 0.05)
>in ecodist package, which gives an output file with significant correlations.
>
>Özgül
>
>Yj>HI,
>>
>>#ag data was created
>>bg<- as.matrix(read.table(text="
>> Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125 Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190 Otu00209 Otu00218
>>Gi20Jun11 0.001217 0 0.001217 0 0.000000 0 0 0 0.001217 0 0 0 0 0 0.001217 0 0.001217
>>Gi40Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>Gi425Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>Gi45Jun11 0.000000 0 0.000000 0 0.001513 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>Gi475Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>Gi50Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>",sep="",header=TRUE,stringsAsFactors=F))
>>set.seed(128)
>>ag<- matrix(rnorm(30),nrow=6)
>>colnames(ag)<- paste("ag",1:5,sep="")
>>bg_ag<-expand.grid(colnames(bg),colnames(ag),stringsAsFactors=FALSE)
>> attr(bg_ag,"out.attrs")<- NULL
>> library(Hmisc)
>>#correlation
>>resr<-do.call(rbind,lapply(split(bg_ag,1:nrow(bg_ag)),function(x) {res<-rcorr(cbind(bg[,x[,1]],ag[,x[,2]]),type="spearman")$r; row.names(res)<- rep(paste(x[1],x[2],sep="_"),2);res}))
>> head(resr)
>># [,1] [,2]
>>#Otu00022_ag1 1.0000000 0.1309307
>>#Otu00022_ag1 0.1309307 1.0000000
>>#Otu00029_ag1 1.0000000 NaN
>>#Otu00029_ag1 NaN 1.0000000
>>#Otu00039_ag1 1.0000000 0.1309307
>>#Otu00039_ag1 0.1309307 1.0000000
>>
>>#p-values
>>resP<-do.call(rbind,lapply(split(bg_ag,1:nrow(bg_ag)),function(x) {res<-rcorr(cbind(bg[,x[,1]],ag[,x[,2]]),type="spearman")$P; row.names(res)<- rep(paste(x[1],x[2],sep="_"),2);res}))
>> head(resP)
>># [,1] [,2]
>>#Otu00022_ag1 NA 0.8047262
>>#Otu00022_ag1 0.8047262 NA
>>#Otu00029_ag1 NA NaN
>>#Otu00029_ag1 NaN NA
>>#Otu00039_ag1 NA 0.8047262
>>#Otu00039_ag1 0.8047262 NA
>>
>>#If you need only the values
>>indx<-row(resr)%%2!=1
>> resPnew<-as.matrix(resP[indx[,1],1])
>> resrnew<-as.matrix(resr[indx[,1],1])
>>
>>head(resPnew)
>># [,1]
>>#Otu00022_ag1 0.8047262
>>#Otu00029_ag1 NaN
>>#Otu00039_ag1 0.8047262
>>#Otu00042_ag1 NaN
>>#Otu00101_ag1 0.1583024
>>#Otu00105_ag1 NaN
>>
>>head(resrnew)
>># [,1]
>>#Otu00022_ag1 0.1309307
>>#Otu00029_ag1 NaN
>>#Otu00039_ag1 0.1309307
>>#Otu00042_ag1 NaN
>>#Otu00101_ag1 -0.6546537
>>#Otu00105_ag1 NaN
>>
>>A.K.
>>
>>
>>
>>
>>----- Original Message -----
>>From: Ozgul Inceoglu <Ozgul.Inceoglu at ulb.ac.be>
>>To: r-help at r-project.org
>>Cc:
>>Sent: Wednesday, February 13, 2013 4:48 AM
>>Subject: [R] spearman correlation and p-value as a matrix
>>
>>I have two data matrices that I want to make the correlation between each column from data1 and each column from data 2 and also calculate the p-value Matrices dont have the same size and I tried such a script.
>>> bg <- read.table (file.choose(), header=T, row.names)
>>> bg
>>> Otu00022 Otu00029 Otu00039 Otu00042 Otu00101 Otu00105 Otu00125 Otu00131 Otu00137 Otu00155 Otu00158 Otu00172 Otu00181 Otu00185 Otu00190 Otu00209 Otu00218
>>> Gi20Jun11 0.001217 0 0.001217 0 0.000000 0 0 0 0.001217 0 0 0 0 0 0.001217 0 0.001217
>>> Gi40Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>> Gi425Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>> Gi45Jun11 0.000000 0 0.000000 0 0.001513 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>> Gi475Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>> Gi50Jun11 0.000000 0 0.000000 0 0.000000 0 0 0 0.000000 0 0 0 0 0 0.000000 0 0.000000
>>ag <- read.table (file.choose(), header=T, row.names)
>>
>>for (i in 1:(ncol(bg)))
>>for (j in 1:(ncol(ag)))
>>print(c(i,j))
>>final_matrix <- matrix(rep("0",ncol(bg)*ncol(ag)),ncol=ncol(bg),nrow=ncol(ag))
>>
>>cor <- cor.test(as.vector(as.matrix(bg[,i])),as.vector(as.matrix(ag[,j])), method="spearman")
>>
>>#but the output is not matrice with all the values but a single correlation value
>>
>>data: bg[, i] and ag[, j]
>>t = 2.2992, df = 26, p-value = 0.02978
>>alternative hypothesis: true correlation is not equal to 0
>>95 percent confidence interval:
>>0.04485289 0.67986803
>>sample estimates:
>> cor
>>0.4110515
>>
>># How I can creat an outfile with all the correlations and p-values?
>>
>>Thank you very much!
>>Özgül
>>
>>______________________________________________
>>R-help at r-project.org mailing list
>>https://stat.ethz.ch/mailman/listinfo/r-help
>>PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>>and provide commented, minimal, self-contained, reproducible code.
>>
>>
>>
>
>Özgül Inceoglu, PhD.
>
>Université Libre de Bruxelles
>Ecology of Aquatic Systems
>Campus de la plaine CP221
>Boulevard du triomphe
>1050 Bruxelles Belgium
>
>______________________________________________
>R-help at r-project.org mailing list
>https://stat.ethz.ch/mailman/listinfo/r-help
>PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>and provide commented, minimal, self-contained, reproducible code.
>
>
>
Özgül Inceoglu, PhD.
Université Libre de Bruxelles
Ecology of Aquatic Systems
Campus de la plaine CP221
Boulevard du triomphe
1050 Bruxelles Belgium
More information about the R-help
mailing list