[R] reading data
arun
smartpink111 at yahoo.com
Wed Feb 20 01:29:43 CET 2013
Hi,
Try this:
# Charge-state (z) frequencies for every "MSMS_23PepInfo.txt" found below the
# current working directory. Each file is labelled by the folder it sits in
# (a1, c1, c2, ...) and folders sharing the same letters form one group.
files <- paste0("MSMS_", 23, "PepInfo.txt")

# Read one file path; the result is a one-element list named after the folder
# part of the path. fill = TRUE pads rows that have fewer fields than the header.
read.data <- function(x) {
  names(x) <- gsub("^(.*)\\/.*", "\\1", x)
  lapply(x, function(y) {
    read.table(y, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
  })
}

# List the directory tree once; fixed = TRUE matches the file name literally
# (as a regular expression the "." would match any character).
all.files <- list.files(recursive = TRUE)
matched <- all.files[grep(files, all.files, fixed = TRUE)]
if (length(matched) == 0L) {
  stop("no file named ", files, " found below ", getwd(), call. = FALSE)
}
lista <- do.call("c", lapply(matched, read.data))

# "a1" -> "group_a": strip the digits so that files can be split by group,
# then number the files inside each group again (a1, a2, ...).
names(lista) <- paste0("group_", gsub("\\d+", "", names(lista)))
res2 <- split(lista, names(lista))
res3 <- lapply(res2, function(x) {
  names(x) <- paste0(gsub(".*_", "", names(x)), seq_along(x))
  x
})

# Charge levels are taken from the data instead of being fixed to 1:3, so that
# higher charges (4, 5, ...) are counted rather than silently dropped.
z.levels <- sort(unique(unlist(lapply(lista, function(x) x[["z"]]))))

# Per group, stack one frequency table (columns Var1/Freq) per file.
# `select` returns the logical row filter to apply to a file before counting.
tabulate.z <- function(select) {
  lapply(res3, function(grp) {
    do.call(rbind, lapply(grp, function(x) {
      as.data.frame(table(factor(x[["z"]][select(x)], levels = z.levels)))
    }))
  })
}

library(reshape2)

# Reshape the stacked tables to one row per file and one column per charge.
# Row names look like "a1.1", "a1.2", ...; the part before the dot is the file id.
to.wide <- function(res) {
  do.call(rbind, lapply(res, function(x) {
    long <- melt(data.frame(id = gsub("\\..*", "", row.names(x)), x),
                 id.vars = c("id", "Var1"))
    dcast(long, id ~ Var1, value.var = "value")
  }))
}

#Freq whole data
res4 <- tabulate.z(function(x) rep(TRUE, nrow(x)))
names(res4) <- names(res2)
freq.i1 <- to.wide(res4)
freq.i1
# id 1 2 3
#group_a a1 1 12 6
#group_c.1 c1 0 10 3
#group_c.2 c2 0 12 3
#group_c.3 c3 0 13 4
#group_t.1 t1 0 10 4
#group_t.2 t2 1 12 6
freq.rel.i1 <- as.matrix(freq.i1[, -1] / rowSums(freq.i1[, -1]))
freq.rel.i1
# 1 2 3
#group_a 0.05263158 0.6315789 0.3157895
#group_c.1 0.00000000 0.7692308 0.2307692
#group_c.2 0.00000000 0.8000000 0.2000000
#group_c.3 0.00000000 0.7647059 0.2352941
#group_t.1 0.00000000 0.7142857 0.2857143
#group_t.2 0.05263158 0.6315789 0.3157895
#Freq with FDR< 0.01
res5 <- tabulate.z(function(x) x[["FDR"]] < 0.01)
names(res5) <- names(res2)
freq.f1 <- to.wide(res5)
freq.f1
# id 1 2 3
#group_a a1 1 10 5
#group_c.1 c1 0 7 2
#group_c.2 c2 0 8 2
#group_c.3 c3 0 6 4
#group_t.1 t1 0 7 4
#group_t.2 t2 1 10 5
freq.rel.f1 <- as.matrix(freq.f1[, -1] / rowSums(freq.f1[, -1]))

# One randomly ordered rainbow colour per file, shared by both panels.
colour <- sample(rainbow(nrow(freq.rel.i1)))
par(mfrow = c(1, 2))
barplot(freq.rel.i1, beside = TRUE, main = ("Sample"), xlab = "Charge",
        ylab = "Relative Frequencies", col = colour,
        legend.text = rownames(freq.rel.i1))
barplot(freq.rel.f1, beside = TRUE, main = ("Sample with FDR<0.01"), xlab = "Charge",
        ylab = "Relative Frequencies", col = colour,
        legend.text = rownames(freq.rel.f1))
#change the legend position
Also, I didn't check the rest of the code from the chi-square test onward.
A.K.
________________________________
From: Vera Costa <veracosta.rt at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Tuesday, February 19, 2013 4:19 PM
Subject: Re: reading data
Here is the code and some outputs.
# z.plot: charge-state (z) frequencies for every "MSMS_<number>PepInfo.txt"
# found below `directory`, plus a chi-square comparison of each sample with
# the average sample.
#
# Arguments:
#   directory  folder that is searched recursively for the PepInfo files. Files
#              whose relative path starts with "c" form one group, those whose
#              path starts with "t" the other.
#   number     run number inserted into the file name pattern.
#
# Prints the charge levels, the absolute and relative frequency matrices, the
# average counts and the p-value table, and draws two bar plots (all rows, and
# rows with FDR < 0.01). Returns, invisibly, the data frame with the columns
# "sample name" and "pvalue".
z.plot <- function(directory, number) {
  #reading data
  # Paths are resolved against `directory` instead of calling setwd(), so the
  # caller's working directory is left untouched and relative paths work.
  direct <- dir(directory, pattern = paste("MSMS_", number, "PepInfo.txt", sep = ""),
                full.names = FALSE, recursive = TRUE)
  if (length(direct) == 0L) {
    stop("no MSMS_", number, "PepInfo.txt file found under ", directory, call. = FALSE)
  }
  # Every file is read once; the two groups are subsets of that list.
  lista <- lapply(direct, function(x) {
    read.table(file.path(directory, x), header = TRUE, sep = "\t")
  })
  listaC <- lista[grepl("^c", direct)]
  listaT <- lista[grepl("^t", direct)]

  # Rows with FDR < 0.01; which() leaves out rows whose FDR is NA.
  fdr.rows <- function(tab) tab[which(tab[["FDR"]] < 0.01), , drop = FALSE]

  #count different z values
  # NOTE(review): the levels come from the FDR < 0.01 rows only, in order of
  # first appearance; a charge seen only in rows failing the filter is not
  # counted in the "all data" table either - confirm this is intended.
  cab <- unique(unlist(lapply(lista, function(tab) names(table(fdr.rows(tab)[["z"]])))))
  #Relative freqs to construct the graph
  print(cab)
  ###[1] "2" "3" "1"

  # One row of counts per file (rows named <prefix>1, <prefix>2, ...), one
  # column per charge level. An empty group gives a 0-row matrix, so the
  # rbind() calls below still work.
  count.matrix <- function(tables, prefix, only.fdr) {
    counts <- lapply(tables, function(tab) {
      if (only.fdr) tab <- fdr.rows(tab)
      as.vector(table(factor(tab[["z"]], levels = cab)))
    })
    row.ids <- if (length(tables) > 0L) paste0(prefix, seq_along(tables)) else NULL
    matrix(as.integer(unlist(counts)), nrow = length(tables), ncol = length(cab),
           byrow = TRUE, dimnames = list(row.ids, cab))
  }
  dci <- count.matrix(listaC, "c", FALSE)  # all rows
  dcf <- count.matrix(listaC, "c", TRUE)   # rows with FDR < 0.01
  dti <- count.matrix(listaT, "t", FALSE)
  dtf <- count.matrix(listaT, "t", TRUE)

  freq.i <- rbind(dci, dti)
  freq.f <- rbind(dcf, dtf)
  freq.rel.i <- freq.i / rowSums(freq.i)
  freq.rel.f <- freq.f / rowSums(freq.f)
  print(freq.i)
  ## 2 3 1
  #c1 10 3 0
  #c2 12 3 0
  #c3 13 4 0
  #t1 10 4 0
  #t2 12 6 1
  print(freq.f)
  ### 2 3 1
  #c1 7 2 0
  #c2 8 2 0
  #c3 6 4 0
  #t1 7 4 0
  #t2 10 5 1
  print(freq.rel.i)
  ### 2 3 1
  #c1 0.7692308 0.2307692 0.00000000
  #c2 0.8000000 0.2000000 0.00000000
  #c3 0.7647059 0.2352941 0.00000000
  #t1 0.7142857 0.2857143 0.00000000
  #t2 0.6315789 0.3157895 0.05263158
  print(freq.rel.f)
  ### 2 3 1
  #c1 0.7777778 0.2222222 0.0000
  #c2 0.8000000 0.2000000 0.0000
  #c3 0.6000000 0.4000000 0.0000
  #t1 0.6363636 0.3636364 0.0000
  #t2 0.6250000 0.3125000 0.0625

  #Graph plot
  colour <- sample(rainbow(nrow(freq.rel.i)))
  # Put the previous plot layout back when the function exits.
  old.par <- par(mfrow = c(1, 2))
  on.exit(par(old.par), add = TRUE)
  barplot(freq.rel.i, beside = TRUE, main = ("Sample"), xlab = "Charge",
          ylab = "Relative Frequencies", col = colour,
          legend.text = rownames(freq.rel.i))
  barplot(freq.rel.f, beside = TRUE, main = ("Sample with FDR<0.01"), xlab = "Charge",
          ylab = "Relative Frequencies", col = colour,
          legend.text = rownames(freq.rel.f))

  #average of the group (except c1&t1)
  # drop = FALSE keeps a one-row remainder as a matrix; without it a group
  # with exactly two files loses its row name (the blank sample name that used
  # to show up in the p-value table).
  freqs <- rbind(dcf[-1, , drop = FALSE], dtf[-1, , drop = FALSE])
  average <- colMeans(freqs)
  print(average)
  ### 2 3 1
  #8.0000000 3.6666667 0.3333333

  #chisquare test function
  # Chi-square test of homogeneity between two count vectors x and y of equal
  # length; returns the p-value. Named differently from stats::chisq.test so
  # that it does not mask it.
  chisq.pvalue <- function(x, y) {
    # A category that is empty in both samples has expected count 0 and would
    # turn the statistic into 0/0 = NaN, so it is left out of the test.
    keep <- (x + y) > 0
    x <- x[keep]
    y <- y[keep]
    df <- length(x) - 1
    if (df < 1) {
      return(NA_real_)  # fewer than two categories left: nothing to compare
    }
    somax <- sum(x)
    somay <- sum(y)
    nj. <- x + y
    nj <- sum(nj.)
    ejx <- (nj. / nj) * somax  # expected counts for x
    ejy <- (nj. / nj) * somay  # expected counts for y
    ETobs <- sum(((x - ejx)^2) / ejx) + sum(((y - ejy)^2) / ejy)
    # The upper tail is taken directly: 1 - pchisq(...) rounds to exactly 0
    # for large statistics.
    pchisq(ETobs, df = df, lower.tail = FALSE)
  }

  #pvalues of the chisquare test between sample and average (H0: two samples has the same distribution)
  pvalues <- vapply(seq_len(nrow(freqs)), function(i) {
    chisq.pvalue(freqs[i, ], average)
  }, numeric(1))

  #data frame with final p-values
  dataframe <- data.frame(as.character(rownames(freqs)), pvalues)
  colnames(dataframe) <- c("sample name", "pvalue")
  print(dataframe)
  ### sample name pvalue
  #1 c2 0.7235907
  #2 c3 0.7963287
  #3 t2 0.9079200
}
z.plot("C:/Users/Vera Costa/Desktop/dados",23)
###and two barplots..
Here, I remove the group a1.
Thank you
2013/2/19 arun <smartpink111 at yahoo.com>
Hi,
>
>Could you send the results for the folder that was sent to me? It will be easy for me.
>
>Arun
>
>
>
>
>
>
>________________________________
>From: Vera Costa <veracosta.rt at gmail.com>
>To: arun <smartpink111 at yahoo.com>
>Sent: Tuesday, February 19, 2013 3:47 PM
>
>Subject: Re: reading data
>
>
>Oh sorry, I change the folder.
>
>I send for your folder
>
>
>
>2013/2/19 arun <smartpink111 at yahoo.com>
>
>Hello,
>>
>>
>> Regarding the results, is it from the same folder that you sent to me??
>>I am getting different results by running your steps.
>>
>>
>>direct<- list.files(recursive=TRUE)
>> direct
>>#[1] "a1/MSMS_23PepInfo.txt" "c1/MSMS_23PepInfo.txt" "c2/MSMS_23PepInfo.txt"
>>#[4] "c3/MSMS_23PepInfo.txt" "t1/MSMS_23PepInfo.txt" "t2/MSMS_23PepInfo.txt"
>>
>> directT<- list.files(recursive=TRUE)[grepl("^t",dir())]
>>
>>directT
>>#[1] "t1/MSMS_23PepInfo.txt" "t2/MSMS_23PepInfo.txt"
>>
>>
>>directC<- list.files(recursive=TRUE)[grepl("^c",dir())]
>>
>>directC
>>#[1] "c1/MSMS_23PepInfo.txt" "c2/MSMS_23PepInfo.txt" "c3/MSMS_23PepInfo.txt"
>>
>>
>>
>>lista<- lapply(direct,function(x) read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE))
>>
>>listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = "\t",fill=TRUE))
>>listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = "\t",fill=TRUE))
>>
>> #count different z values
>> cab <- vector()
>> for (i in 1:length(lista)) {
>> dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),]
>> dc<-table(dc$z)
>> cab <- c(cab, names(dc))
>> }
>>
>> #Relative freqs to construct the graph
>> cab <- unique(cab)
>> print(cab)
>>
>>#[1] "1" "2" "3" #Here results are not correct
>>
>>
>>d <- matrix(ncol=length(cab))
>> dci<- d[-1,]
>> dcf <- d[-1,]
>> dti <- d[-1,]
>> dtf <- d[-1,]
>>
>> for (i in 1:length(listaC)) {
>>
>> #Relative freq of all data
>> dcc<-listaC[[i]]
>> dcc<-table(factor(dcc$z, levels=cab))
>> dci<- rbind(dci, dcc)
>> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c")
>>
>>
>> #Relative freq of data with FDR<0.01
>> dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),]
>> dcc1<-table(factor(dcc1$z, levels=cab))
>> dcf<- rbind(dcf,dcc1)
>> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c")
>> }
>> print(dci) #here too.
>>
>># 1 2 3
>>#c1 0 10 3
>>#c2 0 12 3
>>#c3 0 13 4
>>
>>
>>It is important to clear this before I make any changes to the script. You need to send me the output of the same data folder to understand what is going on.
>>
>>
>>Arun
>>________________________________
>>From: Vera Costa <veracosta.rt at gmail.com>
>>To: arun <smartpink111 at yahoo.com>
>>Sent: Tuesday, February 19, 2013 9:24 AM
>>
>>Subject: Re: reading data
>>
>>
>>Ok.
>>
>>Here is the code and some outputs.
>>
>>z.plot <- function(directory,number) {
>> #reading data
>> setwd(directory)
>> direct<-dir(directory,pattern = paste("MSMS_",number,"PepInfo.txt",sep=""), full.names = FALSE, recursive = TRUE)
>> directT <- direct[grepl("^t", direct)]
>> directC <- direct[grepl("^c", direct)]
>>
>> lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t"))
>> listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = "\t"))
>> listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = "\t"))
>>
>> #count different z values
>> cab <- vector()
>> for (i in 1:length(lista)) {
>> dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),]
>> dc<-table(dc$z)
>> cab <- c(cab, names(dc))
>> }
>>
>> #Relative freqs to construct the graph
>> cab <- unique(cab)
>> print(cab)
>>
>>###[1] "1" "2" "3" "4" "5"
>>
>>
>>
>> d <- matrix(ncol=length(cab))
>> dci<- d[-1,]
>> dcf <- d[-1,]
>> dti <- d[-1,]
>> dtf <- d[-1,]
>>
>> for (i in 1:length(listaC)) {
>>
>> #Relative freq of all data
>> dcc<-listaC[[i]]
>> dcc<-table(factor(dcc$z, levels=cab))
>> dci<- rbind(dci, dcc)
>> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c")
>>
>>
>> #Relative freq of data with FDR<0.01
>> dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),]
>> dcc1<-table(factor(dcc1$z, levels=cab))
>> dcf<- rbind(dcf,dcc1)
>> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c")
>> }
>> print(dci)
>>
>>### 1 2 3 4 5
>>#c1 93 8356 3621 450 55
>>#c2 108 13513 6859 793 73
>>#c3 97 13526 6724 739 82
>>#c4 101 13417 6574 761 62
>>
>> print(dcf)
>>
>>### 1 2 3 4 5
>>#c1 10 4576 2100 199 17
>>#c2 7 7831 4039 314 23
>>#c3 16 7887 4087 286 22
>>#c4 20 7824 4045 311 20
>>
>> for (i in 1:length(listaT)) {
>>
>> #Relative freq of all data
>> dct<-listaT[[i]]
>> dct<-table(factor(dct$z, levels=cab))
>> dti<- rbind(dti, dct)
>> rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t")
>>
>>
>> #Relative freq of data with FDR<0.01
>> dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),]
>> dct1<-table(factor(dct1$z, levels=cab))
>> dtf<- rbind(dtf,dct1)
>> rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t")
>> }
>>
>> print(dti)
>>
>>### 1 2 3 4 5
>>#t1 32 8640 4098 429 36
>>#t2 128 13209 6723 788 75
>>#t3 85 13043 6691 754 82
>>#t4 139 13750 7036 807 84
>>
>> print(dtf)
>>
>>
>>#### 1 2 3 4 5
>>#t1 5 4885 2571 196 8
>>#t2 12 7752 4209 360 28
>>#t3 19 7563 4086 336 18
>>#t4 14 8108 4218 312 26
>>
>>
>> freq.i<-rbind(dci,dti)
>> freq.f<-rbind(dcf,dtf)
>> freq.rel.i<-freq.i/apply(freq.i,1,sum)
>> freq.rel.f<-freq.f/apply(freq.f,1,sum)
>> print(freq.i)
>>## 1 2 3 4 5
>>#c1 93 8356 3621 450 55
>>#c2 108 13513 6859 793 73
>>#c3 97 13526 6724 739 82
>>#c4 101 13417 6574 761 62
>>#t1 32 8640 4098 429 36
>>#t2 128 13209 6723 788 75
>>#t3 85 13043 6691 754 82
>>#t4 139 13750 7036 807 84
>>
>> print(freq.f)
>> ### 1 2 3 4 5
>>#c1 10 4576 2100 199 17
>>#c2 7 7831 4039 314 23
>>#c3 16 7887 4087 286 22
>>#c4 20 7824 4045 311 20
>>#t1 5 4885 2571 196 8
>>#t2 12 7752 4209 360 28
>>#t3 19 7563 4086 336 18
>>#t4 14 8108 4218 312 26
>>
>> print(freq.rel.i)
>>### 1 2 3 4 5
>>#c1 0.007395626 0.6644930 0.2879523 0.03578529 0.004373757
>>#c2 0.005059496 0.6330460 0.3213248 0.03714982 0.003419844
>>#c3 0.004582389 0.6389834 0.3176493 0.03491119 0.003873772
>>#c4 0.004829070 0.6415013 0.3143199 0.03638537 0.002964380
>>#t1 0.002417832 0.6528145 0.3096335 0.03241405 0.002720060
>>#t2 0.006117670 0.6313148 0.3213210 0.03766190 0.003584572
>>#t3 0.004115226 0.6314694 0.3239409 0.03650448 0.003969983
>>#t4 0.006371470 0.6302714 0.3225156 0.03699120 0.003850385
>> print(freq.rel.f)
>>
>>### 1 2 3 4 5
>>#c1 0.0014488554 0.6629962 0.3042596 0.02883222 0.002463054
>>#c2 0.0005731128 0.6411495 0.3306861 0.02570820 0.001883085
>>#c3 0.0013010246 0.6413238 0.3323305 0.02325581 0.001788909
>>#c4 0.0016366612 0.6402619 0.3310147 0.02545008 0.001636661
>>#t1 0.0006523157 0.6373125 0.3354207 0.02557078 0.001043705
>>#t2 0.0009707952 0.6271337 0.3405064 0.02912386 0.002265189
>>#t3 0.0015804359 0.6290967 0.3398769 0.02794876 0.001497255
>>#t4 0.0011042751 0.6395330 0.3327023 0.02460956 0.002050797
>>
>>#Graph plot
>>colour<-sample(rainbow(nrow(freq.rel.i)))
>>par(mfrow=c(1,2))
>>barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.i))
>>barplot(freq.rel.f,beside=T,main=("Sample with FDR<0.01"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.f))
>>
>>#average of the group (except c1&t1)
>>freqs<-rbind(dcf[-1,], dtf[-1,])
>>average<-apply(freqs,2,mean)
>>print(average)
>>
>>### 1 2 3 4 5
>> # 14.66667 7827.50000 4114.00000 319.83333 22.83333
>>
>>#chisquare test function
>>chisq.test<-function(x,y){
>> somax<-sum(x)
>> somay<-sum(y)
>> nj.<-x+y
>> nj<-sum(nj.)
>> ejx<-(nj./nj)*somax
>> ejy<-(nj./nj)*somay
>> ETx<-((x-ejx)^2)/ejx
>> ETy<-((y-ejy)^2)/ejy
>> ETobs<-sum(ETx)+sum(ETy)
>> pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE)
>> return(pvalue)
>> }
>>
>>#pvalues of the chisquare test between sample and average (H0: two samples has the same distribution)
>>pvalues<-c()
>>for (i in 1:(nrow(freqs))){
>>a<-chisq.test(freqs[i,],average)
>>pvalues<-c(pvalues,a)
>>}
>>print(pvalues)
>>##[1] 0.5307206 0.6849480 0.8332661 0.3474956 0.5546527 0.9387602
>>
>>#data frame with final p-values
>>dataframe<-data.frame(c(rownames(freqs)), c(pvalues))
>>colnames(dataframe)<-c("sample name","pvalue")
>>print(dataframe)
>>
>>### sample name pvalue
>>#1 c2 0.5307206
>>#2 c3 0.6849480
>>#3 c4 0.8332661
>>#4 t2 0.3474956
>>#5 t3 0.5546527
>>#6 t4 0.9387602
>>}
>>z.plot("C:/Users/Vera Costa/Desktop/dados",23)
>>
>>###and two barplots...
>>
>>Thank you
>>
>>
>>
>>
>>2013/2/19 arun <smartpink111 at yahoo.com>
>>
>>Got it.
>>>
>>>So, if I run the code that you sent yesterday, will I get the correct results for relative frequency etc.? It would also be great if you could send me the output generated using your code (on two groups, as you showed yesterday). It will help me check the results much faster than running your code and guessing whether that is the expected result (because I have to make some adjustments to your code to run it on Linux, especially the ?dir() call).
>>>
>>>I may be able to run it only later.
>>>
>>>Arun
>>>
>>>
>>>
>>>
>>>
>>>
>>>________________________________
>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>To: arun <smartpink111 at yahoo.com>
>>>Sent: Tuesday, February 19, 2013 8:53 AM
>>>
>>>Subject: Re: reading data
>>>
>>>
>>>I sent in second email.
>>>
>>>But I send again.
>>>
>>>
>>>
>>>2013/2/19 arun <smartpink111 at yahoo.com>
>>>
>>>
>>>>
>>>>Your attachment didn't come through.
>>>>
>>>>Arun
>>>>
>>>>
>>>>
>>>>
>>>>________________________________
>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>To: arun <smartpink111 at yahoo.com>
>>>>Sent: Tuesday, February 19, 2013 8:47 AM
>>>>
>>>>Subject: Re: reading data
>>>>
>>>>
>>>>Sorry about a lot of questions.
>>>>
>>>>I attach a small part of my real data (I have a lot of row).
>>>>
>>>>My main objective is construct two graph. The first with the relative frequencies of each group (c1,c2,c3....). The second with the same frequencies but with FDR<0.01.
>>>>
>>>>After that I need to compute the average in each group (but without the first sample of the group - c1, t1, a1, ...) and do the chi-square test to see whether the groups have the same distribution. Do you understand?
>>>>
>>>>At first, I had only two groups, and I did the code that I sent you. But I need a general code, not for two groups that I know the names, but for all groups (sometimes I can have 7 or 8 or 9 groups).
>>>>
>>>>it´s better now my explanation? :-)
>>>>My English isn't also very good :-)
>>>>
>>>>Please not publish this data in forum...
>>>>
>>>>Thank you
>>>>
>>>>
>>>>
>>>>
>>>>2013/2/18 arun <smartpink111 at yahoo.com>
>>>>
>>>>Hi,
>>>>>
>>>>>I run the codes to understand what was going on.
>>>>>
>>>>>I didn't fully understand it as you constructed the codes for your original dataset and not for the 'data` directory you sent to me.
>>>>>
>>>>>A.K.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>________________________________
>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>Sent: Monday, February 18, 2013 4:02 PM
>>>>>
>>>>>Subject: Re: reading data
>>>>>
>>>>>
>>>>>Thank you.
>>>>>I don't need the same,but equivalent. I will try your suggestions.
>>>>>Thank you.
>>>>>No dia 18 de Fev de 2013 19:41, "arun" <smartpink111 at yahoo.com> escreveu:
>>>>>
>>>>>Hi,
>>>>>>I am not able to open your graph. I am using linux.
>>>>>>
>>>>>>Also, the codes in the function are not reproducible
>>>>>> directT <- direct[grepl("^t", direct)]
>>>>>> directC <- direct[grepl("^c", direct)]
>>>>>>
>>>>>>It takes double the time to know what is going on.
>>>>>>
>>>>>>dir()
>>>>>>#[1] "a1" "a2" "a3" "b1" "b2" "c1"
>>>>>>
>>>>>>direct<- list.files(recursive=TRUE)[grepl("^a|^b",dir())]
>>>>>>
>>>>>> direct
>>>>>>#[1] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt"
>>>>>>#[4] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt"
>>>>>>directA<- list.files(recursive=TRUE)[grepl("^a",dir())]
>>>>>>directB<- list.files(recursive=TRUE)[grepl("^b",dir())]
>>>>>>lista<- lapply(direct,function(x) read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE))
>>>>>>
>>>>>>listaA<-lapply(directA, function(x) read.table(x,header=TRUE, sep = "\t",fill=TRUE))
>>>>>>listaB<-lapply(directB, function(x) read.table(x,header=TRUE, sep = "\t",fill=TRUE))
>>>>>>
>>>>>>#here I am changing the names listaT, z, etc..
>>>>>>
>>>>>>#count different mm values
>>>>>> cab <- vector()
>>>>>> for (i in 1:length(lista)) {
>>>>>> dc<-lista[[i]][ifelse(lista[[i]]$b<0.01, TRUE, FALSE),]
>>>>>> dc<-table(dc$mm)
>>>>>> cab <- c(cab, names(dc))
>>>>>> }
>>>>>>
>>>>>> #Relative freqs to construct the graph
>>>>>> cab <- unique(cab)
>>>>>> d <- matrix(ncol=length(cab))
>>>>>> dci<- d[-1,]
>>>>>> dcf <- d[-1,]
>>>>>> dti <- d[-1,]
>>>>>> dtf <- d[-1,]
>>>>>>
>>>>>> ########################################
>>>>>> for (i in 1:length(listaA)) {
>>>>>>
>>>>>> #Relative freq of all data
>>>>>> dcc<-listaA[[i]]
>>>>>> dcc<-table(factor(dcc$mm, levels=cab))
>>>>>> dci<- rbind(dci, dcc)
>>>>>> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "a")
>>>>>>
>>>>>>
>>>>>> #Relative freq of data with FDR<0.01
>>>>>> dcc1<-listaA[[i]][ifelse(listaA[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>> dcc1<-table(factor(dcc1$mm, levels=cab))
>>>>>> dcf<- rbind(dcf,dcc1)
>>>>>> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "a")
>>>>>> }
>>>>>>
>>>>>> for (i in 1:length(listaB)) {
>>>>>>
>>>>>> #Relative freq of all data
>>>>>> dct<-listaB[[i]]
>>>>>> dct<-table(factor(dct$mm, levels=cab))
>>>>>> dti<- rbind(dti, dct)
>>>>>> rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "b")
>>>>>>
>>>>>>
>>>>>> #Relative freq of data with FDR<0.01
>>>>>> dct1<-listaB[[i]][ifelse(listaB[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>> dct1<-table(factor(dct1$mm, levels=cab))
>>>>>> dtf<- rbind(dtf,dct1)
>>>>>> rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "b")
>>>>>> }
>>>>>> freq.i<-rbind(dci,dti)
>>>>>> freq.f<-rbind(dcf,dtf)
>>>>>> freq.rel.i<-freq.i/apply(freq.i,1,sum)
>>>>>> freq.rel.f<-freq.f/apply(freq.f,1,sum)
>>>>>>
>>>>>>
>>>>>> freq.i
>>>>>># 2 3
>>>>>>#a1 4 1
>>>>>>#a2 4 1
>>>>>>#a3 4 1
>>>>>>#b1 4 1
>>>>>>#b2 4 1
>>>>>>#b3 4 1
>>>>>>#b4 4 1
>>>>>>#result from my code.
>>>>>> files<-paste("MSMS_",23,"PepInfo.txt",sep="")
>>>>>>read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,sep = "\t",stringsAsFactors=FALSE,fill=TRUE))}
>>>>>>lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data))
>>>>>>names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="")
>>>>>>
>>>>>>res2<-split(lista,names(lista))
>>>>>>res3<- lapply(res2,function(x) {names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>>>res4<-lapply(seq_along(res3),function(i) do.call(rbind,lapply(res3[[i]], function(x) table(x$mm[x[["b"]]<0.01]))))
>>>>>> names(res4)<- names(res2)
>>>>>>
>>>>>>
>>>>>>res4
>>>>>>$group_a
>>>>>># 2 3
>>>>>>#a1 3 1
>>>>>>#a2 3 1
>>>>>>#a3 3 1
>>>>>>
>>>>>>#$group_b
>>>>>> # 2 3
>>>>>>#b1 3 1
>>>>>>#b2 3 1
>>>>>>
>>>>>>#$group_c
>>>>>> # 2 3
>>>>>>#c1 3 1
>>>>>>
>>>>>>There is a difference in output from freq.i and res4. There were only two files under 'group_b`. So, check your codes.
>>>>>>A.K.
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>________________________________
>>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>>Sent: Monday, February 18, 2013 10:27 AM
>>>>>>Subject: Re: reading data
>>>>>>
>>>>>>
>>>>>>Hi!!!
>>>>>>
>>>>>>I'm coming to ask a new question.
>>>>>>
>>>>>>I want a function to do my statistics. I start with you had send me:
>>>>>>
>>>>>>z.plot <- function(directory,number) {
>>>>>> setwd(directory)
>>>>>> indx<-gsub("[./]","",list.dirs())
>>>>>> indx1<- indx[indx!=""]
>>>>>> print(indx1)
>>>>>> files<-paste("MSMS_",number,"PepInfo.txt",sep="")
>>>>>> read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,sep = "\t",stringsAsFactors=FALSE,fill=TRUE))}
>>>>>> lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data))
>>>>>> print(lista)
>>>>>> #names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="") ve = TRUE)
>>>>>> }
>>>>>>z.plot("C:/Users/Vera Costa/Desktop/dados.lixo",23)
>>>>>>
>>>>>>
>>>>>>In my lista I can´t merge rows to have the group, because the idea is for each file count frequencies of mm, when b<0.01. after that I want a graph like the graph in attach.
>>>>>>
>>>>>>
>>>>>>When I had 2 groups and knew the names of the groups, I wrote the code below (but now I have more groups and, maybe, I don't know the names of the groups):
>>>>>>
>>>>>>z.plot <- function(directory,number) {
>>>>>> #reading data
>>>>>> setwd(directory)
>>>>>> direct<-dir(directory,pattern = paste("MSMS_",number,"PepInfo.txt",sep=""), full.names = FALSE, recursive = TRUE)
>>>>>> directT <- direct[grepl("^t", direct)]
>>>>>> directC <- direct[grepl("^c", direct)]
>>>>>>
>>>>>> lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t"))
>>>>>> listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = "\t"))
>>>>>> listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = "\t"))
>>>>>>
>>>>>> #count different z values
>>>>>> cab <- vector()
>>>>>> for (i in 1:length(lista)) {
>>>>>> dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>> dc<-table(dc$z)
>>>>>> cab <- c(cab, names(dc))
>>>>>> }
>>>>>>
>>>>>> #Relative freqs to construct the graph
>>>>>> cab <- unique(cab)
>>>>>> d <- matrix(ncol=length(cab))
>>>>>> dci<- d[-1,]
>>>>>> dcf <- d[-1,]
>>>>>> dti <- d[-1,]
>>>>>> dtf <- d[-1,]
>>>>>>
>>>>>> for (i in 1:length(listaC)) {
>>>>>>
>>>>>> #Relative freq of all data
>>>>>> dcc<-listaC[[i]]
>>>>>> dcc<-table(factor(dcc$z, levels=cab))
>>>>>> dci<- rbind(dci, dcc)
>>>>>> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c")
>>>>>>
>>>>>>
>>>>>> #Relative freq of data with FDR<0.01
>>>>>> dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>> dcc1<-table(factor(dcc1$z, levels=cab))
>>>>>> dcf<- rbind(dcf,dcc1)
>>>>>> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c")
>>>>>> }
>>>>>>
>>>>>> for (i in 1:length(listaT)) {
>>>>>>
>>>>>> #Relative freq of all data
>>>>>> dct<-listaT[[i]]
>>>>>> dct<-table(factor(dct$z, levels=cab))
>>>>>> dti<- rbind(dti, dct)
>>>>>> rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t")
>>>>>>
>>>>>>
>>>>>> #Relative freq of data with FDR<0.01
>>>>>> dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>> dct1<-table(factor(dct1$z, levels=cab))
>>>>>> dtf<- rbind(dtf,dct1)
>>>>>> rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t")
>>>>>> }
>>>>>> freq.i<-rbind(dci,dti)
>>>>>> freq.f<-rbind(dcf,dtf)
>>>>>> freq.rel.i<-freq.i/apply(freq.i,1,sum)
>>>>>> freq.rel.f<-freq.f/apply(freq.f,1,sum)
>>>>>>
>>>>>>#Graph plot
>>>>>>colour<-sample(rainbow(nrow(freq.rel.i)))
>>>>>>par(mfrow=c(1,2))
>>>>>>barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.i))
>>>>>>barplot(freq.rel.f,beside=T,main=("Sample with FDR<0.01"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.f))
>>>>>>#average of the group (except c1&t1)
>>>>>>freqs<-rbind(dcf[-1,], dtf[-1,])
>>>>>>average<-apply(freqs,2,mean)
>>>>>>
>>>>>>#chisquare test function
>>>>>>chisq.test<-function(x,y){
>>>>>> somax<-sum(x)
>>>>>> somay<-sum(y)
>>>>>> nj.<-x+y
>>>>>> nj<-sum(nj.)
>>>>>> ejx<-(nj./nj)*somax
>>>>>> ejy<-(nj./nj)*somay
>>>>>> ETx<-((x-ejx)^2)/ejx
>>>>>> ETy<-((y-ejy)^2)/ejy
>>>>>> ETobs<-sum(ETx)+sum(ETy)
>>>>>> pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE)
>>>>>> return(pvalue)
>>>>>> }
>>>>>>
>>>>>>#pvalues of the chisquare test between sample and average (H0: two samples has the same distribution)
>>>>>>pvalues<-c()
>>>>>>for (i in 1:(nrow(freqs))){
>>>>>>a<-chisq.test(freqs[i,],average)
>>>>>>pvalues<-c(pvalues,a)
>>>>>>}
>>>>>>#data frame with final p-values
>>>>>>dataframe<-data.frame(c(rownames(freqs)), c(pvalues))
>>>>>>colnames(dataframe)<-c("sample name","pvalue")
>>>>>>print(dataframe)
>>>>>>}
>>>>>>z.plot("C:/Users/Vera/Desktop/data",23)
>>>>>>
>>>>>>
>>>>>>
>>>>>>Thank you again
>>>>>>
>>>>>>
>>>>>>
>>>>>>2013/2/17 arun <smartpink111 at yahoo.com>
>>>>>>
>>>>>>HI Vera,
>>>>>>>
>>>>>>>No problem. I am cc:ing to r-help.
>>>>>>>
>>>>>>>A.K.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>________________________________
>>>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>>>Sent: Sunday, February 17, 2013 5:44 AM
>>>>>>>Subject: Re: reading data
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>Hi. Thank you. It works now:-)
>>>>>>>And yes, I use windows.
>>>>>>>Thank you very much.
>>>>>>>No dia 17 de Fev de 2013 00:44, "arun" <smartpink111 at yahoo.com> escreveu:
>>>>>>>
>>>>>>>Hi Vera,
>>>>>>>>
>>>>>>>>Have you tried the suggestion?
>>>>>>>>
>>>>>>>>Are you using Windows?
>>>>>>>>Thanks,
>>>>>>>>Arun
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>________________________________
>>>>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>>>>Sent: Saturday, February 16, 2013 7:10 PM
>>>>>>>>Subject: Re: reading data
>>>>>>>>
>>>>>>>>
>>>>>>>>Thank you.
>>>>>>>>In mine, I have an error " 'what' must be a character string or a function".
>>>>>>>>I need to do equivalent in my system.
>>>>>>>>Thank you and sorry one more time.
>>>>>>>>No dia 16 de Fev de 2013 23:53, "arun" <smartpink111 at yahoo.com> escreveu:
>>>>>>>>
>>>>>>>>Hi,
>>>>>>>>>You didn't mention what the error message was, or whether you are reading file names which are not "mmmmm11kk.txt".
>>>>>>>>>
>>>>>>>>>It is working on my system; I ran it again.
>>>>>>>>>?c() combine values into a vector or list.
>>>>>>>>>
>>>>>>>>> sessionInfo()
>>>>>>>>>R version 2.15.1 (2012-06-22)
>>>>>>>>>Platform: x86_64-pc-linux-gnu (64-bit)
>>>>>>>>>
>>>>>>>>>locale:
>>>>>>>>> [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
>>>>>>>>> [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
>>>>>>>>> [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
>>>>>>>>> [7] LC_PAPER=C LC_NAME=C
>>>>>>>>> [9] LC_ADDRESS=C LC_TELEPHONE=C
>>>>>>>>>[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
>>>>>>>>>
>>>>>>>>>attached base packages:
>>>>>>>>>[1] stats graphics grDevices utils datasets methods base
>>>>>>>>>
>>>>>>>>>other attached packages:
>>>>>>>>>[1] stringr_0.6.2 reshape2_1.2.2
>>>>>>>>>
>>>>>>>>>loaded via a namespace (and not attached):
>>>>>>>>>[1] plyr_1.8
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>#code
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) #it seems like one of the rows of your file doesn't have 6 elements, so added fill=TRUE
>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>>>>res2<-split(res,names(res))
>>>>>>>>>res3<- lapply(res2,function(x) {names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>>>>>>#result
>>>>>>>>>
>>>>>>>>>res3
>>>>>>>>>#$group_a
>>>>>>>>>#$group_a$a1
>>>>>>>>> Id M mm x b u k j y p v
>>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>
>>>>>>>>>$group_a$a2
>>>>>>>>> Id M mm x b u k j y p v
>>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>
>>>>>>>>>$group_a$a3
>>>>>>>>> Id M mm x b u k j y p v
>>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>$group_b
>>>>>>>>>$group_b$b1
>>>>>>>>> Id M mm x b u k j y p v
>>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>
>>>>>>>>>$group_b$b2
>>>>>>>>> Id M mm x b u k j y p v
>>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>$group_c
>>>>>>>>>$group_c$c1
>>>>>>>>> Id M mm x b u k j y p v
>>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>A.K.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>________________________________
>>>>>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>>>>>Sent: Saturday, February 16, 2013 6:32 PM
>>>>>>>>>Subject: Re: reading data
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>Sorry again... In:
>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("...
>>>>>>>>>What is this c in do.call(c, ...)? When I put this line in R, I get an error.
>>>>>>>>>Thank you
>>>>>>>>>No dia 15 de Fev de 2013 18:11, "arun" <smartpink111 at yahoo.com> escreveu:
>>>>>>>>>
>>>>>>>>>Hi,
>>>>>>>>>>No problem.
>>>>>>>>>>
>>>>>>>>>>BTW, these questions are not stupid..
>>>>>>>>>>Arun
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>________________________________
>>>>>>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>>>>>>Sent: Friday, February 15, 2013 1:08 PM
>>>>>>>>>>Subject: Re: reading data
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>Thank you very much.
>>>>>>>>>>
>>>>>>>>>>I will try to apply and after I tell you if it is ok :-)
>>>>>>>>>>
>>>>>>>>>>Thank you and sorry about these questions (sometimes stupid questions).
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>2013/2/15 arun <smartpink111 at yahoo.com>
>>>>>>>>>>
>>>>>>>>>>HI,
>>>>>>>>>>>No problem.
>>>>>>>>>>>See ?c: c() concatenates its arguments into a vector or a list.
>>>>>>>>>>>If I use do.call(cbind,..) or do.call(rbind,...) instead, I get:
>>>>>>>>>>>
>>>>>>>>>>>do.call(cbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}))
>>>>>>>>>>># [,1] [,2] [,3] [,4] [,5] [,6]
>>>>>>>>>>>#a1 List,11 List,11 List,11 List,11 List,11 List,11
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> do.call(rbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}))
>>>>>>>>>>># a1
>>>>>>>>>>>#[1,] List,11
>>>>>>>>>>>#[2,] List,11
>>>>>>>>>>>#[3,] List,11
>>>>>>>>>>>#[4,] List,11
>>>>>>>>>>>#[5,] List,11
>>>>>>>>>>>#[6,] List,11
>>>>>>>>>>>i.e.,
>>>>>>>>>>>a list within a list:
>>>>>>>>>>>
>>>>>>>>>>> restrial<-lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})
>>>>>>>>>>> str(restrial)
>>>>>>>>>>>#List of 6
>>>>>>>>>>># $ :List of 1
>>>>>>>>>>> #..$ a1:'data.frame': 6 obs. of 11 variables:
>>>>>>>>>>> #.. ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ...
>>>>>>>>>>> #.. ..$ M : chr [1:6] "1" "1" "2" "1" ...
>>>>>>>>>>> #.. ..$ mm: int [1:6] 2 2 1 2 3 2
>>>>>>>>>>> #.. ..$ x : int [1:6] 739 2263 1 1965 3660 1972
>>>>>>>>>>> -----------------------------------------------------------------
>>>>>>>>>>>str(res)
>>>>>>>>>>>#List of 6
>>>>>>>>>>># $ a1:'data.frame': 6 obs. of 11 variables:
>>>>>>>>>>> # ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ...
>>>>>>>>>>> #..$ M : chr [1:6] "1" "1" "2" "1" ...
>>>>>>>>>>> # ..$ mm: int [1:6] 2 2 1 2 3 2
>>>>>>>>>>> # ..$ x : int [1:6] 739 2263 1 1965 3660 1972
>>>>>>>>>>>-----------------------------------------------------------------
>>>>>>>>>>>
>>>>>>>>>>>You mentioned about naming this to "group_a","group_b". etc..
>>>>>>>>>>>
>>>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>>>>>>res2<-split(res,names(res))
>>>>>>>>>>>
>>>>>>>>>>>res3<- lapply(res2,function(x) {names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>>>>>>>> res3$group_a
>>>>>>>>>>>$a1
>>>>>>>>>>>
>>>>>>>>>>># Id M mm x b u k j y p v
>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>
>>>>>>>>>>>#$a2
>>>>>>>>>>>
>>>>>>>>>>># Id M mm x b u k j y p v
>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>
>>>>>>>>>>>#$a3
>>>>>>>>>>>
>>>>>>>>>>> # Id M mm x b u k j y p v
>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>A.K.
>>>>>>>>>>>
>>>>>>>>>>>________________________________
>>>>>>>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>>>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>>>>>>>Sent: Friday, February 15, 2013 12:39 PM
>>>>>>>>>>>Subject: Re: reading data
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>Thank you very much and sorry my questions.
>>>>>>>>>>>
>>>>>>>>>>>But this code isn't grouping by letter, is it? I mean, a1, a2, a3 are the same group (the first letter gives me the name of the group).
>>>>>>>>>>>
>>>>>>>>>>>Another question: in do.call, you wrote do.call(c, ...). What is c?
>>>>>>>>>>>
>>>>>>>>>>>Sorry
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>2013/2/15 arun <smartpink111 at yahoo.com>
>>>>>>>>>>>
>>>>>>>>>>>HI,
>>>>>>>>>>>>
>>>>>>>>>>>>Just to add:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) #it seems like one of the rows of your file doesn't have 6 elements, so added fill=TRUE
>>>>>>>>>>>>
>>>>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>>>>>>>res[grep("group_b",names(res))]
>>>>>>>>>>>>
>>>>>>>>>>>>I am not sure what you want the grouped data to look like. If you want something like this:
>>>>>>>>>>>>res1<-do.call(rbind,res)
>>>>>>>>>>>>res2<-lapply(split(res1,gsub("[.0-9]","",row.names(res1))),function(x) {row.names(x)<-1:nrow(x);x})
>>>>>>>>>>>>res2
>>>>>>>>>>>>#$group_a
>>>>>>>>>>>>
>>>>>>>>>>>> # Id M mm x b u k j y p v
>>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>#7 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#8 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#9 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#10 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#11 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#12 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>#13 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#14 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#15 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#16 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#17 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#18 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_b
>>>>>>>>>>>> # Id M mm x b u k j y p v
>>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>#7 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#8 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#9 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#10 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#11 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#12 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_c
>>>>>>>>>>>>
>>>>>>>>>>>> # Id M mm x b u k j y p v
>>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>#or if you want it like this:
>>>>>>>>>>>>res2<-split(res,names(res))
>>>>>>>>>>>>
>>>>>>>>>>>>res2[["group_b"]]
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_b
>>>>>>>>>>>># Id M mm x b u k j y p v
>>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_b
>>>>>>>>>>>> # Id M mm x b u k j y p v
>>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926
>>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926
>>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA
>>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926
>>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496
>>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734
>>>>>>>>>>>>
>>>>>>>>>>>>Hope this helps.
>>>>>>>>>>>>
>>>>>>>>>>>>A.K.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>----- Original Message -----
>>>>>>>>>>>>From: "veracosta.rt at gmail.com" <veracosta.rt at gmail.com>
>>>>>>>>>>>>To: smartpink111 at yahoo.com
>>>>>>>>>>>>Cc:
>>>>>>>>>>>>Sent: Friday, February 15, 2013 9:15 AM
>>>>>>>>>>>>Subject: reading data
>>>>>>>>>>>>
>>>>>>>>>>>>Hi,
>>>>>>>>>>>>I posted yesterday and you helped me. I have a small problem.
>>>>>>>>>>>>
>>>>>>>>>>>>At first, I never worked with regular expressions...
>>>>>>>>>>>>
>>>>>>>>>>>>The code that you gave me is OK, but my files are inside the folders a1, a2, a3. I will try to explain better.
>>>>>>>>>>>>
>>>>>>>>>>>>I have one folder named "data". Inside this folder I have some other folders named "a1", "a2", "b1", "b2", ... and inside each one of those I have some files. I want only the file "mmmmmm.txt" (in all folders I have one file with this name).
>>>>>>>>>>>>The name of the folder gives me the name of the group, but I need to read the file inside. And afterwards, have "group_a", "group_b", ... because I need to work with this data grouped (and know the name of the group).
>>>>>>>>>>>>
>>>>>>>>>>>>Thank you.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
More information about the R-help
mailing list