[R] average and median values for each of the class
Berend Hasselman
bhh at xs4all.nl
Sun Apr 27 11:01:51 CEST 2014
On 27-04-2014, at 02:37, Nico Met <nicomet80 at gmail.com> wrote:
> Dear all,
>
>
>
> I have a matrix (dimension, 16 x 12) where 2nd column represents class
> (1,1,1,1,1,2,2,2, etc) information. I want to estimate average and median
> values for each of the class and add this information as a row at end of
> the each classes.
>
>
> for example:
>
> dput(dat)
>
> structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
>
> 3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762,
>
> 2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729,
>
> 1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563,
>
> 2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278,
>
> 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118,
>
> 0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856,
>
> 1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088
>
> ), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135,
>
> 0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976,
>
> 0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102,
>
> 0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426,
>
> 2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324,
>
> 1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493,
>
> 2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295,
>
> 1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389,
>
> 1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025,
>
> 1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452
>
> ), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008,
>
> 0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016,
>
> 0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255,
>
> 0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899,
>
> 1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587,
>
> 1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854,
>
> 1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997,
>
> 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636,
>
> 0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291,
>
> 0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471
>
> ), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252,
>
> 2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155,
>
> 1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052,
>
> 1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422,
>
> 0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542,
>
> 0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891,
>
> 0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619,
>
> 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073,
>
> 0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411,
>
> 0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632
>
> )), .Names = c("class", "name1", "name2", "name3", "name4", "name5",
>
> "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame",
> row.names = c("ara1",
>
> "ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9",
>
> "ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16"
>
> ))
>
>
> I wrote this:
>
>
>
> avg<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"],
> function(x) mean(x,na.rm=T)) )
>
>
> med<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x)
> median(x,na.rm=T)) )
>
>
> # avg
>
> # class name1 name2 name3 name4 name5 name6 name7
> #name8 name9 name10 name11
>
> #1 1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922
> #0.2741547 2.376609 0.7154955 0.3654845
>
> #2 2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665
> #0.2983373 1.908645 0.5731394 0.3566621
>
> #3 3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971
> #0.2935527 2.118543 0.6916275 0.3076734
>
> #4 4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031
> #0.2524853 1.941667 0.6283592 0.3592155
>
> #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
> #0.1856815 1.807698 0.3916619 0.7229726
>
> #> med
>
> # class name1 name2 name3 name4 name5 name6 name7
> #name8 name9 name10 name11
>
> #1 1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096
> #0.2754850 2.289048 0.7230254 0.3637169
>
> #2 2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908
> #0.2972432 1.852571 0.5252870 0.3958789
>
> #3 3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798
> #0.2937263 2.002215 0.6442313 0.2976664
>
> #4 4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735
> #0.2042306 1.932144 0.6002406 0.3081887
>
> #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
> #0.1856815 1.807698 0.3916619 0.7229726
>
>
>
>
> But I do not know how can I add this information in the original data?
>
>
> For example, for class 1, the output will look like this:
>
> dput(res1)
>
> structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 =
> c(2.554923977,
>
> 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378,
>
> 2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114,
>
> 1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798,
>
> 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525,
>
> 0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548,
>
> 2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295,
>
> 1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049,
>
> 2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748,
>
> 0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133,
>
> 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583,
>
> 1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245,
>
> 0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627,
>
> 2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733,
>
> 2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925,
>
> 0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619,
>
> 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455,
>
> 0.363716904)), .Names = c("class", "name1", "name2", "name3",
>
> "name4", "name5", "name6", "name7", "name8", "name9", "name10",
>
> "name11"), class = "data.frame", row.names = c("ara1", "ara2",
>
> "ara3", "ara4", "ara5", "Avg", "Med"))
>
>
>
> And same will be for other classes.
Please do not post in HTML, as requested by the Posting Guide.
It tends to mess things up and makes your code and results unreadable.
You cannot use “Avg” and “Med” unmodified as rownames, because the row names of a data frame must be unique.
For each “class” (“group” would be a better name) you must append something different, e.g. the “class” number.
Try this:
library(plyr)
# Append the mean row(s) and median row(s) of the value columns to `dat`.
#
# dat: a data frame whose first column is the grouping column "class" and
#      whose remaining columns are numeric values.
# Returns `dat` followed by the rows of per-class means and then the rows of
# per-class medians (one row each when `dat` holds a single class).
# NA values are ignored when computing both statistics.
g <- function(dat) {
  # drop = FALSE keeps a data frame even with a single value column, so the
  # aggregated column names still match `dat` when the pieces are row-bound.
  values <- dat[, -1, drop = FALSE]
  avg <- aggregate(values, dat["class"], function(x) mean(x, na.rm = TRUE))
  med <- aggregate(values, dat["class"], function(x) median(x, na.rm = TRUE))
  rbind(dat, avg, med)
}
# Run g() on the rows of each class and combine the results into DAT1.
DAT1 <- ddply(dat, .(class), .fun = g)
# Rebuild the row names class by class: the original row names of the class,
# followed by "Avg<class>" and "Med<class>" for the two appended rows.
# NOTE(review): relies on ddply returning the classes in the same order as
# split() -- confirm if the grouping column is not a plain sorted integer.
rownames(DAT1) <- unlist(
  lapply(split(dat, dat["class"]), function(grp) {
    id <- grp[["class"]][1]
    c(rownames(grp), paste0("Avg", id), paste0("Med", id))
  }),
  use.names = FALSE
)
DAT1
Convoluted but it works. Maybe someone else can come up with something shorter and more elegant.
Berend
More information about the R-help
mailing list