[R] Finding unique terms
Dénes Tóth
toth@dene@ @end|ng |rom kogentum@hu
Fri Oct 12 00:29:06 CEST 2018
On 10/12/2018 12:12 AM, roslinazairimah zakaria wrote:
> Dear r-users,
>
> I have this data:
>
> structure(list(STUDENT_ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
> 2L, 2L, 2L, 2L, 2L), .Label = c("AA15285", "AA15286"), class = "factor"),
> COURSE_CODE = structure(c(1L, 2L, 5L, 6L, 7L, 8L, 2L, 3L,
> 4L, 5L, 6L), .Label = c("BAA1113", "BAA1322", "BAA2113",
> "BAA2513", "BAA2713", "BAA2921", "BAA4273", "BAA4513"), class =
> "factor"),
> PO1M = c(155.7, 48.9, 83.2, NA, NA, NA, 48.05, 68.4, 41.65,
> 82.35, NA), PO1T = c(180, 70, 100, NA, NA, NA, 70, 100, 60,
> 100, NA), PO2M = c(NA, NA, NA, 37, NA, NA, NA, NA, NA, NA,
> 41), PO2T = c(NA, NA, NA, 50, NA, NA, NA, NA, NA, NA, 50),
> X = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X.1 = c(NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("STUDENT_ID",
> "COURSE_CODE", "PO1M", "PO1T", "PO2M", "PO2T", "X", "X.1"), class =
> "data.frame", row.names = c(NA,
> -11L))
>
> I want to combine the same Student ID and add up all the values for PO1M,
> PO1T,...,PO2T obtained by the same ID.
# Example data from the question, rebuilt with data.frame()/factor() instead
# of the raw dput() structure: 11 course records for two students.
dat <- data.frame(
  # Six records for the first student, five for the second.
  STUDENT_ID = factor(
    rep(c("AA15285", "AA15286"), times = c(6L, 5L)),
    levels = c("AA15285", "AA15286")
  ),
  COURSE_CODE = factor(
    c("BAA1113", "BAA1322", "BAA2713", "BAA2921", "BAA4273", "BAA4513",
      "BAA1322", "BAA2113", "BAA2513", "BAA2713", "BAA2921"),
    levels = c("BAA1113", "BAA1322", "BAA2113", "BAA2513",
               "BAA2713", "BAA2921", "BAA4273", "BAA4513")
  ),
  # Score columns; NA marks a score that is missing for that record.
  PO1M = c(155.7, 48.9, 83.2, NA, NA, NA,
           48.05, 68.4, 41.65, 82.35, NA),
  PO1T = c(180, 70, 100, NA, NA, NA,
           70, 100, 60, 100, NA),
  PO2M = c(NA, NA, NA, 37, NA, NA,
           NA, NA, NA, NA, 41),
  PO2T = c(NA, NA, NA, 50, NA, NA,
           NA, NA, NA, NA, 50),
  # Two all-NA (logical) columns present in the original data.
  X = rep(NA, 11L),
  X.1 = rep(NA, 11L)
)
# The question asks to "add up" the values per student, so the groups are
# aggregated with sum(). I assume missing scores should be skipped, hence
# na.rm = TRUE; note that a group whose values are all NA then sums to 0.
sumFn <- function(x) sum(x, na.rm = TRUE)

# All score columns named in the question (PO1M, PO1T, ..., PO2T).
score_cols <- c("PO1M", "PO1T", "PO2M", "PO2T")

# Base R: one row per STUDENT_ID, one summed column per score; see ?aggregate
aggregate(dat[, score_cols],
          by = dat["STUDENT_ID"],
          FUN = sumFn)

# if you have largish or large data
library(data.table)
dat2 <- as.data.table(dat)
# .SD holds the .SDcols columns of the current STUDENT_ID group.
dat2[, lapply(.SD, sumFn),
     by = STUDENT_ID,
     .SDcols = score_cols]
Regards,
Denes
>
> How do I do that?
> Thank you for any help given.
>
More information about the R-help
mailing list