[R] vectorisation
Berend Hasselman
bhh at xs4all.nl
Sun Feb 3 09:33:26 CET 2013
On 02-02-2013, at 17:38, Brett Robinson <brett.robinson at 7dials.com> wrote:
> Hi
> I'm trying to set up a simulation problem without resorting to (m)any loops. I want to set entries in a data frame of zeros ('starts' in the code below) to 1 at certain points and the points have been randomly generated and stored in a separate data.frame ('sl'), which has the same number of columns.
>
> An example of the procedure is as follows:
> ml <- data.frame(matrix(sample(1:50,80, replace=TRUE),20,4))
> mm <- apply(ml, 2, cumsum)
> starts<- data.frame(matrix(0,600,4))
>
> I can achieve the result I want with a loop:
> for (i in 1:4){
> lstarts[,i][mm[,i]] <-1
> }
>
> But as I want to use a large number of columns I would like to do away with the loop
>
> Can anyone suggest how this might be done?
Another way is this
f2 <- function(starts, mm) {
mn <- cbind(as.vector(mm),rep(1:ncol(mm),each=nrow(mm)))
x <- as.matrix(starts)
x[mn] <- 1
as.data.frame(x)
}
starts2 <- f2(starts,mm)
#> identical(starts2,starts1)
# [1] TRUE
Collect all the options presented so far in functions, use the compiler package to see if that helps
and do some speed tests with Arun's parameters.
# Brett
f1 <- function(starts, mm) {
for (i in 1:ncol(mm)){
starts[,i][mm[,i]] <-1
}
starts
}
# Berend
f2 <- function(starts, mm) {
mn <- cbind(as.vector(mm),rep(1:ncol(mm),each=nrow(mm)))
x <- as.matrix(starts)
x[mn] <- 1
as.data.frame(x)
}
# Rui
f3 <- function(s2,mm) {
s2[] <- lapply(seq_len(ncol(mm)), function(i) {s2[,i][mm[,i]] <- 1; s2[,i]})
s2
}
# Arun
f4 <- function(starts,mm) {
starts2 <- as.data.frame(do.call(cbind,lapply(1:ncol(mm),function(i) {starts[,i][mm[,i]]<-1;starts[,i]})))
colnames(starts2)<- colnames(starts)
starts2
}
library(compiler)
f1c <- cmpfun(f1)
f2c <- cmpfun(f2)
f3c <- cmpfun(f3)
f4c <- cmpfun(f4)
library(rbenchmark)
# Arun's test
set.seed(11)
starts <- data.frame(matrix(0,1e6,4))
ml <- data.frame(matrix(sample(1:1e4,1e3, replace=TRUE),100,4))
mm <- apply(ml, 2, cumsum)
z1 <- f1(starts,mm)
z2 <- f2(starts,mm)
z3 <- f3(starts,mm)
z4 <- f4(starts,mm)
z1c <- f1c(starts,mm)
z2c <- f2c(starts,mm)
z3c <- f3c(starts,mm)
z4c <- f4c(starts,mm)
identical(z2,z1)
identical(z3,z1)
identical(z4,z1)
identical(z1c,z1)
identical(z2c,z1)
identical(z3c,z1)
identical(z4c,z1)
benchmark( f1(starts,mm) , f2(starts,mm),
f1c(starts,mm), f2c(starts,mm),
f3(starts,mm) , f4(starts,mm),
f3c(starts,mm), f4c(starts,mm),
replications=1,order="relative", columns=c("test","relative","elapsed","replications"))
Result:
# > identical(z2,z1)
# [1] TRUE
# > identical(z3,z1)
# [1] TRUE
# > identical(z4,z1)
# [1] TRUE
# > identical(z1c,z1)
# [1] TRUE
# > identical(z2c,z1)
# [1] TRUE
# > identical(z3c,z1)
# [1] TRUE
# > identical(z4c,z1)
# [1] TRUE
# >
# > benchmark( f1(starts,mm) , f2(starts,mm),
# + f1c(starts,mm), f2c(starts,mm),
# + f3(starts,mm) , f4(starts,mm),
# + f3c(starts,mm), f4c(starts,mm),
# + replications=1,order="relative", columns=c("test","relative","elapsed","replications"))
# test relative elapsed replications
# 2 f2(starts, mm) 1.000 0.195 1
# 4 f2c(starts, mm) 1.005 0.196 1
# 1 f1(starts, mm) 2.990 0.583 1
# 3 f1c(starts, mm) 3.082 0.601 1
# 7 f3c(starts, mm) 3.903 0.761 1
# 5 f3(starts, mm) 3.949 0.770 1
# 8 f4c(starts, mm) 4.436 0.865 1
# 6 f4(starts, mm) 4.462 0.870 1
Compiling doesn't deliver significant speed gains in this case.
Function f2 is the quickest.
Berend
More information about the R-help
mailing list