[R] Speeding up "accumulation" code in large matrix calc?
Berend Hasselman
bhh at xs4all.nl
Fri Feb 24 21:06:28 CET 2012
On 24-02-2012, at 20:02, Petr Savicky wrote:
> On Fri, Feb 24, 2012 at 08:59:44AM -0800, robertfeldt wrote:
>> Hi,
>>
>> I have R code like so:
>>
>> num.columns.back.since.last.occurence <- function(m, outcome) {
>> nrows <- dim(m)[1];
>> ncols <- dim(m)[2];
>> res <- matrix(rep.int(0, nrows*ncols), nrow=nrows);
>> for(row in 1:nrows) {
>> for(col in 2:ncols) {
>> res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
>> }
>> }
>> res;
>> }
>>
>> but on the very large matrices I apply this the execution times are a
>> problem. I would appreciate any help to rewrite this with more
>> "standard"/native R functions to speed things up.
>
> Hi.
>
> If the number of rows is large and the number of columns is not,
> then try the following.
>
> # random matrix
> A <- matrix((runif(49) < 0.2) + 0, nrow=7)
> outcome <- 1
>
> # transformation
> B <- array(0, dim=dim(A))
> curr <- B[, 1]
> for (i in seq.int(from=2, length=ncol(A)-1)) {
> curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
> B[, i] <- curr
> }
I've done some speed tests.
t1 <- function(m, outcome) {
nrows <- dim(m)[1]
ncols <- dim(m)[2]
res <- matrix(rep.int(0, nrows*ncols), nrow=nrows)
for(row in 1:nrows) {
for(col in 2:ncols) {
res[row,col] <- if(m[row,col-1]==outcome) {0} else {1+res[row,col-1]}
}
}
res
}
# by row
t2 <- function(A, outcome) {
oneRow <- function(x, outcome)
{
n <- length(x)
y <- c(0, cumsum(x[-n] == outcome))
ave(x, y, FUN = function(z) seq.int(along=z) - 1)
}
t(apply(A, 1, oneRow, outcome=1))
}
# transformation
t3 <- function(A, outcome) {
B <- array(0, dim=dim(A))
curr <- B[, 1]
for (i in seq.int(from=2, length=ncol(A)-1)) {
curr <- ifelse (A[, i-1] == outcome, 0, 1 + curr)
B[, i] <- curr
}
B
}
# Compile functions to byte code
library(compiler)
t1.c <- cmpfun(t1)
t2.c <- cmpfun(t2)
t3.c <- cmpfun(t3)
Nrow <- 100
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
Nrow <- 1000
Ncol <- 100
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
Nrow <- 1000
Ncol <- 1000
A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
z1 <- system.time(res1 <- t1(A,outcome=1))
z2 <- system.time(res2 <- t2(A,outcome=1))
z3 <- system.time(res3 <- t3(A, outcome=1))
z4 <- system.time(res4 <- t1.c(A,outcome=1))
z5 <- system.time(res5 <- t2.c(A,outcome=1))
z6 <- system.time(res6 <- t3.c(A, outcome=1))
print(all(res2==res1))
print(all(res3==res1))
print(all(res4==res1))
print(all(res5==res1))
print(all(res6==res1))
print(rbind(z1,z2,z3,z4,z5,z6))
------------------------------------------
Summary of results
> Nrow <- 100
> Ncol <- 1000
> A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
> print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child
z1 0.543 0.005 0.551 0 0
z2 0.299 0.004 0.304 0 0
z3 0.070 0.012 0.082 0 0
z4 0.112 0.002 0.114 0 0
z5 0.263 0.007 0.271 0 0
z6 0.062 0.009 0.070 0 0
> Nrow <- 1000
> Ncol <- 100
> A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
>
> z1 <- system.time(res1 <- t1(A,outcome=1))
> z2 <- system.time(res2 <- t2(A,outcome=1))
> z3 <- system.time(res3 <- t3(A, outcome=1))
> z4 <- system.time(res4 <- t1.c(A,outcome=1))
> z5 <- system.time(res5 <- t2.c(A,outcome=1))
> z6 <- system.time(res6 <- t3.c(A, outcome=1))
user.self sys.self elapsed user.child sys.child
z1 0.543 0.005 0.551 0 0
z2 0.299 0.004 0.304 0 0
z3 0.070 0.012 0.082 0 0
z4 0.112 0.002 0.114 0 0
z5 0.263 0.007 0.271 0 0
z6 0.062 0.009 0.070 0 0
> Nrow <- 1000
> Ncol <- 1000
> A <- matrix((runif(Ncol*Nrow)<0.2)+0, nrow=Nrow)
> print(rbind(z1,z2,z3,z4,z5,z6))
user.self sys.self elapsed user.child sys.child
z1 5.381 0.019 5.417 0 0
z2 2.812 0.044 2.858 0 0
z3 0.307 0.049 0.357 0 0
z4 1.176 0.015 1.191 0 0
z5 2.686 0.038 2.728 0 0
z6 0.305 0.049 0.355 0 0
<End of timing results>
The original function (t1) benefits a lot from compiling to byte code.
The function that operates on columns (t3) seems to be always the quickest in this experiment.
Berend
More information about the R-help
mailing list