[R] fastest R platform
Andrew J Perrin aperrin at socrates.berkeley.edu
Mon Apr 9 14:30:07 CEST 2001
I haven't looked into this with R in particular, but with some textual
analysis software I developed in Perl, the same job ran about 3.5x faster
under Linux than under Windows NT on the same hardware. That hardware
was much slower than yours (Pentium 150, 96MB RAM). The Linux variant was
Debian potato (2.2r2). YMMV.
Andy Perrin
------------------------------------------------------------------
Andrew J Perrin - Ph.D. Candidate, UC Berkeley, Dept. of Sociology
Chapel Hill NC 27514 USA | aperrin at socrates.berkeley.edu
On Sun, 8 Apr 2001, Jason Liao wrote:
> Hello, everyone! I picked up R several months ago and have adopted it
> as my choice for statistical programming. Coming from a Java
> background, I can honestly say that R is not only free, it is better
> than S-PLUS: lexical scoping in R makes it very simple to simulate
> Java's object model. For this, I encourage everyone to read the article:
>
> Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
> Computing", Journal of Computational and Graphical Statistics, 9,
> 491-508.
>
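> A minimal sketch of the idiom (my own illustration, not from the paper):
> a function can return closures that share private state, much like the
> fields and methods of a Java object:
>
> make.counter <- function(start = 0)
> {
> count <- start;
> list(
> increment = function() { count <<- count + 1; invisible(count) },
> get = function() count
> );
> }
>
> ctr <- make.counter();
> ctr$increment(); ctr$increment();
> ctr$get(); # 2: the state lives in the closure's environment
>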
> I have coded a simulation program which runs at about one fifth of the
> speed I would consider minimally usable, on a Pentium 700 PC (Windows
> Me). I have spent quite some time trying to optimize the program. So my
> question is: on which platform does R run the fastest? I would be
> willing to invest in a faster platform. Thanks in advance.
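>
> One way to find the hot spots first, using base R's profiling tools (a
> rough sketch; the summary step may differ across R versions):
>
> system.time( main() ); # total time for one run
> Rprof("profile.out"); # start the sampling profiler
> main();
> Rprof(NULL); # stop profiling
> summaryRprof("profile.out"); # time spent per function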
>
> My program follows in case someone can help me improve it.
>
> Thanks.
>
> Jason
>
> # Liao and Lipsitz (2001)
> # A REML-type estimate of variance components in generalized linear mixed models
> # This program runs on R; it does not run on S-PLUS.
>
> rm(list=ls(all=TRUE))
>
> ###small and common functions##################################
> glmm.sample.y <- function(x, z, b, D.u)
> {
> pp <- matrix(0, nrow = m, ncol = n);
> D.u.half <- t( chol(D.u) );
> zero <- numeric(n.random);
>
> for(i in 1:m)
> {
> obj <- rmulti.norm(zero, D.u.half);
> pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );
> }
> pp <- exp(pp)/( 1+exp(pp) );
>
> y <- runif(m*n);
> dim(y) <- c(m, n);
> ifelse(y < pp, 1, 0); # returned value: an m x n matrix of 0/1 responses
> }
> ################
> rmulti.norm <- function(mean, var.half)
> {
> ran <- rnorm( length(mean) );
> log.prob <- -sum(ran*ran)/2;
>
> ran <- mean + as.vector( var.half %*% ran );
> return( list(ran = ran, log.prob = log.prob) ); # return both as a named list
> }
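>
> A quick sanity check of rmulti.norm (my own test, not part of the
> program): the sample covariance of many draws should approach the input
> covariance:
>
> V <- matrix(c(.5, 0, 0, .25), 2, 2);
> draws <- t( replicate(5000, rmulti.norm(c(0, 0), t(chol(V)))$ran) );
> cov(draws); # should be close to V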
> #################
> my.outer <- function(x) # much faster here than R's outer() function
> {
> dim(x) <- c(length(x), 1);
> x %*% t(x);
> }
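>
> A quick check of my.outer (again my own test): it should agree with
> outer() and with base R's tcrossprod():
>
> x <- rnorm(100);
> all.equal( my.outer(x), outer(x, x) ); # TRUE
> all.equal( my.outer(x), tcrossprod(x) ); # TRUE; tcrossprod(x) is x %*% t(x)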
>
> quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }
>
>
> #######################################################################
>
> REML.b.D.u <- function(b, D.u, y, x, z)
> {
> TT <- matrix(0, n.random, n.random);
> for(i in 1:30)
> {
> obj <- sample.u(b, D.u, x, y, z);
> b <- b - solve(obj$Hessian, obj$score);
> D.u <- obj$uu;
>
> yy <- glmm.sample.y(x, z, b, D.u); # the sampling stage
> obj <- mle.b(b, D.u, yy, x, z); # given D.u, solve for the MLE of b
>
> TT <- TT*(1-1/i) + obj$uu.diff/i; # running average of the corrections uu.diff
> D.u <- D.u + TT;
> print(i); print(date());
> print("inside REML");
> print(D.u);print("TT"); print(TT);
> }
>
> return( list(b = b, D.u = D.u) );
> }
> mle.b <- function(b, D.u, y, x, z) #b here is the initial value
> {
> for(i in 1:30)
> {
> obj <- sample.u(b, D.u, x, y, z);
> b <- b - solve(obj$Hessian, obj$score);
> if(i==1) uu.first <- obj$uu;
> }
>
> uu.diff <- uu.first - obj$uu;
> return( list(b = b, uu = obj$uu, uu.diff = uu.diff) );
> }
>
> mle.b.D.u <- function(b, D.u, y, x, z)
> {
> for(i in 1:30)
> {
> obj <- sample.u(b, D.u, x, y, z);
> b <- b - solve(obj$Hessian, obj$score);
> D.u <- obj$uu;
> }
> return( list(b = b, D.u = D.u) );
> }
>
> sample.u <- function(b, D.u, x, y, z)
> {
> D.u.inv <- solve(D.u);
>
> uu <- matrix(0, n.random, n.random);
> score <- numeric(n.fixed);
> Hessian <- matrix(0, n.fixed, n.fixed);
>
> for(i in 1:m)
> {
> obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
> x[i, ,], y[i,], z[i, ,]);
> uu <- uu + obj$uu;
> score <- score + obj$score;
> Hessian <- Hessian + obj$Hessian;
> }
>
> uu <- uu/m;
> return( list(uu = uu, score = score, Hessian = Hessian) );
> }
>
> sample.u.one <- function(D.u.inv, ada.part, x, y, z) # arguments for one subject only
> {
> func <- one.log.like(D.u.inv, ada.part, y, z); # function to be maximized
> obj <- optim( numeric(n.random), func, control = list(fnscale = -1) ); # rough maximum first
>
> obj <- optim( obj$par, func, method = "BFGS",
> control = list(fnscale = -1), hessian = TRUE ); # refine and get the Hessian
>
> mean <- obj$par;
> var.half <- solve(-obj$hessian)*1.2; # inflate the proposal variance a little
> var.half <- t( chol(var.half) );
>
> n.simu <- 1000; # sample size for the importance sampling
>
> sum.weight <- 0;
> uu <- matrix(0, n.random, n.random);
> pi <- numeric(n); # note: masks the built-in constant pi
> product <- numeric(n);
>
> func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);
> for(i in 1:n.simu)
> {
> obj1 <- rmulti.norm(mean, var.half);
> obj2 <- func(obj1$ran);
> weight <- exp(obj2$log.likelihood - obj1$log.prob);
>
> uu <- uu + my.outer(obj1$ran) * weight;
> pi <- pi + obj2$pi * weight;
> product <- product + obj2$pi*(1-obj2$pi) * weight;
>
> sum.weight <- sum.weight + weight;
> }
>
> uu <- uu/sum.weight;
> pi <- pi/sum.weight;
> product <- product/sum.weight;
>
> score <- as.vector( (y - pi) %*% x );
> Hessian <- -t(x) %*% diag(product) %*% x;
>
> return( list(uu = uu, score = score, Hessian = Hessian) );
> }
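>
> The weighting above is self-normalized importance sampling; a tiny
> standalone illustration of the idea (my notation, independent of the
> model):
>
> # estimate E[u^2] under N(0,1) with draws from a wider N(0, 1.2^2) proposal
> u <- rnorm(1000, sd = 1.2);
> w <- exp( dnorm(u, log = TRUE) - dnorm(u, sd = 1.2, log = TRUE) );
> sum(u^2 * w) / sum(w); # should be close to 1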
>
> one.log.like <- function(D.u.inv, ada.part, y, z) # returns a closure over its arguments
> {
> function(u)
> {
> pp <- exp( ada.part + as.vector( z %*% u ) );
> pp <- pp/(1+pp);
> -quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1-y)*log(1-pp) );
> }
> }
>
> one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
> {
> function(u)
> {
> pi <- exp( ada.part + as.vector( z %*% u ) );
> pi <- pi/(1 + pi);
> log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum( y*log(pi) + (1-y)*log(1-pi) );
>
> return( list(pi = pi, log.likelihood = log.likelihood) );
> }
> }
>
> main <- function()
> {
> b <- c(.5 , 1, -1, -.5);
> if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
> D.u <- c(.5, 0, 0, .25);
> D.u <- matrix(D.u, nrow=2);
>
> x <- array(0, c(m, n, n.fixed) );
> x[, , 1] <- 1;
>
> for(j in 1:n) x[, j, 2] <- j-5;
> x[1:trunc(m/2), , 3] <- 0;
> x[trunc(m/2+1):m, , 3] <- 1;
>
> x[, , 4] <- x[, , 2]*x[, , 3];
> if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
>
> z <- x[, , 1:2];
>
> for(i in 1:50)
> {
> y <- glmm.sample.y(x, z, b, D.u);
>
> obj <- mle.b.D.u(b, D.u, y, x, z); print("mle"); print(obj$D.u);
>
> write(obj$D.u, file = "simu1.dat", append=TRUE);
>
> obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
> print(obj$D.u);
> write(obj$D.u, file = "simu2.dat", append=TRUE);
> }
> }
> ###################################################
> # global variables: m, n, n.fixed, n.random
>
> n <- 10;
> m <- 25;
>
> n.fixed <- 4;
> n.random <- 2;
>
> main();
> print(date());
>
>
> =====
> Jason G. Liao
> Department of Biometry and Epidemiology
> Medical University of South Carolina
> 135 Rutledge Ave., STE 1148, Charleston, SC 29425
> phone (843) 876-1114, fax (843) 876-1126
>
> http://www.geocities.com/jg_liao/index.html
>
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._