[R] fastest R platform

Jason Liao jg_liao at yahoo.com
Mon Apr 9 04:47:53 CEST 2001


Hello, everyone! I picked up R several months ago and have adopted it
as my choice for statistical programming. Coming from a Java
background, I can honestly say that R is not only free, it is better
tha S-plus: the lexical scope in R makes it very simple to simulate
Java's object model. For this, I encourage everyone to read the artcle:

Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
Computing", Journal of Computational and Graphical Statistics, 9,
491-508. 

I have coded a simulation program which runs at 1/5 the speed of
minimum usability on a Pentium 700 PC (windows me). I have spent quite
some time trying to optimize the program. So my question is: on which
platform does R run the fastest? I would be willing to invest on a
faster platform. Thanks in advance.

My program follows in case someone can help me improve it.

Thanks.

Jason

# Liao and Lipsitz (2001)
# A REML type estimate of variance components in generalized linear
mixed models
# This program runs on R. It does not run on S-plus.

  rm(list=ls(all=TRUE))
  
 ###small and common functions##################################
  glmm.sample.y <- function(x, z, b, D.u)
  {
    pp <- matrix(0, nrow = m, ncol = n);
    D.u.half <- t( chol(D.u) );
    zero <- numeric(n.random);
    
    for(i in 1:m)
    {
      obj <- rmulti.norm(zero, D.u.half);    
      pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );  
    }   
    pp <- exp(pp)/( 1+exp(pp) );    
        
    y <- runif(m*n);
    dim(y) <- c(m, n);  
    ifelse(y < pp, 1, 0);    #returning vector  
  }
################
  rmulti.norm <- function(mean, var.half)
  {    
     ran <- rnorm( length(mean) );
     log.prob <-  -sum(ran*ran)/2;
     
     ran <- mean + as.vector( var.half %*% ran );   
     return(ran, log.prob);   
  }  
  #################   
   my.outer <- function(x) #this is much faster than R's outer()
function
   {
      dim(x) <- c(length(x), 1);
      x %*% t(x);    
   }
   
   quadratic.form <- function(A, x) {  as.vector(x %*% A %*% x)  }
   

#######################################################################
 
  REML.b.D.u <- function(b, D.u, y, x, z)
  {
     TT <- matrix(0, n.random, n.random);
     for(i in 1:30)
     {             
        obj <- sample.u(b, D.u, x, y, z);  
        b <- b - solve(obj$Hessian, obj$score); 
        D.u <- obj$uu;         
       
        yy <- glmm.sample.y(x, z, b, D.u);  #this is the sampling stage
        obj <- mle.b(b, D.u, yy, x, z);     #given D.u, solves for the
mle of b
       
        TT <-  TT*(1-1/i) + obj$uu.diff/i;     
        D.u <- D.u + TT;
        print(i); print(date());
        print("inside REML");
        print(D.u);print("TT"); print(TT);
     } 
     
      return(b, D.u);
  }
  mle.b <- function(b, D.u, y, x, z)  #b here is the initial value
  {    
     for(i in 1:30)
     { 
       obj <- sample.u(b, D.u, x, y, z);      
       b <- b - solve(obj$Hessian, obj$score); 
       if(i==1) uu.first <- obj$uu;  
     }    
    
     uu.diff <- uu.first - obj$uu;
     return(b, uu=obj$uu, uu.diff);
  }
    
    mle.b.D.u <- function(b, D.u, y, x, z)
    {  
      for(i in 1:30)
      {        
        obj <- sample.u(b, D.u, x, y, z);  
        b <- b - solve(obj$Hessian, obj$score); 
        D.u <- obj$uu;  
      }         
      return(b, D.u);
     }
    
   sample.u <- function(b, D.u, x, y, z)
   {                 
     D.u.inv <- solve(D.u);
     
     uu <- matrix(0, n.random, n.random);
     score <- numeric(n.fixed);
     Hessian <- matrix(0, n.fixed, n.fixed);
   
     for(i in 1:m) 
     { 
       obj <-  sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
                            x[i, ,], y[i,], z[i, ,]);
       uu <- uu + obj$uu;
       score <- score + obj$score;
       Hessian <- Hessian + obj$Hessian;
     }

     uu <- uu/m;
     return(uu, score, Hessian);     
  } 
 
   sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y,
z for one subject only 
   {          
      func <- one.log.like(D.u.inv, ada.part, y, z);  #function to be
maximized
      obj <- optim( numeric(n.random), func, control=list(fnscale=-1) )
                  
      obj <- optim(obj$par, func, method = c("BFGS"),
control=list(fnscale=-1), hessian = T )                
                                                
      mean <- obj$par;      
      var.half <-  solve(-obj$hessian)*1.2;    
      var.half <- t( chol(var.half) );        
          
      n.simu <- 1000  #sample size of importance sampling
      
      sum.weight <- 0;    
      uu <- matrix(0, n.random, n.random);  
      pi <- numeric(n);
      product <- numeric(n);
           
      func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);    
      for(i in 1:n.simu)
      {
        obj1 <- rmulti.norm(mean, var.half);
        obj2 <- func(obj1$ran); 
        weight <- exp(obj2$log.likelihood - obj1$log.prob);
        
        uu <- uu + my.outer(obj1$ran) * weight;     
        pi <- pi + obj2$pi * weight;  
        product <- product + obj2$pi*(1-obj2$pi) * weight; 
                
        sum.weight <- sum.weight + weight;  
      }
         
      uu <- uu/sum.weight;
      pi <- pi/sum.weight;
      product <- product/sum.weight; 
      
      score <- as.vector( (y - pi) %*% x );   
      Hessian <- -t(x) %*% diag(product) %*% x;    
      
      return(uu, score, Hessian);
    } 
     
      one.log.like <- function(D.u.inv, ada.part, y, z)
      { 
        function(u)
        {
          pp <- exp(  ada.part + as.vector( z %*% u )  );
          pp <- pp/(1+pp);             
          -quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 -
y)*log(1-pp) );  
        }  
     }
     
    one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
    {
       function(u)
       {
         pi <- exp(  ada.part + as.vector( z %*% u )  );
         pi <- pi/(1 + pi);           
         log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum(
y*log(pi) + (1-y)*log(1-pi) ); 
                
         return(pi, log.likelihood);
       }
     }    
 
  main <- function()
  {
    b <- c(.5 , 1, -1, -.5);
    if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
    D.u <- c(.5, 0, 0, .25); 
    D.u <- matrix(D.u, nrow=2);  

    x <- array(0, c(m, n, n.fixed) );  
    x[, , 1] <- 1;
  
    for(j in 1:n) x[, j, 2] <- j-5;  
    x[1:trunc(m/2), , 3] <- 0;
    x[trunc(m/2+1):m, , 3] <- 1;

    x[, , 4] <- x[, , 2]*x[, , 3];  
    if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
  
    z <- x[, , 1:2];      

    for(i in 1:50)
    {    
      y <- glmm.sample.y(x, z, b, D.u); 
      
      obj <- mle.b.D.u(b, D.u, y, x, z); print("mle"); print(obj$D.u); 
 
      write(obj$D.u, file = "simu1.dat", append=T);  
      
      obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
print(obj$D.u);            
      write(obj$D.u, file = "simu2.dat", append=T);  
    } 
  }     
###################################################
#global variable m, n, n.fixed, n.random
  
  n <- 10;
  m <- 25;

  n.fixed <- 4;
  n.random <- 2;
 
  main();
  print(date());




=====
Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126

http://www.geocities.com/jg_liao/index.html

__________________________________________________
Do You Yahoo!?
Get email at your own domain with Yahoo! Mail. 
http://personal.mail.yahoo.com/
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._



More information about the R-help mailing list