[R] How can I avoid the for and If loops in my function?
Mramba,Lazarus K
lmramba at ufl.edu
Wed Jun 18 21:14:17 CEST 2014
Hi R-users,
### reproducible example:
library(gmp)
library(matlab)
library(Matrix)
library(foreach)
library(MASS)
library(mvtnorm)
#####################################
## A function to create a field experiment
## Returns a data frame, called matdf
######################################
setup<-function(b,g,rb,cb,r,c)
{
# where
# b = number of blocks
# g = number of treatments per block
# rb = number of rows per block
# cb = number of columns per block
# r = total number of rows for the layout
# c = total number of columns for the layout
### Check points
stopifnot(is.numeric(b),is.whole(b),is.numeric(g),g>1)
## Compatibility checks
genot<<-seq(1,g,1)
stopifnot(rb*cb==length(genot),r/rb * c/cb == b)
## Generate the design
genotypes<-times(b) %do% sample(genot,g)
block<-rep(1:b,each=length(genot))
genotypes<-factor(genotypes)
block<-factor(block)
### generate the base design
k<-c/cb # number of blocks on the x-axis
y<-rep(rep(1:r,each=cb),k) # X-coordinate
#w<-rb
l<-cb
p<-r/rb
m<-l+1
d<-l*b/p
x<-c(rep(1:l,r),rep(m:d,r)) # Y-coordinate
## compact
matdf<-data.frame(x,y,block,genotypes)
matdf[order(matdf$x),]
}
#########################################################################
## a function to calculate trace from the original data frame
## Returns trace of the variance-covariance matrix, named C22
######################################################################
mainF<-function(matdf,h2=h2,rhox=rhox,rhoy=rhoy,ped="F",s2e=1,c=c,r=r)
{
N<-nrow(matdf)
## Identity matrices
X<<-model.matrix(~matdf$block)
s2g<-varG(s2e,h2)
## calculate G and Z
IG<<-(1/s2g)*eye(length(genot))
z<-model.matrix(~matdf$genotypes-1) # changes everytime there is a
swap
## calculate R and IR
# rhox=rhoy=0.3;s2e=1
# # calculate R and IR
sigx <- diag(c)
sigx <- rhox^ abs(row(sigx) - col(sigx))
sigy <- diag(r)
sigy <- rhoy ^ abs(row(sigy) - col(sigy))
R<- s2e * kronecker(sigx, sigy) # takes 0.01 second
################
# find inverse of R by choleski decomposition
IR<<-chol2inv(chol(R)) # takes about 20 seconds
####
#### brute force matrix multiplication
C11<-t(X)%*%IR%*%X
C11inv<-chol2inv(chol(C11))
k1<<-IR%*%X # 0.2 seconds
k2<-C11inv%*%t(X) # 0 seconds
k3<-k2%*%IR # 0.2 seconds
K<<-k1%*%k3 # 0.16 seconds
### Variance covariance matrices
temp<-t(z)%*%IR%*%z+IG - t(z)%*%K%*%z
C22<-chol2inv(chol(temp))
##########################
## Optimality Criteria
#########################
traceI=sum(diag(C22)) # A-Optimality
traceI
}
## ################################################
### My function to randomly swap two elements in the same block of a
dataframe
### returns a dataframe, called newmatdf
####################################################
swapsimple<-function(matdf)
{
## now, new design after swapping is
attach(matdf,warn.conflict=FALSE)
b1<-sample(matdf$block,1,replace=TRUE);b1
gg1<-matdf$genotypes[block==b1];gg1
g1<-sample(gg1,2);g1
samp<-Matrix(c(g1=g1,block=b1),nrow=1,ncol=3,
dimnames=list(NULL,c("gen1","gen2","block")));samp
newGen<-matdf$genotypes
newG<-ifelse(matdf$genotypes==samp[,1] &
block==samp[,3],samp[,2],matdf$genotypes)
NewG<-ifelse(matdf$genotypes==samp[,2] &
block==samp[,3],samp[,1],newG)
NewG<-factor(NewG)
## now, new design after swapping is
newmatdf<-cbind(matdf,NewG)
newmatdf<-as.data.frame(newmatdf)
names(newmatdf)[names(newmatdf)=="genotypes"] <- "old_G"
names(newmatdf)[names(newmatdf)=="NewG"] <- "genotypes"
#newmatdf <- remove.vars(newmatdf, "old_G")
newmatdf$old_G <- newmatdf$old_G <- NULL
newmatdf[order(newmatdf$x),]
}
#############################################################
### My function to re-calculate trace after swaping the pairs of
elements
################################################
swapmainF<-function(newmatdf)
{
Z<-model.matrix(~newmatdf$genotypes-1)
### Variance covariance matrices
temp<-t(Z)%*%IR%*%Z+IG - t(Z)%*%K%*%Z
C22<-chol2inv(chol(temp))
##########################
## Optimality Criteria
#########################
traceI=sum(diag(C22)) # A-Optimality
traceI
}
#######################################
#I need help in the function below
## I need to avoid the for loops and if loops here :
########################################################
########################################################
#Take an original matrix, called matrix0, calculate its trace. Generate
#a new matrix, called newmatdf after swapping two elements of the old
#one and calculate the trace. If the trace of the newmatrix is smaller
#than that of the previous matrix, store both the current trace
together with
#the older trace and their iteration values. If the newer matrix has a
#trace larger than the previous trace, drop this trace and drop this
#matrix too (but count its iteration).
#Re-swap the old matrix that you stored previously and recalculate the
#trace. Repeat the process many times, say 10,000. The final results
should be a list
#with the original initial matrix and its trace, the final best
#matrix that had the smallest trace after the 10000 simulations and a
#dataframe showing the values of the accepted traces that
#were smaller than the previous and their respective iterations.
####################################################################
funF<- function(newmatdf,n,traceI)
{
matrix0<-newmatdf
trace<-traceI
res <- list(mat = NULL, Design_best = newmatdf, Original_design =
matrix0)
res$mat <- rbind(res$mat, c(value = trace, iterations = 0))
Des<-list()
for(i in seq_len(n)){
ifelse(i==1,
newmatdf<-swapsimple(matrix0),newmatdf<-swapsimple(newmatdf))
Des[[i]]<-newmatdf
if(swapmainF(newmatdf) < trace){
newmatdf<-Des[[i]]
Des[[i]]<-newmatdf
trace<- swapmainF(newmatdf)
res$mat <- rbind(res$mat, c(trace = trace, iterations = i))
res$Design_best <- newmatdf
}
if(swapmainF(newmatdf) > trace & nrow(res$mat)<=1){
newmatdf<-matrix0
Des[[i]]<-matrix0
res$Design_best<-matrix0
}
if(swapmainF(newmatdf)> trace & nrow(res$mat)>1){
newmatdf<-Des[[length(Des)-1]]
Des[[i]]<-newmatdf
res$Design_best<-newmatdf
}
}
res
}
######################################
## Call the function setup, generate 100 designs,
## calculate their traces using the function mainF,
## choose the dataframe with the smallest trace
## store only that dataframe and its trace for later use
######################################
M2F<-function(D,ped="F",k=1,b,g,rb,cb,r,c,
h2,rhox,rhoy,s2e=1)
{
matrix0<-list()
start0 <- c()
value0 <- c()
for (i in 1:D)
{
print(sprintf("generating initial design: %d", i, "complete\n",
sep=""))
flush.console()
matrix0[[i]]<-setup(b=b,g=g,rb=rb,cb=cb,r=r,c=c)
start0[i]<-mainF(matdf=matrix0[[i]],h2=h2,rhox=rhox,rhoy=rhoy,s2e=s2e,c=c,r=r)
s<-which.min(start0)
newmatdf<-matrix0[s][[1]]
trace0<-start0[s][[1]]
}
list(newmatdf=newmatdf,start0=start0,trace0=trace0,index=s)
}
################################################
#### Test my functions ### works perfectly but takes too long
###########################################
b=16;g=196;rb=14;cb=14;r=56;c=56;h2=0.1;rhox=0.3;rhoy=0.3
h2=0.1;rhox=0.3;rhoy=0.3;s2e=1
#
tic() # takes 42.020000 seconds for D==2. but for D==100 , takes
about 30 minutes !!!
res1<-M2F(D=2,ped="F",k=1,b=b,g=g,rb=rb,cb=cb,r=r,c=c,
h2=0.1,rhox=0.3,rhoy=0.3,s2e=1)
toc()
tic() # takes 37.720000 seconds for n==5 but I need for n==4000 or more
takes >7hours
ans1<-funF(res1$newmatdf,traceI=res1$trace0,n=5)
toc()
ans1$mat
regards,
Laz
More information about the R-help
mailing list