[R] predict.poly for multivariate data

Fri Jul 10 12:18:20 CEST 2015

A recent stackoverflow post 
<http://stackoverflow.com/questions/31134985> "How do you make R poly() 
evaluate (or “predict”) multivariate new data (orthogonal or raw)?" made 
me look at poly and polym again.

predict.poly doesn't work with multivariate data because for such data 
poly calls polym which does not:
a) return an object inheriting from class poly;
b) return the coefficients needed to make predictions;
c) accept coefficients as an argument or include code to make predictions.

This does lead to some wrong answers without warnings. e.g.
################## vanilla poly and polym ###########
library(datasets)
alm <- lm(stack.loss ~ poly(Air.Flow, Water.Temp, degree=3), stackloss)
# "correct" prediction values [1:10]
alm$fitted.values[1:10]
# predict(alldata)[1:10] gives correct values
predict(alm, stackloss)[1:10]
# predict(partdata) gives wrong values
predict(alm, stackloss[1:10,])
#########
I guess - but haven't confirmed - that with multivariate newdata 
predict.lm(alm, newdata) calculates new orthogonal polynomials based on 
newdata rather than applying the original coefficients.

Below I append versions of:
a) polym edited to address the three points above;
b) poly slightly edited to reflect the changes in polym;
c) predict.poly unaltered, just to get it in the same environment as 
polym and poly for testing.
After implementing these the three sets of predictions above all agree.

I'm very ready to believe that I've got the wrong end of the stick 
and/or my suggestions can be improved so I welcome correction.

Otherwise, how do I go about getting these changes implemented?
I see stats is maintained by R Core Team. Are they likely to pick it up 
from here, or do I need to take any other action?

Best regards

Keith Jewell

### polym ##############################################
polym <- function (..., degree = 1, coefs = NULL, raw = FALSE)
# add coefs argument
{
   if(is.null(coefs)) {
     dots <- list(...)
     nd <- length(dots)
     if (nd == 0)
       stop("must supply one or more vectors")
     if (nd == 1)
       return(poly(dots[[1L]], degree, raw = raw))
     n <- sapply(dots, length)
     if (any(n != n[1L]))
       stop("arguments must have the same length")
     z <- do.call("expand.grid", rep.int(list(0:degree), nd))
     s <- rowSums(z)
     ind <- (s > 0) & (s <= degree)
     z <- z[ind, ]
     s <- s[ind]
     aPoly <- poly(dots[[1L]], degree, raw = raw) # avoid 2 calcs
     res <- cbind(1, aPoly)[, 1 + z[, 1]]
# attribute "coefs" = list of coefs from individual variables
     if (!raw) coefs <- list(attr(aPoly, "coefs"))
     for (i in 2:nd) {
       aPoly <- poly(dots[[i]], degree, raw = raw)
       res <- res * cbind(1, aPoly)[, 1 + z[, i]]
       if (!raw) coefs <- c(coefs, list(attr(aPoly, "coefs")))
     }
     colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
     attr(res, "degree") <- as.vector(s)
     if (!raw) attr(res, "coefs") <- coefs
     class(res) <- c("poly", "matrix") # add poly class
     res
   }
   else
   {
     nd <- length(coefs)    # number of variables
     newdata <- as.data.frame(list(...)) # new data
     if (nd != ncol(newdata)) stop("wrong number of columns in newdata")
     z <- do.call("expand.grid", rep.int(list(0:degree), nd))
     s <- rowSums(z)
     ind <- (s > 0) & (s <= degree)
     z <- z[ind, ]
     res <- cbind(1, poly(newdata[[1]], degree=degree, 
coefs=coefs[[1]]))[, 1 + z[, 1]]
     for (i in 2:nd) res <- res*cbind(1, poly(newdata[[i]], 
degree=degree, coefs=coefs[[i]]))[, 1 + z[, i]]
     colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
     res
   }
}
######################

#### poly ##################
poly <- function (x, ..., degree = 1, coefs = NULL, raw = FALSE)
{
   dots <- list(...)
   if (nd <- length(dots)) {
     if (nd == 1 && length(dots[[1L]]) == 1L)
       degree <- dots[[1L]]
# pass coefs argument as well
     else return(polym(x, ..., degree = degree, coefs=coefs, raw = raw))
   }
   if (is.matrix(x)) {
     m <- unclass(as.data.frame(cbind(x, ...)))
# pass coefs argument as well
     return(do.call("polym", c(m, degree = degree, raw = raw, 
list(coefs=coefs))))
   }
   if (degree < 1)
     stop("'degree' must be at least 1")
   if (anyNA(x))
     stop("missing values are not allowed in 'poly'")
   n <- degree + 1
   if (raw) {
     Z <- outer(x, 1L:degree, "^")
     colnames(Z) <- 1L:degree
     attr(Z, "degree") <- 1L:degree
     class(Z) <- c("poly", "matrix")
     return(Z)
   }
   if (is.null(coefs)) {
     if (degree >= length(unique(x)))
       stop("'degree' must be less than number of unique points")
     xbar <- mean(x)
     x <- x - xbar
     X <- outer(x, seq_len(n) - 1, "^")
     QR <- qr(X)
     if (QR$rank < degree)
       stop("'degree' must be less than number of unique points")
     z <- QR$qr
     z <- z * (row(z) == col(z))
     raw <- qr.qy(QR, z)
     norm2 <- colSums(raw^2)
     alpha <- (colSums(x * raw^2)/norm2 + xbar)[1L:degree]
     Z <- raw/rep(sqrt(norm2), each = length(x))
     colnames(Z) <- 1L:n - 1L
     Z <- Z[, -1, drop = FALSE]
     attr(Z, "degree") <- 1L:degree
     attr(Z, "coefs") <- list(alpha = alpha, norm2 = c(1,
                                                       norm2))
     class(Z) <- c("poly", "matrix")
   }
   else {
     alpha <- coefs$alpha
     norm2 <- coefs$norm2
     Z <- matrix(, length(x), n)
     Z[, 1] <- 1
     Z[, 2] <- x - alpha[1L]
     if (degree > 1)
       for (i in 2:degree) Z[, i + 1] <- (x - alpha[i]) *
       Z[, i] - (norm2[i + 1]/norm2[i]) * Z[, i - 1]
     Z <- Z/rep(sqrt(norm2[-1L]), each = length(x))
     colnames(Z) <- 0:degree
     Z <- Z[, -1, drop = FALSE]
     attr(Z, "degree") <- 1L:degree
     attr(Z, "coefs") <- list(alpha = alpha, norm2 = norm2)
     class(Z) <- c("poly", "matrix")
   }
   Z
}
################################################

### predict.poly ####
predict.poly <- function (object, newdata, ...)
{
   if (missing(newdata))
     return(object)
   if (is.null(attr(object, "coefs")))
     poly(newdata, degree = max(attr(object, "degree")), raw = TRUE)
   else poly(newdata, degree = max(attr(object, "degree")),
             coefs = attr(object, "coefs"))
}
######################