[R] predict.poly for multivariate data
Keith Jewell
Keith.Jewell at campdenbri.co.uk
Fri Jul 10 12:18:20 CEST 2015
A recent stackoverflow post
<http://stackoverflow.com/questions/31134985> "How do you make R poly()
evaluate (or “predict”) multivariate new data (orthogonal or raw)?" made
me look at poly and polym again.
predict.poly doesn't work with multivariate data because for such data
poly calls polym which does not:
a) return an object inheriting from class poly;
b) return the coefficients needed to make predictions;
c) accept coefficients as an argument or include code to make predictions.
This does lead to some wrong answers without warnings. e.g.
################## vanilla poly and polym ###########
library(datasets)
alm <- lm(stack.loss ~ poly(Air.Flow, Water.Temp, degree=3), stackloss)
# "correct" prediction values [1:10]
alm$fitted.values[1:10]
# predict(alldata)[1:10] gives correct values
predict(alm, stackloss)[1:10]
# predict(partdata) gives wrong values
predict(alm, stackloss[1:10,])
#########
I guess - but haven't confirmed - that with multivariate newdata
predict.lm(alm, newdata) calculates new orthogonal polynomials based on
newdata rather than applying the original coefficients.
Below I append versions of:
a) polym edited to address the three points above;
b) poly slightly edited to reflect the changes in polym;
c) predict.poly unaltered, just to get it in the same environment as
polym and poly for testing.
After implementing these the three sets of predictions above all agree.
I'm very ready to believe that I've got the wrong end of the stick
and/or my suggestions can be improved so I welcome correction.
Otherwise, how do I go about getting these changes implemented?
I see stats is maintained by R Core Team. Are they likely to pick it up
from here, or do I need to take any other action?
Best regards
Keith Jewell
### polym ##############################################
polym <- function (..., degree = 1, coefs = NULL, raw = FALSE)
# add coefs argument
{
if(is.null(coefs)) {
dots <- list(...)
nd <- length(dots)
if (nd == 0)
stop("must supply one or more vectors")
if (nd == 1)
return(poly(dots[[1L]], degree, raw = raw))
n <- sapply(dots, length)
if (any(n != n[1L]))
stop("arguments must have the same length")
z <- do.call("expand.grid", rep.int(list(0:degree), nd))
s <- rowSums(z)
ind <- (s > 0) & (s <= degree)
z <- z[ind, ]
s <- s[ind]
aPoly <- poly(dots[[1L]], degree, raw = raw) # avoid 2 calcs
res <- cbind(1, aPoly)[, 1 + z[, 1]]
# attribute "coefs" = list of coefs from individual variables
if (!raw) coefs <- list(attr(aPoly, "coefs"))
for (i in 2:nd) {
aPoly <- poly(dots[[i]], degree, raw = raw)
res <- res * cbind(1, aPoly)[, 1 + z[, i]]
if (!raw) coefs <- c(coefs, list(attr(aPoly, "coefs")))
}
colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
attr(res, "degree") <- as.vector(s)
if (!raw) attr(res, "coefs") <- coefs
class(res) <- c("poly", "matrix") # add poly class
res
}
else
{
nd <- length(coefs) # number of variables
newdata <- as.data.frame(list(...)) # new data
if (nd != ncol(newdata)) stop("wrong number of columns in newdata")
z <- do.call("expand.grid", rep.int(list(0:degree), nd))
s <- rowSums(z)
ind <- (s > 0) & (s <= degree)
z <- z[ind, ]
res <- cbind(1, poly(newdata[[1]], degree=degree,
coefs=coefs[[1]]))[, 1 + z[, 1]]
for (i in 2:nd) res <- res*cbind(1, poly(newdata[[i]],
degree=degree, coefs=coefs[[i]]))[, 1 + z[, i]]
colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
res
}
}
######################
#### poly ##################
poly <- function (x, ..., degree = 1, coefs = NULL, raw = FALSE)
{
dots <- list(...)
if (nd <- length(dots)) {
if (nd == 1 && length(dots[[1L]]) == 1L)
degree <- dots[[1L]]
# pass coefs argument as well
else return(polym(x, ..., degree = degree, coefs=coefs, raw = raw))
}
if (is.matrix(x)) {
m <- unclass(as.data.frame(cbind(x, ...)))
# pass coefs argument as well
return(do.call("polym", c(m, degree = degree, raw = raw,
list(coefs=coefs))))
}
if (degree < 1)
stop("'degree' must be at least 1")
if (anyNA(x))
stop("missing values are not allowed in 'poly'")
n <- degree + 1
if (raw) {
Z <- outer(x, 1L:degree, "^")
colnames(Z) <- 1L:degree
attr(Z, "degree") <- 1L:degree
class(Z) <- c("poly", "matrix")
return(Z)
}
if (is.null(coefs)) {
if (degree >= length(unique(x)))
stop("'degree' must be less than number of unique points")
xbar <- mean(x)
x <- x - xbar
X <- outer(x, seq_len(n) - 1, "^")
QR <- qr(X)
if (QR$rank < degree)
stop("'degree' must be less than number of unique points")
z <- QR$qr
z <- z * (row(z) == col(z))
raw <- qr.qy(QR, z)
norm2 <- colSums(raw^2)
alpha <- (colSums(x * raw^2)/norm2 + xbar)[1L:degree]
Z <- raw/rep(sqrt(norm2), each = length(x))
colnames(Z) <- 1L:n - 1L
Z <- Z[, -1, drop = FALSE]
attr(Z, "degree") <- 1L:degree
attr(Z, "coefs") <- list(alpha = alpha, norm2 = c(1,
norm2))
class(Z) <- c("poly", "matrix")
}
else {
alpha <- coefs$alpha
norm2 <- coefs$norm2
Z <- matrix(, length(x), n)
Z[, 1] <- 1
Z[, 2] <- x - alpha[1L]
if (degree > 1)
for (i in 2:degree) Z[, i + 1] <- (x - alpha[i]) *
Z[, i] - (norm2[i + 1]/norm2[i]) * Z[, i - 1]
Z <- Z/rep(sqrt(norm2[-1L]), each = length(x))
colnames(Z) <- 0:degree
Z <- Z[, -1, drop = FALSE]
attr(Z, "degree") <- 1L:degree
attr(Z, "coefs") <- list(alpha = alpha, norm2 = norm2)
class(Z) <- c("poly", "matrix")
}
Z
}
################################################
### predict.poly ####
predict.poly <- function (object, newdata, ...)
{
if (missing(newdata))
return(object)
if (is.null(attr(object, "coefs")))
poly(newdata, degree = max(attr(object, "degree")), raw = TRUE)
else poly(newdata, degree = max(attr(object, "degree")),
coefs = attr(object, "coefs"))
}
######################
More information about the R-help
mailing list