# clean up workspace
rm(list = ls())
Function for sweep and inverse sweep operator, as defined in Section 7.4 of Kenneth Lange’s book Numerical Analysis for Statisticians. It’s only for demostration purpose, without efficiency optimization.
regsweep <- function(A, k, inv = FALSE) {
d <- A[k, k] # sweep out row k, col k
A[k, ] <- A[k, ] / d
b <- A[, k]
b[k] <- 0 # don't change row k here
A <- A - outer(b, A[k, ]) # main operation
A[, k] <- b / d # fix col k
A[k, k] <- - 1 / d # diagonal element & done
if (inv) { # inverse sweep
A[k, ] = - A[k, ]
A[, k] = - A[, k]
}
regsweep <- A
}
Try on the Longley data
# read the Longley data
all <-
read.table("http://hua-zhou.github.io/teaching/st758-2014fall/longley.dat")
Xy <- cbind(rep(1, 16), as.matrix(all[, c(2:7, 1)])) # X and y
colnames(Xy) <- c("intercept", "GNPdef", "GNP", "unemployed", "armed",
"pop", "year", "employed")
Xy
## intercept GNPdef GNP unemployed armed pop year employed
## [1,] 1 83.0 234289 2356 1590 107608 1947 60323
## [2,] 1 88.5 259426 2325 1456 108632 1948 61122
## [3,] 1 88.2 258054 3682 1616 109773 1949 60171
## [4,] 1 89.5 284599 3351 1650 110929 1950 61187
## [5,] 1 96.2 328975 2099 3099 112075 1951 63221
## [6,] 1 98.1 346999 1932 3594 113270 1952 63639
## [7,] 1 99.0 365385 1870 3547 115094 1953 64989
## [8,] 1 100.0 363112 3578 3350 116219 1954 63761
## [9,] 1 101.2 397469 2904 3048 117388 1955 66019
## [10,] 1 104.6 419180 2822 2857 118734 1956 67857
## [11,] 1 108.4 442769 2936 2798 120445 1957 68169
## [12,] 1 110.8 444546 4681 2637 121950 1958 66513
## [13,] 1 112.6 482704 3813 2552 123366 1959 68655
## [14,] 1 114.2 502601 3931 2514 125368 1960 69564
## [15,] 1 115.7 518173 4806 2572 127852 1961 69331
## [16,] 1 116.9 554894 4007 2827 130081 1962 70551
# Grammian matrix
yXXy <- crossprod(Xy)
yXXy
## intercept GNPdef GNP unemployed armed pop
## intercept 16 1627 6.203e+06 5.109e+04 4.171e+04 1.879e+06
## GNPdef 1627 167172 6.467e+08 5.289e+06 4.293e+06 1.921e+08
## GNP 6203175 646700650 2.553e+12 2.065e+10 1.663e+10 7.387e+11
## unemployed 51093 5289080 2.065e+10 1.763e+08 1.315e+08 6.066e+09
## armed 41707 4293174 1.663e+10 1.315e+08 1.160e+08 4.924e+09
## pop 1878784 192139651 7.387e+11 6.066e+09 4.924e+09 2.213e+11
## year 31272 3180540 1.213e+10 9.991e+07 8.154e+07 3.673e+09
## employed 1045072 106816177 4.103e+11 3.362e+09 2.741e+09 1.231e+11
## year employed
## intercept 3.127e+04 1.045e+06
## GNPdef 3.181e+06 1.068e+08
## GNP 1.213e+10 4.103e+11
## unemployed 9.991e+07 3.362e+09
## armed 8.154e+07 2.741e+09
## pop 3.673e+09 1.231e+11
## year 6.112e+07 2.043e+09
## employed 2.043e+09 6.845e+10
# sweep in 1:7
for (k in 1:7) {
yXXy <- regsweep(yXXy, k)
}
# reg coeff, s.e., and SSE appear
print(yXXy)
## intercept GNPdef GNP unemployed armed
## intercept -8.531e+06 1.667e+02 -2.619e-01 -3.912e+00 -1.129e+00
## GNPdef 1.667e+02 -7.759e-02 1.987e-05 2.477e-04 6.829e-05
## GNP -2.619e-01 1.987e-05 -1.207e-08 -1.664e-07 -3.618e-08
## unemployed -3.912e+00 2.477e-04 -1.664e-07 -2.567e-06 -6.965e-07
## armed -1.129e+00 6.829e-05 -3.618e-08 -6.965e-07 -4.940e-07
## pop 8.896e-01 -1.362e-04 6.788e-08 9.009e-07 9.847e-08
## year 4.363e+03 -7.753e-02 1.316e-04 1.973e-03 5.769e-04
## employed -3.482e+06 1.506e+01 -3.582e-02 -2.020e+00 -1.033e+00
## pop year employed
## intercept 8.896e-01 4.363e+03 -3.482e+06
## GNPdef -1.362e-04 -7.753e-02 1.506e+01
## GNP 6.788e-08 1.316e-04 -3.582e-02
## unemployed 9.009e-07 1.973e-03 -2.020e+00
## armed 9.847e-08 5.769e-04 -1.033e+00
## pop -5.499e-07 -4.301e-04 -5.110e-02
## year -4.301e-04 -2.232e+00 1.829e+03
## employed -5.110e-02 1.829e+03 8.364e+05
# then invere sweep out 1:7
for (k in 1:7) {
yXXy <- regsweep(yXXy, k, inv = TRUE)
}
# last tableau should be (save rounding) same as first
print(yXXy)
## intercept GNPdef GNP unemployed armed pop
## intercept 16 1627 6.203e+06 5.109e+04 4.171e+04 1.879e+06
## GNPdef 1627 167172 6.467e+08 5.289e+06 4.293e+06 1.921e+08
## GNP 6203175 646700659 2.553e+12 2.065e+10 1.663e+10 7.387e+11
## unemployed 51093 5289080 2.065e+10 1.763e+08 1.315e+08 6.066e+09
## armed 41707 4293174 1.663e+10 1.315e+08 1.160e+08 4.924e+09
## pop 1878784 192139654 7.387e+11 6.066e+09 4.924e+09 2.213e+11
## year 31272 3180540 1.213e+10 9.991e+07 8.154e+07 3.673e+09
## employed 1045072 106816179 4.103e+11 3.362e+09 2.741e+09 1.231e+11
## year employed
## intercept 3.127e+04 1.045e+06
## GNPdef 3.181e+06 1.068e+08
## GNP 1.213e+10 4.103e+11
## unemployed 9.991e+07 3.362e+09
## armed 8.154e+07 2.741e+09
## pop 3.673e+09 1.231e+11
## year 6.112e+07 2.043e+09
## employed 2.043e+09 6.845e+10
# clean and close
rm(list = ls())