In [1]:
# ----------------------------------------------------------------
# How to do Linear Regression in R Using Stepwise Regression
# ----------------------------------------------------------------

# Load the built-in longley econometrics data set as a numeric matrix.
Data <- as.matrix(longley)

# Split the matrix into the six predictor columns and the seventh column.
# NOTE(review): the formula-based fit below reads `longley` directly and does
# not use `x` / `y`; they are kept in case later code relies on them.
x <- Data[, 1:6]
y <- Data[, 7]

# -----------------------------
# Using Stepwise Regression
# -----------------------------

# Fit the full model: Employed regressed on every other column of longley.
base <- lm(Employed ~ ., data = longley)

# Summarize the fit (explicit print so it also shows under source()).
print(summary(base))

# Perform step-wise feature selection. With no `scope` argument, step()
# defaults to backward elimination and compares candidate models by AIC,
# printing a trace of every step.
fit <- step(base)

# Summarize the selected model.
print(summary(fit))

# Make predictions on the same data the model was fitted to.
predictions <- predict(fit, newdata = longley)

# Summarize accuracy: mean squared error on the fitting data (in-sample).
mse <- mean((longley$Employed - predictions)^2)
print(mse)

# Visualise the regression: observed vs. predicted values, with the y = x
# reference line on which perfect predictions would fall.
plot(
  longley$Employed, predictions,
  xlab = "Observed Employed",
  ylab = "Predicted Employed"
)
abline(a = 0, b = 1, lty = 2)
lm(formula = Employed ~ ., data = longley)

     Min       1Q   Median       3Q      Max 
-0.41011 -0.15767 -0.02816  0.10155  0.45539 

               Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -3.482e+03  8.904e+02  -3.911 0.003560 ** 
GNP.deflator  1.506e-02  8.492e-02   0.177 0.863141    
GNP          -3.582e-02  3.349e-02  -1.070 0.312681    
Unemployed   -2.020e-02  4.884e-03  -4.136 0.002535 ** 
Armed.Forces -1.033e-02  2.143e-03  -4.822 0.000944 ***
Population   -5.110e-02  2.261e-01  -0.226 0.826212    
Year          1.829e+00  4.555e-01   4.016 0.003037 ** 
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3049 on 9 degrees of freedom
Multiple R-squared:  0.9955,	Adjusted R-squared:  0.9925 
F-statistic: 330.3 on 6 and 9 DF,  p-value: 4.984e-10
Start:  AIC=-33.22
Employed ~ GNP.deflator + GNP + Unemployed + Armed.Forces + Population + 
    Year

               Df Sum of Sq     RSS     AIC
- GNP.deflator  1   0.00292 0.83935 -35.163
- Population    1   0.00475 0.84117 -35.129
- GNP           1   0.10631 0.94273 -33.305
<none>                      0.83642 -33.219
- Year          1   1.49881 2.33524 -18.792
- Unemployed    1   1.59014 2.42656 -18.178
- Armed.Forces  1   2.16091 2.99733 -14.798

Step:  AIC=-35.16
Employed ~ GNP + Unemployed + Armed.Forces + Population + Year

               Df Sum of Sq    RSS     AIC
- Population    1   0.01933 0.8587 -36.799
<none>                      0.8393 -35.163
- GNP           1   0.14637 0.9857 -34.592
- Year          1   1.52725 2.3666 -20.578
- Unemployed    1   2.18989 3.0292 -16.628
- Armed.Forces  1   2.39752 3.2369 -15.568

Step:  AIC=-36.8
Employed ~ GNP + Unemployed + Armed.Forces + Year

               Df Sum of Sq    RSS     AIC
<none>                      0.8587 -36.799
- GNP           1    0.4647 1.3234 -31.879
- Year          1    1.8980 2.7567 -20.137
- Armed.Forces  1    2.3806 3.2393 -17.556
- Unemployed    1    4.0491 4.9077 -10.908
lm(formula = Employed ~ GNP + Unemployed + Armed.Forces + Year, 
    data = longley)

     Min       1Q   Median       3Q      Max 
-0.42165 -0.12457 -0.02416  0.08369  0.45268 

               Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -3.599e+03  7.406e+02  -4.859 0.000503 ***
GNP          -4.019e-02  1.647e-02  -2.440 0.032833 *  
Unemployed   -2.088e-02  2.900e-03  -7.202 1.75e-05 ***
Armed.Forces -1.015e-02  1.837e-03  -5.522 0.000180 ***
Year          1.887e+00  3.828e-01   4.931 0.000449 ***
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2794 on 11 degrees of freedom
Multiple R-squared:  0.9954,	Adjusted R-squared:  0.9937 
F-statistic: 589.8 on 4 and 11 DF,  p-value: 9.5e-13
[1] 0.05366753