Remember that the variance inflation factor (VIF) for the \(j\)-th covariate is given by

\[ \mathrm{VIF}_j = \frac{1}{1 - R^2_j} \]

where \(R^2_j\) is the \(R^2\) from the regression of \(X_j\) on the other covariates.

However, when first learning about the VIF it can be hard to comprehend. To make it easier, some illustrations are often helpful.

Below is some code to illustrate the variance inflation factor in R.

library ( ggplot2 )
library ( reshape )
require ( tikzDevice )
# ---------------------------------------------------------------------------
# Monte Carlo illustration of the variance inflation factor (VIF).
#
# For three levels of population correlation between the regressors x1 and x2
# (p = 0.1, 0.5, 0.9) the script repeatedly simulates data from
#   Y = a + b1 * x1 + b2 * x2 + e,  e ~ N(0, 1),
# fits OLS, and stores the coefficient estimates and their standard errors.
# It then plots the sampling density of each slope estimate by correlation
# level.
# ---------------------------------------------------------------------------

set.seed(32) # Fix the RNG seed so the simulation is reproducible

sims <- 1000 # Number of simulated data sets per correlation level
a <- 1       # True value of the intercept
b1 <- 4      # True value of the slope on x1
b2 <- 2      # True value of the slope on x2
n <- 100     # Sample size of each simulated data set

# Simulate `sims` data sets in which cor(x1, x2) = p in the population and fit
# Y ~ x1 + x2 by OLS on each one.
#
# Arguments:
#   p      population correlation between x1 and x2 (requires |p| <= 1)
#   sims   number of replications
#   n      observations per replication
#   a, b1, b2  true intercept and slopes of the data-generating process
#
# Returns a list with two sims x 3 matrices (columns: intercept, x1, x2):
#   B  the estimated coefficients
#   V  the estimated standard errors, i.e. sqrt(diag(vcov(model)))
simulate_ols <- function(p, sims, n, a, b1, b2) {
  coefs <- array(0, dim = c(sims, 3))
  std_errs <- array(0, dim = c(sims, 3))
  for (i in seq_len(sims)) {
    u <- rnorm(n, 0, 1)
    v <- rnorm(n, 0, 1)
    # The original line read `2 * u * +2`, which evaluates to 4 * u; the stray
    # `*` is dropped so x1 has the same location and scale as x2 below.
    x1 <- 2 * u + 2
    # x2 mixes u with independent noise v so that cor(x1, x2) = p.
    x2 <- 2 * (p * u + sqrt(1 - p^2) * v) + 2
    Y <- a + b1 * x1 + b2 * x2 + rnorm(n, 0, 1) # True DGP with N(0, 1) error
    model <- lm(Y ~ x1 + x2)                    # Estimate the OLS model
    coefs[i, ] <- coef(model)                   # Store coefficients
    std_errs[i, ] <- sqrt(diag(vcov(model)))    # Store standard errors
  }
  list(B = coefs, V = std_errs)
}

# Run the three scenarios in the original order (low, medium, high) so the
# random number stream is consumed in the same sequence as before.
sim_low <- simulate_ols(0.1, sims, n, a, b1, b2)
B.1 <- sim_low$B
V.1 <- sim_low$V

sim_medium <- simulate_ols(0.5, sims, n, a, b1, b2)
B.2 <- sim_medium$B
V.2 <- sim_medium$V

sim_high <- simulate_ols(0.9, sims, n, a, b1, b2)
B.3 <- sim_high$B
V.3 <- sim_high$V

# Convert each result matrix to a data frame and tag it with its scenario.
# data.frame() names the columns of an unnamed matrix X1, X2, X3, so X2 holds
# the estimates for the x1 slope and X3 those for the x2 slope.
B1 <- data.frame(B.1)
V1 <- data.frame(V.1)
B1$corr <- "A"
V1$corr <- "A"

B2 <- data.frame(B.2)
V2 <- data.frame(V.2)
B2$corr <- "B"
V2$corr <- "B"

B3 <- data.frame(B.3)
V3 <- data.frame(V.3)
B3$corr <- "C"
V3$corr <- "C"

B <- rbind(B1, B2, B3)
V <- rbind(V1, V2, V3)

# Layers shared by both density plots: one density curve per scenario, with
# the scenario codes A/B/C relabelled in the legend.
density_layers <- list(
  geom_density(),
  scale_colour_discrete(
    name = "Correlation",
    breaks = c("A", "B", "C"),
    labels = c("Low", "Medium", "High")
  ),
  xlab(""),
  ylab("Density")
)

# Sampling density of the x1 slope estimate, written to a TikZ file.
# print() is explicit so the plot is drawn even when the script is source()d,
# where top-level autoprinting does not happen.
tikz("densityb1.tex", standAlone = FALSE, width = 4, height = 2.5)
print(ggplot(B, aes(x = X2, colour = corr)) + density_layers)
dev.off()

# Sampling density of the x2 slope estimate, drawn on the active device.
print(ggplot(B, aes(x = X3, colour = corr)) + density_layers)

R code to illustrate VIF in OLS was published on June 20, 2014 and last modified on October 10, 2014.