ylab
=
"Absolute Residuals"
)
# --- Close out Figure 5.3.1 (residual vs. fitted panels drawn above) ---

# Overall title across the 2x2 panel, written in the outer margin.
mtext(
  "Figure 5.3.1 Tree Data - Residual versus Fitted Plots",
  cex = 1.25,
  outer = TRUE
)

# Restore the default single-panel layout.
par(mfrow = c(1, 1))

# Close the PostScript device.
# Console output when run interactively:
#   null device
#             1
dev.off()
###################################################################
# Example patterns of Q-Q plots based on simulated data from:
#   - Normal distribution
#   - Lognormal (skewed) distribution
#   - Cauchy (long tailed) distribution
#   - Uniform (short tailed) distribution
###################################################################

# Open a PostScript device for Figure 5.4.1.
postscript("LecOct16fig5.ps")

# 2x2 panel layout, with outer margin space reserved for an overall title.
par(mfrow = c(2, 2), oma = c(0, 0, 2, 2))

# One Q-Q plot per panel, each from a sample of size 100.
qqnorm(rnorm(100),
       main = "Q-Q plot, sample from a normal distribution")
qqnorm(exp(rnorm(100)),
       main = "Q-Q plot, sample from a skewed distribution")
qqnorm(rcauchy(100),
       main = "Q-Q plot, sample from a heavy tailed distribution")
qqnorm(runif(100),
       main = "Q-Q plot, sample from a light tailed distribution")

# Overall title in the outer margin.
mtext("Figure 5.4.1 Examples of Q-Q Plots", cex = 1.25, outer = TRUE)

# Restore the default layout and close the device.
# Console output when run interactively:
#   null device
#             1
par(mfrow = c(1, 1))
dev.off()
#####################################################################
# Non-parametric Local Regression by LOWESS
#
# Fit the LOWESS curve to Volume and Girth.
# Fit the LOWESS curve to Volume and Height.
#
# Note: this is done by considering each explanatory variable
# individually, not all together as in the multiple linear
# regression case.
#####################################################################

# Open a PostScript device for Figure 5.2.4.
postscript("LecOct16fig6.ps")

# Two panels side by side, outer margin reserved for an overall title.
par(mfrow = c(1, 2), oma = c(0, 0, 2, 2))

# NOTE(review): `tree` is defined earlier in the session (not visible
# here); presumably the built-in `trees` data frame with columns
# Girth, Height, Volume -- confirm against the earlier handout code.

# Volume vs. Girth, with a LOWESS smooth (smoother span f = 0.6).
plot(
  tree$Girth,
  tree$Volume,
  xlab = "Girth (inches)",
  ylab = "Volume (cubic feet)"
)
lines(lowess(tree$Girth, tree$Volume, f = 0.6), col = 3)

# Volume vs. Height, with a LOWESS smooth (smoother span f = 0.2).
plot(
  tree$Height,
  tree$Volume,
  xlab = "Height (feet)",
  ylab = "Volume (cubic feet)"
)
lines(lowess(tree$Height, tree$Volume, f = 0.2), col = 3)

# Overall title in the outer margin.
mtext(
  "Figure 5.2.4 Tree Data - Volume Versus Predictor Plots",
  cex = 1.25,
  outer = TRUE
)

# Restore the default layout and close the device.
# Console output when run interactively:
#   null device
#             1
par(mfrow = c(1, 1))
dev.off()
Lecture Oct. 18 - Handout
5.5 Residual plots for detecting correlation in the $\epsilon_i$'s
None of the diagnostic plots discussed so far has questioned the assumption that the
random errors are uncorrelated. In general, checking this assumption is very difficult,
if not impossible, by inspection of the data.
Scrutiny of the data collection method is
often all that one can do. For example, if the tree data include adjacent trees, one tree
might shade its neighbour leading to correlation. Care in selecting trees that are widely
separated would make the assumption of uncorrelated errors more credible.
Only if there is some structure to the correlations that might exist do we have some basis
for checking. For example, with temporally (or spatially) related data, it is often reasonable
to suspect that observations close together in time (or in space) are the most likely to be
correlated (e.g. daily stock price data, temperature data, etc.). It is then wise to check the
uncorrelated-errors assumption.
If this assumption is violated, the first-order properties of the least squares estimate
$\hat{\beta}$ will not be affected (e.g. $E(\hat{\beta}) = \beta$), but the second-order,
variance properties will be. As
a matter of fact, a fairly small correlation between the errors may lead to the estimated
variance of $\hat{\beta}$ being an order of magnitude wrong; hence there is potential for
standard errors to be very wrong, with corresponding effects on confidence intervals, etc.
The reason for this is that, although each correlation is small, there are many pairwise
correlations contributing to the true variance.
Graphical checks for correlation in the $\epsilon_i$'s include plots of the residuals
$r_i$ against time and $r_i$