SlidesRLec7

Stat 302 Statistical Software and Its...

=

spirit <- read.csv("SpiritStLouis.csv",header=T) names(spirit) [1] "gas" "weight" "headwind" "TO.distance" par(mfrow=c(1,2)) with(spirit,plot(gas,weight)) fit <- with(spirit,lm(weight~gas)) fit\$coef (Intercept) gas 2378.137922 6.063311 # pretty good!!? abline(fit,col="blue") qqnorm(fit\$residuals) qqline(fit\$residuals)
50 100 150 200 250 300 3000 3500 4000 gas weight -1.0 -0.5 0.0 0.5 1.0 -5 0 5 Normal Q-Q Plot Theoretical Quantiles Sample Quantiles

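Model: $y = \text{weight}$, $x = \text{gas}$, with

$$y_i = \alpha + \beta x_i + \epsilon_i, \qquad \epsilon_i \ \text{i.i.d.}\ N(0, \sigma^2),$$

where $\epsilon_i$ is the error term for the $i$th observation. Least squares chooses $\alpha$ and $\beta$ to minimize

$$\sum_{i=1}^{n} (y_i - \alpha - \beta x_i)^2 = \sum_{i=1}^{n} \bigl( (y_i - \bar{y}) + (\bar{y} - \beta \bar{x} - \alpha) - \beta (x_i - \bar{x}) \bigr)^2$$

$$= \sum_{i=1}^{n} (y_i - \bar{y})^2 + n(\bar{y} - \beta \bar{x} - \alpha)^2 + \beta^2 \overbrace{\sum_{i=1}^{n} (x_i - \bar{x})^2}^{SXX} - 2\beta \overbrace{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})}^{SXY}$$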
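$$= \sum_{i=1}^{n} (y_i - \bar{y})^2 + n(\bar{y} - \beta \bar{x} - \alpha)^2 + SXX \left( \beta^2 - 2\beta \frac{SXY}{SXX} + \frac{SXY^2}{SXX^2} \right) - SXX \, \frac{SXY^2}{SXX^2}$$

$$= \sum_{i=1}^{n} (y_i - \bar{y})^2 + n(\bar{y} - \beta \bar{x} - \alpha)^2 + SXX \left( \beta - \frac{SXY}{SXX} \right)^2 - \frac{SXY^2}{SXX}$$

This is minimized by

$$\beta = \hat{\beta} = \frac{SXY}{SXX}, \qquad \alpha = \hat{\alpha} = \bar{y} - \hat{\beta} \bar{x}.$$

Fitted values: $\hat{y}_i = \hat{\alpha} + \hat{\beta} x_i = \bar{y} + \hat{\beta} (x_i - \bar{x})$, so that $\bar{\hat{y}} = \bar{y}$.

$$RSS = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 = SYY - \frac{SXY^2}{SXX}$$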

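$$SYY = \sum_{i=1}^{n} (y_i - \bar{y})^2 = \sum_{i=1}^{n} (y_i - \hat{y}_i + \hat{y}_i - \bar{y})^2 = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 + \sum_{i=1}^{n} (\hat{y}_i - \bar{y})^2 + 2 \sum_{i=1}^{n} (y_i - \hat{y}_i)(\hat{y}_i - \bar{y}) = RSS + SS_{reg}$$

where $SS_{reg} = \sum_{i=1}^{n} (\hat{y}_i - \bar{y})^2$, because the cross term vanishes:

$$\sum_{i=1}^{n} (y_i - \hat{y}_i)(\hat{y}_i - \bar{y}) = \sum_{i=1}^{n} \bigl( y_i - \bar{y} - \hat{\beta} (x_i - \bar{x}) \bigr) (x_i - \bar{x}) \, \hat{\beta} = \hat{\beta} \, SXY - \hat{\beta}^2 \, SXX = 0$$

$$SYY - RSS = SS_{reg} = \frac{SXY^2}{SXX}$$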
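$R^2$:

$$R^2 = \frac{SYY - RSS}{SYY} = \frac{SS_{reg}}{SYY} = \frac{SXY^2}{SXX \cdot SYY} = 1 - \frac{RSS / (n - 1)}{SYY / (n - 1)}$$

= proportion of the variation in $Y$ explained by $X$. $R^2$ is the square of the sample correlation between $x$ and $y$, $r = SXY / \sqrt{SXX \cdot SYY}$.

Adjusted $R^2 = \bar{R}^2$, using the $n - 2$ degrees of freedom of $RSS$:

$$\bar{R}^2 = 1 - \frac{RSS / (n - 2)}{SYY / (n - 1)} = 1 - \frac{n - 1}{n - 2} \, (1 - R^2)$$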

with(spirit,plot(gas,weight)) # avoids the clumsy plot(spirit\$gas,spirit\$weight, xlab="gas",ylab="weight") # similarly, use fit <- with(spirit,lm(weight~gas)) # but not with(spirit,fit <- lm(weight~gas)) # Hall’s report gives gasoline at # 6.12 lbs per gallon # It would seem that they figured # the weight of aircraft from that
with(spirit, plot(TO.distance,weight, xlim=c(200,3000),ylim=c(2000,6000))) x <- seq(200,3000,10) y <- 10^2.6503023 * x^0.3237002; lines(x,y) points(c(2000,3000),c(5000,5500),pch=16,col="red") with(spirit, plot(TO.distance,weight,log="xy", xlim=c(200,3000),ylim=c(2000,6000))) fit <- with(spirit, lm(log10(weight)~log10(TO.distance))) fit\$coef (Intercept) log10(TO.distance) 2.6503023 0.3237002 abline(fit,col="blue") points(c(2000,3000),c(5000,5500),pch=16,col="red") qqnorm(fit\$residuals); qqline(fit\$residuals) with(spirit,plot(headwind,fit\$residuals)) abline(h=0)

500 1000 1500 2000 2500 3000 2000 3000 4000 5000 6000 TO.distance weight 200 500 1000 2000 2000 3000 4000 5000 6000 TO.distance weight
-1.0 -0.5 0.0 0.5 1.0 -0.004 -0.002 0.000 0.002 Normal Q-Q Plot Theoretical Quantiles Sample Quantiles 0 2 4 6 8 -0.004 -0.002 0.000 0.002 headwind fit\$residuals

log = "xy" # plots both axes on log10 basis, note ticks

$$\log_{10}(y) = \alpha + \beta \log_{10}(x) \iff y = 10^{\alpha} \cdot x^{\beta}$$

headwind x y
Davis car install.packages("car") library(car) > names(Davis) [1] "sex" "weight" "height" "repwt" "repht" > attach(Davis) # allows using weight in place of Davis\$weight > Davis.model <- lm(weight~repwt) > summary(Davis.model)

Call: lm(formula = weight ~ repwt)
Residuals:
    Min      1Q  Median      3Q     Max
 -7.048  -1.868  -0.728   0.601 108.705
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   5.3363     3.0369   1.757   0.0806 .
repwt         0.9278     0.0453  20.484   <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 8.419 on 181 degrees of freedom
  (17 observ. deleted due to missingness)
Multiple R-squared: 0.6986, Adj. R-squared: 0.697
F-stat.: 419.6 on 1 and 181 DF, p-value: < 2.2e-16
> plot(repwt,weight) > abline(Davis.model) > abline(0,1,lty=2)

