## Question-1 (Sample)

Read Example 3.1 Delivery Time Data.

1. Graphics can be very useful in analyzing the data. Plot two useful visualization of the data. First plot three dimensional scatterplot of delivery time data. Then plot scatterplot matrix (which is an array of 2D plots where each plot is a scatter diagram between two variables).

2. Fit a regression model for the reduced model relating delivery time to number of cases. Plot the joint confidence region of the coefficients (slope and intercept). Also add a point to the plot to show the estimated slope and intercept.

3. Calculate the extra sum of squares due to the regressor variable Distance.

# Computation part of the answer :

x1Cases <- delivery$Cases x2Distance <- delivery$Distance
yTime <- delivery$Time cat ("Part (a) \n") ## Part (a) # 3D scatter diagram library("plot3D") library("scatterplot3d") sc1 <- scatterplot3d(x1Cases, x2Distance, yTime, pch=17 , type = 'p', angle = 15 , highlight.3d = T )  # Plot scatterplot matrix plot(delivery[,-1]) cat("Part (b) \n") ## Part (b) library(ellipse) reducedFit <- lm(Time ~ x1Cases, data = delivery) plot(ellipse(reducedFit), type = "l", xlab = "Intercept", ylab = "Slope", main = "Joint Confidence Region") points (reducedFit$coeff[[1]] , reducedFit$coeff[[2]] ) cat("Part (c) \n") ## Part (c) fullFit <- lm(Time ~ Cases + Distance, data = delivery) reducedSSR <- sum((predict(reducedFit) - mean(yTime))^2) fullSSR <- sum((predict(fullFit) - mean(yTime))^2) cat ( "Extra sum of square due to distance is : " , fullSSR - reducedSSR , "\n") ## Extra sum of square due to distance is : ## 168.4021 ## Question-2 Load the kin viscosity data (explained in Problem 3.14 and table B-10 in the book) at https://math.dartmouth.edu/~m50f17/kin.csv Solve the parts (a) to (e) of the Problem 3.14 and use $$\alpha=0.05$$. In addition, do the following. 1. Calculate the extra sum of squares due to the regressor variable x1. 2. Plot scatterplot matrix and scatter diagram in order to visualize the data. Can you make any connection between the visualization of data and the results you found in previous parts? Discuss. ### Answer: # Computation part of the answer : # Loading data kin <- read.table("https://math.dartmouth.edu/~m50f17/kinematic.csv", header = TRUE, sep=",") x1 <- kin$x1
x2 <- kin$x2 y <- kin$y
k <- 2
p <- k + 1
n <- length(y)

alpha <- 0.05

fitted <- lm(y ~ x1 + x2 )
beta <- fitted$coefficients cat("\n\n Part(a) Fitted multiple linear regression model is yHat = ", beta[1], "+",beta[2], " * x1", "+",beta[3], "* x2") ## ## ## Part(a) ## Fitted multiple linear regression model is ## yHat = 0.679439 + 1.407331 * x1 + -0.01562885 * x2 yHat <- beta[1] + beta[2]*x1 + beta[3]*x2 SSr <- sum((yHat-mean(y))^2) SSres <- sum((yHat-y)^2) SSt <- sum((y-mean(y))^2) # F0 for significance of regression F0 <- (SSr/k)/(SSres/(n-p)) quantile = qf(1-alpha,k,n-p) ; cat("\n\n Part (b) F0 is ", F0 , " and it is much greater than the critical quantile value " , quantile , ". Therefore regression analysis is significant (at least one of the regressors contribute significantly to the model)" )  ## ## ## Part (b) ## F0 is 85.46169 and it is much greater than the critical quantile value 3.251924 . ## Therefore regression analysis is significant (at least one of the regressors contribute significantly to the model) sum1 <- summary(fitted) quantIndiv <- qt(1 - alpha/2, n-k-1 ) cat("\n\n Part (c) Coefficients beta1 and beta2 have t-statistics t0 =", sum1$coefficients[2,3] , " and t0 =" ,  sum1$coefficients[3,3] , " respectively, which are both greater than the critical quantile value " , quantIndiv, " in absolute value. Therefore both contribute significantly to the model." )  ## ## ## Part (c) ## Coefficients beta1 and beta2 have t-statistics t0 = 7.146521 and t0 = -10.94763 ## respectively, which are both greater than the critical quantile value 2.026192 in absolute value. ## Therefore both contribute significantly to the model. fitRed=lm(y~x2) sum2=summary(fitRed) cat("\n\n Part (d) For this model R2 =", sum1$r.squared, ", adjusted R2= " , sum1$adj.r.squared, " which are significantly greater than the temperature only model's R2 =", sum2$r.squared, "and adjusted R2= " , sum2$adj.r.squared, ". Therefore we conclude the ratio of the solvents improves the fit.") ## ## ## Part (d) ## For this model R2 = 0.8220498 , adjusted R2= 0.8124309 which are significantly ## greater than the temperature only model's R2 = 0.5764172 and adjusted R2= 0.5652703 . ## Therefore we conclude the ratio of the solvents improves the fit. q01= qt(1-.01/2,n-k-1) std1 <- sum1$coefficients[2,2]
std2 <- sum1$coefficients[3,2] betaRed <- fitRed$coefficients
std2Red <- sum2$coefficients[2,2] q01Red= qt(1-.01/2,n-k-1 + 1) cat("\n\n Part (e) For this model the 99% CI are : ", beta[3] - std2*q01 , " < beta2 < " , beta[3] + std2*q01 , " For the temperature only model : ", betaRed[2] - std2Red*q01Red , " < beta2 < " , betaRed[2] + std2Red*q01Red , " which is slightly wider than the previous one." ) ## ## ## Part (e) ## For this model the 99% CI are : ## -0.01950537 < beta2 < -0.01175233 ## For the temperature only model : ## -0.0215221 < beta2 < -0.009735601 ## which is slightly wider than the previous one. reducedSSR <- sum((predict(fitRed) - mean(y))^2) fullSSR <- sum((predict(fitted) - mean(y))^2) cat("\n\n Part (f) Extra sum of square due to distance is : " , fullSSR - reducedSSR , "\n") ## ## ## Part (f) ## Extra sum of square due to distance is : ## 3.434923 # Plot scatterplot matrix plot(kin) cat("\n\n Part (g) Looking at the individual plots x1 (or x2) doesn't seem to well explain y alone (these plots looks like they consist of several different curves which suggests to add another regressor). This is parallel to the low R2 for the reduced model calculated above.") ## ## ## Part (g) ## Looking at the individual plots x1 (or x2) doesn't seem ## to well explain y alone (these plots looks like they consist of several different curves ## which suggests to add another regressor). ## This is parallel to the low R2 for the reduced model calculated above. ## Question-3 Load the Mortality data (explained in Problem 3.15 and table B-15 in the book) at https://math.dartmouth.edu/~m50f17/mortality.csv Solve the parts (a) to (e) of the Problem 3.15 (use $$\alpha=0.05$$ if you need). In addition do the following. 1. You want to quantify the contribution of regressors Educ,NOX,SO2 together to the model. Choose $$\alpha=0.01$$. Using F test (the partial F test given in equation 3.35) comment on this contribution to the model. (Note the different $$\alpha$$ value). 2. Consider the individual contribution test you calculated in part (c). Now choose the two regressor variables with the lowest t-statistic values (in absolute value). Using partial F test comment on their contribution to the model. Use $$\alpha=0.01$$. ### Answer: # Computation part of the answer : # Loading data mor <- read.table("https://math.dartmouth.edu/~m50f17/mortality.csv", header = TRUE) prec <- mor$PRECIP
educ <- mor$EDUC nonw <- mor$NONWHITE
nox <- mor$NOX so2 <- mor$SO2
y <- mor$MORT k <- 5 p <- k + 1 n <- length(y) fitted <- lm( y ~ prec + educ + nonw + nox + so2 ) beta <- fitted$coefficients

cat("\n\n Part(a)
Fitted multiple linear regression model is
yHat = ", beta[1], "+",beta[2], " * x1",
"+",beta[3], "* x2",
"+",beta[4], "* x3",
"+",beta[5], "* x4",
"+",beta[6], "* x5"  )
##
##
##  Part(a)
##      Fitted multiple linear regression model is
##      yHat =  995.6365 + 1.40734  * x1 + -14.80139 * x2 + 3.199091 * x3 + -0.1079698 * x4 + 0.3551762 * x5
yHat <- beta[1] + beta[2]*prec + beta[3]*educ + beta[4]*nonw + beta[5]*nox + beta[6]*so2
SSr <- sum((yHat-mean(y))^2)
SSres <- sum((yHat-y)^2)
SSt <- sum((y-mean(y))^2)

# F0 for significance of regression
F0 <- (SSr/k)/(SSres/(n-p))

quantile = qf(1-alpha,k,n-p) ;

cat("\n\n Part (b)
F0 is ", F0 , " and it is much greater than the critical quantile value " , quantile , ".
Therefore regression analysis is significant (at least one of the regressors contribute significantly to the model)" )  
##
##
##  Part (b)
##     F0 is  22.38624  and it is much greater than the critical quantile value  2.38607 .
##     Therefore regression analysis is significant (at least one of the regressors contribute significantly to the model)
sum1 <- summary(fitted)
quantIndiv <- qt(1 - alpha/2, n-k-1 )

cat("\n\n Part (c)
Looking at the table of the  t-statistics below we see that except NOX all regressors contribute significantly to the model. NOX t-statistic is less than (in absolute value) the critical quantile value : ", quantIndiv )  
##
##
##  Part (c)
##     Looking at the table of the  t-statistics below we see that except NOX all regressors contribute significantly to the model. NOX t-statistic is less than (in absolute value) the critical quantile value :  2.004879
sum1$coefficients  ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 995.6364550 91.64099269 10.8645315 3.348290e-15 ## prec 1.4073404 0.68914315 2.0421598 4.603200e-02 ## educ -14.8013907 7.02746635 -2.1062201 3.984867e-02 ## nonw 3.1990908 0.62230854 5.1406827 3.886855e-06 ## nox -0.1079698 0.13502379 -0.7996351 4.274258e-01 ## so2 0.3551762 0.09096026 3.9047402 2.642230e-04 cat("\n\n Part (d) R2 =", sum1$r.squared, ", adjusted R2= " , sum1$adj.r.squared )  ## ## ## Part (d) ## R2 = 0.6745639 , adjusted R2= 0.6444309 q05 <- qt(1-.05/2,n-k-1) std5 <- sum1$coefficients[6,2]
cat("\n\n Part (e)
For this model the 95% CI are :
", beta[6] - std5*q05 , " <  beta5 < " , beta[6] + std5*q05   ) 
##
##
##  Part (e)
##     For this model the 95% CI are :
##      0.1728118  <  beta5 <  0.5375405
fitRed <- lm( y ~ prec + nonw)
sum2 =summary(fitRed)
r <- 3
qf01 <- qf(1-0.01 , r , n-p)
reducedSSR <- sum((predict(fitRed) - mean(y))^2)
fullSSR <- sum((predict(fitted) - mean(y))^2)
F0 <- ((fullSSR - reducedSSR)/r)/(SSres/(n-p))
cat("\n\n Part (f)
F0 is ", F0 , " and it is much greater than the critical quantile value " , qf01 , ".
Therefore regression analysis is significant (at least one of the regressors  Educ,NOX,SO2  contribute significantly to the model)" )  
##
##
##  Part (f)
##     F0 is  10.0446  and it is much greater than the critical quantile value  4.166501 .
##     Therefore regression analysis is significant (at least one of the regressors  Educ,NOX,SO2  contribute significantly to the model)
fitRed <- lm( y ~ educ + nonw + so2)
sum2 =summary(fitRed)
r <- 2
qf01 <- qf(1-0.01 , r , n-p)
reducedSSR <- sum((predict(fitRed) - mean(y))^2)
fullSSR <- sum((predict(fitted) - mean(y))^2)
F0 <- ((fullSSR - reducedSSR)/r)/(SSres/(n-p))
cat("\n\n Part (g)
Lowest two are PRECIP and NOX.
F0 is ", F0 , " and it is smaller than the critical quantile value " , qf01 , ".
F-test suggests that PRECIP and NOX don't contribute significantly to the model
(Note that this is at significance level 0.01, whereas individuals test above were at alpha=0.05)" )  
##
##
##  Part (g)
##     Lowest two are PRECIP and NOX.
##     F0 is  3.713821  and it is smaller than the critical quantile value  5.021217 .
##     F-test suggests that PRECIP and NOX don't contribute significantly to the model
##     (Note that this is at significance level 0.01, whereas individuals test above were at alpha=0.05)