# read the data
from read_data_fx import read_tb_dataTA
data=read_tb_dataTA(1,1) # this data is given in table 1.1 in the book
X=data[:,0] # Lot size
Y=data[:,1] #Work Hours
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(10,8))
plt.plot(X,Y,'bo',ms=20)
plt.xlim(0,160);
plt.ylim(90,600);
plt.xlabel('Lot size',fontsize=20)
plt.ylabel('Hours',fontsize=20)
# how to import lowess function
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
Z = lowess(Y,X,frac=0.5,it=2)
# farc = neighborhood size, it= number of iterations
# See http://statsmodels.sourceforge.net/devel/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
# for further description
plt.plot(Z[:,0],Z[:,1],'g-',lw=5);
def lin_reg(X,Y):
barX=np.mean(X); barY=np.mean(Y)
XminusbarX=X-barX; YminusbarY=Y-barY
b1=sum(XminusbarX*YminusbarY)/sum(XminusbarX**2)
b0=barY-b1*barX
Yhat=b0+b1*X
e_i=Y-Yhat
sse=np.sum(e_i**2)
ssr=np.sum((Yhat-barY )**2)
return {'Yhat':Yhat,'b0':b0,'b1':b1,'e_i': e_i,'sse':sse,'ssr':ssr}
# confidence band
from scipy import stats
n=len(X)
linreg=lin_reg(X,Y)
Yhat=linreg['Yhat'];
sse=linreg['sse'];
MSE=sse/np.float(n-2)
barX=np.mean(X)
XminusbarX=X-barX
s_of_yh_hat=np.sqrt(MSE*(1.0/n+(X-barX)**2/sum(XminusbarX**2)))
W=np.sqrt(2.0*stats.f.ppf(0.95,2,n-2))
cb_upper=Yhat+W*s_of_yh_hat
cb_lower=Yhat-W*s_of_yh_hat
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(10,8))
plt.plot(X,Y,'bo',ms=20)
plt.xlim(0,160);
plt.ylim(90,600);
# X values are not sorted in this example, sort it to plot the confidence band properly
idx=np.argsort(X)
plt.plot(X[idx],cb_lower[idx],'k--',linewidth=3)
plt.plot(X[idx],cb_upper[idx],'k--',linewidth=3);
plt.plot(Z[:,0],Z[:,1],'g-',lw=5);