
########### %   regression lab 2

## Simulate 100 noisy observations of a quadratic trend.
## Predictor: uniform draws on [0, 100].
x <- runif(n = 100, min = 0, max = 100)
## Response: 2 + 2*x + 0.7*x^2 plus Gaussian noise with sd = 500.
y <- 2 + 2 * x + 0.7 * x^2 + rnorm(n = 100, mean = 0, sd = 500)
## Scatter plot of the raw data.
plot(x, y, type = "p")

####  eigenvalues: http://www.math.hmc.edu/calculus/tutorials/eigenstuff/


###########  %%%%%%%%%%%  correct inversion

## Least-squares fit of the quadratic model y = b0 + b1*x + b2*x^2
## through the normal equations  b = (A'A)^-1 A'y.

## Design matrix: intercept column, x, x^2.
A <- cbind(rep(1, length(x)), x, x^2)

ATA <- t(A) %*% A

## ginv() comes from the MASS package; call it through its namespace so the
## script does not depend on library(MASS) having been run first.
ATAinv <- MASS::ginv(ATA)

b <- ATAinv %*% t(A) %*% y

## Fitted values.
Yhat <- A %*% b

## Data in red, fitted values in blue.
## (`type` must be the string "p"; a bare `p` is an undefined object.)
plot(x, y, type = "p", col = 2)

points(x, Yhat, col = 4)

## Residual diagnostics: residuals against x, then a normal Q-Q plot.
res <- y - Yhat

plot(x, res, type = "p")

qqnorm(res)

###########%%%%%%%%%%%  incorrect inversion

## Least-squares fit of the straight-line model y = b0 + b1*x through the
## normal equations. The design matrix deliberately has no x^2 column here.

## Design matrix: intercept column and x only.
A <- cbind(rep(1, length(x)), x)

ATA <- t(A) %*% A

## ginv() comes from the MASS package; call it through its namespace so the
## script does not depend on library(MASS) having been run first.
ATAinv <- MASS::ginv(ATA)

b <- ATAinv %*% t(A) %*% y

## Fitted values.
Yhat <- A %*% b

## Data in red, fitted values in blue.
## (`type` must be the string "p"; a bare `p` is an undefined object.)
plot(x, y, type = "p", col = 2)

points(x, Yhat, col = 4)

## Residual diagnostics: residuals against x, then a normal Q-Q plot.
res <- y - Yhat

plot(x, res, type = "p")

qqnorm(res)

###  set up a function to do some of the calculations

## Areg: straight-line least-squares fit through the normal equations.
##
## Arguments:
##   x  numeric vector of predictor values
##   y  numeric vector of responses (one per element of x)
##
## Returns a list with
##   A       design matrix (column of ones, x)
##   b       2 x 1 coefficient matrix (intercept, slope)
##   ATAinv  pseudo-inverse of t(A) %*% A
##   Yhat    fitted values, A %*% b
Areg <- function(x, y) {
  A <- cbind(rep(1, length(x)), x)

  ATA <- t(A) %*% A

  ## ginv() is provided by MASS; qualify it so the function also works when
  ## MASS has not been attached with library().
  ATAinv <- MASS::ginv(ATA)

  b <- ATAinv %*% t(A) %*% y

  Yhat <- A %*% b

  list(A = A, b = b, ATAinv = ATAinv, Yhat = Yhat)
}

###########%%%%%%%%%%%%%%%%%%

## Usage of lm(), copied from its help page (see ?lm) for reference only:
##
##     lm(formula, data, subset, weights, na.action,
##        method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE,
##        singular.ok = TRUE, contrasts = NULL, offset = NULL, ...)

 

 

## Straight-line fit with lm().
g1 <- lm(y ~ x)

plot(x, y, type = "p")

## Overlay the fitted line and the fitted values (blue).
abline(coef(g1))
points(x, fitted(g1), col = 4)

## The straight line describes the data poorly,
## so fit a second model that also includes x^2.
g2 <- lm(y ~ cbind(x, x^2))
points(x, fitted(g2), col = 5, pch = 3)

## Here we know in advance that the quadratic model is the better one;
## with real data we would not know this a priori.

## Components stored in the fitted model object.
names(g1)
## %%%%%%%%%%%

## %%%   example from page 198 in Davis

## Example data: two replicate sets (flag = 1, 2) of y measured at the same
## eight x values. The records are embedded through scan(text = ...) so the
## script can be source()d; scan(file = '') would instead wait for console
## input and leave the data lines to be parsed as R code.
LOU <- scan(
  text = "0 124 1
5 78 1
10 54 1
15 35 1
20 30 1
25 21 1
30 22 1
35 18 1
0 137 2
5 84 2
10 50 2
15 32 2
20 28 2
25 24 2
30 23 2
35 20 2",
  what = list(x = 0, y = 0, flag = 0)
)

## Work with the first replicate set only (flag == 1).
x <- LOU$x[LOU$flag == 1]
y <- LOU$y[LOU$flag == 1]

plot(x, y, type = "p")

## Total sum of squares about the mean.
SST <- sum((y - mean(y))^2)

## Straight-line fit and the sum of squares it explains.
yh <- Areg(x, y)
SSR <- sum((yh$Yhat - mean(y))^2)

## Whatever the regression does not explain is error.
SSE <- SST - SSR

## Degrees of freedom: 1 for the slope, n - 1 in total, the rest for error.
dfR <- 1
dfT <- length(y) - 1
dfE <- dfT - dfR

## Mean squares.
MSSR <- SSR / dfR
MSSE <- SSE / dfE

## Observed F ratio and the 95% critical value to compare it with.
FT <- MSSR / MSSE
Fcrit <- qf(0.95, dfR, dfE)

#############  %%%%%%%%%%%

#############  %%%   Davis page 212

 

## here we have duplicate measurements at each point.

##  this allows us to estimate a goodness of fit

 

## Use both replicate sets.
x <- LOU$x

y <- LOU$y

## (`type` must be the string "p"; a bare `p` is an undefined object.)
plot(x, y, type = "p")

## Total sum of squares for the data used in this section. It is recomputed
## here because any earlier SST was calculated from the flag == 1 subset only.
SST <- sum((y - mean(y))^2)

## ---- simple linear regression ----

yh <- Areg(x, y)

SSR <- sum((yh$Yhat - mean(y))^2)

## ---- quadratic regression ----
## (translated from the MATLAB original:
##    Aq = [ones(size(x)) x x.*x]; ATA = Aq'*Aq; ATAinv = inv(ATA); ...)

## Design matrix: intercept column, x, x^2.
Aq <- cbind(rep(1, length(x)), x, x^2)

ATA <- t(Aq) %*% Aq

## solve() with a single matrix argument returns its inverse (MATLAB inv()).
ATAinv <- solve(ATA)

bq <- ATAinv %*% t(Aq) %*% y

Yhatq <- Aq %*% bq

## Sum of squares explained by the quadratic model.
SSQ <- sum((Yhatq - mean(y))^2)

## Additional sum of squares gained by adding the x^2 term.
SSAdd <- SSQ - SSR

## Deviation from the quadratic fit.
Qdev <- SST - SSQ

 

### %%%% quadratic regression
########%%%%%%%%%%%

########%%%   example from page 212 in Davis

########%  read in OK data

## Translated from MATLAB: x = OK(:,1); y = OK(:,2); plot(x,y,'ro'); SST = ...
## NOTE(review): `OK` is never created in this script (the "read in OK data"
## step above is only a comment). Columns 1 and 2 are used as x and y, so it
## is presumably a matrix or data frame loaded beforehand -- confirm the source.
if (exists("OK")) {
  x <- OK[, 1]

  y <- OK[, 2]

  ## MATLAB 'ro' = red circle markers.
  plot(x, y, col = "red", pch = 1)

  ## Total sum of squares about the mean.
  SST <- sum((y - mean(y))^2)
} else {
  message("Object 'OK' not found: load the OK data before running this section.")
}

 

#########################################
#####  pure error

x <- 1:10

## y is linearly related to x, with noise.
## NOTE(review): rnorm() is called with mean = x, so the simulated trend is
## 3 + 1.7 * x rather than 3 + 0.7 * x -- confirm whether mean = 0 was intended.
y <- x * 0.7 + 3 + rnorm(10, x, 2)

plot(x, y)

## ---- create pure-error data ----
## n[i] replicate observations (between 3 and 12) are generated at each x[i].
n <- 3 + floor(length(x) * runif(length(x)))

## Replicated predictor: x[i] repeated n[i] times.
xz <- as.numeric(rep(x, times = n))

## Replicated response. rnorm() draws its values sequentially, so this single
## vectorised call yields the same numbers as calling rnorm(n[i], mean = y[i])
## once per i in a loop.
## NOTE(review): the noise is drawn with mean = y[i] and then added to y[i],
## so the replicates are centred on 2 * y[i] -- confirm whether mean = 0 was
## intended.
z <- rep(y, times = n) + rnorm(sum(n), mean = rep(y, times = n), sd = 1)

plot(xz, z)
abline(lm(z ~ xz))


###  now we have a data-set


## LMPE: lack-of-fit / pure-error decomposition for a straight-line regression
## of z on xz, where some xz values are repeated.
##
## Arguments:
##   xz  numeric vector of predictor values (repeats define the groups)
##   z   numeric vector of responses, same length as xz
##
## Returns a list with
##   N      number of observations
##   nres   residual degrees of freedom (N - 2)
##   ens    group sizes, in order of first appearance of each xz value
##   ne     pure-error degrees of freedom (sum(ens) - number of groups)
##   spe    within-group sums of squares about the group means
##   SSPE   pure-error sum of squares, sum(spe)
##   SE2    pure-error mean square, SSPE / ne
##   SSres  residual sum of squares of lm(z ~ xz)
##   nlof   lack-of-fit degrees of freedom (nres - ne)
##   SSLOF  lack-of-fit sum of squares (SSres - SSPE)
##   MSlof  lack-of-fit mean square, SSLOF / nlof
##   testv  95% critical value qf(0.95, nlof, ne)
##   REG    the fitted lm object
##   MSres  residual mean square, SSres / nres
##   Flof   observed lack-of-fit F ratio MSlof / SE2, to compare with testv
LMPE <- function(xz, z) {
  ## Group responses by distinct xz value. match() keeps exact numeric
  ## equality and the first-appearance order of the groups.
  ## Assumes xz contains no NA values.
  x_levels <- unique(xz)
  m <- length(x_levels)
  groups <- split(z, factor(match(xz, x_levels), levels = seq_len(m)))

  ## Within-group sum of squares and group size for every distinct xz.
  spe <- vapply(groups, function(v) sum((v - mean(v))^2), numeric(1),
                USE.NAMES = FALSE)
  ens <- as.numeric(lengths(groups, use.names = FALSE))

  ## Pure error.
  SSPE <- sum(spe)
  ne <- sum(ens) - m
  SE2 <- SSPE / ne

  ## Straight-line regression and its residual sum of squares.
  reg <- lm(z ~ xz)
  SSres <- sum(reg$residuals^2)

  N <- length(z)
  nres <- N - 2
  MSres <- SSres / nres

  ## Lack of fit = residual minus pure error.
  nlof <- nres - ne
  SSLOF <- SSres - SSPE
  MSlof <- SSLOF / nlof

  ## Observed F ratio and its 95% critical value.
  Flof <- MSlof / SE2
  testv <- qf(0.95, nlof, ne)

  list(N = N, nres = nres, ens = ens, ne = ne, spe = spe, SSPE = SSPE,
       SE2 = SE2, SSres = SSres, nlof = nlof, SSLOF = SSLOF, MSlof = MSlof,
       testv = testv, REG = reg, MSres = MSres, Flof = Flof)
}
 
##############################################

## Test data with repeated x values, used to exercise LMPE().
## The records are embedded through scan(text = ...) so the script can be
## source()d; scan() with no file would instead wait for console input and
## leave the data lines to be parsed as R code.
testd <- scan(
  text = "1.3 2.3
1.3 1.8
2.0 2.8
2.0 1.5
2.7 2.2
3.3 3.8
3.3 1.8
3.7 3.7
3.7 1.7
4.0 2.8
4.0 2.8
4.0 2.2
4.7 5.4
4.7 3.2
4.7 1.9
5.0 1.8
5.3 3.5
5.3 2.8
5.3 2.1
5.7 3.4
6.0 3.2
6.0 3.0
6.3 3.0
6.7 5.9",
  what = list(x = 0, y = 0)
)



## Run the lack-of-fit analysis on the test data.
G <- LMPE(xz = testd$x, z = testd$y)
