##################################################
##################################################
#######   Discriminant analysis 
##################################################

#####    Group A = Beach sands, Group B = Offshore sands,
#     Column 2 = Median grain size in mm.,
#     Column 3 = Sorting coefficient
sands = scan(file="/home/lees/CLASS/Data_Analysis/dos_files/SANDS.TXT", skip=1, list(cat='', gs=0, so=0))
sands = scan(file="D:/LEES/CLASSES/Data_Analysis/dos_files/SANDS.TXT", skip=1, list(cat='', gs=0, so=0))

### discriminatn analysis:


plot(sands$gs, sands$so, type='p', pch=sands$cat)


##   calculate the vectors and matrices on table 6-5 page 474 in Davis

###  
d = c( mean(sands$gs[sands$cat=="A"])-mean(sands$gs[sands$cat=="B"]),  mean(sands$so[sands$cat=="A"])-mean(sands$so[sands$cat=="B"]))

na = length(sands$gs[sands$cat=="A"])

nb  = length(sands$gs[sands$cat=="B"])

Va = cbind(sands$gs[sands$cat=="A"]-mean(sands$gs[sands$cat=="A"]), sands$so[sands$cat=="A"]-mean(sands$so[sands$cat=="A"]) )


SPa = t(Va) %*% Va

Vb  = cbind(sands$gs[sands$cat=="B"]-mean(sands$gs[sands$cat=="B"]), sands$so[sands$cat=="B"]-mean(sands$so[sands$cat=="B"]) )

SPb = t(Vb) %*% Vb
###  note table in book has a mistake!
SPool = (SPa + SPb)/(na+nb-2)
###   invS  = ginv(SPool)

### use standard way of solving without library(MASS)
invS =  solve(SPool)

lamb = invS %*% d

### plot data

###  plot line
slope = lamb[2]/lamb[1]
slope = lamb[1]/lamb[2]

xj = c( (mean(sands$gs[sands$cat=="A"])+mean(sands$gs[sands$cat=="B"]))/2,
(mean(sands$so[sands$cat=="A"])+mean(sands$so[sands$cat=="B"]))/2)

plot(sands$gs, sands$so, type='p', pch=sands$cat)

points(xj[1], xj[2], col=2, pch=6)
points(mean(sands$gs[sands$cat=="A"]) , mean(sands$so[sands$cat=="A"]) , col=4, pch=6)
points(mean(sands$gs[sands$cat=="B"]) , mean(sands$so[sands$cat=="B"]) , col=4, pch=6)

cept = xj[2]-xj[1]*slope
abline(cept, slope)

R0 = lamb[1]*xj[1] + lamb[2]* xj[2]
RA = lamb[1]*mean(sands$gs[sands$cat=="A"]) + lamb[2]*mean(sands$so[sands$cat=="A"])
RB = lamb[1]*mean(sands$gs[sands$cat=="B"]) + lamb[2]*mean(sands$so[sands$cat=="B"])



###

Ri = t(lamb) %*% rbind(sands$gs, sands$so)
Ri = as.vector(Ri)

Y = abs(rnorm(length(Ri) ))
col = rep(2, length(sands$cat))
col[sands$cat=="A"] =4
plot(-Ri, Y, type="p",  col=col,  pch=sands$cat, ylab='', xlab="Raw discriminant scores" )

abline(v=-c(RA, R0, RB), col=c(4,2,4) )

title("Figure 6-3 in Davis")
#########################################

AA = hist(sands$gs[sands$cat=="A"])
BB = hist(sands$gs[sands$cat=="B"])


##################################################
##################################################
#######   PCA analysis 
##################################################

t16= scan(file="/home/lees/CLASS/Data_Analysis/dos_files/TABLE612.TXT", skip=1, list(x1=0, x2=0, r=0, ran=0))

t16= scan(file="D:/LEES/CLASSES/Data_Analysis/dos_files/TABLE612.TXT", skip=1, list(x1=0, x2=0, r=0, ran=0))


plot(t16$x1, t16$x2, type='p')
grid()

v = cbind(t16$x1, t16$x2 )


###  use variance function or
##  variance matrix = 
#########    w1 = apply(v, 2, "scale")
##########   w2 = t(w1) %*% w1 / (nrow(w1)-1)

sv = var(v)

esv = eigen(sv)

sr = v %*% esv$vectors

######################     Figure Davis 6-16, page 511
plot(c(-40, 40), c(-40,40), asp=TRUE,type='n')
abline(h=0)
abline(v=0)

arrows(0, 0, sv[1,1], sv[2,1], col=4, lty=2)
arrows(0, 0, -sv[1,1], -sv[2,1], col=4, lty=2)
arrows(0, 0, sv[1,2], sv[2,2], col=2, lty=2)
arrows(0, 0, -sv[1,2], -sv[2,2], col=2, lty=2)

arrows(0, 0, esv$values[1]*esv$vectors[1,1], esv$values[1]*esv$vectors[2,1], col=4, lty=1)
arrows(0, 0, -esv$values[1]*esv$vectors[1,1], -esv$values[1]*esv$vectors[2,1], col=4, lty=1)
arrows(0, 0, esv$values[2]*esv$vectors[1,2], esv$values[2]*esv$vectors[2,2], col=2, lty=1)
arrows(0, 0, -esv$values[2]*esv$vectors[1,2], -esv$values[2]*esv$vectors[2,2], col=2, lty=1)

##  draw the ellipse

phi = -180*atan(esv$vectors[2,1], esv$vectors[1,1])/pi
theta=seq(0,360,by=5)*pi/180
cosp=cos(phi*pi/180)
sinp=sin(phi*pi/180)
a = esv$values[1]
b = esv$values[2]
r=matrix(c(cosp,sinp, -sinp, cosp), ncol=2)
m=matrix(rep(0,2*length(theta)),ncol=2)
m[,1]=a*cos(theta)
m[,2]=b*sin(theta)
nm=m %*% r
lines(nm[,1],nm[,2], col=3)

title("Figure Davis 6-16, page 511")
##################################################

##  to get the figure in the book on page 513 (fig 6-17)
plot(sr[,1], -sr[,2])
title("Davis  513 (fig 6-17)")

##################################################
##################################################

###############  page 520 in Davis

BAR = scan(file="/home/lees/CLASS/Data_Analysis/dos_files/BARATARA.TXT", skip=1, list(t=0, p1=0, p2=0, p3=0, p4=0, p5=0, p6=0, p7=0))

BAR = scan(file="D:/LEES/CLASSES/Data_Analysis/dos_files/BARATARA.TXT", skip=1, list(t=0, p1=0, p2=0, p3=0, p4=0, p5=0, p6=0, p7=0))




v = cbind(BAR$p1,BAR$p2,BAR$p3,BAR$p4,BAR$p5, BAR$p6,BAR$p7 )
sv = var(v)


esv = eigen(sv)
sr = v %*% esv$vectors

syms = rep(1,length(BAR$t))
syms[BAR$t==2]=22
syms[BAR$t==3]=5
syms[BAR$t==4]=2
syms[BAR$t==5]=6

cols = rep(1,length(BAR$t))
cols[BAR$t==2]=2
cols[BAR$t==3]=3
cols[BAR$t==4]=4
cols[BAR$t==5]=5

h = v-mean(v)
tv = (t(h) %*% h)/length(h)
etv = eigen(tv)


h = sweep(v, 2, FUN="mean")


tv = (t(h) %*% h)/length(h)
etv = eigen(tv)

sr = v %*% esv$vectors

plot(sr[,1], sr[,2], asp=TRUE, pch=syms, col=cols  )
title("Davis  522 (fig 6-24)")

#####################################################################


WW = scan(file="D:/LEES/CLASSES/Data_Analysis/dos_files/WELLWATR.TXT", skip=1, list(x1=0, x2=0, x3=0, x4=0, x5=0, n1=0))


w = cbind(WW$x1, WW$x2,WW$x3,WW$x4,WW$x5)

L = w[WW$n1==2,]
A = w[WW$n1==1,]

xlbar = apply(L, 2, mean)
xabar = apply(A, 2, mean)

var(L)

var(A)

############################
or  = scan(file="D:/LEES/CLASSES/Data_Analysis/dos_files/OREODONT.TXT", skip=1, list(nam="", a=0, b=0,c=0, d=0))

a = width of brain case
b = length of cheek tooth
c = length of bulla (depression below opening of ear)
d = depth of bulla 

cols = match(or$nam,unique(or$nam) )


plot(or$a, or$b, col=cols)
plot(or$a, or$c, col=cols)


plot(or$c, or$d, col=cols)


(see page 284 in Davis)


nms = unique(or$nam)


A = cbind(or$a, or$b, or$c, or$d)

va = var(A)

sv = cor(A)
esv = eigen(sv)

### eigne values of the correlation matrix=
esv$values

##  see spectrum of eigenvalues
plot(esv$values)



##  proportions are:
esv$values/sum(esv$values)



sr = A  %*% esv$vectors


###   loadings:


B = cbind(or$a-mean(or$a), or$b-mean(or$b), or$c-mean(or$c), or$d-mean(or$d))


enew = esv$vectors
enew[,2] = -enew[,2]

ga = A  %*%  enew


plot(ga[,1], ga[,2], col=cols)

#########################################

bx  = scan(file="D:/LEES/CLASSES/Data_Analysis/dos_files/BOXES.TXT", skip=1, list(nam="", x1=0, x2=0, x3=0, x4=0, x5=0, x6=0, x7=0))

B = cbind(bx$x1,bx$x2,bx$x3,bx$x4,bx$x5,bx$x6,bx$x7) 

va = var(B)

##  sv = cor(B)
esv = eigen(va)

### eigne values of the correlation matrix=
esv$values

##  see spectrum of eigenvalues
plot(esv$values)


ga = B  %*%  esv$vectors


plot(ga[,1], -ga[,2], type='n', xlab="PC1", ylab="PC2")
text(ga[,1], -ga[,2], labels = bx$nam)
title("Davis Figure 6-22, p 519")

