# R program for additional exercise 7.4 (VHM 802)

fish <- read.csv("r:/hs07_4.csv")

# define factors, to ease notation
fish$Temp <- as.factor(fish$temp)
fish$Oxygen <- as.factor(fish$oxygen)
fish$Cyanide <- as.factor(fish$cyanide)

summary (lm(surv ~ Temp*Oxygen*Cyanide, data=fish))
# residual DF=0 so not useful

fish.full <- lm(surv ~ Temp*Oxygen*Cyanide-Temp:Oxygen:Cyanide, data=fish)
summary(fish.full)
anova(fish.full)

# residual analysis
fish.diag <- fish #all diagnostics to be stored in btb.diag (for convenience, not at all necessary)
fish.diag$fit <- fitted(fish.full) 
fish.diag$stdres <- rstandard(fish.full)
plot(stdres~fit, data=fish.diag)
hist(fish.diag$stdres, nclass=7) # no overlaid normal distribution
qqnorm(fish.diag$stdres)
qqline(fish.diag$stdres)
shapiro.test(fish.diag$stdres)

# box-cox analysis
library(MASS)
bc <- boxcox(surv ~ Temp*Oxygen*Cyanide-Temp:Oxygen:Cyanide, data=fish, lambda=seq(-1, 1, 0.05))
bc <- boxcox(surv ~ Temp*Oxygen*Cyanide-Temp:Oxygen:Cyanide, data=fish, lambda=seq(0, 1.2, 0.01))
bc

# analysis on square-root scale
fish.sqrt.full <- lm(sqrt(surv) ~ Temp*Oxygen*Cyanide-Temp:Oxygen:Cyanide, data=fish)
summary(fish.sqrt.full)
anova(fish.sqrt.full)
# no diagnostics included here: use same method as above

# prior to quantitative modelling of predictors, we remove non-sign. interactions
fish.sqrt.red <- lm(sqrt(surv) ~ Temp+Oxygen*Cyanide, data=fish)
summary(fish.sqrt.red)
anova(fish.sqrt.red)
anova(fish.sqrt.full,fish.sqrt.red)
# quantitative modelling of temp
fish.sqrt.temp <- lm(sqrt(surv) ~ temp+Oxygen*Cyanide, data=fish)
summary(fish.sqrt.temp)
anova(fish.sqrt.temp)
anova(fish.sqrt.temp,fish.sqrt.red)
# interaction plot for oxygen:cyanide
interaction.plot(x.factor=fish$cyanide, trace.factor=fish$oxygen, response=sqrt(fish$surv), fun=mean)
# note: we can use simple here means because the data is balanced

# pairwise comparisons for oxygen:cyanide
# to simplify setting up the contrasts, we refit without main effects
fish.sqrt.temp2 <- lm(sqrt(surv) ~ temp+Oxygen:Cyanide-1, data=fish)
summary(fish.sqrt.temp2) # same model!
# oxygen comparisons within cyanide, using contrasts
coef <- coef(fish.sqrt.temp2)
vcov <- vcov(fish.sqrt.temp2)
w<- matrix(c(0,1,-1,0, 0,0,0, 0,0,0, 0,0,0, 0,0,0,
             0,1,0,-1, 0,0,0, 0,0,0, 0,0,0, 0,0,0, 
             0,0,1,-1, 0,0,0, 0,0,0, 0,0,0, 0,0,0,
             0,0,0,0, 1,-1,0, 0,0,0, 0,0,0, 0,0,0,
             0,0,0,0, 1,0,-1, 0,0,0, 0,0,0, 0,0,0, 
             0,0,0,0, 0,1,-1, 0,0,0, 0,0,0, 0,0,0,
             0,0,0,0, 0,0,0, 1,-1,0, 0,0,0, 0,0,0,
             0,0,0,0, 0,0,0, 1,0,-1, 0,0,0, 0,0,0, 
             0,0,0,0, 0,0,0, 0,1,-1, 0,0,0, 0,0,0,
             0,0,0,0, 0,0,0, 0,0,0, 1,-1,0, 0,0,0,
             0,0,0,0, 0,0,0, 0,0,0, 1,0,-1, 0,0,0, 
             0,0,0,0, 0,0,0, 0,0,0, 0,1,-1, 0,0,0,
             0,0,0,0, 0,0,0, 0,0,0, 0,0,0, 1,-1,0,
             0,0,0,0, 0,0,0, 0,0,0, 0,0,0, 1,0,-1, 
             0,0,0,0, 0,0,0, 0,0,0, 0,0,0, 0,1,-1), ncol=15); w
est<- t(w)%*%coef
var <- diag(t(w)%*%vcov%*%w)
se <- matrix(sqrt(var), ncol=1)
t<- est/se
p<- 2*(1-pt(abs(t),df.residual(fish.sqrt.temp2)))
data.frame(est,se,t,p)
# cyanide comparisons within oxygen, using contrasts
# first reorder coefficients to simplify the contrasts
fish.sqrt.temp3 <- lm(sqrt(surv) ~ temp+Cyanide:Oxygen-1, data=fish)
summary(fish.sqrt.temp3) # same model!
coef <- coef(fish.sqrt.temp3)
vcov <- vcov(fish.sqrt.temp3)
w<- matrix(c(0,1,-1,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
             0,1,0,-1,0,0, 0,0,0,0,0, 0,0,0,0,0,
             0,1,0,0,-1,0, 0,0,0,0,0, 0,0,0,0,0,
             0,1,0,0,0,-1, 0,0,0,0,0, 0,0,0,0,0,
             0,0,1,-1,0,0, 0,0,0,0,0, 0,0,0,0,0,
             0,0,1,0,-1,0, 0,0,0,0,0, 0,0,0,0,0,
             0,0,1,0,0,-1, 0,0,0,0,0, 0,0,0,0,0,
             0,0,0,1,-1,0, 0,0,0,0,0, 0,0,0,0,0,
             0,0,0,1,0,-1, 0,0,0,0,0, 0,0,0,0,0,
             0,0,0,0,1,-1, 0,0,0,0,0, 0,0,0,0,0,

             0,0,0,0,0,0, 1,-1,0,0,0, 0,0,0,0,0,
             0,0,0,0,0,0, 1,0,-1,0,0, 0,0,0,0,0,
             0,0,0,0,0,0, 1,0,0,-1,0, 0,0,0,0,0,
             0,0,0,0,0,0, 1,0,0,0,-1, 0,0,0,0,0,
             0,0,0,0,0,0, 0,1,-1,0,0, 0,0,0,0,0,
             0,0,0,0,0,0, 0,1,0,-1,0, 0,0,0,0,0,
             0,0,0,0,0,0, 0,1,0,0,-1, 0,0,0,0,0,
             0,0,0,0,0,0, 0,0,1,-1,0, 0,0,0,0,0,
             0,0,0,0,0,0, 0,0,1,0,-1, 0,0,0,0,0,
             0,0,0,0,0,0, 0,0,0,1,-1, 0,0,0,0,0,

             0,0,0,0,0,0, 0,0,0,0,0, 1,-1,0,0,0,
             0,0,0,0,0,0, 0,0,0,0,0, 1,0,-1,0,0,
             0,0,0,0,0,0, 0,0,0,0,0, 1,0,0,-1,0,
             0,0,0,0,0,0, 0,0,0,0,0, 1,0,0,0,-1,
             0,0,0,0,0,0, 0,0,0,0,0, 0,1,-1,0,0,
             0,0,0,0,0,0, 0,0,0,0,0, 0,1,0,-1,0,
             0,0,0,0,0,0, 0,0,0,0,0, 0,1,0,0,-1,
             0,0,0,0,0,0, 0,0,0,0,0, 0,0,1,-1,0,
             0,0,0,0,0,0, 0,0,0,0,0, 0,0,1,0,-1,
             0,0,0,0,0,0, 0,0,0,0,0, 0,0,0,1,-1), ncol=30); w
est<- t(w)%*%coef
var <- diag(t(w)%*%vcov%*%w)
se <- matrix(sqrt(var), ncol=1)
t<- est/se
p<- 2*(1-pt(abs(t),df.residual(fish.sqrt.temp3)))
data.frame(est,se,t,p)

# oxygen as a covariate
fish.sqrt.tempoxy <- lm(sqrt(surv) ~ temp+oxygen*Cyanide, data=fish)
summary(fish.sqrt.tempoxy)
anova(fish.sqrt.tempoxy)
anova(fish.sqrt.temp,fish.sqrt.tempoxy) # not a good fit

# cyanide as a covariate
fish.sqrt.tempcyan <- lm(sqrt(surv) ~ temp+Oxygen*cyanide, data=fish)
summary(fish.sqrt.tempcyan)
anova(fish.sqrt.tempcyan)
anova(fish.sqrt.temp,fish.sqrt.tempcyan) # absolutely not linear
