# R program for logistic regression exercise # 2 (VER 16.2)
# Jenny Yu / Henrik Stryhn, March 2014

calf <- read.csv("h:/vhm/vhm802/data_csv/calf.csv")

#Q2: linearity of predictor -age- (several approaches)
summary(calf$age)
calf$age_c4 <- cut(calf$age, breaks=c(0,5,8,13,30), labels=FALSE, right=FALSE)
table(calf$age_c4)
calf.age_c4 <- glm(sepsis~ as.factor(age_c4), family=binomial(), data=calf)
summary(calf.age_c4)
# lintrend-type plot for -age-
age.logits <- coef(glm(sepsis~ as.factor(age_c4)-1, family=binomial(), data=calf))
library(doBy) # package to facilitate processing by groups
t<- summaryBy( age~ age_c4, data=calf, FUN=mean, na.rm=TRUE)
age.means <- t[1:4,2]
plot(age.means, age.logits)
abline(lsfit(age.means, age.logits))
# logistic model with quadratic term
calf.age2 <- glm(sepsis~ I(age-10)+I((age-10)^2), family=binomial(), data=calf)
summary(calf.age2)

#Q3(b): automated backwards elimination, based on best AIC (not same as in the solution)
with(calf, print(table(jnts, sepsis)))
calf$jnts3<- ifelse(calf$jnts>2, 2, calf$jnts)
calf.full <- glm(sepsis~ I(age-10)+I((age-10)^2)+dehy+resp+temp+eye+umb+as.factor(attd)
             +as.factor(jnts3)+as.factor(post), family=binomial(), data=calf)
step(calf.full, direction="backward", trace=TRUE)
# final model refitted with all observations included
calf.finalAIC<- glm(sepsis ~ I(age-10)+I((age-10)^2)+eye+umb+as.factor(attd)+as.factor(post),
                family=binomial(), data=calf)
summary(calf.finalAIC)
drop1(calf.finalAIC, test="LRT")

#Q4: confounding and interaction (based on model in solution)
calf.final<- glm(sepsis ~ I(age-10)+I((age-10)^2)+umb+as.factor(post), family=binomial(), data=calf)
summary(calf.final)
# (a) interaction between -umb- and -post-
calf.int_postumb<- update(calf.final, .~.+umb:as.factor(post))
summary(calf.int_postumb)
anova(calf.final, calf.int_postumb, test="LRT") # interaction not significant
# confounding effects of -umb- and -post-
chisq.test(calf$umb, calf$post) # no significant association
calf.finalnopost <- glm(sepsis ~ I(age-10)+I((age-10)^2)+umb, family=binomial(), 
                    data=calf[!is.na(calf$post),])
summary(calf.finalnopost)
# alternative way to express the selection
calf.finalnopost2 <- glm(sepsis ~ I(age-10)+I((age-10)^2)+umb, family=binomial(), 
                     data=na.omit(calf[,c("sepsis", "post","umb","age")]))
calf.finalnoumb <- glm(sepsis ~ I(age-10)+I((age-10)^2)+as.factor(post), family=binomial(), 
                   data=calf[!is.na(calf$umb),])
summary(calf.finalnoumb)
#* (b) -age- as a confounder
calf.finalnoage <- glm(sepsis ~ umb+as.factor(post), family=binomial(), 
                   data=calf[!is.na(calf$age),])
summary(calf.finalnoage)
