* do-file for Additional exercise 10.7, VHM 802
version 15 /* works also with versions 13-14 */
set more off
cd "r:\"

import delimited hs10_7.csv, clear
rename salsolinol sals
* profile plots
twoway (connected sals day), by(group person)
xtline sals if group==1, i(person) t(day) overlay
xtline sals if group==2, i(person) t(day) overlay
overlay sals day if group==1, by(person) c(l)
overlay sals day if group==2, by(person) c(l)

* anova for hierarchical model
anova sals group / person|group day day#group
* residuals analysis for split-plot model
predict yhat
predict stdres, rstandard
scatter stdres yhat
* Box-Cox analysis although not quite correct because the
* analysis has fixed subject effects instead of random person effects
xi: boxcox sals i.group*i.day i.person*i.group
drop _I* /* cleaning up after xi: */

* log-transformation
generate lnsals=ln(sals)
* new profile plots on log-scale
twoway (connected lnsals day), by(group person)
overlay lnsals day if group==1, by(person) c(l)
overlay lnsals day if group==2, by(person) c(l)
* anova for hierarchical model on log-scale
anova lnsals group / person|group day##group
* residuals analysis
predict yhat2
predict stdres2, rstandard
scatter stdres2 yhat2
qnorm stdres2
swilk stdres2
* random effect checks
preserve /* run together with next 5/7 lines */
collapse (mean) lnsals, by(group person)
anova lnsals group
predict pred, xb
predict stdres, rstandard
scatter stdres pred
qnorm stdres
swilk stdres

* analysis for each time point 
* could also use anova or oneway or ttest commands 
regress lnsals i.group if day==1
regress lnsals i.group if day==2
regress lnsals i.group if day==3
regress lnsals i.group if day==4
* for demonstration purposes: reshape to wide data format
generate personid=group*100+person 
keep group personid day lnsals
reshape wide lnsals, i(personid) j(day)
regress lnsals1 i.group /* similarly for lnsals2-lnsals4 */

import delimited data_csv\hs10_7.csv, clear
generate lnsals=ln(salsolinol)
* repeated measures ANOVA
anova lnsals group / person|group day##group, repeated(day)
* additional analyses with day as a covariate (cannot use repeated here)
anova lnsals group / person|group c.day##group
anova lnsals group / person|group c.day

* likelihood-based analysis of hierarchical model
gen personid=group*100+person
mixed lnsals group##day || personid:, reml
* residuals analysis
predict pred3, fitted
predict stdres3, rstandard
scatter stdres3 pred3
qnorm stdres3
swilk stdres3
predict pred_ref, reffects
bysort personid: gen pers_n=_n
qnorm pred_ref if pers_n==1
swilk pred_ref if pers_n==1
* additional analyses with day as a covariate
mixed lnsals group##c.day || personid:, reml
mixed lnsals i.group day || personid:, reml

* models with added correlation structure
mixed lnsals group##day || personid:, reml residuals(ar 1, t(day))
mixed lnsals group##day || personid:, reml nocons residuals(toep, t(day))
* note: the Toeplitz model has improved fit, but the correlations seem difficult to interpret
