* do-file for Additional multivariate exercise 9, VHM 802, Winter 2021
version 16 /* works also with versions 14-15 */
set more off
cd "r:\

use beef_ultra, clear
sort id
codebook, c
summarize backfat ribeye imfat days carc_wt
codebook grade breed sex bckgrnd implant
* farm, id, breed are nominal categorical so not useful for PCA etc; grade was not be included
graph box backfat ribeye imfat
graph box days
graph box carc_wt

correlate sex-carc_wt
pca sex-carc_wt
screeplot
loadingplot
loadingplot, comp(4)
scoreplot, comp(4)
predict sco1-sco4
tabstat sco1-sco4, statistics( mean semean ) by(grade)
foreach sco of varlist sco1-sco4 {
  oneway `sco' grade, tab bon
}
twoway (scatter sco2 sco1 if grade==1, msymbol(smx)) ///
  (scatter sco2 sco1 if grade==2, msymbol(smcircle_hollow)) /// 
  (scatter sco2 sco1 if grade==3, msymbol(smtriangle_hollow)), ///
  ytitle(2nd principal component) xtitle(1st principal component) title(Ordinary PCA (x=grade AAA, o=grade AA, triangle=grade A), size(medium)) legend(off)

* -choric PCA
polychoric sex-carc_wt
sort id
matrix define polyc=r(R)
matrix list polyc
pcamat polyc, n(487)
screeplot
loadingplot
foreach var of varlist sex-carc_wt {
  egen s`var'=std(`var')
  }
matrix define load=e(L)
mkmat ssex-scarc_wt, matrix(sdata)
matrix psco = sdata*load
svmat psco
tabstat psco1-psco4, statistics( mean semean ) by(grade)
foreach psco of varlist psco1-psco4 {
  oneway `psco' grade, tab bon
}
twoway (scatter psco2 psco1 if grade==1, msymbol(smx)) ///
  (scatter psco2 psco1 if grade==2, msymbol(smcircle_hollow)) /// 
  (scatter psco2 psco1 if grade==3, msymbol(smtriangle_hollow)), ///
  ytitle(2nd principal component) xtitle(1st principal component) title(Polychoric PCA (x=grade AAA, o=grade AA, triangle=grade A), size(medium)) legend(off)

* factor analysis, first the simpler version with Pearson correlation matrix
factor sex-carc_wt, pcf mineigen(0.9)
loadingplot
rotate, varimax 
loadingplot, comp(4)
predict rsco1-rsco4
tabstat rsco1-rsco4, statistics( mean semean ) by(grade)
foreach rsco of varlist rsco1-rsco4 {
  oneway `rsco' grade, tab bon
}
twoway (scatter rsco4 rsco2 if grade==1, msymbol(smx)) ///
  (scatter rsco4 rsco2 if grade==2, msymbol(smcircle_hollow)) /// 
  (scatter rsco4 rsco2 if grade==3, msymbol(smtriangle_hollow)), ///
  ytitle(4th principal component) xtitle(2nd principal component) title(Ordinary Factor analysis (x=grade AAA, o=grade AA, triangle=grade A), size(medium)) legend(off)
* next with -choric correlations
factormat polyc, pcf n(487) mineigen(0.8)
rotate, varimax 
loadingplot, comp(4)
matrix rload=e(L)
matrix fsco=sdata*rload*inv(rload'*rload) /* formula (7.4) in Manly */
svmat fsco
tabstat fsco1-fsco4, statistics( mean semean ) by(grade)
foreach fsco of varlist fsco1-fsco4 {
  oneway `fsco' grade, tab bon
}
twoway (scatter fsco2 fsco1 if grade==1, msymbol(smx)) ///
  (scatter fsco2 fsco1 if grade==2, msymbol(smcircle_hollow)) /// 
  (scatter fsco2 fsco1 if grade==3, msymbol(smtriangle_hollow)), ///
  ytitle(2nd principal component) xtitle(1st principal component) title(Polychoric Factor analysis (x=grade AAA, o=grade AA, triangle=grade A), size(medium)) legend(off)
