nutrigenomic.Rd
Study the effects of five diet treatments on 21 liver lipids and 120 hepatic gene expression in wild-type and PPAR-alpha deficient mice. We use a multivariate mixed random forest analysis by regressing gene expression, diet and genotype (the x-variables) on lipid expressions (the multivariate y-responses).
Martin P.G. et al. (2007). Novel aspects of PPAR-alpha-mediated regulation of lipid and xenobiotic metabolism revealed through a nutrigenomic study. Hepatology, 45(3), 767--777.
# \donttest{
## ------------------------------------------------------------
## multivariate regression forests using Mahalanobis splitting
## lipids (all real values) used as the multivariate y
## ------------------------------------------------------------
## load the data
data(nutrigenomic, package = "randomForestSRC")
## parse into y and x data
ydta <- nutrigenomic$lipids
xdta <- data.frame(nutrigenomic$genes,
diet = nutrigenomic$diet,
genotype = nutrigenomic$genotype)
## multivariate mixed forest call
obj <- rfsrc(get.mv.formula(colnames(ydta)),
data.frame(ydta, xdta),
importance=TRUE, nsplit = 10,
splitrule = "mahalanobis")
print(obj)
## ------------------------------------------------------------
## plot the standarized performance and VIMP values
## ------------------------------------------------------------
## acquire the error rate for each of the 21-coordinates
## standardize to allow for comparison across coordinates
serr <- get.mv.error(obj, standardize = TRUE)
## acquire standardized VIMP
svimp <- get.mv.vimp(obj, standardize = TRUE)
par(mfrow = c(1,2))
plot(serr, xlab = "Lipids", ylab = "Standardized Performance")
matplot(svimp, xlab = "Genes/Diet/Genotype", ylab = "Standardized VIMP")
## ------------------------------------------------------------
## plot some trees
## ------------------------------------------------------------
plot(get.tree(obj, 1))
plot(get.tree(obj, 2))
plot(get.tree(obj, 3))
## ------------------------------------------------------------
##
## Compare above to (1) user specified covariance matrix
## (2) default composite (independent) splitting
##
## ------------------------------------------------------------
## user specified sigma matrix
obj2 <- rfsrc(get.mv.formula(colnames(ydta)),
data.frame(ydta, xdta),
importance = TRUE, nsplit = 10,
splitrule = "mahalanobis",
sigma = cov(ydta))
print(obj2)
## default independence split rule
obj3 <- rfsrc(get.mv.formula(colnames(ydta)),
data.frame(ydta, xdta),
importance=TRUE, nsplit = 10)
print(obj3)
## compare vimp
imp <- data.frame(mahalanobis = rowMeans(get.mv.vimp(obj, standardize = TRUE)),
mahalanobis2 = rowMeans(get.mv.vimp(obj2, standardize = TRUE)),
default = rowMeans(get.mv.vimp(obj3, standardize = TRUE)))
print(head(100 * imp[order(imp$mahalanobis, decreasing = TRUE), ], 15))
# }