This script defines the models which are used in subsequent scripts.
Function to run Model 1, which is exp ~ APOE + CERAD/Braak
.
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model1 = function(mData, SF, E, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(SF) & !is.na(E))
SF = SF[sel]
E = E[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
# design matrix
design.sfE = model.matrix(~ E + SF + 0 )
colnames(design.sfE) = gsub("EE","E", colnames(design.sfE))
print(head(design.sfE))
# limma model
fit.sfE = lmFit(mData, design.sfE)
fit.c.sfE = contrasts.fit(fit.sfE, makeContrasts(C1="E4-E3", C2="E2-E3", levels=design.sfE))
if(useVoom){
fit.eb.sfE = eBayes(fit.c.sfE, robust=TRUE)
}else{
fit.eb.sfE = eBayes(fit.c.sfE, trend = T)
}
# annotation
fit.eb.sfE$genes = annot
# call topTable
tT1.sfE = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef="C1", confint=TRUE)
tT2.sfE = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef="C2", confint=TRUE)
colnames(tT1.sfE)[4:11] = paste0("E4vsE3_", colnames(tT1.sfE)[4:11])
colnames(tT2.sfE)[4:11] = paste0("E2vsE3_", colnames(tT2.sfE)[4:11])
setDT(tT1.sfE)
setDT(tT2.sfE)
tT.sfE = tT1.sfE[tT2.sfE, on=c("GeneSymbol", "EnsemblID", "Description")]
return(tT.sfE)
}
Function to run Model 1.5, which is exp ~ E4Num + E2Num + CERAD
.
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E4Num numeric; the number of copy of e4
#' @param E2Num numeric; the number of copy of e2
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model1.5 = function(mData, SF, E4Num, E2Num, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(SF) & !is.na(E4Num) & !is.na(E2Num) )
SF = SF[sel]
E4Num = E4Num[sel]
E2Num = E2Num[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
# design matrix
design.sfE = model.matrix(~ E4Num + E2Num + SF )
print(head(design.sfE))
# limma model
fit.sfE = lmFit(mData, design.sfE)
if(useVoom){
fit.eb.sfE = eBayes(fit.sfE, robust=TRUE)
}else{
fit.eb.sfE = eBayes(fit.sfE, trend = T)
}
# annotation
fit.eb.sfE$genes = annot
# call toptable
tT_CERAD_E4Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=2, confint=TRUE)
tT_CERAD_E2Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=3, confint=TRUE)
setDT(tT_CERAD_E4Num)
setDT(tT_CERAD_E2Num)
tT = copy(annot)
tT = tT[tT_CERAD_E4Num, on = .(EnsemblID, GeneSymbol, Description),
c("e4_logFC", "e4_CI.L", "e4_CI.R",
"e4_P.Value", "e4_adj.P.Val") := .(
i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
tT = tT[tT_CERAD_E2Num, on = .(EnsemblID, GeneSymbol, Description),
c("e2_logFC", "e2_CI.L", "e2_CI.R",
"e2_P.Value", "e2_adj.P.Val") := .(
i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
return(tT)
}
Function to run Model 2 baseline, which is C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3
.
#' @param sf string; analysis for Braak or CERAD
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model2 = function(sf, mData, SF, E, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(SF) & !is.na(E))
SF = SF[sel]
E = E[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
if(sf=="CERAD") {
f3=factor(paste0("C",SF,E))
} else {
f3=factor(paste0("B",SF,E))
}
# design matrix
design.EBase = model.matrix(~f3 + 0)
colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
print(head(design.EBase))
# limma model
if(sf=="CERAD") {
fit.EBase = lmFit(mData, design.EBase)
fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1="C0E4-C0E3", C2="C0E2-C0E3", levels=design.EBase))
} else {
fit.EBase = lmFit(mData, design.EBase)
fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1="B1E4-B1E3", C2="B1E2-B1E3", levels=design.EBase))
}
if(useVoom){
fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
}else{
fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
}
# annotation
fit.eb.EBase$genes = annot
# call topTable
tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
colnames(tT1.EBase)[4:11] = paste0("E4vsE3_", colnames(tT1.EBase)[4:11])
colnames(tT2.EBase)[4:11] = paste0("E2vsE3_", colnames(tT2.EBase)[4:11])
setDT(tT1.EBase)
setDT(tT2.EBase)
tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
return(tT.EBase)
}
Function to run Model 2.5, which is exp ~ E4Num + E2Num
for C0 subjects only.
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E4Num numeric; the number of copy of e4
#' @param E2Num numeric; the number of copy of e2
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model2.5 = function(mData, E4Num, E2Num, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(E4Num) & !is.na(E2Num) )
E4Num = E4Num[sel]
E2Num = E2Num[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
# design matrix
design.sfE = model.matrix(~ E4Num + E2Num)
print(head(design.sfE))
# limma model
fit.sfE = lmFit(mData, design.sfE)
if(useVoom){
fit.eb.sfE = eBayes(fit.sfE, robust=TRUE)
}else{
fit.eb.sfE = eBayes(fit.sfE, trend = T)
}
# annotation
fit.eb.sfE$genes = annot
# call toptable
tT_CERAD_E4Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=2, confint=TRUE)
tT_CERAD_E2Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=3, confint=TRUE)
setDT(tT_CERAD_E4Num)
setDT(tT_CERAD_E2Num)
tT = copy(annot)
tT = tT[tT_CERAD_E4Num, on = .(EnsemblID, GeneSymbol, Description),
c("e4_logFC", "e4_CI.L", "e4_CI.R",
"e4_P.Value", "e4_adj.P.Val") := .(
i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
tT = tT[tT_CERAD_E2Num, on = .(EnsemblID, GeneSymbol, Description),
c("e2_logFC", "e2_CI.L", "e2_CI.R",
"e2_P.Value", "e2_adj.P.Val") := .(
i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
return(tT)
}
Function to run Model 3, which is C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3
.
#' @param sf string; analysis for Braak or CERAD
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model3 = function(sf, mData, SF, E, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(SF) & !is.na(E))
SF = SF[sel]
E = E[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
# factors for creating design matrix: SF.E
# SF: CERAD or Braak stages
# E: APOE2 (including 22, 23), APOE3 (including 33), APOE4 (including 44, 34, 24)
if(sf=="CERAD") {
f3=factor(paste0("C",SF,E))
} else {
f3=factor(paste0("B",SF,E))
}
design.EBase = model.matrix(~f3 + 0)
colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
print(head(design.EBase))
# limma model
if(sf=="CERAD") {
fit.EBase = lmFit(mData, design.EBase)
fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1="C3E4-C3E3", C2="C3E2-C3E3", levels=design.EBase))
} else {
fit.EBase = lmFit(mData, design.EBase)
fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1="B3E4-B3E3", C2="B3E2-B3E3", levels=design.EBase))
}
if(useVoom){
fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
}else{
fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
}
# annotation
fit.eb.EBase$genes = annot
# call topTable
tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
colnames(tT1.EBase)[4:11] = paste0("E4vsE3_", colnames(tT1.EBase)[4:11])
colnames(tT2.EBase)[4:11] = paste0("E2vsE3_", colnames(tT2.EBase)[4:11])
setDT(tT1.EBase)
setDT(tT2.EBase)
tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
return(tT.EBase)
}
Function to run Model 4, which is C23E4-C23E3, C23E2-C23E3
.
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model4 = function(mData, SF, E, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(SF) & !is.na(E))
SF = SF[sel]
E = E[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
SF = gsub("2|3", "23", SF)
f3 = factor(paste0("C",SF,E))
design.EBase = model.matrix(~f3 + 0)
colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
print(head(design.EBase))
# limma model
fit.EBase = lmFit(mData, design.EBase)
fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1="C23E4-C23E3", C2="C23E2-C23E3", levels=design.EBase))
if(useVoom){
fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
}else{
fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
}
# annotation
fit.eb.EBase$genes = annot
# call topTable
tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
colnames(tT1.EBase)[4:11] = paste0("E4vsE3_", colnames(tT1.EBase)[4:11])
colnames(tT2.EBase)[4:11] = paste0("E2vsE3_", colnames(tT2.EBase)[4:11])
setDT(tT1.EBase)
setDT(tT2.EBase)
tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
return(tT.EBase)
}
Function to run Model 5, which is CXE4-CXE3, CXE2-CXE3
, where X
could be either 1 or 2.
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes;
#' @param useVoom T/F; use limma-voom or not
run_model5 = function(mData, SF, E, annot, useVoom = F) {
# remove samples with NA meta data
sel = (!is.na(SF) & !is.na(E))
SF = SF[sel]
E = E[sel]
mData = mData[,sel]
cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
cat("Final sample size: ", sum(sel), "\n")
f3 = factor(paste0("C",SF,E))
design.EBase = model.matrix(~f3 + 0)
colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
print(head(design.EBase))
# limma model
fit.EBase = lmFit(mData, design.EBase)
fit.c.EBase = contrasts.fit(fit.EBase, makeContrasts(C1="C1E4-C1E3", C2="C1E2-C1E3",
C3="C2E4-C2E3", C4="C2E2-C2E3",levels=design.EBase))
if(useVoom){
fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
}else{
fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
}
# annotation
fit.eb.EBase$genes = annot
# call topTable
tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
tT3.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C3", confint=TRUE)
tT4.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C4", confint=TRUE)
colnames(tT1.EBase)[4:11] = paste0("C1E4vsC1E3_", colnames(tT1.EBase)[4:11])
colnames(tT2.EBase)[4:11] = paste0("C1E2vsC1E3_", colnames(tT2.EBase)[4:11])
colnames(tT3.EBase)[4:11] = paste0("C2E4vsC2E3_", colnames(tT3.EBase)[4:11])
colnames(tT4.EBase)[4:11] = paste0("C2E2vsC2E3_", colnames(tT4.EBase)[4:11])
setDT(tT1.EBase)
setDT(tT2.EBase)
setDT(tT3.EBase)
setDT(tT4.EBase)
tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
tT.EBase = tT.EBase[tT3.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
tT.EBase = tT.EBase[tT4.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
return(tT.EBase)
}
If you see mistakes or want to suggest changes, please create an issue on the source repository.