Models

This script defines the models which are used in subsequent scripts.

Model 1

Function to run Model 1, which is exp ~ APOE + CERAD/Braak.

#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model1 = function(mData, SF, E, annot, useVoom = F) {
  # remove samples with NA meta data
  sel = (!is.na(SF) & !is.na(E))
  SF = SF[sel]
  E = E[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  # design matrix
  design.sfE = model.matrix(~ E + SF + 0 )
  colnames(design.sfE) = gsub("EE","E", colnames(design.sfE))
  print(head(design.sfE))
  
  # limma model
  fit.sfE = lmFit(mData, design.sfE)
  fit.c.sfE =  contrasts.fit(fit.sfE, makeContrasts(C1="E4-E3", C2="E2-E3", levels=design.sfE))
  if(useVoom){ 
    fit.eb.sfE = eBayes(fit.c.sfE, robust=TRUE)
  }else{
    fit.eb.sfE = eBayes(fit.c.sfE, trend = T)
  }
  
  # annotation
  fit.eb.sfE$genes = annot
  
  # call topTable
  tT1.sfE = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef="C1", confint=TRUE)
  tT2.sfE = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef="C2", confint=TRUE)
  
  colnames(tT1.sfE)[4:11] = paste0("E4vsE3_", colnames(tT1.sfE)[4:11])
  colnames(tT2.sfE)[4:11] = paste0("E2vsE3_", colnames(tT2.sfE)[4:11])
  setDT(tT1.sfE)
  setDT(tT2.sfE)
  tT.sfE = tT1.sfE[tT2.sfE, on=c("GeneSymbol", "EnsemblID", "Description")]
  
  return(tT.sfE)
}

Model 1.5

Function to run Model 1.5, which is exp ~ E4Num + E2Num + CERAD.

#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E4Num numeric; the number of copy of e4 
#' @param E2Num numeric; the number of copy of e2
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model1.5 = function(mData, SF, E4Num, E2Num, annot, useVoom = F) {
  # remove samples with NA meta data
  sel = (!is.na(SF) & !is.na(E4Num) & !is.na(E2Num) )
  SF = SF[sel]
  E4Num = E4Num[sel]
  E2Num = E2Num[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  # design matrix
  design.sfE = model.matrix(~ E4Num + E2Num + SF )
  print(head(design.sfE))
  
  # limma model
  fit.sfE = lmFit(mData, design.sfE)
  if(useVoom){ 
    fit.eb.sfE = eBayes(fit.sfE, robust=TRUE)
  }else{
    fit.eb.sfE = eBayes(fit.sfE, trend = T)
  }
  
  # annotation
  fit.eb.sfE$genes = annot
  
  # call toptable
  tT_CERAD_E4Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=2, confint=TRUE)
  tT_CERAD_E2Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=3, confint=TRUE)
  
  setDT(tT_CERAD_E4Num)
  setDT(tT_CERAD_E2Num)
  
  tT = copy(annot)
  tT = tT[tT_CERAD_E4Num, on = .(EnsemblID, GeneSymbol, Description), 
          c("e4_logFC", "e4_CI.L", "e4_CI.R",
            "e4_P.Value", "e4_adj.P.Val") := .(
              i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
  tT = tT[tT_CERAD_E2Num, on = .(EnsemblID, GeneSymbol, Description), 
          c("e2_logFC", "e2_CI.L", "e2_CI.R",
            "e2_P.Value", "e2_adj.P.Val") := .(
              i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
  
  return(tT)
}

Model 2

Function to run Model 2 baseline, which is C0E4-C0E3, C0E2-C0E3 | B1E4-B1E3, B1E2-B1E3.

#' @param sf string; analysis for Braak or CERAD
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model2 = function(sf, mData, SF, E, annot, useVoom = F) {
  
  # remove samples with NA meta data
  sel = (!is.na(SF) & !is.na(E))
  SF = SF[sel]
  E = E[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  if(sf=="CERAD") {
    f3=factor(paste0("C",SF,E))
  } else {
    f3=factor(paste0("B",SF,E))
  }
  
  # design matrix
  design.EBase = model.matrix(~f3 + 0)
  colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
  print(head(design.EBase))
  
  # limma model
  if(sf=="CERAD") {
    fit.EBase = lmFit(mData, design.EBase)
    fit.c.EBase =  contrasts.fit(fit.EBase, makeContrasts(C1="C0E4-C0E3", C2="C0E2-C0E3", levels=design.EBase))
  } else {
    fit.EBase = lmFit(mData, design.EBase)
    fit.c.EBase =  contrasts.fit(fit.EBase, makeContrasts(C1="B1E4-B1E3", C2="B1E2-B1E3", levels=design.EBase))
  }
  
  if(useVoom){ 
    fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
  }else{
    fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
  }
  
  # annotation
  fit.eb.EBase$genes = annot
  
  # call topTable
  tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
  tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
  
  colnames(tT1.EBase)[4:11] = paste0("E4vsE3_", colnames(tT1.EBase)[4:11])
  colnames(tT2.EBase)[4:11] = paste0("E2vsE3_", colnames(tT2.EBase)[4:11])
  
  setDT(tT1.EBase)
  setDT(tT2.EBase)
  tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]

  return(tT.EBase)
}

Model 2.5

Function to run Model 2.5, which is exp ~ E4Num + E2Num for C0 subjects only.

#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E4Num numeric; the number of copy of e4 
#' @param E2Num numeric; the number of copy of e2
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model2.5 = function(mData, E4Num, E2Num, annot, useVoom = F) {
  # remove samples with NA meta data
  sel = (!is.na(E4Num) & !is.na(E2Num) )
  E4Num = E4Num[sel]
  E2Num = E2Num[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  # design matrix
  design.sfE = model.matrix(~ E4Num + E2Num)
  print(head(design.sfE))
  
  # limma model
  fit.sfE = lmFit(mData, design.sfE)
  if(useVoom){ 
    fit.eb.sfE = eBayes(fit.sfE, robust=TRUE)
  }else{
    fit.eb.sfE = eBayes(fit.sfE, trend = T)
  }
  
  # annotation
  fit.eb.sfE$genes = annot
  
  # call toptable
  tT_CERAD_E4Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=2, confint=TRUE)
  tT_CERAD_E2Num = topTable(fit.eb.sfE, sort.by="none", number=Inf, coef=3, confint=TRUE)
  
  setDT(tT_CERAD_E4Num)
  setDT(tT_CERAD_E2Num)
  
  tT = copy(annot)
  tT = tT[tT_CERAD_E4Num, on = .(EnsemblID, GeneSymbol, Description), 
          c("e4_logFC", "e4_CI.L", "e4_CI.R",
            "e4_P.Value", "e4_adj.P.Val") := .(
              i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
  tT = tT[tT_CERAD_E2Num, on = .(EnsemblID, GeneSymbol, Description), 
          c("e2_logFC", "e2_CI.L", "e2_CI.R",
            "e2_P.Value", "e2_adj.P.Val") := .(
              i.logFC, i.CI.L, i.CI.R, i.P.Value, i.adj.P.Val)]
  
  return(tT)
}

Model 3

Function to run Model 3, which is C3E4-C3E3, C3E2-C3E3 | B3E4-B3E3, B3E2-B3E3.

#' @param sf string; analysis for Braak or CERAD
#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model3 = function(sf, mData, SF, E, annot, useVoom = F) {
  
  # remove samples with NA meta data
  sel = (!is.na(SF) & !is.na(E))
  SF = SF[sel]
  E = E[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  # factors for creating design matrix: SF.E
  # SF: CERAD or Braak stages
  # E: APOE2 (including 22, 23), APOE3 (including 33), APOE4 (including 44, 34, 24)
  if(sf=="CERAD") {
    f3=factor(paste0("C",SF,E))
  } else {
    f3=factor(paste0("B",SF,E))
  }
  
  design.EBase = model.matrix(~f3 + 0)
  colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
  print(head(design.EBase))
  
  # limma model
  if(sf=="CERAD") {
    fit.EBase = lmFit(mData, design.EBase)
    fit.c.EBase =  contrasts.fit(fit.EBase, makeContrasts(C1="C3E4-C3E3", C2="C3E2-C3E3", levels=design.EBase))
  } else {
    fit.EBase = lmFit(mData, design.EBase)
    fit.c.EBase =  contrasts.fit(fit.EBase, makeContrasts(C1="B3E4-B3E3", C2="B3E2-B3E3", levels=design.EBase))
  }
  
  if(useVoom){ 
    fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
  }else{
    fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
  }
  
  # annotation
  fit.eb.EBase$genes = annot
  
  # call topTable
  tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
  tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
  colnames(tT1.EBase)[4:11] = paste0("E4vsE3_", colnames(tT1.EBase)[4:11])
  colnames(tT2.EBase)[4:11] = paste0("E2vsE3_", colnames(tT2.EBase)[4:11])
  
  setDT(tT1.EBase)
  setDT(tT2.EBase)
  
  tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
  
  return(tT.EBase)
}

Model 4

Function to run Model 4, which is C23E4-C23E3, C23E2-C23E3.

#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model4 = function(mData, SF, E, annot, useVoom = F) {
  # remove samples with NA meta data
  sel = (!is.na(SF) & !is.na(E))
  SF = SF[sel]
  E = E[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  SF = gsub("2|3", "23", SF)
  f3 = factor(paste0("C",SF,E))
  
  design.EBase = model.matrix(~f3 + 0)
  colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
  print(head(design.EBase))
  
  # limma model
  fit.EBase = lmFit(mData, design.EBase)
  fit.c.EBase =  contrasts.fit(fit.EBase, makeContrasts(C1="C23E4-C23E3", C2="C23E2-C23E3", levels=design.EBase))
  
  if(useVoom){ 
    fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
  }else{
    fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
  }
  
  # annotation
  fit.eb.EBase$genes = annot
  
  # call topTable
  tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
  tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
  colnames(tT1.EBase)[4:11] = paste0("E4vsE3_", colnames(tT1.EBase)[4:11])
  colnames(tT2.EBase)[4:11] = paste0("E2vsE3_", colnames(tT2.EBase)[4:11])
  
  setDT(tT1.EBase)
  setDT(tT2.EBase)

  tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
  
  return(tT.EBase)
}

Model 5

Function to run Model 5, which is CXE4-CXE3, CXE2-CXE3, where X could be either 1 or 2.

#' @param mData matrix; normalized gene expression matrix in shape [genes, subjects]
#' @param SF factor; Braak or CERAD stage
#' @param E factor; E2 (22,23), E3 (33), E4 (44,34,24)
#' @param annot data.table; annotation for genes; 
#' @param useVoom T/F; use limma-voom or not
run_model5 = function(mData, SF, E, annot, useVoom = F) {
  
  # remove samples with NA meta data
  sel = (!is.na(SF) & !is.na(E))
  SF = SF[sel]
  E = E[sel]
  mData = mData[,sel]
  cat(sum(!sel), "out of", length(sel), "samples were removed because of lack of clinical records\n")
  cat("Final sample size: ", sum(sel), "\n")
  
  f3 = factor(paste0("C",SF,E))
  
  design.EBase = model.matrix(~f3 + 0)
  colnames(design.EBase) = gsub("f3","", colnames(design.EBase))
  print(head(design.EBase))
  
  # limma model
  fit.EBase = lmFit(mData, design.EBase)
  fit.c.EBase =  contrasts.fit(fit.EBase, makeContrasts(C1="C1E4-C1E3", C2="C1E2-C1E3", 
                  C3="C2E4-C2E3", C4="C2E2-C2E3",levels=design.EBase))
  
  if(useVoom){ 
    fit.eb.EBase = eBayes(fit.c.EBase, robust=TRUE)
  }else{
    fit.eb.EBase = eBayes(fit.c.EBase, trend = T)
  }
  
  # annotation
  fit.eb.EBase$genes = annot
  
  # call topTable
  tT1.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C1", confint=TRUE)
  tT2.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C2", confint=TRUE)
  tT3.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C3", confint=TRUE)
  tT4.EBase = topTable(fit.eb.EBase, sort.by="none", number=Inf, coef="C4", confint=TRUE)
  colnames(tT1.EBase)[4:11] = paste0("C1E4vsC1E3_", colnames(tT1.EBase)[4:11])
  colnames(tT2.EBase)[4:11] = paste0("C1E2vsC1E3_", colnames(tT2.EBase)[4:11])
  colnames(tT3.EBase)[4:11] = paste0("C2E4vsC2E3_", colnames(tT3.EBase)[4:11])
  colnames(tT4.EBase)[4:11] = paste0("C2E2vsC2E3_", colnames(tT4.EBase)[4:11])
  
  setDT(tT1.EBase)
  setDT(tT2.EBase)
  setDT(tT3.EBase)
  setDT(tT4.EBase)
  
  tT.EBase = tT1.EBase[tT2.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
  tT.EBase = tT.EBase[tT3.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
  tT.EBase = tT.EBase[tT4.EBase, on=c("GeneSymbol", "EnsemblID", "Description")]
  
  return(tT.EBase)
}

Corrections

If you see mistakes or want to suggest changes, please create an issue on the source repository.