This script defines the functions which are used to perform spectral clustering.
Calculate affinity matrix for spectral clustering. Code is derived from the SNFtool
package, see associated publication in Nature Methods here.
#' @param diff
affinityCustom = function (diff, sigma = 0.5)
{
N <- nrow(diff)
diff <- (diff + t(diff))/2
diag(diff) <- 0
sortedColumns <- as.matrix(t(apply(diff, 2, sort)))
finiteMean <- function(x) {
return(mean(x[is.finite(x)]))
}
# this line has been modified to remove [, 1:K + 1]
means <- apply(sortedColumns, 1, finiteMean) +
.Machine$double.eps
avg <- function(x, y) {
return((x + y)/2)
}
Sig <- outer(means, means, avg)/3 * 2 + diff/3 + .Machine$double.eps
Sig[Sig <= .Machine$double.eps] <- .Machine$double.eps
densities <- dnorm(diff, 0, sigma * Sig, log = FALSE)
W <- (densities + t(densities))/2
return(W)
}
#' @param sf string; C(CERAD)/B(Braak);
#' @param level string; level of CERAD or Braak stage;
#' @param mData matrix; expression matrix
#' @param cov data.table; clinical data
to_z_score = function(sf, level, mDataG, cov){
if(!all(colnames(mDataG) == cov$projid)){
stop()
}
if(sf == "C"){
cat("select CERAD", level, "samples..\n")
remove_proj = cov[,is.na(C) | is.na(E)]
if(any(remove_proj)){
cat("remove", sum(remove_proj), "samples with missing values..\n")
mDataG = mDataG[, !remove_proj]
cov = cov[!is.na(C) & !is.na(E),]
}
E = cov[C == level, E]
sel = cov$C == level
}else if(sf == "B"){
cat("select Braak", level, "samples..\n")
remove_proj = cov[,is.na(B) | is.na(E)]
if(any(remove_proj)){
cat("remove", sum(remove_proj),"samples with missing values..\n")
mDataG = mDataG[, !remove_proj]
cov = cov[!is.na(B) & !is.na(E),]
}
E = cov[B == level, E]
sel = cov$B == level
}else{
stop()
}
means = rowMeans(mDataG[, sel])
sds = apply(mDataG[, sel], 1, sd)
zscore = function(x) return((x - means)/sds)
mData_z = apply(mDataG[, sel], 2, zscore)
# check should be TRUE
cat("random check:",
all(mData_z[9,] == (mDataG[9,sel] - mean(as.numeric(mDataG[9,sel])))/sd(as.numeric(mDataG[9,sel]))),
"\n")
return(list(z = mData_z, E = E, mData = mDataG[, sel],
means = means, sds = sds))
}
#' @param avez data.table/data.frame/matrix containing the average z-scores of E2, E3, and E4
#' @param k number of cluster
spectral_clustering = function(avez, k){
set.seed(9)
zMtx = as.matrix(avez[, c("E2", "E3", "E4")])
rownames(zMtx) = avez$Genes
zMtx = zMtx[!duplicated(rownames(zMtx)), ]
# calculate distance matrix
distM = zMtx %>% dist2(., .) %>% .^(1/2)
# calculate similarity matrix
simM = affinityCustom(distM)
# perform spectral clustering
clustA = spectralClustering(simM, K = k)
return(list(clustA = clustA,
zMtx = zMtx))
}
Statistical testing of interested microglia-APOE genes across APOE genotypes.
#' @param z matrix; z
#' @param genes vector; vector of interested genes
#' @param cov data.table; clinical data
#' @param adj.donor T/F; adjusted by donor or not
#' @param donor donor id; need provide if adj.donor = T
test_avez = function(zmtx, clust_genes, cov, donor = NULL, adj.donor = FALSE){
data = data.frame(
ave_z = colMeans(zmtx[clust_genes, ]),
APO = cov$E)
if(levels(cov$E)[1] != "E3"){
stop("must use E3 as reference\n")
}
if(adj.donor){
require(lme4)
library(lmerTest)
data$donor = donor
fit = lmer(ave_z ~ (1|donor)+ APO, data = data)
}else{
fit = lm(ave_z ~ APO, data = data)
}
return(summary(fit, confint = TRUE, digits = 3))
}
Hypergeometric tests of interested microglia-APOE genes across APOE genotypes.
If you see mistakes or want to suggest changes, please create an issue on the source repository.