This script removes low-expressed ROSMAP genes and adjusts by age, sex, and post-mortem interval (PMI).
Load requisite packages.
library(data.table)
Load .Rdata
object saved by prior script.
# load data
load("../Data/ROSMAP-24.Rdata")
rosmap_fpkm = rosmap$fpkm
cov = rosmap$cov
Remove low-expressed genes.
# remove low expressed genes
gene_sel = rowMeans(rosmap_fpkm) > 0.1
rosmap_fpkm = rosmap_fpkm[gene_sel, ]
Adjust by age, sex, and post-mortem interval (PMI), then log-transform data.
# create empty matrix to populate
adj_exp = matrix(NA, ncol = ncol(rosmap_fpkm), nrow = nrow(rosmap_fpkm))
colnames(adj_exp) = cov$projid
rownames(adj_exp) = rownames(rosmap_fpkm)
# adjust by age, sex, and PMI
msex = cov$msex
age_at_death = cov$age_at_death
pmi = cov$pmi
# impute missing values with mean
pmi[is.na(pmi)] = mean(pmi, na.rm = T)
Log-transform data.
# log2(fpkm + 0.0001) adjusted by age, sex, and PMI
for(gene in rownames(rosmap_fpkm)){
exp = log2(as.numeric(rosmap_fpkm[gene,]) + 0.1^4)
lm_fit = lm(exp ~ msex + age_at_death + pmi)
adj_exp[gene, ] = lm_fit$residuals
}
If you see mistakes or want to suggest changes, please create an issue on the source repository.