source(file.path("..", "helper_functions.r"), verbose=TRUE)
source(file.path("..", "_preloader.r"), verbose=TRUE)
################################################################
# EdgeR - Used with counts data, such as RNA-Seq #
################################################################
masterTable <- masterTable.backup %>%
dplyr::filter(
as.character(group) %in% c("Control", "COPD.3.4")
) %>%
dplyr::mutate(
group = droplevels(group) # We need to do this, to make sure the design does not expand the unnecessary levels (That are filtered out).
)
expressionData <- expressionData.backup %>%
select.columns.in.order(c("Gene", masterTable$mirna.sample.id))
design <- model.matrix(
~0 + group + gender + age + packyears, # 'Group' is the testing variable, you'll correct for gender, age and packyears.
data = masterTable
)
DGEL <- edgeR::DGEList(expressionData %>% tibble::column_to_rownames("Gene"))
keep <- edgeR::filterByExpr(DGEL)
DGEL <- DGEL[keep, , keep.lib.sizes=FALSE]
DGEL <- edgeR::calcNormFactors(DGEL, method = "TMM")
DGEL <- edgeR::estimateDisp(DGEL, design)
fit <- edgeR::glmQLFit(DGEL, design)
contrasts <- limma::makeContrasts(
test = groupCOPD.3.4 - groupControl, # This is the contrast/comparison in this analysis. You can make multiple. 'Test' is the name.
levels = design
)
fit <- edgeR::glmQLFTest(
fit,
contrast = contrasts[,"test"] # test refers to the contrast
)
de.results <- edgeR::topTags(
fit,
n = nrow(DGEL),
adjust.method = "BH"
) %>%
as.data.frame() %>%
tibble::rownames_to_column("ensembl_gene_id") %>%
dplyr::left_join(
y = geneData %>%
dplyr::select(
ensembl_gene_id,
hgnc_symbol
),
by = c("ensembl_gene_id" = "ensembl_gene_id")
)