Zettelkasten

source(file.path("..", "helper_functions.r"), verbose=TRUE)
source(file.path("..", "_preloader.r"), verbose=TRUE)

################################################################
# EdgeR - Used with counts data, such as RNA-Seq               #
################################################################
masterTable <- masterTable.backup %>%
  dplyr::filter(
    as.character(group) %in% c("Control", "COPD.3.4")
  ) %>%
  dplyr::mutate(
    group = droplevels(group) # We need to do this, to make sure the design does not expand the unnecessary levels (That are filtered out).
  )
expressionData <- expressionData.backup %>%
  select.columns.in.order(c("Gene", masterTable$mirna.sample.id))


design <- model.matrix(
    ~0 + group + gender + age + packyears, # 'Group' is the testing variable, you'll correct for gender, age and packyears. 
    data = masterTable
  )

DGEL <- edgeR::DGEList(expressionData %>% tibble::column_to_rownames("Gene"))
keep <- edgeR::filterByExpr(DGEL)
DGEL <- DGEL[keep, , keep.lib.sizes=FALSE]
DGEL <- edgeR::calcNormFactors(DGEL, method = "TMM")

DGEL <- edgeR::estimateDisp(DGEL, design)
fit <- edgeR::glmQLFit(DGEL, design)

contrasts <- limma::makeContrasts(
  test = groupCOPD.3.4 - groupControl, # This is the contrast/comparison in this analysis. You can make multiple. 'Test' is the name.
  levels = design
)

fit <- edgeR::glmQLFTest(
  fit,
  contrast = contrasts[,"test"] # test refers to the contrast
)

de.results <- edgeR::topTags(
    fit,
    n = nrow(DGEL),
    adjust.method = "BH"
  ) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("ensembl_gene_id") %>%
  dplyr::left_join(
    y = geneData %>%
      dplyr::select(
        ensembl_gene_id,
        hgnc_symbol
      ),
    by = c("ensembl_gene_id" = "ensembl_gene_id")
  )