Zettelkasten

source(file.path("..","_preloader.r"), verbose=TRUE)

# load
PsychMasterTable <- readr::read_csv(
    file.path(resultsDir, "results.03 - 1", "psych.patientTable.csv")
  ) %>%
  tibble::column_to_rownames("row.names")

ge <- readr::read_csv(
    file.path(resultsDir, "results.03 - 1", "ge.csv")
  ) %>%
  tibble::column_to_rownames(
    "entrez.id"
  )

gsva.groups.raw <- readr::read_csv(
    file.path(resultsDir, "results.07/fdr.sign.neg/pathway.genes/enriched.csv")
  ) %>%
  dplyr::filter(
    !is.na(RUNNING.ES.g) # GLUCOLD is leading
  )

out.dir = file.path(resultsDir, "results.08")
dir.create(out.dir, recursive = TRUE)

######
# GSVA
######
gsva.groups <- list()
for (c.miRNA in unique(gsva.groups.raw$miRNA)) {
  gsva.groups[[c.miRNA]] <- gsva.groups.raw %>%
    dplyr::filter(miRNA == c.miRNA) %>%
    dplyr::pull(SYMBOL)
}

# Ok, since I have only 4 or 1 overlapping gene, I can't use GSEA. Use GSVA, T-test and plot the results.
# Groups are miRNA, values in groups are the overlapping genes. Input are gene expression values.
source(file.path(singleCellSignaturesDir, "create signature table.r"), chdir=T, verbose=T)

gene.identifiers.overlap = miRNAGeneExprGeneTable %>%
  dplyr::filter(
    ensembl_gene_id %in% rownames(miRNAGeneExprData)
  ) %>%
  dplyr::left_join(
    y = genes %>%
      dplyr::select(
        gene,
        cellType,
        Color,
        order
      ),
      by = c("hgnc_symbol" = "gene")
  ) %>%
  dplyr::rename(
    cell.type = cellType,
    color = Color,
    ensembl.id = ensembl_gene_id,
    symbol = hgnc_symbol
  ) %>%
  dplyr::arrange(
    cell.type,
    symbol
  )

data.to.plot = ge[ # limma voomed
    gene.identifiers.overlap$ensembl.id,
    rownames(PsychMasterTable)
  ] %>%
  as.data.frame() %>%
  tibble::rownames_to_column("row.name") %>%
  dplyr::left_join(
    y = gene.identifiers.overlap %>%
      dplyr::select(
        ensembl.id,
        symbol,
        order,
        color
      ),
    by = c(
        "row.name" = "ensembl.id"
      )
  ) %>%
  dplyr::group_by( symbol ) %>%
  dplyr::mutate(
    row.name = case_when(
      dplyr::n() == 1 ~ symbol,
      dplyr::n() > 1 ~ paste0(symbol, " (", row.name ,")")
    )
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange( dplyr::desc(as.numeric(order)) ) %>%
  dplyr::select(
    -order,
    -symbol
  )

gsva.data = data.to.plot %>%
  dplyr::filter(
    !is.na(row.name) & row.name != ""
  ) %>%
  tibble::column_to_rownames("row.name") %>%
  dplyr::select(-color) %>%
  as.matrix()

gsva_res = gsva(
  gsva.data,
  gsva.groups,
  mx.diff = TRUE,
  verbose = FALSE,
  parallel.sz = 4
)


t.test.groups = PsychMasterTable %>%
  tibble::rownames_to_column(
    "Sample.Id"
  ) %>%
  dplyr::group_by(
    Current.Smoke
  ) %>%
  dplyr::select(
    Current.Smoke,
    Sample.Id
  ) %>%
  dplyr::group_split()


t.test.res = data.frame(
  name = character(),
  p.val = numeric(),
  statistic = numeric(),
  stderr = numeric()
)


for (i in 1:nrow(gsva_res)) {
  name = row.names(gsva_res)[i]

  xxx = t.test(
    gsva_res[name, t.test.groups[[1]]$Sample.Id],
    gsva_res[name, t.test.groups[[2]]$Sample.Id]
  )

  t.test.res = rbind(
    t.test.res,
    data.frame(
      name = name,
      p.val = xxx$p.value,
      statistic = xxx$statistic,
      stderr = xxx$stderr
    )
  )

  rm(xxx, name)
}



readr::write_csv(
  gsva_res %>%
    as.data.frame() %>%
    tibble::rownames_to_column("row.names") %>%
    dplyr::mutate(
      split = 0
    ) %>%
    dplyr::select(
      row.names,
      dplyr::one_of(t.test.groups[[1]]$Sample.Id),
      split,
      dplyr::one_of(t.test.groups[[2]]$Sample.Id)
    ),
  path = file.path(out.dir, "values.csv"),
)

readr::write_csv(
  t.test.res %>% tibble::rownames_to_column("row.names"),
  path = file.path(out.dir, "t.test results.csv"),
)

plotData = gsva_res %>%
  as.data.frame() %>%
  tibble::rownames_to_column(
    "miRNA"
  ) %>%
  tidyr::gather(
    key = "Sample.Id",
    value = "gsva.val",
    -miRNA
  ) %>% 
  dplyr::left_join(
    y = PsychMasterTable %>%
      tibble::rownames_to_column(
        "Sample.Id"
      ) %>%
      dplyr::group_by(
        Current.Smoke
      ) %>%
      dplyr::select(
        Current.Smoke,
        Sample.Id
      ),
    by = c("Sample.Id" = "Sample.Id")
  ) %>%
  dplyr::mutate(
    Current.Smoke = ifelse(
      Current.Smoke,
      "Smoker",
      "Ex smoker"
    )
  )

for (miRNA.id in unique(plotData$miRNA)) {
  df = plotData %>%
    dplyr::filter(
      miRNA == miRNA.id
    )

  ggplot2::ggplot(
      data = df,
      mapping = ggplot2::aes(
        x = Current.Smoke,
        y = gsva.val
      )
    ) +
    ggplot2::geom_boxplot(
      mapping = ggplot2::aes(
        fill = Current.Smoke
      )
    ) +
    ggplot2::geom_point() +
    ggpubr::stat_compare_means(
      comparisons = list(c("Smoker", "Ex smoker")),
      label = "p.signif",
      label.y.npc = "top",
      method = "t.test"
    ) +
    ggplot2::labs(
      title = miRNA.id,
      x = "miRNA values",
      y = "GSVA values",
      color = "Smoking Status"
    ) +
    ggplot2::theme(
      legend.position = "none"
    )

  ggplot2::ggsave(
    filename = file.path(out.dir, paste0("gsva.results.", miRNA.id, ".png")),
    plot = last_plot(),
    scale = 1,
    width = 15,
    height = 15,
    units = "cm"
  )

  df %>%
    readr::write_csv(
      path = file.path(out.dir, paste0("gsva.results.", miRNA.id, ".csv"))
    )

  rm(df)
}


data.to.plot %>%
  readr::write_csv(
    file.path(out.dir, "data.to.plot.csv")
  )

gene.identifiers.overlap %>%
  readr::write_csv(
    file.path(out.dir, "gene.identifiers.overlap.csv")
  )

gsva.data %>%
  as.data.frame() %>%
  tibble::rownames_to_column("row.names") %>%
  readr::write_csv(
    file.path(out.dir, "gsva.data.csv")
  )