# Zettelkasten

# pathway overlap GLUCOLD & NORM

library(tidyverse)
source("../helper_functions.r", verbose=TRUE)

# Root directories of the per-cohort results; the sub-directories listed in
# `dirs.to.compare` are resolved relative to these.
g.dir <- "../Results - GLUCOLD/"
n.dir <- "../Results - NORM/"

# Root directory under which the merged tables are written.
out.dir <- "results.nosync"


# Directories containing the results from the previous script.
# Each entry is a sub-path that is looked up under both `g.dir` and `n.dir`.
dirs.to.compare <- c(
  "all/neg",
  "all/pos",
  "fdr.sign.neg/neg",
  "fdr.sign.neg/pos"
)

# For every sub-directory that exists in both cohorts, merge the CSV files that
# share a file name and write two tables per file below `out.dir`:
#   <name>.csv          - full join of both cohorts on NAME and mirna
#   <name>.overlap.csv  - only the rows that have an NES value in both cohorts
for (dir in dirs.to.compare) {
  g.dir.to.compare <- file.path(g.dir, dir)
  n.dir.to.compare <- file.path(n.dir, dir)

  # Nothing to compare when either cohort lacks this result directory.
  if (!dir.exists(g.dir.to.compare) || !dir.exists(n.dir.to.compare)) {
    next
  }

  dest.dir <- file.path(out.dir, dir)
  # The destination may already exist from an earlier run; do not warn then.
  dir.create(dest.dir, recursive = TRUE, showWarnings = FALSE)

  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # that only names ending in ".csv" are picked up.
  g.dir.files <- list.files(g.dir.to.compare, pattern = "\\.csv$")
  n.dir.files <- list.files(n.dir.to.compare, pattern = "\\.csv$")

  for (file.name in intersect(g.dir.files, n.dir.files)) {
    # Coerce the columns used below to numeric in case they were parsed as
    # text.
    g.file <- readr::read_csv(file.path(g.dir.to.compare, file.name)) %>%
      dplyr::mutate(
        NES = as.numeric(NES),
        NOM.p.val = as.numeric(NOM.p.val)
      )
    n.file <- readr::read_csv(file.path(n.dir.to.compare, file.name)) %>%
      dplyr::mutate(
        NES = as.numeric(NES),
        NOM.p.val = as.numeric(NOM.p.val)
      )

    # Row counts of the individual tables, taken before the join.
    g.n <- nrow(g.file)
    n.n <- nrow(n.file)

    # Columns other than the join keys get a ".g" (GLUCOLD) or ".n" (NORM)
    # suffix when they occur in both tables.
    new.file <- g.file %>%
      dplyr::full_join(
        y = n.file,
        by = c("NAME", "mirna"),
        suffix = c(".g", ".n")
      )

    # NOTE(review): `meta_zscore_df` is not defined in this file (presumably
    # it comes from helper_functions.r); it is only called when both tables
    # have at least one row.
    if (g.n > 0 && n.n > 0) {
      new.file <- meta_zscore_df(
        new.file,
        "NOM.p.val.g", "NOM.p.val.n",
        "NES.g", "NES.n",
        g.n, n.n
      )
    }

    # Full joined table.
    readr::write_csv(new.file, file.path(dest.dir, file.name))

    # Overlap table: rows present (non-missing NES) in both cohorts. The
    # extension is stripped with an escaped, anchored pattern so that only the
    # trailing ".csv" is removed.
    overlap.name <- paste0(
      stringr::str_remove(file.name, pattern = "\\.csv$"),
      ".overlap.csv"
    )
    new.file %>%
      dplyr::filter(!is.na(NES.g) & !is.na(NES.n)) %>%
      readr::write_csv(file.path(dest.dir, overlap.name))
  }
}



# Pathway genes
# Same comparison as for the enrichment tables, but for the per-pathway gene
# tables: each sub-path is looked up under both `g.dir` and `n.dir`.
dirs.to.compare <- c(
  "all/pathway.genes",
  "fdr.sign.neg/pathway.genes"
)

# For every sub-directory that exists in both cohorts, merge the CSV files that
# share a file name and write two tables per file below `out.dir`:
#   <name>.csv          - full join on miRNA, pathway and SYMBOL
#   <name>.overlap.csv  - only the rows with a RUNNING.ES value in both cohorts
for (dir in dirs.to.compare) {
  g.dir.to.compare <- file.path(g.dir, dir)
  n.dir.to.compare <- file.path(n.dir, dir)

  # Nothing to compare when either cohort lacks this result directory.
  if (!dir.exists(g.dir.to.compare) || !dir.exists(n.dir.to.compare)) {
    next
  }

  dest.dir <- file.path(out.dir, dir)
  # The destination may already exist from an earlier run; do not warn then.
  dir.create(dest.dir, recursive = TRUE, showWarnings = FALSE)

  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # that only names ending in ".csv" are picked up.
  g.dir.files <- list.files(g.dir.to.compare, pattern = "\\.csv$")
  n.dir.files <- list.files(n.dir.to.compare, pattern = "\\.csv$")

  for (file.name in intersect(g.dir.files, n.dir.files)) {
    g.file <- readr::read_csv(file.path(g.dir.to.compare, file.name))
    n.file <- readr::read_csv(file.path(n.dir.to.compare, file.name))

    # Columns other than the join keys get a ".g" (GLUCOLD) or ".n" (NORM)
    # suffix when they occur in both tables.
    new.file <- g.file %>%
      dplyr::full_join(
        y = n.file,
        by = c("miRNA", "pathway", "SYMBOL"),
        suffix = c(".g", ".n")
      )

    # Full joined table.
    readr::write_csv(new.file, file.path(dest.dir, file.name))

    # Overlap table: rows with a non-missing RUNNING.ES in both cohorts. The
    # extension is stripped with an escaped, anchored pattern so that only the
    # trailing ".csv" is removed.
    overlap.name <- paste0(
      stringr::str_remove(file.name, pattern = "\\.csv$"),
      ".overlap.csv"
    )
    new.file %>%
      dplyr::filter(!is.na(RUNNING.ES.g) & !is.na(RUNNING.ES.n)) %>%
      readr::write_csv(file.path(dest.dir, overlap.name))
  }
}