# pathway overlap GLUCOLD & NORM
library(tidyverse)
source("../helper_functions.r", verbose=TRUE)
# Locations used throughout this script ----
# Root under which the merged GLUCOLD/NORM tables are written.
out.dir <- "results.nosync"
# Root of the NORM results.
n.dir <- "../Results - NORM/"
# Root of the GLUCOLD results.
g.dir <- "../Results - GLUCOLD/"
# Pathway-level results ----
# Sub-directories (relative to both result roots) containing the results
# from the previous script.
dirs.to.compare <- c(
  "all/neg",
  "all/pos",
  "fdr.sign.neg/neg",
  "fdr.sign.neg/pos"
)

# For every sub-directory present in BOTH result roots, merge the CSV files
# that share a file name and write two outputs per file:
#   <name>.csv          full join of the GLUCOLD (.g) and NORM (.n) rows
#   <name>.overlap.csv  only rows with a non-missing NES in both cohorts
for (dir in dirs.to.compare) {
  g.dir.to.compare <- file.path(g.dir, dir)
  n.dir.to.compare <- file.path(n.dir, dir)

  if (!dir.exists(g.dir.to.compare) || !dir.exists(n.dir.to.compare)) {
    # Report the skip instead of dropping the comparison silently.
    message(
      "Skipping '", dir, "': directory missing in GLUCOLD and/or NORM results"
    )
    next
  }

  dest.dir <- file.path(out.dir, dir)
  # showWarnings = FALSE: re-running the script must not warn about an
  # output directory that already exists.
  dir.create(dest.dir, recursive = TRUE, showWarnings = FALSE)

  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # that only files whose name really ends in ".csv" are picked up.
  g.dir.files <- list.files(g.dir.to.compare, pattern = "\\.csv$")
  n.dir.files <- list.files(n.dir.to.compare, pattern = "\\.csv$")

  for (file.name in intersect(g.dir.files, n.dir.files)) {
    # NES and NOM.p.val are coerced to numeric; values that cannot be parsed
    # as numbers become NA.
    g.file <- readr::read_csv(file.path(g.dir.to.compare, file.name)) %>%
      dplyr::mutate(
        NES = as.numeric(NES),
        NOM.p.val = as.numeric(NOM.p.val)
      )
    n.file <- readr::read_csv(file.path(n.dir.to.compare, file.name)) %>%
      dplyr::mutate(
        NES = as.numeric(NES),
        NOM.p.val = as.numeric(NOM.p.val)
      )

    # Row counts of the individual tables, taken before joining.
    g.n <- nrow(g.file)
    n.n <- nrow(n.file)

    # Keep every pathway/miRNA pair seen in either cohort; columns that are
    # not join keys and occur in both tables get a ".g" / ".n" suffix.
    new.file <- g.file %>%
      dplyr::full_join(
        y = n.file,
        by = c("NAME", "mirna"),
        suffix = c(".g", ".n")
      )

    # meta_zscore_df() is not defined in this file (presumably it comes from
    # the sourced helper_functions.r -- TODO confirm). It is only called when
    # both input tables contain at least one row.
    if (g.n > 0 && n.n > 0) {
      new.file <- meta_zscore_df(
        new.file,
        "NOM.p.val.g", "NOM.p.val.n",
        "NES.g", "NES.n",
        g.n, n.n
      )
    }

    # 1) Full merged table, under the original file name.
    readr::write_csv(new.file, file.path(dest.dir, file.name))

    # 2) Overlap only: rows with an NES in both cohorts. The extension is
    #    stripped with an anchored, escaped regex so that only a trailing
    #    ".csv" is removed.
    new.file %>%
      dplyr::filter(!is.na(NES.g) & !is.na(NES.n)) %>%
      readr::write_csv(
        file.path(
          dest.dir,
          paste0(
            stringr::str_remove(file.name, pattern = "\\.csv$"),
            ".overlap.csv"
          )
        )
      )
  }
}
# Pathway genes ----
# Sub-directories (relative to both result roots) holding the per-pathway
# gene tables.
dirs.to.compare <- c(
  "all/pathway.genes",
  "fdr.sign.neg/pathway.genes"
)

# For every sub-directory present in BOTH result roots, merge the CSV files
# that share a file name and write two outputs per file:
#   <name>.csv          full join of the GLUCOLD (.g) and NORM (.n) rows
#   <name>.overlap.csv  only rows with a non-missing RUNNING.ES in both cohorts
for (dir in dirs.to.compare) {
  g.dir.to.compare <- file.path(g.dir, dir)
  n.dir.to.compare <- file.path(n.dir, dir)

  if (!dir.exists(g.dir.to.compare) || !dir.exists(n.dir.to.compare)) {
    # Report the skip instead of dropping the comparison silently.
    message(
      "Skipping '", dir, "': directory missing in GLUCOLD and/or NORM results"
    )
    next
  }

  dest.dir <- file.path(out.dir, dir)
  # showWarnings = FALSE: re-running the script must not warn about an
  # output directory that already exists.
  dir.create(dest.dir, recursive = TRUE, showWarnings = FALSE)

  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # that only files whose name really ends in ".csv" are picked up.
  g.dir.files <- list.files(g.dir.to.compare, pattern = "\\.csv$")
  n.dir.files <- list.files(n.dir.to.compare, pattern = "\\.csv$")

  for (file.name in intersect(g.dir.files, n.dir.files)) {
    g.file <- readr::read_csv(file.path(g.dir.to.compare, file.name))
    n.file <- readr::read_csv(file.path(n.dir.to.compare, file.name))

    # Keep every miRNA/pathway/gene triple seen in either cohort; columns
    # that are not join keys and occur in both tables get a ".g" / ".n"
    # suffix.
    new.file <- g.file %>%
      dplyr::full_join(
        y = n.file,
        by = c("miRNA", "pathway", "SYMBOL"),
        suffix = c(".g", ".n")
      )

    # 1) Full merged table, under the original file name.
    readr::write_csv(new.file, file.path(dest.dir, file.name))

    # 2) Overlap only: rows with a RUNNING.ES value in both cohorts. The
    #    extension is stripped with an anchored, escaped regex so that only a
    #    trailing ".csv" is removed.
    new.file %>%
      dplyr::filter(!is.na(RUNNING.ES.g) & !is.na(RUNNING.ES.n)) %>%
      readr::write_csv(
        file.path(
          dest.dir,
          paste0(
            stringr::str_remove(file.name, pattern = "\\.csv$"),
            ".overlap.csv"
          )
        )
      )
  }
}