See also: P. Cingolani (2012), B. Knaus (2016) Tags: #rna-seq, #snp, #vcf
N/A
java -Xmx8g -jar snpEff.jar GRCh38.86 '$vcf_file' > ${vcf_file/.vcf/_ann.vcf}
.
GRCh38.86
is the database that is being used. In this case, the 38th version of the human genome. To list all the databases, run java -Xmx8g -jar snpEff.jar databases
Example VCF to Count Table script:
library(tidyverse)
library(vcfR)
final <- NA_character_
file_names <- list.files(
path = ".",
pattern = "*.vcf"
)
for (vcf_file in file_names) {
vcf <- read.vcfR(
vcf_file,
verbose = FALSE
)
HIGH <- grep("HIGH", vcf@fix[,"INFO"])
MODERATE <- grep("MODERATE", vcf@fix[,"INFO"])
vcf <- vcf[c(HIGH,MODERATE),]
test <- cbind(
extract.gt(vcf, element = "GT", as.numeric=FALSE),
extract.gt(vcf, element = "DP", as.numeric=TRUE),
ANN = extract.info(vcf, element = "ANN", as.numeric=FALSE)
)
if (!is.na(final)) {
final <- rbind(final, test)
} else {
final <- test
}
}
final <- final %>%
as.data.frame() %>%
tibble::rownames_to_column("rownames") %>%
readr::write_csv("all_variants.csv")