r osd2014_dada2_inference
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了r osd2014_dada2_inference相关的知识,希望对你有一定的参考价值。
library(dada2); packageVersion("dada2")
# File parsing
wdir <- "/scratch/antonio/dada_osd2014"
setwd(wdir)
pathF <- file.path(wdir, "files")
pathR <- file.path(wdir, "files")
filtpathF <- file.path(pathF, "filtered") # Filtered forward files go into the pathF/filtered/ subdirectory
filtpathR <- file.path(pathR, "filtered") # ...
fastqFs <- sort(list.files(pathF, pattern="R1.fastq.gz"))
fastqRs <- sort(list.files(pathR, pattern="R2.fastq.gz"))
if(length(fastqFs) != length(fastqRs)) stop("Forward and reverse files do not match.")
sample.names <- sapply(strsplit(fastqFs, "_R1.fastq.gz"), `[`, 1)
filtpathF <- file.path(filtpathF, gsub("R1.fastq.gz","R1.filt.fastq.gz", fastqFs))
filtpathR <- file.path(filtpathR, gsub("R2.fastq.gz","R2.filt.fastq.gz", fastqRs))
fastqFs <- file.path(pathF, fastqFs)
fastqRs <- file.path(pathR, fastqRs)
qpF <- plotQualityProfile(fastqFs[1:5], aggregate = TRUE)
qpR <- plotQualityProfile(fastqRs[1:5], aggregate = TRUE)
# Filter and Trim reads (TruncLen and maxEE based on previous adjustments with reads.out mostly >= 70%)
ft <- filterAndTrim(fwd=fastqFs[1:5], filt=filtpathF[1:5],
rev=fastqRs[1:5], filt.rev=filtpathR[1:5],
truncLen=c(220,200), maxEE=c(8,8), maxN=0, rm.phix=TRUE,
compress=TRUE, verbose=TRUE, multithread=TRUE)
# Learn forward error rates
errF <- learnErrors(filtpathF, nread = 2e6, multithread = 64)
# Learn reverse error rates
errR <- learnErrors(filtpathR, nread = 2e6, multithread = 64)
ErrorPlotF <- plotErrors(errF, nominalQ = TRUE)
ErrorPlotR <- plotErrors(errR, nominalQ = TRUE)
#Dereplication
derepFs <- derepFastq(filtpathF, verbose=TRUE)
derepRs <- derepFastq(filtpathR, verbose=TRUE)
#Sample Inference
dadaFs <- dada(derepFs, err=errF, multithread = TRUE)
dadaRs <- dada(derepRs, err=errR, multithread = TRUE)
dadaFs[[1]]
#Merge paired reads
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose = TRUE)
head(mergers[[1]])
#Construct sequence table
seqtab <- makeSequenceTable(mergers)
dim(seqtab)
table(nchar(getSequences(seqtab)))
#Remove chimeras
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread = TRUE, verbose = TRUE)
dim(seqtab.nochim)
sum(seqtab.nochim)/sum(seqtab)
#Track reads throughout the pipeline
getN <- function(x) sum(getUniques(x))
track <- cbind(ft, sapply(dadaFs, getN), sapply(mergers, getN), rowSums(seqtab), rowSums(seqtab.nochim))
colnames(track) <- c("input", "filtered", "denoised", "merged", "tabled", "nonchim")
rownames(track) <- sample.names
head(track)
as.data.frame(track)
write.table(track, "/scratch/aprondzi/data/paired/DADA_All.txt", sep="\\t")
track_long <- as.data.frame(track) %>%
rownames_to_column(var = "sample") %>%
mutate(sample = gsub('/scratch/aprondzi/data/paired', '', sample)) %>%
gather(variable, value, -sample) %>%
tbl_df()
track_long$variable <- factor(track_long$variable, levels = colnames(track))
ggplot(track_long, aes(variable, value, fill = variable)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_brewer(palette="Paired") +
facet_wrap(~sample, scales = "free_y") +
xlab("DADA2 steps") +
ylab("Number of sequences")
saveRDS(seqtab, "/bioinf/home/aprondzi/")
"
以上是关于r osd2014_dada2_inference的主要内容,如果未能解决你的问题,请参考以下文章