http://www.htslib.org/workflow/#mapping_to_variant
分析流程:
step0—安装相关软件
step1—下载参考基因组和待分析数据,并将原始测序数据由sra格式转换为fastq格式
sratoolkit :https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software
sratoolkit documentation:https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc
如:
# Convert every .sra archive in the current directory to FASTQ.
# Each archive is deleted only after its own conversion succeeded, so a
# failed fastq-dump never costs the raw data.
for id in *.sra; do
  [ -e "$id" ] || continue   # glob matched nothing: skip the literal pattern
  if ~/biosoft/sratoolkit/sratoolkit.2.6.3-centos_linux64/bin/fastq-dump "$id"; then
    rm -- "$id"
  else
    echo "fastq-dump failed for $id; keeping the .sra file" >&2
  fi
done
step2—质控(FastQC)、过滤低质量reads(FASTX-Toolkit)、再次质控
(1)
# Run FastQC on every raw FASTQ file in the current directory.
# Globbing (instead of parsing `ls`) keeps unusual file names intact.
for id in *.fastq; do
  [ -e "$id" ] || continue   # glob matched nothing: skip the literal pattern
  ~/biosoft/fastqc/FastQC/fastqc "$id"
done
Next, filter out the low-quality reads based on the QC results.
## Save the loop below as a script, e.g. with `cat > filter.sh`
(2)
# Quality-filter and trim every FASTQ file in the current directory, writing
# <sample>_clean.fq.gz for each one (<sample> = file name up to its first dot).
# Meant to be saved and run as a script (note the `exit` on mktemp failure).
#
# You need to adjust the parameters yourself according to the QC results:
#   fastq_quality_filter -q 20 -p 80 : keep reads in which at least 80% of
#                                      the bases have quality >= 20
#   fastx_trimmer -f 1 -l 27         : keep bases 1..27 of each read
#   -Q33                             : qualities are Phred+33 encoded
#   -z                               : gzip the output

# Hidden, uniquely named scratch file: it is not matched by the *fastq glob
# and cannot collide with another run in the same directory.
filtered=$(mktemp ./.fastx_filtered.XXXXXX) || exit 1

for id in *fastq; do
  [ -e "$id" ] || continue   # glob matched nothing: skip the literal pattern
  printf '%s\n' "$id"

  # Skip the sample when filtering fails; otherwise the trimmer would read
  # the previous sample's filtered reads and save them under this name.
  ~/biosoft/fastx_toolkit_0.0.13/bin/fastq_quality_filter -v -q 20 -p 80 -Q33 \
    -i "$id" -o "$filtered" ||
    { echo "fastq_quality_filter failed for $id; skipping" >&2; continue; }

  ~/biosoft/fastx_toolkit_0.0.13/bin/fastx_trimmer -v -f 1 -l 27 \
    -i "$filtered" -Q33 -z -o "${id%%.*}_clean.fq.gz" ||
    echo "fastx_trimmer failed for $id" >&2
done

rm -f -- "$filtered"
(3)
# Re-run FastQC on the filtered/trimmed reads to confirm the cleanup worked.
# Globbing (instead of parsing `ls`) keeps unusual file names intact.
for id in *_clean.fq.gz; do
  [ -e "$id" ] || continue   # glob matched nothing: skip the literal pattern
  ~/biosoft/fastqc/FastQC/fastqc "$id"
done
step3—alignment(bowtie2)
(1)build index
# Build the Bowtie 2 index for hg19.
# Usage: bowtie2-build <reference FASTA> <index basename>; the basename given
# here is the one passed to `bowtie2 -x` in the alignment step.
~/biosoft/bowtie/bowtie2-2.2.9/bowtie2-build ~/biosoft/bowtie/hg19_index/hg19.fa ~/biosoft/bowtie/hg19_index/hg19
(2)alignment
# Align every *.fastq file in the current directory to hg19 with Bowtie 2,
# then derive two sorted and indexed BAM files per sample:
#   <sample>.highQuality.sorted.bam - alignments with MAPQ >= 30
#   <sample>.unique.sorted.bam      - reads without a secondary alignment
# <sample> is the input file name up to its first dot.
# NOTE(review): this loop reads the raw *.fastq files; if the filtered reads
# from step 2 are the ones to align, glob *_clean.fq.gz instead - confirm.
for id in *.fastq; do
  [ -e "$id" ] || continue   # glob matched nothing: skip the literal pattern
  printf '%s\n' "$id"
  sample=${id%%.*}

  # Bowtie 2 writes its alignment summary to stderr; keep it as a per-sample log.
  ~/biosoft/bowtie/bowtie2-2.2.9/bowtie2 -p 20 -x ~/biosoft/bowtie/hg19_index/hg19 \
    -U "$id" -S "${sample}.sam" 2>"${sample}.align.log" ||
    { echo "bowtie2 failed for $id, see ${sample}.align.log" >&2; continue; }

  # (3) *.sam to *.bam (http://www.htslib.org/doc/samtools.html)
  # -q 30 keeps alignments with mapping quality >= 30.
  # Optional: add -F 1548 to also drop unmapped, mate-unmapped, QC-failed and
  # duplicate reads (https://broadinstitute.github.io/picard/explain-flags.html).
  samtools view -bhS -q 30 "${sample}.sam" >"${sample}.highQuality.bam"
  # NOTE(review): `samtools sort in.bam out_prefix` is the legacy call form;
  # samtools >= 1.3 expects `samtools sort -o out.bam in.bam` - confirm the
  # installed version before running.
  samtools sort "${sample}.highQuality.bam" "${sample}.highQuality.sorted"
  samtools index "${sample}.highQuality.sorted.bam"

  # (4) unique mapping reads
  # Bowtie 2 adds an XS:i: tag only when a read has a second-best alignment,
  # so dropping those lines keeps reads that aligned to a single place.
  # NOTE(review): unaligned reads carry no XS:i: tag either and pass this
  # filter; add -F 4 to samtools view if they must be excluded - confirm.
  grep -v "XS:i:" "${sample}.sam" | samtools view -bhS - >"${sample}.unique.bam"
  samtools sort "${sample}.unique.bam" "${sample}.unique.sorted"
  samtools index "${sample}.unique.sorted.bam"
done
step4—peaks-calling-by-macs2