Python:用 Python 编写的 bcftools 轻量级包装器(开发中)
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了用 Python 编写的 bcftools 轻量级包装器(开发中)的相关知识,希望对你有一定的参考价值。
import os, subprocess, uuid, re
import vcf.filters
class bcf(object):
    """Lightweight wrapper around the ``bcftools`` command-line tool.

    Constructing an instance runs ``bcftools view -h`` and
    ``bcftools query -l`` on the given file and parses the header text
    with regular expressions.  ``region`` and ``include`` queue pipeline
    stages and return ``self`` so calls can be chained; ``out`` runs the
    queued stages as a shell pipeline.

    The class no longer derives from the Python 2 builtin ``file``: the
    base object was never initialised, so its inherited I/O methods could
    not be used, and the builtin does not exist on Python 3.

    Attributes:
        file: Path handed to the constructor.
        ops: Queued pipeline stages, each a shell command string.
        header: Raw text printed by ``bcftools view -h``.
        samples: Non-empty lines printed by ``bcftools query -l``.
        metadata: ``(key, value)`` tuples for ``##key=value`` header lines.
        contigs: One dict per ``##contig=<...>`` header line.
        info_set: ``(id, number, type, description)`` tuples from ``##INFO``.
        filter_set: ``(id, description)`` tuples from ``##FILTER``.
        format_pattern: ``(id, number, type, description)`` tuples from
            ``##FORMAT`` (historical name; ``format_set`` is an alias).
    """

    _META_RE = re.compile(r'''^##(?P<key>[^<#]+?)=(?P<val>[^<#]+$)''', re.M)
    _CONTIG_RE = re.compile(r'''^##contig=<(?P<data>.*)>''', re.M)
    # Thanks to pyVCF for these patterns.
    _INFO_RE = re.compile(r'''\#\#INFO=<
        ID=(?P<id>[^,]+),
        Number=(?P<number>-?\d+|\.|[AG]),
        Type=(?P<type>Integer|Float|Flag|Character|String),
        Description="(?P<desc>[^"]*)"
        >''', re.VERBOSE)
    _FILTER_RE = re.compile(r'''\#\#FILTER=<
        ID=(?P<id>[^,]+),
        Description="(?P<desc>[^"]*)"
        >''', re.VERBOSE)
    _FORMAT_RE = re.compile(r'''\#\#FORMAT=<
        ID=(?P<id>.+),
        Number=(?P<number>-?\d+|\.|[AG]),
        Type=(?P<type>.+),
        Description="(?P<desc>.*)"
        >''', re.VERBOSE)

    def __init__(self, file):
        """Read the header and sample list of ``file`` via bcftools.

        Args:
            file: Path of the VCF/BCF file passed to bcftools.

        Raises:
            OSError: If the ``bcftools`` executable cannot be started.
        """
        # Start by storing basic information about the vcf/bcf
        self.file = file
        self.ops = []
        # Argument lists (no shell) keep odd characters in the file name
        # from being interpreted as shell syntax.
        self.header = self._run(["bcftools", "view", "-h", self.file])
        print(self.header)
        # Samples
        sample_text = self._run(["bcftools", "query", "-l", self.file])
        self.samples = [name for name in sample_text.split("\n") if name]
        # Parse Header
        self._parse_header(self.header)

    @staticmethod
    def _decode(raw):
        """Return subprocess output as text on both Python 2 and Python 3."""
        return raw if isinstance(raw, str) else raw.decode("utf-8")

    @staticmethod
    def _quote(text):
        """Quote ``text`` for safe interpolation into a shell command."""
        try:
            from shlex import quote
        except ImportError:  # Python 2 keeps the helper in ``pipes``
            from pipes import quote
        return quote(str(text))

    def _run(self, args):
        """Run the command given as an argument list and return its stdout."""
        raw = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0]
        return self._decode(raw)

    def _parse_header(self, header):
        """Fill the header-derived attributes from the header text."""
        # Meta Data
        self.metadata = self._META_RE.findall(header)
        # Contigs
        self.contigs = []
        for data in self._CONTIG_RE.findall(header):
            fields = {}
            for item in data.split(","):
                # partition() tolerates a field without "=" (empty value)
                # where indexing the result of split("=") would fail.
                key, _, value = item.partition("=")
                fields[key] = value
            self.contigs.append(fields)
        # Info
        self.info_set = self._INFO_RE.findall(header)
        # Filter
        self.filter_set = self._FILTER_RE.findall(header)
        # Format
        self.format_pattern = self._FORMAT_RE.findall(header)
        # Alias named consistently with info_set / filter_set.
        self.format_set = self.format_pattern

    def filename(self):
        """Return the path this wrapper was constructed with."""
        return self.file

    def meta(self, term=None):
        """Return header meta-data.

        Args:
            term: Optional key; when given, only that key's value is
                returned.

        Returns:
            The full list of ``(key, value)`` tuples when ``term`` is
            ``None``, otherwise the value of the first entry whose key
            equals ``term``.

        Raises:
            IndexError: If ``term`` is given but no entry has that key.
        """
        if term is None:
            return self.metadata
        for key, value in self.metadata:
            if key == term:
                return value
        raise IndexError("no meta-data entry named %r" % (term,))

    def region(self, chrom, start, end):
        """Queue a ``bcftools view -r chrom:start-end`` stage; return self."""
        interval = self._quote("%s:%s-%s" % (chrom, start, end))
        command = "bcftools view -r %s %s" % (interval, self._quote(self.file))
        # Print the command that is actually queued (the old message showed
        # a different sub-command than the one appended to ``ops``).
        print(command)
        self.ops.append(command)
        return self

    def include(self, depth):
        """Queue a ``bcftools filter --include 'DP<depth'`` stage; return self."""
        command = "bcftools filter --include %s" % self._quote("DP<%s" % (depth,))
        if not self.ops:
            # The first stage has no upstream pipe, so it must name the file.
            command += " " + self._quote(self.file)
        self.ops.append(command)
        return self

    def out(self):
        """Run the queued stages and return the resulting record lines.

        The stages are joined with ``|`` and terminated by
        ``bcftools view -H``.  With no stage queued the whole file is
        read directly.

        Returns:
            list: Non-empty stdout lines of the pipeline.
        """
        print(self.ops)
        print(' | '.join(self.ops))
        if self.ops:
            stages = list(self.ops) + ["bcftools view -H"]
        else:
            # A bare "bcftools view -H" would have no input file at all.
            stages = ["bcftools view -H %s" % self._quote(self.file)]
        # A shell is required for the pipes; every interpolated value was
        # quoted when its stage was built.
        raw = subprocess.Popen(' | '.join(stages), shell=True,
                               stdout=subprocess.PIPE).communicate()[0]
        return [line for line in self._decode(raw).split("\n") if line]
# Demo: wrap an example VCF, list the attributes of the wrapper object,
# then look up the "fileformat" entry of the header meta-data.
# The single-argument print(...) form behaves the same on Python 2 and 3.
x = bcf("vcf/mmp.vcf.gz")
print(dir(x))
print(x.meta(term="fileformat"))
以上是关于用 Python 编写的 bcftools 轻量级包装器(开发中)的主要内容。如果未能解决你的问题,请参考以下文章:
pysam - 多种格式基因组数据(sam/bam/vcf/bcf/cram/…)读写与处理模块(python)
bcftools 处理vcf文件,寻找多个vcf文件中突变的交集
bcftools常用命令总结
bcftools将vcf生成bgzip和index格式
用python编写一程序?
sh 平行的bcftools