python 杂合极化 - 极化基于先前在VCF文件中识别杂合调用的可能性的杂合调用。 Usefu

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 杂合极化 - 极化基于先前在VCF文件中识别杂合调用的可能性的杂合调用。 Usefu相关的知识,希望对你有一定的参考价值。

#!bin/usr/python
'''
Heterozygote Polarization Script
usage:
bcftools view -M 2 <filename> | python het_polarization.py  | bcftools view -O b > <filename.het.polarized.bcf>

Tags variants 'pushed' to ref or alt as follows:

AA - Pushed towards reference
AB - Kept as het
BB - Pushed towards alternative
'''
 
import sys
import math

def phred2p(phred):
    return 10**(phred/-10.0)
 
def GL2PL(gl):
    """ Converts Genotype likelyhoods to phred scaled (PL) genotype likelyhoods. """
    return -int(gl*10)

def main():
    format_added = False
    for l in sys.stdin.xreadlines():
        l = l.strip()
        if l.startswith("#CHROM"):
            # Get Sample information and count
            samples = l.strip().split("\t")[9:]
        elif l.startswith("##FORMAT=<ID=GL,"):
            l = """##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">"""
        elif l.startswith("#"):
            # Add Info line for het polarization flag
            if l.startswith("##FORMAT") and format_added == False:
                format_added = True
                l = l + "\n##FORMAT=<ID=HP,Number=1,Type=String,Description=\"Flag used to mark whether a variant was polarized\">"
            # Pass comment lines.
        else:
            if l.split('\t')[8].find("GL") > 0:
                # Replace GL with PL scores.
                l = l.split('\t')
                GL_loc = l[8].split(":").index("GL")
                l[8] = l[8].replace("GL","PL")
                geno_set = []
                for k,v in enumerate(l[9:]):
                    GT = v.split(":")
                    GL_set = GT[GL_loc].split(",")
                    try:
                        GT[GL_loc] = ','.join([str(GL2PL(float(i))) for i in GL_set])
                    except:
                        GT[GL_loc] = ",".join(GL_set)
                    geno_set.append(':'.join(GT))
                    l = l[0:9] + geno_set
                l = '\t'.join(l)
            l = l.strip().split("\t")
            if l[8].find("PL") > -1:
                PL = l[8].split(":").index("PL")
                add_HP_flag = 0
                for k,v in enumerate(l[9:]):
                    PL_set = v.split(":")[PL].split(",")
                    v = v.split(":")
                    if len(PL_set) == 3:
                        PL_set = [phred2p(int(i)) for i in PL_set]
                        log_score = -math.log10(PL_set[0]/PL_set[2])
                        if add_HP_flag == 0:
                            if l[8].find("HP") == -1:
                                l[8] = l[8] + ":HP" 
                            add_HP_flag = 1
                        if (log_score < -2):
                            v[0] = "0/0"
                            if v[-1] not in ["AA","AB","BB"]:
                                l[k+9] = v + ["AA"]
                            else:
                                l[k+9] = v[0:-1] + ["AA"]
                        elif (log_score > 2):
                            v[0] = "1/1"
                            if v[-1] not in ["AA","AB","BB"]:
                                l[k+9] = v + ["BB"]
                            else:
                                l[k+9] = v[0:-1] + ["BB"]
                        else:
                            v[0] = "0/1"
                            if v[-1] not in ["AA","AB","BB"]:
                                l[k+9] = v + ["AB"]
                            else:
                                l[k+9] = v[0:-1] + ["AB"]
                    else:
                        l[k+9] = v + [":."]
                    l[k+9] = ":".join(l[k+9])
            l = "\t".join(l)
        sys.stdout.write(l + "\n")



if __name__ == '__main__':
    main()

以上是关于python 杂合极化 - 极化基于先前在VCF文件中识别杂合调用的可能性的杂合调用。 Usefu的主要内容,如果未能解决你的问题,请参考以下文章

极化码之高斯近似

基于梯度方向极化变换和聚类算法的图像主特征直线检测

HFSS怎么从结果看天线是左旋圆极化还是右旋圆极化

极化码小结

极化码之tal-vardy算法

卫星接收机种只有左旋和右旋。。怎么接收水平和垂直极化方式的信号?