awk 打印 4 列和 $8 的子字符串
Posted
技术标签:
【中文标题】awk 打印 4 列和 $8 的子字符串【英文标题】:awk print 4 columns and a substring of $8 【发布时间】:2020-03-17 09:35:09 【问题描述】:我有一个 VCF 格式的文件,其第 8 列(INFO 列)中包含多项以分号分隔的信息:
# ... rest of file ...
1 11850891 rs753917964 C T 22276.39 PASS non_cancer_nhomalt_nfe_seu=0;AC_eas=0;AN_eas=18390;AF_eas=0.00000e+00;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=1;non_neuro_AN_nfe_female=39830;non_neuro_AF_nfe_female=2.51067e-05;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=16214;non_neuro_AF_afr=0.00000e+00;non_neuro_nhomalt_afr=0;controls_AC_raw=2;controls_AN_raw=109408;controls_AF_raw=1.82802e-05;controls_nhomalt_raw=0;non_cancer_AC_eas=0;non_cancer_AN_eas=17690;non_cancer_AF_eas=0.00000e+00;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=20086;non_cancer_AF_amr_female=0.00000e+00;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=14780;non_neuro_AF_nfe_swe=0.00000e+00;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=2;controls_AN_male=58114;controls_AF_male=3.44151e-05;controls_nhomalt_male=0;non_topmed_AC_male=5;non_topmed_AN_male=133538;non_topmed_AF_male=3.74425e-05;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=114;controls_AF_eas_jpn=0.00000e+00;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=19148;controls_AF_nfe_female=0.00000e+00;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=30522;non_neuro_AF_amr=0.00000e+00;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=6838;non_neuro_AF_eas_female=0.00000e+00;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=5180;AF_asj_male=0.00000e+00;nhomalt_asj_male=0;controls_AC_nfe_male=1;controls_AN_nfe_male=23620;controls_AF_nfe_male=4.23370e-05;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=16732;non_neuro_AF_fin=0.00000e+00;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=2;non_topmed_AN_sas=30616;non_topmed_AF_sas=6.53253e-05;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=1;non_cancer_AN_nfe_female=44296;non_cancer_AF_nfe_female=2.25754e-05;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=2928;AF_oth_female=0.00000e+00;nhomalt_oth
_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=9572;non_cancer_AF_asj=0.00000e+00;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=26134;AF_nfe_swe=0.00000e+00;nhomalt_nfe_swe=0;controls_AC_nfe=1;controls_AN_nfe=42768;controls_AF_nfe=2.33820e-05;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=986;controls_AF_oth_female=0.00000e+00;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=2320;controls_AF_asj=0.00000e+00;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=12256;non_neuro_AF_amr_male=0.00000e+00;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=14452;controls_AF_nfe_nwe=0.00000e+00;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=42210;AF_nfe_nwe=0.00000e+00;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=4760;controls_AF_nfe_seu=0.00000e+00;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=4214;controls_AF_sas_female=0.00000e+00;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=18266;non_neuro_AF_amr_female=0.00000e+00;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=124;non_cancer_AF_eas_jpn=0.00000e+00;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=2;non_neuro_AN_nfe_onf=27808;non_neuro_AF_nfe_onf=7.19217e-05;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=9062;non_topmed_AF_eas_male=0.00000e+00;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=152;AF_eas_jpn=0.00000e+00;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=5672;non_cancer_AF_afr_male=0.00000e+00;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=0;non_cancer_AN_afr=14902;non_cancer_AF_afr=0.00000e+00;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=10226;controls_AF_amr_female=0.00000e+00;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=9082;non_neuro_AF_fin_male=0.00000e+00;non_neuro_nhomalt_fin_male=0;AC_female=1;AN_female=
115558;AF_female=8.65366e-06;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=452;non_neuro_AF_nfe_bgr=0.00000e+00;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=2476;non_neuro_AF_oth_male=0.00000e+00;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=238;non_topmed_AF_nfe_est=0.00000e+00;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=41090;non_topmed_AF_nfe_nwe=0.00000e+00;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=14262;non_topmed_AF_amr_male=0.00000e+00;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=34258;non_cancer_AF_amr=0.00000e+00;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=26072;non_topmed_AF_nfe_swe=0.00000e+00;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=2;non_topmed_AN_nfe_onf=30190;non_topmed_AF_nfe_onf=6.62471e-05;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=1888;controls_AF_eas_kor=0.00000e+00;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=0;non_topmed_AN_eas_oea=14416;non_topmed_AF_eas_oea=0.00000e+00;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=4258;controls_AF_eas_male=0.00000e+00;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=928;controls_AF_oth_male=0.00000e+00;controls_nhomalt_oth_male=0;non_topmed_AC=6;non_topmed_AN=244846;non_topmed_AF=2.45052e-05;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=13392;controls_AF_fin=0.00000e+00;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=3818;AF_eas_kor=0.00000e+00;nhomalt_eas_kor=0;non_neuro_AC_nfe=4;non_neuro_AN_nfe=89556;non_neuro_AF_nfe=4.46648e-05;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=7650;non_neuro_AF_fin_female=0.00000e+00;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=3;non_cancer_AN_nfe_male=58440;non_cancer_AF_nfe_male=5.13347e-05;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=0;controls_AN_
eas_oea=7044;controls_AF_eas_oea=0.00000e+00;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=2;non_topmed_AN_nfe_seu=11408;non_topmed_AF_nfe_seu=1.75316e-04;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=4788;controls_AF_eas_female=0.00000e+00;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=9998;non_topmed_AF_asj=0.00000e+00;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=1;controls_AN_nfe_onf=9998;controls_AF_nfe_onf=1.00020e-04;controls_nhomalt_nfe_onf=0;non_neuro_AC=6;non_neuro_AN=208122;non_neuro_AF=2.88292e-05;non_neuro_nhomalt=0;AC_eas_oea=0;AN_eas_oea=14420;AF_eas_oea=0.00000e+00;nhomalt_eas_oea=0;non_topmed_AC_nfe=4;non_topmed_AN_nfe=111660;non_topmed_AF_nfe=3.58230e-05;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=5620;non_cancer_AF_oth=0.00000e+00;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=6;non_topmed_AN_raw=244878;non_topmed_AF_raw=2.45020e-05;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=216;non_neuro_AF_nfe_est=0.00000e+00;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=3186;non_topmed_AF_oth_male=0.00000e+00;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=2938;non_cancer_AF_oth_male=0.00000e+00;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=242;AF_nfe_est=0.00000e+00;nhomalt_nfe_est=0;non_cancer_AC_afr_female=0;non_cancer_AN_afr_female=9230;non_cancer_AF_afr_female=0.00000e+00;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=4686;non_topmed_AF_afr_male=0.00000e+00;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=9066;AF_eas_male=0.00000e+00;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=9046;controls_AF_eas=0.00000e+00;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=6574;non_neuro_AF_eas_male=0.00000e+00;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=0;non_cancer_AN_nfe_nwe=39490;non_cancer_AF_nfe_nwe=0.00000e+00;non_cancer_nhomalt_n
fe_nwe=0;controls_AC_sas=1;controls_AN_sas=15690;controls_AF_sas=6.37349e-05;controls_nhomalt_sas=0;non_neuro_AC_sas_male=2;non_neuro_AN_sas_male=23066;non_neuro_AF_sas_male=8.67077e-05;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=3126;non_neuro_AF_asj_male=0.00000e+00;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=2526;non_cancer_AF_nfe_bgr=0.00000e+00;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=1914;controls_AF_oth=0.00000e+00;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=8946;non_cancer_AF_eas_female=0.00000e+00;non_cancer_nhomalt_eas_female=0;AC_nfe=4;AN_nfe=113750;AF_nfe=3.51648e-05;nhomalt_nfe=0;non_topmed_AC_female=1;non_topmed_AN_female=111308;non_topmed_AF_female=8.98408e-06;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=6212;non_neuro_AF_asj=0.00000e+00;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=9324;non_topmed_AF_eas_female=0.00000e+00;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=6;non_neuro_AN_raw=208136;non_neuro_AF_raw=2.88273e-05;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=18386;non_topmed_AF_eas=0.00000e+00;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=11272;non_topmed_AF_fin_male=0.00000e+00;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=4976;non_cancer_AF_asj_male=0.00000e+00;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=21646;AF_fin=0.00000e+00;nhomalt_fin=0;AC_nfe_male=3;AN_nfe_male=63598;AF_nfe_male=4.71713e-05;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=3818;non_topmed_AF_eas_kor=0.00000e+00;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=6884;controls_AF_amr_male=0.00000e+00;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=0;non_neuro_AN_eas_oea=9446;non_neuro_AF_eas_oea=0.00000e+00;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=7544;AF_sas_female=0.00000e
+00;nhomalt_sas_female=0;controls_AC_afr_female=0;controls_AN_afr_female=4240;controls_AF_afr_female=0.00000e+00;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=17110;controls_AF_amr=0.00000e+00;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=152;non_topmed_AF_eas_jpn=0.00000e+00;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=4900;AF_asj_female=0.00000e+00;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=2662;non_topmed_AF_nfe_bgr=0.00000e+00;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=158;non_cancer_AF_nfe_est=0.00000e+00;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=0;non_neuro_AN_eas=13412;non_neuro_AF_eas=0.00000e+00;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=4;non_cancer_AN_nfe=102736;non_cancer_AF_nfe=3.89347e-05;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=5;non_neuro_AN_male=112470;non_neuro_AF_male=4.44563e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=0;non_neuro_AN_sas_female=7542;non_neuro_AF_sas_female=0.00000e+00;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=10080;AF_asj=0.00000e+00;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=70;controls_AF_nfe_est=0.00000e+00;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=4854;non_topmed_AF_asj_female=0.00000e+00;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=25290;non_cancer_AF_nfe_swe=0.00000e+00;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=6;non_cancer_AN=236934;non_cancer_AF=2.53235e-05;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=6064;non_topmed_AF_oth=0.00000e+00;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=10372;non_topmed_AF_fin_female=0.00000e+00;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=10362;non_cancer_AF_fin_female=0.00000e+00;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=6140;AF_oth=0.00000e+00;nhomalt_oth=0;non_neuro_AC_nfe_male=3;non_neuro_AN_
nfe_male=49726;non_neuro_AF_nfe_male=6.03306e-05;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=51290;controls_AF_female=0.00000e+00;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=21630;non_cancer_AF_fin=0.00000e+00;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=21644;non_topmed_AF_fin=0.00000e+00;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=0;non_cancer_AN_eas_oea=13792;non_cancer_AF_eas_oea=0.00000e+00;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=1;non_topmed_AN_nfe_female=48806;non_topmed_AF_nfe_female=2.04893e-05;non_topmed_nhomalt_nfe_female=0;non_cancer_AC_sas_male=2;non_cancer_AN_sas_male=23032;non_cancer_AF_sas_male=8.68357e-05;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=1106;controls_AF_asj_male=0.00000e+00;controls_nhomalt_asj_male=0;non_cancer_AC_raw=6;non_cancer_AN_raw=236958;non_cancer_AF_raw=2.53209e-05;non_cancer_nhomalt_raw=0;non_cancer_AC_eas_male=0;non_cancer_AN_eas_male=8744;non_cancer_AF_eas_male=0.00000e+00;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=5144;non_topmed_AF_asj_male=0.00000e+00;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=4866;non_neuro_AF_oth=0.00000e+00;non_neuro_nhomalt_oth=0;AC_male=5;AN_male=135906;AF_male=3.67901e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=6474;controls_AF_fin_female=0.00000e+00;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=678;controls_AF_nfe_bgr=0.00000e+00;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=1214;controls_AF_asj_female=0.00000e+00;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=14320;AF_amr_male=0.00000e+00;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=20270;AF_amr_female=0.00000e+00;nhomalt_amr_female=0;non_topmed_AC_sas_male=2;non_topmed_AN_sas_male=23072;non_topmed_AF_sas_male=8.66852e-05;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=3212;AF_oth_male
=0.00000e+00;nhomalt_oth_male=0;non_cancer_AC_sas=2;non_cancer_AN_sas=30526;non_cancer_AF_sas=6.55179e-05;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=2;non_neuro_AN_nfe_seu=10942;non_neuro_AF_nfe_seu=1.82782e-04;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=3774;controls_AF_sas_male=8.71384e-05;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=0;non_topmed_AN_sas_female=7544;non_topmed_AF_sas_female=0.00000e+00;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=12022;non_topmed_AF_afr=0.00000e+00;non_topmed_nhomalt_afr=0;controls_AC=2;controls_AN=109404;controls_AF=1.82809e-05;AN_popmax=30616;AF_popmax=6.53253e-05;nhomalt_popmax=0;non_cancer_AF_eas_kor=0.00000e+00;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=7336;non_topmed_AF_afr_female=0.00000e+00;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=1;controls_AN_sas_male=11476
1 11850892 rs373747884 G A 34745.78 PASS non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=114;controls_AF_eas_jpn=0.00000e+00;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=2;controls_AN_nfe_female=19148;controls_AF_nfe_female=1.04450e-04;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=1;non_neuro_AN_amr=30522;non_neuro_AF_amr=3.27633e-05;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=6840;non_neuro_AF_eas_female=0.00000e+00;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=5180;AF_asj_male=0.00000e+00;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=23620;controls_AF_nfe_male=0.00000e+00;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=16734;non_neuro_AF_fin=0.00000e+00;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=2;non_topmed_AN_sas=30614;non_topmed_AF_sas=6.53296e-05;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=4;non_cancer_AN_nfe_female=44298;non_cancer_AF_nfe_female=9.02975e-05;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=2928;AF_oth_female=0.00000e+00;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=9572;non_cancer_AF_asj=0.00000e+00;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=26134;AF_nfe_swe=0.00000e+00;nhomalt_nfe_swe=0;controls_AC_nfe=2;controls_AN_nfe=42768;controls_AF_nfe=4.67639e-05;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=986;controls_AF_oth_female=0.00000e+00;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=2320;controls_AF_asj=0.00000e+00;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=12256;non_neuro_AF_amr_male=0.00000e+00;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=2;controls_AN_nfe_nwe=14452;controls_AF_nfe_nwe=1.38389e-04;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=4;AN_nfe_nwe=42212;AF_nfe_nwe=9.47598e-05;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=4760;controls_AF_nfe_seu=0.00000e+00;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=1;controls_AN_s
as_female=4212;controls_AF_sas_female=2.37417e-04;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=1;non_neuro_AN_amr_female=18266;non_neuro_AF_amr_female=5.47465e-05;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=124;non_cancer_AF_eas_jpn=0.00000e+00;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=1;non_neuro_AN_nfe_onf=27808;non_neuro_AF_nfe_onf=3.59609e-05;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=2;non_topmed_AN_eas_male=9062;non_topmed_AF_eas_male=2.20702e-04;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=152;AF_eas_jpn=0.00000e+00;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=5672;non_cancer_AF_afr_male=0.00000e+00;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=1;non_cancer_AN_afr=14902;non_cancer_AF_afr=6.71051e-05;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=10226;controls_AF_amr_female=0.00000e+00;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=9084;non_neuro_AF_fin_male=0.00000e+00;non_neuro_nhomalt_fin_male=0;AC_female=7;AN_female=115558;AF_female=6.05756e-05;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=452;non_neuro_AF_nfe_bgr=0.00000e+00;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=2476;non_neuro_AF_oth_male=0.00000e+00;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=238;non_topmed_AF_nfe_est=0.00000e+00;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=4;non_topmed_AN_nfe_nwe=41092;non_topmed_AF_nfe_nwe=9.73425e-05;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=14262;non_topmed_AF_amr_male=0.00000e+00;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=1;non_cancer_AN_amr=34258;non_cancer_AF_amr=2.91903e-05;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=26072;non_topmed_AF_nfe_swe=0.00000e+00;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=1;non_topmed_AN_nfe_onf=30190;non_topmed_AF_nfe_onf=3.31236e-0
5;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=1888;controls_AF_eas_kor=0.00000e+00;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=2;non_topmed_AN_eas_oea=14418;non_topmed_AF_eas_oea=1.38715e-04;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=2;controls_AN_eas_male=4258;controls_AF_eas_male=4.69704e-04;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=928;controls_AF_oth_male=0.00000e+00;controls_nhomalt_oth_male=0;non_topmed_AC=11;non_topmed_AN=244848;non_topmed_AF=4.49258e-05;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=13394;controls_AF_fin=0.00000e+00;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=3818;AF_eas_kor=0.00000e+00;nhomalt_eas_kor=0;non_neuro_AC_nfe=4;non_neuro_AN_nfe=89554;non_neuro_AF_nfe=4.46658e-05;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=7650;non_neuro_AF_fin_female=0.00000e+00;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=1;non_cancer_AN_nfe_male=58442;non_cancer_AF_nfe_male=1.71110e-05;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=2;controls_AN_eas_oea=7044;controls_AF_eas_oea=2.83930e-04;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=11408;non_topmed_AF_nfe_seu=0.00000e+00;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=4788;controls_AF_eas_female=0.00000e+00;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=9998;non_topmed_AF_asj=0.00000e+00;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=9998;controls_AF_nfe_onf=0.00000e+00;controls_nhomalt_nfe_onf=0;non_neuro_AC=9;non_neuro_AN=208120;non_neuro_AF=4.32443e-05;non_neuro_nhomalt=0;AC_eas_oea=2;AN_eas_oea=14422;AF_eas_oea=1.38677e-04;nhomalt_eas_oea=0;non_topmed_AC_nfe=5;non_topmed_AN_nfe=111662;non_topmed_AF_nfe=4.47780e-05;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=5620;non_cancer_AF_oth=0.00000e+00;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=11;non_topmed_AN_raw=244878;non_topme
d_AF_raw=4.49203e-05;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=216;non_neuro_AF_nfe_est=0.00000e+00;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=3186;non_topmed_AF_oth_male=0.00000e+00;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=2938;non_cancer_AF_oth_male=0.00000e+00;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=242;AF_nfe_est=0.00000e+00;nhomalt_nfe_est=0;non_cancer_AC_afr_female=1;non_cancer_AN_afr_female=9230;non_cancer_AF_afr_female=1.08342e-04;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=4686;non_topmed_AF_afr_male=0.00000e+00;non_topmed_nhomalt_afr_male=0;AC_eas_male=2;AN_eas_male=9066;AF_eas_male=2.20604e-04;nhomalt_eas_male=0;controls_AC_eas=2;controls_AN_eas=9046;controls_AF_eas=2.21092e-04;controls_nhomalt_eas=0;non_neuro_AC_eas_male=1;non_neuro_AN_eas_male=6574;non_neuro_AF_eas_male=1.52114e-04;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=4;non_cancer_AN_nfe_nwe=39494;non_cancer_AF_nfe_nwe=1.01281e-04;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=1;controls_AN_sas=15688;controls_AF_sas=6.37430e-05;controls_nhomalt_sas=0;non_neuro_AC_sas_male=1;non_neuro_AN_sas_male=23066;non_neuro_AF_sas_male=4.33539e-05;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=3126;non_neuro_AF_asj_male=0.00000e+00;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=2526;non_cancer_AF_nfe_bgr=0.00000e+00;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=1914;controls_AF_oth=0.00000e+00;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=8948;non_cancer_AF_eas_female=0.00000e+00;non_cancer_nhomalt_eas_female=0;AC_nfe=5;AN_nfe=113752;AF_nfe=4.39553e-05;nhomalt_nfe=0;non_topmed_AC_female=7;non_topmed_AN_female=111308;non_topmed_AF_female=6.28886e-05;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=6212;non_neuro_AF_asj=0.00000e+00;non_neuro_nhoma
lt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=9326;non_topmed_AF_eas_female=0.00000e+00;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=9;non_neuro_AN_raw=208136;non_neuro_AF_raw=4.32410e-05;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=2;non_topmed_AN_eas=18388;non_topmed_AF_eas=1.08767e-04;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=11274;non_topmed_AF_fin_male=0.00000e+00;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=4976;non_cancer_AF_asj_male=0.00000e+00;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=21648;AF_fin=0.00000e+00;nhomalt_fin=0;AC_nfe_male=1;AN_nfe_male=63598;AF_nfe_male=1.57238e-05;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=3818;non_topmed_AF_eas_kor=0.00000e+00;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=6884;controls_AF_amr_male=0.00000e+00;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=1;non_neuro_AN_eas_oea=9448;non_neuro_AF_eas_oea=1.05843e-04;non_neuro_nhomalt_eas_oea=0;AC_sas_female=1;AN_sas_female=7542;AF_sas_female=1.32591e-04;nhomalt_sas_female=0;controls_AC_afr_female=1;controls_AN_afr_female=4240;controls_AF_afr_female=2.35849e-04;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=17110;controls_AF_amr=0.00000e+00;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=152;non_topmed_AF_eas_jpn=0.00000e+00;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=4900;AF_asj_female=0.00000e+00;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=2662;non_topmed_AF_nfe_bgr=0.00000e+00;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=158;non_cancer_AF_nfe_est=0.00000e+00;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=1;non_neuro_AN_eas=13414;non_neuro_AF_eas=7.45490e-05;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=5;non_cancer_AN_nfe=102740;non_cancer_AF_nfe=4.86665e-05;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=3;non_neuro_AN_male=112470;non_neuro_AF_mal
e=2.66738e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=1;non_neuro_AN_sas_female=7540;non_neuro_AF_sas_female=1.32626e-04;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=10080;AF_asj=0.00000e+00;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=70;controls_AF_nfe_est=0.00000e+00;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=4854;non_topmed_AF_asj_female=0.00000e+00;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=25290;non_cancer_AF_nfe_swe=0.00000e+00;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=11;non_cancer_AN=236940;non_cancer_AF=4.64253e-05;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=6064;non_topmed_AF_oth=0.00000e+00;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=10372;non_topmed_AF_fin_female=0.00000e+00;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=10362;non_cancer_AF_fin_female=0.00000e+00;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=6140;AF_oth=0.00000e+00;nhomalt_oth=0;non_neuro_AC_nfe_male=1;non_neuro_AN_nfe_male=49724;non_neuro_AF_nfe_male=2.01110e-05;non_neuro_nhomalt_nfe_male=0;controls_AC_female=4;controls_AN_female=51288;controls_AF_female=7.79910e-05;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=21632;non_cancer_AF_fin=0.00000e+00;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=21646;non_topmed_AF_fin=0.00000e+00;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=2;non_cancer_AN_eas_oea=13794;non_cancer_AF_eas_oea=1.44991e-04;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=4;non_topmed_AN_nfe_female=48808;non_topmed_AF_nfe_female=8.19538e-05;non_topmed_nhomalt_nfe_female=0;non_cancer_AC_sas_male=1;non_cancer_AN_sas_male=23032;non_cancer_AF_sas_male=4.34179e-05;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=1106;controls_AF_asj_male=0.00000e+00;controls_nhomalt_asj_male=0;non_cancer_AC_raw=11;non_cancer_AN_raw=236958;non_cancer_AF_r
aw=4.64217e-05;non_cancer_nhomalt_raw=0;non_cancer_AC_eas_male=2;non_cancer_AN_eas_male=8744;non_cancer_AF_eas_male=2.28728e-04;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=5144;non_topmed_AF_asj_male=0.00000e+00;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=4866;non_neuro_AF_oth=0.00000e+00;non_neuro_nhomalt_oth=0;AC_male=4;AN_male=135908;AF_male=2.94317e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=6474;controls_AF_fin_female=0.00000e+00;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=678;controls_AF_nfe_bgr=0.00000e+00;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=1214;controls_AF_asj_female=0.00000e+00;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=14320;AF_amr_male=0.00000e+00;nhomalt_amr_male=0;AC_amr_female=1;AN_amr_female=20270;AF_amr_female=4.93340e-05;nhomalt_amr_female=0;non_topmed_AC_sas_male=1;non_topmed_AN_sas_male=23072;non_topmed_AF_sas_male=4.33426e-05;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=3212;AF_oth_male=0.00000e+00;nhomalt_oth_male=0;non_cancer_AC_sas=2;non_cancer_AN_sas=30524;non_cancer_AF_sas=6.55222e-05;controls_AF_sas_male=0.00000e+00;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=1;non_topmed_AN_sas_female=7542;non_topmed_AF_sas_female=1.32591e-04;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=1;non_topmed_AN_afr=12020;non_topmed_AF_afr=8.31947e-05;non_topmed_nhomalt_afr=0;controls_AC=6;controls_AN=109404;controls_AF=5.48426e-05;AF_popmax=1.08743e-04;nhomalt_popmax=0;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=10942;non_neuro_AF_nfe_seu=0.00000e+00;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=3774;non_cancer_AF_eas_kor=0.00000e+00;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=1;non_topmed_AN_afr_female=7334;non_topmed_AF_afr_female=1.36351e-04;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=0;controls_AN_sas_male=11
476
# ... rest of file ...
我需要将 tabix 命令与 awk 结合使用,以打印 $1、$2、$4、$5 以及 $8 的子字符串(仅 AF_popmax= 之后的数字和 nhomalt_popmax= 之后的数字),例如:
# ...
1 11850891 C T AF_popmax=6.53253e-05;nhomalt_popmax=0
1 11850892 G A AF_popmax=1.08743e-04;nhomalt_popmax=0
# ...
我试过这个命令:
# NOTE(review): in awk a bare /regex/ used as a value is evaluated as ($0 ~ /regex/), i.e. 0 or 1, so substr() receives that number rather than text from $8 - consistent with the 0/1 column shown in the output below. The braces around the action also appear to have been lost in this copy - confirm against the original post.
tabix file_input.vcf.bgz 1:11850891-55525202 | awk '$8=substr(/;AF_popmax=[^;]*/,""); print $1,$2,$4,$5,$8'
但我只得到一个 0/1 的列 $8:
1 11850891 C T 1
1 11850892 G A 1
有人知道吗?
非常感谢您的任何帮助(欢迎其他方法)
【问题讨论】:
【参考方案1】:请尝试以下方法。
awk '
# Print CHROM ($1), POS ($2), REF ($4), ALT ($5) followed by the
# AF_popmax=... and nhomalt_popmax=... entries of the INFO column ($8).
#
# The (^|;) anchor makes the search start at an entry boundary, so a longer
# key that merely ends in the same text (a prefixed variant of the key)
# cannot be picked up by mistake.
{
    val1 = val2 = ""

    if (match($8, /(^|;)AF_popmax=[^;]*/)) {
        val1 = substr($8, RSTART, RLENGTH)
        sub(/^;/, "", val1)        # drop the separator consumed by the anchor
    }
    if (match($8, /(^|;)nhomalt_popmax=[^;]*/)) {
        val2 = substr($8, RSTART, RLENGTH)
        sub(/^;/, "", val2)
    }

    # Records that carry no AF_popmax entry are skipped; the two entries are
    # joined with ";" as in the requested output.
    if (val1 != "")
        print $1, $2, $4, $5, val1 (val2 != "" ? ";" val2 : "")
}
' Input_file
【讨论】:
【参考方案2】:我将尝试向您解释一种非常通用的方法来完成此操作;如果您想打印其他内容,也可以轻松地对其进行调整。
假设您有一个格式如下的字符串:
key1=value1;key2=value2;key3=value3
如果您想使用这些值进行选择或一些操作,那么最简单的方法是将它们存储在关联数组中,这样我们就有:
array["key1"] => value1
array["key2"] => value2
array["key3"] => value3
array["key1","full"] => "key1=value1"
array["key2","full"] => "key2=value2"
array["key3","full"] => "key3=value3"
您可以使用以下功能:
# str2map: parse a string of the form  key<fs2>value<fs1>key<fs2>value...
# into an associative array.
#
#   str  input string, e.g. "key1=value1;key2=value2"
#   fs1  separator between entries (";" in the example)
#   fs2  separator between a key and its value ("=" in the example)
#   map  output array; it is emptied first, then for every entry
#          map[key]         holds the value
#          map[key,"full"]  holds the original "key=value" text
#   n, parts, tmp   local variables (extra parameters are the awk idiom for
#                   locals); callers pass only the first four arguments
#
# The entries are split into the scratch array "parts" rather than into
# "map" itself, so a key that happens to be a number (e.g. "1=z") cannot
# collide with the numeric slots produced by split().
# When a key occurs more than once, the first occurrence wins because the
# entries are visited from last to first.
function str2map(str, fs1, fs2, map,    n, parts, tmp) {
    n = split(str, parts, fs1)
    split("", map)                      # portable way to empty an array
    for (; n > 0; n--) {
        split(parts[n], tmp, fs2)
        map[tmp[1]] = tmp[2]
        map[tmp[1], "full"] = parts[n]
    }
}
由此得到下面的 awk 程序:
awk '
# str2map: parse "key<fs2>value<fs1>key<fs2>value..." into the array map.
#   map[key]         holds the value
#   map[key,"full"]  holds the original "key=value" text
# map is emptied on every call, so nothing leaks from one record to the next.
# n, parts and tmp are locals; entries are split into "parts" (not into map)
# so that numeric keys cannot collide with the slots produced by split().
function str2map(str, fs1, fs2, map,    n, parts, tmp) {
    n = split(str, parts, fs1)
    split("", map)                      # portable way to empty an array
    for (; n > 0; n--) {
        split(parts[n], tmp, fs2)
        map[tmp[1]] = tmp[2]
        map[tmp[1], "full"] = parts[n]
    }
}

# For every record: index the INFO column ($8) by key, then print
# CHROM, POS, REF, ALT and the two requested "key=value" entries.
{
    str2map($8, ";", "=", map)
    print $1, $2, $4, $5, map["AF_popmax", "full"] ";" map["nhomalt_popmax", "full"]
}
' file
输出如下:
1 11850891 C T AF_popmax=6.53253e-05;nhomalt_popmax=0
1 11850892 G A AF_popmax=1.08743e-04;nhomalt_popmax=0
这种方法的优点是您可以轻松地调整您的代码以打印您感兴趣的任何其他键,甚至基于此进行选择。
【讨论】:
以上是关于awk 打印 4 列和 $8 的子字符串的主要内容,如果未能解决你的问题,请参考以下文章