#List 1000g Europeans
awk '{if ($3 == "EUR") print $1}' integrated_call_samples_v3.20130502.ALL.panel > 1000g_europeans.subjects
for i in 22 # {1..22}
do
vcf-subset -c 1000g_europeans.subjects ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | fill-an-ac | gzip -c > CEU.chr"$i".phase1.vcf.gz
done
#Get marker info for all markers. Only take rs markers that are di-allelic
for i in {1..22}
do
#zcat ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | awk 'NR>=253{print $3,$8}' | grep -v MULTI_ALLELIC | grep rs | gzip -c > info_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
# zcat info_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | awk -F ";|\ " ' {print $1,$9}' | sed 's/AFR_AF=//g' | awk '{if ($2 >= 0.01 && $2 <= 0.99) print $1}' | gzip -c > afmaf01_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
# zcat info_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | awk -F ";|\ " ' {print $1,$10}' | sed 's/EUR_AF=//g' | awk '{if ($2 >= 0.01 && $2 <= 0.99) print $1}' | gzip -c > eurmaf01_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
zcat info_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | awk -F ";|\ " ' {print $1,$3}' | sed 's/AF=//g' | awk '{if ($2 >= 0.01 && $2 <= 0.99) print $1}' | gzip -c > allmaf01_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
done
zcat afmaf01_ALL.chr*.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | gzip -c > afrmaf01_ALL.allchr.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
zcat eurmaf01_ALL.chr*.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | gzip -c > eurmaf01_ALL.allchr.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
zcat allmaf01_ALL.chr*.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | gzip -c > allmaf01_ALL.allchr.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
#Some markers have multiple names, annoyingly. Fuck it, just print name . These mave multiple AFs, delimited by ,
#Multi allelic markers have commas to delimit AF of the minor variants. We won't deal with that shit here
#Fuck it and just get the SNPS
zcat info_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | grep "EUR_AF=0.0209,0.0149" -w -m1
#Get marker alleles for all markers
for i in {1..22}
do
zcat ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz | awk 'NR>=253{print $1,$2,$3,$4,$5,$8}' | gzip -c > acs_ALL.chr"$i".phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
done