@@ -19,6 +19,7 @@ BCFTOOLS=/usr/local/bin/bcftools
1919vcf=$1
2020backgroundlist=$2
2121famfile=$3
22+ allosome_fai=$4
2223
2324# #get sampleids from VCF##
2425zcat $vcf \
@@ -58,6 +59,7 @@ zcat convertsvtype.vcf.gz \
5859 | awk ' NR==FNR{inFileA[$1]=$2; next} {if ($3 in inFileA && $1!~"#") $6=inFileA[$3]; print }' OFS=' \t' vargq.persample - \
5960 | bgzip \
6061 > cleaninfo.vcf.gz
62+ tabix -p vcf cleaninfo.vcf.gz
6163
6264
6365# #fix sex chr if necessary##
@@ -83,21 +85,20 @@ awk '{if ($5==2) print $2}' $famfile \
8385 if [ $( cat clean.bed.ids.txt| wc -l) -gt 0 ]
8486 then
8587
86- zcat cleaninfo.vcf.gz \
87- | awk ' {if ($1!~"#" && ($1~"X" || $1~"Y")) $1=$3;print} ' OFS= " \t " \
88- | vcftools --vcf - --stdout --extract-FORMAT-info RD_CN \
89- | awk -F " \t " ' NR==1{for (i=3;i<=NF;i++) header[i]=$i} NR>1{for(j=3;j<=NF;j++) print $1"\t"header[j] "\t" $j } ' \
90- | fgrep -wf clean.bed.ids.txt \
91- | awk ' {if ($3!=".") print} ' \
92- | gzip \
93- > RD_CN.sexcheck.FORMAT.gz
94-
95- zcat RD_CN.sexcheck.FORMAT.gz | fgrep -wf male.txt | Rscript -e ' d<-read.table("stdin")' \
88+ awk ' {print $1"\t0\t"$2} ' < ${allosome_fai} > allosomes.list
89+ ${BCFTOOLS} query -R allosomes.list -S male.txt -i ' ID=@clean.bed.ids.txt ' -f ' [%ID\t%SAMPLE\t%RD_CN\n] ' cleaninfo.vcf.gz \
90+ | awk ' {if ($3!=".") print} ' \
91+ | gzip > RD_CN.sexcheck.FORMAT.male.gz
92+
93+ ${BCFTOOLS} query -R allosomes.list -S female.txt -i ' ID=@clean.bed.ids.txt ' -f ' [%ID\t%SAMPLE\t%RD_CN\n] ' cleaninfo.vcf.gz \
94+ | awk ' {if ($3!=".") print} ' \
95+ | gzip > RD_CN.sexcheck.FORMAT.female .gz
96+
97+ zcat RD_CN.sexcheck.FORMAT.male.gz | Rscript -e ' d<-read.table("stdin")' \
9698 -e ' x<-tapply(d[,3],d[,1],median)' \
9799 -e ' write.table(x,"male.median.value.pervar.txt",col.names=FALSE,quote=FALSE,sep = "\t")'
98100
99-
100- zcat RD_CN.sexcheck.FORMAT.gz| fgrep -wf female.txt| Rscript -e ' d<-read.table("stdin")' \
101+ zcat RD_CN.sexcheck.FORMAT.female.gz| Rscript -e ' d<-read.table("stdin")' \
101102 -e ' x<-tapply(d[,3],d[,1],median)' \
102103 -e ' write.table(x,"female.median.value.pervar.txt",col.names=FALSE,quote=FALSE,sep = "\t")'
103104 fi
242243
243244
244245 tabix -p vcf combinedsex.vcf.gz
245- tabix -p vcf cleaninfo.vcf.gz
246246
247247zcat combinedsex.vcf.gz| awk ' {if ($1!~"#") print $3}' > modified.ids.txt
248248
0 commit comments