1. 程式人生 > 其它 >linux shell 統計plink格式樣本缺失率

linux shell 統計plink格式樣本缺失率

1、shell指令碼

[root@centos79 test]# cat test.sh
#!/bin/bash

## step1
for i in $(seq `sed -n "$=" $1`); do sed -n "$i"p $1 | cut -d " " -f 7- | grep -o "0" | wc -l >> result1; done
echo "step1 done!"


## step2
paste result1 $1 | awk '{OFS = "\t"} {print $3, $1/2, $1/(NF - 7)}' > result.txt
rm -f result1
echo 
"step2 done!" ## step3 sed '1i id\tmisssite\trate' result.txt -i echo "finished!"

2、測試資料、及測試

[root@centos79 test]# ls
outcome.map  outcome.ped  test.sh
[root@centos79 test]# cat outcome.map
1       snp1    0       55910
1       snp2    0       85204
1       snp3    0       122948
1       snp4    0       203750
1 snp5 0 312707 1 snp6 0 356863 1 snp7 0 400518 1 snp8 0 487423 [root@centos79 test]# cat outcome.ped DOR 1 0 0 0 -9 G G 0 0 0 0 0 0 A A T T G G 0 0 DOR 2 0 0 0 -9 0 0 G G 0 0 0 0 A A T T A G 0 0 DOR 3 0 0 0 -9 0 0 C C 0 0 G G G G T T A G 0 0 DOR 4 0 0 0 -9 0 0 C C 0 0 G G G G A A G G G G DOR
5 0 0 0 -9 G G C C 0 0 G G G G A A A G G C DOR 6 0 0 0 -9 G G C C 0 0 G G G G A A A A C C [root@centos79 test]# bash test.sh outcome.ped step1 done! step2 done! finished! [root@centos79 test]# ls outcome.map outcome.ped result.txt test.sh [root@centos79 test]# cat result.txt id misssite rate 1 4 0.5 2 4 0.5 3 3 0.375 4 2 0.25 5 1 0.125 6 1 0.125

3、plink驗證

[root@centos79 test]# ls
outcome.map  outcome.ped  result.txt  test.sh
[root@centos79 test]# plink --file outcome --missing --out verify > /dev/null; rm *.log *.nosex
[root@centos79 test]# ls
outcome.map  outcome.ped  result.txt  test.sh  verify.imiss  verify.lmiss
[root@centos79 test]# cat verify.imiss
 FID  IID MISS_PHENO   N_MISS   N_GENO   F_MISS
 DOR    1          Y        4        8      0.5
 DOR    2          Y        4        8      0.5
 DOR    3          Y        3        8    0.375
 DOR    4          Y        2        8     0.25
 DOR    5          Y        1        8    0.125
 DOR    6          Y        1        8    0.125
[root@centos79 test]# cat result.txt
id      misssite        rate
1       4       0.5
2       4       0.5
3       3       0.375
4       2       0.25
5       1       0.125
6       1       0.125