1、准備測試數據
[root@linuxprobe test]# ls test.map test.ped [root@linuxprobe test]# wc -l * ## 46827個位點,60個樣本
46827 test.map 60 test.ped 46887 total [root@linuxprobe test]# cut -f 1 test.map | uniq -c ## 一共 26條染色體 5244 1
4970 2
4465 3
2436 4
2109 5
2318 6
2005 7
1851 8
1901 9
1649 10
1077 11
1528 12
1527 13
1045 14
1506 15
1395 16
1291 17
1272 18
1124 19
998 20
768 21
984 22
1005 23
662 24
892 25
805 26
2、依據染色體拆分數據
for i in `seq 26`;do plink --file test --chr $i --sheep --recode tab --out chr$i;done ## 利用for循環,結合plink軟件提取每條染色體數據,(plink提前加入環境變量或者給絕對路徑) …… …… [root@linuxprobe test]# ls chr10.log chr12.ped chr15.nosex chr18.map chr20.log chr22.ped chr25.nosex chr3.map chr6.log chr8.ped chr10.map chr13.log chr15.ped chr18.nosex chr20.map chr23.log chr25.ped chr3.nosex chr6.map chr9.log chr10.nosex chr13.map chr16.log chr18.ped chr20.nosex chr23.map chr26.log chr3.ped chr6.nosex chr9.map chr10.ped chr13.nosex chr16.map chr19.log chr20.ped chr23.nosex chr26.map chr4.log chr6.ped chr9.nosex chr11.log chr13.ped chr16.nosex chr19.map chr21.log chr23.ped chr26.nosex chr4.map chr7.log chr9.ped chr11.map chr14.log chr16.ped chr19.nosex chr21.map chr24.log chr26.ped chr4.nosex chr7.map test.map chr11.nosex chr14.map chr17.log chr19.ped chr21.nosex chr24.map chr2.log chr4.ped chr7.nosex test.ped chr11.ped chr14.nosex chr17.map chr1.log chr21.ped chr24.nosex chr2.map chr5.log chr7.ped chr12.log chr14.ped chr17.nosex chr1.map chr22.log chr24.ped chr2.nosex chr5.map chr8.log chr12.map chr15.log chr17.ped chr1.nosex chr22.map chr25.log chr2.ped chr5.nosex chr8.map chr12.nosex chr15.map chr18.log chr1.ped chr22.nosex chr25.map chr3.log chr5.ped chr8.nosex
[root@linuxprobe test]# wc -l *.map
1649 chr10.map
1077 chr11.map
1528 chr12.map
1527 chr13.map
1045 chr14.map
1506 chr15.map
1395 chr16.map
1291 chr17.map
1272 chr18.map
1124 chr19.map
5244 chr1.map
998 chr20.map
768 chr21.map
984 chr22.map
1005 chr23.map
662 chr24.map
892 chr25.map
805 chr26.map
4970 chr2.map
4465 chr3.map
2436 chr4.map
2109 chr5.map
2318 chr6.map
2005 chr7.map
1851 chr8.map
1901 chr9.map
46827 test.map
93654 total
3、依據染色體合並數據
[root@linuxprobe test]# cp chr1.map merge.map ##復制染色體1為起始的合並結果 [root@linuxprobe test]# cp chr1.ped merge.ped ##同上 for i in `seq 2 26`;do plink --file merge --merge chr$i.ped chr$i.map --sheep --recode tab --out merge;done ## 利用for循環,結合plink進行合並 (這種合並僅適合樣本完全一致時合並)
[root@linuxprobe test]# wc -l merge.ped merge.map ##統計合並結果
60 merge.ped
46827 merge.map
46887 total
[root@linuxprobe test]# md5sum test.ped test.map merge.ped merge.map ## 檢查合並后是否一致
04a12361169ff0fdc77349f280f11b6b test.ped
8d4537314a4c7beaeb0da726a8c5fcba test.map
89b9ee552b2ae3b5e9fcb5282cbbb5c9 merge.ped
8d4537314a4c7beaeb0da726a8c5fcba merge.map
## ped文件不一致是由於在部分位點基因頻率為0.5時,主次等位基因順序顛倒引起的,不影響結果
4、驗證ped文件md5sum不一致的情況
[root@linuxprobe test]# mkdir test ## 單獨建立文件夾,將需要驗證結果復制至該文件夾 [root@linuxprobe test]# cp test.map test.ped merge.map merge.ped test [root@linuxprobe test]# cd test/ [root@linuxprobe test]# ls merge.map merge.ped test.map test.ped
plink --file merge --freq --sheep --out merge;rm *.nosex ## 利用plink統計基因頻率 plink --file test --freq --sheep --out test ;rm *.nosex ## 利用plink統計基因頻率 [root@linuxprobe test]# ls merge.frq merge.log merge.map merge.ped test.frq test.log test.map test.ped [root@linuxprobe test]# head merge.frq ## 查看生成文件格式 CHR SNP A1 A2 MAF NCHROBS 1 s64199.1 A G 0.1667 120
1 OAR19_64675012.1 G C 0.25 120
1 OAR19_64715327.1 A G 0.1583 120
1 OAR19_64803054.1 A G 0.25 120
1 DU281551_498.1 G A 0.5 120
1 s18939.1 0 A 0 120
1 OAR1_88143.1 A G 0.2083 120
1 s09912.1 G C 0.3417 120
1 s36301.1 A G 0.325 120 [root@linuxprobe test]# sed 's/^[\t ]*//' merge.frq | sed 's/[\t ]\+/\t/g' | head ## 去除行首空格,將多個空格或者制表符轉換為一個制表符,查看前十行 CHR SNP A1 A2 MAF NCHROBS 1 s64199.1 A G 0.1667 120
1 OAR19_64675012.1 G C 0.25 120
1 OAR19_64715327.1 A G 0.1583 120
1 OAR19_64803054.1 A G 0.25 120
1 DU281551_498.1 G A 0.5 120
1 s18939.1 0 A 0 120
1 OAR1_88143.1 A G 0.2083 120
1 s09912.1 G C 0.3417 120
1 s36301.1 A G 0.325 120 [root@linuxprobe test]# sed 's/^[\t ]*//' merge.frq | sed 's/[\t ]\+/\t/g' | cut -f 3-4 --complement | md5sum ## 剔除3、4列后查看md5sum 643867ea0404ada96aa61702bc61f678 - [root@linuxprobe test]# sed 's/^[\t ]*//' test.frq | sed 's/[\t ]\+/\t/g' | cut -f 3-4 --complement | md5sum ##剔除3、4列后查看買的sum 643867ea0404ada96aa61702bc61f678 -
## 以上說明每個穩點的基因頻率完全相同
5、利用diff檢查
[root@linuxprobe test]# diff merge.frq test.frq | head ## 差異的部分基因頻率全部為0.5,主次等位基因顛倒,這種顛倒不影響結果 6c6 < 1 DU281551_498.1 G A 0.5 120
---
> 1 DU281551_498.1 A G 0.5 120 218c218 < 1 s64317.1 G A 0.5 120
---
> 1 s64317.1 A G 0.5 120 251c251 < 1 s45057.1 G A 0.5 120 [root@linuxprobe test]# diff merge.frq test.frq | tail ---
> 24 OAR24_21591643.1 A G 0.5 120 46100c46100 < 26 OAR26_5333971.1 A G 0.5 120
---
> 26 OAR26_5333971.1 G A 0.5 120 46697c46697 < 26 s52004.1 G A 0.5 120
---
> 26 s52004.1 A G 0.5 120