plink格式数据依据染色体拆分数据、依据染色体合并数据


1、准备测试数据

[root@linuxprobe test]# ls test.map test.ped [root@linuxprobe test]# wc -l *  ## 46827个位点,60个样本
   46827 test.map 60 test.ped 46887 total [root@linuxprobe test]# cut -f 1 test.map | uniq -c ## 一共 26条染色体 5244 1
   4970 2
   4465 3
   2436 4
   2109 5
   2318 6
   2005 7
   1851 8
   1901 9
   1649 10
   1077 11
   1528 12
   1527 13
   1045 14
   1506 15
   1395 16
   1291 17
   1272 18
   1124 19
    998 20
    768 21
    984 22
   1005 23
    662 24
    892 25
    805 26

 

2、依据染色体拆分数据 

for i in `seq 26`;do plink --file test --chr $i --sheep --recode tab --out chr$i;done ## 利用for循环,结合plink软件提取每条染色体数据,(plink提前加入环境变量或者给绝对路径) …… …… [root@linuxprobe test]# ls chr10.log chr12.ped chr15.nosex chr18.map chr20.log chr22.ped chr25.nosex chr3.map chr6.log chr8.ped chr10.map chr13.log chr15.ped chr18.nosex chr20.map chr23.log chr25.ped chr3.nosex chr6.map chr9.log chr10.nosex chr13.map chr16.log chr18.ped chr20.nosex chr23.map chr26.log chr3.ped chr6.nosex chr9.map chr10.ped chr13.nosex chr16.map chr19.log chr20.ped chr23.nosex chr26.map chr4.log chr6.ped chr9.nosex chr11.log chr13.ped chr16.nosex chr19.map chr21.log chr23.ped chr26.nosex chr4.map chr7.log chr9.ped chr11.map chr14.log chr16.ped chr19.nosex chr21.map chr24.log chr26.ped chr4.nosex chr7.map test.map chr11.nosex chr14.map chr17.log chr19.ped chr21.nosex chr24.map chr2.log chr4.ped chr7.nosex test.ped chr11.ped chr14.nosex chr17.map chr1.log chr21.ped chr24.nosex chr2.map chr5.log chr7.ped chr12.log chr14.ped chr17.nosex chr1.map chr22.log chr24.ped chr2.nosex chr5.map chr8.log chr12.map chr15.log chr17.ped chr1.nosex chr22.map chr25.log chr2.ped chr5.nosex chr8.map chr12.nosex chr15.map chr18.log chr1.ped chr22.nosex chr25.map chr3.log chr5.ped chr8.nosex

 [root@linuxprobe test]# wc -l *.map
 1649 chr10.map
 1077 chr11.map
 1528 chr12.map
 1527 chr13.map
 1045 chr14.map
 1506 chr15.map
 1395 chr16.map
 1291 chr17.map
 1272 chr18.map
 1124 chr19.map
 5244 chr1.map
 998 chr20.map
 768 chr21.map
 984 chr22.map
 1005 chr23.map
 662 chr24.map
 892 chr25.map
 805 chr26.map
 4970 chr2.map
 4465 chr3.map
 2436 chr4.map
 2109 chr5.map
 2318 chr6.map
 2005 chr7.map
 1851 chr8.map
 1901 chr9.map
 46827 test.map
 93654 total

 

3、依据染色体合并数据  

[root@linuxprobe test]# cp chr1.map merge.map ##复制染色体1为起始的合并结果 [root@linuxprobe test]# cp chr1.ped merge.ped ##同上 for i in `seq 2 26`;do plink --file merge --merge chr$i.ped chr$i.map --sheep --recode tab --out merge;done  ## 利用for循环,结合plink进行合并 (这种合并仅适合样本完全一致时合并)

  [root@linuxprobe test]# wc -l merge.ped merge.map  ##统计合并结果
  60 merge.ped
  46827 merge.map
  46887 total
  [root@linuxprobe test]# md5sum test.ped test.map merge.ped merge.map ## 检查合并后是否一致 
  04a12361169ff0fdc77349f280f11b6b test.ped
  8d4537314a4c7beaeb0da726a8c5fcba test.map
  89b9ee552b2ae3b5e9fcb5282cbbb5c9 merge.ped
  8d4537314a4c7beaeb0da726a8c5fcba merge.map

## ped文件不一致是由于在部分位点基因频率为0.5时,主次等位基因顺序颠倒引起的,不影响结果

 

4、验证ped文件md5sum不一致的情况   

[root@linuxprobe test]# mkdir test ## 单独建立文件夹,将需要验证结果复制至该文件夹 [root@linuxprobe test]# cp test.map test.ped merge.map merge.ped test [root@linuxprobe test]# cd test/ [root@linuxprobe test]# ls merge.map merge.ped test.map test.ped
 plink --file merge --freq --sheep --out merge;rm *.nosex ## 利用plink统计基因频率 plink --file test --freq --sheep --out test ;rm *.nosex ## 利用plink统计基因频率 [root@linuxprobe test]# ls merge.frq merge.log merge.map merge.ped test.frq test.log test.map test.ped [root@linuxprobe test]# head merge.frq ## 查看生成文件格式 CHR SNP A1 A2 MAF NCHROBS 1                      s64199.1    A    G       0.1667      120
   1              OAR19_64675012.1    G    C         0.25      120
   1              OAR19_64715327.1    A    G       0.1583      120
   1              OAR19_64803054.1    A    G         0.25      120
   1                DU281551_498.1    G    A          0.5      120
   1                      s18939.1    0    A            0      120
   1                  OAR1_88143.1    A    G       0.2083      120
   1                      s09912.1    G    C       0.3417      120
   1                      s36301.1    A    G        0.325      120 [root@linuxprobe test]# sed 's/^[\t ]*//' merge.frq | sed 's/[\t ]\+/\t/g' | head ## 去除行首空格,将多个空格或者制表符转换为一个制表符,查看前十行 CHR SNP A1 A2 MAF NCHROBS 1       s64199.1        A       G       0.1667  120
1       OAR19_64675012.1        G       C       0.25    120
1       OAR19_64715327.1        A       G       0.1583  120
1       OAR19_64803054.1        A       G       0.25    120
1       DU281551_498.1  G       A       0.5     120
1       s18939.1        0       A       0       120
1       OAR1_88143.1    A       G       0.2083  120
1       s09912.1        G       C       0.3417  120
1       s36301.1        A       G       0.325   120 [root@linuxprobe test]# sed 's/^[\t ]*//' merge.frq | sed 's/[\t ]\+/\t/g' | cut -f 3-4 --complement | md5sum ## 剔除3、4列后查看md5sum 643867ea0404ada96aa61702bc61f678 - [root@linuxprobe test]# sed 's/^[\t ]*//' test.frq | sed 's/[\t ]\+/\t/g' | cut -f 3-4 --complement | md5sum ##剔除3、4列后查看买的sum 643867ea0404ada96aa61702bc61f678 -
## 以上说明每个稳点的基因频率完全相同

 

5、利用diff检查

[root@linuxprobe test]# diff merge.frq test.frq | head ## 差异的部分基因频率全部为0.5,主次等位基因颠倒,这种颠倒不影响结果 6c6 <    1                DU281551_498.1    G    A          0.5      120
---
>    1                DU281551_498.1    A    G          0.5      120 218c218 <    1                      s64317.1    G    A          0.5      120
---
>    1                      s64317.1    A    G          0.5      120 251c251 <    1                      s45057.1    G    A          0.5      120 [root@linuxprobe test]# diff merge.frq test.frq | tail ---
>   24              OAR24_21591643.1    A    G          0.5      120 46100c46100 <   26               OAR26_5333971.1    A    G          0.5      120
---
>   26               OAR26_5333971.1    G    A          0.5      120 46697c46697 <   26                      s52004.1    G    A          0.5      120
---
>   26                      s52004.1    A    G          0.5      120

 

 

   


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM