1. 程式人生 > 其它 >shell 和 R 實現具有對映關係的資料的批量替換

shell 和 R 實現具有對映關係的資料的批量替換

1、測試資料

root@PC1:/home/test# ls
5gene_CDs.bed  id_mapping.txt
root@PC1:/home/test# head -n 3 5gene_CDs.bed
chr6    117609654       117609965       NM_001378891.1_cds_0_0_chr6_117609655_r 0       -
chr6    117622136       117622300       NM_001378891.1_cds_1_0_chr6_117622137_r 0       -
chr6    117629956       117630091       NM_001378891.1_cds_2_0_chr6_117629957_r 0       -
root@PC1:/home/test# head -n 3 id_mapping.txt    ## 在5gene_CDs.bed檔案中實現id_mapping.txt中第二列對第一列的批量替換
NM_001378891.1  ROS1
NM_001378902.1  ROS1
NM_002944.3     ROS1

2、shell實現

root@PC1:/home/test# ls
5gene_CDs.bed  id_mapping.txt
root@PC1:/home/test# head -n 3 5gene_CDs.bed
chr6    117609654       117609965       NM_001378891.1_cds_0_0_chr6_117609655_r 0       -
chr6    117622136       117622300       NM_001378891.1_cds_1_0_chr6_117622137_r 0       -
chr6    117629956       117630091       NM_001378891.1_cds_2_0_chr6_117629957_r 0       -
root@PC1:/home/test# head -n 3 id_mapping.txt
NM_001378891.1  ROS1
NM_001378902.1  ROS1
NM_002944.3     ROS1
root@PC1:/home/test# cp 5gene_CDs.bed 5gene_CDs.bed.bak    ## 要在原始檔中修改,為防止意外發生,先對資料做備份
root@PC1:/home/test# ls
5gene_CDs.bed  5gene_CDs.bed.bak  id_mapping.txt
root@PC1:/home/test# cat id_mapping.txt | while read {i,j}; do sed -i "s/$i/$j/" 5gene_CDs.bed; done    ## 迴圈中i和j分別儲存每行中對應的兩個變數
root@PC1:/home/test# head -n 3 5gene_CDs.bed
chr6    117609654       117609965       ROS1_cds_0_0_chr6_117609655_r   0       -
chr6    117622136       117622300       ROS1_cds_1_0_chr6_117622137_r   0       -
chr6    117629956       117630091       ROS1_cds_2_0_chr6_117629957_r   0       -

3、R實現

> dir()
[1] "5gene_CDs.bed"  "id_mapping.txt"
> mapping=read.table("id_mapping.txt",sep="\t")
> head(mapping,2)
              V1   V2
1 NM_001378891.1 ROS1
2 NM_001378902.1 ROS1
> bed=read.table("5gene_CDs.bed",sep="\t")
> head(bed, 2)
    V1        V2        V3                                      V4 V5 V6
1 chr6 117609654 117609965 NM_001378891.1_cds_0_0_chr6_117609655_r  0  -
2 chr6 117622136 117622300 NM_001378891.1_cds_1_0_chr6_117622137_r  0  -
> for (i in 1:nrow(mapping)) {
+   bed$V4 <- sub(mapping$V1[i], mapping$V2[i], bed$V4)
+ }
> head(bed,2)
    V1        V2        V3                            V4 V5 V6
1 chr6 117609654 117609965 ROS1_cds_0_0_chr6_117609655_r  0  -
2 chr6 117622136 117622300 ROS1_cds_1_0_chr6_117622137_r  0  -