熱圖可以聚合大量的數據,並可以用一種漸進色來優雅地表現,可以很直觀地展現數據的疏密程度或頻率高低。
本文利用R語言 pheatmap 包從頭開始繪製各種漂亮的熱圖。參數像積木,拼湊出你最喜歡的熱圖即可,如下圖:
基因和樣本的單獨聚類、排序、聚類後再分組、行列註釋、配色調整,以及聚類線和單元格寬度、高度的調整均可實現。
載入數據,R包
#R包
library(pheatmap)
# Build a reproducible 20 x 10 demo matrix of standard-normal draws.
set.seed(1234)
test <- matrix(rnorm(200), nrow = 20, ncol = 10)

# Shift blocks of cells so the heatmap shows visible structure:
# rows 1-10 in odd columns get +3, rows 11-20 in even columns get +2,
# and rows 15-20 in even columns get a further +4.
test[1:10, seq(1, 10, by = 2)] <- test[1:10, seq(1, 10, by = 2)] + 3
test[11:20, seq(2, 10, by = 2)] <- test[11:20, seq(2, 10, by = 2)] + 2
test[15:20, seq(2, 10, by = 2)] <- test[15:20, seq(2, 10, by = 2)] + 4

# Name the rows Gene1..Gene20 and the columns Test1..Test10.
dimnames(test) <- list(paste0("Gene", 1:20), paste0("Test", 1:10))

# Preview the first rows of the first six columns.
head(test[, 1:6])
繪製熱圖
繪製默認熱圖
# Draw the heatmap with every pheatmap() option left at its default.
pheatmap(mat = test)
基本參數
# scale = "row" centers and scales the values within each row before plotting.
# clustering_method picks the hierarchical clustering linkage (default
# "complete"); it takes the same values as hclust(): "ward.D", "ward.D2",
# "single", "complete", "average", "mcquitty", "median" or "centroid".
pheatmap(test, scale = "row", clustering_method = "average")

# Cluster the rows on Pearson correlation instead of the default
# "euclidean" distance.
pheatmap(test, scale = "row", clustering_distance_rows = "correlation")

# Switch row / column clustering on or off. The argument names are spelled out
# in full (cluster_rows, cluster_cols) instead of relying on R's partial
# argument matching.
pheatmap(test, cluster_rows = FALSE, cluster_cols = TRUE)

# treeheight_row and treeheight_col set the height of the row and column
# dendrograms (default 50).
pheatmap(test, treeheight_row = 30, treeheight_col = 50)

# Fix the size of each cell and the base font size.
pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 10)
設定 text
熱圖中展示數值
# display_numbers = TRUE prints each cell's value inside the cell;
# number_color sets the color of that text.
pheatmap(
  test,
  display_numbers = TRUE,
  number_color = "blue"
)

# number_format controls how the printed values are formatted.
pheatmap(
  test,
  display_numbers = TRUE,
  number_format = "%.1e"
)

# Conditional display: pass a character matrix instead of TRUE so that only
# cells with a value above 5 are marked with "*".
pheatmap(
  test,
  display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test))
)
設置 legend
設定legend展示的值
# legend_breaks sets the values at which the legend is marked and
# legend_labels supplies the text shown at those breaks (one label per break).
# cluster_rows is written in full rather than as the partial name cluster_row.
pheatmap(
  test,
  cluster_rows = FALSE,
  legend_breaks = -1:4,
  legend_labels = c("0", "1e-4", "1e-3", "1e-2", "1e-1", "1")
)

# Draw the heatmap without a legend.
pheatmap(test, legend = FALSE)
設定 color
自定義顏色
# Custom palette: colorRampPalette() interpolates 50 colors running from
# navy through white to firebrick3.
pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50))

# border_color sets the color of the cell borders; use border_color = NA to
# draw no borders at all. pheatmap() has no separate `border` argument, so a
# `border = TRUE` flag would be passed on through `...` without any effect and
# is therefore not used here.
pheatmap(test, border_color = "red")
設定 annotations
# Column annotation: one row per sample (Test1..Test10) with an alternating
# CellType factor and a Time value (1:5, recycled by data.frame()).
annotation_col <- data.frame(
  CellType = factor(rep(c("CT1", "CT2"), times = 5)),
  Time = 1:5
)
rownames(annotation_col) <- paste0("Test", 1:10)

# Row annotation: one row per gene (Gene1..Gene20), assigning the first 10
# genes to Path1, the next 4 to Path2 and the last 6 to Path3.
annotation_row <- data.frame(
  GeneClass = factor(rep(c("Path1", "Path2", "Path3"), times = c(10, 4, 6)))
)
rownames(annotation_row) <- paste0("Gene", 1:20)
# Heatmap with column annotations only.
pheatmap(test, annotation_col = annotation_col)

# Heatmap with both row and column annotations;
# angle_col changes the angle of the column labels.
pheatmap(
  test,
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  angle_col = "45"
)

# Custom annotation colors, keyed by annotation name: a two-color vector for
# Time and one named color per level for CellType and GeneClass.
ann_colors <- list(
  Time = c("white", "firebrick"),
  CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"),
  GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E")
)
pheatmap(
  test,
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  main = "Title"
)
設定 gap
# Manual row gaps: with row clustering switched off, gaps_row inserts a gap
# after rows 10 and 14.
pheatmap(
  test,
  annotation_col = annotation_col,
  cluster_rows = FALSE,
  gaps_row = c(10, 14)
)

# Column gaps from the clustering result: cutree_cols cuts the column
# dendrogram into 2 groups. The argument name is written in full rather than
# as the partial name cutree_col.
pheatmap(
  test,
  annotation_col = annotation_col,
  cluster_rows = FALSE,
  cutree_cols = 2
)

# Custom row labels: 17 blank labels followed by three named ones, so only
# the last three rows of the input matrix are labelled.
labels_row <- c(rep("", 17), "Il10", "Il15", "Il1b")
pheatmap(test, annotation_col = annotation_col, labels_row = labels_row)
熱圖匯總
# Combined heatmap: row and column annotations, custom annotation colors,
# column dendrogram cut into 2 groups, and a title.
# NOTE: gaps_row is only honored when rows are not clustered. Rows are
# clustered here (the default), so the former gaps_row = c(10, 14) had no
# effect and has been dropped; use cutree_rows to get gaps between row
# clusters instead.
pheatmap(
  test,
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  cutree_cols = 2,
  main = "Pheatmap"
)
輸出結果
# Draw the combined heatmap and keep the returned object, which carries the
# row and column clustering trees.
# NOTE: gaps_row is only honored when rows are not clustered; rows must stay
# clustered here because tree_row is read below, so the ineffective
# gaps_row = c(10, 14) argument has been dropped.
A <- pheatmap(
  test,
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  cutree_cols = 2,
  main = "Pheatmap"
)

# Row order as displayed in the heatmap.
order_row <- A$tree_row$order
# Column order as displayed in the heatmap.
order_col <- A$tree_col$order

# Rearrange the original data into the heatmap's row / column order.
result <- data.frame(test[order_row, order_col])

# Put the row names into a leading "geneid" column so they are written out
# as ordinary data.
result <- data.frame(geneid = rownames(result), result, check.names = FALSE)

# Write the reordered table as tab-separated text, without quoting and
# without a separate row-name column.
write.table(
  result,
  file = "reorder.txt",
  row.names = FALSE,
  quote = FALSE,
  sep = "\t"
)
在R的當前工作目錄下即可查看熱圖的結果(reorder.txt,即按照熱圖順序重新排列的數據)。
【公眾號對話框,回復 R熱圖 即可獲得上述熱圖R代碼】
更多關於生信,R,Python的內容請掃碼關注小號,謝謝。