發表級ggplot繪圖流程和技巧 | 論文發表


The R Graph Gallery - R繪圖代碼庫

themes - https://www.r-graph-gallery.com/ggplot2-package.html#themes

 

要開始修飾以前的核心圖片,準備發表論文了。

把之前比較raw的圖修飾格式,統一生成高清晰圖片,準備放入paper中。

會慢慢補充所有常見的繪圖代碼。

 

一個raw image的代碼:

# Raw first draft: one box per group with the individual scores jittered on top.
p1 <- ggplot(data = oxidation.df, mapping = aes(x = group, y = score, color = group)) +
  geom_boxplot() +
  geom_jitter(position = position_jitter(0.2), shape = 16) +
  labs(title = "Fatty acid metabolism")
p1

  

第一步:修改df裡的標籤

# Replace the four GFP-/GFP+ group labels with their HhOFF/HhON counterparts
# (`from` and `to` are matched position by position).
oxidation.df$group <- plyr::mapvalues(
  oxidation.df$group,
  from = c("GFP- early", "GFP+ early", "GFP- late", "GFP+ late"),
  to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late")
)

  

第二步:修改標簽順序

# Turn `group` into a factor whose level order fixes the order on the x axis.
oxidation.df$group <- factor(
  oxidation.df$group,
  levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late")
)

  

第三步:精修格式主題字體

主題

常用的主題:https://www.r-graph-gallery.com/ggplot2-package.html#themes

  • theme_bw - 去掉了灰白背景,加了邊框,最常用
  • theme_classic - 只留下了加粗的左下邊框,最經典,適合實驗圖
  • egg::theme_article - 只有四周的邊框,最適合發表文章,缺點:圖例間隔太小
  • theme_minimal - 只留下了grid,沒有邊框
  • theme_minimal_hgrid - 只留下了hgrid
  • theme_void - 只留下了圖例,適合tSNE圖
# Two complete themes from the list above.
theme_bw()
theme_void()
# remove grid (both the major and the minor lines)
theme(
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank()
)

 

修改title

# Plot title plus axis titles: x is blank, y ends with a newline.
labs(title = "Fatty acid metabolism", y = "Pathway score\n", x = "")

xy軸標簽字體大小

# Text sizes for the axes; the x tick labels are additionally rotated by 30 degrees.
theme(
  axis.text.x = element_text(face = "plain", color = "black", size = 14, angle = 30, vjust = 0.6),
  axis.text.y = element_text(size = 10),
  axis.title.y = element_text(size = 14)
)

去掉多余的圖例

# Hide the legend (redundant when the groups are already labelled on the axis).
theme(legend.position = "none")

 

填充顏色

library(RColorBrewer)
# Fill scale: all nine colours requested from the "Paired" palette.
scale_fill_manual(values = brewer.pal(n = 9, name = "Paired"))
# Colour scale: only entries 3 to 6 of the same palette.
scale_color_manual(values = brewer.pal(n = 9, name = "Paired")[3:6])

 

其他

限制xy坐標范圍

# Restrict both continuous axes to the range 0 to 2.5.
scale_x_continuous(limits = c(0, 2.5))
scale_y_continuous(limits = c(0, 2.5))

  

一組數據的比較

library(ggpubr)
# Add the test label at x = 1, y = 2.3 with text size 5.
stat_compare_means(size = 5, label.x = 1, label.y = 2.3)

  

多組數據的比較

library(ggpubr)
# Each element names the two groups of one pairwise comparison.
my_comparisons <- list(
  c("HhOFF early", "HhON early"),
  c("HhOFF late", "HhON late")
)

stat_compare_means(label.y = 1.29, method = "anova") + # global
  stat_compare_means(label = "p.signif", label.y = 1, comparisons = my_comparisons) + # paired
  scale_y_continuous(limits = c(-0.52, 1.3))

  

代碼匯總

# Complete recipe: relabel the groups, fix their order, then draw the annotated box plot.

# Replace the GFP-/GFP+ labels with HhOFF/HhON (matched position by position).
tmp$group <- plyr::mapvalues(
  tmp$group,
  from = c("GFP- early", "GFP+ early", "GFP- late", "GFP+ late"),
  to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late")
)

# The level order controls the left-to-right order on the x axis.
tmp$group <- factor(tmp$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))

library(ggpubr)
# brewer.pal() below comes from RColorBrewer; load it here so that this
# summary runs on its own instead of relying on an earlier snippet.
library(RColorBrewer)
my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))

options(repr.plot.width = 4, repr.plot.height = 4)
p1 <- ggplot(tmp, aes(x = group, y = score, color = group)) +
  geom_boxplot() +
  theme_bw() +
  labs(x = "", y = "Pathway score\n", title = "Fatty acid metabolism") +
  geom_jitter(shape = 16, position = position_jitter(0.2)) +
  theme(legend.position = "none") +
  theme(
    axis.text.x = element_text(face = "plain", angle = 30, size = 14, color = "black", vjust = 0.6),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(size = 14)
  ) +
  # scale_fill_manual(values=brewer.pal(9,"Paired"))
  scale_color_manual(values = brewer.pal(9, "Paired")[c(3, 4, 5, 6)]) +
  stat_compare_means(method = "anova", label.y = 1.29) + # global
  stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
  scale_y_continuous(limits = c(-0.52, 1.3))
p1

  

多圖拼接

# Size of the notebook output, then the four panels arranged in two columns.
options(repr.plot.height = 9, repr.plot.width = 8)
cowplot::plot_grid(p1, p2, p3, p4, ncol = 2)

  

PDF出圖

# No `plot` argument is given, so ggsave() falls back to its default plot.
ggsave(
  filename = "HhOFF HhON metabolic pathways.pdf",
  height = 9,
  width = 8
)

  

有些圖不能這麼保存,比如heatmap,這時就要用到pdf函數

# traditional save: open a PDF device, draw the plot object into it, close the device.
pdf("manuscript/HSCR.cluster.heatmap.pdf", width = 8, height = 7)
# Print explicitly: a bare `p` is only auto-printed at top level, so inside
# source(), a function or a loop it would leave the PDF empty.
print(p)
dev.off()

  

 

Inkscape裡修改文字【對齊,上下標等等】

 

 

OK, 一個準發表級的圖就製作好了,可能還需要精修。


 

其余細節

 

點的類型

# change the border of point: shape 21 takes an outline colour and a separate fill
geom_point(shape = 21, fill = "white", colour = "black", stroke = 5, size = 5)

 

把點擬合成線

# Loess curve through the points without the confidence band.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
stat_smooth(method = "loess", size = 1.1, se = FALSE, span = 0.2)

 

散點圖顯示mean

# Overlay the per-group mean as a large black point.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0); the dangling
# `+` was dropped so the snippet is a complete expression on its own.
stat_summary(fun = mean, geom = "point", shape = 20, size = 7, color = "black", fill = "black")

 

直線

# Dashed red horizontal reference line at y = 20.
geom_hline(yintercept = 20, color = "red", linetype = "dashed", size = 2)

線段

# One arrow-tipped segment per row of `seg_1`, from (x, y) to (xend, yend).
geom_segment(
  data = seg_1,
  mapping = aes(x = x, y = y, xend = xend, yend = yend),
  arrow = arrow(length = unit(0.3, "cm")),
  size = 1
)

矩形

# One rectangle per row of `AS_1`, spanning EXONSTART..EXONEND horizontally
# and -0.1..0.1 vertically.
geom_rect(
  data = AS_1,
  mapping = aes(xmin = EXONSTART, xmax = EXONEND, ymin = -0.1, ymax = 0.1),
  fill = "#4DAF4A"
)

 

圖例,比如改legend title,改點大小,去掉圖例

# The `color` entry of labs() sets the title of the colour legend.
labs(
  title = "",
  x = "\nTranscriptional level",
  y = "\nPost-transcriptional level",
  color = "Clinical score"
)
# change legend dot size
guides(colour = guide_legend(override.aes = list(size = 10)))
# ggplot remove legend title
theme(legend.title = element_blank())
# position
theme(legend.position = c(0.8, 0.75), legend.text = element_text(size = 12))
# remove legend background
theme(legend.background = element_blank())

去掉legend的白色背景布,一步到位

# All legend settings in one call: no title, no background, text size 11,
# placed at (0.85, 0.15).
theme(
  legend.position = c(0.85, 0.15),
  legend.text = element_text(size = 11),
  legend.title = element_blank(),
  legend.background = element_blank()
)

 

標題格式,比如居中

# hjust = 0.5 centres the plot title.
theme(plot.title = element_text(size = 18, hjust = 0.5))

  

去掉邊框,軸線,刻度;去掉右上邊框

# empty border, ticks, text
theme(
  axis.line = element_blank(),
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
) +
  labs(title = "", x = "", y = "") +
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank()
  )
# remove top and right border: drop the panel border and draw black axis lines instead
theme(
  panel.background = element_blank(),
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  axis.line = element_line(colour = "black")
)

 

坐標軸,比如修改起點,范圍

# force y start from 0: lower limit 0, upper limit left open (NA), no expansion padding
scale_y_continuous(limits = c(0, NA), expand = c(0, 0))

  

去掉畫布中的網格線條

# just remove inside grid
theme(
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank()
)

  

添加文本

# add text: a fixed two-line label placed at (0.5, 2)
annotate(
  "text",
  x = 0.5, y = 2,
  label = "Wilcoxon test\nP-value = 1.48e-12",
  colour = "black", size = 6
)

 

添加背景色

# add background color to mark different region
# annotate() draws exactly one rectangle. geom_rect() with constant coordinates
# repeats it for every row of the plot data, which overplots and breaks
# transparency as soon as alpha < 1.
annotate("rect", xmin = 0, xmax = 2.5, ymin = -2, ymax = -1, fill = "#4DAF4A", alpha = 1, colour = NA)

  

修改填充顏色

# color: the nine "Set1" colours with the sixth one left out
scale_color_manual(values = brewer.pal(9, "Set1")[-6])

 

精准控制圈圖的兩種alpha,比如venn圖

# Outlines keep the plain colours; fills use the same colours at alpha 0.2.
scale_color_manual(values = sample.colors) +
  scale_fill_manual(values = alpha(sample.colors, alpha = 0.2))

  

快速統計分析

# quick statistic testing
# Wilcoxon test: two-sided comparison of `Time` between two gene sets
wilcox.test(
  x = subset(time.df, GeneSet == "Common risk")$Time,
  y = subset(time.df, GeneSet == "L-HSCR specific")$Time,
  alternative = "two.sided"
)

 

# packages
# significance
# https://github.com/const-ae/ggsignif

分組計算,如取mean,單列

# quick data process
# get group mean: one row per city with its mean temperature
weather %>%
  group_by(city) %>%
  summarise(mean_temperature = mean(temperature))

 

分組取mean,多列  

# Small example table, parsed from inline text.
d <- read.table(header = TRUE, text =
"Name     Month  Rate1     Rate2
Aira       1      12        23
Aira       2      18        73
Aira       3      19        45
Ben        1      53        19
Ben        2      22        87
Ben        3      19        45
Cat        1      22        87
Cat        2      67        43
Cat        3      45        32")

# Mean of both rate columns per Name (the grouping column is reported as Group.1).
aggregate(d[c("Rate1", "Rate2")], by = list(d$Name), FUN = mean)

  Group.1    Rate1    Rate2
1    Aira 16.33333 47.00000
2     Ben 31.33333 50.33333
3     Cat 44.66667 54.00000

  

小數點保留,科學計數法

# format decimals
# Base R: exponent ("e") notation with one digit after the decimal point.
formatC(0.46, digits = 1, format = "e")
# The scales package offers scientific() for the same purpose.
library(scales)
scientific(0.46, digits = 2)

  

查看默認的顏色 - 畫圖的結果數據

# see the colors in ggplot
# ggplot_build() returns the computed plot; its `data` element holds the
# per-layer data, where the `colour` column contains the colour codes in use.
ggplot_build(p)[["data"]]

  

其他圖種

柱狀圖barplot

改bar的順序,以及添加邊框

# geom_col() is the idiomatic form of geom_bar(stat = "identity").
# position_fill(reverse = TRUE) reverses the stacking order compared with
# position = "fill"; `color` draws the border around each bar segment.
# The dangling `+` was dropped so the snippet is a complete expression.
geom_col(alpha = 1, position = position_fill(reverse = TRUE), color = "grey50")

 

配對的箱線圖、柱狀圖、折線圖 - 用於比較case和control

data

aes里加一個fill就可以把x軸分組(顏色填充默認就會分組),非常適合case和control的比較。

這里想加點需要用另一個函數geom_dotplot

    lineage	lineage.sub	stage	S.Score	G2M.Score	cc.score
    <chr>	<fct>	<chr>	<dbl>	<dbl>	<dbl>
ctrl_AAACCTGAGACATAAC	NP	NPlate	Control	-0.8162696	-0.98076576	-0.8162696
ctrl_AAACCTGCAAGTAATG	BP	BP	Control	0.3118349	-0.05584626	0.3118349
ctrl_AAACCTGCATGCTAGT	GP	GP	Control	0.4443853	0.27702244	0.4443853
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
library(ggpubr)

options(repr.plot.width = 5, repr.plot.height = 4)
# `fill = stage` splits every x category into side-by-side boxes.
p <- ggplot(cc.df, aes(x = lineage.sub, y = cc.score, fill = stage)) +
  geom_boxplot(position = position_dodge(1)) +
  # Use the same dodge width as the boxes; position = "dodge" falls back to the
  # geom's own width, which shifts the dots off the centre of their box.
  geom_dotplot(
    binaxis = "y", stackdir = "center", position = position_dodge(1),
    dotsize = 0.15, binwidth = 1 / 25, binpositions = "all"
  ) +
  theme_bw() +
  labs(x = "", y = "Proliferation score\n", title = "") +
  theme(
    axis.text.x = element_text(face = "plain", angle = 0, size = 14, color = "black", vjust = 0.6),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(size = 14)
  ) +
  scale_fill_manual(values = c("blue", "red")) +
  # Compare the stages within each x category and show significance symbols.
  stat_compare_means(aes(group = stage), label = "p.signif", label.y = 4) +
  theme(legend.title = element_blank())
p

  

封裝好的函數

# Bar plot of mean +/- SE per dose, dodged by supplement, with one
# significance label per dose.
ggbarplot(
  ToothGrowth,
  x = "dose", y = "len",
  add = "mean_se",
  color = "supp", palette = "jco",
  position = position_dodge(0.8)
) +
  stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)
# Same data as a line plot; label.y gives one label height per dose.
ggline(
  ToothGrowth,
  x = "dose", y = "len",
  add = "mean_se",
  color = "supp", palette = "jco"
) +
  stat_compare_means(aes(group = supp), label = "p.signif", label.y = c(16, 25, 29))

  

熱圖 - 最直觀

# heatmap
# https://jokergoo.github.io/ComplexHeatmap-reference/book/

熱圖騷操作

聚類熱圖怎么按自己的意願調整分支的順序? 

 

平滑熱圖 - smooth heatmap 

monocle里面的一種熱圖,很多頂刊都在用,也確實很漂亮。對應函數:plot_pseudotime_heatmap

問題是不夠靈活,需要用monocle處理後才行,需要自定義一個處理函數。

 

 

小提琴圖marker - 分布

stacked violin plot for visualizing single-cell data in Seurat

參見:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/integration_public_and_Kif7.ipynb 

 

Venn韋恩圖/UpSetR圖 - 交集

R繪制韋恩圖 | Venn圖 | UpSetR圖

 

Beeswarm Plot 蜂群圖 - 序列數據展開

https://github.com/eclarke/ggbeeswarm

# With different beeswarm point distribution priority
# Load the packages here: geom_beeswarm() comes from ggbeeswarm, which is not
# attached anywhere before this snippet.
library(ggplot2)
library(ggbeeswarm)

# Three groups of 20, 40 and 80 points; y is normal noise centred on the group value.
dat <- data.frame(x = rep(1:3, c(20, 40, 80)))
dat$y <- rnorm(nrow(dat), dat$x)
# A constant x position puts every point into a single swarm.
dat$z <- 1
ggplot(dat, aes(z, y)) +
  geom_beeswarm(size = 2, priority = "descending", cex = 3) +
  ggtitle("Descending") +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))

  

我的代碼

set.seed(49)
library(ggplot2)
library(ggbeeswarm)

# Constant x position so that all samples share one swarm.
pca_HSCR2$z <- 1
# Pseudotime is the sign-flipped X2 column.
pca_HSCR2$pseudotime <- -pca_HSCR2$X2

options(repr.plot.width = 6, repr.plot.height = 4)
ggplot(
  data = pca_HSCR2,
  mapping = aes(x = z, y = pseudotime, fill = severity, color = severity)
) +
  geom_beeswarm(size = 1.2, priority = "ascending", cex = 1.4) +
  # ggtitle('ascending') + # Descending
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5))) +
  # Flip the axes so that pseudotime runs horizontally.
  coord_flip() +
  theme_void() +
  scale_color_manual(values = severity.colors)

  

基因模塊在pseudotime表達的line圖

參見:mouse/singleCell/case/Kif7_ENCC/Kif7/Kif7_basic_analysis.ipynb

 

火山圖/對角線圖 - 特殊散點圖  

參考:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/Ezh2_analysis.ipynb

# prepare data: one row per gene with its log2 fold change in both comparisons
log2FC <- data.frame(gene = rownames(HSCR.DEG.log2FC.df.final), S_log2FC = S.log2FC, L_log2FC = L.log2FC)

# add color label
log2FC$color <- "none"
# Index the column, not the rows: `log2FC[cond, ]$color <- "red"` stops with
# "replacement has 1 row, data has 0" when no gene matches, whereas this form
# is then simply a no-op.
log2FC$color[log2FC$gene %in% c("HDAC1")] <- "red"

# the genes want to be labeled
label.genes <- c('RAMP2', 'HEY1', 'STAMBP', 'CCNB1IP1', 'LMOD3', 'NUP107', 'HEY2', 'FOXO1', 'CRLF1', 'ZFP36L2', 'NR2F2', 'TUBB3',
                'ZNF385A', 'TMEM14C', 'FLNA', 'TFAP2A', 'SOX11', 'HDAC1', 'GLI3', 'BCL11A')
# Subset of rows that gets a larger point and a text label in the plot.
label.df <- subset(log2FC, gene %in% label.genes)

options(repr.plot.width = 4.5, repr.plot.height = 5)
library(ggplot2)
library("ggrepel")
# Basic scatter plot
ggplot(log2FC, aes(x = S_log2FC, y = L_log2FC, color = color)) + # , color=coregene
  # Reference lines: both zero axes and the dashed y = x diagonal.
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  geom_abline(intercept = 0, slope = 1, color = "black", linetype = "dashed", size = 1) +
  # All genes as small points, the labelled genes again as larger red points.
  geom_point(size = 0.5) +
  geom_point(data = label.df, size = 2, color = "red") +
  theme_bw() +
  labs(x = "\nLog2FC in S-HSCR", y = "Log2FC in L-HSCR", title = "") +
  theme(legend.title = element_blank()) +
  # Change fontface. Allowed values : 1(normal), 2(bold), 3(italic), 4(bold.italic)
  geom_text_repel(
    data = label.df, aes(label = gene),
    size = 3.5, fontface = 3, color = "red",
    box.padding = 0.4, max.overlaps = Inf
  ) +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 10),
    # axis.text.y  = element_text(size = 10),
    axis.title = element_text(size = 16, face = "plain")
  ) +
  scale_x_continuous(limits = c(-8, 8)) +
  scale_y_continuous(limits = c(-8, 8)) +
  scale_color_manual(values = c("grey", "red"))

  

點的文本標記

geom_text_repel,基本用法

 

 

 


進階篇 - 風格統一

為什麼頂刊的圖那麼的賞心悅目?而自己的圖拼到一起卻那麼的不和諧,都被自己醜哭了,卻不知從何下手。

 

這里有幾個教程還不錯:

 

 

 

 

 



 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM