这种可视化方法在GO富集分析中是相当科学和常用的。
这种可视化方法的科学标准体现在:用 -log10(校正后 p 值) 表示富集显著性,气泡大小表示基因数(Count),填充颜色表示基因比例(GeneRatio),点的形状区分本体类别(BP、CC、MF)。
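推荐的图例设计:形状图例(Ontology)、大小图例(Gene Count)和颜色渐变图例(Gene Ratio)分别显示,其中形状图例使用完整的本体名称(Biological Process、Cellular Component、Molecular Function)作为标签。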
这种可视化方法在生物信息学顶级期刊中非常常见,如Nature系列、Genome Research等。
library(ggplot2)
library(dplyr)
library(tidyr)
# Load the GO enrichment table (comma-separated, with a header row).
go3 <- read.csv("tmp.csv", header = TRUE, sep = ",")

# Convert each "k/n" GeneRatio string into the numeric fraction k / n.
# The column is split once, then vapply() pins the result to exactly one
# double per row, so the output type cannot drift the way sapply() allows.
# Entries without a "/" yield NA, as before.
go3$GeneRatioNumeric <- vapply(
  strsplit(as.character(go3$GeneRatio), "/", fixed = TRUE),
  function(parts) as.numeric(parts[1]) / as.numeric(parts[2]),
  numeric(1)
)

# Keep the first 20 rows. The input is expected to be sorted by p-value
# already, so these are taken to be the most significant terms.
top_results <- head(go3, 20)
# Point shape per GO ontology.
# Only shapes 21-25 honour the `fill` aesthetic; the solid shapes 15-17 ignore
# it, which would leave the Gene Ratio gradient invisible. Use the fillable
# equivalents of circle / triangle / square instead.
ontology_shapes <- c(
  "BP" = 21, # filled circle
  "CC" = 24, # filled triangle
  "MF" = 22  # filled square
)

# Build the bubble chart before opening the graphics device:
#   position -> -log10(adjusted p-value), terms ordered by the same quantity
#   size     -> gene count
#   fill     -> numeric gene ratio
#   shape    -> ontology
go_bubble_plot <- ggplot(
  top_results,
  aes(
    x = reorder(Description, -log10(p.adjust)),
    y = -log10(p.adjust),
    size = Count,
    fill = GeneRatioNumeric,
    shape = ONTOLOGY
  )
) +
  # Black outline with a semi-transparent fill for every bubble.
  geom_point(alpha = 0.7, color = "black") +
  # Flip the axes so the term descriptions read horizontally.
  coord_flip() +
  scale_shape_manual(
    values = ontology_shapes,
    name = "Ontology",
    labels = c(
      "BP" = "Biological Process",
      "CC" = "Cellular Component",
      "MF" = "Molecular Function"
    )
  ) +
  # Bubble size range.
  scale_size(range = c(3, 15), name = "Gene Count") +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Gene Ratio") +
  # Enlarge the shape legend keys and give them a neutral fill so the three
  # fillable shapes are easy to tell apart.
  guides(shape = guide_legend(override.aes = list(size = 5, fill = "grey70"))) +
  # Clean white background.
  theme_bw() +
  theme(
    text = element_text(size = 14),
    # Drop the vertical axis title (this is the flipped x axis).
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 14, face = "bold"),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    plot.title = element_text(size = 16, hjust = 0.5)
  ) +
  labs(
    title = "GO Enrichment Analysis",
    x = NULL,
    y = "-log10(adjusted p-value)"
  )

# Render straight to a PNG file. A ggplot object is only drawn when printed,
# and auto-printing does not happen under source() or inside functions, so
# print it explicitly to avoid writing an empty image.
png("tmp.png", width = 1800, height = 900, res = 120, bg = "white")
print(go_bubble_plot)
dev.off()
# Print a short legend of the chart's visual encodings to the console.
# All lines are emitted by one cat() call; sep = "" keeps the output
# byte-identical to printing each newline-terminated string separately.
cat(
  c(
    "Visualization details:\n",
    "- X-axis: -log10(adjusted p-value)\n",
    "- Bubble size: Gene Count\n",
    "- Bubble fill: Gene Ratio\n",
    "- Bubble shape: Ontology type\n"
  ),
  sep = ""
)