差异基因富集分析—科研工具箱

请关注公众号【叨客学习资料】 在使用网站的过程中有疑问,请来公众号进行反馈哦
############################################################
# Producer=Jin Wang (https://www.jingege.wang)
############################################################
# 分析之前,先安装Bioconductor 和 clusterProfiler,org.Hs.eg.db
# source(\"https://bioconductor.org/biocLite.R\")
# biocLite(\"org.Hs.eg.db\")
# biocLite(\"clusterProfiler\")
# biocLite(\"pathview\")
############################################################
library(clusterProfiler)
library(org.Hs.eg.db)
library(\"pathview\")

############################################################
args <- commandArgs(TRUE)
#work_dir <-  args[1]
#deg_file <- args[2]


work_dir <- \"/Users/zhangqiuxue/Documents/Train/TCGA/lab/Annotation\"
deg_file <- \"/Users/zhangqiuxue/Documents/Train/TCGA/lab/DEG/Gene/DE_genes.txt\"


# 设置工作目录
setwd(work_dir)

# 读取差异表达的文件,获得差异表达基因列表
degs = read.table(deg_file,header=T,comment.char = \"\",check.names=F)
DEG_list <- rownames(degs)
head(DEG_list)
#GO 富集
# ont:  MF,BP,CC
ego <- enrichGO(gene          = DEG_list,
                keyType       = \"ENSEMBL\",
                OrgDb         = org.Hs.eg.db,
                ont           = \"BP\",
                pAdjustMethod = \"BH\",
                pvalueCutoff  = 0.05,
                qvalueCutoff  = 0.05,
                readable      = TRUE)

write.table(ego, file = \"GO_enrichment_stat.txt\",sep=\"\\t\", row.names =F, quote = F)


ego_results<-summary(ego)
ego_results
# 绘制相关图
pdf(file = \"ego_barplot.pdf\")
barplot(ego, showCategory=20, x = \"GeneRatio\")
dev.off()

# 点图
pdf(file = \"ego_dotplot.pdf\")
dotplot(ego,showCategory=20)
dev.off()

pdf(file = \"enrich_map.pdf\")
enrichMap(ego)
dev.off()

# 绘制topGO图
pdf(file = \"topgo.pdf\")
plotGOgraph(ego)
dev.off()


# KEGG pathway 富集
gene_ids <- bitr(DEG_list, fromType=\"ENSEMBL\", toType=\"ENTREZID\", OrgDb=\"org.Hs.eg.db\")
kegg <- enrichKEGG(gene         = gene_ids$ENTREZID,
                   organism     = \'hsa\',
                   pvalueCutoff = 0.05)
head(kegg)
write.table(kegg, file = \"KEGG_enrichment_stat.txt\",sep=\"\\t\", row.names =F, quote = F)

# 点图
png(file = \"kegg_dotplot.png\")
dotplot(kegg,title=\"Enrichment KEGG_dot\")
dev.off()

# 合并表达信息和基因信息
deg_info <- degs[\'logFC\']
deg_info[\'ENSEMBL\'] <- rownames(deg_info)
gene_ids_merge <- merge(gene_ids, deg_info,by=\'ENSEMBL\')
# 去掉ENTREZID 重复的基因
index<-duplicated(gene_ids_merge$ENTREZID)
gene_ids_merge<-gene_ids_merge[!index,]

# 提取FC的值,在map上进行颜色标注
map_ids <- as.matrix(gene_ids_merge[\'logFC\'])
rownames(map_ids) <- gene_ids_merge$ENTREZID

#  绘制富集的pathway 以hsa00130 为例
pathway_id <- \"hsa00280\"
map <- pathview(gene.data  = map_ids[,1],
                     pathway.id = pathway_id,
                     species    = \"hsa\", kegg.native = TRUE)
© 版权声明
THE END
喜欢就支持一下吧
点赞0 分享
评论 抢沙发
头像
请输入有效评论哦,肆意灌水或者乱打评论是不会通过的,会影响您评论后获得资源哦~~
提交
头像

昵称

取消
昵称表情

    暂无评论内容