SCENIC的使用

本文轉載自查看原文 2020-03-06 10:35 3458 單細胞/ 轉錄組

軟件介紹

SCENIC是一種同時重建基因調控網絡並從單細胞RNA-seq數據中鑒定穩定細胞狀態（stable cell states）的工具。它基於共表達和DNA模序（motif）分析推斷基因調控網絡，然后在每個細胞中分析網絡活性以鑒定細胞狀態。

https://www.nature.com/articles/nmeth.4463
參考幫助文檔：https://rawcdn.githack.com/aertslab/SCENIC/0a4c96ed8d930edd8868f07428090f9dae264705/inst/doc/SCENIC_Running.html#optional_steps:

輸入：SCENIC需要輸入的是單細胞RNA-seq表達矩陣—— 每列對應於樣品（細胞），每行對應一個基因。基因ID應該是gene-symbol並存儲為rownames （尤其是基因名字部分是為了與RcisTarget數據庫兼容）；表達數據是Gene的reads count。根據作者的測試，提供原始的或Normalized UMI count，無論是否log轉換，或使用TPM值，結果相差不大。

軟件的安裝

# BiocManager is the installer used for every package in the list below.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Packages installed through BiocManager, requested in a single call.
BiocManager::install(c(
  "AUCell", "RcisTarget",
  "GENIE3",
  "zoo", "mixtools", "rbokeh",
  "DT", "NMF", "pheatmap", "R2HTML", "Rtsne",
  "doMC", "doRNG",
  "SingleCellExperiment"
))

# devtools is needed for the two GitHub-hosted packages.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("aertslab/SCopeLoomR", build_vignettes = TRUE)
devtools::install_github("aertslab/SCENIC")

# Print the installed SCENIC version as a final check.
packageVersion("SCENIC")

下載評分數據庫

需要下載RcisTarget的物種特定數據庫（https://resources.aertslab.org/cistarget/）

For Human,Mouse,Fly

# Shell commands (not R): URLs of the .feather ranking databases for
# mouse (mm9), human (hg19) and fly (dm6).
mm_url="https://resources.aertslab.org/cistarget/databases/mus_musculus/mm9/refseq_r45/mc9nr/gene_based/mm9-500bp-upstream-7species.mc9nr.feather"
mm_url2="https://resources.aertslab.org/cistarget/databases/mus_musculus/mm9/refseq_r45/mc9nr/gene_based/mm9-tss-centered-10kb-7species.mc9nr.feather"
hg_url="https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-500bp-upstream-7species.mc9nr.feather"
hg_url2="https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-tss-centered-10kb-7species.mc9nr.feather"
fly_url="https://resources.aertslab.org/cistarget/databases/drosophila_melanogaster/dm6/flybase_r6.02/mc8nr/gene_based/dm6-5kb-upstream-full-tx-11species.mc8nr.feather"

# -c resumes a partially downloaded file instead of starting over.
for url in "$mm_url" "$mm_url2" "$hg_url" "$hg_url2" "$fly_url"; do
  wget -c "$url"
done

不同數據格式的讀入

對於loom文件

# Download the example loom file. The file is binary, so request binary mode
# explicitly: download.file() defaults to text mode ("w"), which can corrupt
# a binary download on Windows.
loomPath <- "Cortex.loom"
download.file(
  "http://loom.linnarssonlab.org/clone/Previously%20Published/Cortex.loom",
  destfile = loomPath,
  mode = "wb"
)

10x的輸出文件

# Read the 10x count matrix from the filtered_gene_bc_matrices folder.
singleCellMatrix <- Seurat::Read10X(data.dir="data/pbmc3k/filtered_gene_bc_matrices/hg19/")
# NOTE(review): `seuratObject` is not created anywhere in this snippet, and
# Idents() is called without the `Seurat::` prefix -- presumably an existing
# Seurat object and an attached Seurat package are expected; confirm.
cellInfo <- data.frame(seuratCluster=Idents(seuratObject))

R objects (e.g. Seurat, SingleCellExperiment)

# NOTE(review): `load_as_sce` and `dataPath` are not defined in this snippet;
# they look like placeholders for whatever produces the object -- confirm.
sce <- load_as_sce(dataPath) # any SingleCellExperiment object
# Expression matrix and per-cell annotation extracted from the object.
exprMat <- counts(sce)
cellInfo <- colData(sce)

簡單的SCENIC工作流程

# NOTE(review): hard-coded, machine-specific working directory; replace it
# with your own project folder before running.
setwd("/media/sdb/project/20200223/SCENIC_MouseBrain")

# Open the toy mouse-brain loom file bundled with the SCENIC package and
# read the expression matrix and the cell annotation from it.
loomPath <- system.file(package="SCENIC", "examples/mouseBrain_toy.loom")
library(SCopeLoomR)
loom <- open_loom(loomPath)
exprMat <- get_dgem(loom)
cellInfo <- get_cellAnnotation(loom)
close_loom(loom)

# Check the matrix size
#dim(exprMat)

library(SCENIC)
#scenicOptions <- initializeScenic(org="mgi", dbDir="cisTarget_databases", nCores=10)
# Same call as the commented-out one above, but with dbDir set to an
# absolute, machine-specific folder.
scenicOptions <- initializeScenic(org="mgi", dbDir="/media/sdb/project/20200223/data", nCores=10)

# Store the settings object so it can be reloaded with readRDS() later.
saveRDS(scenicOptions, file="int/scenicOptions.Rds") 

### Co-expression network
# Filter genes, then run the correlation on the filtered counts and GENIE3
# on their log2(x+1) transform.
genesKept <- geneFiltering(exprMat, scenicOptions)
exprMat_filtered <- exprMat[genesKept, ]
runCorrelation(exprMat_filtered, scenicOptions)
exprMat_filtered_log <- log2(exprMat_filtered+1) 
runGenie3(exprMat_filtered_log, scenicOptions)

### Build and score the GRN
# The unfiltered matrix, log2(x+1) transformed, is what gets scored in step 3.
exprMat_log <- log2(exprMat+1)
scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # Toy run settings
runSCENIC_1_coexNetwork2modules(scenicOptions)
runSCENIC_2_createRegulons(scenicOptions, coexMethod=c("top5perTarget")) # Toy run settings
runSCENIC_3_scoreCells(scenicOptions, exprMat_log)

# Export: this call may raise an error when run
#export2scope(scenicOptions, exprMat)

# Binarize activity?
# aucellApp <- plotTsne_AUCellApp(scenicOptions, exprMat_log)
# savedSelections <- shiny::runApp(aucellApp)
# newThresholds <- savedSelections$thresholds
# scenicOptions@fileNames$int["aucell_thresholds",1] <- "int/newThresholds.Rds"
# saveRDS(newThresholds, file=getIntName(scenicOptions, "aucell_thresholds"))
# saveRDS(scenicOptions, file="int/scenicOptions.Rds")
runSCENIC_4_aucell_binarize(scenicOptions)

### Exploring output 
# Check files in folder 'output'
# .loom file @ http://scope.aertslab.org

# output/Step2_MotifEnrichment_preview.html in detail/subset:
motifEnrichment_selfMotifs_wGenes <- loadInt(scenicOptions, "motifEnrichment_selfMotifs_wGenes")
# Keep only the rows whose highlightedTFs column equals "Sox8".
tableSubset <- motifEnrichment_selfMotifs_wGenes[highlightedTFs=="Sox8"]
viewMotifs(tableSubset) 

# output/Step2_regulonTargetsInfo.tsv in detail: 
regulonTargetsInfo <- loadInt(scenicOptions, "regulonTargetsInfo")
# Keep only the Stat6 rows flagged as high-confidence annotation.
tableSubset <- regulonTargetsInfo[TF=="Stat6" & highConfAnnot==TRUE]
viewMotifs(tableSubset)

運行SCENIC

建立基因調控網絡（Gene Regulation Network，GRN）：

1. 基於共表達識別每個轉錄因子TF的潛在靶標。過濾表達矩陣並運行GENIE3或者GRNBoost，它們是利用表達矩陣推斷基因調控網絡的算法，能得到轉錄因子和潛在靶標的相關性網絡；然后將GENIE3或者GRNBoost輸出的靶標整理成共表達模塊。
2. 根據DNA模序（motif）分析選擇潛在的直接結合靶標，得到調節子（regulon）（利用RcisTarget包：TF模序分析）

確定細胞狀態及其調節因子：
3. 分析每個細胞中的網絡活性（AUCell）在細胞中評分調節子（計算AUC）

SCENIC完整流程

導入數據

# Open the toy loom file bundled with SCENIC, read the expression matrix and
# the cell annotation, then close the file.
loomPath <- system.file(package="SCENIC", "examples/mouseBrain_toy.loom")
library(SCopeLoomR)
loom <- open_loom(loomPath) # add mode='r' if this call errors
exprMat <- get_dgem(loom)
cellInfo <- get_cellAnnotation(loom)
close_loom(loom)

Initialize settings 初始設置，導入評分數據庫

library(SCENIC)
# Download the databases first; `org` selects the species (mouse here).
scenicOptions <- initializeScenic(org="mgi", dbDir="cisTarget_databases", nCores=10)
# scenicOptions@inputDatasetInfo$cellInfo <- "int/cellInfo.Rds"
# Store the settings object so later steps/sessions can reload it.
saveRDS(scenicOptions, file="int/scenicOptions.Rds")

共表達網絡

根據已有的表達矩陣推斷潛在的轉錄因子靶標，使用GENIE3或GRNBoost。首先需要進行基因的過濾。

# Both thresholds scale with the number of cells (columns of exprMat):
# minCountsPerGene is 3 counts times 1% of the cells, minSamples is 1% of the cells.
genesKept <- geneFiltering(exprMat, scenicOptions=scenicOptions,
                           minCountsPerGene=3*.01*ncol(exprMat),
                           minSamples=ncol(exprMat)*.01)

過濾表達矩陣，保留只有過濾后的基因

# Keep only the rows (genes) returned by geneFiltering().
exprMat_filtered <- exprMat[genesKept, ]

計算相關性，這一步時間會比較長

# Correlation runs on the filtered matrix as-is; GENIE3 runs on its
# log2(x+1) transform.
runCorrelation(exprMat_filtered, scenicOptions)
exprMat_filtered_log <- log2(exprMat_filtered+1)
runGenie3(exprMat_filtered_log, scenicOptions)

Build and score the GRN 構建並給基因調控網絡（GRN）打分

# The unfiltered matrix, log2(x+1) transformed, is what gets scored in step 3.
exprMat_log <- log2(exprMat+1)
# Restrict the run to the database entry named "10kb".
scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # Toy run settings
runSCENIC_1_coexNetwork2modules(scenicOptions)
runSCENIC_2_createRegulons(scenicOptions, coexMethod=c("top5perTarget")) # Toy run settings
runSCENIC_3_scoreCells(scenicOptions, exprMat_log)

輸入表達矩陣

在本教程中，我們提供了一個示例，樣本是小鼠大腦的200個細胞和862個基因：

# Path of the toy loom file inside the installed SCENIC package.
loomPath <- system.file(package="SCENIC", "examples/mouseBrain_toy.loom")

打開loom文件並加載表達矩陣；

library(SCopeLoomR)
# Open read-only, read the expression matrix and the cell annotation, then
# close the file again.
loom <- open_loom(loomPath, mode="r")
exprMat <- get_dgem(loom)
cellInfo <- get_cellAnnotation(loom)
close_loom(loom)
# Uncomment to check the matrix dimensions:
#dim(exprMat)

細胞信息/表型

# cellInfo$nGene <- colSums(exprMat>0)
head(cellInfo)

# Rename the annotation column "Class" to "CellType" and print how many
# cells fall into each type.
cellInfo <- data.frame(cellInfo)
cellTypeColumn <- "Class"
colnames(cellInfo)[which(colnames(cellInfo)==cellTypeColumn)] <- "CellType"
cbind(table(cellInfo$CellType))

# Make sure the folder for intermediate files exists before writing into it.
dir.create("int", showWarnings = FALSE)
saveRDS(cellInfo, file="int/cellInfo.Rds")

# Color to assign to the variables (same format as for NMF::aheatmap)
# The names must match the values in cellInfo$CellType exactly: intersect()
# below silently drops any name that does not occur in the data.
colVars <- list(CellType=c("microglia"="forestgreen",
                            "endothelial-mural"="darkorange",
                            "astrocytes_ependymal"="magenta4",
                            "oligodendrocytes"="hotpink",
                            "interneurons"="red3",
                            "pyramidal CA1"="skyblue",
                            "pyramidal SS"="darkblue"
                            ))
# Keep only the colors of cell types present in this dataset.
colVars$CellType <- colVars$CellType[intersect(names(colVars$CellType), cellInfo$CellType)]
saveRDS(colVars, file="int/colVars.Rds")
# Draw the legend on an empty plot as a visual check of the mapping.
plot.new()
legend(0,1, fill=colVars$CellType, legend=names(colVars$CellType))

初始化SCENIC設置

為了在SCENIC的多個步驟中保持設置一致，SCENIC包中的大多數函數使用一個公共對象，該對象存儲當前運行的選項並代替大多數函數的“參數”。比如下面的org，dbDir等，可以在開始就將物種（mgi —— mouse，hgnc —— human，dmel —— fly）和RcisTarget數據庫位置分別賦值給變量org和dbDir，之后統一用函數initializeScenic得到對象scenicOptions。具體參數設置可以用 ?initializeScenic 查看幫助。

library(SCENIC)
org <- "mgi" # or hgnc, or dmel
dbDir <- "cisTarget_databases" # RcisTarget databases location
myDatasetTitle <- "SCENIC example on Mouse brain" # choose a name for your analysis
# Look up the default database names for the chosen organism.
data(defaultDbNames)
dbs <- defaultDbNames[[org]]
scenicOptions <- initializeScenic(org=org, dbDir=dbDir, dbs=dbs, datasetTitle=myDatasetTitle, nCores=10)

# Modify these if needed: paths to the cell annotation and color files
scenicOptions@inputDatasetInfo$cellInfo <- "int/cellInfo.Rds"
scenicOptions@inputDatasetInfo$colVars <- "int/colVars.Rds"
# Databases:
# scenicOptions@settings$dbs <- c("mm9-5kb-mc8nr"="mm9-tss-centered-5kb-10species.mc8nr.feather")
# scenicOptions@settings$db_mcVersion <- "v8"
# Save to use at a later time...
saveRDS(scenicOptions, file="int/scenicOptions.Rds")

共表達網絡

SCENIC工作流程的第一步是根據表達數據推斷潛在的轉錄因子靶標。為此，我們使用GENIE3或GRNBoost，輸入文件是表達矩陣（過濾后的）和轉錄因子列表。GENIE3/GRNBoost的輸出結果和相關矩陣將用於創建共表達模塊（runSCENIC_1_coexNetwork2modules()）。

基因過濾/選擇

按每個基因的reads總數進行過濾。該filter旨在去除最可能是噪音的基因。默認情況下，它（minCountsPerGene）只保留在所有樣品中合計至少有6個UMI counts的基因（6即一個基因若在1％的細胞中以3的值表達時所具有的總數，例如200個細胞時為 3 × 0.01 × 200 = 6）。
通過基因的細胞數來實現過濾（例如 UMI > 0 ，或log 2（TPM）> 1 ）。默認情況下(minSamples)，保留下來的基因能在至少1％的細胞中檢測得到。
最后，只保留RcisTarget數據庫中可用的基因。

# (Adjust minimum values according to your dataset)
# Both thresholds scale with the number of cells (columns of exprMat):
# minCountsPerGene is 3 counts times 1% of the cells, minSamples is 1% of the cells.
genesKept <- geneFiltering(exprMat, scenicOptions=scenicOptions, 
                            minCountsPerGene=3*.01*ncol(exprMat),
                            minSamples=ncol(exprMat)*.01)

在進行網絡推斷之前，檢查是否有任何已知的相關基因被過濾掉（如果缺少任何相關基因，請仔細檢查filter設置是否合適）：

# Genes you expect to be relevant; the second line prints those that are
# NOT in genesKept, i.e. those that were filtered out.
interestingGenes <- c("Sox9", "Sox10", "Dlx5")
interestingGenes[which(!interestingGenes %in% genesKept)]

運行`GENIE3`推斷轉錄因子（TF）的潛在靶標

## If launched in a new session, you will need to reload...
# setwd("...")
# loomPath <- "..."
# loom <- open_loom(loomPath, mode="r")
# exprMat <- get_dgem(loom)
# close_loom(loom)
# genesKept <- loadInt(scenicOptions, "genesKept")
# exprMat_filtered <- exprMat[genesKept,]
# library(SCENIC)
# scenicOptions <- readRDS("int/scenicOptions.Rds")
# Optional: add log (if it is not logged/normalized already)
# Note: this overwrites exprMat_filtered with its log2(x+1) transform, so
# the raw filtered counts are no longer available under this name.
exprMat_filtered <- log2(exprMat_filtered+1)
# Run GENIE3
runGenie3(exprMat_filtered, scenicOptions)

構建並評分GRN（runSCENIC_ …）

必要時重新加載表達矩陣：

# Re-read the full (unfiltered) expression matrix from the loom file.
loom <- open_loom(loomPath, mode="r")
exprMat <- get_dgem(loom)
close_loom(loom)
# Optional: log expression (for TF expression plot, it does not affect any other calculation)
logMat <- log2(exprMat+1)
dim(exprMat)

使用wrapper函數運行其余步驟：

library(SCENIC)
# Reload the saved settings and override verbosity, core count and seed.
scenicOptions <- readRDS("int/scenicOptions.Rds")
scenicOptions@settings$verbose <- TRUE
scenicOptions@settings$nCores <- 10
scenicOptions@settings$seed <- 123
# For a very quick run:
# coexMethod=c("top5perTarget")
scenicOptions@settings$dbs <- scenicOptions@settings$dbs["10kb"] # For toy run
# save...
runSCENIC_1_coexNetwork2modules(scenicOptions)
runSCENIC_2_createRegulons(scenicOptions, coexMethod=c("top5perTarget")) #** Only for toy run!! (test data only)
runSCENIC_3_scoreCells(scenicOptions, logMat)

可選步驟

將network activity轉換成ON/OFF（二進制）格式

nPcs <- c(5) # For toy dataset
# nPcs <- c(5,15,50)

scenicOptions@settings$seed <- 123 # same seed for all of them
# Run t-SNE with several perplexities: first on all regulons, then only on
# the high-confidence ones (those files get the "int/tSNE_oHC" prefix).
fileNames <- tsneAUC(scenicOptions, aucType="AUC", nPcs=nPcs, perpl=c(5,15,50))
fileNames <- tsneAUC(scenicOptions, aucType="AUC", nPcs=nPcs, perpl=c(5,15,50), onlyHighConf=TRUE, filePrefix="int/tSNE_oHC")

# Plot (individual files in int/):
# Both patterns are matched literally (fixed=TRUE) so the "." in ".Rds" is
# not treated as a regex wildcard.
par(mfrow=c(length(nPcs), 3))
fileNames <- paste0("int/", grep(".Rds", grep("tSNE_AUC", list.files("int"), value=TRUE, fixed=TRUE), value=TRUE, fixed=TRUE))
plotTsne_compareSettings(fileNames, scenicOptions, showLegend=FALSE, varName="CellType", cex=.5)

# Using only "high-confidence" regulons (normally similar)
par(mfrow=c(3,3))
fileNames <- paste0("int/", grep(".Rds", grep("tSNE_oHC_AUC", list.files("int"), value=TRUE, fixed=TRUE), value=TRUE, fixed=TRUE))
plotTsne_compareSettings(fileNames, scenicOptions, showLegend=FALSE, varName="CellType", cex=.5)

輸出到 loom/SCope

SCENIC生成的結果可以用函數export2scope()（需要SCopeLoomR包）導出為.loom文件，然后在http://scope.aertslab.org中查看。

# DGEM (Digital gene expression matrix)
# (non-normalized counts)
# exprMat <- get_dgem(open_loom(loomPath))
# dgem <- exprMat
# head(colnames(dgem))  #should contain the Cell ID/name
# Export:
# Set the output loom file name in the settings, then write the results.
scenicOptions@fileNames$output["loomFile",] <- "output/mouseBrain_SCENIC.loom"
export2scope(scenicOptions, exprMat)

加載.loom文件中的結果

SCopeLoomR中也有函數可以導入.loom文件中的內容，比如調節子（regulons）、AUC和降維嵌入（embeddings，比如regulon activity的t-SNE和UMAP結果）。


library(SCopeLoomR)
# Path of the loom file registered in the settings under "loomFile".
scenicLoomPath <- getOutName(scenicOptions, "loomFile")
loom <- open_loom(scenicLoomPath)
# Read information from loom file:
regulons_incidMat <- get_regulons(loom)
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonsAUC <- get_regulonsAuc(loom)
regulonsAucThresholds <- get_regulonThresholds(loom)
embeddings <- get_embeddings(loom)
# Release the file handle once everything has been read.
close_loom(loom)

解讀結果

1. 細胞狀態

AUCell提供跨細胞的調節子的活性,AUCell使用“Area under Curve 曲線下面積”（AUC）來計算輸入基因集的關鍵子集是否在每個細胞的表達基因中富集。通過該調節子活性（連續或二進制AUC矩陣）來聚類細胞，我們可以看出是否存在傾向於具有相同調節子活性的細胞群，並揭示在多個細胞中反復發生的網絡狀態。這些狀態等同於網絡的吸引子狀態。將這些聚類與不同的可視化方法相結合，我們可以探索細胞狀態與特定調節子的關聯。

將AUC和TF表達投射到t-SNE上

# NOTE(review): logMat is assigned the untransformed exprMat here, although
# the comment on the same line recommends logged/normalized values.
logMat <- exprMat # Better if it is logged/normalized
aucellApp <- plotTsne_AUCellApp(scenicOptions, logMat) # default t-SNE
# Launch the app; its return value is kept in savedSelections.
savedSelections <- shiny::runApp(aucellApp)

# Print the name of the t-SNE file that the settings point to.
print(tsneFileName(scenicOptions))


# Load the saved t-SNE and the regulon AUC object.
tSNE_scenic <- readRDS(tsneFileName(scenicOptions))
aucell_regulonAUC <- loadInt(scenicOptions, "aucell_regulonAUC")
# Show TF expression:
par(mfrow=c(2,3))
AUCell::AUCell_plotTSNE(tSNE_scenic$Y, exprMat, aucell_regulonAUC[onlyNonDuplicatedExtended(rownames(aucell_regulonAUC))[c("Dlx5", "Sox10", "Sox9","Irf1", "Stat6")],], plots="Expression")

# Save the AUC plots to a PDF:
Cairo::CairoPDF("output/Step4_BinaryRegulonActivity_tSNE_colByAUC.pdf", width=20, height=15)
par(mfrow=c(4,6))
AUCell::AUCell_plotTSNE(tSNE_scenic$Y, cellsAUC=aucell_regulonAUC, plots="AUC")
# Close the PDF device so the file is written out.
dev.off()

library(KernSmooth)

library(RColorBrewer)
# 2D kernel density estimate of the t-SNE coordinates (bandwidth 1), drawn
# as a colored image with contour lines on top.
dens2d <- bkde2D(tSNE_scenic$Y, 1)$fhat
image(dens2d, col=brewer.pal(9, "YlOrBr"), axes=FALSE)
contour(dens2d, add=TRUE, nlevels=5, drawlabels=FALSE)

#par(bg = "black")
par(mfrow=c(1,2))
# First panel: two regulons passed as a plain vector, colored by AUC.
regulonNames <- c( "Dlx5","Sox10")
cellCol <- plotTsne_rgb(scenicOptions, regulonNames, aucType="AUC", aucMaxContrast=0.6)
# Label the plot with the "red"/"green" attributes of the returned object.
# The text coordinates are hard-coded for this particular t-SNE layout.
text(0, 10, attr(cellCol,"red"), col="red", cex=.7, pos=4)
text(-20,-10, attr(cellCol,"green"), col="green3", cex=.7, pos=4)
# Second panel: regulons assigned explicitly to the red/green/blue channels,
# colored by binarized activity.
regulonNames <- list(red=c("Sox10", "Sox8"),
                     green=c("Irf1"),
                     blue=c( "Tef"))
cellCol <- plotTsne_rgb(scenicOptions, regulonNames, aucType="Binary")
text(5, 15, attr(cellCol,"red"), col="red", cex=.7, pos=4)
text(5, 15-4, attr(cellCol,"green"), col="green3", cex=.7, pos=4)
text(5, 15-8, attr(cellCol,"blue"), col="blue", cex=.7, pos=4)

GRN：Regulon靶標和模序

# Load the "regulons" intermediate object and show two entries by name.
regulons <- loadInt(scenicOptions, "regulons")
regulons[c("Dlx5", "Irf1")]

# Note: this overwrites `regulons` with the "aucell_regulons" object.
regulons <- loadInt(scenicOptions, "aucell_regulons")
head(cbind(onlyNonDuplicatedExtended(names(regulons))))

# Keep only the Stat6 rows flagged as high-confidence annotation and view them.
regulonTargetsInfo <- loadInt(scenicOptions, "regulonTargetsInfo")
tableSubset <- regulonTargetsInfo[TF=="Stat6" & highConfAnnot==TRUE]
viewMotifs(tableSubset)

2. 細胞群的調控因子

regulonAUC <- loadInt(scenicOptions, "aucell_regulonAUC")
regulonAUC <- regulonAUC[onlyNonDuplicatedExtended(rownames(regulonAUC)),]
# Mean AUC of every regulon within each cell type (regulons x cell types).
# getAUC() is namespace-qualified because AUCell is never attached in this
# document, and drop=FALSE keeps a matrix even when a cell type has only one
# cell (rowMeans() fails on a plain vector).
regulonActivity_byCellType <- sapply(
  split(rownames(cellInfo), cellInfo$CellType),
  function(cells) rowMeans(AUCell::getAUC(regulonAUC)[, cells, drop=FALSE])
)
# Center and scale each regulon (row) across the cell types.
regulonActivity_byCellType_Scaled <- t(scale(t(regulonActivity_byCellType), center=TRUE, scale=TRUE))
pheatmap::pheatmap(regulonActivity_byCellType_Scaled, color=colorRampPalette(c("blue","white","red"))(100), breaks=seq(-3, 3, length.out = 100),treeheight_row=10, treeheight_col=10, border_color=NA)

# filename="regulonActivity_byCellType.pdf", width=10, height=20)
# Long-format table of the regulon/cell-type pairs with positive scaled activity.
topRegulators <- reshape2::melt(regulonActivity_byCellType_Scaled)
colnames(topRegulators) <- c("Regulon", "CellType", "RelativeActivity")
topRegulators <- topRegulators[which(topRegulators$RelativeActivity>0),]
viewTable(topRegulators)

# Fraction of cells per cell type in which each regulon is active, according
# to the binarized activity matrix.
minPerc <- .7
binaryRegulonActivity <- loadInt(scenicOptions, "aucell_binary_nonDupl")

# Restrict the annotation to the cells present in the binary matrix.
hasBinaryCall <- rownames(cellInfo) %in% colnames(binaryRegulonActivity)
cellInfo_binarizedCells <- cellInfo[which(hasBinaryCall), , drop=FALSE]

cellsPerType <- split(rownames(cellInfo_binarizedCells), cellInfo_binarizedCells$CellType)
regulonActivity_byCellType_Binarized <- sapply(cellsPerType, function(cells) {
  rowMeans(binaryRegulonActivity[, cells, drop=FALSE])
})

# Keep the regulons that exceed minPerc in at least one cell type.
passesInAnyType <- rowSums(regulonActivity_byCellType_Binarized > minPerc) > 0
binaryActPerc_subset <- regulonActivity_byCellType_Binarized[which(passesInAnyType), ]

pheatmap::pheatmap(
  binaryActPerc_subset,
  color = colorRampPalette(c("white", "pink", "red"))(100),
  breaks = seq(0, 1, length.out = 100),
  treeheight_row = 10,
  treeheight_col = 10,
  border_color = NA
)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Quartz的使用和springTask的使用工具使用——Hydra使用 Afinal的使用（一）：FinalActivity的使用為什么使用Nginx & Nginx的使用 token使用原理及使用 arrayList使用與 foreach 使用 Ubuntu 使用Git 使用 phpQuery的使用 puppteer的使用 confluence的使用

SCENIC的使用

軟件介紹

軟件的安裝

下載評分數據庫

不同數據格式的讀入

簡單的SCENIC工作流程

運行SCENIC

SCENIC完整流程

導入數據

Initialize settings 初始設置，導入評分數據庫

共表達網絡

Build and score the GRN 構建並給基因調控網絡（GRN）打分

輸入表達矩陣

初始化SCENIC設置

共表達網絡

基因過濾/選擇

相關性

運行GENIE3得到潛在轉錄因子TF

構建並評分GRN（runSCENIC_ …）

可選步驟

解讀結果

1. 細胞狀態

2. 細胞群的調控因子

免責聲明！

運行`GENIE3`得到潛在轉錄因子TF