介紹
TIDE軟件計算的TIDE score由dysfunction score和exclusion score兩個部分構成。dysfunction score的計算原理是,免疫失調作用的基因擁有更高的權重,再乘以表達量就能得到dysfunction score。而exclusion score是由免疫排斥的基因擁有更高的權重,再乘以表達量得到的。免疫失調的權重是用癌症的生存數據得到的,而免疫排斥的權重是用免疫排斥的樣本的基因表達得到的。本文主要講的是失調score的計算。
代碼講解
讀入數據,准備矩陣
#!/usr/bin/env Rscript suppressPackageStartupMessages(library(survival)) CTL_genes = c('CD8A', 'CD8B', 'GZMA', 'GZMB', 'PRF1') # 細胞毒性T淋巴細胞相關的基因 readmat = function(mat) as.matrix(read.table(mat, sep='\t', header=T, check.names=F, quote=NULL)) writemat = function(result, output) { result = result[!is.na(result[,"p"]),,drop=F] FDR = p.adjust(result[,"p"], method="fdr") result = cbind(result, FDR) write.table(result, file=output, sep='\t', quote=F) } commands = commandArgs(trailingOnly=T) # three inputs here mat = t(readmat(commands[1])) # expression, mutation, or RPPA matrix pivot = t(readmat(commands[2])) # CD8A, CD8B, GZMA, GZMB, PRF1 expression, 作為支點 survival = readmat(commands[3]) # clinical information
篩選數據
# remove negative survival values survival = survival[survival[,1] > 0,] #去掉生存時間為負的錯誤信息 output = commands[4] # 取出在所有數據中都有的樣本 common = Reduce(intersect, list(rownames(mat),rownames(survival),rownames(pivot))) print(paste(length(common), "samples")) # stop at low sample numbers if(length(common) < 20) stop("two few samples") # 如果樣本太少則不能分析 not_included = setdiff(CTL_genes, colnames(pivot)) # CTL基因與支點的基因不能有交集 if(length(not_included) > 0) stop(paste(c("pivot genes", not_included, "are not included"), ' ')) pivot = rowMeans(pivot[common,CTL_genes]) # 支點(細胞毒性T細胞表達)的數據 mat = mat[common,,drop=F] # 所有可用樣本的表達的矩陣 survival = survival[common,,drop=F] # 所有可用樣本的生存信息 # stop at low death rate death_rate = sum(survival[,2])/dim(survival)[1] if(death_rate < 0.1) stop(paste("death rate", death_rate, "is too low")) # 死亡率太低的話得到的dysfunction不可靠 # split up survival and background surv = Surv(survival[,1], survival[,2]) # 生存狀態和時間 if(dim(survival)[2] > 2){ #除了生存狀態和時間外的各種變量 B = survival[,3:dim(survival)[2], drop=F] }else{ B = survival[,c(), drop=F] } # build up regression data space # 構建數據 B = cbind(B, pivot, pivot, pivot) B = as.data.frame(B) N_B = dim(B)[2] # 數據的列數
進行生存分析
# test pivot effect first # 得到TIL的回歸系數 coxph.pivot = summary(coxph(surv~., data=B[,c(-N_B, -(N_B-1)), drop=F]))$coef write.table(coxph.pivot, file=paste0(output, ".pivot"), sep='\t', quote=F) colnames(B)[N_B-1] = "partner" colnames(B)[N_B] = "Interaction" # iterate over features # 得到所有的特征變量准備進行循環 features = colnames(mat) N = length(features) # 存放結果的矩陣 result_interaction = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) result_main = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) result_partial = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) result_base = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) # 進行循環 for (i in 1:N) { title = features[i] partner = mat[,i] B[,N_B-1] = partner # partner就是當前循環的基因的表達量 B[,N_B] = partner * pivot # partner和pivot的乘積就是interaction # region 1: model with interaction # 有interaction的模型,得到的interaction部分的z值就是dysfunction的權重大小 errflag = F coxph.fit = tryCatch(coxph(surv~., data=B), error = function(e) errflag <<- T, warning = function(w) errflag <<- T) if(!errflag) { reg.summary = summary(coxph.fit)$coef result_interaction[i,] = reg.summary["Interaction", c("z", "Pr(>|z|)")] result_main[i,] = reg.summary["partner", c("z", "Pr(>|z|)")] } # region 2: model without interaction # 沒有interaction的模型 errflag = F coxph.fit = tryCatch(coxph(surv~., data=B[,-N_B]), error = function(e) errflag <<- T, warning = function(w) errflag <<- T) if(!errflag) { reg.summary = summary(coxph.fit)$coef result_partial[i,] = reg.summary["partner", c("z", "Pr(>|z|)")] } # region 3: model with only base effect # 只有其他臨床信息的模型 errflag = F coxph.fit = tryCatch(coxph(surv~., data=B[,c(-N_B, -(N_B-2)), drop=F]), error = function(e) errflag <<- T, warning = function(w) errflag <<- T) if(!errflag) { reg.summary = summary(coxph.fit)$coef result_base[i,] = reg.summary["partner", c("z", "Pr(>|z|)")] } } writemat(result_interaction, paste0(output, ".interaction")) writemat(result_main, paste0(output, ".main")) writemat(result_partial, paste0(output, ".partial")) writemat(result_base, paste0(output, ".base"))
結果講解
如上圖所示, TGFB1與CTL相乘的回歸系數為正值,那么它是一個跟CTL起到拮抗(antagnonistic)作用的基因,SOX10與CTL相乘的回歸系數為負值,那么它是一個跟CTL起協同作用的基因。這里得到的基因的zscore就是dysfunction的權重,將權重乘以基因的表達量再求和得到的就是這個樣本的dysfunction score
http://www.thecodesearch.com/2020/08/26/tide%e4%b8%addysfunction-score%e7%9a%84%e8%ae%a1%e7%ae%97/