介绍
TIDE软件计算的TIDE score由dysfunction score和exclusion score两个部分构成。dysfunction score的计算原理是,免疫失调作用的基因拥有更高的权重,再乘以表达量就能得到dysfunction score。而exclusion score是由免疫排斥的基因拥有更高的权重,再乘以表达量得到的。免疫失调的权重是用癌症的生存数据得到的,而免疫排斥的权重是用免疫排斥的样本的基因表达得到的。本文主要讲的是失调score的计算。
代码讲解
读入数据,准备矩阵
#!/usr/bin/env Rscript suppressPackageStartupMessages(library(survival)) CTL_genes = c('CD8A', 'CD8B', 'GZMA', 'GZMB', 'PRF1') # 细胞毒性T淋巴细胞相关的基因 readmat = function(mat) as.matrix(read.table(mat, sep='\t', header=T, check.names=F, quote=NULL)) writemat = function(result, output) { result = result[!is.na(result[,"p"]),,drop=F] FDR = p.adjust(result[,"p"], method="fdr") result = cbind(result, FDR) write.table(result, file=output, sep='\t', quote=F) } commands = commandArgs(trailingOnly=T) # three inputs here mat = t(readmat(commands[1])) # expression, mutation, or RPPA matrix pivot = t(readmat(commands[2])) # CD8A, CD8B, GZMA, GZMB, PRF1 expression, 作为支点 survival = readmat(commands[3]) # clinical information
筛选数据
# remove negative survival values survival = survival[survival[,1] > 0,] #去掉生存时间为负的错误信息 output = commands[4] # 取出在所有数据中都有的样本 common = Reduce(intersect, list(rownames(mat),rownames(survival),rownames(pivot))) print(paste(length(common), "samples")) # stop at low sample numbers if(length(common) < 20) stop("two few samples") # 如果样本太少则不能分析 not_included = setdiff(CTL_genes, colnames(pivot)) # CTL基因与支点的基因不能有交集 if(length(not_included) > 0) stop(paste(c("pivot genes", not_included, "are not included"), ' ')) pivot = rowMeans(pivot[common,CTL_genes]) # 支点(细胞毒性T细胞表达)的数据 mat = mat[common,,drop=F] # 所有可用样本的表达的矩阵 survival = survival[common,,drop=F] # 所有可用样本的生存信息 # stop at low death rate death_rate = sum(survival[,2])/dim(survival)[1] if(death_rate < 0.1) stop(paste("death rate", death_rate, "is too low")) # 死亡率太低的话得到的dysfunction不可靠 # split up survival and background surv = Surv(survival[,1], survival[,2]) # 生存状态和时间 if(dim(survival)[2] > 2){ #除了生存状态和时间外的各种变量 B = survival[,3:dim(survival)[2], drop=F] }else{ B = survival[,c(), drop=F] } # build up regression data space # 构建数据 B = cbind(B, pivot, pivot, pivot) B = as.data.frame(B) N_B = dim(B)[2] # 数据的列数
进行生存分析
# test pivot effect first # 得到TIL的回归系数 coxph.pivot = summary(coxph(surv~., data=B[,c(-N_B, -(N_B-1)), drop=F]))$coef write.table(coxph.pivot, file=paste0(output, ".pivot"), sep='\t', quote=F) colnames(B)[N_B-1] = "partner" colnames(B)[N_B] = "Interaction" # iterate over features # 得到所有的特征变量准备进行循环 features = colnames(mat) N = length(features) # 存放结果的矩阵 result_interaction = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) result_main = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) result_partial = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) result_base = matrix(nrow=N, ncol=2, dimnames=list(features, c('z','p'))) # 进行循环 for (i in 1:N) { title = features[i] partner = mat[,i] B[,N_B-1] = partner # partner就是当前循环的基因的表达量 B[,N_B] = partner * pivot # partner和pivot的乘积就是interaction # region 1: model with interaction # 有interaction的模型,得到的interaction部分的z值就是dysfunction的权重大小 errflag = F coxph.fit = tryCatch(coxph(surv~., data=B), error = function(e) errflag <<- T, warning = function(w) errflag <<- T) if(!errflag) { reg.summary = summary(coxph.fit)$coef result_interaction[i,] = reg.summary["Interaction", c("z", "Pr(>|z|)")] result_main[i,] = reg.summary["partner", c("z", "Pr(>|z|)")] } # region 2: model without interaction # 没有interaction的模型 errflag = F coxph.fit = tryCatch(coxph(surv~., data=B[,-N_B]), error = function(e) errflag <<- T, warning = function(w) errflag <<- T) if(!errflag) { reg.summary = summary(coxph.fit)$coef result_partial[i,] = reg.summary["partner", c("z", "Pr(>|z|)")] } # region 3: model with only base effect # 只有其他临床信息的模型 errflag = F coxph.fit = tryCatch(coxph(surv~., data=B[,c(-N_B, -(N_B-2)), drop=F]), error = function(e) errflag <<- T, warning = function(w) errflag <<- T) if(!errflag) { reg.summary = summary(coxph.fit)$coef result_base[i,] = reg.summary["partner", c("z", "Pr(>|z|)")] } } writemat(result_interaction, paste0(output, ".interaction")) writemat(result_main, paste0(output, ".main")) writemat(result_partial, paste0(output, ".partial")) writemat(result_base, paste0(output, ".base"))
结果讲解
如上图所示, TGFB1与CTL相乘的回归系数为正值,那么它是一个跟CTL起到拮抗(antagnonistic)作用的基因,SOX10与CTL相乘的回归系数为负值,那么它是一个跟CTL起协同作用的基因。这里得到的基因的zscore就是dysfunction的权重,将权重乘以基因的表达量再求和得到的就是这个样本的dysfunction score
http://www.thecodesearch.com/2020/08/26/tide%e4%b8%addysfunction-score%e7%9a%84%e8%ae%a1%e7%ae%97/