假阳性(FP):实际上不是,但是检测出来是。真阳性(TP):实际上是,检测出来也是。
# Minimal ROC example on four samples; label 2 is treated as the positive class.
import numpy as np
from sklearn.metrics import auc, roc_curve

y = np.array([1, 1, 2, 2])
scores = np.array([0.1, 0.4, 0.35, 0.8])

fpr, tpr, thresholds = roc_curve(y, scores, pos_label=2)

# NOTE: this rebinds the name `auc` from the imported function to the computed
# area (a number). Later code that formats `auc` as a float relies on that, and
# any later call to auc(...) needs the function to be imported again first.
auc = auc(fpr, tpr)

# Output recorded with the original example:
# >>> fpr
# array([ 0. , 0.5, 0.5, 1. ])
# >>> tpr
# array([ 0.5, 0.5, 1. , 1. ])
# >>> thresholds
# array([ 0.8 , 0.4 , 0.35, 0.1 ])
可以看出,阈值 thresholds 就是把预测概率 scores 按从大到小(倒序)排列后得到的。依次取每一个阈值,把得分大于等于该阈值的样本判为阳性,就能算出一组 (fpr, tpr),也就是 ROC 曲线上的一个点;不断改变阈值,就得到 ROC 曲线上不同的点。绘制 ROC 曲线的代码如下:
# Draw a ROC curve together with the diagonal reference line.
# `fpr`, `tpr` and `auc` are not defined in this snippet and must already be in
# scope; `auc` has to be a number because it is formatted with %0.2f.
import matplotlib.pyplot as plt

plt.figure()
lw = 2

# The ROC curve itself, with its area shown in the legend entry.
plt.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=lw,
    label='ROC curve (area = %0.2f)' % auc,
)
# Dashed diagonal from (0, 0) to (1, 1).
plt.plot(
    [0, 1],
    [0, 1],
    color='navy',
    lw=lw,
    linestyle='--',
)

# The y-range extends slightly above 1.0 so a curve touching tpr == 1 is not
# drawn on the frame.
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
图片alt
完整的ROC代码
# Mean ROC curve over 5-fold stratified cross-validation, with a +/- 1 standard
# deviation band around the mean true-positive rate.
#
# `clf`, `select_X` and `Y` are not defined here and must already be in scope.
# NOTE(review): `select_X[train]` / `Y[train]` index with integer arrays, so
# both are presumably NumPy arrays (not DataFrames) - confirm.
import matplotlib.pyplot as plt
import numpy as np
# Importing `auc` here also restores the function if an earlier snippet rebound
# the name `auc` to a number.
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import StratifiedKFold

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
# For unstratified folds, sklearn.model_selection.KFold with the same arguments
# (n_splits=5, shuffle=True, random_state=1) can be used instead.

tprs = []
aucs = []
# Common FPR grid: each fold's curve is resampled onto it so the curves can be
# averaged point by point.
mean_fpr = np.linspace(0, 1, 100)

fig, ax = plt.subplots(figsize=(6, 6))
for fold, (train, test) in enumerate(cv.split(select_X, Y)):
    clf.fit(select_X[train], Y[train])
    scores = clf.predict_proba(select_X[test])
    # Label 0 is the positive class and column 0 of predict_proba is its score.
    # NOTE(review): this assumes clf.classes_[0] == 0 - confirm.
    fpr, tpr, thresholds = roc_curve(Y[test], scores[:, 0], pos_label=0)
    roc_auc = auc(fpr, tpr)

    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0  # force every resampled curve to start at (0, 0)
    tprs.append(interp_tpr)
    aucs.append(roc_auc)

ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the mean curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(
    mean_fpr,
    mean_tpr,
    color="b",
    label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
    lw=2,
    alpha=0.8,
)

# Shaded band: mean TPR +/- one standard deviation, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(
    mean_fpr,
    tprs_lower,
    tprs_upper,
    color="grey",
    alpha=0.2,
    label=r"$\pm$ 1 std. dev.",
)

ax.set(
    xlim=[-0.05, 1.05],
    ylim=[-0.05, 1.05],
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="Mean ROC curve with variability",
)
ax.axis("square")
ax.legend(loc="lower right")
plt.show()
# Plot-size options read by the `repr` display layer.
# NOTE(review): presumably for Jupyter/IRkernel inline plots - confirm.
options(repr.plot.width = 9, repr.plot.height = 9)

# Draw time-dependent ROC curves for prediction times 1 to 5 and save them to
# "COAD_ROC.png". Only curves whose AUC exceeds 0.7 are drawn and listed in the
# legend.
#
# `lasso` must have a `clinical` slot with columns `futime_year`, `fustat` and
# `riskScoreNum`.
# NOTE(review): survivalROC() and brewer.pal() are assumed to be attached
# already (survivalROC / RColorBrewer packages) - confirm.
(function(lasso) {
  png(filename = "COAD_ROC.png", width = 7, height = 7, res = 300, units = "in")
  # Close the PNG device even if something below raises, so the device is not
  # left open.
  on.exit(dev.off(), add = TRUE)

  color <- brewer.pal(8, "Dark2")

  # Empty canvas on the unit square plus the diagonal reference line.
  plot(0, 0, type = "l",
       xlab = "False positive rate", ylab = "True positive rate",
       xlim = c(0, 1), ylim = c(0, 1),
       cex.lab = 1.6, cex.axis = 1.5)
  abline(0, 1)

  legend_vector <- character()
  color_vector <- character()
  for (i in 1:5) {
    roc <- survivalROC(Stime = lasso@clinical$futime_year,
                       status = lasso@clinical$fustat,
                       marker = lasso@clinical$riskScoreNum,
                       predict.time = i,
                       method = "KM")
    if (roc$AUC > 0.7) {
      lines(roc$FP, roc$TP, col = color[i], lwd = 2)
      legend_vector <- c(legend_vector,
                         paste0(i, "-year survival:", round(roc$AUC, 3)))
      color_vector <- c(color_vector, color[i])
    }
  }

  # legend() stops with an error on a zero-length label vector, so skip it when
  # no curve passed the AUC filter.
  if (length(legend_vector) > 0) {
    legend(border = NA, 0.5, 0.4, legend_vector,
           col = color_vector, text.col = color_vector,
           lty = c(1, 1, 1), inset = .5, cex = 1.3)
  }

  invisible(NULL)
})(lncRNA_lasso)