假阳性:实际上不是,但是检测出来是
真阳性:实际上是,检测出来是
from sklearn.metrics import roc_curve, auc
import numpy as np
# Toy example: four samples labelled 1 or 2, each with a predicted score.
y = np.array([1, 1, 2, 2])
scores = np.array([0.1, 0.4, 0.35, 0.8])
# Label 2 is treated as the positive class when the thresholds are swept.
fpr, tpr, thresholds = roc_curve(y_true=y, y_score=scores, pos_label=2)
# NOTE(review): this rebinds the name `auc` from the imported function to its
# numeric result, so `auc(...)` cannot be called again until it is re-imported.
auc = auc(x=fpr, y=tpr)
>>> fpr
array([ 0. , 0.5, 0.5, 1. ])
>>> tpr
array([ 0.5, 0.5, 1. , 1. ])
>>> thresholds
array([ 0.8 , 0.4 , 0.35, 0.1 ])
可以看出,阈值thresholds就是对概率scores进行了排序(倒序)。不断改变阈值,得到ROC曲线上不同的点。步骤如下:
import matplotlib.pyplot as plt
# Plot one ROC curve together with the diagonal reference line.
plt.figure()
lw = 2
# `auc` is formatted with %0.2f, so it is expected to hold a number here.
roc_label = 'ROC curve (area = %0.2f)' % auc
plt.plot(fpr, tpr, color='darkorange', lw=lw, label=roc_label)
# Dashed diagonal from (0, 0) to (1, 1), where TPR equals FPR.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('Receiver operating characteristic example')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()
完整的ROC代码
from sklearn.metrics import roc_curve, auc
# Cross-validated ROC: fit the classifier on each fold, interpolate every
# fold's ROC curve onto a shared FPR grid, then plot the mean curve with a
# +/- 1 standard deviation band.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
# Alternative splitter: cv = KFold(n_splits=5, shuffle=True, random_state=1)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid shared by all folds
fig, ax = plt.subplots(figsize=(6, 6))

for train_idx, test_idx in cv.split(select_X, Y):
    clf.fit(select_X[train_idx], Y[train_idx])
    proba = clf.predict_proba(select_X[test_idx])
    # Column 0 of the predicted probabilities is scored against label 0 as the
    # positive class (presumably clf.classes_[0] == 0 -- TODO confirm).
    fpr, tpr, thresholds = roc_curve(Y[test_idx], proba[:, 0], pos_label=0)
    aucs.append(auc(fpr, tpr))
    # Resample this fold's curve on the shared grid so folds can be averaged.
    tpr_on_grid = np.interp(mean_fpr, fpr, tpr)
    tpr_on_grid[0] = 0.0  # pin the first grid point to TPR = 0
    tprs.append(tpr_on_grid)

# Diagonal reference line, drawn once after the fold loop.
ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")

fold_tprs = np.asarray(tprs)
mean_tpr = fold_tprs.mean(axis=0)
mean_tpr[-1] = 1.0  # pin the last grid point to TPR = 1
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
mean_label = r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc)
ax.plot(mean_fpr, mean_tpr, color="b", label=mean_label, lw=2, alpha=0.8)

# Variability band: mean +/- one standard deviation, clipped to [0, 1].
std_tpr = fold_tprs.std(axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(
    mean_fpr,
    tprs_lower,
    tprs_upper,
    color="grey",
    alpha=0.2,
    label=r"$\pm$ 1 std. dev.",
)

ax.set_xlim([-0.05, 1.05])
ax.set_ylim([-0.05, 1.05])
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("Mean ROC curve with variability")
ax.axis("square")
ax.legend(loc="lower right")
plt.show()
# Plot-size options (repr.plot.width / repr.plot.height) set to 9 x 9.
options(repr.plot.width=9, repr.plot.height=9)

# Draw time-dependent ROC curves for prediction times 1 to 5 and write them to
# "COAD_ROC.png".
#
# lasso: object whose `clinical` slot provides the columns `futime_year`
#        (follow-up time), `fustat` (event status) and `riskScoreNum` (marker).
#
# Only prediction times whose AUC exceeds 0.7 are drawn and listed in the
# legend. Returns NULL invisibly; the function is called for its side effect.
(function(lasso){
  png(filename = "COAD_ROC.png", width = 7, height = 7, res = 300, units = "in")
  # Close the PNG device even when a later call fails, so it is never left open.
  on.exit(dev.off(), add = TRUE)

  color <- brewer.pal(8, "Dark2")
  # Empty canvas on the unit square plus the diagonal reference line.
  plot(0, 0, type = "l",
       xlab = "False positive rate", ylab = "True positive rate",
       xlim = c(0, 1), ylim = c(0, 1), cex.lab = 1.6, cex.axis = 1.5)
  abline(0, 1)

  legend_vector <- character()
  color_vector <- character()
  for(i in 1:5){
    roc <- survivalROC(Stime = lasso@clinical$futime_year,
                       status = lasso@clinical$fustat,
                       marker = lasso@clinical$riskScoreNum,
                       predict.time = i,
                       method = "KM")
    if(roc$AUC > 0.7){
      lines(roc$FP, roc$TP, col = color[i], lwd = 2)
      legend_vector <- c(legend_vector, paste0(i, "-year survival:", round(roc$AUC, 3)))
      color_vector <- c(color_vector, color[i])
    }
  }

  # legend() stops with "'legend' is of length 0" when nothing passed the AUC
  # filter, so only draw it when at least one curve was plotted.
  if(length(legend_vector) > 0){
    # A scalar lty is recycled across however many entries were collected.
    legend(0.5, 0.4,
           legend = legend_vector,
           col = color_vector,
           text.col = color_vector,
           lty = 1, border = NA, cex = 1.3)
  }
  invisible(NULL)
})(lncRNA_lasso)