图片alt
SAMD11 NOC2L KLHL17 PLEKHN1 GSM5576716 3.013174 6.577959 5.494301 1.7501401 GSM5576717 1.133067 6.177461 4.656629 0.1330668 GSM5576718 3.588339 6.600312 5.488803 3.5883389 GSM5576719 1.267004 5.821593 4.910860 0.2670040
#' Split the row indices of a data set into k cross-validation folds
#'
#' Fold labels 1..k are recycled to one label per row and then shuffled, so
#' fold sizes differ by at most one row.
#'
#' @param data An object with rows (anything for which `nrow()` returns a
#'   count, e.g. a data frame or matrix).
#' @param k Number of folds. Must be a single positive whole number.
#' @param seed Seed passed to `set.seed()` before shuffling. Note that this
#'   resets the session-wide RNG state as a side effect.
#' @return A list of length `k`; element `i` holds the integer row indices
#'   assigned to fold `i`. Folds are empty when `k` exceeds the row count or
#'   when `data` has no rows.
cv_kfold <- function(data, k = 10, seed = 2022) {
  n_row <- nrow(data)
  if (is.null(n_row)) {
    stop("`data` must have rows (e.g. a data frame or matrix).", call. = FALSE)
  }
  if (length(k) != 1L || is.na(k) || k < 1 || k != floor(k)) {
    stop("`k` must be a single positive whole number.", call. = FALSE)
  }

  # `length.out` recycles the labels to exactly n_row entries and, unlike
  # indexing with `1:n_row`, stays correct when n_row is 0.
  n_foldmarkers <- rep(seq_len(k), length.out = n_row)

  set.seed(seed)
  n_foldmarkers <- sample(n_foldmarkers)

  lapply(seq_len(k), function(i) which(n_foldmarkers == i))
}
[[1]] [1] 2 18 31 36 42 48 57 81 82 86 88 102 [[2]] [1] 10 20 23 29 33 34 49 56 60 62 96 97 .... [[10]] [1] 1 25 27 32 47 72 76 79 80 110 112
# Run k-fold cross-validation of the kknn classifier and time the whole run.
# Relies on objects defined elsewhere: `input` (indexed by row and modelled
# with `group ~ .`, so it presumably carries a `group` column), `best_k`,
# `best_kernel`, kknn(), cv_kfold() and imetrics().
sp <- Sys.time()
cat(as.character(sp), "\n")

kfolds <- cv_kfold(input)
for (i in seq_along(kfolds)) {
  curr_fold <- kfolds[[i]]
  # Negative indexing with an empty vector selects zero rows, which would
  # leave the training set empty, so skip folds that received no rows.
  if (length(curr_fold) == 0L) {
    next
  }
  train_set <- input[-curr_fold, ]
  test_set <- input[curr_fold, ]

  # Resubstitution performance: predict the rows the model was fitted on.
  predicted_train <- kknn(
    group ~ .,
    train = train_set,
    test = train_set,
    k = best_k,
    kernel = best_kernel
  )$fit
  imetrics("kknn", "Train", predicted_train, train_set$group)

  # Held-out performance: predict the rows left out of this fold's training
  # set and score them against their own labels.
  predicted_test <- kknn(
    group ~ .,
    train = train_set,
    test = test_set,
    k = best_k,
    kernel = best_kernel
  )$fit
  imetrics("kknn", "test", predicted_test, test_set$group)
}

ep <- Sys.time()
cat(as.character(ep), "\n")
difftime(ep, sp, units = "secs")
评估指标的选择:类别不均衡时,应关注查全率(recall)和查准率(precision);类别相对均衡时,可以使用错误率和正确率(accuracy)。
# Accumulator for evaluation results; imetrics() appends one row per call.
global_performance <- NULL

#' Record the accuracy and error rate of one set of predictions
#'
#' Appends a one-row data frame to `global_performance` in the global
#' environment (a deliberate side effect used to collect results across
#' cross-validation folds).
#'
#' @param method Label for the model being evaluated (stored as-is).
#' @param type Label for the data split being evaluated (stored as-is).
#' @param predicted Predicted class labels.
#' @param actual True class labels, the same length as `predicted`.
#' @return Invisibly, the updated `global_performance` data frame with columns
#'   `method`, `type`, `accuray` and `error_rate`. The `accuray` spelling is
#'   kept for compatibility with code that already reads that column.
imetrics <- function(method, type, predicted, actual) {
  stopifnot(length(predicted) == length(actual))

  # Compare labels pairwise instead of reading the diagonal of
  # table(predicted, actual): the diagonal pairs labels by position and is
  # wrong whenever the two vectors do not share the same set of labels.
  # Pairs with a missing value are dropped, as table() would drop them.
  keep <- !is.na(predicted) & !is.na(actual)
  n_correct <- sum(as.character(predicted)[keep] == as.character(actual)[keep])

  # Accuracy must be computed before building the data frame: arguments of
  # data.frame() cannot refer to one another.
  accuracy <- n_correct / sum(keep)

  cur_one <- data.frame(
    method = method,
    type = type,
    accuray = accuracy,
    error_rate = 1 - accuracy
  )

  # Start a fresh accumulator if the global one has not been created yet.
  previous <- NULL
  if (exists("global_performance", envir = .GlobalEnv, inherits = FALSE)) {
    previous <- get("global_performance", envir = .GlobalEnv)
  }

  assign("global_performance", rbind(previous, cur_one), envir = .GlobalEnv)
}