https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/nearest_neighbor.py
import tensorflow as tf import numpy as np print(tf.__version__) print(np.__version__)
from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
Xtrain, Ytrain = mnist.train.next_batch(5000) #从数据集中选取5000个样本作为训练集 Xtest, Ytest = mnist.test.next_batch(200) #从数据集中选取200个样本作为测试集
# 输入占位符 xtr = tf.placeholder("float", [None, 784]) xte = tf.placeholder("float", [784]) # 计算L1距离 distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.negative(xte))), reduction_indices=1) # 获取最小距离的索引 pred = tf.arg_min(distance, 0) # np.argmin(sess.run(distance, feed_dict={xtr: Xtrain, xte: Xtest[i, :]}))
# 初始化变量 init = tf.global_variables_initializer() #分类精确度 accuracy = 0. # 运行会话,训练模型 with tf.Session() as sess: # 运行初始化 sess.run(init) # 遍历测试数据 for i in range(len(Xtest)): # 获取当前样本的最近邻索引 nn_index = sess.run(pred, feed_dict={xtr: Xtrain, xte: Xtest[i, :]}) #向占位符传入训练数据 # print("Test", i, "Prediction:", Ytrain[nn_index],"True Class:", Ytest[i]) # print(np.argmin(sess.run(distance, feed_dict={xtr: Xtrain, xte: Xtest[i, :]})),nn_index) # 最近邻分类标签与真实标签比较 if i%50==0: print("Test", i, "Prediction:", np.argmax(Ytrain[nn_index]),"True Class:", np.argmax(Ytest[i]))# argmax最大值的下标 # 计算精确度 if np.argmax(Ytrain[nn_index]) == np.argmax(Ytest[i]): accuracy += 1./len(Xtest) print("Accuracy:", accuracy)
模拟数据下载
import pandas as pd df = pd.read_csv("data/cjb.csv")
train_set = np.random.choice(len(df), 500, replace=False) Xtrain = np.array(df.iloc[train_set,3:12]) Ytrain = np.array(df.iloc[train_set,[12]]) Xtest = np.array(df.iloc[-train_set,3:12]) Ytest = np.array(df.iloc[-train_set,[12]])
# 输入占位符 xtr = tf.placeholder("float", [None, 9]) # 不规定有多少sample,每个sample有9个属性 xte = tf.placeholder("float", [9]) # 计算L1距离 distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.negative(xte))), reduction_indices=1) # 获取最小距离的索引 pred = tf.arg_min(distance, 0) # np.argmin(sess.run(distance, feed_dict={xtr: Xtrain, xte: Xtest[i, :]}))
# 初始化变量 init = tf.global_variables_initializer() #分类精确度 accuracy = 0. # 运行会话,训练模型 with tf.Session() as sess: # 运行初始化 sess.run(init) # 遍历测试数据 for i in range(len(Xtest)): # 获取当前样本的最近邻索引 nn_index = sess.run(pred, feed_dict={xtr: Xtrain, xte: Xtest[i, :]}) #向占位符传入训练数据 if i%50==0: print("Test", i, "Prediction:", Ytrain[nn_index],"True Class:", Ytest[i]) # 计算精确度 if Ytrain[nn_index] == Ytest[i]: accuracy += 1./len(Xtest) print("Accuracy:", accuracy)
library("tidyverse") library('kknn') cjb <- read.csv("data/cjb.csv") cjb1 <- cjb %>% select(4:13) %>% mutate(wlfk = factor(wlfk)) train_set_idx <- sample(nrow(cjb), 0.7 * nrow(cjb)) test_set_idx <- (1:nrow(cjb))[-train_set_idx]
imodel <- kknn(wlfk~., train = cjb[train_set_idx,], test = cjb[train_set_idx,])
predicted_train <- imodel$fit
Metrics::ce(cjb$wlfk[train_set_idx], predicted_train)