# Character vector
name <- c("张三", "李四")

# Numeric vector
num <- c(1, 2, 3.4, 5)

# Logical vector. TRUE/FALSE are spelled out: T and F are ordinary variables
# that can be reassigned, while TRUE and FALSE are reserved words.
b <- c(FALSE, TRUE, FALSE, TRUE)

# A vector holds one type only: logicals mixed with numbers are coerced
a <- c(1, 2, TRUE, FALSE)  # 1 2 1 0

# Vectors do not nest: c() flattens its arguments
d <- c(1, c(2, 3), c(4, 5))  # 1 2 3 4 5

# Creating vectors of a fixed length
x1 <- vector("numeric", 3)  # 0 0 0
x2 <- numeric(3)            # 0 0 0
x3 <- character(3)          # "" "" ""
x4 <- logical(3)            # FALSE FALSE FALSE
x5 <- vector(length = 3)    # FALSE FALSE FALSE
# Arithmetic sequences with an explicit step
seq(from = 1, to = 10, by = 2)    # 1 3 5 7 9
seq(from = 10, to = 1, by = -2)   # 10 8 6 4 2

# Fixed number of points; the argument name is written in full (length.out)
# instead of relying on partial matching of `len`
seq(from = 1, to = 10, length.out = 3)  # 1.0 5.5 10.0

# Special case: the colon operator uses a step of 1 (or -1 when descending)
1:5    # 1 2 3 4 5
pi:1   # 3.141593 2.141593 1.141593

# `:` binds tighter than binary minus, so parenthesise when needed
1:5 - 1    # 0 1 2 3 4
1:(5 - 1)  # 1 2 3 4
# Random permutations; without a seed the output differs on every run
sample(5)                      # e.g. 2 4 5 1 3
sample(c("a", "b", "c", "d"))  # e.g. "d" "c" "b" "a"

# Fix the random seed so the following draws are repeatable.
# NOTE(review): the outputs below were recorded by the original author; they
# may differ between R versions depending on the RNG / sample.kind defaults.
set.seed(2020)
sample(5)       # 5 2 4 3 1
sample(1:5, 3)  # 1 5 2  (three values picked at random)

# Sampling with replacement, then counting how many distinct values appeared
re_sample <- sample(1:100, 100, replace = TRUE)
unique_re_sample <- unique(re_sample)
length(unique_re_sample)
score <- c(95, 96, 85, 98, 88, 90)

# Positive positions select elements
score[c(3, 5)]        # 85 88

# Negative positions drop elements (inverse selection)
score[-c(3, 5)]       # 95 96 98 90

# Arithmetic on a selection leaves the original vector untouched
score[c(3, 5)] - 90   # -5 -2

# Assigning back into the same positions updates them in place
score[c(3, 5)] <- score[c(3, 5)] + 6
score                 # 95 96 91 98 94 90
注意下标的特殊用法
# Empty brackets assign to every element while keeping the vector's length
# (result comments assume score is 95 96 91 98 94 90 on entry)
score[] <- mean(score)
score  # 94 94 94 94 94 94

# Without brackets the whole vector is replaced by a single value
score <- mean(score)
score  # 94

# Subscripts may repeat and may appear in any order
name <- c("张三", "李四", "王五")
name[c(1, 1, 3, 2)]  # "张三" "张三" "王五" "李四"
score <- c(95, 96, 85, 98, 88, 90)

# Drop elements by fixed position
score[-c(3, 5)]  # 95 96 98 90

# Positions of the scores below 90
idx <- which(score < 90)  # 3 5

# Drop those positions. score[-idx] is avoided on purpose: when nothing
# matches, idx is integer(0) and score[-idx] returns an EMPTY vector instead
# of the whole vector. Selecting the complement of idx is correct either way.
score[setdiff(seq_along(score), idx)]  # 95 96 98 90
score <- c(95, 96, 85, 98, 88, 90)
name <- c("张三", "李四", "王五", "刘备", "曹操", "张飞")

# A comparison yields one logical value per element
score < 90         # FALSE FALSE TRUE FALSE TRUE FALSE

# A logical vector used as a subscript keeps the TRUE positions
score[score < 90]  # 85 88

# The same mask can index a parallel vector: names of those scoring below 90
name[score < 90]   # "王五" "曹操"
# Attach a name to every score, then look elements up by name
xm <- c("张三", "李四", "王五", "刘备", "曹操", "张飞")
score <- setNames(c(95, 96, 85, 98, 88, 90), xm)
score
# 张三 李四 王五 刘备 曹操 张飞
#   95   96   85   98   88   90

# A character subscript selects by element name
score[c("刘备", "张飞")]
# 刘备 张飞
#   98   90
v1 <- c(a = 5, b = 10, c = 12, d = 6)

# sort() returns the values in ascending order, names carried along
sort(v1)
# a  d  b  c
# 5  6 10 12

# order() returns the positions that would sort the vector
order(v1, decreasing = TRUE)  # 3 2 4 1

# Indexing with those positions gives the values sorted in descending order
v1[order(v1, decreasing = TRUE)]
#  c  b  d  a
# 12 10  6  5

score <- c(95, 96, 85, 98, 88, 90)

# Reverse the vector
rev(score)                # 90 88 98 85 96 95

# Two ways to take the last element
score[length(score)]      # 90
tail(score, n = 1)        # 90

# Last three elements, most recent first
rev(tail(score, n = 3))   # 90 88 98
# Points / vectors in the plane
p0 <- c(0, 0)
p1 <- c(1, 2)
p2 <- c(2, 1)

# Vector addition
p3 <- p1 + p2    # 3 3

# Scalar multiplication
p4 <- 1.5 * p3   # 4.5 4.5

# Projection of p1 onto p2: (p1 . p2) / (p2 . p2) * p2.
# The denominator must be the squared length of p2; dividing the dot product
# by itself would always give 1 and return p2 unchanged.
p1_on_p2 <- sum(p1 * p2) / sum(p2 * p2) * p2   # 1.6 0.8
# Start from a plain character vector
gender <- c("male", "male", "female", "female")
typeof(gender)
# [1] "character"
gender
# [1] "male"   "male"   "female" "female"

# After conversion to a factor the values are stored as integer codes
gender <- factor(gender)
typeof(gender)
# [1] "integer"
gender
# [1] male   male   female female
# Levels: female male
gender <- c("male", "male", "female", "female")
gender <- factor(gender)

# Subsetting a factor keeps the full set of levels
gender[c(1, 2:3)]
# [1] male   male   female
# Levels: female male

# Number of levels and their labels
nlevels(gender)  # 2
levels(gender)   # [1] "female" "male"

# Only existing levels ("female" or "male") can be assigned here
gender[1] <- "female"
gender
# [1] female male   female female
# Levels: female male
定义因子
# Declaring the levels up front allows values that are not yet present in the
# data, and fixes the level order to the order given.
# NOTE(review): consider a neutral label (e.g. "other") for the third level.
gender <- factor(
  c("male", "male", "female", "female"),
  levels = c("male", "female", "shemale")
)

# Assigning a declared-but-unused level now works
gender[1] <- "shemale"
gender
# [1] shemale male    female  female
# Levels: male female shemale
gender <- factor(c("male", "male", "female", "female"))

# as.numeric() on a factor returns the internal level codes, not the labels
as.numeric(gender)
# [1] 2 2 1 1
as.character(gender)
# [1] "male"   "male"   "female" "female"

# The same holds when the labels look like numbers
number_factor <- factor(c(10, 20, 40, 20, 30, 10, 20))
as.numeric(number_factor)
# [1] 1 2 4 2 3 1 2

# Correct ways to average such a factor: recover the labels first
mean(as.numeric(as.character(number_factor)))
# [1] 21.42857
mean(as.numeric(levels(number_factor)[number_factor]))
# [1] 21.42857
# Unordered factor: relational comparison is not meaningful. The call below is
# kept as a comment; R reports "'<' not meaningful for factors" and gives NA.
score <- factor(c("优", "良", "中", "差", "优", "良", "中"))
# score[1] < score[2]

# Ordered factor with default levels. The levels are the sorted unique values,
# so their order (and this result) depends on the locale's collation rules.
score <- factor(c("优", "良", "中", "差", "优", "良", "中"), ordered = TRUE)
score[1] < score[2]  # TRUE in the original author's session

# Ordered factor with explicit levels, lowest grade first
score <- factor(
  c("优", "良", "中", "差", "优", "良", "中"),
  ordered = TRUE,
  levels = c("差", "中", "良", "优")
)
score[1] < score[2]  # FALSE: score[1] is "优", which ranks above "良"
# Convert percentage scores into a five-level ordered grade
score <- c(94, 87, 92, 91, 85, 92)

# Left-closed bins [0,60) [60,70) [70,80) [80,90) [90,100]; include.lowest
# closes the final bin on the right so that 100 is still graded
score_factor_5 <- cut(
  score,
  breaks = c(0, seq(60, 100, by = 10)),
  labels = c("不及格", "及格", "中", "良", "优"),
  right = FALSE,
  include.lowest = TRUE,
  ordered_result = TRUE
)
score_factor_5
# [1] 优 良 优 优 良 优
# Levels: 不及格 < 及格 < 中 < 良 < 优