R 기초; 서브셋
# 자동차 데이터 description
str(mtcars)
# mtcars 데이터의 mpg 열 데이터 불러오기
mtcars$mpg
mtcars[["mpg"]]
mtcars[[1]]
[1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8
[12] 16.4 17.3 15.2 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5
[23] 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7 15.0 21.4
# 데이터 프레임 형식으로 1열 4열 데이터 불러오기
mtcars[c(1, 4)]
mpg hp
Mazda RX4 21.0 110
Mazda RX4 Wag 21.0 110
Datsun 710 22.8 93
Hornet 4 Drive 21.4 110
Hornet Sportabout 18.7 175
mtcars[c("mpg", "hp")]
Mazda RX4 21.0 110
Mazda RX4 Wag 21.0 110
Datsun 710 22.8 93
Hornet 4 Drive 21.4 110
Hornet Sportabout 18.7 175
# 해당 열 제거하기. 1, 4, 6 열만 불러오기.
mtcars[-c(2, 3, 5, 7:11)]
mpg hp wt
Mazda RX4 21.0 110 2.620
Mazda RX4 Wag 21.0 110 2.875
Datsun 710 22.8 93 2.320
Hornet 4 Drive 21.4 110 3.215
# mpg 열을 제외하고 출력하기
mtcars[-1] # 1열을 제외한 데이터
mtcars[1] <- NULL # 1열에 NULL 대입함으로써 삭제
mtcars
# 에러: 음수 인덱스와 양수 인덱스는 함께 쓸 수 없다.
mtcars[c(-1, 2)]
str(iris)
iris[1:5, ] # 1행-5행 출력
iris[, c("Sepal.Length", "Sepal.Width")]
iris[, "Sepal.Length"]
[1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3
[15] 5.8 5.7 5.4 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2
[29] 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5
[43] 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0 6.4 6.9 5.5 6.5 5.7
iris[, "Sepal.Length", drop=FALSE]
Sepal.Length
1 5.1
2 4.9
3 4.7
4 4.6
5 5.0
iris["Sepal.Length"]
Sepal.Length
1 5.1
2 4.9
3 4.7
4 4.6
5 5.0
6 5.4
iris[1:5, c("Sepal.Length", "Sepal.Width")]
Sepal.Length Sepal.Width
1 5.1 3.5
2 4.9 3.0
3 4.7 3.2
4 4.6 3.1
5 5.0 3.6
iris[iris$Sepal.Length > 7, ]
Sepal.Length Sepal.Width Petal.Length Petal.Width
103 7.1 3.0 5.9 2.1
106 7.6 3.0 6.6 2.1
108 7.3 2.9 6.3 1.8
110 7.2 3.6 6.1 2.5
118 7.7 3.8 6.7 2.2
iris[iris$Sepal.Length > 7, c("Sepal.Length", "Sepal.Width", "Species")]
Sepal.Length Sepal.Width Species
103 7.1 3.0 virginica
106 7.6 3.0 virginica
108 7.3 2.9 virginica
110 7.2 3.6 virginica
subset(iris, subset=(Sepal.Length > 7), select=c("Sepal.Length", "Sepal.Width", "Species"))
Sepal.Length Sepal.Width Species
103 7.1 3.0 virginica
106 7.6 3.0 virginica
108 7.3 2.9 virginica
110 7.2 3.6 virginica
118 7.7 3.8 virginica
sample(x=1:10, size=5)
[1] 4 1 5 7 6
sample(x=10, size=5)
[1] 9 3 2 5 7
# 1부터 10까지 무작위 순서 정수 10개 만들기
sample(10)
[1] 8 5 3 7 9 1 4 6 2 10
set.seed(1)
sample(x=10, size=5, replace=TRUE)
[1] 9 4 7 1 2
set.seed(1)
sample(x=10, size=5, replace=TRUE)
[1] 9 4 7 1 2
sample(iris, 3)
set.seed(1)
index <- sample(nrow(iris), 3)
index
[1] 68 129 43
iris[index, ]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
68 5.8 2.7 4.1 1.0 versicolor
129 6.4 2.8 5.6 2.1 virginica
43 4.4 3.2 1.3 0.2 setosa
duplicated(c(1,2,3,1,1,4,3))
id <- c("A001", "A002", "A006")
name <- c("Mouse", "Keyboard", "USB")
price <- c(30000, 90000, 50000)
product <- data.frame(id=id, name=name, price=price)
product
product <- rbind(product, c("A001", "Mouse", 30000))
product
id name price
1 A001 Mouse 30000
2 A002 Keyboard 90000
3 A006 USB 50000
4 A001 Mouse 30000
duplicated(product)
[1] FALSE FALSE FALSE TRUE
product[!duplicated(product), ]
id name price
1 A001 Mouse 30000
2 A002 Keyboard 90000
3 A006 USB 50000
which(duplicated(product))
[1] 4
index <- which(duplicated(product))
product[-index, ]
id name price
1 A001 Mouse 30000
2 A002 Keyboard 90000
3 A006 USB 50000
unique(product)
id name price
1 A001 Mouse 30000
2 A002 Keyboard 90000
3 A006 USB 50000
str(airquality)
complete.cases(airquality)
airquality.nona <- airquality[complete.cases(airquality), ]
str(airquality.nona)
airquality.nona <- na.omit(airquality)
str(airquality.nona)
cut(x=iris$Sepal.Width, breaks=c(0,1,2,3,4,5))
[1] (3,4] (2,3] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (2,3]
[10] (3,4] (3,4] (3,4] (2,3] (2,3] (3,4] (4,5] (3,4] (3,4]
[19] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (3,4] (2,3] (3,4]
[28] (3,4] (3,4] (3,4] (3,4] (3,4] (4,5] (4,5] (3,4] (3,4]
[37] (3,4] (3,4] (2,3] (3,4] (3,4] (2,3] (3,4] (3,4] (3,4]
cut(x=iris$Sepal.Width, breaks=5)
[1] (3.44,3.92] (2.96,3.44] (2.96,3.44] (2.96,3.44]
[5] (3.44,3.92] (3.44,3.92] (2.96,3.44] (2.96,3.44]
[9] (2.48,2.96] (2.96,3.44] (3.44,3.92] (2.96,3.44]
[13] (2.96,3.44] (2.96,3.44] (3.92,4.4] (3.92,4.4]
[17] (3.44,3.92] (3.44,3.92] (3.44,3.92] (3.44,3.92]
iris.cut <- cut(x=iris$Sepal.Width, breaks=c(0, 1, 2, 3, 4, 5))
table(iris.cut)
(0,1] (1,2] (2,3] (3,4] (4,5]
0 1 82 64 3
summary(iris.cut)
(0,1] (1,2] (2,3] (3,4] (4,5]
0 1 82 64 3
iris.cut <- cut(x=iris$Sepal.Width, breaks=c(0, 1, 2, 3, 4, 5), labels=c("Smaller", "Small", "Medium", "Big", "Bigger"))
iris.cut
[1] Big Medium Big Big Big Big Big Big Medium Big Big
[12] Big Medium Medium Big Bigger Big Big Big Big Big Big
[23] Big Big Big Medium Big Big Big Big Big Big Bigger
[34] Bigger Big Big Big Big Medium Big Big Medium Big Big
table(iris.cut)
iris.cut
Smaller Small Medium Big Bigger
0 1 82 64 3
'공부 > R Programming' 카테고리의 다른 글
R 기초; 집단 요약 (0) | 2021.01.16 |
---|---|
R 기초; 반복 적용 - Apply Family (0) | 2021.01.16 |
R 기초; 논리흐름 제어 (0) | 2021.01.16 |
R 기초; 함수 Function (0) | 2021.01.16 |
R 기초; 출력 (0) | 2021.01.15 |
댓글