R 기초; 분할-적용-결합 - dplyr
arrange()
filter()
select()
mutate()
summarise()
# 미국 뉴욕시 airquality 데이터 불러오기
head(airquality)
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
install.packages("dplyr")
library(dplyr)
air <- filter(airquality, Month==6)
head(air)
Ozone Solar.R Wind Temp Month Day
1 NA 286 8.6 78 6 1
2 NA 287 9.7 74 6 2
3 NA 242 16.1 67 6 3
4 NA 186 9.2 84 6 4
5 NA 220 8.6 85 6 5
airquality[airquality$Month==6, ]
Ozone Solar.R Wind Temp Month Day
32 NA 286 8.6 78 6 1
33 NA 287 9.7 74 6 2
34 NA 242 16.1 67 6 3
35 NA 186 9.2 84 6 4
36 NA 220 8.6 85 6 5
subset(airquality, subset=(Month==6))
Ozone Solar.R Wind Temp Month Day
32 NA 286 8.6 78 6 1
33 NA 287 9.7 74 6 2
34 NA 242 16.1 67 6 3
35 NA 186 9.2 84 6 4
36 NA 220 8.6 85 6 5
# AND
air <- filter(airquality, Month==6 & Temp > 90)
air <- filter(airquality, Month==6, Temp > 90)
head(air)
Ozone Solar.R Wind Temp Month Day
1 NA 259 10.9 93 6 11
2 NA 250 9.2 92 6 12
# OR
air <- filter(airquality, Ozone >80 | Temp > 90)
head(air)
Ozone Solar.R Wind Temp Month Day
1 115 223 5.7 79 5 30
2 NA 259 10.9 93 6 11
3 NA 250 9.2 92 6 12
4 135 269 4.1 84 7 1
5 97 267 6.3 92 7 8
slice(airquality, 6:10)
Ozone Solar.R Wind Temp Month Day
1 28 NA 14.9 66 5 6
2 23 299 8.6 65 5 7
3 19 99 13.8 59 5 8
4 8 19 20.1 61 5 9
5 NA 194 8.6 69 5 10
slice(airquality, n())
# 마지막 5개 불러오기
slice(airquality, (n()-4) : n())
Ozone Solar.R Wind Temp Month Day
1 30 193 6.9 70 9 26
2 NA 145 13.2 77 9 27
3 14 191 14.3 75 9 28
4 18 131 8.0 76 9 29
5 20 223 11.5 68 9 30
# 특정 열을 기준으로 오름차순 정렬하고 저장하기
# Temp를 기준으로 오름차순 정렬하고, Temp 값이 같으면 Month, Day 순으로 정렬함
air <- arrange(airquality, Temp, Month, Day)
head(air)
Ozone Solar.R Wind Temp Month Day
1 NA NA 14.3 56 5 5
2 6 78 18.4 57 5 18
3 NA 66 16.6 57 5 25
4 NA NA 8.0 57 5 27
5 18 65 13.2 58 5 15
6 NA 266 14.9 58 5 26
# 내림차순으로 정렬하기
air <- arrange(airquality, desc(Temp), Month, Day)
head(air)
Ozone Solar.R Wind Temp Month Day
1 76 203 9.7 97 8 28
2 84 237 6.3 96 8 30
3 118 225 2.3 94 8 29
4 85 188 6.3 94 8 31
5 NA 259 10.9 93 6 11
6 73 183 2.8 93 9 3
# 원하는 특정 열을 선정하기
air <- select(airquality, Month, Day, Temp)
head(air)
Month Day Temp
1 5 1 67
2 5 2 72
3 5 3 74
4 5 4 62
5 5 5 56
6 5 6 66
# 원하는 열을 지우고 나머지 열을 선정하기
air <- select(airquality, -(Temp:Day))
head(air)
Ozone Solar.R Wind
1 41 190 7.4
2 36 118 8.0
3 12 149 12.6
4 18 313 11.5
5 NA NA 14.3
6 28 NA 14.9
# 열의 이름을 바꾸기 (Solar.R에서 Solar로)
air <- rename(airquality, Solar=Solar.R)
head(air)
Ozone Solar Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6
# 중복 데이터 지우기 distinct()
distinct(select(airquality, Month))
Month
1 5
2 6
3 7
4 8
5 9
# 새로운 열을 추가할 때는 mutate()를 사용한다. Temp.C와 Diff 열 생성하기
air <- mutate(airquality, Temp.C=(Temp-32)/1.8, Diff=Temp.C-mean(Temp.C))
head(air)
Ozone Solar.R Wind Temp Month Day Temp.C Diff
1 41 190 7.4 67 5 1 19.44444 -6.045752
2 36 118 8.0 72 5 2 22.22222 -3.267974
3 12 149 12.6 74 5 3 23.33333 -2.156863
4 18 313 11.5 62 5 4 16.66667 -8.823529
5 NA NA 14.3 56 5 5 13.33333 -12.156863
6 28 NA 14.9 66 5 6 18.88889 -6.601307
air <- transform(airquality, Temp.C=(Temp-32)/1.8)
head(air)
Ozone Solar.R Wind Temp Month Day Temp.C
1 41 190 7.4 67 5 1 19.44444
2 36 118 8.0 72 5 2 22.22222
3 12 149 12.6 74 5 3 23.33333
4 18 313 11.5 62 5 4 16.66667
5 NA NA 14.3 56 5 5 13.33333
6 28 NA 14.9 66 5 6 18.88889
# 통계함수의 요약
summarise(airquality, mean(Temp))
mean(Temp)
1 77.88235
summarise(airquality, mean(Temp, na.rm=TRUE), median(Temp, na.rm=TRUE), sd(Temp, na.rm=TRUE))
mean(Temp, na.rm=TRUE) median(Temp, na.rm=TRUE) sd(Temp, na.rm=TRUE)
1 77.88235 79 9.46527
sample_n(airquality, 5)
Ozone Solar.R Wind Temp Month Day
1 52 82 12.0 86 7 27
2 NA 287 9.7 74 6 2
3 10 264 14.3 73 7 12
4 37 284 20.7 72 6 17
5 NA 255 12.6 75 8 23
sample_frac(airquality, 0.05, replace=TRUE)
Ozone Solar.R Wind Temp Month Day
1 78 197 5.1 92 9 2
2 10 264 14.3 73 7 12
3 NA 250 6.3 76 6 24
4 64 175 4.6 83 7 5
5 NA 66 16.6 57 5 25
6 91 189 4.6 93 9 4
air.group <- group_by(airquality, Month)
class(air.group)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
air.group
# A tibble: 153 x 6
# Groups: Month [5]
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8 72 5 2
summarise(air.group, Mean.Temp=mean(Temp, na.rm=TRUE))
summarise(air.group, Mean.Temp=mean(Temp, na.rm=TRUE), SD.Temp=sd(Temp, na.rm=TRUE), Days=n())
iris %>% head
head(iris)
1:10 %>% mean
[1] 5.5
mean(1:10)
[1] 5.5
a1 <- select(airquality, Ozone, Temp, Month)
a1
Ozone Temp Month
1 41 67 5
2 36 72 5
3 12 74 5
4 18 62 5
5 NA 56 5
a2 <- group_by(a1, Month)
a2
a3 <- summarise(a2, Mean.Ozone=mean(Ozone, na.rm=TRUE), Mean.Temp=mean(Temp, na.rm=TRUE))
a4 <- filter(a3, Mean.Ozone > 40 | Mean.Temp > 80)
air <- airquality %>% select(Ozone, Temp, Month) %>% group_by(Month) %>% summarise(Mean.Ozone=mean(Ozone, na.rm=TRUE), Mean.Temp=mean(Temp, na.rm=TRUE)) %>% filter(Mean.Ozone > 40 | Mean.Temp > 80)
air
'공부 > R Programming' 카테고리의 다른 글
R 기초; 형태 변환2 - tidyr (0) | 2021.01.23 |
---|---|
R 기초; 형태 변환1 - reshape2 (0) | 2021.01.23 |
R 기초; 집단 요약 (0) | 2021.01.16 |
R 기초; 반복 적용 - Apply Family (0) | 2021.01.16 |
R 기초; 서브셋 (0) | 2021.01.16 |
댓글