library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)
# Load the course data. The rest of the script expects this file to provide
# a data frame named `pums.sample`.
load("2020.RData")

# Q1. Inspect the structure and the first rows of the sample ----
# (The section label must be a comment: a bare `Q1.` is parsed as a symbol
# and stops the script with "object 'Q1.' not found".)
str(pums.sample)
head(pums.sample)
# Q2. Recode SEX from the codes 1 / 2 to a "Male" / "Female" factor ----
# Whole values are matched instead of substrings, so a value that merely
# contains a "1" or "2" is left untouched rather than being partially
# rewritten. `%in%` is used because it never yields NA for missing values.
sex_chr <- as.character(pums.sample$SEX)
sex_chr[sex_chr %in% "1"] <- "Male"
sex_chr[sex_chr %in% "2"] <- "Female"
pums.sample$SEX <- as.factor(sex_chr)
# Q3. Recode MAR codes 1-5 to descriptive marital-status labels ----
# Lookup table: names are the codes (as strings), values are the labels.
named_vector <- c(
  "1" = "Married",
  "2" = "Widowed",
  "3" = "Divorced",
  "4" = "Separated",
  "5" = "Never married or under 15 years old"
)
# Index the lookup table once with the whole column instead of element by
# element. unname() drops the code names so that a plain character vector is
# stored in the data frame. Codes that are not in the table become NA.
pums.sample$MAR <- unname(named_vector[as.character(pums.sample$MAR)])
head(pums.sample)
# Q4. Missing values per column, and the total number of rows ----
colSums(is.na(pums.sample))
nrow(pums.sample)
# Q5. Subsets by sex and age ----

# Male respondents: row count and missing values per column.
pums.sample.male <- pums.sample[which(pums.sample$SEX == "Male"), ]
nrow(pums.sample.male)
colSums(is.na(pums.sample.male))

# Female respondents.
pums.sample.female <- pums.sample[which(pums.sample$SEX == "Female"), ]

# Females younger than 15. The mask is always built from the same data frame
# that is being subset; indexing the female rows with a mask computed on the
# male subset selects unrelated rows.
pums.sample.young_f <- filter(pums.sample.female, AGEP < 15)
pums.sample.young_f
# Summarise the under-15 flag instead of printing one value per row.
table(pums.sample.female$AGEP < 15, useNA = "ifany")

# Spot check: every respondent aged exactly 12, regardless of sex.
filter(pums.sample, AGEP == 12)

# Females older than 50: missing values per column and row count.
pums.sample.old_f <- filter(pums.sample.female, AGEP > 50)
colSums(is.na(pums.sample.old_f))
nrow(pums.sample.old_f)
# Q6. Summary statistics, plus boxplots of WKHP and PINCP ----
summary(pums.sample)
boxplot(pums.sample$WKHP)
boxplot(pums.sample$PINCP)
# Q7. Cross-tabulate COW against SCHL ----
# table() on a two-column data frame returns the two-way contingency table.
df <- pums.sample[, c("COW", "SCHL")]
table(df)
nrow(pums.sample)
# Q8. Mean PINCP by age group ----
summary(pums.sample$AGEP)

# Bin AGEP into left-closed decades: [20,30), [30,40), ..., [60, Inf).
# The top break is open-ended so the oldest respondents land in "over 60s".
# With a finite top break and right = FALSE, an age equal to that break falls
# outside every interval and becomes NA.
cut_point <- cut(
  pums.sample$AGEP,
  breaks = c(20, 30, 40, 50, 60, Inf),
  right = FALSE,
  labels = c("20s", "30s", "40s", "50s", "over 60s")
)
# Show the group sizes, including any ages that are missing or below 20.
# Those stay NA on purpose: relabelling them as "20s" would distort that
# group's mean.
table(cut_point, useNA = "ifany")

pums.sample$age_group <- cut_point
# tapply() ignores rows whose group is NA; na.rm = TRUE keeps a single
# missing income from turning a whole group's mean into NA.
tapply(pums.sample$PINCP, pums.sample$age_group, mean, na.rm = TRUE)
# Q9. Relationship between WKHP and PINCP ----
plot(pums.sample$WKHP, pums.sample$PINCP)
# Use only rows where both values are present; with the default
# use = "everything", one NA in either column makes the correlation NA.
cor(pums.sample$WKHP, pums.sample$PINCP, use = "complete.obs")
# Q10. Save the processed sample ----
save(pums.sample, file = "21600685.RData")
# Tidy version of iris: one measurement per row ----
# Target layout: Species, Value, Part ("Sepal"/"Petal"),
# Measure ("Length"/"Width").
colnames(iris)

# Stack the four measurement columns into key/value pairs. gather() is kept
# (rather than a row-interleaving reshape) so the rows stay in
# column-by-column order: all Sepal.Length values, then Sepal.Width, and so on.
iris_long <- gather(iris, Part, value, 1:4)

# The original column names have the form "<Part>.<Measure>"; split that
# into two variables.
iris.tidy <- data.frame(
  Species = iris_long$Species,
  Value = iris_long$value,
  Part = ifelse(startsWith(iris_long$Part, "Sepal"), "Sepal", "Petal"),
  Measure = ifelse(endsWith(iris_long$Part, "Length"), "Length", "Width"),
  stringsAsFactors = FALSE
)

# Sanity checks: 4 measurements per flower, and 150 rows for every
# Part x Measure combination.
nrow(iris.tidy)
table(iris.tidy$Part, iris.tidy$Measure)
head(iris.tidy)

# NOTE(review): this writes to the same file that is load()-ed at the top of
# the script, so the input is replaced by the recoded data and a second run
# starts from already-recoded columns. Confirm that this file name is
# required before changing it.
save(pums.sample, iris.tidy, file = "2020.RData")
# '공부 > R Programming' 카테고리의 다른 글
# Data Science Week 10 (0) | 2021.05.03 |
# ---|---|
# Data Science Week 09 (0) | 2021.05.03 |
# [Week 06] Lectures (0) | 2021.04.09 |
# [Week 04] Lectures (0) | 2021.03.28 |
# [Week 03] Lectures (0) | 2021.03.28 |
# 댓글