본문 바로가기
공부/R Programming

pums.sample R

by 혼밥맨 2021. 4. 17.
반응형

library(dplyr)

library(tidyr)

library(stringr)

library(lubridate)

 

# Restore the objects stored in 2020.RData into the workspace; the statements
# below use `pums.sample`, which is not created anywhere else in this script.
load(file = "2020.RData")

 

Q1.

# Show the column types of pums.sample, then its first six rows.
str(object = pums.sample)
head(x = pums.sample, n = 6L)

 

Q2.

# Recode SEX in one expression: take the codes as text, replace "1" with
# "Male", then "2" with "Female", and store the labels as a factor.
pums.sample$SEX <- factor(
  str_replace(
    str_replace(as.character(pums.sample$SEX), "1", "Male"),
    "2",
    "Female"
  )
)

 

 

Q3.

# Lookup table from MAR code (as text) to its descriptive label.
named_vector <- c(
  "1" = "Married",
  "2" = "Widowed",
  "3" = "Divorced",
  "4" = "Separated",
  "5" = "Never married or under 15 years old"
)

# Translate the whole column with a single vectorised lookup instead of an
# element-by-element sapply(): the result is always a character vector (also
# for zero rows), codes missing from the table become NA, and unname() stops
# the lookup keys from being stored as a names attribute on the column.
pums.sample$MAR <- unname(named_vector[as.character(pums.sample$MAR)])

head(pums.sample)

 

 

Q4.

# Number of missing values in each column, followed by the total row count.
colSums(x = is.na(pums.sample), na.rm = FALSE)
dim(pums.sample)[1L]

 

 

Q5.

 

# Male rows; which() drops rows where the SEX comparison is NA.
pums.sample.male <- pums.sample[which(pums.sample$SEX == "Male"), ]
nrow(pums.sample.male)
colSums(is.na(pums.sample.male))

# Female rows, and the subset of them younger than 15.
pums.sample.female <- pums.sample[which(pums.sample$SEX == "Female"), ]
pums.sample.young_f <- filter(pums.sample.female, AGEP < 15)

# The same under-15 rows via base indexing. The row mask has to be computed
# from the female subset itself: positions taken from pums.sample.male refer
# to a different set of rows and would select unrelated women.
pums.sample.female[which(pums.sample.female$AGEP < 15), ]

# Spot checks: every row with AGEP equal to 12, the young-female subset, and
# the raw logical mask behind it.
filter(pums.sample, AGEP == 12)
pums.sample.young_f
pums.sample.female$AGEP < 15

# Female rows older than 50, with their per-column NA counts and row count.
pums.sample.old_f <- filter(pums.sample.female, AGEP > 50)
colSums(is.na(pums.sample.old_f))
nrow(pums.sample.old_f)

Q6.

# Per-column summary of the data, then one box plot for the WKHP column and
# one for the PINCP column.
summary(object = pums.sample)
boxplot(x = pums.sample[["WKHP"]])
boxplot(x = pums.sample[["PINCP"]])

 

 

Q7.

 

# Keep only the COW and SCHL columns and cross-tabulate them; the total row
# count is printed afterwards for reference.
df <- pums.sample[c("COW", "SCHL")]
table(df)
dim(pums.sample)[1L]

 

Q8.

summary(pums.sample$AGEP)

# Bin AGEP into left-closed decades: [.., 30), [30, 40), ..., [60, ..).
# The outer bins are open-ended on purpose. With the closed range [20, 84)
# an age of exactly 84 (or above) matched no bin, and the follow-up
# "every NA becomes 20s" step then counted those oldest respondents -- and any
# genuinely missing age -- as "20s". Ages below 20 still fall into "20s", as
# they did before; ages of 84 and over now land in "over 60s"; a missing AGEP
# stays NA and is left out of the group means.
# NOTE(review): confirm that under-20s are meant to be reported as "20s".
cut_point <- cut(
  pums.sample$AGEP,
  breaks = c(-Inf, 30, 40, 50, 60, Inf),
  right = FALSE,
  labels = c("20s", "30s", "40s", "50s", "over 60s")
)
cut_point

pums.sample$age_group <- cut_point

# Mean PINCP within each age group.
tapply(pums.sample$PINCP, pums.sample$age_group, mean)

 

Q9.

# Scatter plot of PINCP against WKHP, then the correlation of the two columns.
plot(x = pums.sample$WKHP, y = pums.sample$PINCP)
cor(x = pums.sample$WKHP, y = pums.sample$PINCP)

 

Q10.

# Write the modified pums.sample object to its own .RData file.
save(list = "pums.sample", file = "21600685.RData")

 

colnames(iris)

# Long form of iris: the four measurement columns are stacked one after the
# other, so every flower contributes four rows (Species, Part, value).
iris_1 <- gather(iris, Part, value, 1:4)

# Flower part ("Sepal" or "Petal") for every long-form row.
name <- ifelse(
  iris_1$Part %in% c("Sepal.Length", "Sepal.Width"),
  "Sepal",
  "Petal"
)
length(name)
iris_1$name <- name
iris_1

# Split the long form into its length rows and its width rows. spread() cannot
# be applied to these pieces directly: once Part is collapsed to a single label
# the remaining columns no longer identify a row uniquely.
iris_1_length <- filter(iris_1, Part %in% c("Sepal.Length", "Petal.Length"))
iris_1_Width <- filter(iris_1, Part %in% c("Sepal.Width", "Petal.Width"))
iris_1_Width

# gather() stacks whole columns in their original order, so row i of the
# length half and row i of the width half describe the same flower and part.
# Guard that pairing before the halves are combined side by side.
stopifnot(
  nrow(iris_1_length) == nrow(iris_1_Width),
  identical(iris_1_length$Species, iris_1_Width$Species),
  identical(iris_1_length$name, iris_1_Width$name)
)

# Wide result: one row per flower part, with Length and Width as columns.
# Built from equally long pieces; binding the full long frame to the half-size
# pieces fails because the row counts differ.
iris.wide <- data.frame(
  Species = iris_1_length$Species,
  name = iris_1_length$name,
  Length = iris_1_length$value,
  Width = iris_1_Width$value
)

iris.wide

 

# Build iris.tidy: one row per flower measurement with the columns
# Species, Value, Part ("Sepal"/"Petal") and Measure ("Length"/"Width").
# Columns are picked by name throughout; positional picks break as soon as
# the frame's width changes between steps.

# Long form: Species, Part (original column name), value.
iris_1 <- gather(iris, Part, value, 1:4)

# Which flower part each row belongs to.
iris_1$name <- ifelse(
  iris_1$Part %in% c("Sepal.Length", "Sepal.Width"),
  "Sepal",
  "Petal"
)

# Whether the row holds a length or a width.
iris_1$Length <- ifelse(
  iris_1$Part %in% c("Sepal.Length", "Petal.Length"),
  "Length",
  "Width"
)

iris_1
nrow(iris_1)

# Drop the combined original column name and give the remaining columns
# their final names.
iris_2 <- iris_1[, c("Species", "value", "name", "Length")]
colnames(iris_2) <- c("Species", "Value", "Part", "Measure")
iris_2

iris.tidy <- iris_2

 

# Store both result objects together.
# NOTE(review): this writes to the same file name that load() reads at the top
# of the script, so the original 2020.RData is replaced -- confirm intended.
save(list = c("pums.sample", "iris.tidy"), file = "2020.RData")

반응형

'공부 > R Programming' 카테고리의 다른 글

Data Science Week 10  (0) 2021.05.03
Data Science Week 09  (0) 2021.05.03
[Week 06] Lectures  (0) 2021.04.09
[Week 04] Lectures  (0) 2021.03.28
[Week 03] Lectures  (0) 2021.03.28

댓글