R 기초; 웹 스크레이핑 stringr 패키지

string <- c("data analytics is useful", "business analytics is helpful", "visualization of data is interesting for data scientists")

install.packages("stringr", repos="http://cran.us.r-project.org")

library(stringr)

str_detect(string=string, pattern="data")

[1] TRUE FALSE TRUE

str_detect(string, "DATA")

[1] FALSE FALSE FALSE

str_detect(string, fixed("DATA", ignore_case=TRUE))

[1] TRUE FALSE TRUE

str_detect(c("abz", "ayz", "a.z"), "a.z")

[1] TRUE TRUE TRUE

str_detect(c("abz", "ayz", "a.z"), fixed("a.z"))

[1] FALSE FALSE TRUE

str_detect(c("abz", "ayz", "a.z"), "a\\.z")

[1] FALSE FALSE TRUE

str_locate()

str_locate_all()

regexpr()

gregexpr()

str_locate(string, "data")

     start end
[1,]     1   4
[2,]    NA  NA
[3,]    18  21

str_locate_all(string, "data")

[[1]]
     start end
[1,]     1   4

[[2]]
     start end

[[3]]
     start end
[1,]    18  21
[2,]    42  45

str_extract_all(string, "data", simplify=TRUE)

     [,1]   [,2]
[1,] "data" ""
[2,] ""     ""
[3,] "data" "data"

unlist(str_extract_all(string, "data"))

[1] "data" "data" "data"

str_match()

str_match_all()

sentences5 <- sentences[1:5]

sentences5

[1] "The birch canoe slid on the smooth planks." "Glue the sheet to the dark blue background." "It's easy to tell the depth of a well."
[4] "These days a chicken leg is a rare dish." "Rice is often served in round bowls."

str_extract(sentences5, "(a|A|the|The) (\\w+)")

[1] "The birch" "the sheet" "the depth" "a chicken" NA

str_replace_all(string=string, pattern="data", replacement="text")

[1] "text analytics is useful" "business analytics is helpful"
[3] "visualization of text is interesting for text scientists"

str_split(string, " ")

[[1]]
[1] "data"      "analytics" "is"        "useful"

[[2]]
[1] "business"  "analytics" "is"        "helpful"

[[3]]
[1] "visualization" "of"            "data"          "is"            "interesting"   "for"           "data"          "scientists"

unlist(str_split(string, " "))

[1] "data" "analytics" "is" "useful" "business" "analytics" "is" "helpful" "visualization"
[10] "of" "data" "is" "interesting" "for" "data" "scientists"

unique(unlist(str_split(string, " ")))

[1] "data" "analytics" "is" "useful" "business" "helpful" "visualization" "of" "interesting"
[10] "for" "scientists"

str_split(string, " ", n=3)

[[1]]
[1] "data"      "analytics" "is useful"

[[2]]
[1] "business"   "analytics"  "is helpful"

[[3]]
[1] "visualization"                           "of"                                      "data is interesting for data scientists"

str_split(string, " ", n=3, simplify=TRUE)

     [,1]            [,2]        [,3]
[1,] "data"          "analytics" "is useful"
[2,] "business"      "analytics" "is helpful"
[3,] "visualization" "of"        "data is interesting for data scientists"

str_length(string)

[1] 24 29 56

str_count(string, "data")

[1] 1 0 2

str_count(string, "\\w+")

[1] 4 4 8

str_pad(string=c("a", "abc", "abcde"), width=6, side="left", pad=" ")

[1] "     a" "   abc" " abcde"

mon <- 1:12

mon

[1] 1 2 3 4 5 6 7 8 9 10 11 12

str_pad(mon, width=2, side="left", pad="0")

[1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"

str_trim()

str.pad <- str_pad(string, width=max(str_length(string)), side="both", pad=" ")

str.pad

[1] "                data analytics is useful                " "             business analytics is helpful              "
[3] "visualization of data is interesting for data scientists"

str_trim(str.pad, side="both")

[1] "data analytics is useful" "business analytics is helpful"
[3] "visualization of data is interesting for data scientists"

str_c("data", "mining", sep=" ")

[1] "data mining"

str.mining <- str_c(c("data mining", "text mining"), "is useful", sep=" ")

str.mining

[1] "data mining is useful" "text mining is useful"

str_c(str.mining, collapse="; ")

[1] "data mining is useful; text mining is useful"

str_c(str.mining, collapse="\n")

[1] "data mining is useful\ntext mining is useful"

cat(str_c(str.mining, collapse="\n"))

data mining is useful
text mining is useful

str_sub(string=str.mining, start=1, end=4)

[1] "data" "text"

str_sub(str.mining, 5, 5) <- "-"

str.mining

[1] "data-mining is useful" "text-mining is useful"

str_sub("abcedfg", start=-2)

[1] "fg"

str_sub("abcedfg", end=-3)

[1] "abced"

'공부 > R Programming' 카테고리의 다른 글

R 기초; 입력 (2)	2021.01.15
R 기초; 날짜와 시간 (0)	2021.01.15
R 기초; 웹스크레이핑 base 패키지 (0)	2021.01.14
R 기초; 텍스트 (0)	2021.01.13
R 기초; 데이터프레임 인덱싱-3 (0)	2021.01.10

혼밥맨

R 기초; 웹 스크레이핑 stringr 패키지