R 기초; 웹 스크레이핑 stringr 패키지
string <- c("data analytics is useful", "business analytics is helpful", "visualization of data is interesting for data scientists")
# One-time setup: install stringr from the CRAN mirror given in `repos`,
# then attach it so the str_*() functions are available in this session.
install.packages("stringr", repos = "http://cran.us.r-project.org")
library(stringr)
str_detect(string=string, pattern="data")
[1] TRUE FALSE TRUE
str_detect(string, "DATA")
[1] FALSE FALSE FALSE
str_detect(string, fixed("DATA", ignore_case=TRUE))
[1] TRUE FALSE TRUE
str_detect(c("abz", "ayz", "a.z"), "a.z")
[1] TRUE TRUE TRUE
str_detect(c("abz", "ayz", "a.z"), fixed("a.z"))
[1] FALSE FALSE TRUE
str_detect(c("abz", "ayz", "a.z"), "a\\.z")
[1] FALSE FALSE TRUE
str_locate()
str_locate_all()
regexpr()
gregexpr()
str_locate(string, "data")
start end
[1,] 1 4
[2,] NA NA
[3,] 18 21
str_locate_all(string, "data")
[[1]]
start end
[1,] 1 4
[[2]]
start end
[[3]]
start end
[1,] 18 21
[2,] 42 45
str_extract_all(string, "data", simplify=TRUE)
[,1] [,2]
[1,] "data" ""
[2,] "" ""
[3,] "data" "data"
unlist(str_extract_all(string, "data"))
[1] "data" "data" "data"
str_match()
str_match_all()
sentences5 <- sentences[1:5]
sentences5
[1] "The birch canoe slid on the smooth planks." "Glue the sheet to the dark blue background." "It's easy to tell the depth of a well."
[4] "These days a chicken leg is a rare dish." "Rice is often served in round bowls."
str_extract(sentences5, "(a|A|the|The) (\\w+)")
[1] "The birch" "the sheet" "the depth" "a chicken" NA
# Replace every occurrence of "data" in each element of `string` with "text".
str_replace_all(string = string, pattern = "data", replacement = "text")
[1] "text analytics is useful" "business analytics is helpful"
[3] "visualization of text is interesting for text scientists"
str_split(string, " ")
[[1]]
[1] "data" "analytics" "is" "useful"
[[2]]
[1] "business" "analytics" "is" "helpful"
[[3]]
[1] "visualization" "of" "data" "is" "interesting" "for" "data" "scientists"
unlist(str_split(string, " "))
[1] "data" "analytics" "is" "useful" "business" "analytics" "is" "helpful" "visualization"
[10] "of" "data" "is" "interesting" "for" "data" "scientists"
unique(unlist(str_split(string, " ")))
[1] "data" "analytics" "is" "useful" "business" "helpful" "visualization" "of" "interesting"
[10] "for" "scientists"
str_split(string, " ", n=3)
[[1]]
[1] "data" "analytics" "is useful"
[[2]]
[1] "business" "analytics" "is helpful"
[[3]]
[1] "visualization" "of" "data is interesting for data scientists"
str_split(string, " ", n=3, simplify=TRUE)
[,1] [,2] [,3]
[1,] "data" "analytics" "is useful"
[2,] "business" "analytics" "is helpful"
[3,] "visualization" "of" "data is interesting for data scientists"
str_length(string)
[1] 24 29 56
str_count(string, "data")
[1] 1 0 2
str_count(string, "\\w+")
[1] 4 4 8
str_pad(string=c("a", "abc", "abcde"), width=6, side="left", pad=" ")
[1] "     a" "   abc" " abcde"
mon <- 1:12
mon
[1] 1 2 3 4 5 6 7 8 9 10 11 12
str_pad(mon, width=2, side="left", pad="0")
[1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12"
str_trim()
str.pad <- str_pad(string, width=max(str_length(string)), side="both", pad=" ")
str.pad
[1] "                data analytics is useful                " "             business analytics is helpful              "
[3] "visualization of data is interesting for data scientists"
str_trim(str.pad, side="both")
[1] "data analytics is useful" "business analytics is helpful"
[3] "visualization of data is interesting for data scientists"
str_c("data", "mining", sep=" ")
[1] "data mining"
str.mining <- str_c(c("data mining", "text mining"), "is useful", sep=" ")
str.mining
[1] "data mining is useful" "text mining is useful"
str_c(str.mining, collapse="; ")
[1] "data mining is useful; text mining is useful"
str_c(str.mining, collapse="\n")
[1] "data mining is useful\ntext mining is useful"
cat(str_c(str.mining, collapse="\n"))
data mining is useful
text mining is useful
str_sub(string=str.mining, start=1, end=4)
[1] "data" "text"
str_sub(str.mining, 5, 5) <- "-"
str.mining
[1] "data-mining is useful" "text-mining is useful"
str_sub("abcedfg", start=-2)
[1] "fg"
str_sub("abcedfg", end=-3)
[1] "abced"
'공부 > R Programming' 카테고리의 다른 글
R 기초; 입력 (0) | 2021.01.15 |
---|---|
R 기초; 날짜와 시간 (0) | 2021.01.15 |
R 기초; 웹스크레이핑 base 패키지 (0) | 2021.01.14 |
R 기초; 텍스트 (0) | 2021.01.13 |
R 기초; 데이터프레임 인덱싱-3 (0) | 2021.01.10 |
댓글