Week 01: Basics of R
print("hello")
[1] "hello"
a = 10
b = 20
a+b
[1] 30
install.packages("randomForest")
<to load package>
library("package name")
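or require("package name") (note: require() returns FALSE with a warning instead of stopping with an error when the package is missing, so library() is preferred for loading)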
<Variable>
A variable is a container that holds the data (or information) we want to work with.
A variable can hold:
- a single value: 10, 10.5, "abc", factor, NA, NULL
- multiple values: vector, matrix, list
- specially formatted data (values): data.frame
<Naming Rule>
1. A name may consist only of letters, digits, '.' (dot), and '_' (underscore).
2. The first character must be a letter or a dot ('.').
3. If the name starts with a dot, the second character cannot be a digit.
ex) my_first_variable <- 35.121
john_weight <- 67
my_weight <- 70
1 > 3
[1] FALSE
john_weight < my_weight
[1] TRUE
typeof(john_weight < my_weight)
[1] "logical"
<Operators>
a <- 10.5
b <- 20
c <- 4
a + b ## addition
[1] 30.5
a - c ## subtraction
[1] 6.5
a * c ## multiplication
[1] 42
b / c ## division
[1] 5
a %% c ## remainder
[1] 2.5
a > b ## inequality
[1] FALSE
a * 2 == b ## equality
[1] FALSE
!(a > b) ## negation
[1] TRUE
(b > a) & (b > c) ## logical AND
[1] TRUE
(a > b) | (a > c) ## logical OR
[1] TRUE
<Data Type - Missing Value (NA)>
my.grade <- 100
your.grade <- 50
his.grade <- NA
is.na(my.grade)
[1] FALSE
is.na(his.grade)
[1] TRUE
<Data type of some special values>
typeof(Inf) ## Infinity
[1] "double"
typeof(-Inf) ## Minus Infinity
[1] "double"
typeof(NA) ## Missing Value
[1] "logical"
<VECTOR>
- A vector is a sequence of data elements of the same basic type.
- All members should be of same data type.
numeric_vector <- c(1, 10, 49)
character_vector <- c("a", "b", "c")
boolean_vector <- c(TRUE, FALSE, TRUE)
typeof(numeric_vector)
[1] "double"
typeof(character_vector)
[1] "character"
typeof(boolean_vector)
[1] "logical"
length(numeric_vector) ## number of members in the vector
[1] 3
new_vector <- c(numeric_vector, 50)
new_vector
[1] 1 10 49 50
name_vector = c("John", "Bob", "Sarah", "Alice")
name_vector[1:3]
[1] "John" "Bob" "Sarah"
name_vector[-2]
[1] "John" "Sarah" "Alice"
name_vector[c(-1, -2)]
[1] "Sarah" "Alice"
name_vector[c(1,3,4)]
[1] "John" "Sarah" "Alice"
name_vector[5]
[1] NA
name_vector[c(1,2)]
[1] "John" "Bob"
name_vector[c(1,1,4,4,4)]
[1] "John" "John" "Alice" "Alice" "Alice"
name_vector[-1]
[1] "Bob" "Sarah" "Alice"
some_vector <- c("John Doe", "poker player")
names(some_vector) <- c("Name", "Profession")
some_vector
Name Profession
"John Doe" "poker player"
some_vector['Name']
Name
"John Doe"
some_vector['Profession']
Profession
"poker player"
some_vector[1]
Name
"John Doe"
weather_vector <- c("Mon" = "Sunny", "Tues" = "Rainy", "Wed" = "Cloudy")
weather_vector
Mon Tues Wed
"Sunny" "Rainy" "Cloudy"
names(weather_vector)
[1] "Mon" "Tues" "Wed"
a_vector <- 1:10 ## numbers from 1 to 10
b_vector <- seq(1, 10, 2) ## numbers from 1 to 10 increasing by 2
a_vector
[1] 1 2 3 4 5 6 7 8 9 10
b_vector
[1] 1 3 5 7 9
c_vector <- rep(1:3, 3)
d_vector <- rep(1:3, each=3)
c_vector
[1] 1 2 3 1 2 3 1 2 3
d_vector
[1] 1 1 1 2 2 2 3 3 3
c(a_vector, b_vector) ## combine vectors to single vector
[1] 1 2 3 4 5 6 7 8 9 10 1 3 5 7 9
a_vector <- c(1, 5, 2, 7, 8, 2, 3)
b_vector <- seq(1, 10, 3)
intersect(a_vector, b_vector) ## intersection
[1] 1 7
union(a_vector, b_vector) ## union
[1] 1 5 2 7 8 3 4 10
setdiff(a_vector, b_vector) ## set difference
[1] 5 2 8 3
unique(a_vector) ## find distinct members
[1] 1 5 2 7 8 3
a_vector <- c(1, 5, 2, 7, 8)
b_vector <- seq(1, 10, 2)
sum(a_vector) ## summation
[1] 23
mean(a_vector) ## average
[1] 4.6
# operation of Vector and Scalar
a_vector + 10
[1] 11 15 12 17 18
a_vector > 4
[1] FALSE TRUE FALSE TRUE TRUE
sum(a_vector > 4)
[1] 3 ## number of TRUE
# operation of Vector and Vector
a_vector - b_vector
[1] 0 2 -3 0 -1
a_vector == b_vector
[1] TRUE FALSE FALSE TRUE FALSE
sum(a_vector == b_vector)
[1] 2
<Vector Indexing (Selection)>
sample_vector <- c(1, 4, NA, 2, 1, NA, 4, NA)
sample_vector[1:5]
[1] 1 4 NA 2 1
sample_vector[c(1,3,5)]
[1] 1 NA 1
sample_vector[-1]
[1] 4 NA 2 1 NA 4 NA
sample_vector[c(-1, -3, -5)]
[1] 4 2 NA 4 NA
sample_vector[c(T, T, F, T, F, T, F, T)]
[1] 1 4 2 NA NA
is.na(sample_vector)
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE
sum(is.na(sample_vector))
[1] 3
<<MATRIX>>
matrix(1:9, byrow = TRUE, nrow = 3)
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 5 6
[3,] 7 8 9
my_vect <- c(1, 2, 3, 4, 5, 10, 9, 8, 7, 6)
my_mat <- matrix(my_vect, nrow = 5, byrow = TRUE)
<Naming a matrix>
Similar to vectors, you can add names for the rows and the columns of a matrix.
rownames(my_matrix) <- row_names_vector
colnames(my_matrix) <- col_names_vector
# Box Office Star Wars (in millions!)
new_hope <- c(460.998, 314.4)
empire_strikes <- c(290.475, 247.900)
return_jedi <- c(309.306, 165.8)
# Construct matrix
star_wars_matrix <- matrix(c(new_hope, empire_strikes, return_jedi), nrow = 3, byrow = TRUE)
star_wars_matrix
[ ,1] [ ,2]
[1, ] 460.998 314.4
[2, ] 290.475 247.9
[3, ] 309.306 165.8
# Vectors region and titles, used for naming
region <- c("US", "non-US")
titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")
# Name the columns with region
colnames(star_wars_matrix) <- region
# Name the rows with titles
rownames(star_wars_matrix) <- titles
star_wars_matrix
US non-US
A New Hope 460.998 314.4
The Empire Strikes Back 290.475 247.9
Return of the Jedi 309.306 165.8
# The worldwide box office figures
rowSums(star_wars_matrix)
A New Hope The Empire Strikes Back Return of the Jedi
775.398 538.375 475.106
# Total revenue per region (US, non-US) for the entire series
colSums(star_wars_matrix)
US non-US
1060.779 728.100
# The worldwide box office figures
worldwide_vector <- rowSums(star_wars_matrix)
# Bind the new variable worldwide_vector as a column to star_wars_matrix
all_wars_matrix <- cbind(star_wars_matrix, worldwide_vector)
all_wars_matrix
US non-US worldwide_vector
A New Hope 460.998 314.4 775.398
The Empire Strikes Back 290.475 247.9 538.375
Return of the Jedi 309.306 165.8 475.106
# Construct star_wars_matrix2
box_office <- c(474.5, 552.5, 310.7, 338.7, 380.3, 468.5)
star_wars_matrix2 <- matrix(box_office, nrow = 3, byrow = TRUE, dimnames = list(c("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith"), c("US", "non-US")))
To be continued...
'공부 > R Programming' 카테고리의 다른 글
Data Science Week 03 - 02 (0) | 2021.03.19 |
---|---|
Data Science Week 03 - 01 (0) | 2021.03.18 |
R기초; R 기초 - ggplot2 그래픽6 - 그래프 배치 및 저장 (0) | 2021.01.24 |
R기초; R 기초 - ggplot2 그래픽5 - 테마 (2) | 2021.01.24 |
R 기초 - ggplot2 그래픽4 - 그래프 옵션 (0) | 2021.01.24 |
댓글