본문 바로가기
공부/R Programming

Week 01: Basics of R

by 혼밥맨 2021. 3. 2.
반응형

Week 01: Basics of R

 

print("hello")

[1] "hello"

a = 10

b = 20

a+b

[1] 30

?print

 

install.packages("randomForest")

 

<to load package>

library("package name")

or require("package name")

 

<Variable>

A variable is a container that holds the data (or information) we want to work with.

Variable can hold

 - a single value: 10, 10.5, "abc", factor, NA, NULL

 - multiple values: vector, matrix, list

 - specially formatted data (values): data.frame

 

<Naming Rule>

1. Consists of letters, digits, '.' (dot), and '_' (underscore) only.

2. First letter should be alphabet letter or dot('.')

3. If the name starts with '.', the second character cannot be a digit.

 

ex) my_first_variable <- 35.121

john_weight <- 67

my_weight <- 70

1 > 3

[1] FALSE

 

john_weight < my_weight

[1] TRUE

 

typeof(john_weight < my_weight)

[1] "logical"

 

 

<Operators>

a <- 10.5

b <- 20

c <- 4

a + b ## addition

[1] 30.5

 

a - c ## subtraction

[1] 6.5

 

a * c ## multiplication

[1] 42

 

b / c ## division

[1] 5

 

a %% c ## remainder

[1] 2.5

 

a > b ## inequality

[1] FALSE

 

a * 2 == b ## equality

[1] FALSE

 

!(a > b) ## negation

[1] TRUE

 

(b > a)  & (b > c) ## logical AND

[1] TRUE

 

(a > b) | (a > c) ## logical OR

[1] TRUE

 

<Data Type - Missing Value (NA)>

my.grade <- 100

your.grade <- 50

his.grade <- NA

is.na(my.grade)

[1] FALSE

is.na(his.grade)

[1] TRUE

 

<Data type of some special values>

typeof(Inf)    ## Infinity

[1] "double"

typeof(-Inf)   ## Minus Infinity

[1] "double"

typeof(NA)   ## Missing Value

[1] "logical"

 

 

<VECTOR>

- A vector is a sequence of data elements of the same basic type.

- All members should be of same data type.

 

numeric_vector <- c(1, 10, 49)

character_vector <- c("a", "b", "c")

boolean_vector <- c(TRUE, FALSE, TRUE)

 

typeof(numeric_vector)

[1] "double"

typeof(character_vector)

[1] "character"

typeof(boolean_vector)

[1] "logical"

length(numeric_vector) ## number of members in the vector

[1] 3 

 

new_vector <- c(numeric_vector, 50)

new_vector

[1] 1 10 49 50

 

name_vector = c("John", "Bob", "Sarah", "Alice")

name_vector[1:3]

[1] "John" "Bob" "Sarah"

name_vector[-2]

[1] "John" "Sarah" "Alice"

name_vector[c(-1, -2)]

[1] "Sarah" "Alice"

name_vector[c(1,3,4)]

[1] "John" "Sarah" "Alice"

name_vector[5]

[1] NA

name_vector[c(1,2)]

[1] "John" "Bob"

name_vector[c(1,1,4,4,4)]

[1] "John" "John" "Alice" "Alice" "Alice" 

name_vector[-1]

[1] "Bob" "Sarah" "Alice"

 

some_vector <- c("John Doe", "poker player")

names(some_vector) <- c("Name", "Profession")

 

some_vector

    Name     Profession

"John Doe"  "poker player"

 

some_vector['Name']

    Name

"John Doe"

 

some_vector['Profession']

     Profession

"poker player"

 

some_vector[1]

    Name

"John Doe"

 

weather_vector <- c("Mon" = "Sunny", "Tues" = "Rainy", "Wed" = "Cloudy")

weather_vector

   Mon    Tues    Wed

"Sunny" "Rainy" "Cloudy"

 

names(weather_vector)

[1] "Mon" "Tues" "Wed"

 

a_vector <- 1:10            ## numbers from 1 to 10

b_vector <- seq(1, 10, 2)  ## numbers from 1 to 10 increasing by 2

 

a_vector

[1] 1 2 3 4 5 6 7 8 9 10

b_vector

[1] 1 3 5 7 9

c_vector <- rep(1:3, 3)

d_vector <- rep(1:3, each=3)

 

c_vector

[1] 1 2 3 1 2 3 1 2 3 

d_vector

[1] 1 1 1 2 2 2 3 3 3

 

c(a_vector, b_vector)   ## combine vectors to single vector

[1] 1 2 3 4 5 6 7 8 9 10 1 3 5 7 9

 

a_vector <- c(1, 5, 2, 7, 8, 2, 3)

b_vector <- seq(1, 10, 3)

 

intersect(a_vector, b_vector) ## intersection

[1] 1  7

union(a_vector, b_vector)     ## union

[1] 1 5 2 7 8 3 4 10

setdiff(a_vector, b_vector)     ## set difference

[1] 5 2 8 3 

unique(a_vector)         ## find distinct members

[1] 1 5 2 7 8 3

 

a_vector <- c(1, 5, 2, 7, 8)

b_vector <- seq(1, 10, 2)

 

sum(a_vector)        ## summation

[1] 23 

mean(a_vector)      ## average

[1] 4.6

 

# operation of Vector and Scalar

a_vector + 10

[1] 11 15 12 17 18

a_vector > 4

[1] FALSE TRUE FALSE TRUE TRUE

sum(a_vector > 4)

[1] 3 ## number of TRUE

 

# operation of Vector and Vector

a_vector - b_vector

[1] 0 2 -3 0 -1

a_vector == b_vector

[1] TRUE FALSE FALSE TRUE FALSE

sum(a_vector == b_vector) 

[1] 2

 

 

 

<Vector Indexing (Selection)>

sample_vector <- c(1, 4, NA, 2, 1, NA, 4, NA)

sample_vector[1:5]

[1] 1 4 NA 2 1 

sample_vector[c(1,3,5)]

[1] 1 NA 1

sample_vector[-1]

[1] 4 NA 2 1 NA 4 NA

sample_vector[c(-1, -3, -5)]

[1] 4 2 NA 4 NA

sample_vector[c(T, T, F, T, F, T, F, T)]

[1] 1 4 2 NA NA

is.na(sample_vector)

[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE

sum(is.na(sample_vector))

[1] 3

 

 

<<MATRIX>>

matrix(1:9, byrow = TRUE, nrow = 3)

       [,1] [,2] [,3]

[1,]   1    2   3

[2,]   4    5   6

[3,]   7    8   9

 

 

my_vect <- c(1, 2, 3, 4, 5, 10, 9, 8, 7, 6)

my_mat <- matrix(my_vect, nrow = 5, byrow = TRUE)

 

<Naming a matrix>

Similar to vectors, you can add names for the rows and the columns of a matrix.

rownames(my_matrix) <- row_names_vector

colnames(my_matrix) <- col_names_vector

 

# Box Office Star Wars (in millions!)

new_hope <- c(460.998, 314.4)

empire_strikes <- c(290.475, 247.900)

return_jedi <- c(309.306, 165.8)

 

# Construct matrix

star_wars_matrix <- matrix(c(new_hope, empire_strikes, return_jedi), nrow = 3, byrow = TRUE)

 

star_wars_matrix

             [ ,1]     [ ,2]

[1, ]   460.998   314.4

[2, ]   290.475   247.9

[3, ]   309.306   165.8

 

# Vectors region and titles, used for naming

region <- c("US", "non-US")

titles <- c("A New Hope", "The Empire Strikes Back", "Return of the Jedi")

 

# Name the columns with region

colnames(star_wars_matrix) <- region

 

# Name the rows with titles

rownames(star_wars_matrix) <- titles

 

star_wars_matrix

                                        US    non-US

A New Hope                   460.998  314.4

The Empire Strikes Back     290.475  247.9

Return of the Jedi             309.306  165.8

 

# The worldwide box office figures

rowSums(star_wars_matrix)

A New Hope             The Empire Strikes Back          Return of the Jedi

775.398                    538.375                                475.106

 

# Total revenue for entire Series

colSums(star_wars_matrix)

        US      non-US

1060.779      728.100

 

# The worldwide box office figures

worldwide_vector <- rowSums(star_wars_matrix)

 

# Bind the new variable worldwide_vector as a column to star_wars_matrix

all_wars_matrix <- cbind(star_wars_matrix, worldwide_vector)

 

all_wars_matrix

                                         US      non-US          worldwide_vector

A New Hope                 460.998     314.4                775.398

The Empire Strikes Back   290.475     247.9               538.375

Return of the Jedi           309.306     165.8               475.106

 

# Construct star_wars_matrix2

box_office <- c(474.5, 552.5, 310.7, 338.7, 380.3, 468.5)

star_wars_matrix2 <- matrix(box_office, nrow = 3, byrow = TRUE, dimnames = list(c("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith"), c("US", "non-US")))

 

 

To be continued...

반응형

댓글