### Using the dplyr package for data manipulation ###
# Setup ----
# NOTE: `rm(list = ls())` was dropped from this script. It only deletes
# objects in the global environment (attached packages and options are left
# untouched), so it does not give a clean session; restart R instead.

# CRAN mirror used by install.packages(); HTTPS instead of plain HTTP.
options(repos = c(CRAN = "https://cran.rstudio.com"))

# Install nycflights13 only when it is missing, so re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}

library(nycflights13) # provides the `flights` data set used below
library(tidyverse)    # attaches dplyr (filter(), ...) among other packages
### Using the flights data to practice ###
# Column type abbreviations:
#   int  stands for integers.
#   dbl  stands for doubles, or real numbers.
#   chr  stands for character vectors, or strings.
#   dttm stands for date-times (a date + a time).
#   lgl  stands for logical, vectors that contain only TRUE or FALSE.
#   fctr stands for factors, which R uses to represent categorical variables
#        with fixed possible values.
#   date stands for dates.
# Inspect the data ----
help("flights")          # open the help page for the data set
nycflights13::flights    # print the data set, referenced by its package
flights %>% head(n = 10) # first ten rows
### Using dplyr functions to manipulate the data ###
# filter():    Pick observations by their values.
# arrange():   Reorder the rows.
# select():    Pick variables by their names.
# mutate():    Create new variables with functions of existing variables.
# summarize(): Collapse many values down to a single summary.
# group_by():  Changes the scope of each function from operating on the
#              entire dataset to operating on it group-by-group.
# filter() ----
# This label must stay a comment: a bare `Filter:` line is parsed together
# with the following line as `Filter:filter(...)` (the `:` sequence operator).
# Conditions separated by commas must all hold for a row to be kept.

# Rows with month 1 / day 1, then month 2 / day 2 (printed, not stored).
filter(flights, month == 1, day == 1)
filter(flights, month == 2, day == 2)

# Store the month 1 / day 1 rows and show the first ten of them.
jan1 <- filter(flights, month == 1, day == 1)
head(jan1, n = 10)

# Store the month 12 / day 25 rows and show the first nine of them.
dec25 <- filter(flights, month == 12, day == 25)
head(dec25, n = 9)
### Comparison operators, the standard suite: >, >=, <, <=, != (not equal),
### and == (equal) ###
### Boolean operators: & is "and", | is "or", and ! is "not". ###
# Rows whose month is 11 or 12, combined with the "or" operator.
flights %>%
  filter(month == 11 | month == 12)
### x %in% y: This will select every row where x is one of the values in y.
# Rows whose month is one of 11 or 12, stored as `nov_dec`.
nov_dec <- flights %>%
  filter(month %in% c(11, 12))
### Find flights that weren't delayed (on arrival or departure) by more than
### two hours.
# Delays are in minutes, so "more than two hours" is `> 120`. The strict
# comparison matters: `>= 120` would also drop flights delayed exactly
# 120 minutes, which are not delayed by *more than* two hours.
filter(flights, !(arr_delay > 120 | dep_delay > 120))
# Rows for which it is NOT the case that both the arrival delay and the
# departure delay are 45 or more.
flights %>%
  filter(!(arr_delay >= 45 & dep_delay >= 45))

# Rows whose arrival delay or departure delay is -3 or less.
flights %>%
  filter(arr_delay <= -3 | dep_delay <= -3)
# End of script. (Blog page footer "No comments: / Post a Comment" is not code.)