Try drawing a mental model of last lecture's material on ggplot2
Art by Allison Horst
Rmd
, and html
, files. Art by Allison Horst
Art by Allison Horst
Parsing dates & time zones using ymd()
ymd() can take a character input:
ymd("20190810")
## [1] "2019-08-10"
ymd() can also take other kinds of separators:
ymd("2019-08-10")
## [1] "2019-08-10"
ymd("2019/08/10")
## [1] "2019-08-10"
ymd() can also take other kinds of separators:
ymd("2019-08-10")
## [1] "2019-08-10"
ymd("2019/08/10")
## [1] "2019-08-10"
ymd("??2019-.-08//10---")
## [1] "2019-08-10"
mdy("10/15/2019")
## [1] "2019-10-15"
mdy("10/15/2019")
## [1] "2019-10-15"
mdy() expects month, day, year.
mdy("10/15/2019")
## [1] "2019-10-15"
mdy() expects month, day, year.
dmy() expects day, month, year.
dmy("10/08/2019")
## [1] "2019-08-10"
If you add a time zone, what changes?
ymd("2019-08-10", tz = "Australia/Melbourne")
## [1] "2019-08-10 AEST"
ymd("2019-08-10", tz = "Africa/Abidjan")
## [1] "2019-08-10 GMT"
ymd("2019-08-10", tz = "America/Los_Angeles")
## [1] "2019-08-10 PDT"
A list of acceptable time zones can be found in the tz (Olson) time zone database (search for "Wikipedia list of tz database time zones"), or by running OlsonNames() in R.
today()
## [1] "2019-08-16"
today()
## [1] "2019-08-16"
today(tz = "America/Los_Angeles")
## [1] "2019-08-15"
today()
## [1] "2019-08-16"
today(tz = "America/Los_Angeles")
## [1] "2019-08-15"
now()
## [1] "2019-08-16 07:31:37 AEST"
today()
## [1] "2019-08-16"
today(tz = "America/Los_Angeles")
## [1] "2019-08-15"
now()
## [1] "2019-08-16 07:31:37 AEST"
now(tz = "America/Los_Angeles")
## [1] "2019-08-15 14:31:37 PDT"
ymd_hms()
ymd_hms("2019-08-10 10:05:30", tz = "Australia/Melbourne")
## [1] "2019-08-10 10:05:30 AEST"
ymd_hms("2019-08-10 10:05:30", tz = "America/Los_Angeles")
## [1] "2019-08-10 10:05:30 PDT"
wday("2019-08-12")
## [1] 2
wday("2019-08-12", label = TRUE)
## [1] Mon
## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat
wday("2019-08-12", label = TRUE, abbr = FALSE)
## [1] Monday
## Levels: Sunday < Monday < Tuesday < Wednesday < Thursday < Friday < Saturday
wday("2019-08-12", label = TRUE, week_start = 1)
## [1] Mon
## Levels: Mon < Tue < Wed < Thu < Fri < Sat < Sun
month("2019-08-10")
## [1] 8
month("2019-08-10", label = TRUE)
## [1] Aug
## Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < Oct < Nov < Dec
month("2019-08-10", label = TRUE, abbr = FALSE)
## [1] August
## 12 Levels: January < February < March < April < May < June < July < ... < December
quarter("2019-08-10")
## [1] 3
semester("2019-08-10")
## [1] 2
yday("2019-08-10")
## [1] 222
library(rwalkr)
walk_all <- melb_walk_fast(year = 2018)
library(dplyr)
walk <- walk_all %>% filter(Sensor == "Melbourne Central")
write_csv(walk, path = "data/walk_2018.csv")
walk <- readr::read_csv("data/walk_2018.csv")
walk
## # A tibble: 8,760 x 5
## Sensor Date_Time Date Time Count
## <chr> <dttm> <date> <dbl> <dbl>
## 1 Melbourne Central 2017-12-31 13:00:00 2018-01-01 0 2996
## 2 Melbourne Central 2017-12-31 14:00:00 2018-01-01 1 3481
## 3 Melbourne Central 2017-12-31 15:00:00 2018-01-01 2 1721
## 4 Melbourne Central 2017-12-31 16:00:00 2018-01-01 3 1056
## 5 Melbourne Central 2017-12-31 17:00:00 2018-01-01 4 417
## 6 Melbourne Central 2017-12-31 18:00:00 2018-01-01 5 222
## 7 Melbourne Central 2017-12-31 19:00:00 2018-01-01 6 110
## 8 Melbourne Central 2017-12-31 20:00:00 2018-01-01 7 180
## 9 Melbourne Central 2017-12-31 21:00:00 2018-01-01 8 205
## 10 Melbourne Central 2017-12-31 22:00:00 2018-01-01 9 326
## # … with 8,750 more rows
walk
## # A tibble: 8,760 x 5
## Sensor Date_Time Date Time Count
## <chr> <dttm> <date> <dbl> <dbl>
## 1 Melbourne Central 2017-12-31 13:00:00 2018-01-01 0 2996
## 2 Melbourne Central 2017-12-31 14:00:00 2018-01-01 1 3481
## 3 Melbourne Central 2017-12-31 15:00:00 2018-01-01 2 1721
## 4 Melbourne Central 2017-12-31 16:00:00 2018-01-01 3 1056
## 5 Melbourne Central 2017-12-31 17:00:00 2018-01-01 4 417
## 6 Melbourne Central 2017-12-31 18:00:00 2018-01-01 5 222
## 7 Melbourne Central 2017-12-31 19:00:00 2018-01-01 6 110
## 8 Melbourne Central 2017-12-31 20:00:00 2018-01-01 7 180
## 9 Melbourne Central 2017-12-31 21:00:00 2018-01-01 8 205
## 10 Melbourne Central 2017-12-31 22:00:00 2018-01-01 9 326
## # … with 8,750 more rows
walk_tidy <- walk %>%
  mutate(month = month(Date, label = TRUE, abbr = TRUE),
         wday = wday(Date, label = TRUE, abbr = TRUE, week_start = 1))
walk_tidy
## # A tibble: 8,760 x 7
## Sensor Date_Time Date Time Count month wday
## <chr> <dttm> <date> <dbl> <dbl> <ord> <ord>
## 1 Melbourne Central 2017-12-31 13:00:00 2018-01-01 0 2996 Jan Mon
## 2 Melbourne Central 2017-12-31 14:00:00 2018-01-01 1 3481 Jan Mon
## 3 Melbourne Central 2017-12-31 15:00:00 2018-01-01 2 1721 Jan Mon
## 4 Melbourne Central 2017-12-31 16:00:00 2018-01-01 3 1056 Jan Mon
## 5 Melbourne Central 2017-12-31 17:00:00 2018-01-01 4 417 Jan Mon
## 6 Melbourne Central 2017-12-31 18:00:00 2018-01-01 5 222 Jan Mon
## 7 Melbourne Central 2017-12-31 19:00:00 2018-01-01 6 110 Jan Mon
## 8 Melbourne Central 2017-12-31 20:00:00 2018-01-01 7 180 Jan Mon
## 9 Melbourne Central 2017-12-31 21:00:00 2018-01-01 8 205 Jan Mon
## 10 Melbourne Central 2017-12-31 22:00:00 2018-01-01 9 326 Jan Mon
## # … with 8,750 more rows
ggplot(walk_tidy, aes(x = month, y = Count)) + geom_col()
ggplot(walk_tidy, aes(x = wday, y = Count)) + geom_col()
How would you describe the pattern?
walk_day <- walk_tidy %>%
  group_by(Date) %>%
  summarise(day_count = sum(Count, na.rm = TRUE))
walk_day
## # A tibble: 365 x 2
## Date day_count
## <date> <dbl>
## 1 2018-01-01 30832
## 2 2018-01-02 26136
## 3 2018-01-03 26567
## 4 2018-01-04 26532
## 5 2018-01-05 28203
## 6 2018-01-06 20845
## 7 2018-01-07 24052
## 8 2018-01-08 26530
## 9 2018-01-09 27116
## 10 2018-01-10 28203
## # … with 355 more rows
walk_week_day <- walk_day %>%
  mutate(wday = wday(Date, label = TRUE, abbr = TRUE, week_start = 1)) %>%
  group_by(wday) %>%
  summarise(m = mean(day_count, na.rm = TRUE),
            s = sd(day_count, na.rm = TRUE))
walk_week_day
## # A tibble: 7 x 3
## wday m s
## <ord> <dbl> <dbl>
## 1 Mon 25590. 8995.
## 2 Tue 26242. 8989.
## 3 Wed 27627. 9535.
## 4 Thu 27887. 8744.
## 5 Fri 31544. 10239.
## 6 Sat 30470. 9823.
## 7 Sun 25296. 9024.
ggplot(walk_week_day) +
  geom_errorbar(aes(x = wday, ymin = m - s, ymax = m + s)) +
  ylim(c(0, 45000)) +
  labs(x = "Day of week", y = "Average number of pedestrians")
Side-by-side boxplots show the distribution of counts over different temporal elements.
ggplot(walk_tidy, aes(x = as.factor(Time), y = Count)) + geom_boxplot()
ggplot(walk_tidy, aes(x = wday, y = Count)) + geom_boxplot()
ggplot(walk_tidy, aes(x = month, y = Count)) + geom_boxplot()
ggplot(walk_tidy, aes(x = Time, y = Count, group = Date)) + geom_line()
ggplot(walk_tidy, aes(x = Time, y = Count, group = Date)) + geom_line() + facet_wrap( ~ month)
ggplot(walk_tidy, aes(x = Time, y = Count, group = Date)) + geom_line() + facet_grid(month ~ wday)
library(sugrrants)
walk_tidy_calendar <- frame_calendar(walk_tidy, x = Time, y = Count, date = Date, nrow = 4)
p1 <- ggplot(walk_tidy_calendar, aes(x = .Time, y = .Count, group = Date)) +
  geom_line()
prettify(p1)
library(tsibble)
library(sugrrants)
library(timeDate)
vic_holidays <- holiday_aus(2018, state = "VIC")
vic_holidays
## # A tibble: 12 x 2
## holiday date
## <chr> <date>
## 1 New Year's Day 2018-01-01
## 2 Australia Day 2018-01-26
## 3 Labour Day 2018-03-12
## 4 Good Friday 2018-03-30
## 5 Easter Saturday 2018-03-31
## 6 Easter Sunday 2018-04-01
## 7 Easter Monday 2018-04-02
## 8 ANZAC Day 2018-04-25
## 9 Queen's Birthday 2018-06-11
## 10 Melbourne Cup 2018-11-06
## 11 Christmas Day 2018-12-25
## 12 Boxing Day 2018-12-26
pull-right[
]
walk_holiday <- walk_tidy %>%
  mutate(holiday = if_else(condition = Date %in% vic_holidays$date, true = "yes", false = "no")) %>%
  mutate(holiday = if_else(condition = wday %in% c("Sat", "Sun"), true = "yes", false = holiday))
walk_holiday
## # A tibble: 8,760 x 8
## Sensor Date_Time Date Time Count month wday holiday
## <chr> <dttm> <date> <dbl> <dbl> <ord> <ord> <chr>
## 1 Melbourne Central 2017-12-31 13:00:00 2018-01-01 0 2996 Jan Mon yes
## 2 Melbourne Central 2017-12-31 14:00:00 2018-01-01 1 3481 Jan Mon yes
## 3 Melbourne Central 2017-12-31 15:00:00 2018-01-01 2 1721 Jan Mon yes
## 4 Melbourne Central 2017-12-31 16:00:00 2018-01-01 3 1056 Jan Mon yes
## 5 Melbourne Central 2017-12-31 17:00:00 2018-01-01 4 417 Jan Mon yes
## 6 Melbourne Central 2017-12-31 18:00:00 2018-01-01 5 222 Jan Mon yes
## 7 Melbourne Central 2017-12-31 19:00:00 2018-01-01 6 110 Jan Mon yes
## 8 Melbourne Central 2017-12-31 20:00:00 2018-01-01 7 180 Jan Mon yes
## 9 Melbourne Central 2017-12-31 21:00:00 2018-01-01 8 205 Jan Mon yes
## 10 Melbourne Central 2017-12-31 22:00:00 2018-01-01 9 326 Jan Mon yes
## # … with 8,750 more rows
walk_holiday_calendar <- frame_calendar(data = walk_holiday, x = Time, y = Count, date = Date, nrow = 6)
p2 <- ggplot(walk_holiday_calendar, aes(x = .Time, y = .Count, group = Date, colour = holiday)) +
  geom_line() +
  scale_colour_brewer(palette = "Dark2")
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |