Getting Over It

This summer I crossed the alps with my road bike. I’ve recorded the whole ride and as a nice memory, I would like to visualise this ride.

A short time ago I’ve discovered the awesome R package drake. The use of this package transformed the way I do my analysis and it helps me to make my post more reproducible. The following blog post describes the underlying workflow, after which I’ve developed the underlying package transalp for this post.

Data

At first you have to install the package from github. Then you have to load it.

remotes::install_github("duju211/transalp")

library(transalp)

Load the other necessary libraries:

library(tidyverse)
library(patchwork)
library(here)
library(fs)

The package includes the underlying data:

df_act_meas_raw <- transalp::df_act_meas

df_act_meas_raw
## # A tibble: 90,917 x 10
##    id    act_date   moving velocity_smooth grade_smooth distance altitude  time
##    <chr> <date>     <lgl>            <dbl>        <dbl>    <dbl>    <dbl> <int>
##  1 3669~ 2020-06-25 FALSE              0            0        0      1452.     0
##  2 3669~ 2020-06-25 FALSE              0            0        0.9    1446    412
##  3 3669~ 2020-06-25 TRUE               0          -44.6      4.6    1446    413
##  4 3669~ 2020-06-25 TRUE               0            1.4      8.4    1446    414
##  5 3669~ 2020-06-25 TRUE               0            2.8     12.1    1446.   415
##  6 3669~ 2020-06-25 TRUE               0            2.8     15.6    1446.   416
##  7 3669~ 2020-06-25 TRUE               0            1.4     19.1    1446.   417
##  8 3669~ 2020-06-25 TRUE               3.6          1.4     22.5    1446.   418
##  9 3669~ 2020-06-25 TRUE               3.5          0       26      1446.   419
## 10 3669~ 2020-06-25 TRUE               3.5          1.6     29.5    1446.   420
## # ... with 90,907 more rows, and 2 more variables: lat <dbl>, lng <dbl>

Preprocessing

Look at the first function:

#' Function to preprocess measurements of the included activities.
#'
#' @param df_act_meas
#'
#' @return Preprocessed activities
#' @export
#'
#' @examples
#' pre_process_meas(df_act_meas)
pre_process_meas <- function(df_act_meas) {
  df_act_meas %>%
    dplyr::mutate(
      act_date_chr = as.character(act_date),
      altitude_norm = altitude / max(altitude)) %>%
    dplyr::group_by(id) %>%
    dplyr::mutate(distance_norm = distance / max(distance)) %>%
    dplyr::ungroup()
}

The function does some basic preprocessing on the included activities:

  • Turn activity date into character (for easier plotting)
  • Normalize altitude and distance
df_act_meas <- pre_process_meas(df_act_meas_raw)

Nest the data frame by id and act_date_chr. Create a new sf column with the geospatial information of the activities:

#' Turn every activity into an sf object. Nest the data frame by 'id' and
#' 'act_date_chr' to do this.
#'
#' @param df_act_meas
#'
#' @return sf object
#' @export
#'
#' @examples
convert_to_sf <- function(df_act_meas) {
  df_act_meas %>%
    tidyr::nest(act_data = -c(id, act_date_chr)) %>%
    dplyr::mutate(
      line = purrr::map(
        act_data,
        ~ sf::st_linestring(as.matrix(.x[, c("lng", "lat", "altitude")]))),
      geom = purrr::map(line, sf::st_sfc, crs = 4326)) %>%
    sf::st_as_sf()
}
sf_act_meas <- convert_to_sf(df_act_meas)

sf_act_meas
## Simple feature collection with 5 features and 4 fields
## geometry type:  LINESTRING
## dimension:      XYZ
## bbox:           xmin: 8.317612 ymin: 46.0061 xmax: 9.035269 ymax: 48.21307
## z_range:        zmin: 206.8 zmax: 2477.6
## CRS:            NA
## # A tibble: 5 x 5
##   id      act_date_chr act_data                                    line geom    
##   <chr>   <chr>        <list>                              <LINESTRING> <list>  
## 1 366972~ 2020-06-25   <tibble [2~ Z (8.596761 46.63501 1451.6, 8.5963~ <LINEST~
## 2 366465~ 2020-06-24   <tibble [2~ Z (8.602206 46.63612 1510.2, 8.6022~ <LINEST~
## 3 365904~ 2020-06-23   <tibble [1~ Z (8.625703 46.90152 447, 8.625694 ~ <LINEST~
## 4 365424~ 2020-06-22   <tibble [1~ Z (8.741265 47.49397 432, 8.741308 ~ <LINEST~
## 5 365044~ 2020-06-21   <tibble [1~ Z (9.021049 48.21307 750.2, 9.02104~ <LINEST~

Extract the start point of every tour except for the last one. Extract the end point for this tour.

#' Extract points of interest.
#'
#' @param sf_act_meas
#'
#' @return Data frame with points of interest
#' @export
#'
#' @examples
extract_poi <- function(sf_act_meas) {
  sf_act_meas %>%
    tibble::as_tibble() %>%
    dplyr::transmute(
      id, first_row = purrr::map(act_data, ~ .x[1,]),
      last_row = purrr::map(act_data, ~ .x[nrow(.x), ]),
      decisive_row = dplyr::if_else(id == "3669729902", last_row, first_row)) %>%
    tidyr::unnest(decisive_row) %>%
    dplyr::select(where(purrr::negate(purrr::is_list))) %>%
    dplyr::mutate(
      poi_name = dplyr::case_when(
        act_date == "2020-06-21" ~ "Albstadt",
        act_date == "2020-06-22" ~ "Winterthur",
        act_date == "2020-06-23" ~ "Flüelen",
        act_date == "2020-06-24" ~ "Andermatt",
        act_date == "2020-06-25" ~ "Lugano",
        TRUE ~ NA_character_)) %>%
    dplyr::rename(lon = lng)
}
df_poi <- extract_poi(sf_act_meas)

Visualisation

Altitude

Visualise the altitude data:

#' Visualise the altitude by using a ridge plot.
#'
#' @param df_act_meas
#'
#' @return ggplot of altitude data
#' @export
#'
#' @examples
vis_altitude_ridge <- function(df_act_meas) {
  df_act_meas %>%
    tibble::as_tibble() %>%
    tidyr::unnest_legacy(act_data) %>%
    dplyr::mutate(distance = distance / 1000) %>%
    ggplot2::ggplot(ggplot2::aes(
      x = distance, y = act_date_chr, height = altitude,
      group = act_date_chr, color = act_date_chr)) +
    ggridges::geom_ridgeline(scale = 0.0025, alpha = 0.2) +
    ggplot2::labs(
      x = "Distance [km]", y = ggplot2::element_blank()) +
    ggplot2::theme_light() +
    ggplot2::theme(
      legend.position = "none") +
    ggplot2::scale_y_discrete(position = "right") +
    ggplot2::scale_x_continuous(breaks = scales::breaks_width(10))
}

The function mainly applies the ggridges::geom_ridgeline function to the data. A form of visualisation that I’ve already used a lot. Its style is reminiscent of the famous Joy Division album cover.

gg_altitude_ridge <- vis_altitude_ridge(sf_act_meas)

gg_altitude_ridge

Spatial Data

Determine the map data for all the activities. Use the ggmap::get_stamenmap function to download the data. The bounding box is calculated from the sf_act_meas object.

#' Get the ground map for the visualisation of the spatial data.
#'
#' @param sf_act_meas
#' @param tol_bbox
#' @param map_zoom
#'
#' @return
#' @export
#'
#' @examples
get_alpen_map <- function(sf_act_meas, tol_bbox = 0.015, map_zoom = 10) {
  bbox <- sf::st_bbox(sf_act_meas)

  stamen_map <- ggmap::get_stamenmap(
    bbox = c(
      left = bbox$xmin[[1]] - tol_bbox,
      right = bbox$xmax[[1]] + tol_bbox,
      bottom = bbox$ymin[[1]] - tol_bbox,
      top = bbox$ymax[[1]] + tol_bbox),
    maptype = "terrain-background", zoom = map_zoom, color = "bw")
}
gg_alpen_map <- get_alpen_map(sf_act_meas, tol_bbox = 0.1, map_zoom = 9)

ggmap::ggmap(gg_alpen_map)

Plot the activity and the point of interest data onto the map. Use the ggrepel package to plot the labels of the points of interest. This avoids too much overplotting.

#' Plot the spatial data (route + point of interest) onto the map
#'
#' @param sf_act_meas
#' @param gg_alpen
#'
#' @return ggplot of spatial data
#' @export
#'
#' @examples
vis_ride <- function(sf_act_meas, gg_alpen, df_poi) {
  ggmap::ggmap(gg_alpen) +
    ggplot2::geom_sf(
      data = sf_act_meas, inherit.aes = FALSE,
      mapping = aes(color = act_date_chr), size = 1.2) +
    ggrepel::geom_label_repel(
      data = df_poi, mapping = aes(label = poi_name), alpha = 0.6,
      family = "Fira Code Retina", size = 2.5) +
    ggplot2::theme_light() +
    ggplot2::labs(
      x = "Longitude", y = "Latitude") +
    ggplot2::theme(legend.position = "none")
}
gg_rides <- vis_ride(sf_act_meas, gg_alpen_map, df_poi)

gg_rides

Combine Visualisations

Combine everything into one big plot using the patchwork package:

# extrafont::font_import()
extrafont::loadfonts(device = "win")

final_plot <- (gg_rides + gg_altitude_ridge) +
      plot_annotation(
        title = "Transalp 2020",
        subtitle = "Albstadt - Lugano") &
      theme(text = element_text(family = "Fira Code Retina"))

final_plot

The extrafont package helps with using some fancy fonts. The font_import function has to be called once. Comment the function call in the above code chunk because of this.

Save the final plot as a png file. This file serves as the title photo of the blog post.

ggsave(here::here("static", "post", "transalp_plot_final-1.png"))