How to make a bar graph with ggplot2?

Asked

Viewed 62 times

0

I have a graph generated by a package. I would like to replicate it and make some changes to the colors, but I'm not able to understand the code behind the chart.

Graph:

[image: enter image description here]

Code used by the package:

# Builds a "Covid19 Stripes" ggplot: one facet row per `iso3c` value and, inside
# each row, one vertical segment per date whose color encodes the chosen
# variable.
#
# Arguments:
#   data                  Data frame. The code below reads the columns iso3c,
#                         country, date, confirmed, deaths, recovered and
#                         population (plus timestamp for the default caption
#                         date).
#   type                  One of "confirmed", "deaths", "recovered", "active";
#                         the variable mapped to color.
#   min_cases             A group (iso3c) is kept only if the maximum of its
#                         original `type` values (per-capita scaled when
#                         per_capita = TRUE) is >= min_cases. The default
#                         depends on `per_capita` and `type`.
#   cumulative            FALSE: color shows the averaged daily change;
#                         TRUE: color shows the level of `type` itself.
#   change_ave            Window width handed to zoo::rollmean().
#   per_capita            TRUE: values are expressed per 100,000 of population.
#   population_cutoff     If > 0, keep rows with population above
#                         1e6 * population_cutoff.
#   diverging_color_scale TRUE: gradient2 scale with "pseudo_log" transform;
#                         FALSE: viridis scale with "log10" transform.
#   countries             Optional vector of iso3c codes to keep.
#   sort_countries        NULL, "start", "magnitude" or "countries"; controls
#                         the factor level order of iso3c (facet order).
#   data_date_str         Text inserted into the caption ("Data obtained on").
#
# Returns the ggplot object.
#
# Note: default arguments are evaluated lazily, so the defaults of `min_cases`
# and `data_date_str` are computed at first use inside the body; the latter
# therefore sees `data` as it has been modified up to that point.
function (data = download_merged_data(cached = TRUE, silent = TRUE), 
    type = "deaths", min_cases = ifelse(per_capita, ifelse(type == 
        "deaths", 5, 50), ifelse(type == "deaths", 500, 5000)), 
    cumulative = FALSE, change_ave = 7, per_capita = FALSE, population_cutoff = 0, 
    diverging_color_scale = FALSE, countries = NULL, sort_countries = NULL, 
    data_date_str = format(lubridate::as_date(data$timestamp[1]), 
        "%B %d, %Y")) 
{
    # ---- Argument validation ----
    if (!type %in% c("confirmed", "deaths", "recovered", "active")) 
        stop("Wrong 'type': Only 'confirmed', 'deaths', 'recovered' and 'active' are supported")
    if (!is.logical(cumulative)) 
        stop("'cumulative' needs to be a logical value")
    change_ave <- as.integer(change_ave)
    # NOTE(review): this check lets 0 through although the message says
    # "positive" -- confirm whether `< 1` was intended.
    if (change_ave < 0) 
        stop("'change_ave' needs to be a positive integer")
    # Tell the user which iso3c codes have no population value.
    if (population_cutoff > 0 || per_capita) 
        message(paste("Population data required. Observations for the following jurisdictions", 
            "will be dropped as the World Bank is not providing population data for", 
            "them: ", paste(unique(data$iso3c[is.na(data$population)]), 
                collapse = ", ")))
    # population_cutoff is expressed in millions.
    if (population_cutoff > 0) {
        data <- data %>% dplyr::filter(.data$population > 1e+06 * 
            population_cutoff)
    }
    # ---- Data preparation ----
    # Derive `active` and keep a copy of the selected variable in `orig_type`,
    # because the `type` column itself may be overwritten further down.
    data <- data %>% dplyr::mutate(active = .data$confirmed - 
        .data$recovered - .data$deaths, orig_type = !!rlang::sym(type))
    # Daily view: first difference of `type` within each iso3c, then a
    # right-aligned rolling mean over `change_ave` observations.
    # NOTE(review): `na.pad` may be deprecated in favor of `fill` in current
    # zoo versions -- check the zoo documentation.
    if (!cumulative) 
        data <- data %>% dplyr::group_by(.data$iso3c) %>% dplyr::mutate(delta = !!rlang::sym(type) - 
            dplyr::lag(!!rlang::sym(type)), change = zoo::rollmean(.data$delta, 
            change_ave, na.pad = TRUE, align = "right")) %>% 
            dplyr::ungroup()
    # Both `data` and `df` become the iso3c-grouped, NA-filtered frame.
    data <- df <- data %>% dplyr::group_by(.data$iso3c) %>% dplyr::filter(!is.na(!!rlang::sym(type)))
    # Daily view: replace the `type` column by the smoothed change.
    if (!cumulative) 
        df <- df %>% dplyr::mutate(`:=`(!!type, .data$change))
    # Per-capita view: scale both the plotted and the original value to
    # "per 100,000 of population"; rows without population are dropped.
    if (per_capita) 
        df <- df %>% dplyr::filter(!is.na(.data$population)) %>% 
            dplyr::mutate(`:=`(!!type, 1e+05 * (!!rlang::sym(type))/.data$population), 
                orig_type = 1e+05 * .data$orig_type/.data$population)
    # Keep only groups whose maximum original value reaches `min_cases`, drop
    # rows where the plotted value is NA, and reduce to the columns needed.
    df <- df %>% dplyr::filter(max(.data$orig_type, na.rm = TRUE) >= 
        min_cases) %>% dplyr::filter(!is.na(!!rlang::sym(type))) %>% 
        dplyr::ungroup() %>% dplyr::select(.data$iso3c, .data$country, 
        .data$date, .data$orig_type, !!rlang::sym(type))
    # Non-diverging scale: values <= 0 are replaced by the smallest positive
    # value (presumably because that scale uses a log10 transform below).
    if (!diverging_color_scale) {
        df[df[, type] <= 0, type] <- min(df[df[, type] > 0, type])
    }
    # ---- Optional country selection ----
    # Warn when a non-empty `countries` was given but none of it is in the data.
    if (!is.null(countries) && (length(countries) > 1 || countries != 
        "") && !any(countries %in% df$iso3c)) 
        warning(paste("Non-NULL 'countries' value but no countries matched in data", 
            "(Did you specify correct ISO3c codes or do values for 'min_cases'", 
            "lead to the exclusion of your selected countries' data?)"))
    if (!is.null(countries) && (length(countries) > 1 || countries != 
        "")) {
        df <- df %>% dplyr::filter(.data$iso3c %in% countries)
    }
    # ---- Optional facet ordering (via factor levels of iso3c) ----
    if (!is.null(sort_countries)) {
        if (!sort_countries %in% c("start", "magnitude", "countries")) 
            stop("'sort_countries' needs to be either 'start', 'magnitude' or 'countries'")
        # "start": order by the earliest date on which orig_type > min_cases.
        # NOTE(review): the group filter above uses `>=` while this uses `>`;
        # a group whose maximum equals min_cases exactly would be missing from
        # `sortdf` and get an NA factor level -- confirm.
        if (sort_countries == "start") {
            sortdf <- df %>% dplyr::group_by(.data$iso3c) %>% 
                dplyr::filter(.data$orig_type > min_cases) %>% 
                dplyr::summarise(min_date = min(.data$date)) %>% 
                dplyr::arrange(.data$min_date)
            df$iso3c <- factor(df$iso3c, levels = sortdf$iso3c)
        }
        # "magnitude": order by descending maximum of orig_type.
        if (sort_countries == "magnitude") {
            sortdf <- df %>% dplyr::group_by(.data$iso3c) %>% 
                dplyr::summarise(max_vals = max(.data$orig_type)) %>% 
                dplyr::arrange(-.data$max_vals)
            df$iso3c <- factor(df$iso3c, levels = sortdf$iso3c)
        }
        # "countries": use the order of the `countries` argument.
        if (sort_countries == "countries") {
            if (!(!is.null(countries) && (length(countries) > 
                1 || countries != ""))) {
                stop("'sort_countries' == 'countries' but 'countries' is not set")
            }
            df$iso3c <- factor(df$iso3c, levels = countries)
        }
    }
    # ---- Caption text ----
    caption_str <- paste("Case data: Johns Hopkins University Center for Systems Science", 
        "and Engineering (JHU CSSE).")
    if (per_capita || population_cutoff > 0) {
        caption_str <- paste(caption_str, "Population data: Worldbank.")
    }
    caption_str <- paste(caption_str, sprintf("Data obtained on %s.", 
        data_date_str))
    # Use an integer format when min_cases is a whole number, two decimals
    # otherwise.
    if (min_cases > 0) {
        caption_str <- paste(caption_str, "The sample is limited to countries with", 
            sprintf(ifelse(round(min_cases) == min_cases, "more than %d %s.", 
                "more than %.2f %s."), min_cases, dplyr::case_when(type == 
                "deaths" ~ "deaths", type == "confirmed" ~ "confirmed cases", 
                type == "recovered" ~ "recovered cases", type == 
                  "active" ~ "active cases")))
    }
    if (population_cutoff > 0) {
        caption_str <- paste(caption_str, "The sample is limited to countries with", 
            sprintf("a population exceeding %.0f million.", population_cutoff))
    }
    # Wrap the finished caption at 100 characters.
    caption_str <- paste(strwrap(paste(caption_str, "Code: https://github.com/joachim-gassen/tidycovid19."), 
        width = 100), collapse = "\n")
    # ---- Legend title (color_str) ----
    type_str <- dplyr::case_when(type == "deaths" ~ "deaths\n", 
        type == "confirmed" ~ "confirmed cases\n", type == "recovered" ~ 
            "recovered cases\n", type == "active" ~ "active cases\n")
    if (!cumulative) 
        color_str <- paste("Daily change in", type_str)
    else {
        substr(type_str, 1, 1) <- toupper(substr(type_str, 1, 
            1))
        color_str <- type_str
    }
    if (per_capita) {
        color_str <- paste(color_str, "per 100,000 inhabitants")
    }
    if (!cumulative && change_ave > 1) 
        color_str <- paste(color_str, sprintf("(averaged over %d days)", 
            change_ave))
    # ---- Plot title ----
    title_str <- "Covid19 Stripes:"
    title_str <- paste(title_str, dplyr::case_when(type == "deaths" ~ 
        "Reported deaths", type == "confirmed" ~ "Confirmed cases", 
        type == "recovered" ~ "Recovered cases", type == "active" ~ 
            "Active cases"))
    if (!cumulative) 
        title_str <- paste(title_str, "(new cases per day)")
    else title_str <- paste(title_str, "(cumulative)")
    # ---- Plot ----
    # The "stripes" are vertical segments: one per date, running from y = 0 to
    # y = 1, with the `type` column mapped to the color aesthetic.
    p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$date, color = !!rlang::sym(type))) + 
        ggplot2::geom_segment(ggplot2::aes(xend = .data$date), 
            size = 2, y = 0, yend = 1)
    # Continuous color scale: diverging blue/grey/red on a pseudo-log
    # transform, or viridis on a log10 transform.
    if (diverging_color_scale) {
        p <- p + ggplot2::scale_color_gradient2(name = color_str, 
            low = grDevices::rgb(0.23, 0.299, 0.754), mid = grDevices::rgb(0.865, 
                0.865, 0.865), high = grDevices::rgb(0.706, 0.016, 
                0.15), trans = "pseudo_log", breaks = c(0))
    }
    else {
        p <- p + ggplot2::scale_color_continuous(name = color_str, 
            type = "viridis", trans = "log10")
    }
    # One facet row per iso3c with no spacing between panels; horizontal
    # colorbar legend at the bottom; title and caption aligned to the plot.
    p <- p + ggplot2::facet_grid(rows = ggplot2::vars(iso3c)) + 
        ggplot2::theme_minimal() + ggplot2::guides(color = ggplot2::guide_colourbar(title.vjust = 0.8, 
        barheight = 0.5, barwidth = 10)) + ggplot2::theme(plot.title.position = "plot", 
        plot.caption.position = "plot", plot.caption = ggplot2::element_text(hjust = 0), 
        axis.title.x = ggplot2::element_text(hjust = 1), legend.position = "bottom", 
        strip.text.y.right = ggplot2::element_text(angle = 0), 
        panel.spacing = ggplot2::unit(0, "lines")) + ggplot2::labs(x = NULL, 
        title = title_str, caption = caption_str)
    # NOTE(review): assigned after the only bare use of `iso3c` (in vars()
    # above); presumably just silences an R CMD check "no visible binding"
    # note -- confirm.
    iso3c <- NULL
    p
}

Package and function that generates the graph I’m using:

# One-time setup: install the development version of tidycovid19 from GitHub.
remotes::install_github("joachim-gassen/tidycovid19")

library(dplyr)    # %>% and filter()
library(ggplot2)  # labs(), guides(), aes(), geom_segment(), scale_*()
library(tidycovid19)

updates <- download_merged_data(cached = TRUE)

# Keep the plot in an object so that it (and its data, via p$data) can be
# inspected before further layers and scales are added.
p <- updates %>%
  filter(date >= as.Date("2020-02-10")) %>%
  plot_covid19_stripes(
    type = "confirmed",
    cumulative = FALSE,
    change_ave = 7,
    per_capita = FALSE,
    min_cases = 15000,
    sort_countries = "start"
  )

p +
  labs(x = "", y = "", title = "", caption = "") +
  # The stripes map a continuous variable to `color`, so the discrete *fill*
  # scale ggthemes::scale_fill_economist() cannot recolor them. Instead, the
  # Economist palette is interpolated into a continuous *color* gradient.
  # Adding a color scale replaces the one set by plot_covid19_stripes(), so
  # the log10 transform and the legend title are specified again here.
  scale_color_gradientn(
    name = "Daily change in confirmed cases\n(averaged over 7 days)",
    colours = unname(ggthemes::economist_pal()(6)),
    trans = "log10"
  ) +
  guides(color = guide_colourbar(barwidth = 20)) +
  # Thicker stripes drawn on top of the original layer.
  geom_segment(aes(xend = date), size = 5, y = 0, yend = 1)
  • 1

    Your changes have some problems. To begin with, you can't use scale_fill_economist() because the plot maps the color aesthetic, not fill. In addition, the mapped values are continuous, while the palette you want to use is discrete.

  • 1

    One idea is to save the plot returned by the function in an object, e.g. by putting p <- updates %>% ... in your code, and then work with the underlying data via p$data. Alternatively, change the chart type in the original function.

No answers

Browse other questions tagged

You are not signed in. Login or sign up in order to post.