0
I have a graph generated by a package. I would like to replicate it and make some changes to the colors, but I’m not able to understand the code behind the chart.
Graph:
Code used by the package:
# Build a "Covid19 Stripes" plot: one facet row per country (iso3c), one
# vertical colored segment per date, colored by the selected case measure.
#
# Arguments:
#   data                  Data frame with (at least) the columns used below:
#                         iso3c, country, date, confirmed, deaths, recovered,
#                         population and timestamp.
#   type                  One of "confirmed", "deaths", "recovered", "active".
#                         "active" is derived as confirmed - recovered - deaths.
#   min_cases             A country is kept only if the maximum of its original
#                         (level, possibly per-capita) `type` series reaches
#                         this value. The default depends on `per_capita` and
#                         `type`.
#   cumulative            Logical. If FALSE, the plotted value is the daily
#                         change in `type`, averaged over `change_ave` days.
#   change_ave            Window width passed to zoo::rollmean() (right-aligned).
#   per_capita            If TRUE, values are scaled to cases per 100,000
#                         inhabitants and rows without population are dropped.
#   population_cutoff     If > 0, keep only rows with population above
#                         `population_cutoff` million.
#   diverging_color_scale If TRUE, use a blue/grey/red gradient2 scale with a
#                         pseudo-log transform; otherwise non-positive values
#                         are raised to the smallest positive value and a
#                         log10 viridis scale is used.
#   countries             Optional character vector of ISO3c codes to keep.
#                         NULL or "" means "no restriction".
#   sort_countries        NULL, or one of "start" (first date at which
#                         `min_cases` is reached), "magnitude" (largest
#                         maximum first) or "countries" (order of `countries`).
#   data_date_str         Date string shown in the caption. The default is
#                         evaluated lazily from data$timestamp[1].
#
# Returns a ggplot object. Emits a message when population data is required
# and a warning when `countries` is set but matches nothing in the sample.
function(data = download_merged_data(cached = TRUE, silent = TRUE),
         type = "deaths",
         min_cases = ifelse(per_capita,
                            ifelse(type == "deaths", 5, 50),
                            ifelse(type == "deaths", 500, 5000)),
         cumulative = FALSE, change_ave = 7, per_capita = FALSE,
         population_cutoff = 0, diverging_color_scale = FALSE,
         countries = NULL, sort_countries = NULL,
         data_date_str = format(lubridate::as_date(data$timestamp[1]),
                                "%B %d, %Y"))
{
  # ---- Argument validation -------------------------------------------------
  if (!type %in% c("confirmed", "deaths", "recovered", "active")) {
    stop("Wrong 'type': Only 'confirmed', 'deaths', 'recovered' and 'active' are supported")
  }
  if (!is.logical(cumulative)) {
    stop("'cumulative' needs to be a logical value")
  }
  change_ave <- as.integer(change_ave)
  if (change_ave < 0) {
    stop("'change_ave' needs to be a positive integer")
  }

  # TRUE when the caller actually asked for a country subset. Behaves like the
  # former inline test for NULL, "" and one or more codes, but no longer fails
  # on zero-length or NA input (both are treated as "not set").
  has_countries <- length(countries) > 1 ||
    (length(countries) == 1 && isTRUE(countries != ""))

  # ---- Population handling -------------------------------------------------
  if (population_cutoff > 0 || per_capita) {
    message(paste(
      "Population data required. Observations for the following jurisdictions",
      "will be dropped as the World Bank is not providing population data for",
      "them: ",
      paste(unique(data$iso3c[is.na(data$population)]), collapse = ", ")
    ))
  }
  if (population_cutoff > 0) {
    # `population_cutoff` is expressed in millions.
    data <- data %>%
      dplyr::filter(.data$population > 1e+06 * population_cutoff)
  }

  # ---- Derive the plotted measure ------------------------------------------
  # `orig_type` keeps the level of the selected measure; it is what the
  # `min_cases` threshold and the country sorting are based on.
  data <- data %>%
    dplyr::mutate(
      active = .data$confirmed - .data$recovered - .data$deaths,
      orig_type = !!rlang::sym(type)
    )
  if (!cumulative) {
    # Daily change per country, smoothed with a right-aligned rolling mean.
    data <- data %>%
      dplyr::group_by(.data$iso3c) %>%
      dplyr::mutate(
        delta = !!rlang::sym(type) - dplyr::lag(!!rlang::sym(type)),
        change = zoo::rollmean(.data$delta, change_ave,
                               na.pad = TRUE, align = "right")
      ) %>%
      dplyr::ungroup()
  }
  data <- df <- data %>%
    dplyr::group_by(.data$iso3c) %>%
    dplyr::filter(!is.na(!!rlang::sym(type)))
  if (!cumulative) {
    # From here on the `type` column of `df` holds the averaged daily change.
    df <- df %>% dplyr::mutate(!!type := .data$change)
  }
  if (per_capita) {
    df <- df %>%
      dplyr::filter(!is.na(.data$population)) %>%
      dplyr::mutate(
        !!type := 1e+05 * (!!rlang::sym(type)) / .data$population,
        orig_type = 1e+05 * .data$orig_type / .data$population
      )
  }

  # Keep countries (df is still grouped by iso3c) whose maximum level reaches
  # `min_cases`, then drop rows without a plottable value. Columns are
  # selected by name (strings) because `.data$col` inside select() is
  # deprecated in tidyselect.
  df <- df %>%
    dplyr::filter(max(.data$orig_type, na.rm = TRUE) >= min_cases) %>%
    dplyr::filter(!is.na(!!rlang::sym(type))) %>%
    dplyr::ungroup() %>%
    dplyr::select("iso3c", "country", "date", "orig_type", !!rlang::sym(type))

  if (!diverging_color_scale) {
    # The log10 colour scale cannot show values <= 0, so raise them to the
    # smallest positive value in the sample.
    df[df[, type] <= 0, type] <- min(df[df[, type] > 0, type])
  }

  # ---- Country subset ------------------------------------------------------
  if (has_countries && !any(countries %in% df$iso3c)) {
    warning(paste("Non-NULL 'countries' value but no countries matched in data",
                  "(Did you specify correct ISO3c codes or do values for 'min_cases'",
                  "lead to the exclusion of your selected countries' data?)"))
  }
  if (has_countries) {
    df <- df %>% dplyr::filter(.data$iso3c %in% countries)
  }

  # ---- Facet order ---------------------------------------------------------
  if (!is.null(sort_countries)) {
    if (!sort_countries %in% c("start", "magnitude", "countries")) {
      stop("'sort_countries' needs to be either 'start', 'magnitude' or 'countries'")
    }
    if (sort_countries == "start") {
      # Use `>=` so this matches the sample filter above. With a strict `>`,
      # a country whose maximum equals `min_cases` stayed in `df` but was
      # missing from the factor levels, turning its iso3c into NA.
      sortdf <- df %>%
        dplyr::group_by(.data$iso3c) %>%
        dplyr::filter(.data$orig_type >= min_cases) %>%
        dplyr::summarise(min_date = min(.data$date)) %>%
        dplyr::arrange(.data$min_date)
      df$iso3c <- factor(df$iso3c, levels = sortdf$iso3c)
    }
    if (sort_countries == "magnitude") {
      sortdf <- df %>%
        dplyr::group_by(.data$iso3c) %>%
        dplyr::summarise(max_vals = max(.data$orig_type)) %>%
        dplyr::arrange(-.data$max_vals)
      df$iso3c <- factor(df$iso3c, levels = sortdf$iso3c)
    }
    if (sort_countries == "countries") {
      if (!has_countries) {
        stop("'sort_countries' == 'countries' but 'countries' is not set")
      }
      df$iso3c <- factor(df$iso3c, levels = countries)
    }
  }

  # ---- Caption -------------------------------------------------------------
  caption_str <- paste("Case data: Johns Hopkins University Center for Systems Science",
                       "and Engineering (JHU CSSE).")
  if (per_capita || population_cutoff > 0) {
    caption_str <- paste(caption_str, "Population data: Worldbank.")
  }
  caption_str <- paste(caption_str, sprintf("Data obtained on %s.",
                                            data_date_str))
  if (min_cases > 0) {
    # Integer-valued thresholds are printed with %d, fractional ones with two
    # decimals.
    caption_str <- paste(
      caption_str, "The sample is limited to countries with",
      sprintf(
        ifelse(round(min_cases) == min_cases,
               "more than %d %s.", "more than %.2f %s."),
        min_cases,
        dplyr::case_when(
          type == "deaths" ~ "deaths",
          type == "confirmed" ~ "confirmed cases",
          type == "recovered" ~ "recovered cases",
          type == "active" ~ "active cases"
        )
      )
    )
  }
  if (population_cutoff > 0) {
    caption_str <- paste(caption_str, "The sample is limited to countries with",
                         sprintf("a population exceeding %.0f million.", population_cutoff))
  }
  caption_str <- paste(
    strwrap(paste(caption_str, "Code: https://github.com/joachim-gassen/tidycovid19."),
            width = 100),
    collapse = "\n"
  )

  # ---- Legend title --------------------------------------------------------
  type_str <- dplyr::case_when(
    type == "deaths" ~ "deaths\n",
    type == "confirmed" ~ "confirmed cases\n",
    type == "recovered" ~ "recovered cases\n",
    type == "active" ~ "active cases\n"
  )
  if (!cumulative) {
    color_str <- paste("Daily change in", type_str)
  } else {
    # Capitalise the first letter when the measure name starts the title.
    substr(type_str, 1, 1) <- toupper(substr(type_str, 1, 1))
    color_str <- type_str
  }
  if (per_capita) {
    color_str <- paste(color_str, "per 100,000 inhabitants")
  }
  if (!cumulative && change_ave > 1) {
    color_str <- paste(color_str, sprintf("(averaged over %d days)",
                                          change_ave))
  }

  # ---- Plot title ----------------------------------------------------------
  title_str <- "Covid19 Stripes:"
  title_str <- paste(title_str, dplyr::case_when(
    type == "deaths" ~ "Reported deaths",
    type == "confirmed" ~ "Confirmed cases",
    type == "recovered" ~ "Recovered cases",
    type == "active" ~ "Active cases"
  ))
  if (!cumulative) {
    title_str <- paste(title_str, "(new cases per day)")
  } else {
    title_str <- paste(title_str, "(cumulative)")
  }

  # ---- Plot ----------------------------------------------------------------
  # Each observation is a vertical segment from y = 0 to y = 1 at its date;
  # only the colour carries information.
  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$date, color = !!rlang::sym(type))) +
    ggplot2::geom_segment(ggplot2::aes(xend = .data$date),
                          size = 2, y = 0, yend = 1)
  if (diverging_color_scale) {
    p <- p + ggplot2::scale_color_gradient2(
      name = color_str,
      low = grDevices::rgb(0.23, 0.299, 0.754),
      mid = grDevices::rgb(0.865, 0.865, 0.865),
      high = grDevices::rgb(0.706, 0.016, 0.15),
      trans = "pseudo_log", breaks = c(0)
    )
  } else {
    p <- p + ggplot2::scale_color_continuous(name = color_str,
                                             type = "viridis", trans = "log10")
  }
  p <- p +
    ggplot2::facet_grid(rows = ggplot2::vars(iso3c)) +
    ggplot2::theme_minimal() +
    ggplot2::guides(color = ggplot2::guide_colourbar(title.vjust = 0.8,
                                                     barheight = 0.5, barwidth = 10)) +
    ggplot2::theme(
      plot.title.position = "plot",
      plot.caption.position = "plot",
      plot.caption = ggplot2::element_text(hjust = 0),
      axis.title.x = ggplot2::element_text(hjust = 1),
      legend.position = "bottom",
      strip.text.y.right = ggplot2::element_text(angle = 0),
      panel.spacing = ggplot2::unit(0, "lines")
    ) +
    ggplot2::labs(x = NULL, title = title_str, caption = caption_str)
  # No runtime effect at this point; presumably kept to silence the R CMD
  # check note about the bare `iso3c` in vars() above -- TODO confirm.
  iso3c <- NULL
  p
}
The package and the function call I’m using to generate the graph:
# Install tidycovid19 from GitHub and attach the packages whose functions are
# called unqualified below.
remotes::install_github("joachim-gassen/tidycovid19")
library(tidycovid19)
library(dplyr)    # %>%, filter()
library(ggplot2)  # labs(), guides(), aes(), geom_segment(), scale_color_*()

updates <- download_merged_data(cached = TRUE)

# Stripes plot of the 7-day averaged daily change in confirmed cases, with a
# custom colour gradient instead of the function's default viridis scale.
updates %>%
  filter(date >= "2020-02-10") %>%
  plot_covid19_stripes(
    type = "confirmed",
    cumulative = FALSE,
    change_ave = 7,
    per_capita = FALSE,
    min_cases = 15000,
    sort_countries = "start") +
  labs(x = "", y = "", title = "", caption = "") +
  # The plot maps a *continuous* variable to `colour`, so a discrete fill
  # scale such as ggthemes::scale_fill_economist() is never used. Build a
  # continuous colour gradient from the Economist palette instead. Adding a
  # second colour scale replaces the one set inside plot_covid19_stripes()
  # (ggplot2 prints a message about this), so the log10 transform and the
  # legend title are restated here.
  scale_color_gradientn(
    name = "Daily change in confirmed cases\n (averaged over 7 days)",
    colours = ggthemes::economist_pal()(4),
    trans = "log10"
  ) +
  guides(color = guide_colourbar(barwidth = 20)) +
  # Extra, wider segment layer drawn on top of the function's own layer.
  geom_segment(aes(xend = date), size = 5, y = 0, yend = 1)
Look, your changes have some problems. To begin with, you can’t use `scale_fill_economist` because the scale is `color`, not `fill`; in addition, the values are continuous while the colors you want to use are discrete. – Jorge Mendes
One idea is to save the graph returned by the function in an object, e.g. by putting `p <- updates...` in your code, and then try to do something with the data using `p$data`. Or change the chart type in the original function. – Jorge Mendes