# Load Libraries ----
library(sf)
library(dplyr)
library(assertr)
library(ggplot2)
library(colorspace)
library(ggpubr)
library(furrr)
# Source function ----
source(here::here("health_data", "R", "map-function.R"))
# Prepare output directory ----
out_dir <- here::here("health_data", "outputs", "maps")
fs::dir_create(out_dir)
# Load data ----
health_data <- readr::read_csv(
here::here(
"health_data", "data",
"lsoa-general_health.csv"
)
)
look_up <- readr::read_csv(
here::here(
"health_data", "data",
"output_area_lookup.csv"
)
)
boundaries <- read_sf(
here::here(
"health_data", "data",
"lsoa_boundaries.geojson"
)
)
# Merge data and validate ----
all_data <- left_join(health_data, look_up,
relationship = "many-to-one"
) %>%
left_join(boundaries, relationship = "many-to-one") %>%
st_as_sf() %>%
assert(not_na, lsoa_code, lsoa_name, lad_name, lad_code, geometry) %>%
verify(inherits(., "sf"))
# Process data ----
# Get ordered health category levels
health_cat_levels <- health_data %>%
select(gen_health_code, gen_health_cat) %>%
distinct() %>%
arrange(gen_health_code) %>%
pull(gen_health_cat)
# Create addition variables obs_perc & z_score, cast gen_health_cat to factor
all_data <- all_data %>%
mutate(gen_health_cat = factor(gen_health_cat,
levels = health_cat_levels
)) %>%
group_by(lsoa_code) %>%
mutate(obs_perc = observation / sum(observation) * 100) %>%
ungroup() %>%
group_by(gen_health_cat) %>%
mutate(z_score = (obs_perc - mean(obs_perc)) / sd(obs_perc)) %>%
ungroup()
# Create iteration indexes ----
idx_start <- NULL
idx_end <- NULL
# Collect command line arguments if present
args <- commandArgs(trailingOnly = TRUE)
# If idx_* values have already been set (i.e. are NULL) do nothing.
# If a command line argument is supplied (not NA) and no corresponding idx value
# has been set, convert it to integer and assign it.
# If any command arg is missing and idx_* value has not been set, set to 1 or
# total number of LADs.
lad_n <- length(unique(all_data$lad_code))
if (is.na(args[1])) {
if (is.null(idx_start)) {
idx_start <- 1L
}
} else {
idx_start <- as.integer(args[1])
}
if (is.na(args[2])) {
if (is.null(idx_start)) {
idx_end <- lad_n
}
} else {
idx_end <- as.integer(args[2])
}
idx <- idx_start:idx_end
# If running within a slurm array, split idx into chunks and subset the idxs for the
# particular task id.
array_id <- Sys.getenv('SLURM_ARRAY_TASK_ID')
array_task_n <- Sys.getenv('SLURM_ARRAY_TASK_COUNT')
if (array_id != "") {
array_task_n <- as.integer(array_task_n)
array_id <- as.integer(array_id)
idx <- idx[furrr:::make_chunks(length(idx), array_task_n)[[array_id]]]
}
# Split and subset data ----
split_data <- split(all_data, f = all_data$lad_code)[idx]
# Create maps ----
# Iterate plotting function over each LAD plot data chunk in idx
tictoc::tic()
plan(multisession)
furrr::future_walk(
split_data,
~ plot_lad_map(.x, out_dir),
.progress = TRUE,
.options = furrr_options(seed = TRUE)
)
cli::cli_h1("Job Complete")
cli::cli_alert_success("{length(idx)} map{?s} written to {.path {out_dir}}.")
cli::cli_h2("Total time elapsed: {.val {tictoc::toc(quiet = TRUE)$callback_msg}}")
Appendix
Final Scripts
On this page you will find the complete versions of final scripts as completed in the exercises
Exercise 1: Health Categories Maps final scripts
Exercise 2: NBA Playoffs final scripts
#' Play all rounds for an individual conference
#'
#' @param qualified a character vector of team slugs of conference qualifiers
#' @param nba_stats data.frame of nba stats data
#'
#' @return a list of length 1 containg the name slug of the round winner. Match logs
#' for each round are also compiled and assigned to attribute `match_logs` of the output.
play_conference <- function(qualified, nba_stats) {
round_1 <- play_round(
qualified,
nba_stats
)
semis <- play_round(
round_1,
nba_stats
)
finals <- play_round(
semis,
nba_stats
)
return(finals)
}
#' Play a round of matches. Should either be used for matches of teams in a single
#' conference or for the play-offs final.
#'
#' @param teams list of character strings of team slugs of all teams competing in round.
#' @param nba_stats data.frame of nba stats data
#' @param round_name Round name. If `NULL` (default), round name is auto-detected
#' by number of competing teams. For play-off final should be `"Play off Finals"`.
#' @param seed seed to set for lapply function.
#'
#' @return returns a list of the round winners of each match. Match logs are also
#' compiled and appended to the input's logs as attribute `match_logs` of the output
play_round <- function(teams, nba_stats, round_name = NULL, seed = TRUE) {
# Detect round name from number of teams
if (is.null(round_name)) {
round_id <- as.character(length(teams))
round_name <- switch(round_id,
"8" = "Conference Round 1",
"4" = "Conference Semi finals",
"2" = "Conference finals"
)
}
if (round_name == "Play off Finals") {
conf <- NA
conf_msg <- ""
} else {
conf <- unique(nba_stats[nba_stats$slug_team %in% unlist(teams), ]$id_conference)
conf_msg <- paste0("(conf ", conf, ") ")
}
# Signal start of round
cli::cli_h2("{round_name} {conf_msg}started!")
# Create list of round pair match ups
round_pairs <- draw_match_pairs(unlist(teams))
# Use future_lapply to play each match
round_winners <- future.apply::future_lapply(
X = round_pairs,
FUN = play_match,
nba_stats,
round_name,
future.seed = seed
)
# Compile match logs and append them to input match logs attribute
attr(round_winners, "match_logs") <- rbind(
attr(teams, "match_logs"),
compile_match_logs(round_winners)
)
# Signal round completion and return results
cli::cli_h2("{round_name} {conf_msg}COMPLETE!")
return(round_winners)
}
#' Play a single match
#'
#' @param matchup a character vector of length 2 containing the name slugs of teams
#' competing.
#' @param nba_stats data.frame of nba stats data
#' @param round_name Character string. Round name.
#'
#' @return a list of length 1 containg the name slug of the round winner. Match logs are also
#' appended as attribute `match_logs`.
play_match <- function(matchup, nba_stats, round_name) {
if (round_name == "Play off Finals") {
conf <- NA
conf_msg <- ""
} else {
conf <- unique(nba_stats[nba_stats$slug_team %in% matchup, ]$id_conference)
conf_msg <- paste0("(conf ", conf, ") ")
}
# Create list of probabilities for sampling game length
probs <- list(
c(0.9452, 0.0481, 0.0057, 6e-04, 4e-04),
c(0.7007, 0.1452, 0.0902, 0.0413, 0.0225)
)
# Assign higher probabilities for longer games to conference 2
if (length(conf) > 1) {
prob <- probs[[1]]
} else {
prob <- probs[[conf]]
}
pid <- Sys.getpid()
node <- replace_ip(system2("hostname", stdout = TRUE))
# play game
cli::cli_h3("Playing {round_name} {conf_msg}game: {.var {matchup[1]}} VS {.var {matchup[2]}}")
cli::cli_alert_info("Game location: {.val {pid}} ({node})")
# Sample game length
game_length <- sample(c(2.40, 2.65, 2.90, 3.15, 3.40), 1,
prob = prob
)
# Send system to sleep to simulate playing match
Sys.sleep(game_length)
# SAMPLE WINNER from probability of winning stats
# subset nba_stats to only team matchup data
match_df <- nba_stats[nba_stats$slug_team %in% matchup, ]
# sample winner
winner <- sample(match_df$slug_team, 1, prob = match_df$prop_win)
# print messages
cli::cli_alert_info("{matchup[1]} VS {matchup[2]} match complete in {game_length * 50} minutes")
cli::cli_alert_success("Winner: {winner}")
# Compile match information into match logs data.frame and append as attribute.
match_logs <- data.frame(
winner = winner,
team_1 = matchup[1],
team_2 = matchup[2],
pid = pid,
node = node,
game_length = game_length * 50,
date = Sys.time(),
conf = conf,
round_name = round_name
)
attr(winner, "match_logs") <- match_logs
return(winner)
}
#' Play Conference qualifiers
#'
#' @param teams a character vector of team slugs of teams competing in qualifiers
#'
#' @return a character vector of team slugs of qualified teams.
play_qualifiers <- function(teams) {
conf <- unique(teams$id_conference)
# play season
cli::cli_h2("Playing qualifiers for conference {.val {conf}} on {.var {Sys.getpid()}}")
Sys.sleep(5)
# sample qualifiers
qualified <- sample(teams$slug_team, 8, prob = teams$prop_win)
cli::cli_alert_success("Conference {.val {conf}} qualifying round complete")
return(qualified)
}
#' Draw pair matches.
#'
#' @param teams a character vector of team slugs of teams competing in round
#'
#' @return a list half the size of `teams` containing match pairs.
draw_match_pairs <- function(teams) {
n_teams <- length(teams)
matches <- sample(rep(1:(n_teams / 2), each = 2), size = n_teams)
match_pairs <- split(teams, matches)
return(match_pairs)
}
# Compile and order match_logs from lists of results
compile_match_logs <- function(x) {
logs_list <- lapply(x, function(x) {
attr(x, "match_logs")
})
logs <- do.call(rbind, logs_list)
logs[order(logs$date), ]
}
# Function replaces local IP addresses with fake IP address
replace_ip <- function(hostname) {
# Regular expression for IPv4
ipv4_pattern <- "\\b(?:[0-9]{1,3}\\.){3}[0-9]{1,3}\\b"
# Regular expression for IPv6
ipv6_pattern <- "\\b(?:[A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}\\b"
# Replace detected IPv4 addresses with a fake address
hostname <- gsub(ipv4_pattern, "192.0.2.0", hostname)
# Replace detected IPv6 addresses with a fake address
hostname <- gsub(ipv6_pattern, "2001:db8::", hostname)
hostname
}
# Load Libraries ----
library(future.apply)
# Source function ----
source(here::here("nba", "R", "playoff-future_apply-functions.R"))
# Load data ----
nba_stats <- readr::read_csv(
here::here(
"nba", "data",
"nba_stats_summaries_19-21.csv"
)
)
# Play qualifiers ----
tictoc::tic(msg = "Total Play-offs Duration")
cli::cli_h1("Qualifiers have begun!")
split_confs <- split(nba_stats,
f = nba_stats$id_conference
)
plan(multisession)
qualified_confs <- future_lapply(
split_confs,
play_qualifiers,
future.seed = 5
)
cli::cli_h2("ALL Qualifying matches COMPLETE!")
# Play Conference Rounds ----
cli::cli_h1("Conference Rounds have begun!")
# Set up parallel plan
outer_cores <- 2L
inner_cores <- parallelly::availableCores(
omit = outer_cores
) %/% outer_cores
plan(list(
tweak(multisession, workers = outer_cores),
tweak(multisession, workers = I(inner_cores))
))
conf_winners <- future_lapply(
X = qualified_confs,
FUN = play_conference,
nba_stats,
future.seed = 8
)
attr(conf_winners, "match_logs") <- compile_match_logs(conf_winners)
cli::cli_h2("ALL Conference matches COMPLETE!")
# Play Play-offs FINAL ----
cli::cli_h1("Overall Playoff final has begun!")
plan(multisession)
playoff_winner <- play_round(conf_winners,
nba_stats,
round_name = "Play off Finals",
seed = 7
)
# Announce Winner!
winner_stats <- nba_stats[nba_stats$slug_team == playoff_winner, ]
cli::cli_h1("Playoffs complete!")
cli::cli_alert_success("Winner: {.field {winner_stats$name_team}} ({winner_stats$slug_team})")
tictoc::toc()
# Write match logs to csv ----
fs::dir_create(here::here("nba", "outputs"))
write.csv(attr(playoff_winner, "match_logs"),
file = here::here("nba", "outputs", "playoff_results.csv"),
row.names = FALSE
)
library(future.apply)
# Source function ----
source(here::here("nba", "R", "playoff-future_apply-functions.R"))
nba_stats <- readr::read_csv(
here::here(
"nba", "data",
"nba_stats_summaries_19-21.csv"
)
)
# Play qualifiers ----
tictoc::tic(msg = "Total Play-offs Duration")
cli::cli_h1("Qualifiers have begun!")
split_confs <- split(nba_stats,
f = nba_stats$id_conference
)
plan(multisession)
qualified_confs <- future.apply::future_lapply(
split_confs,
play_qualifiers,
future.seed = 5
)
cli::cli_h2("ALL Qualifying matches COMPLETE!")
# Play Conference Rounds ----
cli::cli_h1("Conference Rounds have begun!")
# Set up nested parallel plan
plan(list(
tweak(future.batchtools::batchtools_slurm,
template = here::here("nba", "slurm", "batchtools.slurm.tmpl"),
resources = list(
ncpus = 4,
memory = "1GB",
walltime = 180
)
),
multisession
))
conf_winners <- future_lapply(
X = qualified_confs,
FUN = play_conference,
nba_stats,
future.seed = 8
)
attr(conf_winners, "match_logs") <- compile_match_logs(conf_winners)
cli::cli_h2("ALL Conference matches COMPLETE!")
# Play Play-offs FINAL ----
cli::cli_h1("Overall Playoff final has begun!")
plan(multisession)
playoff_winner <- play_round(conf_winners,
nba_stats,
round_name = "Play off Finals",
seed = 7
)
# Announce Winner!
winner_stats <- nba_stats[nba_stats$slug_team == playoff_winner, ]
cli::cli_h1("Playoffs complete!")
cli::cli_alert_success("Winner: {.field {winner_stats$name_team}} ({winner_stats$slug_team})")
tictoc::toc()
# Write match logs to csv ----
fs::dir_create(here::here("nba", "outputs"))
write.csv(attr(playoff_winner, "match_logs"),
file = here::here("nba", "outputs", "playoff_results.csv"),
row.names = FALSE
)