---
title: "Visualize Sports Injury Data"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{visualize-injury-data}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
editor_options:
chunk_output_type: console
---
```{r, include = FALSE}
library(knitr)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE) # to supress R-CMD check
## to fold/hook the code
hook_output <- knit_hooks$get("output")
knit_hooks$set(output = function(x, options) {
lines <- options$output.lines
if (is.null(lines)) {
return(hook_output(x, options)) # pass to default hook
}
x <- unlist(strsplit(x, "\n"))
more <- "..."
if (length(lines) == 1) {
if (length(x) > lines) {
# truncate the output, but add ....
x <- c(head(x, lines), more)
}
} else {
x <- c(if (abs(lines[1]) > 1) more else NULL,
x[lines],
if (length(x) > lines[abs(length(lines))]) more else NULL
)
}
# paste these lines together
x <- paste(c(x, ""), collapse = "\n")
hook_output(x, options)
})
modern_r <- getRversion() >= "4.1.0"
```
```{r setup, message = F}
library(injurytools)
library(ggplot2)
library(dplyr)
library(gridExtra)
library(grid)
library(knitr)
```
**Example data**: we continue exploring the cohort of Liverpool Football Club male's first team players over two consecutive seasons, 2017-2018 and 2018-2019, scrapped from https://www.transfermarkt.com/ website[^visualize-note-1].
[^visualize-note-1]: These data sets are provided for illustrative purposes. We warn that they might not be accurate and could potentially include discrepancies or incomplete information compared to what actually occurred.
# A quick glance
```{r, fig.width = 13.7, fig.height = 7}
gg_photo(injd,
title = "Overview of injuries:\nLiverpool FC 1st male team during 2017-2018 and 2018-2019 seasons",
by_date = "2 month",
fix = TRUE) +
## plus some lines of ggplot2 code..
xlab("Follow-up date") + ylab("Players") + labs(caption = "source: transfermarkt.com") +
theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 22),
axis.text.x.bottom = element_text(size = 13, angle = 20, hjust = 1),
axis.text.y.left = element_text(size = 12),
axis.title.x = element_text(size = 20, face = "bold", vjust = -1),
axis.title.y = element_text(size = 20, face = "bold", vjust = 1.8),
legend.text = element_text(size = 20),
plot.caption = element_text(face = "italic", size = 12, colour = "gray10"))
```
Let's count how many injuries (red crosses in the graph) occurred and how severe they were (length of the thick black line).
```{r, warning = FALSE}
# warnings set to FALSE
df_summary <- calc_summary(injd)
df_summary_perinj <- calc_summary(injd, by = "injury_type")
# injds
```
Code for tidying up the tables
```{r, eval = F}
df_summary |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(2, 7, 1, incidence_new, burden_new) |>
kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
df_summary_perinj |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(1:2, 9, 4, incidence_new, burden_new) |>
kable(col.names = c("Type of injury", "N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
```
**Overall**
```{r, echo = F, eval = modern_r}
df_summary |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(2, 7, 1, incidence_new, burden_new) |>
kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
```
**Overall per type of injury**
```{r, echo = F, eval = modern_r}
df_summary_perinj |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(1:2, 9, 4, incidence_new, burden_new) |>
kable(col.names = c("Type of injury", "N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
```
Let's plot the information shown in the second table in a risk matrix that displays injury incidence against injury burden.
```{r, eval = F}
# warnings set to FALSE
gg_riskmatrix(injd,
by = "injury_type",
title = "Risk matrix")
```
Code for further plot specifications
```{r, eval = F}
# warnings set to FALSE
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
axis.text.x.bottom = element_text(size = 20),
axis.text.y.left = element_text(size = 20),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15))
gg_riskmatrix(injd,
by = "injury_type",
title = "Risk matrix") +
scale_fill_manual(name = "Type of injury",
values = palette) +
guides(fill = guide_legend(override.aes = list(size = 5))) +
theme3
```
```{r, echo = F, fig.width = 9, fig.height = 5.8, warning = F}
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
axis.text.x.bottom = element_text(size = 18),
axis.text.y.left = element_text(size = 18),
axis.title.x = element_text(size = 18),
axis.title.y = element_text(size = 18),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15))
gg_riskmatrix(injd,
by = "injury_type",
title = "Risk matrix") +
scale_fill_manual(name = "Type of injury",
values = palette) +
guides(fill = guide_legend(override.aes = list(size = 5))) +
theme3
```
# Comparing injuries occurred in 17/18 vs. 18/19
We prepare two `injd` objects:
```{r, warning = F}
# warnings set to FALSE
injd1 <- cut_injd(injd, datef = 2017)
injd2 <- cut_injd(injd, date0 = 2018)
```
```{r, eval = F}
## Plot just for checking whether cut_injd() worked well
p1 <- gg_photo(injd1, fix = TRUE, by_date = "3 months")
p2 <- gg_photo(injd2, fix = TRUE, by_date = "3 months")
grid.arrange(p1, p2, ncol = 2)
```
```{r, echo = F, fig.width = 18, fig.height = 4}
p1 <- gg_photo(injd1, fix = TRUE, by_date = "3 months")
p1$layers[[3]]$aes_params$size <- 2
p2 <- gg_photo(injd2, fix = TRUE, by_date = "3 months")
p2$layers[[3]]$aes_params$size <- 2
grid.arrange(p1, p2, ncol = 2)
```
Let's compute injury summary statistics for each season.
```{r, warning = FALSE}
# warnings set to FALSE
df_summary1 <- calc_summary(injd1, quiet = T)
df_summary2 <- calc_summary(injd2, quiet = T)
```
Code for tidying up the tables
```{r, eval = F}
## **Season 2017/2018**
df_summary1 |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(2, 7, 1, incidence_new, burden_new) |>
kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
## **Season 2018/2019**
df_summary2 |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(2, 7, 1, incidence_new, burden_new) |>
kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
```
**Season 2017/2018**
```{r, echo = F, eval = modern_r}
df_summary1 |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(2, 7, 1, incidence_new, burden_new) |>
kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
```
**Season 2018/2019**
```{r, echo = F, eval = modern_r}
df_summary2 |>
mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
burden_new = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |>
dplyr::select(2, 7, 1, incidence_new, burden_new) |>
kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
caption = "Injury incidence and injury burden are reported as 100 player-matches",
align = "c")
```
## - Who were the most injured players? And the most severely affected?
Player-wise statistics can be computed by `df_summay1_pl <- calc_summary(injd1, overall = FALSE)`. Then, we plot them:
```{r}
p11 <- gg_rank(injd1, line_overall = TRUE)
p12 <- gg_rank(injd1, summary_stat = "burden", line_overall = TRUE)
p21 <- gg_rank(injd2, line_overall = TRUE)
p22 <- gg_rank(injd2, summary_stat = "burden", line_overall = TRUE)
# grid.arrange(p11, p21, p12, p22, nrow = 2)
```
Code for further plot specifications
```{r, eval = F}
theme2 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 26),
axis.text.x.bottom = element_text(size = 18),
axis.text.y.left = element_text(size = 13),
axis.title.x = element_text(size = 11, vjust = 1),
axis.title.y = element_text(size = 22, face = "bold", vjust = 1))
p11 <- p11 +
xlab("Injury incidence") +
ylab("Player-wise incidence (injuries per 100 player-match)") +
ggtitle("2017/2018 season") +
scale_y_continuous(limits = c(0, 80)) + ## same x axis
theme2 +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.5, "cm"))
p12 <- p12 +
xlab("Injury burden") +
ylab("Player-wise burden (days lost per 100 player-match)") +
scale_y_continuous(limits = c(0, 6110)) +
theme2 +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.65, "cm"))
p21 <- p21 +
ylab("Player-wise incidence (injuries per 100 player-match)") +
ggtitle("2018/2019 season") +
scale_y_continuous(limits = c(0, 80)) +
theme2
p22 <- p22 +
ylab("Player-wise burden (days lost per 100 player-match)") +
scale_y_continuous(limits = c(0, 6110)) +
theme2
grid.arrange(p11, p21, p12, p22, nrow = 2)
```
```{r, echo = F, fig.width = 14, fig.height = 11.8}
theme2 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 26),
axis.text.x.bottom = element_text(size = 18),
axis.text.y.left = element_text(size = 13),
axis.title.x = element_text(size = 11, vjust = 1),
axis.title.y = element_text(size = 22, face = "bold", vjust = 1))
p11 <- p11 +
xlab("Injury incidence") +
ylab("Player-wise incidence (injuries per 100 player-match)") +
ggtitle("2017/2018 season") +
scale_y_continuous(limits = c(0, 80)) + ## same x axis
theme2 +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.5, "cm"))
p12 <- p12 +
xlab("Injury burden") +
ylab("Player-wise burden (days lost per 100 player-match)") +
scale_y_continuous(limits = c(0, 6110)) +
theme2 +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.65, "cm"))
p21 <- p21 +
ylab("Player-wise incidence (injuries per 100 player-match)") +
ggtitle("2018/2019 season") +
scale_y_continuous(limits = c(0, 80)) +
theme2
p22 <- p22 +
ylab("Player-wise burden (days lost per 100 player-match)") +
scale_y_continuous(limits = c(0, 6110)) +
theme2
grid.arrange(p11, p21, p12, p22, nrow = 2)
```
## - Which injuries were more frequent? And more burdensome?
```{r, warning = F}
# warnings set to FALSE
p1 <- gg_riskmatrix(injd1, by = "injury_type",
title = "Season 2017/2018", add_contour = FALSE)
p2 <- gg_riskmatrix(injd2, by = "injury_type",
title = "Season 2018/2019", add_contour = FALSE)
# Print both plots side by side
# grid.arrange(p1, p2, nrow = 1)
```
Code for further plot specifications
```{r, eval = F}
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
axis.text.x.bottom = element_text(size = 18),
axis.text.y.left = element_text(size = 18),
axis.title.x = element_text(size = 18),
axis.title.y = element_text(size = 18),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15))
## Plot
p1 <- gg_riskmatrix(injd1, by = "injury_type",
title = "Season 2017/2018", add_contour = T,
cont_max_x = 5.2, cont_max_y = 125, ## after checking the data
bins = 10)
p2 <- gg_riskmatrix(injd2, by = "injury_type",
title = "Season 2018/2019", add_contour = T,
cont_max_x = 5.2, cont_max_y = 125,
bins = 10)
p1 <- p1 +
scale_x_continuous(limits = c(-0.05, 5.2)) +
scale_y_continuous(limits = c(-0.05, 125)) +
scale_fill_manual(name = "Type of injury",
values = palette) +
guides(fill = guide_legend(override.aes = list(size = 5))) +
theme3
p2 <- p2 +
scale_x_continuous(limits = c(-0.5, 5.2)) +
scale_y_continuous(limits = c(-0.5, 125)) +
scale_fill_manual(name = "Type of injury",
values = palette) + # keep the same color coding
guides(fill = guide_legend(override.aes = list(size = 5))) +
theme3
grid.arrange(p1, p2, ncol = 2,
top = textGrob("Risk matrices", gp = gpar(fontsize = 26, font = 2))) ## for the main title
```
```{r, echo = F, fig.width = 13, fig.height = 5.8, warning = FALSE, message = FALSE}
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
axis.text.x.bottom = element_text(size = 18),
axis.text.y.left = element_text(size = 18),
axis.title.x = element_text(size = 18),
axis.title.y = element_text(size = 18),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15))
## Plot
p1 <- gg_riskmatrix(injd1, by = "injury_type",
title = "Season 2017/2018", add_contour = T,
cont_max_x = 5.2, cont_max_y = 125, ## after checking the data
bins = 10)
p2 <- gg_riskmatrix(injd2, by = "injury_type",
title = "Season 2018/2019", add_contour = T,
cont_max_x = 5.2, cont_max_y = 125,
bins = 10)
p1 <- p1 +
scale_x_continuous(limits = c(-0.05, 5.2)) +
scale_y_continuous(limits = c(-0.05, 125)) +
scale_fill_manual(name = "Type of injury",
values = palette) + # get rid off the green (pos: 4)
guides(fill = guide_legend(override.aes = list(size = 5))) +
theme3
p2 <- p2 +
scale_x_continuous(limits = c(-0.5, 5.2)) +
scale_y_continuous(limits = c(-0.5, 125)) +
scale_fill_manual(name = "Type of injury",
values = palette) + # keep the same color coding
guides(fill = guide_legend(override.aes = list(size = 5))) +
theme3
grid.arrange(p1, p2, ncol = 2,
top = textGrob("Risk matrices", gp = gpar(fontsize = 26, font = 2))) ## for the main title
```
## - How many players were injured in each month?
We will create bar plots, with each bar representing the monthly prevalence[^visualize-note-2].
[^visualize-note-2]: See the *Note* section in `?calc_prevalence()` or have a look at this section in [Estimate summary statistics](https://lzumeta.github.io/injurytools/articles/estimate-epi-measures.html#calc_prevalence) vignette, to better understand what the proportions refer to.
```{r, eval = F}
gg_prevalence(injd, time_period = "monthly",
line_mean = TRUE)
```
Code for further plot specifications
```{r, eval = F}
theme4 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
axis.text.x = element_text(size = 13.5),
axis.text.y = element_text(size = 18),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20),
strip.text = element_text(size = 20))
gg_prevalence(injd, time_period = "monthly",
line_mean = TRUE,
title = "Monthly prevalence of sports injuries") +
theme4
```
```{r, echo = F, fig.width = 12.8, fig.height = 5.2}
theme4 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
axis.text.x = element_text(size = 13.5),
axis.text.y = element_text(size = 18),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20),
strip.text = element_text(size = 20))
gg_prevalence(injd, time_period = "monthly",
line_mean = TRUE,
title = "Monthly prevalence of sports injuries") +
theme4
```
```{r, eval = F}
gg_prevalence(injd, time_period = "monthly",
by = "injury_type", line_mean = TRUE)
```
Code for further plot specifications
```{r, eval = F}
palette2 <- c("seagreen3", "#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
gg_prevalence(injd, time_period = "monthly",
by = "injury_type", line_mean = TRUE,
title = "Monthly prevalence of each type of sports injuries") +
scale_fill_manual(name = "Type of injury",
values = palette2) +
theme4
```
```{r, echo = F, fig.width = 12.8, fig.height = 5.2}
palette2 <- c("seagreen3", "#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
gg_prevalence(injd, time_period = "monthly",
by = "injury_type", line_mean = TRUE,
title = "Monthly prevalence of each type of sports injuries") +
scale_fill_manual(name = "Type of injury",
values = palette2) +
theme4
```