Back to Article
MixSIAR results plotting and model performance - design specific mixtures
Download Source

MixSIAR results plotting and model performance - design specific mixtures

Author

Alex Koiter

Load Libraries

In [1]:
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scoringRules)
library(patchwork)
library(viridis)
Loading required package: viridisLite
library(gt)

Load data

In [2]:
grid <- read_csv(here::here("./notebooks/Mixtures_grid/grid_final_results.csv")) %>%
  mutate(sampling_design = "Grid")
Rows: 63000 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): model_filename, run
dbl (4): prop_forest, alpha.prior, Agriculture, Forest
lgl (4): mix, source, discr, model

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
transect <- read_csv(here::here("./notebooks/Mixtures_transect/transect_final_results.csv")) %>%
  mutate(sampling_design = "Transect")
Rows: 63000 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): model_filename, run
dbl (4): prop_forest, alpha.prior, Agriculture, Forest
lgl (4): mix, source, discr, model

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
likely <- read_csv(here::here("./notebooks/Mixtures_likely/likely_final_results.csv")) %>%
  mutate(sampling_design = "Likely to erode")
Rows: 63000 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): model_filename, run
dbl (4): prop_forest, alpha.prior, Agriculture, Forest
lgl (4): mix, source, discr, model

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all <- grid %>%
  bind_rows(transect, likely) %>%
  mutate(sampling_design = fct_relevel(sampling_design, "Grid", "Transect", "Likely to erode"))

Plotting

In [3]:
supfig6 <- ggplot(data = all, aes(x = as.factor(prop_forest), y = Forest, fill = sampling_design)) +
  geom_boxplot(size = 0.1, outlier.size = 0.1) +
  theme_bw() +
  scale_y_continuous(expand = c(0,0.01)) +
  labs(y = "Modelled Forest Proportion", x ="Virtual Mixture Forest Proportion") +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_viridis_d()
supfig6

saveRDS(supfig6, file = here::here("./images/supfig6.rds"))
# ggsave(plot = mixing_plot1, filename = "Figures R1/Mixture_design_boxplot.png", width = 174, height = 100, units = "mm", dpi =600)
In [4]:
supfig7 <- ggplot(data = all, aes(x = as.factor(prop_forest), y = Forest - prop_forest, fill = sampling_design)) +
  geom_hline(yintercept = 0) +
  geom_boxplot(size = 0.1, outlier.size = 0.1) +
  theme_bw() +
  scale_y_continuous(expand = c(0,0.01), limits = c(-0.5, 0.5)) +
  labs(y = "Proportion Difference (Modelled - Mixture)", x ="Virtual Mixture Forest Proportion") +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_viridis_d() +
  annotate(geom = "text", x = "0.5", y = 0.5, label = "Over estimation", vjust = 1) +
  annotate(geom = "text", x = "0.5", y = -0.5, label = "Under estimation", vjust = 0)
supfig7

saveRDS(supfig7, file = here::here("./images/supfig7.rds"))
# ggsave(plot = mixing_plot2, filename = "Figures R1/Mixture_design_diff_boxplot.png", width = 174, height = 100, units = "mm", dpi =600)
In [5]:
plot1 <- all %>%
  select(prop_forest, Agriculture, Forest, sampling_design) %>%
  pivot_longer(cols = c(Agriculture,  Forest), names_to = "source", values_to = "value") %>%
  group_by(prop_forest, sampling_design, source) %>%
  summarise(`0.5 quantile` = median(value),
            `0.25 quantile` = quantile(value, 0.25),
            `0.75 quantile` = quantile(value, 0.75),
            `0.975 quantile` = quantile(value, 0.975),
            `0.025 quantile` = quantile(value, 0.025)) %>%
  pivot_longer(cols = `0.5 quantile`:`0.025 quantile`, names_to = "quartile", values_to = "value") %>%
  #filter(source == "Forest") %>%
  pivot_wider(names_from = sampling_design, values_from = value)
`summarise()` has grouped output by 'prop_forest', 'sampling_design'. You can
override using the `.groups` argument.
plot1 %>%
  group_by(source, quartile) %>%
  summarise(r = round(cor(Transect, `Likely to erode`), 3)) 
`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.
# A tibble: 10 × 3
# Groups:   source [2]
   source      quartile           r
   <chr>       <chr>          <dbl>
 1 Agriculture 0.025 quantile 0.999
 2 Agriculture 0.25 quantile  1    
 3 Agriculture 0.5 quantile   1    
 4 Agriculture 0.75 quantile  1    
 5 Agriculture 0.975 quantile 1    
 6 Forest      0.025 quantile 1    
 7 Forest      0.25 quantile  1    
 8 Forest      0.5 quantile   1    
 9 Forest      0.75 quantile  1    
10 Forest      0.975 quantile 0.999
plot1 %>%
  group_by(source, quartile) %>%
  summarise(r = round(cor(Grid, `Likely to erode`), 3)) 
`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.
# A tibble: 10 × 3
# Groups:   source [2]
   source      quartile           r
   <chr>       <chr>          <dbl>
 1 Agriculture 0.025 quantile 1    
 2 Agriculture 0.25 quantile  1    
 3 Agriculture 0.5 quantile   1    
 4 Agriculture 0.75 quantile  1    
 5 Agriculture 0.975 quantile 0.999
 6 Forest      0.025 quantile 0.999
 7 Forest      0.25 quantile  1    
 8 Forest      0.5 quantile   1    
 9 Forest      0.75 quantile  1    
10 Forest      0.975 quantile 1    
plot1 %>%
  group_by(source, quartile) %>%
  summarise(r = round(cor(Transect, Grid), 3)) 
`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.
# A tibble: 10 × 3
# Groups:   source [2]
   source      quartile           r
   <chr>       <chr>          <dbl>
 1 Agriculture 0.025 quantile 0.999
 2 Agriculture 0.25 quantile  1    
 3 Agriculture 0.5 quantile   1    
 4 Agriculture 0.75 quantile  1    
 5 Agriculture 0.975 quantile 0.999
 6 Forest      0.025 quantile 0.999
 7 Forest      0.25 quantile  1    
 8 Forest      0.5 quantile   1    
 9 Forest      0.75 quantile  1    
10 Forest      0.975 quantile 0.999
p1 <- ggplot(data = plot1, aes(x = Grid, y = Transect, colour = source)) +
  geom_point() +
  theme_bw() +
  geom_abline(slope = 1) +
  labs(x = "Grid", y = "Transect") +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_colour_manual(values = c("#C5B477", "#439078")) +
  facet_wrap(~quartile, ncol = 5)

p2 <- ggplot(data = plot1, aes(x = Grid, y = `Likely to erode`, colour = source)) +
  geom_point() +
  theme_bw() +
  geom_abline(slope = 1) +
  labs(x = "Grid", y = "Likely to erode") +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_colour_manual(values = c("#C5B477", "#439078")) +
  facet_wrap(~quartile, ncol = 5)

p3 <- ggplot(data = plot1, aes(x = Transect, y = `Likely to erode`, colour = source)) +
  geom_point() +
  theme_bw() +
  geom_abline(slope = 1) +
  labs(x = "Transect",y = "Likely to erode") +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_colour_manual(values = c("#C5B477", "#439078")) +
  facet_wrap(~quartile, ncol = 5)

mixing_plot3 <- p1 / p2 / p3 + plot_layout(guides = "collect") & theme(legend.position = 'bottom', legend.title = element_blank())
mixing_plot3

# ggsave(plot = mixing_plot3, filename = "Figures R1/Mixture_design_1to1.png", width = 174, height = 200, units = "mm", dpi =600)

Model performance

In [6]:
summary_1 <- all %>%
  select(prop_forest, Agriculture, Forest, sampling_design) %>%
  pivot_longer(cols = c(Agriculture,  Forest), names_to = "source", values_to = "value") %>%
  group_by(sampling_design, prop_forest, source) %>%
  summarise(median = median(value),
            q25 = quantile(value, 0.25),
            q75 = quantile(value, 0.75),
            W50 = quantile(value, 0.75) - quantile(value, 0.25),
            W95 = quantile(value, 0.975) - quantile(value, 0.025),
            P50 = sum(value[value < quantile(value, 0.75) & value > quantile(value, 0.25)])/n(),
            P95 = sum(value[value < quantile(value, 0.975) & value > quantile(value, 0.025)])/n()) %>%
  group_by(sampling_design, source) %>%
  summarise(W50 = mean(W50),
            W95 = mean(W95),
            P50 = mean(P50),
            P95 = mean(P95))
`summarise()` has grouped output by 'sampling_design', 'prop_forest'. You can
override using the `.groups` argument.
`summarise()` has grouped output by 'sampling_design'. You can override using
the `.groups` argument.
summary_2 <- all %>%
  mutate(prop_ag = 1 - prop_forest) %>%
  group_by(prop_forest, sampling_design) %>%
  summarise(CRPS_forest = crps_sample(y = unique(prop_forest), dat = Forest),
            CRPS_ag = crps_sample(y = unique(prop_ag), dat = Agriculture)) %>%
  group_by(sampling_design) %>%
  summarise(Forest = mean(CRPS_forest),
            Agriculture = mean(CRPS_ag)) %>%
  pivot_longer(cols = c(Forest, Agriculture), names_to = "source", values_to = "CRPS")
`summarise()` has grouped output by 'prop_forest'. You can override using the
`.groups` argument.
CRPS_plot <- all %>%
  mutate(prop_ag = 1 - prop_forest) %>%
  group_by(prop_forest, prop_ag, sampling_design) %>%
  summarise(Forest = crps_sample(y = unique(prop_forest), dat = Forest),
            Agriculture = crps_sample(y = unique(prop_ag), dat = Agriculture)) 
`summarise()` has grouped output by 'prop_forest', 'prop_ag'. You can override
using the `.groups` argument.
In [7]:
supfig9 <- ggplot(data = CRPS_plot, aes(x = prop_forest, y = Forest, fill = sampling_design)) +
  geom_line(aes(colour = sampling_design)) +
  geom_point(shape = 21) +
  theme_bw() +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  scale_fill_viridis_d() +
  scale_colour_viridis_d() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  labs(x = "Virtual Mixture Forest Proportion", y = "CRPS") 
supfig9

saveRDS(supfig9, file = here::here("./images/supfig9.rds"))
# ggsave(plot = mixing_plot4, filename = "Figures R1/Mixture_design_CRPS.png", width = 84, height = 84, units = "mm", dpi =600)
In [8]:
summary_3 <- all %>%
  select(prop_forest, Agriculture, Forest, sampling_design) %>%
  #mutate(prop_ag = 1 - prop_forest) %>%
  pivot_longer(cols = c(Agriculture,  Forest), names_to = "source", values_to = "value") %>%
  #pivot_longer(cols = c(prop_forest,  prop_ag), names_to = "proportion", values_to = "actual") %>%
  group_by(sampling_design, source, prop_forest) %>%
  mutate(error50 = ifelse(value >= quantile(value, 0.25) & value <= quantile(value, 0.75), 0,
                     ifelse(value < quantile(value, 0.25), quantile(value, 0.25) - value,
                            quantile(value, 0.75) - value)),
         error95 = ifelse(value >= quantile(value, 0.025) & value <= quantile(value, 0.975), 0,
                          ifelse(value < quantile(value, 0.025), quantile(value, 0.025) - value,
                                 quantile(value, 0.975) - value)),
         Main = value > .5,
         Main50 = quantile(value, 0.75) > .5,
         Main95 = quantile(value, 0.975) > .5,
         Hit50 = ifelse(Main == T & Main50 == T, T, F),
         Hit95 = ifelse(Main == T & Main95 == T, T, F),
         Miss50 = ifelse(Main == T & Main50 == F, T, F),
         Miss95 = ifelse(Main == T & Main95 == F, T, F),
         FA50 = ifelse(Main == F & Main50 == T, T, F),
         FA95 = ifelse(Main == F & Main95 == T, T, F)) %>%
  group_by(sampling_design, source) %>%
  summarise(MAE50 = mean(abs(error50)),
            MAE95 = mean(abs(error95)),
            ME50 = mean(error50),
            ME95 = mean(error95),
            NSE50 = 1 - (sum(error50^2)) / sum((value - mean(value))^2),
            NSE95 = 1 - (sum(error95^2)) / sum((value - mean(value))^2),
            CSI50 = sum(Hit50)/(sum(Hit50) + sum(Miss50) + sum(FA50)),
            CSI95 = sum(Hit95)/(sum(Hit95) + sum(Miss95) + sum(FA95)),
            HR50 = sum(Hit50) / (sum(Hit50) + sum(Miss50)),
            HR95 = sum(Hit95) / (sum(Hit95) + sum(Miss95)))
`summarise()` has grouped output by 'sampling_design'. You can override using
the `.groups` argument.
summary_all <- summary_3 %>%
  right_join(summary_2) %>%
  right_join(summary_1) %>% 
  ungroup() %>%
  mutate(analyis = paste(sampling_design, source)) %>%
  select(-sampling_design, -source) %>%
  pivot_longer(cols = MAE50:P95, names_to = "Parameter", values_to = "value") %>%
  mutate(value = format(round(value, 2), digits = 2)) %>%
  pivot_wider(names_from = analyis, values_from = value) %>%
  mutate(`Evaluation criteria` = case_when(Parameter %in% c("P50", "P95", "W50", "W95") ~ "Uncertainty",
                                           Parameter %in% c("MAE50", "MAE95", "ME50", "ME95") ~ "Residuals",
                                           Parameter %in% c("CRPS", "NSE50", "NSE95") ~ "Performance",
                                           Parameter %in% c("CSI50", "CSI95", "HR50", "HR95") ~ "Contingency"))
Joining with `by = join_by(sampling_design, source)`
Joining with `by = join_by(sampling_design, source)`
summary_all
# A tibble: 15 × 8
   Parameter `Grid Agriculture` `Grid Forest` `Transect Agriculture`
   <chr>     <chr>              <chr>         <chr>                 
 1 MAE50     0.01               0.01          0.02                  
 2 MAE95     0.00               0.00          0.00                  
 3 ME50      0.00               0.00          0.00                  
 4 ME95      0.00               0.00          0.00                  
 5 NSE50     0.99               0.99          0.99                  
 6 NSE95     1.00               1.00          1.00                  
 7 CSI50     0.93               0.92          0.91                  
 8 CSI95     0.82               0.86          0.81                  
 9 HR50      0.98               0.99          0.97                  
10 HR95      1.00               1.00          1.00                  
11 CRPS      0.02               0.02          0.02                  
12 W50       0.06               0.06          0.08                  
13 W95       0.18               0.18          0.24                  
14 P50       0.25               0.25          0.25                  
15 P95       0.48               0.47          0.48                  
# ℹ 4 more variables: `Transect Forest` <chr>,
#   `Likely to erode Agriculture` <chr>, `Likely to erode Forest` <chr>,
#   `Evaluation criteria` <chr>
In [9]:
suptab7 <- summary_all |>
  group_by(`Evaluation criteria`) |>
  gt()|>
  tab_style(style =  cell_text(weight = "bold", align = "center"), locations =  cells_row_groups()) |>
  tab_options(column_labels.font.weight = "bold")
suptab7
Parameter Grid Agriculture Grid Forest Transect Agriculture Transect Forest Likely to erode Agriculture Likely to erode Forest
Residuals
MAE50 0.01 0.01 0.02 0.02 0.01 0.01
MAE95 0.00 0.00 0.00 0.00 0.00 0.00
ME50 0.00 0.00 0.00 0.00 0.00 0.00
ME95 0.00 0.00 0.00 0.00 0.00 0.00
Performance
NSE50 0.99 0.99 0.99 0.99 0.99 0.99
NSE95 1.00 1.00 1.00 1.00 1.00 1.00
CRPS 0.02 0.02 0.02 0.02 0.01 0.01
Contingency
CSI50 0.93 0.92 0.91 0.91 0.93 0.93
CSI95 0.82 0.86 0.81 0.80 0.87 0.87
HR50 0.98 0.99 0.97 0.98 0.99 0.99
HR95 1.00 1.00 1.00 1.00 1.00 1.00
Uncertainty
W50 0.06 0.06 0.08 0.08 0.06 0.06
W95 0.18 0.18 0.24 0.24 0.18 0.18
P50 0.25 0.25 0.25 0.25 0.25 0.25
P95 0.48 0.47 0.48 0.47 0.47 0.48
saveRDS(suptab7, file = here::here("./images/suptab7.rds"))