MixSIAR results plotting and model performance

Load Libraries

In [1]:

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(scoringRules)
library(patchwork)
library(viridis)

Loading required package: viridisLite

library(gt)

Load data

In [2]:

grid <- read_csv(here::here("./notebooks/Mixtures_grid/grid_final_results.csv")) %>%
  mutate(sampling_design = "Grid")

Rows: 63000 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): model_filename, run
dbl (4): prop_forest, alpha.prior, Agriculture, Forest
lgl (4): mix, source, discr, model

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

transect <- read_csv(here::here("./notebooks/Mixtures_transect/transect_final_results.csv")) %>%
  mutate(sampling_design = "Transect")

Rows: 63000 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): model_filename, run
dbl (4): prop_forest, alpha.prior, Agriculture, Forest
lgl (4): mix, source, discr, model

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

likely <- read_csv(here::here("./notebooks/Mixtures_likely/likely_final_results.csv")) %>%
  mutate(sampling_design = "Likely to erode")

Rows: 63000 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): model_filename, run
dbl (4): prop_forest, alpha.prior, Agriculture, Forest
lgl (4): mix, source, discr, model

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

all <- grid %>%
  bind_rows(transect, likely) %>%
  mutate(sampling_design = fct_relevel(sampling_design, "Grid", "Transect", "Likely to erode"))

Plotting

In [3]:

supfig6 <- ggplot(data = all, aes(x = as.factor(prop_forest), y = Forest, fill = sampling_design)) +
  geom_boxplot(size = 0.1, outlier.size = 0.1) +
  theme_bw() +
  scale_y_continuous(expand = c(0,0.01)) +
  labs(y = "Modelled Forest Proportion", x ="Virtual Mixture Forest Proportion") +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_viridis_d()
supfig6

saveRDS(supfig6, file = here::here("./images/supfig6.rds"))
# ggsave(plot = mixing_plot1, filename = "Figures R1/Mixture_design_boxplot.png", width = 174, height = 100, units = "mm", dpi =600)

In [4]:

supfig7 <- ggplot(data = all, aes(x = as.factor(prop_forest), y = Forest - prop_forest, fill = sampling_design)) +
  geom_hline(yintercept = 0) +
  geom_boxplot(size = 0.1, outlier.size = 0.1) +
  theme_bw() +
  scale_y_continuous(expand = c(0,0.01), limits = c(-0.5, 0.5)) +
  labs(y = "Proportion Difference (Modelled - Mixture)", x ="Virtual Mixture Forest Proportion") +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_viridis_d() +
  annotate(geom = "text", x = "0.5", y = 0.5, label = "Over estimation", vjust = 1) +
  annotate(geom = "text", x = "0.5", y = -0.5, label = "Under estimation", vjust = 0)
supfig7

saveRDS(supfig7, file = here::here("./images/supfig7.rds"))
# ggsave(plot = mixing_plot2, filename = "Figures R1/Mixture_design_diff_boxplot.png", width = 174, height = 100, units = "mm", dpi =600)

In [5]:

plot1 <- all %>%
  select(prop_forest, Agriculture, Forest, sampling_design) %>%
  pivot_longer(cols = c(Agriculture,  Forest), names_to = "source", values_to = "value") %>%
  group_by(prop_forest, sampling_design, source) %>%
  summarise(`0.5 quantile` = median(value),
            `0.25 quantile` = quantile(value, 0.25),
            `0.75 quantile` = quantile(value, 0.75),
            `0.975 quantile` = quantile(value, 0.975),
            `0.025 quantile` = quantile(value, 0.025)) %>%
  pivot_longer(cols = `0.5 quantile`:`0.025 quantile`, names_to = "quartile", values_to = "value") %>%
  #filter(source == "Forest") %>%
  pivot_wider(names_from = sampling_design, values_from = value)

`summarise()` has grouped output by 'prop_forest', 'sampling_design'. You can
override using the `.groups` argument.

plot1 %>%
  group_by(source, quartile) %>%
  summarise(r = round(cor(Transect, `Likely to erode`), 3))

`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.

# A tibble: 10 × 3
# Groups:   source [2]
   source      quartile           r
   <chr>       <chr>          <dbl>
 1 Agriculture 0.025 quantile 0.999
 2 Agriculture 0.25 quantile  1    
 3 Agriculture 0.5 quantile   1    
 4 Agriculture 0.75 quantile  1    
 5 Agriculture 0.975 quantile 1    
 6 Forest      0.025 quantile 1    
 7 Forest      0.25 quantile  1    
 8 Forest      0.5 quantile   1    
 9 Forest      0.75 quantile  1    
10 Forest      0.975 quantile 0.999

plot1 %>%
  group_by(source, quartile) %>%
  summarise(r = round(cor(Grid, `Likely to erode`), 3))

`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.

# A tibble: 10 × 3
# Groups:   source [2]
   source      quartile           r
   <chr>       <chr>          <dbl>
 1 Agriculture 0.025 quantile 1    
 2 Agriculture 0.25 quantile  1    
 3 Agriculture 0.5 quantile   1    
 4 Agriculture 0.75 quantile  1    
 5 Agriculture 0.975 quantile 0.999
 6 Forest      0.025 quantile 0.999
 7 Forest      0.25 quantile  1    
 8 Forest      0.5 quantile   1    
 9 Forest      0.75 quantile  1    
10 Forest      0.975 quantile 1

plot1 %>%
  group_by(source, quartile) %>%
  summarise(r = round(cor(Transect, Grid), 3))

`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.

# A tibble: 10 × 3
# Groups:   source [2]
   source      quartile           r
   <chr>       <chr>          <dbl>
 1 Agriculture 0.025 quantile 0.999
 2 Agriculture 0.25 quantile  1    
 3 Agriculture 0.5 quantile   1    
 4 Agriculture 0.75 quantile  1    
 5 Agriculture 0.975 quantile 0.999
 6 Forest      0.025 quantile 0.999
 7 Forest      0.25 quantile  1    
 8 Forest      0.5 quantile   1    
 9 Forest      0.75 quantile  1    
10 Forest      0.975 quantile 0.999

p1 <- ggplot(data = plot1, aes(x = Grid, y = Transect, colour = source)) +
  geom_point() +
  theme_bw() +
  geom_abline(slope = 1) +
  labs(x = "Grid", y = "Transect") +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_colour_manual(values = c("#C5B477", "#439078")) +
  facet_wrap(~quartile, ncol = 5)

p2 <- ggplot(data = plot1, aes(x = Grid, y = `Likely to erode`, colour = source)) +
  geom_point() +
  theme_bw() +
  geom_abline(slope = 1) +
  labs(x = "Grid", y = "Likely to erode") +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_colour_manual(values = c("#C5B477", "#439078")) +
  facet_wrap(~quartile, ncol = 5)

p3 <- ggplot(data = plot1, aes(x = Transect, y = `Likely to erode`, colour = source)) +
  geom_point() +
  theme_bw() +
  geom_abline(slope = 1) +
  labs(x = "Transect",y = "Likely to erode") +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_colour_manual(values = c("#C5B477", "#439078")) +
  facet_wrap(~quartile, ncol = 5)

mixing_plot3 <- p1 / p2 / p3 + plot_layout(guides = "collect") & theme(legend.position = 'bottom', legend.title = element_blank())
mixing_plot3

# ggsave(plot = mixing_plot3, filename = "Figures R1/Mixture_design_1to1.png", width = 174, height = 200, units = "mm", dpi =600)

Model performance

In [6]:

summary_1 <- all %>%
  select(prop_forest, Agriculture, Forest, sampling_design) %>%
  pivot_longer(cols = c(Agriculture,  Forest), names_to = "source", values_to = "value") %>%
  group_by(sampling_design, prop_forest, source) %>%
  summarise(median = median(value),
            q25 = quantile(value, 0.25),
            q75 = quantile(value, 0.75),
            W50 = quantile(value, 0.75) - quantile(value, 0.25),
            W95 = quantile(value, 0.975) - quantile(value, 0.025),
            P50 = sum(value[value < quantile(value, 0.75) & value > quantile(value, 0.25)])/n(),
            P95 = sum(value[value < quantile(value, 0.975) & value > quantile(value, 0.025)])/n()) %>%
  group_by(sampling_design, source) %>%
  summarise(W50 = mean(W50),
            W95 = mean(W95),
            P50 = mean(P50),
            P95 = mean(P95))

`summarise()` has grouped output by 'sampling_design', 'prop_forest'. You can
override using the `.groups` argument.
`summarise()` has grouped output by 'sampling_design'. You can override using
the `.groups` argument.

summary_2 <- all %>%
  mutate(prop_ag = 1 - prop_forest) %>%
  group_by(prop_forest, sampling_design) %>%
  summarise(CRPS_forest = crps_sample(y = unique(prop_forest), dat = Forest),
            CRPS_ag = crps_sample(y = unique(prop_ag), dat = Agriculture)) %>%
  group_by(sampling_design) %>%
  summarise(Forest = mean(CRPS_forest),
            Agriculture = mean(CRPS_ag)) %>%
  pivot_longer(cols = c(Forest, Agriculture), names_to = "source", values_to = "CRPS")

`summarise()` has grouped output by 'prop_forest'. You can override using the
`.groups` argument.

CRPS_plot <- all %>%
  mutate(prop_ag = 1 - prop_forest) %>%
  group_by(prop_forest, prop_ag, sampling_design) %>%
  summarise(Forest = crps_sample(y = unique(prop_forest), dat = Forest),
            Agriculture = crps_sample(y = unique(prop_ag), dat = Agriculture))

`summarise()` has grouped output by 'prop_forest', 'prop_ag'. You can override
using the `.groups` argument.

In [7]:

supfig9 <- ggplot(data = CRPS_plot, aes(x = prop_forest, y = Forest, fill = sampling_design)) +
  geom_line(aes(colour = sampling_design)) +
  geom_point(shape = 21) +
  theme_bw() +
  scale_y_continuous(expand = c(0,0.01)) +
  scale_x_continuous(expand = c(0,0.01)) +
  scale_fill_viridis_d() +
  scale_colour_viridis_d() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  labs(x = "Virtual Mixture Forest Proportion", y = "CRPS") 
supfig9

saveRDS(supfig9, file = here::here("./images/supfig9.rds"))
# ggsave(plot = mixing_plot4, filename = "Figures R1/Mixture_design_CRPS.png", width = 84, height = 84, units = "mm", dpi =600)

In [8]:

summary_3 <- all %>%
  select(prop_forest, Agriculture, Forest, sampling_design) %>%
  #mutate(prop_ag = 1 - prop_forest) %>%
  pivot_longer(cols = c(Agriculture,  Forest), names_to = "source", values_to = "value") %>%
  #pivot_longer(cols = c(prop_forest,  prop_ag), names_to = "proportion", values_to = "actual") %>%
  group_by(sampling_design, source, prop_forest) %>%
  mutate(error50 = ifelse(value >= quantile(value, 0.25) & value <= quantile(value, 0.75), 0,
                     ifelse(value < quantile(value, 0.25), quantile(value, 0.25) - value,
                            quantile(value, 0.75) - value)),
         error95 = ifelse(value >= quantile(value, 0.025) & value <= quantile(value, 0.975), 0,
                          ifelse(value < quantile(value, 0.025), quantile(value, 0.025) - value,
                                 quantile(value, 0.975) - value)),
         Main = value > .5,
         Main50 = quantile(value, 0.75) > .5,
         Main95 = quantile(value, 0.975) > .5,
         Hit50 = ifelse(Main == T & Main50 == T, T, F),
         Hit95 = ifelse(Main == T & Main95 == T, T, F),
         Miss50 = ifelse(Main == T & Main50 == F, T, F),
         Miss95 = ifelse(Main == T & Main95 == F, T, F),
         FA50 = ifelse(Main == F & Main50 == T, T, F),
         FA95 = ifelse(Main == F & Main95 == T, T, F)) %>%
  group_by(sampling_design, source) %>%
  summarise(MAE50 = mean(abs(error50)),
            MAE95 = mean(abs(error95)),
            ME50 = mean(error50),
            ME95 = mean(error95),
            NSE50 = 1 - (sum(error50^2)) / sum((value - mean(value))^2),
            NSE95 = 1 - (sum(error95^2)) / sum((value - mean(value))^2),
            CSI50 = sum(Hit50)/(sum(Hit50) + sum(Miss50) + sum(FA50)),
            CSI95 = sum(Hit95)/(sum(Hit95) + sum(Miss95) + sum(FA95)),
            HR50 = sum(Hit50) / (sum(Hit50) + sum(Miss50)),
            HR95 = sum(Hit95) / (sum(Hit95) + sum(Miss95)))

`summarise()` has grouped output by 'sampling_design'. You can override using
the `.groups` argument.

summary_all <- summary_3 %>%
  right_join(summary_2) %>%
  right_join(summary_1) %>% 
  ungroup() %>%
  mutate(analyis = paste(sampling_design, source)) %>%
  select(-sampling_design, -source) %>%
  pivot_longer(cols = MAE50:P95, names_to = "Parameter", values_to = "value") %>%
  mutate(value = format(round(value, 2), digits = 2)) %>%
  pivot_wider(names_from = analyis, values_from = value) %>%
  mutate(`Evaluation criteria` = case_when(Parameter %in% c("P50", "P95", "W50", "W95") ~ "Uncertainty",
                                           Parameter %in% c("MAE50", "MAE95", "ME50", "ME95") ~ "Residuals",
                                           Parameter %in% c("CRPS", "NSE50", "NSE95") ~ "Performance",
                                           Parameter %in% c("CSI50", "CSI95", "HR50", "HR95") ~ "Contingency"))

Joining with `by = join_by(sampling_design, source)`
Joining with `by = join_by(sampling_design, source)`

summary_all

# A tibble: 15 × 8
   Parameter `Grid Agriculture` `Grid Forest` `Transect Agriculture`
   <chr>     <chr>              <chr>         <chr>                 
 1 MAE50     0.01               0.01          0.02                  
 2 MAE95     0.00               0.00          0.00                  
 3 ME50      0.00               0.00          0.00                  
 4 ME95      0.00               0.00          0.00                  
 5 NSE50     0.99               0.99          0.99                  
 6 NSE95     1.00               1.00          1.00                  
 7 CSI50     0.93               0.92          0.91                  
 8 CSI95     0.82               0.86          0.81                  
 9 HR50      0.98               0.99          0.97                  
10 HR95      1.00               1.00          1.00                  
11 CRPS      0.02               0.02          0.02                  
12 W50       0.06               0.06          0.08                  
13 W95       0.18               0.18          0.24                  
14 P50       0.25               0.25          0.25                  
15 P95       0.48               0.47          0.48                  
# ℹ 4 more variables: `Transect Forest` <chr>,
#   `Likely to erode Agriculture` <chr>, `Likely to erode Forest` <chr>,
#   `Evaluation criteria` <chr>

In [9]:

suptab7 <- summary_all |>
  group_by(`Evaluation criteria`) |>
  gt()|>
  tab_style(style =  cell_text(weight = "bold", align = "center"), locations =  cells_row_groups()) |>
  tab_options(column_labels.font.weight = "bold")
suptab7

Parameter	Grid Agriculture	Grid Forest	Transect Agriculture	Transect Forest	Likely to erode Agriculture	Likely to erode Forest
Residuals
MAE50	0.01	0.01	0.02	0.02	0.01	0.01
MAE95	0.00	0.00	0.00	0.00	0.00	0.00
ME50	0.00	0.00	0.00	0.00	0.00	0.00
ME95	0.00	0.00	0.00	0.00	0.00	0.00
Performance
NSE50	0.99	0.99	0.99	0.99	0.99	0.99
NSE95	1.00	1.00	1.00	1.00	1.00	1.00
CRPS	0.02	0.02	0.02	0.02	0.01	0.01
Contingency
CSI50	0.93	0.92	0.91	0.91	0.93	0.93
CSI95	0.82	0.86	0.81	0.80	0.87	0.87
HR50	0.98	0.99	0.97	0.98	0.99	0.99
HR95	1.00	1.00	1.00	1.00	1.00	1.00
Uncertainty
W50	0.06	0.06	0.08	0.08	0.06	0.06
W95	0.18	0.18	0.24	0.24	0.18	0.18
P50	0.25	0.25	0.25	0.25	0.25	0.25
P95	0.48	0.47	0.48	0.47	0.47	0.48

saveRDS(suptab7, file = here::here("./images/suptab7.rds"))

MixSIAR results plotting and model performance - design specific mixtures

MixSIAR results plotting and model performance - design specific mixtures

Load Libraries

Load data

Plotting

Model performance