--- title: "Getting Started" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Getting Started} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r} library(BASSr) library(sf) library(ggplot2) library(dplyr) ``` ## Setup 1. Get spatial data - Hexagonal grid (primary spatial units - PSU) with land cover characteristics for each hex 2. Get cost data 3. Run `full_BASS_run()` ### 1. Spatial Data - Spatial data frame (sf object) with - Columns Cell/Hex ID (e.g., `ET_Index`, `hex_id`) - Columns defining land cover (e.g., CORINE land cover CLC `CLC15_1`) - This must be either POINT or (MULTI)POLYGON (will be converted to points) > Land cover characteristics should not be percentages, but should be XXXX? **Clean up hex data** ```{r} psu_hex <- clean_land_cover(psu_hex_dirty, pattern = "CLC0013_") %>% units::drop_units() # for plotting, get rid of m^2 ggplot(data = psu_hex, aes(fill = LC01)) + geom_sf() + scale_fill_viridis_c() ``` ### Basic Run ```{r} d <- full_BASS_run(land_hex = psu_hex, num_runs = 10, n_samples = 3) ggplot(data = d, aes(colour = benefit)) + geom_sf() + labs(colour = "Benefit") + scale_colour_viridis_c() d_hex <- left_join(psu_hex, st_drop_geometry(d), by = "hex_id") ggplot(data = d_hex, aes(fill = benefit)) + geom_sf() + labs(fill = "Benefit") + scale_fill_viridis_c() ``` ### Including Costs ```{r} d <- full_BASS_run(land_hex = psu_hex, num_runs = 10, n_samples = 3, costs = psu_costs) d_hex <- left_join(psu_hex, st_drop_geometry(d), by = "hex_id") ggplot(data = d_hex, aes(fill = inclpr)) + geom_sf() + labs(fill = "Inclusion\nProbability") + scale_fill_viridis_c() ``` What if the costs of that highly beneficial points was much higher? ```{r} which.max(d$benefit) high_cost <- psu_costs high_cost$RawCost[26] <- high_cost$RawCost[26] * 100 d <- full_BASS_run(land_hex = psu_hex, num_runs = 10, n_samples = 3, costs = high_cost) d_hex <- left_join(psu_hex, st_drop_geometry(d), by = "hex_id") ggplot(data = d_hex, aes(fill = inclpr)) + geom_sf() + labs(fill = "Inclusion\nProbability") + scale_fill_viridis_c() ``` Still high inclusion probability, but other points become relatively 'better'. ### Runs by 'hand' ```{r} psu_hex <- clean_land_cover(psu_hex_dirty, pattern = "CLC0013_") samples <- draw_random_samples(psu_hex, num_runs = 10, n_samples = 3) benefit <- calculate_benefit(psu_hex, samples) inc_prob <- calculate_inclusion_probs(benefit, costs = psu_costs) d_hex <- left_join(psu_hex, st_drop_geometry(inc_prob), by = "hex_id") ggplot(data = d_hex, aes(fill = inclpr)) + geom_sf() + labs(fill = "Inclusion\nProbability") + scale_fill_viridis_c() ``` Alternative pipe ```{r} final <- psu_hex_dirty %>% clean_land_cover(pattern = "CLC0013_") %>% draw_random_samples(num_runs = 10, n_samples = 3) %>% calculate_benefit(psu_hex, samples = .) %>% calculate_inclusion_probs(costs = psu_costs) ``` ### Weights? (e.g., Akimiski_Island.Rmd) ## Calculating Costs - Need number of ARUs which to deploy ... ## Selection probabilities ### Simple selection Here we'll sample 12 sites with a 20% over sample, resulting in a total of 14 sites selected. ```{r} g <- ggplot() + geom_sf(data = psu_hex, fill = "white") + geom_sf(data = final, colour = "grey70") g sel <- run_grts_on_BASS(probs = final, num_runs = 1, nARUs = 12, os = 0.2) sel_plot <- bind_rows(sel[["sites_base"]], sel[["sites_over"]]) g + geom_sf(data = sel_plot, aes(colour = siteuse), size = 5) + scale_colour_viridis_d(name = "Type of\nsites sampled", end = 0.7) ``` ### Stratified selection First let's create a dummy stratification and add it to our hexes for plotting ```{r} final <- mutate(final, strat = c(rep("A", 15), rep("B", 18))) psu_hex_strat <- select(final, "hex_id", "strat") |> st_drop_geometry() |> left_join(psu_hex, y = _, by = "hex_id") g <- ggplot() + geom_sf(data = psu_hex_strat, aes(fill = strat), alpha = 0.4) + geom_sf(data = final, colour = "grey70") g ``` Now we'll define how we want to sample these two strata. Let's assume we don't really care about habitat A, so we don't want to sample that one very much. ```{r} nARUs <- list("A" = 2, "B" = 10) sel <- run_grts_on_BASS(probs = final, num_runs = 1, stratum_id = strat, nARUs = nARUs, os = 0.2) sel_plot <- bind_rows(sel[["sites_base"]], sel[["sites_over"]]) g + geom_sf(data = sel_plot, aes(colour = siteuse), size = 5) + scale_colour_viridis_d(name = "Type of\nsites sampled", end = 0.7) ``` You can see that we've sampled much more of B than A, and that there are no over samples in A, which makes sense: 0.2 * 2 = 0.4 which rounds down to 0 If we wanted an over sample for A, we could define specific over sample amounts instead. ```{r} nARUs <- list("A" = 2, "B" = 10) os <- list("A" = 1, "B" = 4) sel <- run_grts_on_BASS(probs = final, num_runs = 1, stratum_id = strat, nARUs = nARUs, os = os, seed = 123) sel_plot <- bind_rows(sel[["sites_base"]], sel[["sites_over"]]) g + geom_sf(data = sel_plot, aes(colour = siteuse), size = 5) + scale_colour_viridis_d(name = "Type of\nsites sampled", end = 0.7) ``` Alternatively at this point (and especially with more strata) it might be easier to supply a data frame rather than a series of lists. ```{r} nARUs <- data.frame(n = c(2, 10), strat = c("A", "B"), n_os = c(1, 4)) sel <- run_grts_on_BASS(probs = final, num_runs = 1, stratum_id = strat, nARUs = nARUs, seed = 123) sel_plot <- bind_rows(sel[["sites_base"]], sel[["sites_over"]]) g + geom_sf(data = sel_plot, aes(colour = siteuse), size = 5) + scale_colour_viridis_d(name = "Type of\nsites sampled", end = 0.7) ```