::p_load(sf, tmap, tidyverse, rvest) pacman
Hands-on Exercise 1B - Choropleth Mapping with R
1. Install and launching R packages
2. Importing the Geopatial data
2.1. Singapore subzone’s boundary from Geospatial folder
<- st_read("data/geospatial/MasterPlan2019SubzoneBoundaryNoSeaKML.kml") mpsz
Reading layer `URA_MP19_SUBZONE_NO_SEA_PL' from data source
`D:\ssinha8752\ISSS608-VAA\Hands-on_Ex\Hands-on_Ex01\data\geospatial\MasterPlan2019SubzoneBoundaryNoSeaKML.kml'
using driver `KML'
Simple feature collection with 332 features and 2 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
3. Helper Function
3.1. Defining the helper function
Creates a function to extract specific fields (like REGION_N, PLN_AREA_N) from the embedded HTML in the KML’s Description column.
<- function(html_text, field_name) {
extract_kml_field if (is.na(html_text) || html_text == "") return(NA_character_)
<- read_html(html_text)
page <- page %>% html_elements("tr")
rows
<- rows %>%
value keep(~ html_text2(html_element(.x, "th")) == field_name) %>%
html_element("td") %>%
html_text2()
if (length(value) == 0) NA_character_ else value
}
3.2. Applying the function
Extracting the relevant fields and adds 4 columns to mpsz
<- mpsz %>%
mpsz mutate(
REGION_N = map_chr(Description, extract_kml_field, "REGION_N"),
PLN_AREA_N = map_chr(Description, extract_kml_field, "PLN_AREA_N"),
SUBZONE_N = map_chr(Description, extract_kml_field, "SUBZONE_N"),
SUBZONE_C = map_chr(Description, extract_kml_field, "SUBZONE_C")
%>%
) select(-Name, -Description) %>%
relocate(geometry, .after = last_col())
3.3. View the Cleaned Spatial Data
mpsz
Simple feature collection with 332 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
First 10 features:
REGION_N PLN_AREA_N SUBZONE_N SUBZONE_C
1 CENTRAL REGION BUKIT MERAH DEPOT ROAD BMSZ12
2 CENTRAL REGION BUKIT MERAH BUKIT MERAH BMSZ02
3 CENTRAL REGION OUTRAM CHINATOWN OTSZ03
4 CENTRAL REGION DOWNTOWN CORE PHILLIP DTSZ04
5 CENTRAL REGION DOWNTOWN CORE RAFFLES PLACE DTSZ05
6 CENTRAL REGION OUTRAM CHINA SQUARE OTSZ04
7 CENTRAL REGION BUKIT MERAH TIONG BAHRU BMSZ10
8 CENTRAL REGION DOWNTOWN CORE BAYFRONT SUBZONE DTSZ12
9 CENTRAL REGION BUKIT MERAH TIONG BAHRU STATION BMSZ04
10 CENTRAL REGION DOWNTOWN CORE CLIFFORD PIER DTSZ06
geometry
1 MULTIPOLYGON (((103.8145 1....
2 MULTIPOLYGON (((103.8221 1....
3 MULTIPOLYGON (((103.8438 1....
4 MULTIPOLYGON (((103.8496 1....
5 MULTIPOLYGON (((103.8525 1....
6 MULTIPOLYGON (((103.8486 1....
7 MULTIPOLYGON (((103.8311 1....
8 MULTIPOLYGON (((103.8589 1....
9 MULTIPOLYGON (((103.8283 1....
10 MULTIPOLYGON (((103.8552 1....
It shows only first 10 rows as R automatically prints only the first 10 rows of any data frame or sf object to keep the console tidy.
4. Importing the Aspatial data
4.1. respopagesextod2024.csv from apatial folder into tibble df
<- st_read("data/aspatial/respopagesextod2024.csv") popdata
Reading layer `respopagesextod2024' from data source
`D:\ssinha8752\ISSS608-VAA\Hands-on_Ex\Hands-on_Ex01\data\aspatial\respopagesextod2024.csv'
using driver `CSV'
Warning: no simple feature geometries present: returning a data.frame or tbl_df
<- popdata %>%
popdata mutate(Pop = as.numeric(Pop))
5. Data Prepration
5.1. Data Wrangling and transformation functions
<- popdata %>%
popdata2024 group_by(PA, SZ, AG) %>%
summarise(`POP` = sum(`Pop`)) %>%
ungroup()%>%
pivot_wider(names_from=AG,
values_from=POP) %>%
mutate(YOUNG = rowSums(.[3:6])
+rowSums(.[12])) %>%
mutate(`ECONOMY ACTIVE` = rowSums(.[7:11])+
rowSums(.[13:15]))%>%
mutate(`AGED`=rowSums(.[16:21])) %>%
mutate(`TOTAL`=rowSums(.[3:21])) %>%
mutate(`DEPENDENCY` = (`YOUNG` + `AGED`)
/`ECONOMY ACTIVE`) %>%
select(`PA`, `SZ`, `YOUNG`,
`ECONOMY ACTIVE`, `AGED`,
`TOTAL`, `DEPENDENCY`)
`summarise()` has grouped output by 'PA', 'SZ'. You can override using the
`.groups` argument.
5.2. Joining attribute and Geospatial data
Standardize Text Case for Join Compatibility
<- popdata2024 %>%
popdata2024 mutate_at(.vars = vars(PA, SZ), .funs = list(toupper)) %>%
filter(`ECONOMY ACTIVE` > 0)
Perform Georelational Join with left_join()
<- left_join(mpsz, popdata2024, by = c("SUBZONE_N" = "SZ")) mpsz_pop2024
Save the Joined Data for Future Use
dir.create("data/rds", recursive = TRUE, showWarnings = FALSE)
write_rds(mpsz_pop2024, "data/rds/mpsz_pop2024.rds")
6. Choropleth mapping
6.1. Plotting the choropleth map
Setting the tmap to static mode and create the map
tmap_mode("plot")
ℹ tmap mode set to "plot".
qtm(shp = mpsz_pop2024,
fill = "DEPENDENCY")
6.2. tmap Mapping
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5,
values = "brewer.blues"),
fill.legend = tm_legend(
title = "Dependency ratio")) +
tm_title("Distribution of Dependency Ratio by planning subzone") +
tm_layout(frame = TRUE) +
tm_borders(fill_alpha = 0.5) +
tm_compass(type="8star", size = 2) +
tm_scalebar() +
tm_grid(alpha =0.2) +
tm_credits("Source: Planning Sub-zone boundary from Urban Redevelopment Authorithy (URA)\n and Population data from Department of Statistics DOS",
position = c("left", "bottom"))
6.3. tmap with tm_shape() and tm_polygon()
tm_shape(mpsz_pop2024) +
tm_polygons()
6.4. tmap with tm_polygon()
tm_shape(mpsz_pop2024)+ tm_polygons(fill = "DEPENDENCY")
6.5. Mapping with tm_fills and tm_borders()
Basic Choropleth with fill only
tm_shape(mpsz_pop2024) +
tm_fill("DEPENDENCY")
Adding Borders
tm_shape(mpsz_pop2024) +
tm_fill("DEPENDENCY") +
tm_borders()
Customizing Borders
tm_shape(mpsz_pop2024) +
tm_fill("DEPENDENCY") +
tm_borders(col = "grey60", lwd = 0.1, lty = "dashed")
7. Data classification functions of tmap
7.1. Using inbuilt functions
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5)) +
tm_borders(fill_alpha = 0.5)
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "equal",
n = 5)) +
tm_borders(fill_alpha = 0.5)
7.2. Plotting with custome break
Exploring the data
summary(mpsz_pop2024$DEPENDENCY)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.1905 0.7450 0.8377 0.8738 0.9366 12.7500 94
Plotting with custome breaks
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
breaks = c(0, 0.60, 0.70, 0.80, 0.90, 1.00))) +
tm_borders(fill_alpha = 0.5)
Warning: Values have found that are higher than the highest break. They are
assigned to the highest interval
8. Colour Scheme
8.1. Using Colour Brewer Pallete
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5,
values = "brewer.greens")) +
tm_borders(fill_alpha = 0.5)
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5,
values = "-brewer.greens")) +
tm_borders(fill_alpha = 0.5)
8.2. Cartographic Furniture
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5)) +
tm_borders(fill_alpha = 0.5) +
tm_compass(type="8star", size = 2) +
tm_scalebar() +
tm_grid(lwd = 0.1, alpha = 0.2) +
tm_credits("Source: data.gov.sg & singstat",
position = c("left", "bottom"))
9. Map Layout
9.1. Map Legend
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5),
fill.legend = tm_legend(
title = "Dependency ratio")) +
tm_pos_auto_in() +
tm_borders(fill_alpha = 0.5) +
tm_compass(type="8star", size = 2) +
tm_scalebar() +
tm_grid(lwd = 0.1, alpha = 0.2) +
tm_credits("Source: data.gov.sg & singstat",
position = c("left", "bottom"))
9.2. Map Style
tm_shape(mpsz_pop2024) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
n = 5,
values = "-brewer.greens")) +
tm_borders(fill_alpha = 0.5) +
tmap_style("natural")
style set to "natural"
other available styles are: "white" (tmap default), "gray", "cobalt", "albatross", "beaver", "bw", "classic", "watercolor"
tmap v3 styles: "v3" (tmap v3 default), "gray_v3", "natural_v3", "cobalt_v3", "albatross_v3", "beaver_v3", "bw_v3", "classic_v3", "watercolor_v3"
10. Drawing Small Choropleth maps
10.1. By assigning multiple values to at least one of the aesthetic arguments
tm_shape(mpsz_pop2024) +
tm_polygons(
fill = c("YOUNG", "AGED"),
fill.legend =
tm_legend(position = tm_pos_in(
"right", "bottom")),
fill.scale = tm_scale_intervals(
style = "equal",
n = 5,
values = "brewer.blues")) +
tm_borders(fill_alpha = 0.5) +
tmap_style("natural")
style set to "natural"
other available styles are: "white" (tmap default), "gray", "cobalt", "albatross", "beaver", "bw", "classic", "watercolor"
tmap v3 styles: "v3" (tmap v3 default), "gray_v3", "natural_v3", "cobalt_v3", "albatross_v3", "beaver_v3", "bw_v3", "classic_v3", "watercolor_v3"
10.2. By arrange multiples choropleth maps in a grid layout
<- tm_shape(mpsz_pop2024)+
youngmap tm_polygons(fill = "YOUNG",
fill.legend = tm_legend(
position = tm_pos_in(
"right", "bottom"),
item.height = 0.8),
fill.scale = tm_scale_intervals(
style = "quantile",
values = "brewer.blues")) +
tm_borders(fill_alpha = 0.5) +
tm_title("Distribution of young population")
<- tm_shape(mpsz_pop2024)+
agedmap tm_polygons(fill = "AGED",
fill.legend = tm_legend(
position = tm_pos_in(
"right", "bottom"),
item.height = 0.8),
fill.scale = tm_scale_intervals(
style = "quantile",
values = "brewer.blues")) +
tm_borders(fill_alpha = 0.5) +
tm_title("Distribution of aged population")
tmap_arrange(youngmap, agedmap, asp=1, ncol=2)
10.3. By defining group-by variable in tm_facets()
tm_shape(mpsz_pop2024) +
tm_fill(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
values = "brewer.blues")) +
tm_facets(by = "REGION_N",
nrow = 2,
ncols = 3,
free.coords=TRUE,
drop.units=TRUE) +
tm_layout(legend.show = TRUE,
title.position = c("center", "center"),
title.size = 20) +
tm_borders(fill_alpha = 0.5)
11. Mappping Spatial Object Meeting a Selection Criterion
Instead of creating small multiple choropleth map, you can also use filter() of dplyr package to select geographical area of interest and plot a choropleth map focus only on the selected region.
%>%
mpsz_pop2024 filter(REGION_N == "CENTRAL REGION") %>%
tm_shape() +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
values = "brewer.greens"),
fill.legend = tm_legend()) +
tm_borders(fill_alpha = 0.5)
12. Complementing Thematic Map with Statistical Chart
Maps and statistical charts complement each other by visually representing different aspects of the same data, offering a more comprehensive understanding. Maps excel at showing spatial relationships and geographical patterns, while charts effectively display numerical data, trends, and comparisons. Combining both allows for a more insightful and engaging data narrative, especially when dealing with spatial data that also has quantifiable characteristics.
With tmap, statistical chart and be incorporate into the map visualisation by using fill.chat argument of map layers and legend chart feature as shown in the code chunk below.
%>%
mpsz_pop2024 filter(REGION_N == "CENTRAL REGION") %>%
tm_shape() +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
values = "brewer.greens"),
fill.legend = tm_legend(),
fill.chart = tm_chart_box()) +
tm_borders() +
tm_layout(asp = 0.8)
In the code chunk below, We improve the visual representation further by highlighting and lebaling the outliers on the choropleth map.
<- mpsz_pop2024 %>%
mpsz_selected filter(REGION_N == "CENTRAL REGION")
<- boxplot.stats(mpsz_selected$DEPENDENCY)
stats
<- stats$out
outlier_vals
<- mpsz_selected[mpsz_selected$DEPENDENCY %in% outlier_vals, ]
outlier_sf
tm_shape(mpsz_selected) +
tm_polygons(fill = "DEPENDENCY",
fill.scale = tm_scale_intervals(
style = "quantile",
values = "brewer.blues"),
fill.legend = tm_legend(),
fill.chart = tm_chart_box()) +
tm_borders(fill_alpha = 0.5) +
tm_shape(outlier_sf) +
tm_borders(col = "red", lwd = 2) +
tm_text("SUBZONE_N", col = "red", size = 0.7) +
tm_layout(asp = 0.8)
13. Create Interactive Mapping
<- mpsz_pop2024 %>%
region_selected filter(REGION_N == "CENTRAL REGION")
<- st_bbox(region_selected)
region_bbox
<- boxplot.stats(region_selected$DEPENDENCY)
stats <- stats$out
outlier_vals <- region_selected[region_selected$DEPENDENCY %in% outlier_vals, ]
outlier_sf
tmap_mode("view")
ℹ tmap mode set to "view".
tm_shape(region_selected,
bbox = region_bbox) +
tm_fill("DEPENDENCY",
id = "SUBZONE_N",
popup.vars = c(
"Name" = "SUBZONE_N",
"Dependency" = "DEPENDENCY")) +
tm_borders() +
tm_shape(outlier_sf) +
tm_borders(col = "red", lwd = 2)
tmap_mode("plot")
ℹ tmap mode set to "plot".
The interactive map above is far from satisfactory. While we want to encourage users to engage and explore the interactive by zooming in and out of the study area freely. But, users might lost in the cyberspace with too much freedom to zoom-in and zoom-out.
To address this issue, set_zoom_limits argument can be used to limit the map extend users can zooming in and out of the map areas as shown below.
<- mpsz_pop2024 %>%
region_selected filter(REGION_N == "CENTRAL REGION")
<- st_bbox(region_selected)
region_bbox
<- boxplot.stats(region_selected$DEPENDENCY)
stats <- stats$out
outlier_vals <- region_selected[region_selected$DEPENDENCY %in% outlier_vals, ]
outlier_sf
tmap_mode("view")
ℹ tmap mode set to "view".
tm_shape(region_selected,
bbox = region_bbox) +
tm_fill("DEPENDENCY",
id = "SUBZONE_N",
popup.vars = c(
"Name" = "SUBZONE_N",
"Dependency" = "DEPENDENCY")) +
tm_borders() +
tm_shape(outlier_sf) +
tm_borders(col = "red", lwd = 2) +
tm_view(set_zoom_limits = c(12,14))