Hands-on Exercise 1B - Choropleth Mapping with R

Author

Shubham Sinha

Published

August 31, 2024

Modified

September 14, 2025

1. Install and launching R packages

pacman::p_load(sf, tmap, tidyverse, rvest)

2. Importing the Geopatial data

2.1. Singapore subzone’s boundary from Geospatial folder

mpsz <- st_read("data/geospatial/MasterPlan2019SubzoneBoundaryNoSeaKML.kml")
Reading layer `URA_MP19_SUBZONE_NO_SEA_PL' from data source 
  `D:\ssinha8752\ISSS608-VAA\Hands-on_Ex\Hands-on_Ex01\data\geospatial\MasterPlan2019SubzoneBoundaryNoSeaKML.kml' 
  using driver `KML'
Simple feature collection with 332 features and 2 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84

3. Helper Function

3.1. Defining the helper function

Creates a function to extract specific fields (like REGION_N, PLN_AREA_N) from the embedded HTML in the KML’s Description column.

extract_kml_field <- function(html_text, field_name) {
  if (is.na(html_text) || html_text == "") return(NA_character_)
  
  page <- read_html(html_text)
  rows <- page %>% html_elements("tr")
  
  value <- rows %>%
    keep(~ html_text2(html_element(.x, "th")) == field_name) %>%
    html_element("td") %>%
    html_text2()
  
  if (length(value) == 0) NA_character_ else value
}

3.2. Applying the function

Extracting the relevant fields and adds 4 columns to mpsz

mpsz <- mpsz %>%
  mutate(
    REGION_N = map_chr(Description, extract_kml_field, "REGION_N"),
    PLN_AREA_N = map_chr(Description, extract_kml_field, "PLN_AREA_N"),
    SUBZONE_N = map_chr(Description, extract_kml_field, "SUBZONE_N"),
    SUBZONE_C = map_chr(Description, extract_kml_field, "SUBZONE_C")
  ) %>%
  select(-Name, -Description) %>%
  relocate(geometry, .after = last_col())

3.3. View the Cleaned Spatial Data

mpsz
Simple feature collection with 332 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
First 10 features:
         REGION_N    PLN_AREA_N           SUBZONE_N SUBZONE_C
1  CENTRAL REGION   BUKIT MERAH          DEPOT ROAD    BMSZ12
2  CENTRAL REGION   BUKIT MERAH         BUKIT MERAH    BMSZ02
3  CENTRAL REGION        OUTRAM           CHINATOWN    OTSZ03
4  CENTRAL REGION DOWNTOWN CORE             PHILLIP    DTSZ04
5  CENTRAL REGION DOWNTOWN CORE       RAFFLES PLACE    DTSZ05
6  CENTRAL REGION        OUTRAM        CHINA SQUARE    OTSZ04
7  CENTRAL REGION   BUKIT MERAH         TIONG BAHRU    BMSZ10
8  CENTRAL REGION DOWNTOWN CORE    BAYFRONT SUBZONE    DTSZ12
9  CENTRAL REGION   BUKIT MERAH TIONG BAHRU STATION    BMSZ04
10 CENTRAL REGION DOWNTOWN CORE       CLIFFORD PIER    DTSZ06
                         geometry
1  MULTIPOLYGON (((103.8145 1....
2  MULTIPOLYGON (((103.8221 1....
3  MULTIPOLYGON (((103.8438 1....
4  MULTIPOLYGON (((103.8496 1....
5  MULTIPOLYGON (((103.8525 1....
6  MULTIPOLYGON (((103.8486 1....
7  MULTIPOLYGON (((103.8311 1....
8  MULTIPOLYGON (((103.8589 1....
9  MULTIPOLYGON (((103.8283 1....
10 MULTIPOLYGON (((103.8552 1....

It shows only first 10 rows as R automatically prints only the first 10 rows of any data frame or sf object to keep the console tidy.

4. Importing the Aspatial data

4.1. respopagesextod2024.csv from apatial folder into tibble df

popdata <- st_read("data/aspatial/respopagesextod2024.csv")
Reading layer `respopagesextod2024' from data source 
  `D:\ssinha8752\ISSS608-VAA\Hands-on_Ex\Hands-on_Ex01\data\aspatial\respopagesextod2024.csv' 
  using driver `CSV'
Warning: no simple feature geometries present: returning a data.frame or tbl_df
popdata <- popdata %>%
  mutate(Pop = as.numeric(Pop))

5. Data Prepration

5.1. Data Wrangling and transformation functions

popdata2024 <- popdata %>%
  group_by(PA, SZ, AG) %>%
  summarise(`POP` = sum(`Pop`)) %>%
  ungroup()%>%
  pivot_wider(names_from=AG, 
              values_from=POP) %>%
  mutate(YOUNG = rowSums(.[3:6])
         +rowSums(.[12])) %>%
mutate(`ECONOMY ACTIVE` = rowSums(.[7:11])+
rowSums(.[13:15]))%>%
mutate(`AGED`=rowSums(.[16:21])) %>%
mutate(`TOTAL`=rowSums(.[3:21])) %>%  
mutate(`DEPENDENCY` = (`YOUNG` + `AGED`)
/`ECONOMY ACTIVE`) %>%
  select(`PA`, `SZ`, `YOUNG`, 
       `ECONOMY ACTIVE`, `AGED`, 
       `TOTAL`, `DEPENDENCY`)
`summarise()` has grouped output by 'PA', 'SZ'. You can override using the
`.groups` argument.

5.2. Joining attribute and Geospatial data

Standardize Text Case for Join Compatibility

popdata2024 <- popdata2024 %>%
  mutate_at(.vars = vars(PA, SZ), .funs = list(toupper)) %>%
  filter(`ECONOMY ACTIVE` > 0)

Perform Georelational Join with left_join()

mpsz_pop2024 <- left_join(mpsz, popdata2024, by = c("SUBZONE_N" = "SZ"))

Save the Joined Data for Future Use

dir.create("data/rds", recursive = TRUE, showWarnings = FALSE)
write_rds(mpsz_pop2024, "data/rds/mpsz_pop2024.rds")

6. Choropleth mapping

6.1. Plotting the choropleth map

Setting the tmap to static mode and create the map

tmap_mode("plot")
ℹ tmap mode set to "plot".
qtm(shp = mpsz_pop2024, 
    fill = "DEPENDENCY")

6.2. tmap Mapping

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile", 
                n = 5,
                values = "brewer.blues"),
              fill.legend = tm_legend(
                title = "Dependency ratio")) +
  tm_title("Distribution of Dependency Ratio by planning subzone") +
  tm_layout(frame = TRUE) +
  tm_borders(fill_alpha = 0.5) +
  tm_compass(type="8star", size = 2) +
  tm_scalebar() +
  tm_grid(alpha =0.2) +
  tm_credits("Source: Planning Sub-zone boundary from Urban Redevelopment Authorithy (URA)\n and Population data from Department of Statistics DOS", 
             position = c("left", "bottom"))

6.3. tmap with tm_shape() and tm_polygon()

tm_shape(mpsz_pop2024) +
  tm_polygons()

6.4. tmap with tm_polygon()

tm_shape(mpsz_pop2024)+ tm_polygons(fill = "DEPENDENCY")

6.5. Mapping with tm_fills and tm_borders()

Basic Choropleth with fill only

tm_shape(mpsz_pop2024) +
  tm_fill("DEPENDENCY")

Adding Borders

tm_shape(mpsz_pop2024) +
  tm_fill("DEPENDENCY") +
  tm_borders()

Customizing Borders

tm_shape(mpsz_pop2024) +
  tm_fill("DEPENDENCY") +
  tm_borders(col = "grey60", lwd = 0.1, lty = "dashed")

7. Data classification functions of tmap

7.1. Using inbuilt functions

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile",
                n = 5)) +
  tm_borders(fill_alpha = 0.5)

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "equal",
                n = 5)) +
  tm_borders(fill_alpha = 0.5)

7.2. Plotting with custome break

Exploring the data

summary(mpsz_pop2024$DEPENDENCY)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.1905  0.7450  0.8377  0.8738  0.9366 12.7500      94 

Plotting with custome breaks

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                breaks = c(0, 0.60, 0.70, 0.80, 0.90, 1.00))) +
  tm_borders(fill_alpha = 0.5)
Warning: Values have found that are higher than the highest break. They are
assigned to the highest interval

8. Colour Scheme

8.1. Using Colour Brewer Pallete

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile",
                n = 5,
                values = "brewer.greens")) +
  tm_borders(fill_alpha = 0.5)

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile",
                n = 5,
                values = "-brewer.greens")) +
  tm_borders(fill_alpha = 0.5)

8.2. Cartographic Furniture

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile",
                n = 5)) +
  tm_borders(fill_alpha = 0.5) +
  tm_compass(type="8star", size = 2) +
  tm_scalebar() +
  tm_grid(lwd = 0.1, alpha = 0.2) +
  tm_credits("Source: data.gov.sg & singstat",
             position = c("left", "bottom"))

9. Map Layout

9.1. Map Legend

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile",
                n = 5),
              fill.legend = tm_legend(
                title = "Dependency ratio")) +
  tm_pos_auto_in() +
  tm_borders(fill_alpha = 0.5) +
  tm_compass(type="8star", size = 2) +
  tm_scalebar() +
  tm_grid(lwd = 0.1, alpha = 0.2) +
  tm_credits("Source: data.gov.sg & singstat",
             position = c("left", "bottom"))

9.2. Map Style

tm_shape(mpsz_pop2024) +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile",
                n = 5,
                values = "-brewer.greens")) + 
  tm_borders(fill_alpha = 0.5) + 
  tmap_style("natural")
style set to "natural"
other available styles are: "white" (tmap default), "gray", "cobalt", "albatross", "beaver", "bw", "classic", "watercolor"
tmap v3 styles: "v3" (tmap v3 default), "gray_v3", "natural_v3", "cobalt_v3", "albatross_v3", "beaver_v3", "bw_v3", "classic_v3", "watercolor_v3"

10. Drawing Small Choropleth maps

10.1. By assigning multiple values to at least one of the aesthetic arguments

tm_shape(mpsz_pop2024) + 
  tm_polygons(
    fill = c("YOUNG", "AGED"),
    fill.legend = 
      tm_legend(position = tm_pos_in(
        "right", "bottom")),
    fill.scale = tm_scale_intervals(
      style = "equal", 
      n = 5,
      values = "brewer.blues")) +
  tm_borders(fill_alpha = 0.5) +
  tmap_style("natural")
style set to "natural"
other available styles are: "white" (tmap default), "gray", "cobalt", "albatross", "beaver", "bw", "classic", "watercolor"
tmap v3 styles: "v3" (tmap v3 default), "gray_v3", "natural_v3", "cobalt_v3", "albatross_v3", "beaver_v3", "bw_v3", "classic_v3", "watercolor_v3"

10.2. By arrange multiples choropleth maps in a grid layout

youngmap <- tm_shape(mpsz_pop2024)+ 
  tm_polygons(fill = "YOUNG",
              fill.legend = tm_legend(
                position = tm_pos_in(
                  "right", "bottom"),
                  item.height = 0.8),
              fill.scale = tm_scale_intervals(
                style = "quantile", 
                values = "brewer.blues")) +
  tm_borders(fill_alpha = 0.5) +
  tm_title("Distribution of young population")
                
agedmap <- tm_shape(mpsz_pop2024)+ 
  tm_polygons(fill = "AGED",
              fill.legend = tm_legend(
                position = tm_pos_in(
                  "right", "bottom"),
                item.height = 0.8),
              fill.scale = tm_scale_intervals(
              style = "quantile", 
              values = "brewer.blues")) +
  tm_borders(fill_alpha = 0.5) +
  tm_title("Distribution of aged population")

tmap_arrange(youngmap, agedmap, asp=1, ncol=2)

10.3. By defining group-by variable in tm_facets()

tm_shape(mpsz_pop2024) +
  tm_fill(fill = "DEPENDENCY",
          fill.scale = tm_scale_intervals(
            style = "quantile",
            values = "brewer.blues")) + 
  tm_facets(by = "REGION_N",
            nrow = 2, 
            ncols = 3,
            free.coords=TRUE, 
            drop.units=TRUE) +
  tm_layout(legend.show = TRUE,
            title.position = c("center", "center"), 
            title.size = 20) +
  tm_borders(fill_alpha = 0.5)

11. Mappping Spatial Object Meeting a Selection Criterion

Instead of creating small multiple choropleth map, you can also use filter() of dplyr package to select geographical area of interest and plot a choropleth map focus only on the selected region.

mpsz_pop2024 %>%
  filter(REGION_N == "CENTRAL REGION") %>%
  tm_shape() +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile", 
                values = "brewer.greens"),
              fill.legend = tm_legend()) +
  tm_borders(fill_alpha = 0.5)

12. Complementing Thematic Map with Statistical Chart

Maps and statistical charts complement each other by visually representing different aspects of the same data, offering a more comprehensive understanding. Maps excel at showing spatial relationships and geographical patterns, while charts effectively display numerical data, trends, and comparisons. Combining both allows for a more insightful and engaging data narrative, especially when dealing with spatial data that also has quantifiable characteristics.

With tmap, statistical chart and be incorporate into the map visualisation by using fill.chat argument of map layers and legend chart feature as shown in the code chunk below.

mpsz_pop2024 %>%
  filter(REGION_N == "CENTRAL REGION") %>%
  tm_shape() +
  tm_polygons(fill = "DEPENDENCY",
              fill.scale = tm_scale_intervals(
                style = "quantile", 
                values = "brewer.greens"),
              fill.legend = tm_legend(),
              fill.chart = tm_chart_box()) +
  tm_borders() +
  tm_layout(asp = 0.8)

In the code chunk below, We improve the visual representation further by highlighting and lebaling the outliers on the choropleth map.

mpsz_selected <- mpsz_pop2024 %>%
  filter(REGION_N == "CENTRAL REGION")

stats <- boxplot.stats(mpsz_selected$DEPENDENCY)

outlier_vals <- stats$out

outlier_sf <- mpsz_selected[mpsz_selected$DEPENDENCY %in% outlier_vals, ]

tm_shape(mpsz_selected) +
  tm_polygons(fill = "DEPENDENCY",
          fill.scale = tm_scale_intervals(
            style = "quantile", 
            values = "brewer.blues"),
          fill.legend = tm_legend(),
          fill.chart = tm_chart_box()) +
  tm_borders(fill_alpha = 0.5) +
tm_shape(outlier_sf) +
  tm_borders(col = "red", lwd = 2) +
  tm_text("SUBZONE_N", col = "red", size = 0.7) +
  tm_layout(asp = 0.8)

13. Create Interactive Mapping

region_selected <- mpsz_pop2024 %>%
  filter(REGION_N == "CENTRAL REGION")
region_bbox <- st_bbox(region_selected)

stats <- boxplot.stats(region_selected$DEPENDENCY)
outlier_vals <- stats$out
outlier_sf <- region_selected[region_selected$DEPENDENCY %in% outlier_vals, ]

tmap_mode("view")
ℹ tmap mode set to "view".
tm_shape(region_selected, 
         bbox = region_bbox) +
  tm_fill("DEPENDENCY",
          id = "SUBZONE_N",
          popup.vars = c(
            "Name" = "SUBZONE_N", 
            "Dependency" = "DEPENDENCY")) +
  tm_borders() +
  tm_shape(outlier_sf) +
  tm_borders(col = "red", lwd = 2)
tmap_mode("plot")
ℹ tmap mode set to "plot".

The interactive map above is far from satisfactory. While we want to encourage users to engage and explore the interactive by zooming in and out of the study area freely. But, users might lost in the cyberspace with too much freedom to zoom-in and zoom-out.

To address this issue, set_zoom_limits argument can be used to limit the map extend users can zooming in and out of the map areas as shown below.

region_selected <- mpsz_pop2024 %>%
  filter(REGION_N == "CENTRAL REGION")
region_bbox <- st_bbox(region_selected)

stats <- boxplot.stats(region_selected$DEPENDENCY)
outlier_vals <- stats$out
outlier_sf <- region_selected[region_selected$DEPENDENCY %in% outlier_vals, ]

tmap_mode("view")
ℹ tmap mode set to "view".
tm_shape(region_selected, 
         bbox = region_bbox) +
  tm_fill("DEPENDENCY",
          id = "SUBZONE_N",
          popup.vars = c(
            "Name" = "SUBZONE_N", 
            "Dependency" = "DEPENDENCY")) +
  tm_borders() +
  tm_shape(outlier_sf) +
  tm_borders(col = "red", lwd = 2) +
  tm_view(set_zoom_limits = c(12,14))