Installation
geofi
can be installed from CRAN using
# install from CRAN
install.packages("geofi")
# Install development version from GitHub
::install_github("ropengov/geofi") remotes
# Let's first create a function that checks if the suggested
# packages are available
<- function(pkgs){
check_namespaces return(all(unlist(sapply(pkgs, requireNamespace,quietly = TRUE))))
}
Manuel Gimonds Intro to GIS and Spatial Analysis and especially it’s appendixes provide a good introduction to working with spatial data in R.
From the book above, have a look at chapter Coordinate Systems in R first.
When using spatial data in R it necessary to have all data in same
coordinate reference system (CRS). You can check the CRS of you
sf
-object with sf::st_crs()
-function. All the
data you can obtain using geofi
is transformed
automatically into EPSG:3067
. Most of spatial data
providers in Finland provide their data in the same CRS.
library(geofi)
library(sf)
library(dplyr)
<- get_municipalities()
muni <- municipality_central_localities
point <- st_crs(muni)
crs $input
crs#> [1] "EPSG:3067"
However, sometimes data is not correctly projected and you have to
reproject it using st_transform
. Web maps like Google maps
or Leaflet use WGS 1984 geographic (long/lat) coordinate system
which is fine with added interactive feature, but should be avoided when
plotting data on maps elsewhere. This is especially the case with large
northern countries like Finland. To demonstrate the effect lets
reproject the municipality data to WGS 1984 (usin EPSG code equivalent
4326
) and plot it side to side with
EPSG:3067
<- st_transform(muni, "EPSG:4326")
muni_4326 <- st_crs(muni_4326)
crs $input
crs#> [1] "EPSG:4326"
<- c("ggplot2","patchwork")
libs if (check_namespaces(pkgs = libs)) {
library(ggplot2)
<- ggplot(muni %>% st_union()) +
p1 geom_sf() +
labs(subtitle = "EPSG:3067")
<- ggplot(muni_4326 %>% st_union()) +
p2 geom_sf() +
labs(subtitle = "EPSG:4326")
library(patchwork)
wrap_plots(list(p1,p2), nrow = 1) +
plot_annotation(title = "Map of Finland in two different CRS")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
You can see the that northern Finland is larger on the right and the grid below is different.
To compute the area of polygons (municipality in this case), ordering them by size and plotting largest/smalles 10 can be as this.
<- c("ggplot2")
libs if (check_namespaces(pkgs = libs)) {
# compute area
$area <- sf::st_area(muni)
muni# largest
%>%
muni arrange(desc(area)) %>%
slice(1:10) %>%
ggplot() +
geom_sf() +
geom_sf_label(aes(label = name_fi)) +
labs(title = "largest 10")
# smallest
%>%
muni arrange(area) %>%
slice(1:10) %>%
ggplot() +
geom_sf() +
geom_sf_label(aes(label = name_fi)) +
labs(title = "smallest 10")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
You can subset data your plotting by subsetting your data in conventional filtering codes/names, or you can use geometric operations such as bounding box or intersection.
Lets imagine that we need a more detailed view of the metropolitan area of the Greater Helsinki that consist of the following municipalities: Espoo, Helsinki, Vantaa, Hyvinkää, Järvenpää, Kauniainen, Kerava, Kirkkonummi, Mäntsälä, Nurmijärvi, Pornainen, Sipoo, Tuusula and Vihti. You can subset the data just using the names of municipalities.
<- c('Espoo','Helsinki','Vantaa','Hyvinkää',
greater_helsinki 'Järvenpää','Kauniainen','Kerava','Kirkkonummi',
'Mäntsälä','Nurmijärvi','Pornainen','Sipoo','Tuusula','Vihti')
<- muni %>% filter(municipality_name_fi %in% greater_helsinki)
greater_helsinki_polygon
if (check_namespaces(pkgs = libs)) {
ggplot(greater_helsinki_polygon) +
geom_sf() +
geom_sf(data = point %>%
filter(teksti %in% toupper(greater_helsinki)))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
First, let’s create bounding box from greater Helsinki polygons.
<- st_as_sfc(st_bbox(muni %>% filter(municipality_name_fi %in% greater_helsinki)))
bounding_box_polygon
if (check_namespaces(pkgs = libs)) {
ggplot(st_intersection(bounding_box_polygon, muni)) +
geom_sf() +
geom_sf(data = point %>% filter(teksti %in% toupper(greater_helsinki)))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Then, let’s use the point data (municipality central localities) to create the bounding box
<- st_as_sfc(st_bbox(point %>% filter(teksti %in% toupper(greater_helsinki))))
bounding_box_point
if (check_namespaces(pkgs = libs)) {
ggplot(st_intersection(bounding_box_point, muni)) +
geom_sf() +
geom_sf(data = point %>% filter(teksti %in% toupper(greater_helsinki)))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Neighboring or intersecting objects can be found using
st_intersection()
in following manner where we plot
Helsinki and it’s neighbors.
<- muni %>% filter(municipality_name_fi == "Helsinki")
helsinki <- st_intersection(muni,helsinki) %>%
neigbour_codes pull(municipality_code)
if (check_namespaces(pkgs = libs)) {
ggplot(muni %>% filter(municipality_code %in% neigbour_codes)) +
geom_sf() +
geom_sf_label(aes(label = municipality_name_fi))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Often there is need to create alternative regional breakdown to
existing ones and aggregating data accordingly. First we need to subset
the required members and then dissolve them using
st_union()
. Below we classify municipalities in three equal
size categories based on area, dissolve them and plot.
Lets first plot the smallest category as a single multipolygon.
$area_class <- cut_number(x = as.numeric(muni$area), n = 3)
muni#
#
if (check_namespaces(pkgs = libs)) {
%>%
muni filter(area_class == levels(muni$area_class)[1]) %>%
st_union() %>%
ggplot() +
geom_sf()
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
To union all three into same data you can use group_by
and summarise
if (check_namespaces(pkgs = libs)) {
%>%
muni group_by(area_class) %>%
summarise() %>%
ggplot() +
geom_sf(aes(fill = area_class))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
The following operarions derive from Marko Kallio’s course at CSC in February 2020 Spatial data analysis with R.
geofi
contains data on municipality central locations
(geofi::municipality_central_localities
). Instead of those
you may need the actual geographical centers ie. centroids of a polygon
that can be computed using st_centroid
and plotted with
ggplot.
<- st_centroid(muni)
muni_centroids
if (check_namespaces(pkgs = libs)) {
ggplot() +
geom_sf(data = muni) +
geom_sf(data = muni_centroids, color = "blue") +
# plot also the municipality_central_localities
geom_sf(data = municipality_central_localities, color = "red")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Buffers can be useful, for instance, calculating the share of buildings that are within certain radius from central localities. That example is not explained here, but we only show how to create 15km radius around polygon centroids.
<- muni_centroids %>%
muni_centroids_buffer st_buffer(dist = 15000)
if (check_namespaces(pkgs = libs)) {
ggplot() +
geom_sf(data = muni) +
geom_sf(data = muni_centroids_buffer) +
geom_sf(data = muni_centroids, shape = 3)
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
You can download predefined grids from Statistics Finland using
get_statistical_grid
and get_population_grid()
-functions in geofi
-package. These data contains not just
the geographical shape, but also attribute data on population etc.
within the grid cells.
However, you may need to create your own custom grid, for instance to
aggregate your own point data, which can be created with
st_make_grid()
function.
As describes by Marko Kallio in Spatial data analysis with R.
It creates a regular grid over bounding box of an ‘sf’ object. Can be given a certain cell size, or number of cells in x and y directions. ‘what’ tells the function what kind of regular grid is wanted (polygons, corners, centers). Fishnets of lines rather than polygons can be created simply by casting the polygons as “LINESTRING”s. The resulting polygon grid is an ‘sfc’ object, so it needs to be made ‘sf’ in order for us to add the ID-attribute.
For this example we pick northern Muonio municipality and create a custom 2km*4km grid on top of it. Afterward we could aggregate the number of reindeer for each grid cell if we would have the data.
<- muni %>% filter(municipality_name_fi == "Muonio")
muonio
<- st_make_grid(muonio, cellsize = c(2000,4000), what="polygons") %>%
grid_sf st_sf()
<- st_intersection(grid_sf, muonio)
grid_clip $rank <- 1:nrow(grid_clip)
grid_clip
if (check_namespaces(pkgs = libs)) {
ggplot(grid_clip) +
geom_sf(aes(fill = rank), color = alpha("white", 1/3), size = 3) +
scale_fill_viridis_c() +
theme_minimal()
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
In mathematics, a Voronoi diagram is a partition of a plane into regions close to each of a given set of objects. Source: Wikipedia: Voronoi diagram
Perhaps not the most useful operation, but worth taking a look as
readily available in sf
as function
st_voronoi
. In this case, it creates a layer of polygons
that are closest to each municipality central locality.
library(geofi)
library(sf)
<- municipality_central_localities %>%
muni_voronoi st_union() %>%
st_voronoi() %>%
st_cast() %>%
st_sf() %>%
st_intersection(st_union(muni)) %>%
mutate(rnk = 1:nrow(.))
if (check_namespaces(pkgs = libs)) {
ggplot(muni_voronoi) +
geom_sf(aes(fill = rnk)) +
geom_sf(data = municipality_central_localities, shape = 4) +
scale_fill_fermenter(palette = "YlGnBu") +
theme_minimal() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "none")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Calculating distances is certainly at the core of geospatial analysis. In this exercise we are interested on calculating distances between municipality central localities in Finland. As expected, we are talking great circle distances here, ie. as the crow flies type of distances.
You can calculate distance matrices using st_distance()
-function
from sf-package
that computes distances between pairs of geometries.
geofi
-package contains an internal data municipality_central_localities
that we use to calculate distances between points.
First we need to subset the data and replace original municipality
name column (teksti
) with better from latest
municipality key.
<- geofi::municipality_central_localities %>%
kunta select(teksti,kuntatunnus) %>%
mutate(kuntatunnus = as.integer(kuntatunnus)) %>%
select(-teksti) %>%
left_join(geofi::municipality_key_2022 %>% select(municipality_code, municipality_name_fi),
by = c("kuntatunnus" = "municipality_code")) %>%
rename(teksti = municipality_name_fi)
kunta#> Simple feature collection with 311 features and 2 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 91954.76 ymin: 6638646 xmax: 701036.8 ymax: 7755601
#> Projected CRS: ETRS89 / TM35FIN(E,N)
#> First 10 features:
#> kuntatunnus teksti geom
#> 1 478 Maarianhamina POINT (108054 6683064)
#> 2 51 Eurajoki POINT (217467.8 6797006)
#> 3 578 Paltamo POINT (540087.8 7142933)
#> 4 423 Lieto POINT (250416.5 6716555)
#> 5 749 Siilinjärvi POINT (533390.1 6994111)
#> 6 529 Naantali POINT (226598 6713685)
#> 7 284 Koski Tl POINT (288730.3 6730134)
#> 8 561 Oripää POINT (265926 6753450)
#> 9 102 Huittinen POINT (268830.4 6790126)
#> 10 536 Nokia POINT (313894.1 6820874)
Let’s aim at a data structure as below with both municipality name and code both from origin and destination.
origin_name | origin_code | destination_name | destination_code | dist_m |
---|---|---|---|---|
Helsinki | 91 | Espoo | 92 | 15000 |
Vantaa | 93 | Espoo | 92 | 18000 |
Espoo | 92 | Espoo | 92 | 0 |
To get there lets create a for loop that calculates distance matrices from each municipality to all municipalities, assign each data to a list and finally bind the data frames into a single data.
<- list()
d_list <- sf::st_drop_geometry(kunta) %>%
kuntadatan_teksti_ja_kuntatunnus select(teksti,kuntatunnus)
for (i in 1:nrow(kunta)){
<- sf::st_distance(x = kunta[i,], y = kunta)
dist_tmp tibble(origin_name = kunta[i,]$teksti,
origin_code = kunta[i,]$kuntatunnus) %>%
bind_cols(kuntadatan_teksti_ja_kuntatunnus %>% rename(destination_name = teksti,
destination_code = kuntatunnus)) %>%
mutate(dist = dist_tmp[1,]) -> d_list[[i]]
}<- do.call("bind_rows", d_list) %>%
kunta_dist mutate(dist = as.numeric(dist))
head(kunta_dist)
#> # A tibble: 6 × 5
#> origin_name origin_code destination_name destination_code dist
#> <chr> <int> <chr> <int> <dbl>
#> 1 Maarianhamina 478 Maarianhamina 478 0
#> 2 Maarianhamina 478 Eurajoki 51 157968.
#> 3 Maarianhamina 478 Paltamo 578 630977.
#> 4 Maarianhamina 478 Lieto 423 146249.
#> 5 Maarianhamina 478 Siilinjärvi 749 526935.
#> 6 Maarianhamina 478 Naantali 529 122435.
Finally we can draw two plots to verify our results. First let’s draw a bar plot of 20 municipality central localities nearest to Helsinki
if (check_namespaces(pkgs = libs)) {
ggplot(kunta_dist %>%
filter(origin_name == "Helsinki") %>%
arrange(dist) %>% slice(1:20),
aes(x = dist, y = reorder(destination_name, dist), label = round(dist))) +
geom_col() + geom_text(aes(x = 1000), color = "white", hjust = 0) +
labs(title = "Nearest 20 municipality localities to Helsinki", x = "distance in meters")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Then, lets find the municipality central locality that is nearest to the center of Finland.
# We firt need the country map as a single polygon
::get_municipalities() %>%
geofi::st_union() %>%
sf# then we need to compute the centroid of that polygon
::st_centroid() -> fin_centroid
sf
# The let's find the nearest neighbour with
<- st_distance(x = fin_centroid, y = kunta)
distance %>%
kuntadatan_teksti_ja_kuntatunnus mutate(dist = as.numeric(distance)) %>%
arrange(dist) -> closest_to_center
head(closest_to_center)
#> teksti kuntatunnus dist
#> 1 Siikalatva 791 13945.36
#> 2 Pyhäntä 630 17512.41
#> 3 Kärsämäki 317 33270.89
#> 4 Haapavesi 71 38730.91
#> 5 Vaala 785 50150.04
#> 6 Utajärvi 889 61041.42
And finally lets draw a bar plot of 20 furthest localities
<- kunta_dist %>%
furthest20 filter(origin_name == closest_to_center[1,]$teksti) %>%
arrange(desc(dist)) %>% slice(1:20)
if (check_namespaces(pkgs = libs)) {
ggplot(furthest20,
aes(x = dist, y = reorder(destination_name, dist), label = round(dist))) +
geom_col() + geom_text(aes(x = 10000), color = "white", hjust = 0) +
labs(title = paste("Furthest 20 municipality localities \nfrom the most central locality of ", closest_to_center[1,]$teksti), x = "distance in meters")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
And at very last, we need to show those distances also on a map.
<- kunta %>%
sf_lahto filter(teksti %in% closest_to_center[1,]$teksti) %>%
select(teksti)
<- kunta %>%
sf_paate filter(teksti %in% furthest20$destination_name) %>%
select(teksti)
<- list()
triplst for (i in 1:nrow(sf_paate)){
<- rbind(
triplst[[i]]
sf_lahto,
sf_paate[i,]%>%
) summarize(m = mean(row_number()),do_union=FALSE) %>%
st_cast("LINESTRING")
}<- do.call("rbind", triplst)
trips
if (check_namespaces(pkgs = libs)) {
ggplot() +
geom_sf(data = muni %>% st_union(), alpha = .3) +
geom_sf(data = trips, color = "dim grey") +
geom_sf_label(data = sf_lahto, aes(label = teksti)) +
geom_sf_text(data = sf_paate, aes(label = teksti))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}