使用 tidyverse + sf R 创建每个多边形的线密度
Posted
技术标签:
【中文标题】使用 tidyverse + sf R 创建每个多边形的线密度【英文标题】:Create line density per polygon using tidyverse + sf R 【发布时间】:2018-08-23 19:12:07 【问题描述】:我有一个 GIS 问题困扰了我一段时间。最终目标是使用 tidyverse/sf 包提取每个像素/体素/多边形的线密度。到目前为止,我有一个函数可以在我逐行执行时工作,但不能作为函数。最终目标是在并行运行的snowfall
包的sfLappy
中使用此功能。任何有关使其作为功能工作的帮助将不胜感激!所涉及的数据可以在这里找到......
https://www.dropbox.com/s/zg9o2b5x4wizafo/hexagons.gpkg?dl=0 https://www.dropbox.com/s/x2gxx36pjkutxzm/railroad_lines.gpkg?dl=0
我创建的函数,再次,逐行工作,但不是作为函数,可以在这里找到:
length_in_poly <- function(fishnet, spatial_lines)
require(sf)
require(tidyverse)
require(magrittr)
fishnet <- st_as_sf(do.call(rbind, fishnet))
spatial_lines <- st_as_sf(do.call(rbind, spatial_lines))
fish_length <- list()
for (i in 1:nrow(fishnet))
split_lines <- spatial_lines %>%
st_cast(., "MULTILINESTRING", group_or_split = FALSE) %>%
st_intersection(., fishnet[i, ]) %>%
mutate(lineid = row_number())
fish_length[[i]] <- split_lines %>%
mutate(length = sum(st_length(.)))
fish_length <- do.call(rbind, fish_length) %>%
group_by(hexid4k) %>%
summarize(length = sum(length))
fishnet <- fishnet %>%
st_join(., fish_length, join = st_intersects) %>%
mutate(hexid4k = hexid4k.x,
length = ifelse(is.na(length), 0, length),
pixel_area = as.numeric(st_area(geom)),
density = length/pixel_area)
准备数据:
library(sf)
library(tidyverse)
library(snowfall)
input_hexagons <- st_read("hexagons.gpkg")
input_rail_lines <- st_read("railroad_lines.gpkg")
使用来自here的一些代码:
faster_as_tibble <- function(x)
structure(x, class = c("tbl_df", "tbl", "data.frame", "sfc"), row.names = as.character(seq_along(x[[1]])))
split_fast_tibble <- function (x, f, drop = FALSE, ...)
lapply(split(x = seq_len(nrow(x)), f = f, ...),
function(ind) faster_as_tibble(lapply(x, "[", ind)))
创建一个状态列表:
sub_hexnet <- split_fast_tibble(input_hexagons, input_hexagons$STUSPS) %>%
lapply(st_as_sf)
最后,作为单核进程运行:
test <- lapply(fishnet = as.list(sub_hexnet),
FUN = length_in_poly,
spatial_lines = input_rail_lines)
或者,在完美世界中,多核进程:
sfInit(parallel = TRUE, cpus = parallel::detectCores())
sfExport(list = c("sub_hexnet", "mask_rails"))
extractions <- sfLapply(fishnet = sub_hexnet,
fun = length_in_poly,
spatial_lines = input_rail_lines)
sfStop()
提前感谢您的帮助 - 我完全被难住了!
【问题讨论】:
错误信息是什么?这对我来说听起来像是一个 dplyr 错误。您是否尝试过在mutate
和summarize
语句中使用rlang::.data
?
我认为最后是dplyr错误,但重构了整个代码。
【参考方案1】:
搞砸了一段时间后,我终于想出了一个解决方案。
使用的关键辅助函数:
load_data <- function(url, dir, layer, outname)
file <- paste0(dir, "/", layer, ".shp")
if (!file.exists(file))
download.file(url, destfile = paste0(dir, ".zip"))
unzip(paste0(dir, ".zip"),
exdir = dir)
unlink(paste0(dir, ".zip"))
name <- paste0(outname, "_shp")
name <- sf::st_read(dsn = dir, layer = layer)
name
get_density <- function(x, grids, lines)
require(tidyverse)
require(lubridate)
require(sf)
sub_grids <- grids %>%
dplyr::filter(hexid4k == x)
single_lines_hexid <- lines %>%
dplyr::filter(hexid4k == x) %>%
sf::st_intersection(., sub_grids) %>%
dplyr::select(hexid4k, STUSPS) %>%
dplyr::mutate(length_line = st_length(.),
length_line = ifelse(is.na(length_line), 0, length_line))
sub_grids <- sub_grids %>%
sf::st_join(., single_lines_hexid, join = st_intersects) %>%
dplyr::mutate(hexid4k = hexid4k.x) %>%
dplyr::group_by(hexid4k) %>%
dplyr::summarize(length_line = sum(length_line)) %>%
dplyr::mutate(pixel_area = as.numeric(st_area(geom)),
density = length_line/pixel_area) %>%
dplyr::select(hexid4k, length_line, density, pixel_area)
return(sub_grids)
准备输入数据:
usa_shp <- load_data(url = "https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_state_20m.zip",
dir = 'data',
layer = "cb_2016_us_state_20m",
outname = "usa") %>%
sf::st_transform(p4string_ea) %>%
dplyr::filter(!STUSPS %in% c("HI", "AK", "PR"))
usa_shp$STUSPS <- droplevels(usa_shp$STUSPS)
hex_points <- spsample(as(usa_shp, 'Spatial'), type = "hexagonal", cellsize = 4000)
hex_grid <- HexPoints2SpatialPolygons(hex_points, dx = 4000)
hexnet_4k <- st_as_sf(hex_grid) %>%
mutate(hexid4k = row_number()) %>%
st_intersection(., st_union(usa_shp)) %>%
st_join(., usa_shp, join = st_intersects) %>%
dplyr::select(hexid4k, STUSPS)
transmission_lines_hex <- load_data( url = "https://hifld-dhs-gii.opendata.arcgis.com/datasets/75af06441c994aaf8e36208b7cd44014_0.zip",
dir = 'data',
layer = 'Electric_Power_Transmission_Lines',
outname = 'tl')%>%
dplyr::select(LINEARID, STUSPS) %>%
st_join(., hexnet_4k, join = st_intersects) %>%
mutate(STUSPS = STUSPS.x) %>%
dplyr::select(LINEARID, hexid4k, STUSPS)
产生的并行过程如下:
hexnet_list <- hexnet_4k %>%
split(., .$STUSPS)
sfInit(parallel = TRUE, cpus = num_cores)
sfExport('transmission_lines_hex')
sfSource('src/functions/helper_functions.R')
transmission_lines_density <- lapply(hexnet_list,
function (input_list)
require(tidyverse)
require(magrittr)
require(lubridate)
require(lubridate)
require(sf)
sub_grid <- dplyr:::bind_cols(input_list)
unique_ids <- unique(sub_grid$hexid4k)
state_name <- unique(sub_grid$STUSPS)[1]
print(paste0('Working on ', state_name))
got_density <- lapply(unique_ids,
FUN = get_density,
grids = sub_grid,
lines = transmission_lines_hex)
print(paste0('Finishing ', state_name))
return(got_density)
)
sfStop()
我希望其中的一些内容可能对您有用,并且一如既往地欢迎有关优化的建议。
【讨论】:
以上是关于使用 tidyverse + sf R 创建每个多边形的线密度的主要内容,如果未能解决你的问题,请参考以下文章
在 R 中使用 openxlsx 进行条件格式化的 Tidyverse/更快的解决方案?
R语言实战应用精讲50篇(三十一)-R语言入门系列-tidyverse数据分析流程