Hi
I recently updated R and all my packages.
Now I have some code that does not work anymore.
It looks like duckdb cannot handle the `units` data type anymore?
I do not know what my previous version of this package was. Now I have v1.1.2.
There are some related issues (solved and unsolved).
Reproducible example:
# Reproduction script (failing case): the `area` column is kept exactly as
# returned by st_area() (a `units` vector), written with Arrow, and then
# handed to DuckDB via arrow::to_duckdb().
# Load required libraries
library(arrow)
library(duckdb)
library(sf)
library(dplyr)
# Define a CRS (e.g., EPSG:4326 for WGS84)
crs_epsg <- 4326
# Create a sample spatial dataset `data_sf` with points and set CRS
# NOTE: `value` comes from runif() and no seed is set, so its numbers differ
# between runs.
data_sf <- st_as_sf(data.frame(
id = 1:5,
value = runif(5, 10, 20),
geometry = st_sfc(
st_point(c(1, 1)),
st_point(c(2, 2)),
st_point(c(3, 3)),
st_point(c(4, 4)),
st_point(c(5, 5))
)
)) %>%
st_set_crs(crs_epsg) # Set CRS for `data_sf`
# Create a sample `cell_areas` dataset with polygons and set CRS
# Five unit squares along the diagonal, one per `id`. After the pipeline the
# geometry is dropped, leaving a plain table with `id` and `area`.
# `area` is NOT converted with as.numeric() here; it keeps the class that
# st_area() returns, which is the column type this report is about.
cell_areas <- st_as_sf(data.frame(
id = 1:5,
geometry = st_sfc(
st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 1), c(0, 0)))),
st_polygon(list(rbind(c(1, 1), c(2, 1), c(2, 2), c(1, 2), c(1, 1)))),
st_polygon(list(rbind(c(2, 2), c(3, 2), c(3, 3), c(2, 3), c(2, 2)))),
st_polygon(list(rbind(c(3, 3), c(4, 3), c(4, 4), c(3, 4), c(3, 3)))),
st_polygon(list(rbind(c(4, 4), c(5, 4), c(5, 5), c(4, 5), c(4, 4))))
)
)) %>%
st_set_crs(crs_epsg) %>% # Set CRS for `cell_areas`
mutate(area = st_area(geometry)) %>% # Calculate area for each polygon
st_drop_geometry()
# Define the temporary Parquet file
# NOTE(review): arrow::write_dataset() documents `path` as a directory, so
# despite the ".parquet" extension this is presumably a dataset directory
# rather than a single file. It is not removed at the end of the script.
temparrow <- tempfile(fileext = ".parquet")
# Process and write data to Parquet
# `joined_data_sf` receives whatever write_dataset() returns; it is not used
# again below. The data that DuckDB sees is re-read from `temparrow`.
joined_data_sf <- data_sf %>%
st_drop_geometry() %>% # Drop geometry from `data_sf`
inner_join(cell_areas, by = "id") %>% # Join with `cell_areas` by `id`
arrow::write_dataset(path = temparrow) # Write to Parquet
# Read the Parquet dataset and convert to DuckDB format
arrow_dataset <- arrow::open_dataset(temparrow)
# This is the call that raises the error reported in the output that follows.
arrow::to_duckdb(arrow_dataset)
#> Error in `db_query_fields.DBIConnection()`:
#> ! Can't query fields.
#> ℹ Using SQL: SELECT * FROM (FROM arrow_001) q01 WHERE (0 = 1)
#> Caused by error in `dbSendQuery()`:
#> ! rapi_prepare: Unknown column type for prepare: INVALID
Created on 2024-11-13 with reprex v2.1.1
If I save the area as a numeric variable, the code works:
# Control script (working case): the same pipeline as the failing
# reproduction, except that `area` is converted with as.numeric() before it
# is written with Arrow and handed to DuckDB.
# Load required libraries
library(arrow)
library(duckdb)
library(sf)
library(dplyr)
# Define a CRS (e.g., EPSG:4326 for WGS84)
crs_epsg <- 4326
# Create a sample spatial dataset `data_sf` with points and set CRS
# NOTE: `value` comes from runif() and no seed is set, so its numbers differ
# between runs.
data_sf <- st_as_sf(data.frame(
id = 1:5,
value = runif(5, 10, 20),
geometry = st_sfc(
st_point(c(1, 1)),
st_point(c(2, 2)),
st_point(c(3, 3)),
st_point(c(4, 4)),
st_point(c(5, 5))
)
)) %>%
st_set_crs(crs_epsg) # Set CRS for `data_sf`
# Create a sample `cell_areas` dataset with polygons and set CRS
# Five unit squares along the diagonal, one per `id`. After the pipeline the
# geometry is dropped, leaving a plain table with `id` and `area`.
# The as.numeric() wrapper around st_area() is the only code difference from
# the failing script: `area` is stored as a plain double column.
cell_areas <- st_as_sf(data.frame(
id = 1:5,
geometry = st_sfc(
st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 1), c(0, 0)))),
st_polygon(list(rbind(c(1, 1), c(2, 1), c(2, 2), c(1, 2), c(1, 1)))),
st_polygon(list(rbind(c(2, 2), c(3, 2), c(3, 3), c(2, 3), c(2, 2)))),
st_polygon(list(rbind(c(3, 3), c(4, 3), c(4, 4), c(3, 4), c(3, 3)))),
st_polygon(list(rbind(c(4, 4), c(5, 4), c(5, 5), c(4, 5), c(4, 4))))
)
)) %>%
st_set_crs(crs_epsg) %>% # Set CRS
mutate(area = as.numeric(st_area(geometry))) %>% # Calculate area
st_drop_geometry()
# Define the temporary Parquet file
# NOTE(review): arrow::write_dataset() documents `path` as a directory, so
# despite the ".parquet" extension this is presumably a dataset directory
# rather than a single file. It is not removed at the end of the script.
temparrow <- tempfile(fileext = ".parquet")
# Process and write data to Parquet
# `joined_data_sf` receives whatever write_dataset() returns; it is not used
# again below. The data that DuckDB sees is re-read from `temparrow`.
joined_data_sf <- data_sf %>%
st_drop_geometry() %>% # Drop geometry from `data_sf`
inner_join(cell_areas, by = "id") %>% # Join with `cell_areas` by `id`
arrow::write_dataset(path = temparrow) # Write to Parquet
# Read the Parquet dataset and convert to DuckDB format
arrow_dataset <- arrow::open_dataset(temparrow)
# With the numeric `area` column this call succeeds and prints the table
# shown in the output that follows.
arrow::to_duckdb(arrow_dataset)
#> # Source: table<arrow_001> [5 x 3]
#> # Database: DuckDB v1.1.2 [ward_langeraert@Windows 10 x64:R 4.4.2/:memory:]
#> id value area
#> <int> <dbl> <dbl>
#> 1 1 10.8 12364036567.
#> 2 2 17.7 12360269788.
#> 3 3 17.8 12352737380.
#> 4 4 19.4 12341441640.
#> 5 5 14.1 12326386013.
Created on 2024-11-13 with reprex v2.1.1
Hi
I recently updated R and all my packages.
Now I have some code that does not work anymore.
It looks like duckdb cannot handle the `units` data type anymore?
I do not know what my previous version of this package was. Now I have v1.1.2.
There are some related issues (solved and unsolved).
Reproducible example:
Created on 2024-11-13 with reprex v2.1.1
If I save the area as a numeric variable, the code works:
Created on 2024-11-13 with reprex v2.1.1