Skip to content

Updated package version does not handle units datatype anymore: rapi_prepare: Unknown column type for prepare: INVALID #590

@wlangera

Description

@wlangera

Hi

I recently updated R and all my packages.
Now I have some code that does not work anymore.
It looks like duckdb cannot handle the units datatype anymore?
I do not know what my previous version of this package was. Now I have v1.1.2.
There are some related issues (solved and unsolved).

Reproducible example:

# Reproducible example (failing case): build a small table whose `area`
# column is the direct result of `st_area()`, write it with arrow, reopen it
# as an Arrow dataset and hand it to DuckDB via `arrow::to_duckdb()`.

# Load required libraries
library(arrow)
library(duckdb)
library(sf)
library(dplyr)

# Define a CRS (e.g., EPSG:4326 for WGS84)
crs_epsg <- 4326

# Create a sample spatial dataset `data_sf` with points and set CRS.
# `value` is drawn with `runif()` and no seed is set, so it differs per run.
data_sf <- st_as_sf(data.frame(
    id = 1:5,
    value = runif(5, 10, 20),
    geometry = st_sfc(
      st_point(c(1, 1)),
      st_point(c(2, 2)),
      st_point(c(3, 3)),
      st_point(c(4, 4)),
      st_point(c(5, 5))
    )
  )) %>%
  st_set_crs(crs_epsg)  # Set CRS for `data_sf`

# Create a sample `cell_areas` dataset with polygons and set CRS.
# `area` is kept exactly as `st_area()` returns it (no `as.numeric()`).
# NOTE(review): per the report this is a units-classed column and is the one
# DuckDB fails on -- confirm against the sf/units documentation.
cell_areas <- st_as_sf(data.frame(
    id = 1:5,
    geometry = st_sfc(
      st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 1), c(0, 0)))),
      st_polygon(list(rbind(c(1, 1), c(2, 1), c(2, 2), c(1, 2), c(1, 1)))),
      st_polygon(list(rbind(c(2, 2), c(3, 2), c(3, 3), c(2, 3), c(2, 2)))),
      st_polygon(list(rbind(c(3, 3), c(4, 3), c(4, 4), c(3, 4), c(3, 3)))),
      st_polygon(list(rbind(c(4, 4), c(5, 4), c(5, 5), c(4, 5), c(4, 4))))
    )
  )) %>%
  st_set_crs(crs_epsg) %>%                # Set CRS for `cell_areas`
  mutate(area = st_area(geometry)) %>%    # Calculate area for each polygon
  st_drop_geometry()

# Define the temporary Parquet file
# NOTE(review): this path is passed as `path` to `write_dataset()`, which
# presumably creates a dataset directory rather than a single file -- confirm.
temparrow <- tempfile(fileext = ".parquet")

# Process and write data to Parquet.
# `joined_data_sf` receives whatever `write_dataset()` returns and is not
# used again in this example; the write is the step that matters here.
joined_data_sf <- data_sf %>%
  st_drop_geometry() %>%                  # Drop geometry from `data_sf`
  inner_join(cell_areas, by = "id") %>%   # Join with `cell_areas` by `id`
  arrow::write_dataset(path = temparrow)  # Write to Parquet

# Read the Parquet dataset and convert to DuckDB format
arrow_dataset <- arrow::open_dataset(temparrow)

# This call produces the `rapi_prepare` error recorded in the output below.
arrow::to_duckdb(arrow_dataset)
#> Error in `db_query_fields.DBIConnection()`:
#> ! Can't query fields.
#> ℹ Using SQL: SELECT * FROM (FROM arrow_001) q01 WHERE (0 = 1)
#> Caused by error in `dbSendQuery()`:
#> ! rapi_prepare: Unknown column type for prepare: INVALID

Created on 2024-11-13 with reprex v2.1.1

If I save the area as a numeric variable, the code works:

# Reproducible example (working case): same pipeline as the first example,
# except that `area` is converted with `as.numeric()` before being written.

# Load required libraries
library(arrow)
library(duckdb)
library(sf)
library(dplyr)

# Define a CRS (e.g., EPSG:4326 for WGS84)
crs_epsg <- 4326

# Create a sample spatial dataset `data_sf` with points and set CRS.
# `value` is drawn with `runif()` and no seed is set, so it differs per run.
data_sf <- st_as_sf(data.frame(
    id = 1:5,
    value = runif(5, 10, 20),
    geometry = st_sfc(
      st_point(c(1, 1)),
      st_point(c(2, 2)),
      st_point(c(3, 3)),
      st_point(c(4, 4)),
      st_point(c(5, 5))
    )
  )) %>%
  st_set_crs(crs_epsg)  # Set CRS for `data_sf`

# Create a sample `cell_areas` dataset with polygons and set CRS.
# The only difference from the failing example: the result of `st_area()` is
# wrapped in `as.numeric()`, so `area` is stored as a plain numeric column
# (it prints as <dbl> in the output below).
cell_areas <- st_as_sf(data.frame(
    id = 1:5,
    geometry = st_sfc(
      st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 1), c(0, 0)))),
      st_polygon(list(rbind(c(1, 1), c(2, 1), c(2, 2), c(1, 2), c(1, 1)))),
      st_polygon(list(rbind(c(2, 2), c(3, 2), c(3, 3), c(2, 3), c(2, 2)))),
      st_polygon(list(rbind(c(3, 3), c(4, 3), c(4, 4), c(3, 4), c(3, 3)))),
      st_polygon(list(rbind(c(4, 4), c(5, 4), c(5, 5), c(4, 5), c(4, 4))))
    )
  )) %>%
  st_set_crs(crs_epsg) %>%                            # Set CRS
  mutate(area = as.numeric(st_area(geometry))) %>%    # Calculate area
  st_drop_geometry()

# Define the temporary Parquet file
# NOTE(review): this path is passed as `path` to `write_dataset()`, which
# presumably creates a dataset directory rather than a single file -- confirm.
temparrow <- tempfile(fileext = ".parquet")

# Process and write data to Parquet.
# `joined_data_sf` receives whatever `write_dataset()` returns and is not
# used again in this example; the write is the step that matters here.
joined_data_sf <- data_sf %>%
  st_drop_geometry() %>%                  # Drop geometry from `data_sf`
  inner_join(cell_areas, by = "id") %>%   # Join with `cell_areas` by `id`
  arrow::write_dataset(path = temparrow)  # Write to Parquet

# Read the Parquet dataset and convert to DuckDB format
arrow_dataset <- arrow::open_dataset(temparrow)

# With a numeric `area`, this call succeeds and prints the table shown below.
arrow::to_duckdb(arrow_dataset)
#> # Source:   table<arrow_001> [5 x 3]
#> # Database: DuckDB v1.1.2 [ward_langeraert@Windows 10 x64:R 4.4.2/:memory:]
#>      id value         area
#>   <int> <dbl>        <dbl>
#> 1     1  10.8 12364036567.
#> 2     2  17.7 12360269788.
#> 3     3  17.8 12352737380.
#> 4     4  19.4 12341441640.
#> 5     5  14.1 12326386013.

Created on 2024-11-13 with reprex v2.1.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions