diff --git a/README.md b/README.md
index ce8b69c..5e757cc 100644
--- a/README.md
+++ b/README.md
@@ -423,6 +423,7 @@ There is currently only one GUC parameter to enable/disable the `pg_parquet`:
 | `time` | INT64 | TIME_MICROS |
 | `timetz`(3) | INT64 | TIME_MICROS |
 | `geometry`(4) | BYTE_ARRAY | |
+| `geography`(4) | BYTE_ARRAY | |
 
 ### Nested Types
 | PostgreSQL Type | Parquet Physical Type | Logical Type |
@@ -440,7 +441,7 @@ There is currently only one GUC parameter to enable/disable the `pg_parquet`:
 > * `numeric` is allowed by Postgres. (precision and scale not specified). These are represented by a default precision (38) and scale (9) instead of writing them as string. You get runtime error if your table tries to read or write a numeric value which is not allowed by the default precision and scale (29 integral digits before decimal point, 9 digits after decimal point).
 > - (2) The `date` type is represented according to `Unix epoch` when writing to Parquet files. It is converted back according to `PostgreSQL epoch` when reading from Parquet files.
 > - (3) The `timestamptz` and `timetz` types are adjusted to `UTC` when writing to Parquet files. They are converted back with `UTC` timezone when reading from Parquet files.
-> - (4) The `geometry` type is represented as `BYTE_ARRAY` encoded as `WKB`, specified by [geoparquet spec](https://geoparquet.org/releases/v1.1.0/), when `postgis` extension is created. Otherwise, it is represented as `BYTE_ARRAY` with `STRING` logical type.
+> - (4) The `geometry` and `geography` types are represented as `BYTE_ARRAY` encoded as `WKB`, as specified by the [geoparquet spec](https://geoparquet.org/releases/v1.1.0/), when the `postgis` extension is created. Otherwise, they are represented as `BYTE_ARRAY` with `STRING` logical type. Orientation and edges metadata are written when the `postgis_sfcgal` extension is created.
 > - (5) `crunchy_map` is dependent on functionality provided by [Crunchy Bridge](https://www.crunchydata.com/products/crunchy-bridge). The `crunchy_map` type is represented as `GROUP` with `MAP` logical type when `crunchy_map` extension is created. Otherwise, it is represented as `BYTE_ARRAY` with `STRING` logical type.
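With this change, a `geography` column round-trips through `COPY ... WITH (format parquet)` and the resulting GeoParquet metadata can be inspected under the `geo` key via `parquet.kv_metadata`. A minimal sketch based on the tests in this diff (the output path is illustrative; `postgis` must be created, and `postgis_sfcgal` additionally enables the orientation/edges metadata):

```sql
-- write a geography column to a Parquet file
COPY (SELECT ST_GeogFromText('POINT(1 1)')::geography(point) AS g)
TO '/tmp/geog.parquet' WITH (format parquet);

-- read back the GeoParquet key/value metadata written under the 'geo' key
SELECT encode(value, 'escape')::jsonb
FROM parquet.kv_metadata('/tmp/geog.parquet')
WHERE encode(key, 'escape') = 'geo';
```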
> [!WARNING] diff --git a/src/arrow_parquet/arrow_to_pg.rs b/src/arrow_parquet/arrow_to_pg.rs index 1d3578f..2fff5ac 100644 --- a/src/arrow_parquet/arrow_to_pg.rs +++ b/src/arrow_parquet/arrow_to_pg.rs @@ -20,7 +20,7 @@ use crate::{ }, type_compat::{ fallback_to_text::{reset_fallback_to_text_context, FallbackToText}, - geometry::{is_postgis_geometry_type, Geometry}, + geometry::{Geography, Geometry}, map::{is_map_type, reset_map_type_context, Map}, }, }; @@ -34,6 +34,7 @@ pub(crate) mod date; pub(crate) mod fallback_to_text; pub(crate) mod float4; pub(crate) mod float8; +pub(crate) mod geography; pub(crate) mod geometry; pub(crate) mod int2; pub(crate) mod int4; @@ -127,6 +128,8 @@ fn to_pg_nonarray_datum( DataType::Binary => { if attribute_context.is_geometry() { to_pg_datum!(BinaryArray, Geometry, primitive_array, attribute_context) + } else if attribute_context.is_geography() { + to_pg_datum!(BinaryArray, Geography, primitive_array, attribute_context) } else { to_pg_datum!(BinaryArray, Vec, primitive_array, attribute_context) } @@ -271,6 +274,13 @@ fn to_pg_array_datum( list_array, element_context ) + } else if element_context.is_geography() { + to_pg_datum!( + BinaryArray, + Vec>, + list_array, + element_context + ) } else { to_pg_datum!( BinaryArray, diff --git a/src/arrow_parquet/arrow_to_pg/context.rs b/src/arrow_parquet/arrow_to_pg/context.rs index aba4c1f..b015c4d 100644 --- a/src/arrow_parquet/arrow_to_pg/context.rs +++ b/src/arrow_parquet/arrow_to_pg/context.rs @@ -6,11 +6,14 @@ use pgrx::{ PgTupleDesc, }; -use crate::type_compat::pg_arrow_type_conversions::extract_precision_and_scale_from_numeric_typmod; +use crate::type_compat::{ + geometry::{is_postgis_geography_type, is_postgis_geometry_type}, + pg_arrow_type_conversions::extract_precision_and_scale_from_numeric_typmod, +}; use super::{ array_element_typoid, collect_attributes_for, domain_array_base_elem_type, is_array_type, - is_composite_type, is_map_type, is_postgis_geometry_type, tuple_desc, CollectAttributesFor, + is_composite_type, is_map_type, tuple_desc, CollectAttributesFor, }; // ArrowToPgAttributeContext contains the information needed to convert an Arrow array @@ -100,6 +103,7 @@ impl ArrowToPgAttributeContext { pub(crate) enum ArrowToPgAttributeTypeContext { Primitive { is_geometry: bool, + is_geography: bool, precision: Option, scale: Option, timezone: Option, @@ -144,6 +148,8 @@ impl ArrowToPgAttributeTypeContext { let is_geometry = is_postgis_geometry_type(typoid); + let is_geography = is_postgis_geography_type(typoid); + let timezone = match &data_type { DataType::Timestamp(_, Some(timezone)) => Some(timezone.to_string()), _ => None, @@ -151,6 +157,7 @@ impl ArrowToPgAttributeTypeContext { Self::Primitive { is_geometry, + is_geography, precision, scale, timezone, @@ -278,6 +285,13 @@ impl ArrowToPgAttributeTypeContext { _ => false, } } + + pub(crate) fn is_geography(&self) -> bool { + match &self { + ArrowToPgAttributeTypeContext::Primitive { is_geography, .. 
} => *is_geography, + _ => false, + } + } } pub(crate) fn collect_arrow_to_pg_attribute_contexts( diff --git a/src/arrow_parquet/arrow_to_pg/geography.rs b/src/arrow_parquet/arrow_to_pg/geography.rs new file mode 100644 index 0000000..94056ca --- /dev/null +++ b/src/arrow_parquet/arrow_to_pg/geography.rs @@ -0,0 +1,32 @@ +use arrow::array::{Array, BinaryArray}; + +use crate::type_compat::geometry::Geography; + +use super::{ArrowArrayToPgType, ArrowToPgAttributeContext}; + +// Geography +impl ArrowArrayToPgType for BinaryArray { + fn to_pg_type(self, _context: &ArrowToPgAttributeContext) -> Option { + if self.is_null(0) { + None + } else { + Some(self.value(0).to_vec().into()) + } + } +} + +// Geography[] +impl ArrowArrayToPgType>> for BinaryArray { + fn to_pg_type(self, _context: &ArrowToPgAttributeContext) -> Option>> { + let mut vals = vec![]; + for val in self.iter() { + if let Some(val) = val { + vals.push(Some(val.to_vec().into())); + } else { + vals.push(None); + } + } + + Some(vals) + } +} diff --git a/src/arrow_parquet/parquet_writer.rs b/src/arrow_parquet/parquet_writer.rs index ce1faab..dc63ea8 100644 --- a/src/arrow_parquet/parquet_writer.rs +++ b/src/arrow_parquet/parquet_writer.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{cell::RefCell, rc::Rc, sync::Arc}; use arrow::array::RecordBatch; use arrow_schema::SchemaRef; @@ -24,7 +24,7 @@ use crate::{ parquet_copy_hook::copy_to_split_dest_receiver::CopyToParquetOptions, pgrx_utils::{collect_attributes_for, CollectAttributesFor}, type_compat::{ - geometry::{geoparquet_metadata_json_from_tupledesc, reset_postgis_context}, + geometry::{geoparquet_metadata_to_json, reset_postgis_context, GeoparquetMetadata}, map::reset_map_context, }, PG_BACKEND_TOKIO_RUNTIME, @@ -43,6 +43,7 @@ pub(crate) struct ParquetWriterContext { parquet_writer: AsyncArrowWriter, schema: SchemaRef, attribute_contexts: Vec, + geoparquet_metadata: Rc>, options: CopyToParquetOptions, } @@ -71,44 +72,41 @@ impl ParquetWriterContext { let schema = Arc::new(schema); - let writer_props = Self::writer_props(tupledesc, options); + let writer_props = Self::writer_props(options); let parquet_writer = parquet_writer_from_uri(&uri_info, schema.clone(), writer_props); - let attribute_contexts = - collect_pg_to_arrow_attribute_contexts(&attributes, &schema.fields); + let geoparquet_metadata = Rc::new(RefCell::new(GeoparquetMetadata::new())); + + let attribute_contexts = collect_pg_to_arrow_attribute_contexts( + &attributes, + &schema.fields, + geoparquet_metadata.clone(), + ); ParquetWriterContext { parquet_writer, schema, attribute_contexts, + geoparquet_metadata, options, } } - fn writer_props(tupledesc: &PgTupleDesc, options: CopyToParquetOptions) -> WriterProperties { + fn writer_props(options: CopyToParquetOptions) -> WriterProperties { let compression = PgParquetCompressionWithLevel { compression: options.compression, compression_level: options.compression_level, }; - let mut writer_props_builder = WriterProperties::builder() - .set_statistics_enabled(EnabledStatistics::Page) + WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::Chunk) + .set_offset_index_disabled(true) // use page index instead of data page statistics .set_compression(compression.into()) .set_max_row_group_size(options.row_group_size as usize) + .set_created_by("pg_parquet".to_string()) .set_writer_version(options.parquet_version.into()) - .set_created_by("pg_parquet".to_string()); - - let geometry_columns_metadata_value = geoparquet_metadata_json_from_tupledesc(tupledesc); - - 
if geometry_columns_metadata_value.is_some() { - let key_value_metadata = KeyValue::new("geo".into(), geometry_columns_metadata_value); - - writer_props_builder = - writer_props_builder.set_key_value_metadata(Some(vec![key_value_metadata])); - } - - writer_props_builder.build() + .build() } // write_tuples writes the tuples to the parquet file. It flushes the in progress rows to a new row group @@ -132,10 +130,32 @@ impl ParquetWriterContext { } } + fn write_geoparquet_metadata_if_exists(&mut self) { + let geoparquet_metadata = self.geoparquet_metadata.borrow_mut(); + + let has_geoparquet_columns = !geoparquet_metadata.columns.is_empty(); + + if !has_geoparquet_columns { + // No geoparquet columns to write, so we skip writing metadata. + return; + } + + let geometry_columns_metadata_value = geoparquet_metadata_to_json(&geoparquet_metadata); + + let key_value_metadata = KeyValue::new("geo".into(), geometry_columns_metadata_value); + + self.parquet_writer + .append_key_value_metadata(key_value_metadata); + } + // finalize flushes the in progress rows to a new row group and finally writes metadata to the file. pub(crate) fn finalize(&mut self) { PG_BACKEND_TOKIO_RUNTIME - .block_on(self.parquet_writer.finish()) + .block_on(async { + self.write_geoparquet_metadata_if_exists(); + + self.parquet_writer.finish().await + }) .unwrap_or_else(|e| panic!("failed to finish parquet writer: {e}")); } diff --git a/src/arrow_parquet/pg_to_arrow.rs b/src/arrow_parquet/pg_to_arrow.rs index c5b658d..9749919 100644 --- a/src/arrow_parquet/pg_to_arrow.rs +++ b/src/arrow_parquet/pg_to_arrow.rs @@ -21,7 +21,7 @@ use crate::{ }, type_compat::{ fallback_to_text::{reset_fallback_to_text_context, FallbackToText}, - geometry::{is_postgis_geometry_type, Geometry}, + geometry::{Geography, Geometry}, map::{is_map_type, reset_map_type_context, Map}, pg_arrow_type_conversions::{ extract_precision_and_scale_from_numeric_typmod, should_write_numeric_as_text, @@ -38,6 +38,7 @@ pub(crate) mod date; pub(crate) mod fallback_to_text; pub(crate) mod float4; pub(crate) mod float8; +pub(crate) mod geography; pub(crate) mod geometry; pub(crate) mod int2; pub(crate) mod int4; @@ -205,6 +206,8 @@ fn to_arrow_primitive_array( to_arrow_primitive_array!(Map, tuples, attribute_context) } else if attribute_context.is_geometry() { to_arrow_primitive_array!(Geometry, tuples, attribute_context) + } else if attribute_context.is_geography() { + to_arrow_primitive_array!(Geography, tuples, attribute_context) } else { reset_fallback_to_text_context( attribute_context.typoid(), @@ -314,6 +317,8 @@ fn to_arrow_list_array( to_arrow_list_array!(pgrx::Array, tuples, element_context) } else if element_context.is_geometry() { to_arrow_list_array!(pgrx::Array, tuples, element_context) + } else if element_context.is_geography() { + to_arrow_list_array!(pgrx::Array, tuples, element_context) } else { reset_fallback_to_text_context(element_typoid, element_typmod); diff --git a/src/arrow_parquet/pg_to_arrow/context.rs b/src/arrow_parquet/pg_to_arrow/context.rs index 3d13b41..47315ff 100644 --- a/src/arrow_parquet/pg_to_arrow/context.rs +++ b/src/arrow_parquet/pg_to_arrow/context.rs @@ -1,4 +1,8 @@ -use std::ops::Deref; +use std::{ + cell::{RefCell, RefMut}, + ops::Deref, + rc::Rc, +}; use arrow_schema::{FieldRef, Fields}; use pgrx::{ @@ -6,10 +10,14 @@ use pgrx::{ PgTupleDesc, }; +use crate::type_compat::geometry::{ + is_postgis_geography_type, is_postgis_geometry_type, GeoparquetMetadata, +}; + use super::{ array_element_typoid, collect_attributes_for, 
domain_array_base_elem_type, extract_precision_and_scale_from_numeric_typmod, is_array_type, is_composite_type, is_map_type, - is_postgis_geometry_type, tuple_desc, CollectAttributesFor, + tuple_desc, CollectAttributesFor, }; // PgToArrowAttributeContext contains the information needed to convert a PostgreSQL attribute @@ -35,15 +43,27 @@ impl Deref for PgToArrowAttributeContext { } impl PgToArrowAttributeContext { - fn new(name: String, attnum: i16, typoid: Oid, typmod: i32, fields: Fields) -> Self { + fn new( + name: String, + attnum: i16, + typoid: Oid, + typmod: i32, + fields: Fields, + geoparquet_metadata: Rc>, + ) -> Self { let field = fields .iter() .find(|field| field.name() == &name) .unwrap_or_else(|| panic!("failed to find field {name}")) .clone(); - let type_context = - PgToArrowAttributeTypeContext::new(attnum, typoid, typmod, field.clone()); + let type_context = PgToArrowAttributeTypeContext::new( + attnum, + typoid, + typmod, + field.clone(), + geoparquet_metadata, + ); Self { field, @@ -77,6 +97,8 @@ impl PgToArrowAttributeContext { pub(crate) enum PgToArrowAttributeTypeContext { Primitive { is_geometry: bool, + is_geography: bool, + geoparquet_metadata: Rc>, precision: Option, scale: Option, }, @@ -94,19 +116,29 @@ pub(crate) enum PgToArrowAttributeTypeContext { impl PgToArrowAttributeTypeContext { // constructors - fn new(attnum: i16, typoid: Oid, typmod: i32, field: FieldRef) -> Self { + fn new( + attnum: i16, + typoid: Oid, + typmod: i32, + field: FieldRef, + geoparquet_metadata: Rc>, + ) -> Self { if is_array_type(typoid) { - Self::new_array(attnum, typoid, typmod, field) + Self::new_array(attnum, typoid, typmod, field, geoparquet_metadata) } else if is_composite_type(typoid) { - Self::new_composite(typoid, typmod, field) + Self::new_composite(typoid, typmod, field, geoparquet_metadata) } else if is_map_type(typoid) { - Self::new_map(attnum, typoid, field) + Self::new_map(attnum, typoid, field, geoparquet_metadata) } else { - Self::new_primitive(typoid, typmod) + Self::new_primitive(typoid, typmod, geoparquet_metadata) } } - fn new_primitive(typoid: Oid, typmod: i32) -> Self { + fn new_primitive( + typoid: Oid, + typmod: i32, + geoparquet_metadata: Rc>, + ) -> Self { let precision; let scale; if typoid == NUMERICOID { @@ -120,14 +152,24 @@ impl PgToArrowAttributeTypeContext { let is_geometry = is_postgis_geometry_type(typoid); + let is_geography = is_postgis_geography_type(typoid); + Self::Primitive { is_geometry, + is_geography, + geoparquet_metadata, precision, scale, } } - fn new_array(attnum: i16, typoid: Oid, typmod: i32, field: FieldRef) -> Self { + fn new_array( + attnum: i16, + typoid: Oid, + typmod: i32, + field: FieldRef, + geoparquet_metadata: Rc>, + ) -> Self { let element_typoid = array_element_typoid(typoid); let element_typmod = typmod; @@ -141,6 +183,7 @@ impl PgToArrowAttributeTypeContext { element_typoid, element_typmod, element_field.clone(), + geoparquet_metadata, ); let element_context = Box::new(PgToArrowAttributeContext { @@ -154,7 +197,12 @@ impl PgToArrowAttributeTypeContext { Self::Array { element_context } } - fn new_composite(typoid: Oid, typmod: i32, field: FieldRef) -> Self { + fn new_composite( + typoid: Oid, + typmod: i32, + field: FieldRef, + geoparquet_metadata: Rc>, + ) -> Self { let tupledesc = tuple_desc(typoid, typmod); let fields = match field.data_type() { arrow::datatypes::DataType::Struct(fields) => fields.clone(), @@ -163,7 +211,8 @@ impl PgToArrowAttributeTypeContext { let attributes = 
collect_attributes_for(CollectAttributesFor::Other, &tupledesc); - let attribute_contexts = collect_pg_to_arrow_attribute_contexts(&attributes, &fields); + let attribute_contexts = + collect_pg_to_arrow_attribute_contexts(&attributes, &fields, geoparquet_metadata); Self::Composite { tupledesc, @@ -171,7 +220,12 @@ impl PgToArrowAttributeTypeContext { } } - fn new_map(attnum: i16, typoid: Oid, field: FieldRef) -> Self { + fn new_map( + attnum: i16, + typoid: Oid, + field: FieldRef, + geoparquet_metadata: Rc>, + ) -> Self { let (entries_typoid, entries_typmod) = domain_array_base_elem_type(typoid); let entries_field = match field.data_type() { @@ -184,6 +238,7 @@ impl PgToArrowAttributeTypeContext { entries_typoid, entries_typmod, entries_field.clone(), + geoparquet_metadata, ); let entries_context = Box::new(PgToArrowAttributeContext { @@ -270,11 +325,29 @@ impl PgToArrowAttributeTypeContext { _ => false, } } + + pub(crate) fn is_geography(&self) -> bool { + match &self { + PgToArrowAttributeTypeContext::Primitive { is_geography, .. } => *is_geography, + _ => false, + } + } + + pub(crate) fn geoparquet_metadata(&'_ self) -> RefMut<'_, GeoparquetMetadata> { + match self { + PgToArrowAttributeTypeContext::Primitive { + geoparquet_metadata, + .. + } => geoparquet_metadata.borrow_mut(), + _ => panic!("missing geoparquet metadata in context"), + } + } } pub(crate) fn collect_pg_to_arrow_attribute_contexts( attributes: &[FormData_pg_attribute], fields: &Fields, + geoparquet_metadata: Rc>, ) -> Vec { let mut attribute_contexts = vec![]; @@ -290,6 +363,7 @@ pub(crate) fn collect_pg_to_arrow_attribute_contexts( attribute_typoid, attribute_typmod, fields.clone(), + geoparquet_metadata.clone(), ); attribute_contexts.push(attribute_context); diff --git a/src/arrow_parquet/pg_to_arrow/geography.rs b/src/arrow_parquet/pg_to_arrow/geography.rs new file mode 100644 index 0000000..019ad1e --- /dev/null +++ b/src/arrow_parquet/pg_to_arrow/geography.rs @@ -0,0 +1,74 @@ +use std::sync::Arc; + +use arrow::array::{ArrayRef, BinaryArray, ListArray}; + +use crate::{ + arrow_parquet::{arrow_utils::arrow_array_offsets, pg_to_arrow::PgTypeToArrowArray}, + type_compat::geometry::{Geography, GeometryColumn}, +}; + +use super::PgToArrowAttributeContext; + +// Geography +impl PgTypeToArrowArray for Vec> { + fn to_arrow_array(self, context: &PgToArrowAttributeContext) -> ArrayRef { + // update GeoParquet metadata + let geometry_column: GeometryColumn = self + .clone() + .into_iter() + .flatten() + .collect::>() + .into(); + + context + .geoparquet_metadata() + .update(context.field().name(), geometry_column); + + // prepare WKB array + let wkbs = self + .iter() + .map(|geography| geography.as_deref()) + .collect::>(); + let wkb_array = BinaryArray::from(wkbs); + Arc::new(wkb_array) + } +} + +// Geography[] +impl PgTypeToArrowArray for Vec>>> { + fn to_arrow_array(self, element_context: &PgToArrowAttributeContext) -> ArrayRef { + let (offsets, nulls) = arrow_array_offsets(&self); + + // gets rid of the first level of Option, then flattens the inner Vec>. 
+ let pg_array = self.into_iter().flatten().flatten().collect::>(); + + // update GeoParquet metadata + let geometry_column: GeometryColumn = pg_array + .clone() + .into_iter() + .flatten() + .collect::>() + .into(); + + element_context + .geoparquet_metadata() + .update(element_context.field().name(), geometry_column); + + // prepare WKB array + let wkbs = pg_array + .iter() + .map(|geography| geography.as_deref()) + .collect::>(); + + let wkb_array = BinaryArray::from(wkbs); + + let list_array = ListArray::new( + element_context.field(), + offsets, + Arc::new(wkb_array), + Some(nulls), + ); + + Arc::new(list_array) + } +} diff --git a/src/arrow_parquet/pg_to_arrow/geometry.rs b/src/arrow_parquet/pg_to_arrow/geometry.rs index 2a1d754..b4bd49a 100644 --- a/src/arrow_parquet/pg_to_arrow/geometry.rs +++ b/src/arrow_parquet/pg_to_arrow/geometry.rs @@ -4,14 +4,27 @@ use arrow::array::{ArrayRef, BinaryArray, ListArray}; use crate::{ arrow_parquet::{arrow_utils::arrow_array_offsets, pg_to_arrow::PgTypeToArrowArray}, - type_compat::geometry::Geometry, + type_compat::geometry::{Geometry, GeometryColumn}, }; use super::PgToArrowAttributeContext; // Geometry impl PgTypeToArrowArray for Vec> { - fn to_arrow_array(self, _context: &PgToArrowAttributeContext) -> ArrayRef { + fn to_arrow_array(self, context: &PgToArrowAttributeContext) -> ArrayRef { + // update GeoParquet metadata + let geometry_column: GeometryColumn = self + .clone() + .into_iter() + .flatten() + .collect::>() + .into(); + + context + .geoparquet_metadata() + .update(context.field().name(), geometry_column); + + // prepare WKB array let wkbs = self .iter() .map(|geometry| geometry.as_deref()) @@ -29,6 +42,19 @@ impl PgTypeToArrowArray for Vec>>> { // gets rid of the first level of Option, then flattens the inner Vec>. 
let pg_array = self.into_iter().flatten().flatten().collect::>(); + // update GeoParquet metadata + let geometry_column: GeometryColumn = pg_array + .clone() + .into_iter() + .flatten() + .collect::>() + .into(); + + element_context + .geoparquet_metadata() + .update(element_context.field().name(), geometry_column); + + // prepare WKB array let wkbs = pg_array .iter() .map(|geometry| geometry.as_deref()) diff --git a/src/arrow_parquet/schema/parse_schema.rs b/src/arrow_parquet/schema/parse_schema.rs index f3b0377..7ad9766 100644 --- a/src/arrow_parquet/schema/parse_schema.rs +++ b/src/arrow_parquet/schema/parse_schema.rs @@ -21,7 +21,7 @@ use crate::{ is_composite_type, tuple_desc, CollectAttributesFor, }, type_compat::{ - geometry::is_postgis_geometry_type, + geometry::{is_postgis_geography_type, is_postgis_geometry_type}, map::is_map_type, pg_arrow_type_conversions::{ extract_precision_and_scale_from_numeric_typmod, should_write_numeric_as_text, @@ -407,7 +407,7 @@ fn parse_primitive_schema( BYTEAOID => Field::new(scalar_name, arrow::datatypes::DataType::Binary, nullable), OIDOID => Field::new(scalar_name, arrow::datatypes::DataType::UInt32, nullable), _ => { - if is_postgis_geometry_type(typoid) { + if is_postgis_geometry_type(typoid) || is_postgis_geography_type(typoid) { Field::new(scalar_name, arrow::datatypes::DataType::Binary, nullable) } else { Field::new(scalar_name, arrow::datatypes::DataType::Utf8, nullable) diff --git a/src/parquet_copy_hook/copy_utils.rs b/src/parquet_copy_hook/copy_utils.rs index fe0d30d..acd809c 100644 --- a/src/parquet_copy_hook/copy_utils.rs +++ b/src/parquet_copy_hook/copy_utils.rs @@ -24,7 +24,7 @@ use crate::{ parquet_writer::{DEFAULT_ROW_GROUP_SIZE, DEFAULT_ROW_GROUP_SIZE_BYTES}, uri_utils::ParsedUriInfo, }, - pgrx_utils::extension_exists, + pgrx_utils::is_extension_created, }; use self::field_ids::FieldIds; @@ -483,12 +483,12 @@ fn is_copy_parquet_stmt(p_stmt: &PgBox, copy_from: bool) -> bool { // this is why we check them after the uri checks // crunchy_query_engine should not be created - if extension_exists("crunchy_query_engine") { + if is_extension_created("crunchy_query_engine") { return false; } // pg_parquet should be created - if !extension_exists("pg_parquet") { + if !is_extension_created("pg_parquet") { ereport!( PgLogLevel::WARNING, PgSqlErrorCode::ERRCODE_WARNING, diff --git a/src/pgrx_tests/common.rs b/src/pgrx_tests/common.rs index d6a2837..a455769 100644 --- a/src/pgrx_tests/common.rs +++ b/src/pgrx_tests/common.rs @@ -358,7 +358,7 @@ pub(crate) fn timetz_array_to_utc_time_array( ) } -pub(crate) fn extension_exists(extension_name: &str) -> bool { +pub(crate) fn is_extension_available(extension_name: &str) -> bool { let quoted_extension = spi::quote_literal(extension_name); let query = format!("select count(*) = 1 from pg_available_extensions where name = {quoted_extension}"); @@ -375,7 +375,7 @@ pub(crate) fn write_record_batch_to_parquet(schema: SchemaRef, record_batch: Rec } pub(crate) fn create_crunchy_map_type(key_type: &str, val_type: &str) -> String { - assert!(extension_exists("crunchy_map")); + assert!(is_extension_available("crunchy_map")); let command = format!("SELECT crunchy_map.create('{key_type}','{val_type}')::text;",); Spi::get_one(&command).unwrap().unwrap() diff --git a/src/pgrx_tests/copy_from_coerce.rs b/src/pgrx_tests/copy_from_coerce.rs index c4c2e82..f8e61f6 100644 --- a/src/pgrx_tests/copy_from_coerce.rs +++ b/src/pgrx_tests/copy_from_coerce.rs @@ -5,7 +5,7 @@ mod tests { use std::vec; use 
crate::pgrx_tests::common::{ - extension_exists, write_record_batch_to_parquet, LOCAL_TEST_FILE_PATH, + is_extension_available, write_record_batch_to_parquet, LOCAL_TEST_FILE_PATH, }; use crate::type_compat::pg_arrow_type_conversions::{ date_to_i32, time_to_i64, timestamp_to_i64, timestamptz_to_i64, timetz_to_i64, @@ -886,7 +886,7 @@ mod tests { #[pg_test] fn test_coerce_map_types() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } @@ -946,7 +946,7 @@ mod tests { #[pg_test] fn test_coerce_list_of_map() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } diff --git a/src/pgrx_tests/copy_options.rs b/src/pgrx_tests/copy_options.rs index c6b71d7..b2f06ce 100644 --- a/src/pgrx_tests/copy_options.rs +++ b/src/pgrx_tests/copy_options.rs @@ -6,8 +6,8 @@ mod tests { use crate::{ pgrx_tests::common::{ - create_crunchy_map_type, extension_exists, CopyOptionValue, FileCleanup, TestTable, - LOCAL_TEST_FILE_PATH, + create_crunchy_map_type, is_extension_available, CopyOptionValue, FileCleanup, + TestTable, LOCAL_TEST_FILE_PATH, }, PgParquetCompression, }; @@ -799,7 +799,7 @@ mod tests { #[pg_test] fn test_auto_field_ids_with_map() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } @@ -891,7 +891,7 @@ mod tests { #[pg_test] fn test_explicit_field_ids_with_map() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } diff --git a/src/pgrx_tests/copy_type_roundtrip.rs b/src/pgrx_tests/copy_type_roundtrip.rs index 3fd6d93..7b7b77c 100644 --- a/src/pgrx_tests/copy_type_roundtrip.rs +++ b/src/pgrx_tests/copy_type_roundtrip.rs @@ -1,15 +1,17 @@ #[pgrx::pg_schema] mod tests { + use std::collections::HashSet; use std::vec; use crate::pgrx_tests::common::{ assert_double, assert_float, assert_int_text_map, assert_json, assert_jsonb, - extension_exists, timetz_array_to_utc_time_array, timetz_to_utc_time, TestResult, + is_extension_available, timetz_array_to_utc_time_array, timetz_to_utc_time, TestResult, TestTable, LOCAL_TEST_FILE_PATH, }; use crate::type_compat::fallback_to_text::FallbackToText; use crate::type_compat::geometry::{ - Geometry, GeometryColumnsMetadata, GeometryEncoding, GeometryType, + Geography, Geometry, GeometryEdgeType, GeometryEncoding, GeometryOrientation, GeometryType, + GeoparquetMetadata, EPSG_4326_PROJJSON, }; use crate::type_compat::map::Map; use crate::type_compat::pg_arrow_type_conversions::{ @@ -372,7 +374,7 @@ mod tests { #[pg_test] fn test_map() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } @@ -393,7 +395,7 @@ mod tests { #[pg_test] fn test_map_array() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } @@ -425,7 +427,7 @@ mod tests { #[pg_test] fn test_table_with_multiple_maps() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { return; } @@ -498,7 +500,7 @@ mod tests { #[should_panic(expected = "MapArray entries cannot contain nulls")] fn test_map_null_entries() { 
// Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { // let the test pass panic!("MapArray entries cannot contain nulls"); } @@ -524,7 +526,7 @@ mod tests { )] fn test_map_null_entry_key() { // Skip the test if crunchy_map extension is not available - if !extension_exists("crunchy_map") { + if !is_extension_available("crunchy_map") { // let the test pass panic!("Found unmasked nulls for non-nullable StructArray field \\\"key\\\""); } @@ -1037,13 +1039,17 @@ mod tests { #[pg_test] fn test_geometry() { - // Skip the test if postgis extension is not available - if !extension_exists("postgis") { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { return; } - let query = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; - Spi::run(query).unwrap(); + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); let test_table = TestTable::::new("geometry".into()); test_table.insert("INSERT INTO test_expected (a) VALUES (ST_GeomFromText('POINT(1 1)')), @@ -1055,13 +1061,17 @@ mod tests { #[pg_test] fn test_geometry_array() { - // Skip the test if postgis extension is not available - if !extension_exists("postgis") { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { return; } - let query = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; - Spi::run(query).unwrap(); + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); let test_table = TestTable::>>::new("geometry[]".into()); test_table.insert("INSERT INTO test_expected (a) VALUES (array[ST_GeomFromText('POINT(1 1)'), ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'), null]), (null), (array[]::geometry[]);"); @@ -1070,13 +1080,17 @@ mod tests { #[pg_test] fn test_geometry_geoparquet_metadata() { - // Skip the test if postgis extension is not available - if !extension_exists("postgis") { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { return; } - let query = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; - Spi::run(query).unwrap(); + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); let copy_to_query = format!( "COPY (SELECT ST_GeomFromText('POINT(1 1)')::geometry(point) as a, @@ -1101,7 +1115,7 @@ mod tests { .unwrap() .unwrap(); - let geoparquet_metadata: GeometryColumnsMetadata = + let geoparquet_metadata: GeoparquetMetadata = serde_json::from_value(geoparquet_metadata_json.0).unwrap(); // assert common metadata @@ -1115,7 +1129,7 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("a").unwrap().geometry_types, - 
vec![GeometryType::Point] + HashSet::from([GeometryType::Point]) ); // linestring @@ -1125,7 +1139,7 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("b").unwrap().geometry_types, - vec![GeometryType::LineString] + HashSet::from([GeometryType::LineString]) ); // polygon @@ -1135,7 +1149,7 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("c").unwrap().geometry_types, - vec![GeometryType::Polygon] + HashSet::from([GeometryType::Polygon]) ); // multipoint @@ -1145,7 +1159,7 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("d").unwrap().geometry_types, - vec![GeometryType::MultiPoint] + HashSet::from([GeometryType::MultiPoint]) ); // multilinestring @@ -1155,7 +1169,7 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("e").unwrap().geometry_types, - vec![GeometryType::MultiLineString] + HashSet::from([GeometryType::MultiLineString]) ); // multipolygon @@ -1165,7 +1179,7 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("f").unwrap().geometry_types, - vec![GeometryType::MultiPolygon] + HashSet::from([GeometryType::MultiPolygon]) ); // geometrycollection @@ -1175,7 +1189,236 @@ mod tests { ); assert_eq!( geoparquet_metadata.columns.get("g").unwrap().geometry_types, - vec![GeometryType::GeometryCollection] + HashSet::from([GeometryType::GeometryCollection]) + ); + } + + #[pg_test] + fn test_geography() { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { + return; + } + + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); + + let test_table = TestTable::::new("geography".into()); + test_table.insert("INSERT INTO test_expected (a) VALUES (ST_GeogFromText('POINT(1 1)')), + (ST_GeogFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')), + (ST_GeogFromText('LINESTRING(0 0, 1 1)')), + (null);"); + test_table.assert_expected_and_result_rows(); + } + + #[pg_test] + fn test_geography_array() { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { + return; + } + + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); + + let test_table = TestTable::>>::new("geography[]".into()); + test_table.insert("INSERT INTO test_expected (a) VALUES (array[ST_GeogFromText('POINT(1 1)'), ST_GeogFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'), null]), (null), (array[]::geography[]);"); + test_table.assert_expected_and_result_rows(); + } + + #[pg_test] + fn test_geography_geoparquet_metadata() { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { + return; + } + + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); + + let copy_to_query = format!( + "COPY (SELECT g FROM (VALUES 
(ST_GeogFromText('POINT(1 1)')::geography(point)), + (ST_GeogFromText('LINESTRING(0 0, 1 1)')::geography(linestring)), + (ST_GeogFromText('POLYGON((0 0, 1 1, 2 2, 0 0))')::geography(polygon)), + (ST_GeogFromText('MULTIPOINT((0 0), (1 1))')::geography(multipoint)), + (ST_GeogFromText('MULTILINESTRING((0 0, 1 1), (2 2, 3 3))')::geography(multilinestring)), + (ST_GeogFromText('MULTIPOLYGON(((0 0, 1 1, 2 2, 0 0)), ((3 3, 4 4, 5 5, 3 3)))')::geography(multipolygon)), + (ST_GeogFromText('GEOMETRYCOLLECTION(POINT(1 1), LINESTRING(0 0, 1 1))')::geography(geometrycollection)) + ) g(g)) + TO '{LOCAL_TEST_FILE_PATH}' WITH (format parquet);", + ); + Spi::run(copy_to_query.as_str()).unwrap(); + + // Check geoparquet metadata + let geoparquet_metadata_query = format!( + "select encode(value, 'escape')::jsonb + from parquet.kv_metadata('{LOCAL_TEST_FILE_PATH}') + where encode(key, 'escape') = 'geo';", + ); + let geoparquet_metadata_json = Spi::get_one::(geoparquet_metadata_query.as_str()) + .unwrap() + .unwrap(); + + let geoparquet_metadata: GeoparquetMetadata = + serde_json::from_value(geoparquet_metadata_json.0).unwrap(); + + // assert metadata + assert_eq!(geoparquet_metadata.version, "1.1.0"); + assert_eq!(geoparquet_metadata.primary_column, "g"); + + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().encoding, + GeometryEncoding::WKB + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().geometry_types, + HashSet::from([ + GeometryType::Point, + GeometryType::LineString, + GeometryType::Polygon, + GeometryType::MultiPoint, + GeometryType::MultiLineString, + GeometryType::MultiPolygon, + GeometryType::GeometryCollection, + ]) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().bbox, + Some(vec![0.0, 0.0, 5.0, 5.0]) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().crs, + Some(serde_json::from_str(EPSG_4326_PROJJSON).unwrap()) + ); + assert_eq!(geoparquet_metadata.columns.get("g").unwrap().edges, None); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().orientation, + None + ); + } + + #[pg_test] + fn test_geography_polygon_geoparquet_metadata() { + // Skip the test if postgis or postgis_sfcgal extension is not available + if !is_extension_available("postgis") || !is_extension_available("postgis_sfcgal") { + return; + } + + let create_postgis = "DROP EXTENSION IF EXISTS postgis; CREATE EXTENSION postgis;"; + Spi::run(create_postgis).unwrap(); + + let create_postgis_sfcgal = + "DROP EXTENSION IF EXISTS postgis_sfcgal; CREATE EXTENSION postgis_sfcgal;"; + Spi::run(create_postgis_sfcgal).unwrap(); + + // planar polygon + let copy_to_query = format!( + "COPY (SELECT ST_GeogFromText('POLYGON((0 0, 1 1, -1 1, 0 0))')::geography(polygon) g) + TO '{LOCAL_TEST_FILE_PATH}' WITH (format parquet);", + ); + Spi::run(copy_to_query.as_str()).unwrap(); + + // Check geoparquet metadata + let geoparquet_metadata_query = format!( + "select encode(value, 'escape')::jsonb + from parquet.kv_metadata('{LOCAL_TEST_FILE_PATH}') + where encode(key, 'escape') = 'geo';", + ); + let geoparquet_metadata_json = Spi::get_one::(geoparquet_metadata_query.as_str()) + .unwrap() + .unwrap(); + + let geoparquet_metadata: GeoparquetMetadata = + serde_json::from_value(geoparquet_metadata_json.0).unwrap(); + + // assert metadata + assert_eq!(geoparquet_metadata.version, "1.1.0"); + assert_eq!(geoparquet_metadata.primary_column, "g"); + + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().encoding, + GeometryEncoding::WKB + ); + assert_eq!( + 
geoparquet_metadata.columns.get("g").unwrap().geometry_types, + HashSet::from([GeometryType::Polygon]) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().bbox, + Some(vec![-1.0, 0.0, 1.0, 1.0]) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().crs, + Some(serde_json::from_str(EPSG_4326_PROJJSON).unwrap()) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().edges, + Some(GeometryEdgeType::Planar) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().orientation, + Some(GeometryOrientation::CounterClockwise) + ); + + // spherical polygon + let copy_to_query = format!( + "COPY (SELECT ST_GeogFromText('POLYGON((1 1 0, 1 2 0, 2 2 0, 2 1 0, 1 1 1))') g) + TO '{LOCAL_TEST_FILE_PATH}' WITH (format parquet);", + ); + Spi::run(copy_to_query.as_str()).unwrap(); + + // Check geoparquet metadata + let geoparquet_metadata_query = format!( + "select encode(value, 'escape')::jsonb + from parquet.kv_metadata('{LOCAL_TEST_FILE_PATH}') + where encode(key, 'escape') = 'geo';", + ); + let geoparquet_metadata_json = Spi::get_one::(geoparquet_metadata_query.as_str()) + .unwrap() + .unwrap(); + + let geoparquet_metadata: GeoparquetMetadata = + serde_json::from_value(geoparquet_metadata_json.0).unwrap(); + + // assert metadata + assert_eq!(geoparquet_metadata.version, "1.1.0"); + assert_eq!(geoparquet_metadata.primary_column, "g"); + + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().encoding, + GeometryEncoding::WKB + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().geometry_types, + HashSet::from([GeometryType::Polygon]) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().bbox, + Some(vec![1.0, 1.0, 0.0, 2.0, 2.0, 1.0]) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().crs, + Some(serde_json::from_str(EPSG_4326_PROJJSON).unwrap()) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().edges, + Some(GeometryEdgeType::Spherical) + ); + assert_eq!( + geoparquet_metadata.columns.get("g").unwrap().orientation, + None ); } diff --git a/src/pgrx_utils.rs b/src/pgrx_utils.rs index 96b8366..73dbbd6 100644 --- a/src/pgrx_utils.rs +++ b/src/pgrx_utils.rs @@ -107,7 +107,7 @@ pub(crate) fn domain_array_base_elem_type(domain_typoid: Oid) -> (Oid, i32) { (array_element_typoid(base_array_typoid), base_array_typmod) } -pub(crate) fn extension_exists(extension_name: &str) -> bool { +pub(crate) fn is_extension_created(extension_name: &str) -> bool { let extension_name = extension_name.as_pg_cstr(); let extension_oid = unsafe { get_extension_oid(extension_name, true) }; extension_oid != InvalidOid diff --git a/src/type_compat/geometry.rs b/src/type_compat/geometry.rs index 91ab184..eddb77d 100644 --- a/src/type_compat/geometry.rs +++ b/src/type_compat/geometry.rs @@ -1,18 +1,23 @@ -use std::{collections::HashMap, ffi::CString, ops::Deref}; +use std::{ + collections::{HashMap, HashSet}, + ffi::CString, + ops::Deref, + str::FromStr, +}; use once_cell::sync::OnceCell; use pgrx::{ - datum::UnboxDatum, + datum::{DatumWithOid, UnboxDatum}, pg_sys::{ get_extension_oid, makeString, Anum_pg_type_oid, AsPgCStr, Datum, GetSysCacheOid, InvalidOid, LookupFuncName, Oid, OidFunctionCall1Coll, SysCacheIdentifier::TYPENAMENSP, BYTEAOID, }, - FromDatum, IntoDatum, PgList, PgTupleDesc, Spi, + FromDatum, IntoDatum, PgList, Spi, }; use serde::{Deserialize, Serialize}; -use crate::pgrx_utils::{collect_attributes_for, CollectAttributesFor}; +use crate::pgrx_utils::array_typoid; // we need to reset the postgis context at each 
copy start static mut POSTGIS_CONTEXT: OnceCell = OnceCell::new(); @@ -48,7 +53,15 @@ pub(crate) fn is_postgis_geometry_type(typoid: Oid) -> bool { false } -#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub(crate) fn is_postgis_geography_type(typoid: Oid) -> bool { + if let Some(geography_typoid) = get_postgis_context().geography_typoid { + return typoid == geography_typoid; + } + + false +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub(crate) enum GeometryType { Point, LineString, @@ -59,92 +72,471 @@ pub(crate) enum GeometryType { GeometryCollection, } -impl GeometryType { - fn from_typmod(typmod: i32) -> Option { - // see postgis: https://github.com/postgis/postgis/blob/2845d3f37896e64ad24a2ee6863213b297da1301/liblwgeom/liblwgeom.h.in#L194 - let geom_type = (typmod & 0x000000FC) >> 2; - - match geom_type { - 1 => Some(GeometryType::Point), - 2 => Some(GeometryType::LineString), - 3 => Some(GeometryType::Polygon), - 4 => Some(GeometryType::MultiPoint), - 5 => Some(GeometryType::MultiLineString), - 6 => Some(GeometryType::MultiPolygon), - 7 => Some(GeometryType::GeometryCollection), - _ => None, +impl FromStr for GeometryType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "point" => Ok(Self::Point), + "linestring" => Ok(Self::LineString), + "polygon" => Ok(Self::Polygon), + "multipoint" => Ok(Self::MultiPoint), + "multilinestring" => Ok(Self::MultiLineString), + "multipolygon" => Ok(Self::MultiPolygon), + "geometrycollection" => Ok(Self::GeometryCollection), + _ => Err(format!("Invalid GeometryType: {s}")), } } } -#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub(crate) enum GeometryEncoding { // only WKB is supported for now #[allow(clippy::upper_case_acronyms)] WKB, } -#[derive(Debug, Serialize, Deserialize)] -pub(crate) struct GeometryColumn { - pub(crate) encoding: GeometryEncoding, - pub(crate) geometry_types: Vec, +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) enum GeometryOrientation { + #[serde(rename = "counterclockwise")] + CounterClockwise, } -#[derive(Debug, Serialize, Deserialize)] -pub(crate) struct GeometryColumnsMetadata { - pub(crate) version: String, - pub(crate) primary_column: String, - pub(crate) columns: HashMap, +impl FromStr for GeometryOrientation { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "counterclockwise" => Ok(Self::CounterClockwise), + _ => Err(format!("Invalid GeometryOrientation: {s}")), + } + } } -impl GeometryColumnsMetadata { - fn from_tupledesc(tupledesc: &PgTupleDesc) -> Option { - let mut columns = HashMap::new(); - let mut primary_column = String::new(); +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub(crate) enum GeometryEdgeType { + #[serde(rename = "planar")] + Planar, + #[serde(rename = "spherical")] + Spherical, +} - let attributes = collect_attributes_for(CollectAttributesFor::CopyTo, tupledesc); +impl FromStr for GeometryEdgeType { + type Err = String; - for attribute in attributes { - let attribute_typoid = attribute.type_oid().value(); + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "planar" => Ok(Self::Planar), + "spherical" => Ok(Self::Spherical), + _ => Err(format!("Invalid GeometryEdgeType: {s}")), + } + } +} - if !is_postgis_geometry_type(attribute_typoid) { - continue; - } +// (min_x, min_y, min_z, max_x, max_y, max_z) if 3d +// (min_x, min_y, max_x, max_y) if 2d 
+// (min_x, max_x) if 1d +pub(crate) type GeometryBbox = Vec; - let typmod = attribute.type_mod(); +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct GeometryColumn { + pub(crate) encoding: GeometryEncoding, + pub(crate) geometry_types: HashSet, + pub(crate) crs: Option, // e.g. "EPSG:4326" in pproj format + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) orientation: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) edges: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) bbox: Option, +} + +impl GeometryColumn { + fn merge(&mut self, other: Self) { + if self.crs != other.crs { + // have different crs, set it to null + self.crs = None; + } + + if self.orientation != other.orientation { + // have different orientation, set it to null + self.orientation = None; + } + + if self.edges != other.edges { + // have different edges, set it to null + self.edges = None; + } - let geometry_types = if let Some(geom_type) = GeometryType::from_typmod(typmod) { - vec![geom_type] + // update geometry types by merging the two sets + self.geometry_types.extend(other.geometry_types); + + // update bbox + if let Some(old_bbox) = &self.bbox { + if let Some(new_bbox) = &other.bbox { + if new_bbox.len() == 2 && old_bbox.len() == 2 { + self.bbox = Some(vec![ + old_bbox[0].min(new_bbox[0]), + old_bbox[1].max(new_bbox[1]), + ]); + } else if new_bbox.len() == 2 && old_bbox.len() == 4 { + self.bbox = Some(vec![ + old_bbox[0].min(new_bbox[0]), + old_bbox[1], + old_bbox[2].max(new_bbox[1]), + old_bbox[3], + ]); + } else if new_bbox.len() == 2 && old_bbox.len() == 6 { + self.bbox = Some(vec![ + old_bbox[0].min(new_bbox[0]), + old_bbox[1], + old_bbox[2], + old_bbox[3].max(new_bbox[1]), + old_bbox[4], + old_bbox[5], + ]); + } else if new_bbox.len() == 4 && old_bbox.len() == 2 { + self.bbox = Some(vec![ + new_bbox[0].min(old_bbox[0]), + new_bbox[1], + new_bbox[2].max(old_bbox[1]), + new_bbox[3], + ]); + } else if new_bbox.len() == 4 && old_bbox.len() == 4 { + self.bbox = Some(vec![ + new_bbox[0].min(old_bbox[0]), + new_bbox[1].min(old_bbox[1]), + new_bbox[2].max(old_bbox[2]), + new_bbox[3].max(old_bbox[3]), + ]); + } else if new_bbox.len() == 4 && old_bbox.len() == 6 { + self.bbox = Some(vec![ + old_bbox[0].min(new_bbox[0]), + old_bbox[1].min(new_bbox[1]), + old_bbox[2], + old_bbox[3].max(new_bbox[2]), + old_bbox[4].max(new_bbox[3]), + old_bbox[5], + ]); + } else if new_bbox.len() == 6 && old_bbox.len() == 2 { + self.bbox = Some(vec![ + new_bbox[0].min(old_bbox[0]), + new_bbox[1], + new_bbox[2], + new_bbox[3].max(old_bbox[1]), + new_bbox[4], + new_bbox[5], + ]); + } else if new_bbox.len() == 6 && old_bbox.len() == 4 { + self.bbox = Some(vec![ + new_bbox[0].min(old_bbox[0]), + new_bbox[1].min(old_bbox[1]), + new_bbox[2], + new_bbox[3].max(old_bbox[2]), + new_bbox[4].max(old_bbox[3]), + new_bbox[5], + ]); + } else if new_bbox.len() == 6 && old_bbox.len() == 6 { + self.bbox = Some(vec![ + new_bbox[0].min(old_bbox[0]), + new_bbox[1].min(old_bbox[1]), + new_bbox[2].min(old_bbox[2]), + new_bbox[3].max(old_bbox[3]), + new_bbox[4].max(old_bbox[4]), + new_bbox[5].max(old_bbox[5]), + ]); + } } else { - vec![] - }; + // new bbox is None, keep the old one + self.bbox = Some(old_bbox.clone()); + } + } else { + // no old bbox, set the new one + self.bbox = other.bbox; + } + } +} - let encoding = GeometryEncoding::WKB; +impl From> for GeometryColumn { + fn from(datum_with_oid: DatumWithOid<'_>) -> Self { + debug_assert!( + { + if 
!crate::pgrx_utils::is_array_type(datum_with_oid.oid()) { + false + } else { + let element_oid = crate::pgrx_utils::array_element_typoid(datum_with_oid.oid()); + is_postgis_geometry_type(element_oid) || is_postgis_geography_type(element_oid) + } + }, + "expected geometry or geography type" + ); + + let query = if get_postgis_context().postgis_sfcgal_ext_exists { + "select + -- geometry type + array_agg(distinct regexp_replace(st_geometrytype(g::geometry), '^ST_', '')) as geom_types, + + -- srid_name + case + when count(distinct st_srid(g::geometry)) > 1 then null + else ( + select s.auth_name || ':' || s.auth_srid + FROM spatial_ref_sys s + where s.srid = any_value(st_srid(g::geometry)) + LIMIT 1 + ) + end :: text as srid_name, + + -- orientation + case + when count(distinct case + when regexp_replace(st_geometrytype(g::geometry), '^ST_', '') = 'Polygon' then cg_orientation(g::geometry) + else -1000 + end) = 1 and any_value(case + when regexp_replace(st_geometrytype(g::geometry), '^ST_', '') = 'Polygon' then cg_orientation(g::geometry) + else -1000 + end) = -1 then 'counterclockwise' + else null + end as orientation, + + -- edges + case + when count(distinct case + when regexp_replace(st_geometrytype(g::geometry), '^ST_', '') = 'Polygon' then + case + when CG_IsPlanar(g::geometry) = true then 'planar' + when CG_IsPlanar(g::geometry) = false then 'spherical' + else 'none' + end + else 'none' + end) = 1 then + any_value(case + when regexp_replace(st_geometrytype(g::geometry), '^ST_', '') = 'Polygon' then + case + when CG_IsPlanar(g::geometry) = true then 'planar' + when CG_IsPlanar(g::geometry) = false then 'spherical' + else null + end + else null + end) + else null + end as edges, + + -- bbox + case + when count(distinct st_ndims(g::geometry)) = 1 and any_value(st_ndims(g::geometry)) = 1 then + array[ + min(ST_XMin(g::geometry)), + max(ST_XMax(g::geometry)) + ] + when count(distinct st_ndims(g::geometry)) = 1 and any_value(st_ndims(g::geometry)) = 2 then + array[ + min(ST_XMin(g::geometry)), + min(ST_YMin(g::geometry)), + max(ST_XMax(g::geometry)), + max(ST_YMax(g::geometry)) + ] + when count(distinct st_ndims(g::geometry)) = 1 and any_value(st_ndims(g::geometry)) = 3 then + array[ + min(ST_XMin(g::geometry)), + min(ST_YMin(g::geometry)), + min(ST_ZMin(g::geometry)), + max(ST_XMax(g::geometry)), + max(ST_YMax(g::geometry)), + max(ST_ZMax(g::geometry)) + ] + else array[]::float8[] + end as bbox + from unnest($1::geometry[]) as g;" + } else { + "select + -- geometry type + array_agg(distinct regexp_replace(st_geometrytype(g::geometry), '^ST_', '')) as geom_types, + + -- srid_name + case + when count(distinct st_srid(g::geometry)) > 1 then null + else ( + select s.auth_name || ':' || s.auth_srid + FROM spatial_ref_sys s + where s.srid = any_value(st_srid(g::geometry)) + LIMIT 1 + ) + end :: text as srid_name, + + -- orientation (needs postgis_sfcgal extension) + null as orientation, + + -- edges (needs postgis_sfcgal extension) + null as edges, + + -- bbox + case + when count(distinct st_ndims(g::geometry)) = 1 and any_value(st_ndims(g::geometry)) = 1 then + array[ + min(ST_XMin(g::geometry)), + max(ST_XMax(g::geometry)) + ] + when count(distinct st_ndims(g::geometry)) = 1 and any_value(st_ndims(g::geometry)) = 2 then + array[ + min(ST_XMin(g::geometry)), + min(ST_YMin(g::geometry)), + max(ST_XMax(g::geometry)), + max(ST_YMax(g::geometry)) + ] + when count(distinct st_ndims(g::geometry)) = 1 and any_value(st_ndims(g::geometry)) = 3 then + array[ + min(ST_XMin(g::geometry)), + 
min(ST_YMin(g::geometry)), + min(ST_ZMin(g::geometry)), + max(ST_XMax(g::geometry)), + max(ST_YMax(g::geometry)), + max(ST_ZMax(g::geometry)) + ] + else array[]::float8[] + end as bbox + from unnest($1::geometry[]) as g;" + }; - let geometry_column = GeometryColumn { - encoding, + Spi::connect(|client| { + let tup_table = client.select(query, None, &[datum_with_oid]).unwrap(); + + debug_assert!( + tup_table.len() == 1, + "expected exactly one row in the result" + ); + + let result = tup_table.into_iter().next().unwrap(); + + let geometry_types_strs: Vec = result["geom_types"] + .value::>() + .unwrap() + .unwrap(); + + let geometry_types: HashSet = geometry_types_strs + .into_iter() + .filter_map(|s| GeometryType::from_str(&s).ok()) + .collect(); + + let crs = result["srid_name"] + .value::() + .unwrap() + .and_then(|srid_name| { + // we only support EPSG:4326 for now + if srid_name == "EPSG:4326" { + Some(serde_json::from_str(EPSG_4326_PROJJSON).unwrap()) + } else { + None + } + }); + + let orientation = result["orientation"] + .value::() + .unwrap() + .and_then(|s| GeometryOrientation::from_str(&s).ok()); + + let edges = result["edges"] + .value::() + .unwrap() + .and_then(|s| GeometryEdgeType::from_str(&s).ok()); + + let bbox = result["bbox"].value::>().unwrap(); + + GeometryColumn { + encoding: GeometryEncoding::WKB, geometry_types, + crs, + orientation, + edges, + bbox, + } + }) + } +} + +impl From> for GeometryColumn { + fn from(geometries: Vec) -> Self { + if geometries.is_empty() { + return GeometryColumn { + encoding: GeometryEncoding::WKB, + geometry_types: HashSet::new(), + crs: None, + orientation: None, + edges: None, + bbox: None, }; + } - let column_name = attribute.name().to_string(); + let geom_array_datum = geometries + .into_datum() + .expect("cannot convert geometry vec to datum"); - // we use the first geometry column as the primary column - if primary_column.is_empty() { - primary_column = column_name.clone(); - } + let typoid = array_typoid(Geometry::type_oid()); - columns.insert(column_name, geometry_column); - } + let datum_with_oid = unsafe { DatumWithOid::new(geom_array_datum, typoid) }; - if columns.is_empty() { - return None; + GeometryColumn::from(datum_with_oid) + } +} + +impl From> for GeometryColumn { + fn from(geographies: Vec) -> Self { + if geographies.is_empty() { + return GeometryColumn { + encoding: GeometryEncoding::WKB, + geometry_types: HashSet::new(), + crs: None, + orientation: None, + edges: None, + bbox: None, + }; } - Some(GeometryColumnsMetadata { + let geog_array_datum = geographies + .into_datum() + .expect("cannot convert geography vec to datum"); + + let typoid = array_typoid(Geography::type_oid()); + + let datum_with_oid = unsafe { DatumWithOid::new(geog_array_datum, typoid) }; + + GeometryColumn::from(datum_with_oid) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct GeoparquetMetadata { + pub(crate) version: String, + pub(crate) primary_column: String, + pub(crate) columns: HashMap, +} + +impl GeoparquetMetadata { + pub(crate) fn new() -> Self { + Self { version: "1.1.0".into(), - primary_column, - columns, - }) + primary_column: String::new(), + columns: HashMap::new(), + } + } + + pub(crate) fn update(&mut self, column_name: &str, geometry_column: GeometryColumn) { + if geometry_column.geometry_types.is_empty() { + // do not update for empty geometry columns + return; + } + + // we use the first geometry column as the primary column + if self.primary_column.is_empty() { + self.primary_column = 
column_name.to_string(); + } + + if let Some(old_geometry_column) = self.columns.get_mut(column_name) { + old_geometry_column.merge(geometry_column); + } else { + self.columns + .insert(column_name.to_string(), geometry_column); + } } } @@ -154,21 +546,21 @@ impl GeometryColumnsMetadata { // \"primary_column\":\"a\", // \"columns\":{\"a\":{\"encoding\":\"WKB\", \"geometry_types\":[\"Point\"]}, // \"b\":{\"encoding\":\"WKB\", \"geometry_types\":[\"LineString\"]}}}" -pub(crate) fn geoparquet_metadata_json_from_tupledesc(tupledesc: &PgTupleDesc) -> Option { - let geometry_columns_metadata = GeometryColumnsMetadata::from_tupledesc(tupledesc); - - geometry_columns_metadata.map(|metadata| { - serde_json::to_string(&metadata).unwrap_or_else(|_| { - panic!("failed to serialize geometry columns metadata {metadata:?}") - }) +pub(crate) fn geoparquet_metadata_to_json(geoparquet_metadata: &GeoparquetMetadata) -> String { + serde_json::to_string(geoparquet_metadata).unwrap_or_else(|_| { + panic!("failed to serialize geometry columns metadata {geoparquet_metadata:?}") }) } #[derive(Debug, PartialEq, Clone)] struct PostgisContext { + postgis_sfcgal_ext_exists: bool, geometry_typoid: Option, - st_asbinary_funcoid: Option, - st_geomfromwkb_funcoid: Option, + geography_typoid: Option, + geom_towkb_funcoid: Option, + geog_towkb_funcoid: Option, + geom_fromwkb_funcoid: Option, + geog_fromwkb_funcoid: Option, } impl PostgisContext { @@ -182,26 +574,60 @@ impl PostgisContext { let postgis_ext_schema_oid = postgis_ext_oid.map(|_| Self::extension_schema_oid()); - let st_asbinary_funcoid = postgis_ext_oid.map(|postgis_ext_oid| { - Self::st_asbinary_funcoid( + let postgis_sfcgal_ext_exists = postgis_ext_oid + .map(|_| { + let postgis_sfcgal_ext_oid = + unsafe { get_extension_oid("postgis_sfcgal".as_pg_cstr(), true) }; + postgis_sfcgal_ext_oid != InvalidOid + }) + .unwrap_or(false); + + let geom_towkb_funcoid = postgis_ext_oid.map(|postgis_ext_oid| { + Self::towkb_funcoid( + postgis_ext_oid, + postgis_ext_schema_oid.expect("expected postgis is created"), + "geometry", + ) + }); + + let geog_towkb_funcoid = postgis_ext_oid.map(|postgis_ext_oid| { + Self::towkb_funcoid( postgis_ext_oid, postgis_ext_schema_oid.expect("expected postgis is created"), + "geography", ) }); - let st_geomfromwkb_funcoid = postgis_ext_oid.map(|_| Self::st_geomfromwkb_funcoid()); + let geom_fromwkb_funcoid = + postgis_ext_oid.map(|_| Self::from_wkb_funcoid("st_geomfromwkb")); + + let geog_fromwkb_funcoid = + postgis_ext_oid.map(|_| Self::from_wkb_funcoid("st_geogfromwkb")); let geometry_typoid = postgis_ext_oid.map(|_| { - Self::geometry_typoid( + Self::postgis_typoid( + postgis_ext_oid.expect("expected postgis is created"), + postgis_ext_schema_oid.expect("expected postgis is created"), + "geometry", + ) + }); + + let geography_typoid = postgis_ext_oid.map(|_| { + Self::postgis_typoid( postgis_ext_oid.expect("expected postgis is created"), postgis_ext_schema_oid.expect("expected postgis is created"), + "geography", ) }); Self { + postgis_sfcgal_ext_exists, geometry_typoid, - st_asbinary_funcoid, - st_geomfromwkb_funcoid, + geography_typoid, + geom_towkb_funcoid, + geog_towkb_funcoid, + geom_fromwkb_funcoid, + geog_fromwkb_funcoid, } } @@ -211,16 +637,20 @@ impl PostgisContext { .expect("postgis extension schema not found") } - fn st_asbinary_funcoid(postgis_ext_oid: Oid, postgis_ext_schema_oid: Oid) -> Oid { + fn towkb_funcoid( + postgis_ext_oid: Oid, + postgis_ext_schema_oid: Oid, + postgis_typname: &str, + ) -> Oid { unsafe { - let 
@@ -211,16 +637,20 @@ impl PostgisContext {
             .expect("postgis extension schema not found")
     }
 
-    fn st_asbinary_funcoid(postgis_ext_oid: Oid, postgis_ext_schema_oid: Oid) -> Oid {
+    fn towkb_funcoid(
+        postgis_ext_oid: Oid,
+        postgis_ext_schema_oid: Oid,
+        postgis_typname: &str,
+    ) -> Oid {
         unsafe {
-            let postgis_geometry_typoid =
-                Self::geometry_typoid(postgis_ext_oid, postgis_ext_schema_oid);
+            let postgis_typoid =
+                Self::postgis_typoid(postgis_ext_oid, postgis_ext_schema_oid, postgis_typname);
 
             let function_name = makeString("st_asbinary".as_pg_cstr());
 
             let mut function_name_list = PgList::new();
             function_name_list.push(function_name);
 
-            let mut arg_types = vec![postgis_geometry_typoid];
+            let mut arg_types = vec![postgis_typoid];
 
             LookupFuncName(
                 function_name_list.as_ptr(),
@@ -231,9 +661,9 @@ impl PostgisContext {
         }
     }
 
-    fn st_geomfromwkb_funcoid() -> Oid {
+    fn from_wkb_funcoid(from_wkb_funcname: &str) -> Oid {
         unsafe {
-            let function_name = makeString("st_geomfromwkb".as_pg_cstr());
+            let function_name = makeString(from_wkb_funcname.as_pg_cstr());
 
             let mut function_name_list = PgList::new();
             function_name_list.push(function_name);
@@ -248,33 +678,38 @@ impl PostgisContext {
         }
     }
 
-    fn geometry_typoid(postgis_ext_oid: Oid, postgis_ext_schema_oid: Oid) -> Oid {
+    fn postgis_typoid(
+        postgis_ext_oid: Oid,
+        postgis_ext_schema_oid: Oid,
+        postgis_typename: &str,
+    ) -> Oid {
         if postgis_ext_oid == InvalidOid {
             return InvalidOid;
         }
 
-        let postgis_geometry_type_name = CString::new("geometry").expect("CString::new failed");
+        let postgis_type_name = CString::new(postgis_typename).expect("CString::new failed");
 
-        let postgis_geometry_typoid = unsafe {
+        let postgis_typoid = unsafe {
             GetSysCacheOid(
                 TYPENAMENSP as _,
                 Anum_pg_type_oid as _,
-                postgis_geometry_type_name.into_datum().unwrap(),
+                postgis_type_name.into_datum().unwrap(),
                 postgis_ext_schema_oid.into_datum().unwrap(),
                 Datum::from(0), // not used key
                 Datum::from(0), // not used key
             )
         };
 
-        if postgis_geometry_typoid == InvalidOid {
+        if postgis_typoid == InvalidOid {
             return InvalidOid;
         }
 
-        postgis_geometry_typoid
+        postgis_typoid
     }
 }
 
-#[derive(Debug, PartialEq)]
+// Geometry is a wrapper around a byte vector that represents a PostGIS geometry in WKB format.
+#[derive(Debug, Clone, PartialEq)]
 pub(crate) struct Geometry(pub(crate) Vec<u8>);
 
 // we store Geometry as a WKB byte vector, and we allow it to be dereferenced as such
@@ -294,13 +729,13 @@ impl From<Vec<u8>> for Geometry {
 
 impl IntoDatum for Geometry {
     fn into_datum(self) -> Option<Datum> {
-        let st_geomfromwkb_funcoid = get_postgis_context()
-            .st_geomfromwkb_funcoid
-            .expect("st_geomfromwkb_funcoid");
+        let geom_fromwkb_funcoid = get_postgis_context()
+            .geom_fromwkb_funcoid
+            .expect("geom_fromwkb_funcoid");
 
         let wkb_datum = self.0.into_datum().expect("cannot convert wkb to datum");
 
-        Some(unsafe { OidFunctionCall1Coll(st_geomfromwkb_funcoid, InvalidOid, wkb_datum) })
+        Some(unsafe { OidFunctionCall1Coll(geom_fromwkb_funcoid, InvalidOid, wkb_datum) })
     }
 
     fn type_oid() -> Oid {
@@ -319,8 +754,8 @@ impl FromDatum for Geometry {
             None
         } else {
             let st_asbinary_func_oid = get_postgis_context()
-                .st_asbinary_funcoid
-                .expect("st_asbinary_funcoid");
+                .geom_towkb_funcoid
+                .expect("geom_towkb_funcoid");
 
             let geom_datum = datum;
 
@@ -343,8 +778,8 @@ unsafe impl UnboxDatum for Geometry {
         Self: 'src,
     {
         let st_asbinary_func_oid = get_postgis_context()
-            .st_asbinary_funcoid
-            .expect("st_asbinary_funcoid");
+            .geom_towkb_funcoid
+            .expect("geom_towkb_funcoid");
 
         let geom_datum = datum.sans_lifetime();
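The `Geometry` wrapper never interprets its bytes; it only ferries raw WKB between PostGIS (`st_geomfromwkb`/`st_asbinary`) and the Parquet `BYTE_ARRAY` column. For readers unfamiliar with the format, here is a standalone round-trip of a 2D little-endian `Point(1.0 2.0)` in plain Rust, with no PostGIS involved, just to illustrate what the wrapped `Vec<u8>` contains:

```rust
fn main() {
    // Encode: byte-order flag, u32 geometry type, then coordinates.
    let mut wkb = Vec::new();
    wkb.push(1u8); // byte order: 1 = little-endian (NDR)
    wkb.extend_from_slice(&1u32.to_le_bytes()); // geometry type: 1 = Point
    wkb.extend_from_slice(&1.0f64.to_le_bytes()); // x
    wkb.extend_from_slice(&2.0f64.to_le_bytes()); // y

    // Decode it back the same way a WKB reader would.
    assert_eq!(wkb[0], 1);
    let geom_type = u32::from_le_bytes(wkb[1..5].try_into().unwrap());
    let x = f64::from_le_bytes(wkb[5..13].try_into().unwrap());
    let y = f64::from_le_bytes(wkb[13..21].try_into().unwrap());
    assert_eq!((geom_type, x, y), (1, 1.0, 2.0));
    println!("Point({x} {y})");
}
```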
@@ -355,3 +790,192 @@ unsafe impl UnboxDatum for Geometry {
         Geometry(wkb)
     }
 }
+
+// Geography is a wrapper around a byte vector that represents a PostGIS geography in WKB format.
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct Geography(pub(crate) Vec<u8>);
+
+// we store Geography as a WKB byte vector, and we allow it to be dereferenced as such
+impl Deref for Geography {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl From<Vec<u8>> for Geography {
+    fn from(wkb: Vec<u8>) -> Self {
+        Self(wkb)
+    }
+}
+
+impl IntoDatum for Geography {
+    fn into_datum(self) -> Option<Datum> {
+        let geog_fromwkb_funcoid = get_postgis_context()
+            .geog_fromwkb_funcoid
+            .expect("geog_fromwkb_funcoid");
+
+        let wkb_datum = self.0.into_datum().expect("cannot convert wkb to datum");
+
+        Some(unsafe { OidFunctionCall1Coll(geog_fromwkb_funcoid, InvalidOid, wkb_datum) })
+    }
+
+    fn type_oid() -> Oid {
+        get_postgis_context()
+            .geography_typoid
+            .expect("postgis context not initialized")
+    }
+}
+
+impl FromDatum for Geography {
+    unsafe fn from_polymorphic_datum(datum: Datum, is_null: bool, _typoid: Oid) -> Option<Self>
+    where
+        Self: Sized,
+    {
+        if is_null {
+            None
+        } else {
+            let st_asbinary_func_oid = get_postgis_context()
+                .geog_towkb_funcoid
+                .expect("geog_towkb_funcoid");
+
+            let geog_datum = datum;
+
+            let wkb_datum =
+                unsafe { OidFunctionCall1Coll(st_asbinary_func_oid, InvalidOid, geog_datum) };
+
+            let is_null = false;
+            let wkb =
+                Vec::<u8>::from_datum(wkb_datum, is_null).expect("cannot convert datum to wkb");
+
+            Some(Self(wkb))
+        }
+    }
+}
+
+unsafe impl UnboxDatum for Geography {
+    type As<'src> = Geography;
+
+    unsafe fn unbox<'src>(datum: pgrx::datum::Datum<'src>) -> Self::As<'src>
+    where
+        Self: 'src,
+    {
+        let st_asbinary_func_oid = get_postgis_context()
+            .geog_towkb_funcoid
+            .expect("geog_towkb_funcoid");
+
+        let geog_datum = datum.sans_lifetime();
+
+        let wkb_datum = OidFunctionCall1Coll(st_asbinary_func_oid, InvalidOid, geog_datum);
+
+        let is_null = false;
+        let wkb = Vec::<u8>::from_datum(wkb_datum, is_null).expect("cannot convert datum to wkb");
+
+        Geography(wkb)
+    }
+}
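`Geography` is structurally identical to `Geometry`; only the resolved OIDs differ (`st_geogfromwkb`/`geography` instead of `st_geomfromwkb`/`geometry`). Because both wrappers `Deref` to `&[u8]`, WKB helpers only need to be written once. A standalone sketch with stub newtypes showing that sharing (this deduplication is an illustration, not part of the diff):

```rust
use std::ops::Deref;

struct Geometry(Vec<u8>);
struct Geography(Vec<u8>);

impl Deref for Geometry {
    type Target = [u8];
    fn deref(&self) -> &[u8] {
        &self.0
    }
}

impl Deref for Geography {
    type Target = [u8];
    fn deref(&self) -> &[u8] {
        &self.0
    }
}

// Reads the u32 type code from a WKB header, honoring the byte-order flag.
fn wkb_type_code(wkb: &[u8]) -> Option<u32> {
    let code = wkb.get(1..5)?.try_into().ok()?;
    Some(match *wkb.first()? {
        1 => u32::from_le_bytes(code), // 1 = little-endian (NDR)
        _ => u32::from_be_bytes(code), // 0 = big-endian (XDR)
    })
}

fn main() {
    // Little-endian WKB header for a Point (type code 1); body omitted.
    let header = vec![1u8, 1, 0, 0, 0];
    let geom = Geometry(header.clone());
    let geog = Geography(header);

    // &Geometry and &Geography deref-coerce to &[u8], so one helper serves both.
    assert_eq!(wkb_type_code(&geom), Some(1));
    assert_eq!(wkb_type_code(&geog), Some(1));
}
```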
"scope": "Horizontal component of 3D system.", + "area": "World.", + "bbox": { + "south_latitude": -90, + "west_longitude": -180, + "north_latitude": 90, + "east_longitude": 180 + }, + "id": { + "authority": "EPSG", + "code": 4326 + } +}"#;