spiceai
diff --git a/‎Cargo.lock‎
Lines changed: 19 additions & 0 deletions b/‎Cargo.lock‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎crates/catalog/hadoop/Cargo.toml‎
Lines changed: 46 additions & 0 deletions b/‎crates/catalog/hadoop/Cargo.toml‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎crates/catalog/hadoop/README.md‎
Lines changed: 27 additions & 0 deletions b/‎crates/catalog/hadoop/README.md‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎crates/catalog/hadoop/src/catalog.rs‎
Lines changed: 185 additions & 0 deletions b/‎crates/catalog/hadoop/src/catalog.rs‎
Lines changed: 185 additions & 0 deletions
diff --git a/‎crates/catalog/hadoop/src/lib.rs‎
Lines changed: 23 additions & 0 deletions b/‎crates/catalog/hadoop/src/lib.rs‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎crates/catalog/hadoop/testdata/README.md‎
Lines changed: 38 additions & 0 deletions b/‎crates/catalog/hadoop/testdata/README.md‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎crates/catalog/hadoop/testdata/hadoop_warehouse/test/my_table_1/data/.00000-0-e9a16672-4fb1-46ec-881e-0e2cee8faa69-0-00001.parquet.crc‎
16 Bytes b/‎crates/catalog/hadoop/testdata/hadoop_warehouse/test/my_table_1/data/.00000-0-e9a16672-4fb1-46ec-881e-0e2cee8faa69-0-00001.parquet.crc‎
16 Bytes
diff --git a/‎crates/catalog/hadoop/testdata/hadoop_warehouse/test/my_table_1/data/.00001-1-e9a16672-4fb1-46ec-881e-0e2cee8faa69-0-00001.parquet.crc‎
16 Bytes b/‎crates/catalog/hadoop/testdata/hadoop_warehouse/test/my_table_1/data/.00001-1-e9a16672-4fb1-46ec-881e-0e2cee8faa69-0-00001.parquet.crc‎
16 Bytes
diff --git a/‎crates/catalog/hadoop/testdata/hadoop_warehouse/test/my_table_1/data/00000-0-e9a16672-4fb1-46ec-881e-0e2cee8faa69-0-00001.parquet‎
650 Bytes b/‎crates/catalog/hadoop/testdata/hadoop_warehouse/test/my_table_1/data/00000-0-e9a16672-4fb1-46ec-881e-0e2cee8faa69-0-00001.parquet‎
650 Bytes
@@ -97,6 +97,7 @@ Apache Iceberg is an active open-source project, governed under the Apache Softw
   at [Slack #rust channel](https://join.slack.com/t/apache-iceberg/shared_invite/zt-1zbov3k6e-KtJfoaxp97YfX6dPz1Bk7A).
 
 The Apache Iceberg community is built on the principles described in the [Apache Way](https://www.apache.org/theapacheway/index.html) and all who engage with the community are expected to be respectful, open, come with the best interests of the community in mind, and abide by the Apache Foundation [Code of Conduct](https://www.apache.org/foundation/policies/conduct.html).
+
 ## Users
 
 - [Databend](https://github.com/datafuselabs/databend/): An open-source cloud data warehouse that serves as a cost-effective alternative to Snowflake.
 
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+edition = { workspace = true }
+homepage = { workspace = true }
+name = "iceberg-catalog-hadoop"
+rust-version = { workspace = true }
+version = { workspace = true }
+
+categories = ["database"]
+description = "Apache Iceberg Hadoop Catalog Support"
+keywords = ["iceberg", "hadoop", "catalog"]
+license = { workspace = true }
+repository = { workspace = true }
+
+[dependencies]
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+futures = { workspace = true }
+iceberg = { workspace = true }
+opendal = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+typed-builder = { workspace = true }
+uuid = { workspace = true }
+
+[dev-dependencies]
+ctor = { workspace = true }
+iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
+port_scanner = { workspace = true }
@@ -0,0 +1,27 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+
+# Apache Iceberg Hadoop Catalog Official Native Rust Implementation
+
+[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-hadoop.svg)](https://crates.io/crates/iceberg-catalog-hadoop)
+[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-hadoop.svg)](https://docs.rs/iceberg-catalog-hadoop/latest)
+
+This crate contains the official Native Rust implementation of Apache Iceberg Hadoop Catalog.
+
+See the [API documentation](https://docs.rs/iceberg-catalog-hadoop/latest) for examples and the full API.
@@ -0,0 +1,185 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Hadoop Catalog implementation.
+
+use std::collections::HashMap;
+
+use async_trait::async_trait;
+use futures::TryStreamExt;
+use iceberg::io::FileIO;
+use iceberg::table::Table;
+use iceberg::{
+    Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation,
+    TableIdent,
+};
+use opendal::EntryMode;
+
+/// Represents a hadoop catalog backed by storage from a `FileIO`
+///
+/// Namespaces and tables are addressed as paths beneath `warehouse_root`
+/// and are discovered by listing directories through `file_io`.
+#[derive(Debug)]
+pub struct HadoopCatalog {
+    // Storage handle used to list directories under the warehouse.
+    file_io: FileIO,
+    // Root location of the warehouse; namespace paths are appended to it.
+    warehouse_root: String,
+}
+
+impl HadoopCatalog {
+    /// Builds a `HadoopCatalog` rooted at `warehouse_root`.
+    ///
+    /// `warehouse_root` should be the absolute path to the warehouse directory,
+    /// including the scheme prefix for the `FileIO`. The value is stored exactly
+    /// as given; no validation is performed here.
+    pub fn new(warehouse_root: String, file_io: FileIO) -> Self {
+        // TODO: validate the warehouse_root starts with the same scheme as the FileIO
+        HadoopCatalog {
+            warehouse_root,
+            file_io,
+        }
+    }
+}
+
+#[async_trait]
+impl Catalog for HadoopCatalog {
+    // Unsupported operations in Hadoop Catalog
+    async fn create_namespace(
+        &self,
+        _namespace: &NamespaceIdent,
+        _properties: HashMap<String, String>,
+    ) -> Result<Namespace> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Creating namespaces is not supported in hadoop catalog",
+        ))
+    }
+
+    async fn update_namespace(
+        &self,
+        _namespace: &NamespaceIdent,
+        _properties: HashMap<String, String>,
+    ) -> Result<()> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Updating namespaces is not supported in hadoop catalog",
+        ))
+    }
+
+    async fn drop_namespace(&self, _namespace: &NamespaceIdent) -> Result<()> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Dropping namespaces is not supported in hadoop catalog",
+        ))
+    }
+
+    async fn create_table(
+        &self,
+        _namespace: &NamespaceIdent,
+        _creation: TableCreation,
+    ) -> Result<Table> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Creating tables is not supported in hadoop catalog",
+        ))
+    }
+
+    async fn drop_table(&self, _table: &TableIdent) -> Result<()> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Dropping tables is not supported in hadoop catalog",
+        ))
+    }
+
+    async fn rename_table(&self, _src: &TableIdent, _dest: &TableIdent) -> Result<()> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Renaming tables is not supported in hadoop catalog",
+        ))
+    }
+
+    async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Updating tables is not supported in hadoop catalog",
+        ))
+    }
+
+    // Supported operations in Hadoop Catalog
+    async fn list_namespaces(
+        &self,
+        _parent: Option<&NamespaceIdent>,
+    ) -> Result<Vec<NamespaceIdent>> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Not implemented yet",
+        ))
+    }
+
+    async fn namespace_exists(&self, _namespace: &NamespaceIdent) -> Result<bool> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Not implemented yet",
+        ))
+    }
+
+    async fn get_namespace(&self, _namespace: &NamespaceIdent) -> Result<Namespace> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Not implemented yet",
+        ))
+    }
+
+    async fn load_table(&self, _table_identifier: &TableIdent) -> Result<Table> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Not implemented yet",
+        ))
+    }
+
+    async fn table_exists(&self, _table: &TableIdent) -> Result<bool> {
+        Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Not implemented yet",
+        ))
+    }
+
+    async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
+        // List the tables in the specified namespace
+        let path = format!("{}/{}/", self.warehouse_root, namespace.to_string());
+        let mut tables = Vec::new();
+
+        let mut lister = self.file_io.lister(&path).await?;
+        while let Some(entry) = lister.try_next().await? {
+            if matches!(entry.metadata().mode(), EntryMode::DIR) {
+                if path.ends_with(entry.path()) {
+                    // Skip the root directory itself
+                    continue;
+                }
+
+                let table_name = entry
+                    .name()
+                    .strip_suffix("/")
+                    .unwrap_or(entry.name())
+                    .to_string();
+
+                let table_ident = TableIdent {
+                    namespace: namespace.clone(),
+                    name: table_name,
+                };
+
+                // TODO: validate the directory contains metadata files
+                tables.push(table_ident);
+            }
+        }
+
+        Ok(tables)
+    }
+}
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Hadoop Catalog implementation.
+
+#![deny(missing_docs)]
+
+mod catalog;
+pub use catalog::*;
@@ -0,0 +1,38 @@
+# Hadoop Test Data
+
+* `./hadoop_warehouse` contains a single namespace `test` with 2 tables: `my_table_1` and `my_table_2`. Each table contains 2 rows.
+    * `my_table_1`:
+        ```console
++---+----+
+| id|name|
++---+----+
+|  1| foo|
+|  2| bar|
++---+----+
+        ```
+    * `my_table_2`:
+        ```console
++---+----+
+| id|name|
++---+----+
+|  3| foo|
+|  4| bar|
++---+----+
+        ```
+* `./hadoop_warehouse` was generated with `spark-shell`:
+    ```bash
+./spark-shell \
+  --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.13:1.9.2
+
+spark.conf.set("spark.sql.catalog.hadoop_prod", "org.apache.iceberg.spark.SparkCatalog")
+spark.conf.set("spark.sql.catalog.hadoop_prod.type", "hadoop")
+spark.conf.set("spark.sql.catalog.hadoop_prod.warehouse", "file:///tmp/multi_table_warehouse")
+
+spark.sql("CREATE NAMESPACE hadoop_prod.test")
+spark.sql("CREATE TABLE hadoop_prod.test.my_table_1 (id INT, name STRING) USING iceberg")
+spark.sql("INSERT INTO hadoop_prod.test.my_table_1 VALUES (1, 'foo'), (2, 'bar')")
+spark.sql("SELECT * FROM hadoop_prod.test.my_table_1").show()
+spark.sql("CREATE TABLE hadoop_prod.test.my_table_2 (id INT, name STRING) USING iceberg")
+spark.sql("INSERT INTO hadoop_prod.test.my_table_2 VALUES (3, 'foo'), (4, 'bar')")
+spark.sql("SELECT * FROM hadoop_prod.test.my_table_2").show()
+    ```