Skip to content

Commit 09aae45

Browse files
committed
Add lakehouse connector
1 parent b5797d2 commit 09aae45

35 files changed

+4055
-0
lines changed

Diff for: .github/workflows/ci.yml

+2
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ jobs:
348348
!:trino-ignite,
349349
!:trino-jdbc,
350350
!:trino-kafka,
351+
!:trino-lakehouse,
351352
!:trino-main,
352353
!:trino-mariadb,
353354
!:trino-memory,
@@ -468,6 +469,7 @@ jobs:
468469
- { modules: plugin/trino-iceberg, profile: minio-and-avro }
469470
- { modules: plugin/trino-ignite }
470471
- { modules: plugin/trino-kafka }
472+
- { modules: plugin/trino-lakehouse }
471473
- { modules: plugin/trino-mariadb }
472474
- { modules: plugin/trino-mongodb }
473475
- { modules: plugin/trino-mysql }

Diff for: core/trino-server/src/main/provisio/trino.xml

+6
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@
160160
</artifact>
161161
</artifactSet>
162162

163+
<artifactSet to="plugin/lakehouse">
164+
<artifact id="${project.groupId}:trino-lakehouse:zip:${project.version}">
165+
<unpack />
166+
</artifact>
167+
</artifactSet>
168+
163169
<artifactSet to="plugin/loki">
164170
<artifact id="${project.groupId}:trino-loki:zip:${project.version}">
165171
<unpack />

Diff for: docs/src/main/sphinx/connector.md

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Iceberg <connector/iceberg>
2525
Ignite <connector/ignite>
2626
JMX <connector/jmx>
2727
Kafka <connector/kafka>
28+
Lakehouse <connector/lakehouse>
2829
Loki <connector/loki>
2930
MariaDB <connector/mariadb>
3031
Memory <connector/memory>

Diff for: docs/src/main/sphinx/connector/lakehouse.md

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Lakehouse connector
2+
3+
The Lakehouse connector combines the features of the
4+
[Hive](/connector/hive), [Iceberg](/connector/iceberg),
5+
[Delta Lake](/connector/delta-lake), and [Hudi](/connector/hudi)
6+
connectors into a single connector. It allows you to query or write
7+
to data stored in multiple table types (also known as table formats)
8+
that all share the same file system and metastore service.
9+
10+
## General configuration
11+
12+
To configure the Lakehouse connector, create a catalog properties file
13+
`etc/catalog/example.properties` with the following content, replacing the
14+
properties as appropriate:
15+
16+
```text
17+
connector.name=lakehouse
18+
```
19+
20+
You must configure a [metastore for metadata](/object-storage/metastores).
21+
The `hive.metastore` property will also configure the Iceberg catalog.
22+
Do not specify `iceberg.catalog.type`.
23+
24+
You must select and configure one of the
25+
[supported file systems](lakehouse-file-system-configuration).
26+
27+
## Configuration properties
28+
29+
The following configuration properties are available:
30+
31+
:::{list-table}
32+
:widths: 30, 58, 12
33+
:header-rows: 1
34+
35+
* - Property name
36+
- Description
37+
- Default
38+
* - `lakehouse.table-type`
39+
- The default table type for newly created tables when the `type`
40+
table property is not specified. Possible values:
41+
* `HIVE`
42+
* `ICEBERG`
43+
* `DELTA`
44+
- `ICEBERG`
45+
:::
46+
47+
(lakehouse-file-system-configuration)=
48+
## File system access configuration
49+
50+
The connector supports accessing the following file systems:
51+
52+
* [](/object-storage/file-system-azure)
53+
* [](/object-storage/file-system-gcs)
54+
* [](/object-storage/file-system-s3)
55+
* [](/object-storage/file-system-hdfs)
56+
57+
You must enable and configure the specific file system access.
58+
59+
## Examples
60+
61+
Create an Iceberg table:
62+
63+
```sql
64+
CREATE TABLE iceberg_table (
65+
c1 INTEGER,
66+
c2 DATE,
67+
c3 DOUBLE
68+
)
69+
WITH (
70+
type = 'ICEBERG',
71+
format = 'PARQUET',
72+
partitioning = ARRAY['c1', 'c2'],
73+
sorted_by = ARRAY['c3']
74+
);
75+
```
76+
77+
Create a Hive table:
78+
79+
```sql
80+
CREATE TABLE hive_page_views (
81+
view_time TIMESTAMP,
82+
user_id BIGINT,
83+
page_url VARCHAR,
84+
ds DATE,
85+
country VARCHAR
86+
)
87+
WITH (
88+
type = 'HIVE',
89+
format = 'ORC',
90+
partitioned_by = ARRAY['ds', 'country'],
91+
bucketed_by = ARRAY['user_id'],
92+
bucket_count = 50
93+
);
94+
```

Diff for: plugin/trino-lakehouse/pom.xml

+266
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<parent>
6+
<groupId>io.trino</groupId>
7+
<artifactId>trino-root</artifactId>
8+
<version>473-SNAPSHOT</version>
9+
<relativePath>../../pom.xml</relativePath>
10+
</parent>
11+
12+
<artifactId>trino-lakehouse</artifactId>
13+
<packaging>trino-plugin</packaging>
14+
<description>Trino - Lakehouse connector</description>
15+
16+
<dependencies>
17+
<dependency>
18+
<groupId>com.google.guava</groupId>
19+
<artifactId>guava</artifactId>
20+
</dependency>
21+
22+
<dependency>
23+
<groupId>com.google.inject</groupId>
24+
<artifactId>guice</artifactId>
25+
</dependency>
26+
27+
<dependency>
28+
<groupId>io.airlift</groupId>
29+
<artifactId>bootstrap</artifactId>
30+
</dependency>
31+
32+
<dependency>
33+
<groupId>io.airlift</groupId>
34+
<artifactId>configuration</artifactId>
35+
</dependency>
36+
37+
<dependency>
38+
<groupId>io.airlift</groupId>
39+
<artifactId>json</artifactId>
40+
</dependency>
41+
42+
<dependency>
43+
<groupId>io.trino</groupId>
44+
<artifactId>trino-delta-lake</artifactId>
45+
</dependency>
46+
47+
<dependency>
48+
<groupId>io.trino</groupId>
49+
<artifactId>trino-filesystem-manager</artifactId>
50+
</dependency>
51+
52+
<dependency>
53+
<groupId>io.trino</groupId>
54+
<artifactId>trino-hive</artifactId>
55+
</dependency>
56+
57+
<dependency>
58+
<groupId>io.trino</groupId>
59+
<artifactId>trino-hudi</artifactId>
60+
</dependency>
61+
62+
<dependency>
63+
<groupId>io.trino</groupId>
64+
<artifactId>trino-iceberg</artifactId>
65+
</dependency>
66+
67+
<dependency>
68+
<groupId>io.trino</groupId>
69+
<artifactId>trino-metastore</artifactId>
70+
</dependency>
71+
72+
<dependency>
73+
<groupId>io.trino</groupId>
74+
<artifactId>trino-plugin-toolkit</artifactId>
75+
</dependency>
76+
77+
<dependency>
78+
<groupId>jakarta.validation</groupId>
79+
<artifactId>jakarta.validation-api</artifactId>
80+
</dependency>
81+
82+
<dependency>
83+
<groupId>org.weakref</groupId>
84+
<artifactId>jmxutils</artifactId>
85+
</dependency>
86+
87+
<dependency>
88+
<groupId>com.fasterxml.jackson.core</groupId>
89+
<artifactId>jackson-annotations</artifactId>
90+
<scope>provided</scope>
91+
</dependency>
92+
93+
<dependency>
94+
<groupId>io.airlift</groupId>
95+
<artifactId>slice</artifactId>
96+
<scope>provided</scope>
97+
</dependency>
98+
99+
<dependency>
100+
<groupId>io.opentelemetry</groupId>
101+
<artifactId>opentelemetry-api</artifactId>
102+
<scope>provided</scope>
103+
</dependency>
104+
105+
<dependency>
106+
<groupId>io.opentelemetry</groupId>
107+
<artifactId>opentelemetry-api-incubator</artifactId>
108+
<scope>provided</scope>
109+
</dependency>
110+
111+
<dependency>
112+
<groupId>io.opentelemetry</groupId>
113+
<artifactId>opentelemetry-context</artifactId>
114+
<scope>provided</scope>
115+
</dependency>
116+
117+
<dependency>
118+
<groupId>io.trino</groupId>
119+
<artifactId>trino-spi</artifactId>
120+
<scope>provided</scope>
121+
</dependency>
122+
123+
<dependency>
124+
<groupId>org.openjdk.jol</groupId>
125+
<artifactId>jol-core</artifactId>
126+
<scope>provided</scope>
127+
</dependency>
128+
129+
<dependency>
130+
<groupId>io.airlift</groupId>
131+
<artifactId>log-manager</artifactId>
132+
<scope>runtime</scope>
133+
</dependency>
134+
135+
<dependency>
136+
<groupId>io.airlift</groupId>
137+
<artifactId>configuration-testing</artifactId>
138+
<scope>test</scope>
139+
</dependency>
140+
141+
<dependency>
142+
<groupId>io.airlift</groupId>
143+
<artifactId>http-server</artifactId>
144+
<scope>test</scope>
145+
</dependency>
146+
147+
<dependency>
148+
<groupId>io.airlift</groupId>
149+
<artifactId>junit-extensions</artifactId>
150+
<scope>test</scope>
151+
</dependency>
152+
153+
<dependency>
154+
<groupId>io.airlift</groupId>
155+
<artifactId>testing</artifactId>
156+
<scope>test</scope>
157+
</dependency>
158+
159+
<dependency>
160+
<groupId>io.minio</groupId>
161+
<artifactId>minio</artifactId>
162+
<scope>test</scope>
163+
<exclusions>
164+
<exclusion>
165+
<groupId>com.github.spotbugs</groupId>
166+
<artifactId>spotbugs-annotations</artifactId>
167+
</exclusion>
168+
<exclusion>
169+
<groupId>net.jcip</groupId>
170+
<artifactId>jcip-annotations</artifactId>
171+
</exclusion>
172+
</exclusions>
173+
</dependency>
174+
175+
<dependency>
176+
<groupId>io.trino</groupId>
177+
<artifactId>trino-hive</artifactId>
178+
<type>test-jar</type>
179+
<scope>test</scope>
180+
</dependency>
181+
182+
<dependency>
183+
<groupId>io.trino</groupId>
184+
<artifactId>trino-main</artifactId>
185+
<scope>test</scope>
186+
</dependency>
187+
188+
<dependency>
189+
<groupId>io.trino</groupId>
190+
<artifactId>trino-main</artifactId>
191+
<type>test-jar</type>
192+
<scope>test</scope>
193+
</dependency>
194+
195+
<dependency>
196+
<groupId>io.trino</groupId>
197+
<artifactId>trino-spi</artifactId>
198+
<type>test-jar</type>
199+
<scope>test</scope>
200+
</dependency>
201+
202+
<dependency>
203+
<groupId>io.trino</groupId>
204+
<artifactId>trino-testing</artifactId>
205+
<scope>test</scope>
206+
</dependency>
207+
208+
<dependency>
209+
<groupId>io.trino</groupId>
210+
<artifactId>trino-testing-containers</artifactId>
211+
<scope>test</scope>
212+
</dependency>
213+
214+
<dependency>
215+
<groupId>io.trino</groupId>
216+
<artifactId>trino-testing-services</artifactId>
217+
<scope>test</scope>
218+
</dependency>
219+
220+
<dependency>
221+
<groupId>io.trino</groupId>
222+
<artifactId>trino-tpcds</artifactId>
223+
<scope>test</scope>
224+
</dependency>
225+
226+
<dependency>
227+
<groupId>io.trino</groupId>
228+
<artifactId>trino-tpch</artifactId>
229+
<scope>test</scope>
230+
</dependency>
231+
232+
<dependency>
233+
<groupId>org.assertj</groupId>
234+
<artifactId>assertj-core</artifactId>
235+
<scope>test</scope>
236+
</dependency>
237+
238+
<dependency>
239+
<groupId>org.junit.jupiter</groupId>
240+
<artifactId>junit-jupiter-api</artifactId>
241+
<scope>test</scope>
242+
</dependency>
243+
244+
<dependency>
245+
<groupId>org.junit.jupiter</groupId>
246+
<artifactId>junit-jupiter-engine</artifactId>
247+
<scope>test</scope>
248+
</dependency>
249+
</dependencies>
250+
251+
<build>
252+
<plugins>
253+
<plugin>
254+
<groupId>org.basepom.maven</groupId>
255+
<artifactId>duplicate-finder-maven-plugin</artifactId>
256+
<configuration>
257+
<ignoredResourcePatterns>
258+
<ignoredResourcePattern>iceberg-build.properties</ignoredResourcePattern>
259+
<ignoredResourcePattern>mozilla/public-suffix-list.txt</ignoredResourcePattern>
260+
<ignoredResourcePattern>mime.types</ignoredResourcePattern>
261+
</ignoredResourcePatterns>
262+
</configuration>
263+
</plugin>
264+
</plugins>
265+
</build>
266+
</project>

0 commit comments

Comments
 (0)