Skip to content

Commit 2a28c44

Browse files
committed
Implement native ESRI reader
1 parent 579d325 commit 2a28c44

File tree

17 files changed

+6726
-4
lines changed

17 files changed

+6726
-4
lines changed

Diff for: lib/trino-hive-formats/pom.xml

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
</properties>
1818

1919
<dependencies>
20+
<dependency>
21+
<groupId>com.esri.geometry</groupId>
22+
<artifactId>esri-geometry-api</artifactId>
23+
</dependency>
24+
2025
<dependency>
2126
<groupId>com.fasterxml.jackson.core</groupId>
2227
<artifactId>jackson-core</artifactId>

Diff for: lib/trino-hive-formats/src/main/java/io/trino/hive/formats/HiveClassNames.java

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ public final class HiveClassNames
4545
public static final String SEQUENCEFILE_INPUT_FORMAT_CLASS = "org.apache.hadoop.mapred.SequenceFileInputFormat";
4646
public static final String SYMLINK_TEXT_INPUT_FORMAT_CLASS = "org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat";
4747
public static final String TEXT_INPUT_FORMAT_CLASS = "org.apache.hadoop.mapred.TextInputFormat";
48+
public static final String ESRI_SERDE_CLASS = "com.esri.hadoop.hive.serde.EsriJsonSerDe";
49+
public static final String ESRI_INPUT_FORMAT_CLASS = "com.esri.json.hadoop.EnclosedEsriJsonInputFormat";
4850

4951
private HiveClassNames() {}
5052
}

Diff for: lib/trino-hive-formats/src/main/java/io/trino/hive/formats/esri/EsriDeserializer.java

+384
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.hive.formats.esri;
15+
16+
import com.fasterxml.jackson.core.JsonFactory;
17+
import com.fasterxml.jackson.core.JsonParser;
18+
import com.fasterxml.jackson.core.JsonToken;
19+
import com.google.common.io.Closer;
20+
import com.google.common.io.CountingInputStream;
21+
import io.trino.spi.PageBuilder;
22+
23+
import java.io.Closeable;
24+
import java.io.IOException;
25+
import java.io.InputStream;
26+
27+
import static com.fasterxml.jackson.core.JsonFactory.Feature.INTERN_FIELD_NAMES;
28+
import static io.trino.plugin.base.util.JsonUtils.jsonFactoryBuilder;
29+
import static java.util.Objects.requireNonNull;
30+
31+
public class EsriReader
32+
implements Closeable
33+
{
34+
private static final JsonFactory JSON_FACTORY = jsonFactoryBuilder()
35+
.disable(INTERN_FIELD_NAMES)
36+
.build();
37+
38+
private final CountingInputStream inputStream;
39+
private final PageBuilder pageBuilder;
40+
private final EsriDeserializer esriDeserializer;
41+
private JsonParser parser;
42+
private long readTimeNanos;
43+
private boolean closed;
44+
45+
private static final String FEATURES_ARRAY_NAME = "features";
46+
47+
public EsriReader(InputStream inputStream, PageBuilder pageBuilder, EsriDeserializer esriDeserializer)
48+
throws IOException
49+
{
50+
requireNonNull(inputStream, "inputStream is null");
51+
this.inputStream = new CountingInputStream(inputStream);
52+
this.pageBuilder = requireNonNull(pageBuilder, "pageBuilder is null");
53+
this.esriDeserializer = requireNonNull(esriDeserializer, "esriDeserializer is null");
54+
55+
this.initializeParser();
56+
}
57+
58+
private void initializeParser()
59+
throws IOException
60+
{
61+
parser = JSON_FACTORY.createParser(inputStream);
62+
63+
// Find features array
64+
while (true) {
65+
JsonToken token = parser.nextToken();
66+
if (token == null) {
67+
return;
68+
}
69+
if (token == JsonToken.START_ARRAY &&
70+
FEATURES_ARRAY_NAME.equals(parser.currentName())) {
71+
break;
72+
}
73+
}
74+
}
75+
76+
@Override
77+
public void close()
78+
throws IOException
79+
{
80+
closed = true;
81+
try (Closer closer = Closer.create()) {
82+
closer.register(inputStream);
83+
closer.register(parser);
84+
}
85+
}
86+
87+
public boolean next()
88+
throws IOException
89+
{
90+
long start = System.nanoTime();
91+
92+
try {
93+
JsonToken token = parser.nextToken();
94+
if (token == JsonToken.START_OBJECT && parser.currentName() == null) {
95+
esriDeserializer.deserialize(pageBuilder, parser);
96+
return true;
97+
}
98+
else {
99+
return false;
100+
}
101+
}
102+
finally {
103+
long duration = System.nanoTime() - start;
104+
readTimeNanos += duration;
105+
}
106+
}
107+
108+
public long getBytesRead()
109+
{
110+
return inputStream.getCount();
111+
}
112+
113+
public long getReadTimeNanos()
114+
{
115+
return readTimeNanos;
116+
}
117+
118+
public boolean isClosed()
119+
{
120+
return closed;
121+
}
122+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.hive.formats.esri;
15+
16+
/**
 * Geometry type tags, each paired with a fixed integer index.
 *
 * <p>NOTE(review): the constant names suggest these mirror the OGC/Esri geometry type
 * codes; confirm against the consumer before relying on the numeric values.
 */
public enum OGCType
{
    UNKNOWN(0),
    ST_POINT(1),
    ST_LINESTRING(2),
    ST_POLYGON(3),
    ST_MULTIPOINT(4),
    ST_MULTILINESTRING(5),
    ST_MULTIPOLYGON(6);

    // Explicit numeric tag; kept separate from ordinal() so reordering constants cannot change it
    private final int code;

    OGCType(int code)
    {
        this.code = code;
    }

    /**
     * Returns the integer index assigned to this constant.
     */
    public int getIndex()
    {
        return code;
    }
}

0 commit comments

Comments
 (0)